Compare commits

...

12 Commits

Author SHA1 Message Date
dependabot[bot] 00a694f922
Merge 6e7fddb66b into b17e5b22b6 2024-05-04 23:26:19 -03:00
zhangyang2057 b17e5b22b6
Fix macos-latest doesn't support python 3.7 issue. (#1194)
* Fix macos-latest doesn't support python 3.7 issue.

* Set macos version to 12(13 uses Xcode 15).
2024-04-26 17:03:38 +08:00
huochenghai 91ea4df975
fix fold binary (#1182) 2024-04-12 12:26:50 +08:00
Curio Yang a13d43d0f4
Feature/update docs (#1176)
* update faq

* update Homepage

* update link

* update gif link

* update gif link

* update FAQ

* update FAQ
2024-03-13 16:16:47 +08:00
huochenghai dbcde6f228
Feature/egraph extract constrains (#1175)
* add egraph-extract-constrains
* reorder SwapBinaryArgs
2024-03-11 16:02:07 +08:00
Curio Yang 1cdea27230
Feature/update docs (#1174)
* update faq

* update Homepage

* update link

* update gif link

* update gif link
2024-03-11 13:58:17 +08:00
sunnycase bb47ea5803
Revert "add Razor.Templating.Core (#1169)" (#1173)
This reverts commit 2498b1ba0c.
2024-03-08 14:40:03 +08:00
huochenghai 2498b1ba0c
add Razor.Templating.Core (#1169)
* add extract constrains

* refactor buffer schedule

* add Razor.Templating.Core

* reorder SwapBinaryArgs

* Apply code-format changes

* Update NuGet.Config

---------

Co-authored-by: zhengqihang <597323109@qq.com>
Co-authored-by: xhuohai <xhuohai@users.noreply.github.com>
Co-authored-by: sunnycase <sunnycase@live.cn>
2024-03-05 12:01:46 +08:00
zhangyang2057 bdaf0b12c6
Fix in_ci env judge with bool type. (#1171)
* Fix in_ci env judge with bool type.

* Add fix for accuracy_test.
2024-02-22 20:21:20 +08:00
zhangyang2057 24fcfffac0
Fix the missing new line issue of dynamic shape. (#1170) 2024-02-22 10:31:41 +08:00
FusionBolt 699f1980dc
Fix Melgan (#1168)
* update

* Apply code-format changes

---------

Co-authored-by: FusionBolt <FusionBolt@users.noreply.github.com>
2024-02-21 10:12:08 +08:00
dependabot[bot] 6e7fddb66b
Bump torch from 1.9.0 to 1.13.1
Bumps [torch](https://github.com/pytorch/pytorch) from 1.9.0 to 1.13.1.
- [Release notes](https://github.com/pytorch/pytorch/releases)
- [Changelog](https://github.com/pytorch/pytorch/blob/master/RELEASE.md)
- [Commits](https://github.com/pytorch/pytorch/compare/v1.9.0...v1.13.1)

---
updated-dependencies:
- dependency-name: torch
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-18 02:56:20 +00:00
32 changed files with 723 additions and 566 deletions

View File

@ -17,7 +17,7 @@ jobs:
strategy:
matrix:
config:
- {name: x86_64-macos, os: macos-latest, cmakeArgs: -DENABLE_X86SIMD=OFF, buildType: Release}
- {name: x86_64-macos, os: macos-12, cmakeArgs: -DENABLE_X86SIMD=OFF, buildType: Release}
- {name: x86_64-linux, os: ubuntu-latest, cmakeArgs: '', buildType: Release}
- {name: x86_64-windows, os: windows-latest, arch: x64, cmakeArgs: -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl, buildType: Release}
@ -79,7 +79,7 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- {name: x86_64-macos, os: macos-latest, shell: bash, rid: osx-x64, buildType: Release}
- {name: x86_64-macos, os: macos-12, shell: bash, rid: osx-x64, buildType: Release}
- {name: x86_64-linux, os: ubuntu-latest, shell: bash, rid: linux-x64, buildType: Release}
- {name: x86_64-windows, os: windows-latest, shell: bash, rid: win-x64, buildType: Release}
@ -168,7 +168,7 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- {name: x86_64-macos, os: macos-latest, shell: bash}
- {name: x86_64-macos, os: macos-12, shell: bash}
- {name: x86_64-linux, os: ubuntu-latest, shell: bash}
- {name: x86_64-windows, os: windows-latest, shell: bash}
@ -245,7 +245,7 @@ jobs:
cache-dependency-path: '**/requirements.test.txt'
- name: Install Python Packages
run:
run:
python -m pip install --upgrade pip
pip install -r requirements.test.txt

View File

@ -14,7 +14,7 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- {name: x86_64-macos, os: macos-latest, shell: bash, rid: osx-x64, buildType: Release}
- {name: x86_64-macos, os: macos-12, shell: bash, rid: osx-x64, buildType: Release}
- {name: x86_64-linux, os: ubuntu-latest, shell: bash, rid: linux-x64, buildType: Release}
- {name: x86_64-windows, os: windows-latest, shell: bash, rid: win-x64, buildType: Release}
@ -53,7 +53,7 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- {name: x86_64-macos, os: macos-latest}
- {name: x86_64-macos, os: macos-12}
- {name: x86_64-linux, os: ubuntu-latest}
- {name: x86_64-windows, os: windows-latest, arch: x64}

View File

@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
config:
- {name: x86_64-macos, os: macos-latest}
- {name: x86_64-macos, os: macos-12}
- {name: x86_64-linux, os: ubuntu-latest}
- {name: x86_64-windows, os: windows-latest}

View File

@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
config:
- { name: x86_64-macos, os: macos-latest, cmakeArgs: '', buildType: Release }
- { name: x86_64-macos, os: macos-12, cmakeArgs: '', buildType: Release }
- { name: x86_64-linux, os: ubuntu-latest, cmakeArgs: '', buildType: Release }
- { name: x86_64-windows, os: windows-latest, arch: x64, cmakeArgs: -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl, buildType: Release }
@ -109,7 +109,7 @@ jobs:
wget https://dav.sunnycase.moe/d/ci/nncase/${{matrix.config.toolchain_file}}.tar.xz -O toolchain.tar.xz
sudo tar xf toolchain.tar.xz -C $GITHUB_WORKSPACE
echo "${{matrix.config.toolchain_env}}=$GITHUB_WORKSPACE/${{matrix.config.toolchain_file}}" >> $GITHUB_ENV
wget https://dav.sunnycase.moe/d/ci/nncase/${{matrix.config.qemu}}.tgz -O qemu.tgz
sudo tar xf qemu.tgz -C /usr/local/bin
echo "TESTS_EXECUTABLE_LOADER=${{matrix.config.qemu}}" >> $GITHUB_ENV

192
README.md
View File

@ -2,74 +2,99 @@
<img src="docs/logo.png" width="400" alt="nncase" />
</div>
[![GitHub repository](https://img.shields.io/badge/github-repository-blue?logo=github&style=plastic)](https://github.com/kendryte/nncase)
[![Gitee repository](https://img.shields.io/badge/gitee-repository-blue?logo=gitee&style=plastic)](https://gitee.com/kendryte/nncase)
[![GitHub release](https://img.shields.io/github/v/release/kendryte/nncase?color=brightgreen&display_name=tag&logo=github&style=plastic)](https://github.com/kendryte/nncase/releases)
[![GitHub repository](https://img.shields.io/badge/github-repository-blue?logo=github&style=plastic)](https://github.com/kendryte/nncase) [![Gitee repository](https://img.shields.io/badge/gitee-repository-blue?logo=gitee&style=plastic)](https://gitee.com/kendryte/nncase) [![GitHub release](https://img.shields.io/github/v/release/kendryte/nncase?color=brightgreen&display_name=tag&logo=github&style=plastic)](https://github.com/kendryte/nncase/releases)
[切换中文](docs/readme_ZH.md)
`nncase` is a neural network compiler for AI accelerators.
`nncase` 是一个为 AI 加速器设计的神经网络编译器。
技术交流 QQ 群790699378
Telegram: [nncase community](https://t.me/joinchat/PPcEPZMLaTViNDI1)
Technical Discussion QQ Group: 790699378. Answer: 人工智能
## Install from binaries
[TOC]
## 从二进制安装
Download prebuilt binaries from [Release](https://github.com/kendryte/nncase/releases).
下载预编译的二进制文件 [Release](https://github.com/kendryte/nncase/releases)。
## Build from source
## 从源码编译
[Build from source](./docs/build.md)
## Supported operators
## 支持的算子
- [TFLite ops](./docs/tflite_ops.md)
- [Caffe ops](./docs/caffe_ops.md)
- [ONNX ops](./docs/onnx_ops.md)
## K210/K510
- [Usage](https://github.com/kendryte/nncase/blob/release/1.0/docs/USAGE_EN.md)
- [FAQ](https://github.com/kendryte/nncase/blob/release/1.0/docs/FAQ_EN.md)
- [使用说明](https://github.com/kendryte/nncase/blob/release/1.0/docs/USAGE_ZH.md)
- [常见问题](https://github.com/kendryte/nncase/blob/release/1.0/docs/FAQ_ZH.md)
- [Example](https://github.com/kendryte/nncase/blob/release/1.0/examples/user_guide/)
---
## K230
- [Usage](./docs/USAGE_v2_EN.md)
- [FAQ](./docs/FAQ_EN.md)
- [Example](./examples/user_guide/k230_simulate-EN.ipynb)
- [使用说明](./docs/USAGE_v2.md)
- [常见问题](./docs/FAQ_ZH.md)
- [示例](./examples/user_guide/k230_simulate-ZH.ipynb)
- [Colab run](https://colab.research.google.com/drive/1m8TTree096m5VHmq-Uc60gXyltVCgnRb?usp=sharing)
- [ *Version relationship between `nncase` and `K230_SDK`* ](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK_nncase%E7%89%88%E6%9C%AC%E5%AF%B9%E5%BA%94%E5%85%B3%E7%B3%BB.html#k230sdknncase)
- [update nncase runtime library in SDK](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK%E6%9B%B4%E6%96%B0nncase%E8%BF%90%E8%A1%8C%E6%97%B6%E5%BA%93%E6%8C%87%E5%8D%97.html)
## Resources
### Install
## 资源
### K210
- [K210_Yolo_framework](https://github.com/zhen8838/K210_Yolo_framework)
- [Shts!'s Blog (Japanese)](https://www.shtsno24.tokyo/2020/03/nncase-v020.html)
- Linux
```shell
pip install nncase nncase-kpu
```
- Windows:
```shell
1. pip install nncase
2. Download `nncase_kpu-2.x.x-py2.py3-none-win_amd64.whl` in below link.
3. pip install nncase_kpu-2.x.x-py2.py3-none-win_amd64.whl
```
All version of `nncase` and `nncase-kpu` in [Release](https://github.com/kendryte/nncase/releases).
### Supported operators
- [TFLite ops](./docs/tflite_ops.md)
- [Caffe ops](./docs/caffe_ops.md)
- [ONNX ops](./docs/onnx_ops.md)
### benchmark test
<table>
<tr> <th>kind</th> <th> model </th><th> shape </th><th> quant_type(If/W) </th><th> nncase_fps </th><th> tflite_onnx_result </th><th> accuracy </th><th> info </th></tr>
<tr>
<td rowspan='3'>Image Classification</td>
<td>mobilenetv2 </td><td> [1,224,224,3] </td><td> u8/u8 </td><td> 600.24 </td><td> top-1 = 71.3%<br/>top-5 = 90.1% </td><td> top-1 = 71.1%<br/>top-5 = 90.0% </td><td> dataset(ImageNet 2012, 50000 images)<br/> tflite </td></tr>
<tr><td>resnet50V2 </td><td> [1,3,224,224] </td><td> u8/u8 </td><td> 86.17 </td><td> top-1 = 75.44%<br/>top-5 = 92.56% </td><td> top-1 = 75.11% <br/> top-5 = 92.36% </td><td> dataset(ImageNet 2012, 50000 images)<br/> onnx</td></tr>
<tr><td>yolov8s_cls </td><td> [1,3,224,224] </td><td> u8/u8 </td><td> 130.497 </td><td> top-1 = 72.2%<br/>top-5 = 90.9% </td><td> top-1 = 72.2%<br/>top-5 = 90.8% </td><td> dataset(ImageNet 2012, 50000 images)<br/> yolov8s_cls(v8.0.207)</td></tr>
<tr>
<td rowspan='2'>Object Detection</td>
<td>yolov5s_det </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 23.645 </td><td> bbox<br/>mAP50-90 = 0.374<br/>mAP50 = 0.567 </td><td> bbox<br/>mAP50-90 = 0.369<br/>mAP50 = 0.566</td><td>dataset(coco val2017, 5000 images)<br/>yolov5s_det(v7.0 tag, rect=False, conf=0.001, iou=0.65)</td></tr>
<tr><td>yolov8s_det </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 9.373 </td><td> bbox<br/>mAP50-90 = 0.446<br/>mAP50 = 0.612<br/>mAP75 = 0.484 </td><td> bbox<br/>mAP50-90 = 0.404<br/>mAP50 = 0.593<br/>mAP75 = 0.45</td><td>dataset(coco val2017, 5000 images)<br/>yolov8s_det(v8.0.207, rect = False)</td></tr>
<tr>
<td rowspan='1'>Image Segmentation</td>
<td>yolov8s_seg </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 7.845 </td><td> bbox<br/>mAP50-90 = 0.444<br/>mAP50 = 0.606<br/>mAP75 = 0.484<br/>segm<br/>mAP50-90 = 0.371<br/>mAP50 = 0.578<br/>mAP75 = 0.396 </td><td> bbox<br/>mAP50-90 = 0.444<br/>mAP50 = 0.606<br/>mAP75 = 0.484<br/>segm<br/>mAP50-90 = 0.371<br/>mAP50 = 0.579<br/>mAP75 = 0.397</td><td> dataset(coco val2017, 5000 images)<br/>yolov8s_seg(v8.0.207, rect = False, conf_thres = 0.0008)</td></tr>
<tr>
<td rowspan='3'>Pose Estimation</td>
<td>yolov8n_pose_320 </td><td> [1,3,320,320] </td><td> u8/u8 </td><td> 36.066 </td><td> bbox<br/>mAP50-90 = 0.6<br/>mAP50 = 0.843<br/>mAP75 = 0.654<br/>keypoints<br/>mAP50-90 = 0.358<br/>mAP50 = 0.646<br/>mAP75 = 0.353 </td><td> bbox<br/>mAP50-90 = 0.6<br/>mAP50 = 0.841<br/>mAP75 = 0.656<br/>keypoints<br/>mAP50-90 = 0.359<br/>mAP50 = 0.648<br/>mAP75 = 0.357 </td><td> dataset(coco val2017, 2346 images)<br/>yolov8n_pose(v8.0.207, rect = False)</td></tr>
<tr><td>yolov8n_pose_640 </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 10.88 </td><td> bbox<br/>mAP50-90 = 0.694<br/>mAP50 = 0.909<br/>mAP75 = 0.776<br/>keypoints<br/>mAP50-90 = 0.509<br/>mAP50 = 0.798<br/>mAP75 = 0.544 </td><td> bbox<br/>mAP50-90 = 0.694<br/>mAP50 = 0.909<br/>mAP75 = 0.777<br/>keypoints<br/>mAP50-90 = 0.508<br/>mAP50 = 0.798<br/>mAP75 = 0.54 </td><td> dataset(coco val2017, 2346 images)<br/>yolov8n_pose(v8.0.207, rect = False)</td></tr>
<tr><td>yolov8s_pose </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 5.568 </td><td> bbox<br/>mAP50-90 = 0.733<br/>mAP50 = 0.925<br/>mAP75 = 0.818<br/>keypoints<br/>mAP50-90 = 0.605<br/>mAP50 = 0.857<br/>mAP75 = 0.666 </td><td> bbox<br/>mAP50-90 = 0.734<br/>mAP50 = 0.925<br/>mAP75 = 0.819<br/>keypoints<br/>mAP50-90 = 0.604<br/>mAP50 = 0.859<br/>mAP75 = 0.669</td><td> dataset(coco val2017, 2346 images)<br/>yolov8s_pose(v8.0.207, rect = False)</td></tr>
</table>
### Demo
|[eye gaze](https://developer.canaan-creative.com/devAdmin/model/download?mid=be978f1f38b8aa2f2b649185a10c2e9c&filePath=/upload/model/official/k230/yolop_lane_seg/yolop_lane_seg.zip) | [space_resize](https://developer.canaan-creative.com/devAdmin/model/download?mid=7d48cb68a499dd54daf0ced14549b142&filePath=/upload/model/official/k230/space_resize/space_resize.zip) | [face pose](https://developer.canaan-creative.com/devAdmin/model/download?mid=5b87c02b969a9e60d48b08e357c20e31&filePath=/upload/model/official/k230/face_pose/face_pose.zip) |
|---|---|---|
|<img src="https://github.com/kendryte/nncase_docs/blob/master/gif/eye_gaze_result.gif?raw=true" alt="gif"> | <img src="https://github.com/kendryte/nncase_docs/blob/master/gif/space_resize.gif?raw=true" alt="gif">| <img src="https://github.com/kendryte/nncase_docs/blob/master/gif/face_pose_result.gif?raw=true">|
---
## Architecture
## K210/K510
## 架构
- [Usage](https://github.com/kendryte/nncase/blob/release/1.0/docs/USAGE_EN.md)
- [FAQ](https://github.com/kendryte/nncase/blob/release/1.0/docs/FAQ_EN.md)
- [Example](https://github.com/kendryte/nncase/blob/release/1.0/examples/user_guide/)
<div align="center">
<img src="docs/arch.png" alt="nncase arch" />
</div>
### Supported operators
- [TFLite ops](https://github.com/kendryte/nncase/blob/release/1.0/docs/tflite_ops.md)
- [Caffe ops](https://github.com/kendryte/nncase/blob/release/1.0/docs/caffe_ops.md)
- [ONNX ops](https://github.com/kendryte/nncase/blob/release/1.0/docs/onnx_ops.md)
---
## Features
@ -80,11 +105,68 @@ Download prebuilt binaries from [Release](https://github.com/kendryte/nncase/rel
- Support post quantization from float model with calibration dataset
- Flat model with zero copy loading
## 功能
---
- 支持多输入输出网络,支持多分支结构
- 静态内存分配,不需要堆内存
- 算子合并和优化
- 支持 float 和量化 uint8 推理
- 支持训练后量化,使用浮点模型和量化校准集
- 平坦模型,支持零拷贝加载
## Architecture
<div align="center">
<img src="docs/imgs/arch.jpeg" alt="nncase arch" />
</div>
---
## Build from source
**It is recommended to install nncase directly through `pip`. At present, the source code related to k510 and K230 chips is not open source, so it is not possible to use `nncase-K510` and `nncase-kpu` (K230) directly by compiling source code.**
If there are operators in your model that `nncase` does not yet support, you can request them in the issue or implement them yourself and submit the PR. Later versions will be integrated, or contact us to provide a temporary version.
Here are the steps to compile `nncase`.
```shell
git clone https://github.com/kendryte/nncase.git
cd nncase
mkdir build && cd build
# Use Ninja
cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
ninja && ninja install
# Use make
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
make && make install
```
---
## Resources
### Canaan developer community
[Canaan developer community](https://developer.canaan-creative.com/resource) contains all resources related to K210, K510, and K230.
- 资料下载 --> Pre-compiled images available for the development boards corresponding to the three chips.
- 文档 --> Documents corresponding to the three chips.
- 模型库 --> Examples and code for industrial, security, educational and other scenarios that can be run on the K210 and K230.
- 模型训练 --> The model training platform for K210 and K230 supports the training of various scenarios.
### Bilibili
- [Canaan AI tutorial and application demonstration](https://space.bilibili.com/677429436)
### K210 related repo
- [K210_Yolo_framework](https://github.com/zhen8838/K210_Yolo_framework)
- [Shts!&#39;s Blog (Japanese)](https://www.shtsno24.tokyo/2020/03/nncase-v020.html)
- [Examples](https://github.com/kendryte/canmv_examples/tree/main/01-K210)
### K230 related repo
- C: [K230_SDK](https://github.com/kendryte/k230_sdk)
- [Documents](https://github.com/kendryte/k230_docs)
- [K230 end-to-end tutorial](https://github.com/kendryte/K230_training_scripts)
- MicroPython: [Canmv_k230](https://github.com/kendryte/k230_canmv)
- [Documents](https://github.com/kendryte/k230_canmv_docs)
- [Examples](https://github.com/kendryte/canmv_examples/tree/main/02-K230)
---

View File

@ -4,41 +4,68 @@
## 1. Error installing `whl` package
### 1.1 Q: `xxx.whl is not a supported wheel on this platform`
### 1.1 `xxx.whl is not a supported wheel on this platform`
A: Upgrade pip >= 20.3 using `pip install --upgrade pip`
A: Upgrade pip >= 20.3.
```shell
pip install --upgrade pip
```
---
## 2. Compile-time errors
### 2.1 "System.NotSupportedException"
#### 2.1.1 Q: Compile model reported error "System.NotSupportedException: Not Supported *** op: XXX"
### 2.1 Compile model reported error "System.NotSupportedException: Not Supported *** op: XXX"
A: This exception indicates that there are operators, `XXX`, that are not yet supported. You can create an issue in [nncase Github Issue](https://github.com/kendryte/nncase/issues). In the current directory `***_ops.md`, you can view the operators already supported in each inference framework.
If 'XXX' belongs to quantization-related operators such as `FAKE_QUANT`, `DEQUANTIZE`, `QUANTIZE`, it indicates that the current model is a quantized model, and 'nncase' does not currently support such models, please compile `kmodel` using a floating point model.
### 2.2 "System.IO.IOException"
#### 2.2.1 Q: Downloading the `nncase` repository and compiling it yourself and running test gives this error, `The configured user limit (128) on the number of inotify instances has been reached, or the per-process limit on the number of open file descriptors has been reached`.
### 2.2 "The configured user limit (128) on the number of inotify instances has been reached, or the per-process limit on the number of open file descriptors has been reached."
A: Use `sudo gedit /proc/sys/fs/inotify/max_user_instances` to change 128 to a larger value.
### 2.3 `initialize` error
### 2.3 `RuntimeError: Failed to initialize hostfxr`
#### 2.3.1 Q"RuntimeError: Failed to initialize hostfxr" appears when compiling the kmodel.
ANeed to install dotnet-sdk-7.0.
A1Need to install dotnet-7.0.
- Linux:
```shell
sudo apt-get update
sudo apt-get install dotnet-sdk-7.0
```
- Windows: Refer to MicroSoft official website.
### 2.4 "KeyNotFoundException: The given key 'K230' was not present in the dictionary"
A: Need to install `nncase-kpu`.
- Linux: `pip install nncase-kpu`
- Windows: Sorry for that you need to download the `whl` package in [nncase github repo](https://github.com/kendryte/nncase/tags) and install it manually.
> Before install `nncase`, please make sure that the version of `nncase` is consistent with the version of `nncase-kpu`.
```shell
> pip show nncase | grep "Version:"
Version: 2.8.0
(Linux) > pip install nncase-kpu==2.8.0
(Windows)> pip install nncase_kpu-2.8.0-py2.py3-none-win_amd64.whl
```
---
## 3. Runtime errors
### 3.1 Q: Compiling `kmodel` is fine, but when inferring, the error `nncase.simulator.k230.sc: not found`occurs.
### 3.1 When inferring, the error `nncase.simulator.k230.sc: not found` occurs.
A: First, make sure that the path of the nncase installation is added to the PATH environment variable. You need to check whether the versions of `nncase` and `nncase-kpu` are the same.
Or these situations:
- `"nncase.simulator.k230.sc: Permission denied."`
- `"Input/output error."`
A: Make sure that the path of the nncase installation is added to the `PATH` environment variable. You need to check whether the versions of `nncase` and `nncase-kpu` are the same.
```shell
root@a52f1cacf581:/mnt# pip list | grep nncase
@ -52,13 +79,24 @@ If inconsistent, install the same version of the Python package `pip install nnc
## 4. Runtime error on k230 development board
### 4.1 Q: `data.size_bytes() == size = false (bool)`
### 4.1 `data.size_bytes() == size = false (bool)`
A: The above situation is usually caused by an error in the input data file of the app inference, which does not match the model input shape or the model input type. Especially when pre-processing is configured, you need to check ` input_shape` and `input_type ` of input data, after adding pre-processing operation, relevant nodes are added to the model, and the input node will also be changed. If `input_shape `, `input_type `are different from the original model, the newly configured `shape `, `type` should be used to generate input data.
### 4.2 Q: `std::bad_alloc`
### 4.2 `std::bad_alloc`
A: Usually it is caused by memory allocation failure, you can do the following troubleshooting.
- Check whether the generated `kmodel` exceeds the current available memory.
- Check whether the generated `kmodel` exceeds the current available system memory.
- Check whether the generated `kmodel` exceeds the currently available system memory.
- Check App for memory leaks.
### 4.3 throw error when load model
The exception `terminate: Invalid kmodel` is thrown when attempting to load a `kmodel` as below.
```CPP
interp.load_model(ifs).expect("Invalid kmodel");
```
AThe issue arises due to a mismatch between the nncase version used when compiling the kmodel and the current SDK version. Please refer to the [SDK-nncase Version Correspondence](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK_nncase%E7%89%88%E6%9C%AC%E5%AF%B9%E5%BA%94%E5%85%B3%E7%B3%BB.html) for a lookup, and follow the [Update the nncase Runtime Library Guide](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK%E6%9B%B4%E6%96%B0nncase%E8%BF%90%E8%A1%8C%E6%97%B6%E5%BA%93%E6%8C%87%E5%8D%97.html) to resolve the problem.

View File

@ -2,39 +2,66 @@
## 1. 安装 `whl`包出错
### 1.1 Q`xxx.whl is not a supported wheel on this platform.`
### 1.1 `xxx.whl is not a supported wheel on this platform.`
A升级 pip >= 20.3 `pip install --upgrade pip`
A升级 pip >= 20.3
```shell
pip install --upgrade pip
```
---
## 2.编译模型时报错
### 2.1 `System.NotSupportedException`
#### 2.1.1 Q编译模型报错“System.NotSupportedException: Not Supported *** op: XXX”。
### 2.1 编译模型报错“System.NotSupportedException: Not Supported *** op: XXX”。
A该异常表明 `XXX`算子尚未支持,可以在[nncase Github Issue](https://github.com/kendryte/nncase/issues)中提需求。当前目录下 `***_ops.md`文档,可以查看各个推理框架中已经支持的算子。
如果 `XXX`属于 `FAKE_QUANT`、`DEQUANTIZE`、`QUANTIZE`等量化相关的算子,表明当前模型属于量化模型,`nncase`目前不支持这类模型,请使用浮点模型来编译 `kmodel`
### 2.2 `System.IO.IOException`
### 2.2 "The configured user limit (128) on the number of inotify instances has been reached, or the per-process limit on the number of open file descriptors has been reached"。
#### 2.2.1 Q下载 `nncase`仓库自己编译后运行test出现这个错误"The configured user limit (128) on the number of inotify instances has been reached, or the per-process limit on the number of open file descriptors has been reached"
A使用 `sudo gedit /proc/sys/fs/inotify/max_user_instances`修改128为更大的值即可
A1使用 `sudo gedit /proc/sys/fs/inotify/max_user_instances`修改128为更大的值即可。
### 2.3 `RuntimeError: Failed to initialize hostfxr`
### 2.3 `initialize`相关
#### 2.3.1 Q编译模型出现如下错误`RuntimeError: Failed to initialize hostfxr`
A1需要安装dotnet-7.0
A需要安装dotnet-sdk-7.0
- Linux:
```shell
sudo apt-get update
sudo apt-get install dotnet-sdk-7.0
```
- Windows: 请自行查阅微软官方文档。
### 2.4 "KeyNotFoundException: The given key 'K230' was not present in the dictionary"
A需要安装nncase-kpu
- Linux使用pip安装nncase-kpu `pip install nncase-kpu`
- Windows在[nncase github tags界面](https://github.com/kendryte/nncase/tags)下载对应版本的whl包然后使用pip安装。
>安装nncase-kpu之前请先检查nncase版本然后安装与nncase版本一致的nncase-kpu。
```shell
> pip show nncase | grep "Version:"
Version: 2.8.0
(Linux) > pip install nncase-kpu==2.8.0
(Windows)> pip install nncase_kpu-2.8.0-py2.py3-none-win_amd64.whl
```
---
## 3. 推理时报错
### 3.1 Q在编译kmodel正常 但是推理的时候出现 `nncase.simulator.k230.sc: not found`的错误。
### 3.1 推理的时候出现 `nncase.simulator.k230.sc: not found`
A将nncase的安装路径加入到 `PATH`环境变量中同时检查一下nncase和nncase-kpu版本是否一致。
或者以下情况:
- `"nncase.simulator.k230.sc: Permission denied."`
- `"Input/output error."`
A将nncase的安装路径加入到 `PATH`环境变量中,同时检查一下`nncase`和`nncase-kpu`版本是否一致。
```shell
root@a52f1cacf581:/mnt# pip list | grep nncase
@ -48,13 +75,23 @@ nncase-kpu 2.1.1.20230721
## 4. k230开发板推理时报错
### 4.1 Q`data.size_bytes() == size = false (bool)`
### 4.1 `data.size_bytes() == size = false (bool)`
A以上这种情况通常有是app推理时的输入数据文件有错误与模型输入shape不匹配或者与模型输入type不匹配。尤其当配置了前处理时需要检查这两个属性添加前处理操作后模型中增加了相关的节点输入节点也会发生变化。如果 `input_shape`、`input_type`和原始模型不同,则需要以新配置的 `shape``type`为准来生成输入数据。
### 4.2 Q抛出 `std::bad_alloc`异常
### 4.2 抛出 `std::bad_alloc`异常
A通常是因为内存分配失败导致的可做如下排查。
- 检查生成的kmodel是否超过当前系统可用内存
- 检查App是否存在内存泄露
### 4.3 加载模型时抛出异常
加载`kmodel`代码如下时,抛出异常 `terminate:Invalid kmodel`
```CPP
interp.load_model(ifs).expect("Invalid kmodel");
```
A是由于编译`kmodel`时的nncase版本与当前SDK版本不匹配导致请按照[SDK、nncase版本对应关系](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK_nncase%E7%89%88%E6%9C%AC%E5%AF%B9%E5%BA%94%E5%85%B3%E7%B3%BB.html)查询,并按照[更新nncase运行时库教程](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK%E6%9B%B4%E6%96%B0nncase%E8%BF%90%E8%A1%8C%E6%97%B6%E5%BA%93%E6%8C%87%E5%8D%97.html)解决。

BIN
docs/imgs/arch.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 412 KiB

165
docs/readme_ZH.md Normal file
View File

@ -0,0 +1,165 @@
<div align="center">
<img src="logo.png" width="400" alt="nncase" />
</div>
[![GitHub repository](https://img.shields.io/badge/github-repository-blue?logo=github&style=plastic)](https://github.com/kendryte/nncase) [![Gitee repository](https://img.shields.io/badge/gitee-repository-blue?logo=gitee&style=plastic)](https://gitee.com/kendryte/nncase) [![GitHub release](https://img.shields.io/github/v/release/kendryte/nncase?color=brightgreen&display_name=tag&logo=github&style=plastic)](https://github.com/kendryte/nncase/releases)
[Switch to English](../README.md)
`nncase` 是一个为 AI 加速器设计的神经网络编译器。
技术交流 QQ 群790699378 答案:人工智能
Telegram: [nncase community](https://t.me/joinchat/PPcEPZMLaTViNDI1)
[TOC]
---
## K230
- [使用说明](./USAGE_v2.md)
- [常见问题](./FAQ_ZH.md)
- [示例](../examples/user_guide/k230_simulate-ZH.ipynb)
- [Colab 在线示例](https://colab.research.google.com/drive/1m8TTree096m5VHmq-Uc60gXyltVCgnRb?usp=sharing)
- [ *nncase与K230_SDK版本对应关系说明* ](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK_nncase%E7%89%88%E6%9C%AC%E5%AF%B9%E5%BA%94%E5%85%B3%E7%B3%BB.html#k230sdknncase)
- [SDK中更新nncase运行时库](https://developer.canaan-creative.com/k230/dev/zh/03_other/K230_SDK%E6%9B%B4%E6%96%B0nncase%E8%BF%90%E8%A1%8C%E6%97%B6%E5%BA%93%E6%8C%87%E5%8D%97.html)
### 安装
- Linux
```shell
pip install nncase nncase-kpu
```
- Windows:
```shell
1. pip install nncase
2. 在下面release链接中下载`nncase_kpu-2.x.x-py2.py3-none-win_amd64.whl`
3. pip install nncase_kpu-2.x.x-py2.py3-none-win_amd64.whl
```
已经发布Python包见[Release](https://github.com/kendryte/nncase/releases)页面。
### 支持的算子
- [TFLite ops](./docs/tflite_ops.md)
- [Caffe ops](./docs/caffe_ops.md)
- [ONNX ops](./docs/onnx_ops.md)
### benchmark test
<table>
<tr> <th>kind</th> <th> model </th><th> shape </th><th> quant_type(If/W) </th><th> nncase_fps </th><th> tflite_onnx_result </th><th> accuracy </th><th> info </th></tr>
<tr>
<td rowspan='3'>Image Classification</td>
<td>mobilenetv2 </td><td> [1,224,224,3] </td><td> u8/u8 </td><td> 600.24 </td><td> top-1 = 71.3%<br/>top-5 = 90.1% </td><td> top-1 = 71.1%<br/>top-5 = 90.0% </td><td> dataset(ImageNet 2012, 50000 images)<br/> tflite </td></tr>
<tr><td>resnet50V2 </td><td> [1,3,224,224] </td><td> u8/u8 </td><td> 86.17 </td><td> top-1 = 75.44%<br/>top-5 = 92.56% </td><td> top-1 = 75.11% <br/> top-5 = 92.36% </td><td> dataset(ImageNet 2012, 50000 images)<br/> onnx</td></tr>
<tr><td>yolov8s_cls </td><td> [1,3,224,224] </td><td> u8/u8 </td><td> 130.497 </td><td> top-1 = 72.2%<br/>top-5 = 90.9% </td><td> top-1 = 72.2%<br/>top-5 = 90.8% </td><td> dataset(ImageNet 2012, 50000 images)<br/> yolov8s_cls(v8.0.207)</td></tr>
<tr>
<td rowspan='2'>Object Detection</td>
<td>yolov5s_det </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 23.645 </td><td> bbox<br/>mAP50-90 = 0.374<br/>mAP50 = 0.567 </td><td> bbox<br/>mAP50-90 = 0.369<br/>mAP50 = 0.566</td><td>dataset(coco val2017, 5000 images)<br/>yolov5s_det(v7.0 tag, rect=False, conf=0.001, iou=0.65)</td></tr>
<tr><td>yolov8s_det </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 9.373 </td><td> bbox<br/>mAP50-90 = 0.446<br/>mAP50 = 0.612<br/>mAP75 = 0.484 </td><td> bbox<br/>mAP50-90 = 0.404<br/>mAP50 = 0.593<br/>mAP75 = 0.45</td><td>dataset(coco val2017, 5000 images)<br/>yolov8s_det(v8.0.207, rect = False)</td></tr>
<tr>
<td rowspan='1'>Image Segmentation</td>
<td>yolov8s_seg </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 7.845 </td><td> bbox<br/>mAP50-90 = 0.444<br/>mAP50 = 0.606<br/>mAP75 = 0.484<br/>segm<br/>mAP50-90 = 0.371<br/>mAP50 = 0.578<br/>mAP75 = 0.396 </td><td> bbox<br/>mAP50-90 = 0.444<br/>mAP50 = 0.606<br/>mAP75 = 0.484<br/>segm<br/>mAP50-90 = 0.371<br/>mAP50 = 0.579<br/>mAP75 = 0.397</td><td> dataset(coco val2017, 5000 images)<br/>yolov8s_seg(v8.0.207, rect = False, conf_thres = 0.0008)</td></tr>
<tr>
<td rowspan='3'>Pose Estimation</td>
<td>yolov8n_pose_320 </td><td> [1,3,320,320] </td><td> u8/u8 </td><td> 36.066 </td><td> bbox<br/>mAP50-90 = 0.6<br/>mAP50 = 0.843<br/>mAP75 = 0.654<br/>keypoints<br/>mAP50-90 = 0.358<br/>mAP50 = 0.646<br/>mAP75 = 0.353 </td><td> bbox<br/>mAP50-90 = 0.6<br/>mAP50 = 0.841<br/>mAP75 = 0.656<br/>keypoints<br/>mAP50-90 = 0.359<br/>mAP50 = 0.648<br/>mAP75 = 0.357 </td><td> dataset(coco val2017, 2346 images)<br/>yolov8n_pose(v8.0.207, rect = False)</td></tr>
<tr><td>yolov8n_pose_640 </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 10.88 </td><td> bbox<br/>mAP50-90 = 0.694<br/>mAP50 = 0.909<br/>mAP75 = 0.776<br/>keypoints<br/>mAP50-90 = 0.509<br/>mAP50 = 0.798<br/>mAP75 = 0.544 </td><td> bbox<br/>mAP50-90 = 0.694<br/>mAP50 = 0.909<br/>mAP75 = 0.777<br/>keypoints<br/>mAP50-90 = 0.508<br/>mAP50 = 0.798<br/>mAP75 = 0.54 </td><td> dataset(coco val2017, 2346 images)<br/>yolov8n_pose(v8.0.207, rect = False)</td></tr>
<tr><td>yolov8s_pose </td><td> [1,3,640,640] </td><td> u8/u8 </td><td> 5.568 </td><td> bbox<br/>mAP50-90 = 0.733<br/>mAP50 = 0.925<br/>mAP75 = 0.818<br/>keypoints<br/>mAP50-90 = 0.605<br/>mAP50 = 0.857<br/>mAP75 = 0.666 </td><td> bbox<br/>mAP50-90 = 0.734<br/>mAP50 = 0.925<br/>mAP75 = 0.819<br/>keypoints<br/>mAP50-90 = 0.604<br/>mAP50 = 0.859<br/>mAP75 = 0.669</td><td> dataset(coco val2017, 2346 images)<br/>yolov8s_pose(v8.0.207, rect = False)</td></tr>
</table>
### Demo示例
|[eye gaze](https://developer.canaan-creative.com/devAdmin/model/download?mid=be978f1f38b8aa2f2b649185a10c2e9c&filePath=/upload/model/official/k230/yolop_lane_seg/yolop_lane_seg.zip) | [space_resize](https://developer.canaan-creative.com/devAdmin/model/download?mid=7d48cb68a499dd54daf0ced14549b142&filePath=/upload/model/official/k230/space_resize/space_resize.zip) | [face pose](https://developer.canaan-creative.com/devAdmin/model/download?mid=5b87c02b969a9e60d48b08e357c20e31&filePath=/upload/model/official/k230/face_pose/face_pose.zip) |
|---|---|---|
|<img src="https://github.com/kendryte/nncase_docs/blob/master/gif/eye_gaze_result.gif?raw=true" alt="gif"> | <img src="https://github.com/kendryte/nncase_docs/blob/master/gif/space_resize.gif?raw=true" alt="gif">| <img src="https://github.com/kendryte/nncase_docs/blob/master/gif/face_pose_result.gif?raw=true">|
---
## K210/K510
- [使用说明](https://github.com/kendryte/nncase/blob/release/1.0/docs/USAGE_ZH.md)
- [常见问题](https://github.com/kendryte/nncase/blob/release/1.0/docs/FAQ_ZH.md)
- [示例程序](https://github.com/kendryte/nncase/blob/release/1.0/examples/user_guide/)
### 支持的算子
- [TFLite ops](https://github.com/kendryte/nncase/blob/release/1.0/docs/tflite_ops.md)
- [Caffe ops](https://github.com/kendryte/nncase/blob/release/1.0/docs/caffe_ops.md)
- [ONNX ops](https://github.com/kendryte/nncase/blob/release/1.0/docs/onnx_ops.md)
---
## 特性
- 支持多输入输出网络,支持多分支结构
- 静态内存分配,不需要堆内存
- 算子合并和优化
- 支持 float 和量化 uint8 推理
- 支持训练后量化,使用浮点模型和量化校准集
- 平坦模型,支持零拷贝加载
---
## 架构
<div align="center">
<img src="imgs/arch.jpeg" alt="nncase arch" />
</div>
---
## 源码编译
**推荐直接通过`pip`安装nncase来使用目前K510、K230芯片相关的源码并未开源因此无法直接通过编译源码来使用`nncase-k510`和`nncase-kpu`(K230)。**
如果你的模型中存在`nncase`尚未支持的算子可以在issue中提出需求或者自己实现并提交PR。后续版本将会进行集成或者联系我们提供临时版本。
以下为编译 `nncase` 的步骤
```shell
git clone https://github.com/kendryte/nncase.git
cd nncase
mkdir build && cd build
# 使用Ninja编译
cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
ninja && ninja install
# 使用make编译
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
make && make install
```
---
## 资源
### 嘉楠开发者社区
[嘉楠开发者社区](https://developer.canaan-creative.com/resource)中包含所有K210、K510、K230相关的资源包括
- 资料下载 --> 三款芯片对应的开发板可使用的预编译镜像。
- 文档 --> 三款芯片及不同版本对应的文档。
- 模型库 --> K210、K230上可运行的应用于工业、安防、教育等场景的示例以及代码。
- 模型训练 --> 针对K210、K230的模型训练平台支持多种场景的训练。
### Bilibili
- [嘉楠 AI教程及应用演示](https://space.bilibili.com/677429436)
### K210相关仓库
- [K210_Yolo_framework](https://github.com/zhen8838/K210_Yolo_framework)
- [Shts!&#39;s Blog (Japanese)](https://www.shtsno24.tokyo/2020/03/nncase-v020.html)
- [示例脚本](https://github.com/kendryte/canmv_examples/tree/main/01-K210)
### K230相关仓库
- C: [K230_SDK](https://github.com/kendryte/k230_sdk)
- [文档](https://github.com/kendryte/k230_docs)
- [K230端到端全流程教程](https://github.com/kendryte/K230_training_scripts)
- MicroPython: [Canmv_k230](https://github.com/kendryte/k230_canmv)
- [文档](https://github.com/kendryte/k230_canmv_docs)
- [示例脚本](https://github.com/kendryte/canmv_examples/tree/main/02-K230)

View File

@ -8,7 +8,7 @@ onnxoptimizer==0.2.6
onnxruntime==1.12.0
ncnn==1.0.20230816
numpy==1.21.0
torch==1.9.0
torch==1.13.1
torchvision==0.10.0
imageio==2.15.0
protobuf==3.12.2

View File

@ -100,7 +100,6 @@ internal class Compiler : ICompiler
p.Add<Passes.Rules.Neutral.NormAxisReshape>();
p.Add<Passes.Rules.Neutral.NormAxisReduceArg>();
p.Add<Passes.Rules.Neutral.NormAxisSlice>();
p.Add<Passes.Rules.Neutral.SwapBinaryArgs>();
p.Add<Passes.Rules.Neutral.SqueezeTransposeShape>();
p.Add<Passes.Rules.Neutral.Squeeze5DTranspose>();
p.Add<Passes.Rules.Neutral.SqueezeBinaryShape>();
@ -141,6 +140,7 @@ internal class Compiler : ICompiler
p.Add<Passes.Rules.Neutral.FoldNopReduce>();
p.Add<Passes.Rules.Neutral.SliceToGetItem>();
p.Add<Passes.Rules.Neutral.FoldTwoPads>();
p.Add<Passes.Rules.Neutral.SwapBinaryArgs>();
p.Add<Passes.Rules.Neutral.FoldDilatedConv2D>();
});
@ -153,10 +153,17 @@ internal class Compiler : ICompiler
p.Add<Passes.Rules.WithMarker.FoldTransposeActTranspose>();
p.Add<Passes.Rules.WithMarker.FoldTransposeBinaryActTranspose>();
p.Add<Passes.Rules.WithMarker.CombineReshapePad>();
p.Add<Passes.Rules.WithMarker.CombineTransposePad>();
p.Add<Passes.Rules.WithMarker.CombinePadTranspose>();
p.Add<Passes.Rules.Neutral.CombineTransposeUnary>();
p.Add<Passes.Rules.Neutral.CombineTransposePad>();
if (_compileSession.CompileOptions.ShapeBucketOptions.Enable)
{
p.Add<Passes.Rules.WithMarker.CombineTransposePad>();
}
else
{
p.Add<Passes.Rules.Neutral.CombineTransposePad>();
}
p.Add<Passes.Rules.Neutral.CombinePadTranspose>();
p.Add<Passes.Rules.Neutral.CombineBinaryTranspose>();
p.Add<Passes.Rules.Neutral.CombineConstBinaryTranspose>();

View File

@ -288,5 +288,7 @@ public static partial class Utility
public static Pattern MaybeMarker(Pattern input) => IsAlt(input, IsRangeOfMarker(input, IsWildcard()));
public static Pattern MaybeMarker(Pattern input, string markerName) => IsAlt(input, IsRangeOfMarker(markerName, input, IsWildcard()));
public static Pattern HasMarker(Pattern input, string? markerName = null) => IsRangeOfMarker(markerName, input, IsWildcard());
}

View File

@ -71,4 +71,21 @@ public static class TIRUtilities
IR.F.Math.Max(0, t.First.Start),
IR.F.Math.Min(t.Second.FixedValue, t.First.Stop),
t.First.Step)).ToArray();
public static bool TryGetFixedRegions(TIR.BufferRegion region, out (int Start, int Stop, int Step)[] slice)
{
slice = new (int Start, int Stop, int Step)[region.Region.Length];
for (int i = 0; i < region.Region.Length; i++)
{
var rg = region.Region[i];
if (rg is not Range { Start: IR.TensorConst start, Stop: IR.TensorConst stop, Step: IR.TensorConst step })
{
return false;
}
slice[i] = (start.Value.ToScalar<int>(), stop.Value.ToScalar<int>(), step.Value.ToScalar<int>());
}
return true;
}
}

View File

@ -1,7 +1,6 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.
using GiGraph.Dot.Output.Writers.Edges;
using Nncase.Diagnostics;
using Nncase.IR;
using Nncase.IR.Tensors;

View File

@ -32,6 +32,25 @@ public partial class EGraphPrinter
return printer.SaveToStream(file);
}
/// <summary>
/// find the minCostEnode in eclass.
/// <remarks>
/// the marker first.
/// </remarks>
/// </summary>
internal static ENode MinByWithMarker(EClass eClass, CostModel.EGraphCostModel costModel)
{
return eClass.Nodes.OrderBy(e => e.Expr, ENodeTypeComparer.Instance).MinBy(x => x.Expr is Marker ? CostModel.Cost.Zero : costModel[x])!;
}
/// <summary>
/// find the minCostEnode in eclass skip marker.
/// </summary>
internal static ENode MinByWithOutMarker(EClass eClass, CostModel.EGraphCostModel costModel)
{
return eClass.Nodes.Where(e => e.Expr is not Marker).MinBy(x => costModel[x])!;
}
private DotGraph AttachEGraphCost(CostModel.EGraphCostModel costModel, EClass entry)
{
// 1. display each enode costs.
@ -72,12 +91,12 @@ public partial class EGraphPrinter
continue;
}
var minCostEnode = parent.MinByWithMarker(costModel);
var minCostEnode = MinByWithMarker(parent, costModel);
// when this marker ecalss has been visited, skip it.
if (markerEclassMemo.Contains(parent))
{
minCostEnode = parent.MinByWithOutMarker(costModel);
minCostEnode = MinByWithOutMarker(parent, costModel);
}
var (minCostDotnode, table) = NodesMap[minCostEnode];
@ -93,7 +112,7 @@ public partial class EGraphPrinter
if (minCostEnode.Expr is Marker && child == parent)
{
markerEclassMemo.Add(child);
var otherminCostENode = child.MinByWithOutMarker(costModel);
var otherminCostENode = MinByWithOutMarker(child, costModel);
var (childDotNode, _) = NodesMap[otherminCostENode];
_dotGraph.Edges.Add(childDotNode, minCostDotnode, edge =>
{
@ -103,7 +122,7 @@ public partial class EGraphPrinter
}
else
{
var childEnode = child.Find().MinByWithMarker(costModel);
var childEnode = MinByWithMarker(child.Find(), costModel);
var (childDotNode, _) = NodesMap[childEnode];
_dotGraph.Edges.Add(childDotNode, minCostDotnode, edge =>
{
@ -126,3 +145,23 @@ public partial class EGraphPrinter
return _dotGraph;
}
}
internal sealed class ENodeTypeComparer : IComparer<Expr>
{
public static readonly ENodeTypeComparer Instance = new();
public int Compare(Expr? x, Expr? y) => (x, y) switch
{
(null, null) => 0,
(Expr, null) => 1,
(null, Expr) => -1,
(Expr, Expr) => GetPriority(x).CompareTo(GetPriority(y)),
};
private int GetPriority(Expr x) => x switch
{
Marker => 0,
Const => 1,
_ => 2,
};
}

View File

@ -0,0 +1,50 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Google.OrTools.Sat;
using Nncase.CostModel;
using Nncase.Diagnostics;
using Nncase.IR;
using Nncase.PatternMatch;
using static Nncase.PatternMatch.F.Math;
using static Nncase.PatternMatch.Utility;
namespace Nncase.Passes;
/// <summary>
/// EGraph extract extensions.
/// </summary>
public static class EGraphExtensions
{
/// <summary>
/// Extract egraph.
/// </summary>
/// <param name="eGraph">egraph.</param>
/// <param name="root">Root eclass.</param>
/// <param name="basefunc_cost_evaluator">base func cost evaluator.</param>
/// <param name="constrains">the cp model constrains.</param>
public static Expr Extract(this IEGraph eGraph, EClass root, Evaluator.IBaseFuncCostEvaluator? basefunc_cost_evaluator, EGraphExtractConstrains[] constrains)
{
// 1. set enode expr with more accuracy type.
foreach (var eclass in eGraph.Classes)
{
foreach (var nodes in eclass.Nodes)
{
if (eclass.CheckedType.CompareTo(nodes.Expr.CheckedType) > 0)
{
nodes.Expr.CheckedType = eclass.CheckedType;
}
}
}
// 2. start the cost evaluator
var costModel = new CostModel.EGraphCostEvaluator(root.Find(), basefunc_cost_evaluator, false).Evaluate();
return new EGraphExtractor(costModel).Extract(root.Find(), eGraph, constrains);
}
}

View File

@ -1,95 +0,0 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Nncase.CostModel;
using Nncase.Diagnostics;
using Nncase.IR;
using Nncase.PatternMatch;
using static Nncase.PatternMatch.F.Math;
using static Nncase.PatternMatch.Utility;
namespace Nncase.Passes;
/// <summary>
/// EGraph extract extensions.
/// </summary>
public static class EGraphExtractExtensions
{
/// <summary>
/// Extract egraph.
/// </summary>
/// <param name="eGraph">eGraph.</param>
/// <param name="root">Root eclass.</param>
/// <param name="basefunc_cost_evaluator">base func cost evaluator.</param>
/// <param name="picks">the picks.</param>
/// <returns>Extracted root expression.</returns>
public static Expr Extract(this IEGraph eGraph, EClass root, Evaluator.IBaseFuncCostEvaluator? basefunc_cost_evaluator, out IReadOnlyDictionary<ENode, bool> picks)
{
// 1. set enode expr with more accuracy type.
foreach (var eclass in eGraph.Classes)
{
foreach (var nodes in eclass.Nodes)
{
if (eclass.CheckedType.CompareTo(nodes.Expr.CheckedType) > 0)
{
nodes.Expr.CheckedType = eclass.CheckedType;
}
}
}
// 2. start the cost evaluator
var costModel = new EGraphCostEvaluator(root.Find(), basefunc_cost_evaluator, false).Evaluate();
// if (DumpScope.Current.IsEnabled(DumpFlags.EGraphCost))
// {
// using var fs = DumpScope.Current.OpenFile(Path.Combine("Costs", $"V{eGraph.Version}.dot"));
// EGraphPrinter.DumpEgraphAsDot(eGraph, costModel, root.Find(), fs);
// }
// return new EGraphExtractor(costModel).Extract(root.Find(), eGraph);
return new EGraphExtractors.SatExtractor(costModel).Extract(root.Find(), eGraph, out picks);
}
/// <summary>
/// find the minCostEnode in eclass.
/// <remarks>
/// the marker first.
/// </remarks>
/// </summary>
internal static ENode MinByWithMarker(this EClass eClass, CostModel.EGraphCostModel costModel)
{
return eClass.Nodes.OrderBy(e => e.Expr, ENodeTypeComparer.Instance).MinBy(x => x.Expr is Marker ? Cost.Zero : costModel[x])!;
}
/// <summary>
/// find the minCostEnode in eclass skip marker.
/// </summary>
internal static ENode MinByWithOutMarker(this EClass eClass, CostModel.EGraphCostModel costModel)
{
return eClass.Nodes.Where(e => e.Expr is not Marker).MinBy(x => costModel[x])!;
}
internal sealed class ENodeTypeComparer : IComparer<Expr>
{
public static readonly ENodeTypeComparer Instance = new();
public int Compare(Expr? x, Expr? y) => (x, y) switch
{
(null, null) => 0,
(Expr, null) => 1,
(null, Expr) => -1,
(Expr, Expr) => GetPriority(x).CompareTo(GetPriority(y)),
};
private int GetPriority(Expr x) => x switch
{
Marker => 0,
Const => 1,
_ => 2,
};
}
}

View File

@ -11,18 +11,20 @@ using Nncase.CostModel;
using Nncase.Diagnostics;
using Nncase.IR;
namespace Nncase.Passes.EGraphExtractors;
namespace Nncase.Passes;
internal class SatExtractor : IExtractor
public delegate void EGraphExtractConstrains(CpModel model, IReadOnlyDictionary<ENode, BoolVar> vars);
internal class EGraphExtractor
{
private readonly EGraphCostModel _costModel;
public SatExtractor(EGraphCostModel costModel)
public EGraphExtractor(EGraphCostModel costModel)
{
_costModel = costModel;
}
public Expr Extract(EClass root, IEGraph eGraph, out IReadOnlyDictionary<ENode, bool> picks)
public Expr Extract(EClass root, IEGraph eGraph, EGraphExtractConstrains[] constrains)
{
var cpmodel = new CpModel();
@ -68,6 +70,11 @@ internal class SatExtractor : IExtractor
EliminateAllCycles(root, new(), new(), visited, cpmodel, vars);
}
foreach (var constrain in constrains)
{
constrain(cpmodel, vars);
}
// 3. add pick weights for all enode.
cpmodel.Minimize(LinearExpr.WeightedSum(eGraph.Nodes.Select(n => vars[n]), eGraph.Nodes.Select(n => checked((long)_costModel[n].Score))));
@ -121,7 +128,7 @@ internal class SatExtractor : IExtractor
throw new InvalidProgramException("SatExtract Failed!");
}
picks = eGraph.Nodes.ToDictionary(e => e, e => solver.BooleanValue(vars[e]));
var picks = eGraph.Nodes.ToDictionary(e => e, e => solver.BooleanValue(vars[e]));
using (var dumpStream = enableDump ? DumpScope.Current.OpenFile("Costs/Pick.dot") : Stream.Null)
{
EGraphPrinter.DumpEgraphAsDot(eGraph, _costModel, picks, root.Find(), dumpStream);

View File

@ -1,200 +0,0 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Nncase.CostModel;
using Nncase.Diagnostics;
using Nncase.IR;
using Nncase.PatternMatch;
using static Nncase.PatternMatch.F.Math;
using static Nncase.PatternMatch.Utility;
namespace Nncase.Passes.EGraphExtractors;
internal interface IExtractor
{
Expr Extract(EClass root, IEGraph eGraph, out IReadOnlyDictionary<ENode, bool> picks);
}
internal class Extractor : IExtractor
{
private readonly EGraphCostModel _costModel;
private readonly Dictionary<EClass, Expr> _eclassMemo = new();
private readonly Dictionary<EClass, Expr> _markerEclassMemo = new();
private readonly Dictionary<ENode, bool> _picks = new();
private StreamWriter? _dumpWriter;
public Extractor(EGraphCostModel costModel)
{
_costModel = costModel;
}
public Expr Extract(EClass root, IEGraph eGraph, out IReadOnlyDictionary<ENode, bool> picks)
{
_dumpWriter = DumpScope.Current.IsEnabled(DumpFlags.EGraphCost)
? new StreamWriter(DumpScope.Current.OpenFile($"{nameof(Extractor)}_Class_{root.Id}.txt"))
: null;
try
{
Visit(root);
}
finally
{
_dumpWriter?.Dispose();
}
foreach (var enode in eGraph.Nodes)
{
if (!_picks.ContainsKey(enode))
{
_picks[enode] = false;
}
}
picks = _picks;
return _eclassMemo[root];
}
private void Visit(EClass eclass)
{
var stack = new Stack<(EClass, ENode)>();
stack.Push((eclass, eclass.MinByWithMarker(_costModel)));
var markerEclassSet = new HashSet<EClass>();
while (stack.Any())
{
(eclass, var minCostEnode) = stack.Peek();
if (_eclassMemo.ContainsKey(eclass))
{
stack.Pop();
continue;
}
Expr? expr = null;
switch (minCostEnode.Expr)
{
case Var or TensorConst or TupleConst or Op or Fusion or None:
expr = minCostEnode.Expr;
break;
case Function or Call or IR.Tuple or Marker or IR.If:
var childrenExprs = new List<Expr>();
foreach (var child in minCostEnode.Children)
{
if (!_eclassMemo.TryGetValue(child, out var childExpr))
{
if (minCostEnode.Expr is Marker && child == eclass)
{
if (!_markerEclassMemo.TryGetValue(eclass, out var markerInputExpr))
{
markerEclassSet.Add(eclass);
stack.Push((eclass, eclass.MinByWithOutMarker(_costModel)));
}
else
{
childrenExprs.Add(markerInputExpr);
}
}
else
{
stack.Push((child, child.MinByWithMarker(_costModel)));
}
}
else
{
childrenExprs.Add(childExpr);
}
}
if (childrenExprs.Count != minCostEnode.Children.Count)
{
break;
}
expr = minCostEnode.Expr switch
{
Function function => Visit(minCostEnode, function, new(childrenExprs)),
Call call => Visit(minCostEnode, call, new(childrenExprs)),
IR.Tuple tuple => Visit(minCostEnode, tuple, new(childrenExprs)),
Marker marker => Visit(minCostEnode, marker, new(childrenExprs)),
IR.If @if => Visit(minCostEnode, @if, new(childrenExprs)),
_ => throw new ArgumentException("Unsupported expression type."),
};
break;
default:
throw new ArgumentException("Unsupported expression type.");
}
if (expr is null)
{
continue;
}
if (markerEclassSet.Contains(eclass) && minCostEnode.Expr is not Marker)
{
_markerEclassMemo.Add(eclass, expr);
}
else
{
_eclassMemo.Add(eclass, expr);
}
_picks[minCostEnode] = true;
stack.Pop();
}
}
private Marker Visit(ENode enode, Marker marker, IRArray<Expr> children)
{
var target = children[0];
var attr = children[1];
return marker.With(target: target, attribute: attr);
}
private Function Visit(ENode enode, Function func, IRArray<Expr> children)
{
if (children.Count == 0)
{
return func;
}
var body = children[0];
return func.With(body: body);
}
private IR.Tuple Visit(ENode enode, IR.Tuple tuple, IRArray<Expr> children)
{
return tuple.With(fields: children.ToArray());
}
private IR.If Visit(ENode enode, IR.If @if, IRArray<Expr> children)
{
return @if.With(condition: children[^3], then: children[^2], @else: children[^1], paramList: children[..^3].ToArray());
}
private Call Visit(ENode enode, Call call, IRArray<Expr> children)
{
var target = children[0];
var arguments = children.Skip(1);
// for mix quant debug.
if (call.EnodeQuantConfigWithCosine != null && _dumpWriter != null)
{
_dumpWriter.WriteLine(call + " " + call.CheckedType);
for (int i = 0; i < call.EnodeQuantConfigWithCosine.Count; i++)
{
for (int j = 0; j < call.EnodeQuantConfigWithCosine[i].Item1.Count; j++)
{
_dumpWriter.Write(call.EnodeQuantConfigWithCosine[i].Item1[j] + " ");
}
_dumpWriter.WriteLine(call.EnodeQuantConfigWithCosine[i].Item3);
}
}
return call.With(target: target, arguments: arguments.ToArray(), call.Metadata);
}
}

View File

@ -36,7 +36,7 @@ internal class EGraphRewriteProvider : IEGraphRewriteProvider
var graph = new EGraph(expr);
ERewrite(graph, rules, options);
var post = graph.Extract(graph.Root!, null, out _);
var post = graph.Extract(graph.Root!, null, Array.Empty<EGraphExtractConstrains>());
return post;
}

View File

@ -3,54 +3,34 @@
namespace Nncase.Passes.BufferSchedule;
internal sealed class TimeInterval
public sealed class Interval
{
public TimeInterval(int start, int end)
{
Brith = start;
Death = end;
}
public int Brith { get; set; }
public int Death { get; set; }
public int Size => Death - Brith;
public override string ToString()
{
return $"TimeInterval({Brith}, {Death})";
}
}
internal sealed class MemSpan
{
public MemSpan(int start, int end)
public Interval(int start, int end)
{
Start = start;
End = end;
Stop = end;
}
public int Start { get; set; }
public int End { get; set; }
public int Stop { get; set; }
public int Size => End - Start;
public int Size => Stop - Start;
public override string ToString()
{
return $"MemSpan({Start}, {End})";
return $"Interval({Start}, {Stop})";
}
}
internal class ScheduleBuffer
public class ScheduleBuffer
{
public ScheduleBuffer(string name, int number, TimeInterval interval, MemSpan span, int[] shape, int[] strides, bool inplace)
public ScheduleBuffer(string name, int number, Interval timeInterval, Interval memInterval, int[] shape, int[] strides, bool inplace)
{
Name = name;
Number = number;
Interval = interval;
Span = span;
TimeInterval = timeInterval;
MemInterval = memInterval;
Shape = shape;
Strides = strides;
Inplace = inplace;
@ -60,9 +40,9 @@ internal class ScheduleBuffer
public int Number { get; }
public TimeInterval Interval { get; }
public Interval TimeInterval { get; }
public MemSpan Span { get; }
public Interval MemInterval { get; }
public int[] Shape { get; }
@ -72,6 +52,6 @@ internal class ScheduleBuffer
public override string ToString()
{
return $"ScheduledBuffer('{Name}', {Number}, {Interval}, {Span}, ConstraintsMode.No, [{string.Join(",", Shape)}], [{string.Join(",", Strides)}], {Inplace})";
return $"ScheduledBuffer('{Name}', {Number}, {TimeInterval}, {MemInterval}, ConstraintsMode.No, [{string.Join(",", Shape)}], [{string.Join(",", Strides)}], {Inplace})";
}
}

View File

@ -13,49 +13,10 @@ using Nncase.IR;
namespace Nncase.Passes.BufferSchedule;
internal sealed class BufferScheduler
public class BufferScheduler
{
public IReadOnlyDictionary<Expr, ScheduleBuffer> CollectLifeTime(Function func)
public virtual void ExternalConstrains(CpModel model, IReadOnlyDictionary<Expr, ScheduleBuffer> bufferMap, IReadOnlyDictionary<Expr, (IntervalVar X, IntervalVar Y)> boxs)
{
var c = new LifeTimeCollector();
return c.Collect(func);
}
public void Schedule(IReadOnlyDictionary<Expr, ScheduleBuffer> bufferMap)
{
var model = new CpModel();
var noOverlap = model.AddNoOverlap2D();
var boxs = new Dictionary<Expr, (IntervalVar X, IntervalVar Y)>(ReferenceEqualityComparer.Instance);
var timeMap = new Dictionary<int, List<Expr>>();
var yStarts = new List<IntVar>();
foreach (var (expr, item) in bufferMap)
{
var xInterval = model.NewIntervalVar(model.NewConstant(item.Interval.Brith), model.NewConstant(item.Interval.Size), model.NewConstant(item.Interval.Death), item.Name + $"{item.Number}_x");
var upbound = 2147483648 - item.Span.End;
if (upbound <= 0)
{
throw new System.NotSupportedException();
}
var memStartVar = model.NewIntVar(0, upbound, $"{item.Name}_{item.Number}_y_start");
var yInterval = model.NewFixedSizeIntervalVar(memStartVar, item.Span.End, $"{item.Name}_{item.Number}_y");
noOverlap.AddRectangle(xInterval, yInterval);
yStarts.Add(memStartVar);
boxs.Add(expr, (xInterval, yInterval));
for (int time = item.Interval.Brith; time < item.Interval.Death; time++)
{
if (!timeMap.TryGetValue(time, out var timelist))
{
timelist = new();
timeMap.Add(time, timelist);
}
timelist.Add(expr);
}
}
foreach (var (expr, item) in bufferMap)
{
if (expr is Call { Target: IR.Tensors.Concat } concatCall && concatCall.Arguments[0] is IR.Tuple tuple)
@ -65,7 +26,7 @@ internal sealed class BufferScheduler
for (int i = 0; i < tuple.Fields.Length; i++)
{
model.Add((boxs[concatCall].Y.StartExpr() + offset) == boxs[tuple.Fields[i]].Y.StartExpr());
offset += bufferMap[tuple.Fields[i]].Span.Size;
offset += bufferMap[tuple.Fields[i]].MemInterval.Size;
}
}
else if (expr is Call { Target: IR.Tensors.Split } splitCall)
@ -79,7 +40,7 @@ internal sealed class BufferScheduler
foreach (var user in users.OrderBy(e => ((Call)e).Arguments[1].Evaluate().AsTensor().ToScalar<int>()))
{
model.Add((boxs[splitCall].Y.StartExpr() + offset) == boxs[user].Y.StartExpr());
offset += bufferMap[user].Span.Size;
offset += bufferMap[user].MemInterval.Size;
}
}
else if (expr is Call { Target: IR.Tensors.Reshape } reshapCall)
@ -88,6 +49,44 @@ internal sealed class BufferScheduler
model.Add(boxs[reshapCall].Y.StartExpr() == boxs[reshapCall.Arguments[0]].Y.StartExpr());
}
}
}
public void Schedule(IReadOnlyDictionary<Expr, ScheduleBuffer> bufferMap)
{
var model = new CpModel();
var noOverlap = model.AddNoOverlap2D();
var boxs = new Dictionary<Expr, (IntervalVar X, IntervalVar Y)>(ReferenceEqualityComparer.Instance);
var timeMap = new Dictionary<int, List<Expr>>();
var yStarts = new List<IntVar>();
foreach (var (expr, item) in bufferMap)
{
var xInterval = model.NewIntervalVar(model.NewConstant(item.TimeInterval.Start), model.NewConstant(item.TimeInterval.Size), model.NewConstant(item.TimeInterval.Stop), item.Name + $"{item.Number}_x");
var upbound = 2147483648 - item.MemInterval.Stop;
if (upbound <= 0)
{
throw new System.NotSupportedException();
}
var memStartVar = model.NewIntVar(0, upbound, $"{item.Name}_{item.Number}_y_start");
var yInterval = model.NewFixedSizeIntervalVar(memStartVar, item.MemInterval.Stop, $"{item.Name}_{item.Number}_y");
noOverlap.AddRectangle(xInterval, yInterval);
yStarts.Add(memStartVar);
boxs.Add(expr, (xInterval, yInterval));
for (int time = item.TimeInterval.Start; time < item.TimeInterval.Stop; time++)
{
if (!timeMap.TryGetValue(time, out var timelist))
{
timelist = new();
timeMap.Add(time, timelist);
}
timelist.Add(expr);
}
}
ExternalConstrains(model, bufferMap, boxs);
model.Minimize(LinearExpr.Sum(yStarts));
@ -99,10 +98,10 @@ internal sealed class BufferScheduler
throw new System.NotSupportedException();
}
foreach (var (k, v) in bufferMap)
foreach (var (k, _) in bufferMap)
{
bufferMap[k].Span.Start = checked((int)solver.Value(boxs[k].Y.StartExpr()));
bufferMap[k].Span.End = checked((int)solver.Value(boxs[k].Y.EndExpr()));
bufferMap[k].MemInterval.Start = checked((int)solver.Value(boxs[k].Y.StartExpr()));
bufferMap[k].MemInterval.Stop = checked((int)solver.Value(boxs[k].Y.EndExpr()));
}
}
@ -119,18 +118,11 @@ from enum import Enum
from typing import List
@dataclass
class TimeInterval():
class Interval():
start: int
end: int
def __str__(self) -> str:
return f'(start: {self.start}, end {self.end})'
@dataclass
class MemSpan():
depth_start: int
depth_end: int
def __str__(self) -> str:
return f'(start: {self.depth_start}, size {self.depth_end - self.depth_start})'
return f'(start: {self.start}, end {self.end}, size {self.end - self.start})'
class ConstraintsMode(Enum):
No = 0
@ -140,8 +132,8 @@ class ConstraintsMode(Enum):
class ScheduledBuffer():
name: str
number: int
interval: TimeInterval
location: MemSpan
time_interval: Interval
mem_interval: Interval
constraints: ConstraintsMode
shape: List[int]
stride: List[int]
@ -166,8 +158,8 @@ source = {
'height': [],
'alpha': [],
'color': [],
'location': [],
'interval': [],
'mem_interval': [],
'time_interval': [],
'shape': [],
'stride': [],
}
@ -177,10 +169,10 @@ x_range_max = 0
color_dict = {}
for buffer in buffers:
source['name'].append(buffer.name)
width = buffer.interval.end - buffer.interval.start
x = buffer.interval.start + (width / 2)
height = buffer.location.depth_end - buffer.location.depth_start
y = buffer.location.depth_start + (height / 2)
width = buffer.time_interval.end - buffer.time_interval.start
x = buffer.time_interval.start + (width / 2)
height = buffer.mem_interval.end - buffer.mem_interval.start
y = buffer.mem_interval.start + (height / 2)
y_range_max = max(y_range_max, y)
x_range_max = max(x_range_max, buffer.interval.end)
source['x'].append(x)
@ -193,13 +185,13 @@ for buffer in buffers:
color_dict[buffer.name] = color
source['color'].append(color)
source['alpha'].append(0.2 if buffer.inplace else 1.0)
source['interval'].append(str(buffer.interval))
source['location'].append(str(buffer.location))
source['time_interval'].append(str(buffer.time_interval))
source['mem_interval'].append(str(buffer.mem_interval))
source['shape'].append(','.join([str(s) for s in buffer.shape]))
source['stride'].append(','.join([str(s) for s in buffer.stride]))
source = ColumnDataSource(source)
hover = HoverTool(tooltips=[('name', '@name'), ('interval', '@interval'), ('location', '@location'),
hover = HoverTool(tooltips=[('name', '@name'), ('time_interval', '@time_interval'), ('mem_interval', '@mem_interval'),
('shape', '@shape'), ('stride', '@stride')])
p = figure(tools=[hover, WheelPanTool(), SaveTool(), WheelZoomTool(), ResetTool()], width=1280, height=720,

View File

@ -10,16 +10,16 @@ using Nncase.IR;
namespace Nncase.Passes.BufferSchedule;
internal sealed class LifeTimeCollector : ExprVisitor<Unit, Unit>
public class LifeTimeCollector : ExprVisitor<Unit, Unit>
{
public int TimeStamp { get; private set; }
public Dictionary<Expr, TimeInterval> LifenessMap { get; } = new(ReferenceEqualityComparer.Instance);
public Dictionary<Expr, Interval> LifenessMap { get; } = new(ReferenceEqualityComparer.Instance);
public IReadOnlyDictionary<Expr, ScheduleBuffer> Collect(Function entry)
public IReadOnlyDictionary<Expr, ScheduleBuffer> Collect(Expr expr)
{
Visit(entry.Body);
Update(entry.Body); // avoid final call time interval size == 1.
Visit(expr);
Update(expr); // avoid final call time interval size == 1.
Alias();
var d = new Dictionary<Expr, ScheduleBuffer>(ReferenceEqualityComparer.Instance);
@ -32,8 +32,7 @@ internal sealed class LifeTimeCollector : ExprVisitor<Unit, Unit>
Var va => va.Name,
_ => k.GetType().Name,
};
var size = GetSize(k.CheckedType, out var shape, out var stride);
var size = ComputeBufferSize(k.CheckedType, out var shape, out var stride);
d.Add(k, new(name, count++, v, new(0, size), shape, stride, false));
}
@ -62,6 +61,29 @@ internal sealed class LifeTimeCollector : ExprVisitor<Unit, Unit>
return Unit.Default;
}
protected virtual int ComputeBufferSize(IRType type, out int[] shape, out int[] stride)
{
shape = Array.Empty<int>();
stride = Array.Empty<int>();
var size = 0;
if (type is TensorType tensorType)
{
shape = tensorType.Shape.ToValueArray();
stride = TensorUtilities.GetStrides(shape);
size = TensorUtilities.GetSize(shape, stride, tensorType.DType.SizeInBytes);
}
else if (type is TupleType tupleType)
{
size = 0;
foreach (var item in tupleType)
{
size += ComputeBufferSize(item, out _, out _);
}
}
return size;
}
private void Update(Expr expr)
{
if (expr is Const or None)
@ -85,7 +107,7 @@ internal sealed class LifeTimeCollector : ExprVisitor<Unit, Unit>
}
else
{
interval.Death = TimeStamp + 1;
interval.Stop = TimeStamp + 1;
}
LifenessMap[expr] = interval;
@ -123,12 +145,12 @@ internal sealed class LifeTimeCollector : ExprVisitor<Unit, Unit>
} while (changed);
}
private bool AliasTime(Call call, TimeInterval interval)
private bool AliasTime(Call call, Interval interval)
{
var brith = call.GetArguments().Select(arg => LifenessMap[arg].Death).Concat(new[] { interval.Brith }).Max();
var death = call.GetUsers().Select(usr => LifenessMap[usr].Brith).Concat(new[] { interval.Death }).Min();
var brith = call.GetArguments().Select(arg => LifenessMap[arg].Stop).Concat(new[] { interval.Start }).Max();
var death = call.GetUsers().Select(usr => LifenessMap[usr].Start).Concat(new[] { interval.Stop }).Min();
if (brith == interval.Brith && death == interval.Death)
if (brith == interval.Start && death == interval.Stop)
{
return false;
}
@ -138,31 +160,8 @@ internal sealed class LifeTimeCollector : ExprVisitor<Unit, Unit>
throw new InvalidOperationException();
}
interval.Brith = brith;
interval.Death = death;
interval.Start = brith;
interval.Stop = death;
return true;
}
private int GetSize(IRType type, out int[] shape, out int[] stride)
{
shape = Array.Empty<int>();
stride = Array.Empty<int>();
var size = 0;
if (type is TensorType tensorType)
{
shape = tensorType.Shape.ToValueArray();
stride = TensorUtilities.GetStrides(shape);
size = TensorUtilities.GetSize(shape, stride, tensorType.DType.SizeInBytes);
}
else if (type is TupleType tupleType)
{
size = 0;
foreach (var item in tupleType)
{
size += GetSize(item, out _, out _);
}
}
return size;
}
}

View File

@ -46,7 +46,8 @@ public sealed class DDrBufferSchdeulePass : ModulePass
if (module.Entry is Function { ModuleKind: Callable.StackVMModuleKind, Body: Expr body } func && IsFixedType(body.CheckedType))
{
var sch = new BufferSchedule.BufferScheduler();
var buffers = sch.CollectLifeTime(func);
var c = new BufferSchedule.LifeTimeCollector();
var buffers = c.Collect(func.Body);
sch.Schedule(buffers);
using (var fs = Diagnostics.DumpScope.Current.OpenFile("draw_buffers.py"))
{

View File

@ -24,7 +24,7 @@ public sealed class EGraphExtractPass : Pass<IEGraph, BaseFunction>
protected override Task<BaseFunction> RunCoreAsync(IEGraph input, RunPassContext context)
{
var post = (BaseFunction)input.Extract(input.Root!, _costEvaluator, out _);
var post = (BaseFunction)input.Extract(input.Root!, _costEvaluator, Array.Empty<EGraphExtractConstrains>());
IRHelpers.DCE(post);
return Task.FromResult(post);
}

View File

@ -23,13 +23,14 @@ public sealed partial class FoldNopBinary : IRewriteRule
/// <inheritdoc/>
public IPattern Pattern { get; } = IsBinary(
"binary",
"call",
x => x.BinaryOp is BinaryOp.Add or BinaryOp.Sub or BinaryOp.Mul or BinaryOp.Div or BinaryOp.Mod or BinaryOp.Pow,
IsWildcard("lhs"),
IsTensorConst("rhs"));
private Expr? GetReplace(Binary binary, Expr lhs, TensorConst rhs)
private Expr? GetReplace(Binary binary, Call call, Expr lhs, TensorConst rhs)
{
if (lhs.CheckedType is Nncase.IR.AnyType || lhs.CheckedShape == rhs.CheckedShape)
if ((lhs.CheckedType is Nncase.IR.AnyType && rhs.CheckedShape.IsScalar) || (lhs.CheckedShape == call.CheckedShape))
{
return binary.BinaryOp switch
{

View File

@ -32,15 +32,17 @@ public sealed partial class CombineReshapePad : IRewriteRule
{
/// <inheritdoc/>
public IPattern Pattern { get; } =
IsReshape(
MaybeMarker(
IsReshape(
"reshape",
"reshapeCall",
_ => true,
HasMarker(IsPad("pad", "padCall", _ => true, HasMarker(IsWildcard("input"), "marker"), IsTensorConst("pads"), IsTensorConst("value")) with { TypePattern = HasFixedShape() }, "padOutMarker"),
IsWildcard("shape")) with
{ TypePattern = HasFixedShape() };
{ TypePattern = HasFixedShape() },
"outMarker");
private Expr? GetReplace(Reshape reshape, Call reshapeCall, Pad pad, Call padCall, Expr input, Expr shape, int[] pads, Expr value, Marker marker)
private Expr? GetReplace(Reshape reshape, Call reshapeCall, Pad pad, Call padCall, Expr input, Expr shape, int[] pads, Expr value, Marker marker, IMatchResult result)
{
// only support pattern like melgan
var reshapeRank = reshapeCall.CheckedShape.Rank;
@ -48,11 +50,23 @@ public sealed partial class CombineReshapePad : IRewriteRule
if (reshapeRank >= padRank
&& Enumerable.SequenceEqual(reshapeCall.CheckedShape.ToValueArray()[(reshapeRank - padRank)..], padCall.CheckedShape.ToValueArray()))
{
return Pad(
marker.With(target: Reshape(input, Enumerable.Repeat(1, reshapeRank - padRank).Concat(input.CheckedShape.ToValueArray()).ToArray()).InheritMetaData(reshapeCall)),
Tensor.From(Enumerable.Repeat(0, (reshapeRank - padRank) * 2).Concat(pads).ToArray(), new[] { reshapeRank, 2 }),
var newPad = Pad(
marker.With(target: Reshape(
marker.With(target: input),
Enumerable.Repeat(1, reshapeRank - padRank).Concat(input.CheckedShape.ToValueArray()).ToArray())
.InheritMetaData(reshapeCall)),
Tensor.From(
Enumerable.Repeat(0, (reshapeRank - padRank) * 2).Concat(pads).ToArray(),
new[] { reshapeRank, 2 }),
pad.PadMode,
value).InheritMetaData(padCall);
var outMarker = result.GetValueOrDefault("outMarker");
if (outMarker != null)
{
return ((Marker)outMarker).With(target: newPad);
}
return newPad;
}
return null;
@ -67,15 +81,17 @@ public sealed partial class CombineReshapePad : IRewriteRule
public sealed partial class CombineTransposePad : IRewriteRule
{
/// <inheritdoc/>
public IPattern Pattern { get; } = IsPad(
public IPattern Pattern { get; } = MaybeMarker(
IsPad(
"pad",
"padCall",
x => true,
HasMarker(IsTranspose(IsWildcard("input"), IsTensorConst("perm")), "marker"),
IsTensorConst("pads"),
IsWildcard("padValue"));
IsWildcard("padValue")),
"outMarker");
private Expr GetReplace(Pad pad, Call padCall, Expr input, int[] perm, Expr pads, Expr padValue, Marker marker)
private Expr GetReplace(Pad pad, Call padCall, Expr input, int[] perm, Expr pads, Expr padValue, Marker marker, IMatchResult result)
{
var inv_perm = perm.Select((p, i) => (p, i)).OrderBy(tp => tp.p).ToArray();
var newPads = new List<Expr>();
@ -87,7 +103,14 @@ public sealed partial class CombineTransposePad : IRewriteRule
}
var p = Pad(input, Stack(new IR.Tuple(newPads.ToArray()), 0).Evaluate().AsTensor(), pad.PadMode, padValue).InheritMetaData(padCall);
return Transpose(marker.With(target: p), perm);
var newTranspose = Transpose(marker.With(target: p), perm);
var outMarker = result.GetValueOrDefault("outMarker");
if (outMarker != null)
{
return ((Marker)outMarker).With(target: newTranspose);
}
return newTranspose;
}
}
@ -99,7 +122,8 @@ public sealed partial class CombineTransposePad : IRewriteRule
public sealed partial class CombinePadTranspose : IRewriteRule
{
/// <inheritdoc/>
public IPattern Pattern { get; } = IsTranspose(
public IPattern Pattern { get; } = MaybeMarker(
IsTranspose(
"transpose",
x => true,
HasMarker(
@ -111,9 +135,10 @@ public sealed partial class CombinePadTranspose : IRewriteRule
IsTensorConst("pads"),
IsTensorConst("padValue")),
"marker"),
IsTensorConst("perm"));
IsTensorConst("perm")),
"outMarker");
private Expr GetReplace(Pad pad, Call padCall, Expr input, int[] perm, Expr pads, Expr padValue, Marker marker)
private Expr GetReplace(Pad pad, Call padCall, Expr input, int[] perm, Expr pads, Expr padValue, Marker marker, IMatchResult result)
{
var newPads = new List<int>();
for (int i = 0; i < perm.Length; i++)
@ -122,6 +147,17 @@ public sealed partial class CombinePadTranspose : IRewriteRule
newPads.Add(((TensorConst)pads).Value.ToArray<int>()[(perm[i] * 2) + 1]);
}
return Pad(marker.With(target: Transpose(input, perm)), Tensor.From<int>(newPads.ToArray(), pads.CheckedShape), pad.PadMode, padValue).InheritMetaData(padCall);
var newPad = Pad(
marker.With(target: Transpose(input, perm)),
Tensor.From<int>(newPads.ToArray(), pads.CheckedShape),
pad.PadMode,
padValue).InheritMetaData(padCall);
var outMarker = result.GetValueOrDefault("outMarker");
if (outMarker != null)
{
return ((Marker)outMarker).With(target: newPad);
}
return newPad;
}
}

View File

@ -47,10 +47,10 @@ public sealed class UnitTestEGraphCostModel
},
};
Assert.IsType<TensorConst>(list.OrderBy(e => e, EGraphExtractExtensions.ENodeTypeComparer.Instance).First());
Assert.IsType<TensorConst>(list.OrderBy(e => e, ENodeTypeComparer.Instance).First());
Assert.True(cost[b] < cost[c]);
Assert.IsType<TensorConst>(list.OrderBy(e => e, EGraphExtractExtensions.ENodeTypeComparer.Instance).MinBy(e => cost[e]));
Assert.IsType<TensorConst>(list.OrderBy(e => e, ENodeTypeComparer.Instance).MinBy(e => cost[e]));
}
}

View File

@ -115,7 +115,7 @@ class Inference:
def dump_kmodel_desc(self, file):
input_shapes = data_shape_list_string(self.inputs)
output_shapes = data_shape_list_string(self.outputs)
s = f"{len(self.inputs)} {len(self.outputs)}\n{input_shapes}\n{output_shapes}"
s = f"{len(self.inputs)} {len(self.outputs)}\n{input_shapes}\n{output_shapes}\n"
with open(file, "w+") as f:
f.write(s)

View File

@ -66,7 +66,7 @@ def dump_dict_to_json(dict, json_file):
def in_ci():
return os.getenv('CI', False)
return os.getenv('CI', 'False').lower() == 'true'
def kpu_targets():

View File

@ -99,7 +99,7 @@ class Inference:
def dump_kmodel_desc(self, file):
input_shapes = data_shape_list_string(self.inputs)
output_shapes = data_shape_list_string(self.outputs)
s = f"{len(self.inputs)} {len(self.outputs)}\n{input_shapes}\n{output_shapes}"
s = f"{len(self.inputs)} {len(self.outputs)}\n{input_shapes}\n{output_shapes}\n"
with open(file, "w+") as f:
f.write(s)

View File

@ -62,7 +62,7 @@ def dump_dict_to_json(dict, json_file):
def in_ci():
return os.getenv('CI', False)
return os.getenv('CI', 'False').lower() == 'true'
def kpu_targets():