WIN10+WSL+UBUNTU2004+mmdeploy+3090
This article documents, on a Windows 10 system, using the WSL2 virtual system Ubuntu 20.04 to install mmdeploy, train with mmseg, convert the .pth model, and deploy it with both Python and C++ on ONNX Runtime and TensorRT.
Environment
System: Windows 10
GPU: RTX 3090
Ubuntu: 20.04
Software
CUDA: 11.6 Update 2
NVIDIA driver: 511.65
cuDNN: 8.6.0
TensorRT: 8.5.3.1
ONNX Runtime: 1.8.1
Key notes
- The CUDA version on the Windows side should match the CUDA version inside Ubuntu. Deployment can still succeed if they differ, but the TensorRT conversion may then produce results inconsistent with ONNX.
- For 30-series GPUs, the NVIDIA driver should be 470 or newer, otherwise WSL cannot use the driver directly.
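A quick way to confirm the pairing inside WSL (a minimal sanity check; nvidia-smi reports the CUDA version exposed by the Windows driver, nvcc the toolkit installed inside Ubuntu):
nvidia-smi | grep "CUDA Version"   # CUDA version from the Windows driver
nvcc --version | grep release      # CUDA toolkit version inside Ubuntu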
Windows 10 side
Upgrade WSL2
Download the latest package:
WSL2 Linux kernel update package for x64 machines
Run the update package downloaded in the previous step.
Set WSL 2 as the default version
Open PowerShell and run the following command so that newly installed Linux distributions default to WSL 2:
wsl --set-default-version 2
Install your chosen Linux distribution
Open the Microsoft Store and pick your preferred Linux distribution. After installing it, it is best to enter the system, confirm that nvidia-smi can be run directly, inspect the PATH, and record your CUDA environment variables.
Move the storage directory
By default the virtual disk is stored on the C drive and takes up a lot of space, so you should definitely move it:
wsl -l -v # list the installed distributions
wsl --shutdown # stop all running distributions
wsl -l -v # confirm that they have all stopped
wsl --export Ubuntu-20.04 E:\u2004.rar # export the distribution named Ubuntu-20.04 to the file u2004.rar
wsl --unregister Ubuntu-20.04 # unregister the distribution named Ubuntu-20.04
wsl -l -v # list the installed distributions
wsl --import Ubuntu-20.04 E:\Ubuntu2004 E:\u2004.rar # re-import it as Ubuntu-20.04, stored on drive E
wsl -l -v # list the installed distributions
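Note that after wsl --import the distribution may start logging you in as root. If that happens, the default user can be pinned in /etc/wsl.conf (a sketch, using the user name that appears later in this article; adjust to yours):
sudo tee /etc/wsl.conf <<'EOF'
[user]
default=jason
EOF
# then from PowerShell: wsl --shutdown, and restart the distribution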
Ubuntu side
Set environment variables
After entering the system, first set the environment variables:
export PATH='/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/usr/lib/wsl/lib:/mnt/c/Program Files/WindowsApps/CanonicalGroupLimited.Ubuntu20.04LTS_2004.6.16.0_x64__79rhkp1fndgsc':$PATH
export PATH='/mnt/c/Program Files (x86)/Common Files/Intel/Shared Libraries/redist/intel64/compiler:/mnt/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.6/bin:/mnt/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.6/libnvvp:/mnt/c/Windows/system32:/mnt/c/Windows:/mnt/c/Windows/System32/Wbem:/mnt/c/Windows/System32/WindowsPowerShell/v1.0/:/mnt/c/Windows/System32/OpenSSH/':$PATH
export PATH='/mnt/c/Program Files/NVIDIA Corporation/NVIDIA NvDLISR:/mnt/c/Program Files (x86)/NVIDIA Corporation/PhysX/Common:/mnt/c/Program Files/Git/cmd:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS:/mnt/c/WINDOWS/System32/Wbem:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/:/mnt/c/WINDOWS/System32/OpenSSH/':$PATH
export PATH='/mnt/c/Program Files/NVIDIA Corporation/Nsight Compute 2022.1.1/:/mnt/c/Users/H/AppData/Local/Microsoft/WindowsApps:/mnt/c/Users/H/AppData/Local/Programs/Microsoft VS Code/bin:/snap/bin':$PATH
export PATH='/mnt/c/Program Files/NVIDIA Corporation/NVSMI':$PATH
export PATH=~/anaconda3/envs/mmdep/bin:~/anaconda3/bin:~/anaconda3/condabin:$PATH
export PATH=~/Code/cmake-3.20.0-linux-x86_64/bin/:$PATH
Then run nvidia-smi and check that the GPU information appears.
nvcc lives under /usr/local/cuda/bin and can be used directly.
Software
- Anaconda3-2023.03-0-Linux-x86_64, download and install directly
- cuDNN 8.6
- cuda_11.6.2_510.47.03_linux
- TensorRT 8.5.3.1
conda environment
Create the environment:
conda create -n mmdeploy python=3.7 -y
conda activate mmdeploy
pip install numpy==1.21.6 -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116 -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install onnxruntime==1.8.1 -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install onnxruntime-gpu==1.8.1 -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install -U openmim -i https://pypi.tuna.tsinghua.edu.cn/simple/
mim install mmengine -i https://pypi.tuna.tsinghua.edu.cn/simple/
mim install "mmcv>=2.0.1" -i https://pypi.tuna.tsinghua.edu.cn/simple/
sudo apt-get install libspdlog-dev
sudo apt-get install libopencv-dev
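Before moving on, it is worth checking that this environment's PyTorch actually sees the GPU (a quick sanity check, not part of the original steps):
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
# expected output: 1.12.0+cu116 True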
ppl.cv
git clone https://github.com/openppl-public/ppl.cv.git
cd ppl.cv
export PPLCV_DIR=$(pwd)
git checkout tags/v0.7.1 -b v0.7.1
./build.sh cuda
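If the build succeeds, the cmake package files that the mmdeploy build consumes should now exist under cuda-build/install (the same path passed as -Dpplcv_DIR later):
ls $PPLCV_DIR/cuda-build/install/lib/cmake/ppl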
tensorrt
tar -zxvf TensorRT-8.5.3.1.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz
pip install TensorRT-8.5.3.1/python/tensorrt-8.5.3.1-cp37-none-linux_x86_64.whl
cd TensorRT-8.5.3.1/
export TENSORRT_DIR=$(pwd)
export LD_LIBRARY_PATH=$TENSORRT_DIR/lib:$LD_LIBRARY_PATH
cd uff/
pip install uff-0.6.9-py2.py3-none-any.whl
cd ..
cd graphsurgeon/
pip install graphsurgeon-0.4.6-py2.py3-none-any.whl
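With the wheel installed and LD_LIBRARY_PATH pointing at the TensorRT libraries, a quick import test confirms the setup:
python -c "import tensorrt; print(tensorrt.__version__)"
# expected: 8.5.3.1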
onnxruntime
wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz
tar -zxvf onnxruntime-linux-x64-1.8.1.tgz
cd onnxruntime-linux-x64-1.8.1
export ONNXRUNTIME_DIR=$(pwd)
export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
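Likewise for ONNX Runtime (get_device() reports GPU when the onnxruntime-gpu wheel is the active one):
python -c "import onnxruntime; print(onnxruntime.__version__, onnxruntime.get_device())"
# expected: 1.8.1 GPU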
cudnn
tar -xJf cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz
export CUDNN_DIR=$(pwd)/cudnn-linux-x86_64-8.6.0.163_cuda11-archive
export LD_LIBRARY_PATH=$CUDNN_DIR/lib:$LD_LIBRARY_PATH
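The extracted archive carries its version in a header, which makes for an easy check:
grep -A 2 'define CUDNN_MAJOR' $CUDNN_DIR/include/cudnn_version.h
# expect CUDNN_MAJOR 8, CUDNN_MINOR 6, CUDNN_PATCHLEVEL 0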
mmsegmentation
git clone https://github.com/open-mmlab/mmsegmentation.git
cd mmsegmentation
pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
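The examples below assume the pretrained checkpoint sits in mmsegmentation/pretrained/. It can be fetched from the OpenMMLab model zoo (the URL below follows the standard download pattern; verify it against the DeepLabV3+ model zoo page):
mkdir -p pretrained && cd pretrained
wget https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth
cd ..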
mmdeploy
Here we build support for both the ONNX Runtime and TensorRT backends.
git clone --recursive https://github.com/open-mmlab/mmdeploy.git
cd mmdeploy
mkdir build
cd build
cmake .. \
-DCMAKE_CXX_COMPILER=g++-9 \
-DMMDEPLOY_BUILD_SDK=ON \
-DMMDEPLOY_BUILD_EXAMPLES=ON \
-DMMDEPLOY_BUILD_SDK_PYTHON_API=ON \
-DMMDEPLOY_TARGET_DEVICES="cuda;cpu" \
-DMMDEPLOY_TARGET_BACKENDS="trt;ort" \
-Dpplcv_DIR=${PPLCV_DIR}/cuda-build/install/lib/cmake/ppl \
-DTENSORRT_DIR=${TENSORRT_DIR} \
-DCUDNN_DIR=${CUDNN_DIR} \
-DONNXRUNTIME_DIR=${ONNXRUNTIME_DIR}
make -j$(nproc) && make install
cd ..
# pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
mim install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
If cuda.h cannot be found, add the header path:
export CPLUS_INCLUDE_PATH=/usr/local/cuda-11.6/targets/x86_64-linux/include/
If some paths cannot be found, set the environment variables:
export CUDA_DIR=/usr/local/cuda
export TENSORRT_DIR=~/Code/TensorRT-8.5.3.1
export ONNXRUNTIME_DIR=~/Code/onnxruntime-linux-x64-1.8.1
export CUDNN_DIR=~/Code/cudnn-linux-x86_64-8.6.0.163_cuda11-archive
export MMDeploy_DIR=~/Code/mmdeploy
export PPLCV_DIR=~/Code/ppl.cv
export LD_LIBRARY_PATH=$PPLCV_DIR/cuda-build/install/lib:$MMDeploy_DIR/build/lib:$CUDA_DIR/lib64:$CUDNN_DIR/lib:$TENSORRT_DIR/lib:$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
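At this point mmdeploy's environment-check script is useful: it prints which backends (TensorRT, ONNX Runtime) were detected and catches path mistakes early (assuming the repo checkout at ~/Code/mmdeploy):
python ~/Code/mmdeploy/tools/check_env.py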
Python API and SDK tests
First convert the model; this is the script that converts to ONNX:
export PYTHONPATH=~/Code/mmdeploy/build/lib
python mmdeploy/tools/deploy.py ~/Code/mmdeploy/configs/mmseg/segmentation_onnxruntime_dynamic.py ~/Code/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py ~/Code/mmsegmentation/pretrained/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth ~/Code/mmsegmentation/demo/demo.png --work-dir ~/Code/mmsegmentation/onnx/ --device cuda --dump-info
Note: you must export PYTHONPATH=~/Code/mmdeploy/build/lib, otherwise Python will report that the mmdeploy package cannot be found.
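Because --dump-info is passed, the work directory should now contain the model plus the SDK metadata (a sketch of the expected layout; the JSON file names follow mmdeploy's conventions):
ls ~/Code/mmsegmentation/onnx/
# end2end.onnx  deploy.json  pipeline.json  detail.json  ...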
Python file containing both the ONNX and TensorRT conversions and tests:
import os
home = '/home/jason'
image = '%s/Code/mmsegmentation/demo/demo.png'%(home)
device = 'cuda'
cmd = 'cd ~/Code && python mmdeploy/tools/deploy.py \
%s/Code/mmdeploy/configs/mmseg/segmentation_onnxruntime_dynamic.py \
%s/Code/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py \
%s/Code/mmsegmentation/pretrained/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \
%s/Code/mmsegmentation/demo/demo.png \
--work-dir %s/Code/mmsegmentation/onnx/ --device cpu --dump-info'%(home,home,home,home,home)
pipe = os.popen(cmd)
print(pipe.read())
from mmdeploy.apis.utils import build_task_processor
from mmdeploy.utils import get_input_shape, load_config
import torch
deploy_cfg = '%s/Code/mmdeploy/configs/mmseg/segmentation_onnxruntime_dynamic.py'%(home)
model_cfg = '%s/Code/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py'%(home)
backend_model = ['%s/Code/mmsegmentation/onnx/end2end.onnx'%(home)]
deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
task_processor = build_task_processor(model_cfg, deploy_cfg, 'cpu')
model = task_processor.build_backend_model(backend_model)
input_shape = get_input_shape(deploy_cfg)
model_inputs, _ = task_processor.create_input(image, input_shape)
with torch.no_grad():
    result = model.test_step(model_inputs)
print(result)
task_processor.visualize( image=image, model=model, result=result[0], window_name='visualize', output_file='./output_segmentation_onnx.png')
cmd = 'cd ~/Code && python mmdeploy/tools/deploy.py \
%s/Code/mmdeploy/configs/mmseg/segmentation_tensorrt_dynamic-512x1024-2048x2048.py \
%s/Code/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py \
%s/Code/mmsegmentation/pretrained/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \
%s/Code/mmsegmentation/demo/demo.png \
--work-dir %s/Code/mmsegmentation/trt/ --device cuda --dump-info --log-level INFO '%(home,home,home,home,home)
print(cmd)
pipe = os.popen(cmd)
print(pipe.read())
print('-------------------- start inference --------------------------')
from mmdeploy_runtime import Segmentor
import cv2
import numpy as np
img = cv2.imread(image)
# create a segmentor
segmentor = Segmentor(model_path='%s/Code/mmsegmentation/trt/'%(home), device_name='cuda', device_id=0)
# perform inference
seg = segmentor(img)
# visualize inference result
## generate a random palette of size 256x3
palette = np.random.randint(0, 256, size=(256, 3))
color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
for label, color in enumerate(palette):
    color_seg[seg == label, :] = color
# convert to BGR
color_seg = color_seg[..., ::-1]
img = img * 0.5 + color_seg * 0.5
img = img.astype(np.uint8)
cv2.imwrite('output_segmentation_trt.png', img)
TensorRT C++ test
cpp file
#include <fstream>
#include <numeric>
#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <random>
#include <string>
#include <vector>

#include "mmdeploy/segmentor.h"

using namespace std;

vector<cv::Vec3b> gen_palette(int num_classes) {
  std::mt19937 gen;
  std::uniform_int_distribution<ushort> uniform_dist(0, 255);

  vector<cv::Vec3b> palette;
  palette.reserve(num_classes);
  for (auto i = 0; i < num_classes; ++i) {
    palette.emplace_back(uniform_dist(gen), uniform_dist(gen), uniform_dist(gen));
  }
  return palette;
}

int main(int argc, char* argv[]) {
  if (argc != 4) {
    fprintf(stderr, "usage:\n  image_segmentation device_name model_path image_path\n");
    return 1;
  }
  auto device_name = argv[1];
  auto model_path = argv[2];
  auto image_path = argv[3];
  cv::Mat img = cv::imread(image_path);
  if (!img.data) {
    fprintf(stderr, "failed to load image: %s\n", image_path);
    return 1;
  }

  mmdeploy_segmentor_t segmentor{};
  int status{};
  status = mmdeploy_segmentor_create_by_path(model_path, device_name, 0, &segmentor);
  if (status != MMDEPLOY_SUCCESS) {
    fprintf(stderr, "failed to create segmentor, code: %d\n", (int)status);
    return 1;
  }

  mmdeploy_mat_t mat{
      img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8};

  mmdeploy_segmentation_t* result{};
  status = mmdeploy_segmentor_apply(segmentor, &mat, 1, &result);
  if (status != MMDEPLOY_SUCCESS) {
    fprintf(stderr, "failed to apply segmentor, code: %d\n", (int)status);
    return 1;
  }

  auto palette = gen_palette(result->classes + 1);

  cv::Mat color_mask = cv::Mat::zeros(result->height, result->width, CV_8UC3);
  int pos = 0;
  int total = color_mask.rows * color_mask.cols;
  std::vector<int> idxs(result->classes);
  for (auto iter = color_mask.begin<cv::Vec3b>(); iter != color_mask.end<cv::Vec3b>(); ++iter) {
    // output mask: color each pixel with its label's palette entry
    if (result->mask) {
      *iter = palette[result->mask[pos++]];
    }
    // output score: take the argmax class over the per-class score planes
    if (result->score) {
      std::iota(idxs.begin(), idxs.end(), 0);
      auto k = std::max_element(idxs.begin(), idxs.end(),
                                [&](int i, int j) {
                                  return result->score[i * total + pos] < result->score[j * total + pos];
                                }) -
               idxs.begin();
      *iter = palette[k];
      pos += 1;
    }
  }

  img = img * 0.5 + color_mask * 0.5;
  cv::imwrite("output_segmentation.png", img);

  mmdeploy_segmentor_release_result(result, 1);
  mmdeploy_segmentor_destroy(segmentor);

  return 0;
}
CMakeLists.txt file
cmake_minimum_required(VERSION 3.14)
project(mmdeploy-example)
if (NOT (${CMAKE_PROJECT_NAME} STREQUAL "MMDeploy"))
find_package(MMDeploy REQUIRED)
endif ()
find_package(OpenCV REQUIRED)  # ${OpenCV_LIBS} below needs OpenCV's cmake config
set(name image_segmentation)
# Search for c/cpp sources
file(GLOB _SRCS ${name}.c*)
add_executable(${name} ${_SRCS})
if (NOT (MSVC OR APPLE))
# Disable new dtags so that executables can run even without LD_LIBRARY_PATH set
target_link_libraries(${name} PRIVATE -Wl,--disable-new-dtags)
endif ()
if (MMDEPLOY_BUILD_SDK_MONOLITHIC)
target_link_libraries(${name} PRIVATE mmdeploy ${OpenCV_LIBS})
else ()
# Load MMDeploy modules
mmdeploy_load_static(${name} MMDeployStaticModules)
mmdeploy_load_dynamic(${name} MMDeployDynamicModules)
# Link to MMDeploy libraries
target_link_libraries(${name} PRIVATE MMDeployLibs ${OpenCV_LIBS})
endif ()
install(TARGETS ${name} RUNTIME DESTINATION bin)
Run script
cd build
export CUDA_DIR=/usr/local/cuda
export TENSORRT_DIR=~/Code/TensorRT-8.5.3.1
export ONNXRUNTIME_DIR=~/Code/onnxruntime-linux-x64-1.8.1
export CUDNN_DIR=~/Code/cudnn-linux-x86_64-8.6.0.163_cuda11-archive
export MMDeploy_DIR=~/Code/mmdeploy
export LD_LIBRARY_PATH=$MMDeploy_DIR/build/lib:$CUDA_DIR/lib64:$CUDNN_DIR/lib:$TENSORRT_DIR/lib:$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
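With those variables set, the example can be configured, built, and run roughly as follows (a sketch; the MMDeploy_DIR cmake path assumes make install placed the SDK under build/install, adjust if your install prefix differs):
cmake .. -DMMDeploy_DIR=$MMDeploy_DIR/build/install/lib/cmake/MMDeploy
make -j$(nproc)
./image_segmentation cuda ~/Code/mmsegmentation/trt ~/Code/mmsegmentation/demo/demo.png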