1. Export ONNX
Environment: PyCharm + onnx 1.7.0 + torch 1.7.1 + CUDA 11.0
Usage: python3 tools/export_onnx.py --output-name yolox_s.onnx -n yolox-s -c yolox_s.pth
The export follows the official guide: https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime
Pay attention to software versions: TensorRT, CUDA, and cuDNN must satisfy each other's version-matching requirements, and the onnx version must also be compatible with the TensorRT and CUDA versions, otherwise the export and conversion easily fail.
2. Convert ONNX to TRT
Environment: VS2019 + TensorRT 7.1.3.4 + CUDA 11.0
Reference source code: https://github.com/shaoeric/TensorRT_ONNX_resnet18
Modify CIFARONNX.cpp as follows:
#include <cassert>
#include <fstream>
#include <iostream>
#include <memory>
#include <vector>

#include <opencv2/opencv.hpp>

#include "NvInfer.h"
#include "NvOnnxParser.h"
//#include "NvOnnxParserRuntime.h"  // removed in TensorRT 7
#include "argsParser.h"
#include "logger.h"
#include "common.h"

//#define HEIGHT 640
//#define WIDTH 640
//#define CHANNEL 3
#define BATCH_SIZE 1
//#define NUM_CLASSES 20
#define RUN_FP16 false
#define RUN_INT8 false

using namespace std;

// Source: https://github.com/shaoeric/TensorRT_ONNX_resnet18
class TRTONNX
{
public:
    TRTONNX(const string& onnx_file, const string& engine_file) : m_onnx_file(onnx_file), m_engine_file(engine_file) {}

    // Inference helpers kept from the original sample; they are never called
    // here because this program only converts the model.
    vector<float> prepareImage(const cv::Mat& img);
    bool onnxToTRTModel(nvinfer1::IHostMemory* trt_model_stream);
    bool loadEngineFromFile();
    void doInference(const cv::Mat& img);

private:
    const string m_onnx_file;
    const string m_engine_file;
    samplesCommon::Args gArgs;
    nvinfer1::ICudaEngine* m_engine{ nullptr };

    bool constructNetwork(nvinfer1::IBuilder* builder, nvinfer1::INetworkDefinition* network, nvinfer1::IBuilderConfig* config, nvonnxparser::IParser* parser);
    bool saveEngineFile(nvinfer1::IHostMemory* data);
    std::unique_ptr<char[]> readEngineFile(int& length);
    int64_t volume(const nvinfer1::Dims& d);
    unsigned int getElementSize(nvinfer1::DataType t);
};

// Write the serialized engine to disk.
bool TRTONNX::saveEngineFile(nvinfer1::IHostMemory* data)
{
    std::ofstream file;
    file.open(m_engine_file, std::ios::binary | std::ios::out);
    cout << "writing engine file..." << endl;
    file.write((const char*)data->data(), data->size());
    cout << "save engine file done" << endl;
    file.close();
    return true;
}

bool TRTONNX::constructNetwork(nvinfer1::IBuilder* builder, nvinfer1::INetworkDefinition* network, nvinfer1::IBuilderConfig* config, nvonnxparser::IParser* parser)
{
    // Parse the ONNX file
    if (!parser->parseFromFile(this->m_onnx_file.c_str(), static_cast<int>(gLogger.getReportableSeverity())))
    {
        gLogError << "Fail to parse ONNX file" << std::endl;
        return false;
    }

    // Build the engine
    builder->setMaxBatchSize(BATCH_SIZE);
    config->setMaxWorkspaceSize(1 << 30);
    if (RUN_FP16)
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    if (RUN_INT8)
    {
        config->setFlag(nvinfer1::BuilderFlag::kINT8);
        samplesCommon::setAllTensorScales(network, 127.0f, 127.0f);
    }
    samplesCommon::enableDLA(builder, config, gArgs.useDLACore);
    return true;
}

// When no serialized engine plan exists yet, build the engine from the ONNX
// file, then serialize it into an engine plan file.
bool TRTONNX::onnxToTRTModel(nvinfer1::IHostMemory* trt_model_stream)
{
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger.getTRTLogger());
    assert(builder != nullptr);

    // The ONNX parser in TensorRT 7 requires an explicit-batch network
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger());

    // Build the network
    if (!constructNetwork(builder, network, config, parser))
    {
        return false;
    }
    m_engine = builder->buildEngineWithConfig(*network, *config);
    assert(m_engine != nullptr);

    // Verify the network was built as expected
    //assert(network->getNbInputs() == 1);
    //assert(network->getInput(0)->getDimensions().nbDims == 4);
    //assert(network->getNbOutputs() == 1);
    //assert(network->getOutput(0)->getDimensions().nbDims == 1);

    // Serialize the engine and save it to m_engine_file
    trt_model_stream = m_engine->serialize();
    saveEngineFile(trt_model_stream);

    parser->destroy();
    network->destroy();
    builder->destroy();
    //m_engine->destroy();
    return true;
}

int main()
{
    string onnx_file = "D:/vs_projects/onnx2trt/yolox_s.onnx";
    string engine_file = "D:/vs_projects/onnx2trt/yolox_s.trt";

    nvinfer1::IHostMemory* trt_model_stream{ nullptr };
    TRTONNX trt_onnx(onnx_file, engine_file);
    // No check for an existing engine plan here; we always rebuild from ONNX
    trt_onnx.onnxToTRTModel(trt_model_stream);
    return 0;
}
Since this program only performs the conversion and no inference, the inference code from the reference repo has been removed. One line also has to be changed: the original

nvinfer1::INetworkDefinition* network = builder->createNetwork();

must be replaced with

const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);

otherwise the build reports an error, because the ONNX parser in TensorRT 7 only works with explicit-batch networks. This change follows the sampleOnnxMNIST sample that ships with TensorRT 7.1.3.4.
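To sanity-check the plan file that was just written, here is a minimal sketch (my addition, not from the reference repo) that deserializes yolox_s.trt and prints its bindings; it reuses gLogger from the sample's logger.h, and the hard-coded path is the one assumed above. The printed binding names are exactly the blob names that inference in the next section depends on.

#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include "NvInfer.h"
#include "logger.h"

int main()
{
    // Read the serialized engine plan from disk
    std::ifstream file("D:/vs_projects/onnx2trt/yolox_s.trt", std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

    // Deserialize it; TensorRT 7 takes an optional IPluginFactory*, pass nullptr
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger.getTRTLogger());
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(blob.data(), blob.size(), nullptr);

    // List every binding with its direction and name
    for (int i = 0; i < engine->getNbBindings(); ++i)
        std::cout << (engine->bindingIsInput(i) ? "input : " : "output: ")
                  << engine->getBindingName(i) << std::endl;

    engine->destroy();
    runtime->destroy();
    return 0;
}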
3. YOLOX TensorRT inference
This follows the Windows deployment part of the tutorial "YOLOX Window10 TensorRT 全面部署教程"; configure CUDA and TensorRT according to your own setup.
Running it unmodified produced an error. Following the post "解锁新姿势-使用TensorRT部署pytorch模型", I checked the actual input/output names of the exported model by running this inside the YOLOX directory:
import onnxruntime as ort

ort_session = ort.InferenceSession('yolox_s.onnx')
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name
print(input_name)
print(output_name)
which prints:

images
output
So the demo's blob names

const char* INPUT_BLOB_NAME = "input_0";
const char* OUTPUT_BLOB_NAME = "output_0";

must be changed to

const char* INPUT_BLOB_NAME = "images";
const char* OUTPUT_BLOB_NAME = "output";
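To fail with a readable message instead of a crash when these names do not match the engine, the bindings can be checked by name before allocating any buffers. A minimal sketch, assuming the engine has already been deserialized as above; the helper name checkBindings is mine, not part of the demo:

#include <iostream>
#include "NvInfer.h"

// getBindingIndex() returns -1 for a name the engine does not contain,
// which is why the demo errored with "input_0"/"output_0".
bool checkBindings(const nvinfer1::ICudaEngine* engine, const char* inputName, const char* outputName)
{
    if (engine->getBindingIndex(inputName) < 0 || engine->getBindingIndex(outputName) < 0)
    {
        std::cerr << "binding not found - check INPUT_BLOB_NAME / OUTPUT_BLOB_NAME" << std::endl;
        return false;
    }
    return true;
}

Called as checkBindings(engine, INPUT_BLOB_NAME, OUTPUT_BLOB_NAME) right after deserialization, this catches the name mismatch before any CUDA memory is touched.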
With that, the problem is solved.