TensorRT Samples: GoogleNet

来源:互联网 发布:土建造价软件视频教程 编辑:程序博客网 时间:2024/06/07 03:08
关于TensorRT的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/78469551  

以下是参考TensorRT 2.1.2中的sampleGoogleNet.cpp文件改写的测试代码,文件(googlenet.cpp)内容如下:

#include <iostream>#include <tuple>#include <string>#include <vector>#include <algorithm>#include <cuda_runtime_api.h>#include <NvInfer.h>#include <NvCaffeParser.h>#include "common.hpp"// reference: TensorRT-2.1.2/samples/sampleMNIST/sampleGoogleNet.cppnamespace {// batch size, timing iterations, input blob name, output blob name, deploy file, model filetypedef std::tuple<int, int, std::string, std::string, std::string , std::string> DATA_INFO;  struct Profiler : public nvinfer1::IProfiler {typedef std::pair<std::string, float> Record;std::vector<Record> mProfile;int timing_iterations {1};void setTimeIterations(int iteration){timing_iterations = iteration;}virtual void reportLayerTime(const char* layerName, float ms){auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });if (record == mProfile.end())mProfile.push_back(std::make_pair(layerName, ms));elserecord->second += ms;}void printLayerTimes(){float totalTime = 0;for (size_t i = 0; i < mProfile.size(); ++i) {fprintf(stdout, "%s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / timing_iterations);totalTime += mProfile[i].second;}fprintf(stdout, "Time over all layers: %4.3f\n", totalTime / timing_iterations);}};int caffeToGIEModel(const std::string& deployFile,// name for caffe prototxt const std::string& modelFile,// name for model  const std::vector<std::string>& outputs,   // network outputs unsigned int maxBatchSize,// batch size - NB must be at least as large as the batch we want to run with) nvinfer1::IHostMemory *&gieModelStream, Logger logger){// create API root class - must span the lifetime of the engine usagenvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);nvinfer1::INetworkDefinition* network = builder->createNetwork();// parse the caffe model to populate the network, then set the outputsnvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();bool useFp16 = builder->platformHasFastFp16();nvinfer1::DataType modelDataType = useFp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; // create a 16-bit model if it's natively supportedconst nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(deployFile.c_str(), modelFile.c_str(), *network, modelDataType);CHECK(blobNameToTensor != nullptr);// the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generatefor (auto& s : outputs)network->markOutput(*blobNameToTensor->find(s.c_str()));// Build the enginebuilder->setMaxBatchSize(maxBatchSize);builder->setMaxWorkspaceSize(16 << 20);// set up the network for paired-fp16 format if availableif(useFp16)builder->setHalf2Mode(true);nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);CHECK(engine != nullptr);// we don't need the network any more, and we can destroy the parsernetwork->destroy();parser->destroy();// serialize the engine, then close everything downgieModelStream = engine->serialize();engine->destroy();builder->destroy();nvcaffeparser1::shutdownProtobufLibrary();return 0;}int timeInference(nvinfer1::ICudaEngine* engine, const DATA_INFO& info, Profiler* profiler){// input and output buffer pointers that we pass to the engine - the engine requires exactly ICudaEngine::getNbBindings(),// of these, but in this case we know that there is exactly one input and one output.CHECK(engine->getNbBindings() == 2);void* buffers[2];// In order to bind the buffers, we need to know the names of the input and output tensors.// note that indices are guaranteed to be less than ICudaEngine::getNbBindings()int inputIndex = engine->getBindingIndex(std::get<2>(info).c_str()), outputIndex = engine->getBindingIndex(std::get<3>(info).c_str());// allocate GPU buffersnvinfer1::DimsCHW inputDims = static_cast<nvinfer1::DimsCHW&&>(engine->getBindingDimensions(inputIndex)), outputDims = static_cast<nvinfer1::DimsCHW&&>(engine->getBindingDimensions(outputIndex));size_t inputSize = std::get<0>(info) * inputDims.c() * inputDims.h() * inputDims.w() * sizeof(float);size_t outputSize = std::get<0>(info) * outputDims.c() * outputDims.h() * outputDims.w() * sizeof(float);cudaMalloc(&buffers[inputIndex], inputSize);cudaMalloc(&buffers[outputIndex], outputSize);nvinfer1::IExecutionContext* context = engine->createExecutionContext();context->setProfiler(profiler);// zero the input buffercudaMemset(buffers[inputIndex], 0, inputSize);for (int i = 0; i < std::get<1>(info); ++i)context->execute(std::get<0>(info), buffers);// release the context and bufferscontext->destroy();cudaFree(buffers[inputIndex]);cudaFree(buffers[outputIndex]);return 0;}} // namespaceint test_googlenet(){fprintf(stdout, "Building and running a GPU inference engine for GoogleNet, N=4...\n");// stuff we know about the network and the caffe input/output blobsDATA_INFO info(4, 1000, "data", "prob", "models/googlenet.prototxt", "models/googlenet.caffemodel");Logger logger;// parse the caffe model and the mean file   nvinfer1::IHostMemory* gieModelStream{ nullptr };caffeToGIEModel(std::get<4>(info), std::get<5>(info), std::vector<std::string>{std::get<3>(info)}, std::get<0>(info), gieModelStream, logger);// create an enginenvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger);nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), nullptr);fprintf(stdout, "Bindings after deserializing:\n"); for (int bi = 0; bi < engine->getNbBindings(); bi++) { if (engine->bindingIsInput(bi) == true) { fprintf(stdout, "Binding %d (%s): Input.\n",  bi, engine->getBindingName(bi)); } else { fprintf(stdout, "Binding %d (%s): Output.\n", bi, engine->getBindingName(bi)); } } Profiler profiler;profiler.setTimeIterations(std::get<1>(info));// run inference with null data to time network performancetimeInference(engine,  info, &profiler);engine->destroy();infer->destroy();profiler.printLayerTimes();fprintf(stdout, "Done.\n");return 0;}
执行结果如下:

测试代码编译步骤如下(ReadMe.txt):
在Linux下通过CMake编译TensorRT_Test中的测试代码步骤:1. 将终端定位到CUDA_Test/prj/linux_tensorrt_cmake,依次执行如下命令:$ mkdir build$ cd build$ cmake ..$ make (生成TensorRT_Test执行文件)$ ln -s ../../../test_data/models  ./ (将models目录软链接到build目录下)$ ln -s ../../../test_data/images  ./ (将images目录软链接到build目录下)$ ./TensorRT_Test2. 对于有需要用OpenCV参与的读取图像的操作,需要先将对应文件中的图像路径修改为Linux支持的路径格式

GitHub: https://github.com/fengbingchun/CUDA_Test


原创粉丝点击
热门问题 老师的惩罚 人脸识别 我在镇武司摸鱼那些年 重生之率土为王 我在大康的咸鱼生活 盘龙之生命进化 天生仙种 凡人之先天五行 春回大明朝 姑娘不必设防,我是瞎子 一岁半宝宝咳嗽半个月了怎么办 6岁儿童上课调皮怎么办 小孩挨揍后精神失控怎么办 孩子不听话把我胃气疼了怎么办 因为孩子不听话夫妻经常吵架怎么办 11岁儿子不听话了怎么办 二十岁的儿子还不听话怎么办 幼儿园小班幼儿不听老师的话怎么办 幼师对待不听话的孩子该怎么办 2岁宝宝不吃饭只喝奶怎么办 孩子哭着喊妈妈不睡觉怎么办 孩子晚上不睡觉一直哭怎么办 孩子不睡觉还哭怎么办 孩子晚上不睡觉老哭怎么办? 4岁宝宝叛逆期怎么办 驾考紧张脚抖怎么办 驾考科目三紧张怎么办 孩子不自信容易紧张怎么办 在人多时候紧张怎么办 考科目二很紧张怎么办 明天出成绩很紧张怎么办 一紧张就射精了怎么办 孩子在幼儿园表现不好怎么办 小孩子在幼儿园表现的不好怎么办 学籍档案写错了怎么办 发展报告填错了怎么办 学生发展报告丢了怎么办 大一新生入学团员档案袋丢失怎么办 三年级孩子语文成绩差怎么办 运气不好时 我们该怎么办 猫走了财运变差怎么办 特别在意别人的眼光怎么办 入职两个月没有业绩怎么办 在学舞蹈中孩子怕疼怎么办 我生二胎怕疼怎么办 孩子的执行力差怎么办 怀孕三个月不想要孩子怎么办 生完孩子老是掉头发怎么办 5岁半宝宝有狐臭怎么办? 4岁宝宝脾气倔怎么办 小孩子摔到后脑勺吐了怎么办