GOTURN(Generic Object Tracking Using Regression Networks ECCV2016)
来源:互联网 发布:太上老君与如来佛 知乎 编辑:程序博客网 时间:2024/05/22 07:05
This tracker is able to track objects at 100 fps. This real-time speed is due to two factors.
First, most previous neural network trackers are trained online. However, training neural networks is a slow process, leading to slow tracking. In contrast, our tracker is trained offline to learn a generic relationship between appearance and motion, so no online training is required.
Second, most trackers take a classification-based approah, classifying many image patches to find the target object. In contrast, this tracker used a regression-based approach, requiring just a single feed-forward pass through the network to regresses directly to the location of the target object.
The combination of offline training and one-pass regression leads to a significant speed-up compared to previous approaches and allows it to track objects at real-time speeds.
下面这个是train使用的网络,跟deploy有点不一样哦。
自己根据这个模型和网络实现的test过程,使用的是vot2014的数据库进行的测试,在测试过程中,在序列检测的过程中使用除第一帧(使用gounding truth)外,其他都是使用前一帧的结果进行target的获取,这使用过程中,发现,GOTURN回出现当相似的物体相遇之后,跟踪有时候回出现跳转到另外一个物体(分析:可以是在序列过程中误差的叠加造成forward时候的target一些偏差导致跟踪错误),还有,对于可形变物体跟踪效果也不好。(使用的时候要注意,输出的结果要除以10,才是得到的是左上角,和右下角相对于patch的位置)。
#include <string>#include <vector>#include "boost/algorithm/string.hpp"#include "google/protobuf/text_format.h"#include <opencv2/opencv.hpp>#include "caffe/blob.hpp"#include "caffe/layer.hpp"#include "caffe/common.hpp"#include "caffe/net.hpp"#include "caffe/proto/caffe.pb.h"#include "caffe/util/db.hpp"#include "caffe/util/format.hpp"#include "caffe/util/io.hpp"#include <stdio.h>#include <malloc.h>#include <fstream>#include <boost/progress.hpp>#include <fstream>#include <stdio.h>#include <boost/math/special_functions/next.hpp>#include <boost/random.hpp>#include <limits>#include "caffe/common.hpp"#include "caffe/util/math_functions.hpp"#include "caffe/util/rng.hpp"//#include "caffe/util/math_functions.hpp"using caffe::Blob;using caffe::Caffe;using caffe::Datum;using caffe::Net;using caffe::Layer;using std::string;namespace db = caffe::db;void loaddata_target(boost::shared_ptr<Net<float> >& net, std::string image_path);void loaddata_image(boost::shared_ptr<Net<float> >& net, std::string image_path);void loaddata_target(boost::shared_ptr<Net<float> >& net, cv::Mat image);void loaddata_image(boost::shared_ptr<Net<float> >& net, cv::Mat image);//int lap(int x1_min,int x1_max,int x2_min,int x2_max);int main(int argc, char** argv){ Caffe::set_mode(Caffe::GPU); boost::shared_ptr<Net<float> > net(new Net<float>(argv[1], caffe::TEST)); net->CopyTrainedLayersFromBinaryProto(argv[2]); //argv[3]表示的是包含pictures的文件夹 cv::Mat target, image; cv::Mat pre_image, cur_image; cv::Point point1, point2; float coord; char* filepath = new char[100]; std::ifstream in;//;(string(string(argv[3]))); sprintf(filepath, "%s/groundtruth.txt",argv[3]); in.open(filepath); delete [] filepath; in >> coord; in >> coord; in >> coord; point1.x = int(coord); in >> coord; point1.y = int(coord); in >> coord; in >> coord; in >> coord; point2.x = int(coord); in >> coord; point2.y = int(coord); in.close(); char* image_path = new char[100]; int count = 1; int cols,rows; int w,h; while(1){ //std::cout << point1 << " " << point2 << std::endl; sprintf(image_path,"%s/%.8d.jpg",argv[3],count++); pre_image = cv::imread(image_path); //std::cout << image_path << std::endl; sprintf(image_path,"%s/%.8d.jpg",argv[3],count++); cur_image = cv::imread(image_path); cols = cur_image.cols; rows = cur_image.rows; if(cols == 0 || rows == 0) break; w = point2.x - point1.x; h = point2.y - point1.y; //std::cout << w <<" h=" << h << std::endl; point1.x = point1.x - w/2; point1.y = point1.y - h/2; point2.x = point2.x + w/2; point2.y = point2.y + h/2; //std::cout << point1 << " " << point2 << std::endl; point1.x = point1.x < 0 ? 0 : point1.x; point1.y = point1.y < 0 ? 0 : point1.y; point2.x = point2.x > cols ? cols : point2.x; point2.y = point2.y > rows ? rows : point2.y; // std::cout << point1 << " " << point2 << std::endl; target = pre_image(cv::Rect(point1,point2)); image = cur_image(cv::Rect(point1,point2)); loaddata_target(net,target); loaddata_image(net,image); net->Forward(); Blob<float>* output_layer = net->output_blobs()[0]; std::vector<int> points; points.push_back(int(image.cols*((output_layer->cpu_data()[0]/10)))); points.push_back(int(image.rows*((output_layer->cpu_data()[1]/10)))); points.push_back(int(image.cols*((output_layer->cpu_data()[2]/10)))); points.push_back(int(image.rows*((output_layer->cpu_data()[3]/10)))); point2.x = point1.x + points[2]; point2.y = point1.y+ points[3]; point1.x += points[0]; point1.y += points[1]; cv::rectangle(cur_image, cv::Rect(point1,point2),cv::Scalar(0,225,255),1); cv::imshow("target",target); cv::imshow("image",cur_image); if(count == 3) cv::waitKey(0); cv::waitKey(33); //std::cout << image_path << std::endl; //target = pre_image(); // break; } delete [] image_path; return 1;}void loaddata_target(boost::shared_ptr<Net<float> >& net, std::string image_path){ Blob<float>* input_layer = net->input_blobs()[0]; int width, height; width = input_layer->width(); height = input_layer->height(); int size = width*height; cv::Mat image = cv::imread(image_path,-1); cv::Mat image_resized; cv::resize(image, image_resized, cv::Size(height, width)); float* input_data = input_layer->mutable_cpu_data(); int temp,idx; for(int i = 0; i < height; ++i){ uchar* pdata = image_resized.ptr<uchar>(i); for(int j = 0; j < width; ++j){ temp = 3*j; idx = i*width+j; input_data[idx] = (pdata[temp+2]); input_data[idx+size] = (pdata[temp+1]); input_data[idx+2*size] = (pdata[temp+0]); } } //cv::imshow("image",image_resized);}void loaddata_image(boost::shared_ptr<Net<float> >& net, std::string image_path){ Blob<float>* input_layer = net->input_blobs()[1]; int width, height; width = input_layer->width(); height = input_layer->height(); int size = width*height; cv::Mat image = cv::imread(image_path,-1); cv::Mat image_resized; cv::resize(image, image_resized, cv::Size(height, width)); float* input_data = input_layer->mutable_cpu_data(); int temp,idx; for(int i = 0; i < height; ++i){ uchar* pdata = image_resized.ptr<uchar>(i); for(int j = 0; j < width; ++j){ temp = 3*j; idx = i*width+j; input_data[idx] = (pdata[temp+2]); input_data[idx+size] = (pdata[temp+1]); input_data[idx+2*size] = (pdata[temp+0]); } } //cv::waitKey(0); //cv::imshow("image",image_resized);}void loaddata_target(boost::shared_ptr<Net<float> >& net, cv::Mat image){ Blob<float>* input_layer = net->input_blobs()[0]; int width, height; width = input_layer->width(); height = input_layer->height(); int size = width*height; //cv::Mat image = cv::imread(image_path,-1); cv::Mat image_resized; cv::resize(image, image_resized, cv::Size(height, width)); float* input_data = input_layer->mutable_cpu_data(); int temp,idx; for(int i = 0; i < height; ++i){ uchar* pdata = image_resized.ptr<uchar>(i); for(int j = 0; j < width; ++j){ temp = 3*j; idx = i*width+j; input_data[idx] = (pdata[temp+2])-104; input_data[idx+size] = (pdata[temp+1])-117; input_data[idx+2*size] = (pdata[temp+0])-123; } } //cv::imshow("image",image_resized);}void loaddata_image(boost::shared_ptr<Net<float> >& net, cv::Mat image){ Blob<float>* input_layer = net->input_blobs()[1]; int width, height; width = input_layer->width(); height = input_layer->height(); int size = width*height; //cv::Mat image = cv::imread(image_path,-1); cv::Mat image_resized; cv::resize(image, image_resized, cv::Size(height, width)); float* input_data = input_layer->mutable_cpu_data(); int temp,idx; for(int i = 0; i < height; ++i){ uchar* pdata = image_resized.ptr<uchar>(i); for(int j = 0; j < width; ++j){ temp = 3*j; idx = i*width+j; input_data[idx] = (pdata[temp+2])-104; input_data[idx+size] = (pdata[temp+1])-117; input_data[idx+2*size] = (pdata[temp+0])-123; } } //cv::waitKey(0); //cv::imshow("image",image_resized);}
deploy.prototxt
name: "CaffeNet"input: "target"input: "image"#targetinput_dim: 1input_dim: 3input_dim: 227input_dim: 227#imageinput_dim: 1input_dim: 3input_dim: 227input_dim: 227layer { name: "conv1" type: "Convolution" bottom: "target" top: "conv1" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } }}layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1"}layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 }}layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }}layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2"}layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 }}layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }}layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } }}layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3"}layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4"}layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5"}layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 }}layer { name: "conv1_p" type: "Convolution" bottom: "image" top: "conv1_p" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } }}layer { name: "relu1_p" type: "ReLU" bottom: "conv1_p" top: "conv1_p"}layer { name: "pool1_p" type: "Pooling" bottom: "conv1_p" top: "pool1_p" pooling_param { pool: MAX kernel_size: 3 stride: 2 }}layer { name: "norm1_p" type: "LRN" bottom: "pool1_p" top: "norm1_p" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }}layer { name: "conv2_p" type: "Convolution" bottom: "norm1_p" top: "conv2_p" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu2_p" type: "ReLU" bottom: "conv2_p" top: "conv2_p"}layer { name: "pool2_p" type: "Pooling" bottom: "conv2_p" top: "pool2_p" pooling_param { pool: MAX kernel_size: 3 stride: 2 }}layer { name: "norm2_p" type: "LRN" bottom: "pool2_p" top: "norm2_p" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }}layer { name: "conv3_p" type: "Convolution" bottom: "norm2_p" top: "conv3_p" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } }}layer { name: "relu3_p" type: "ReLU" bottom: "conv3_p" top: "conv3_p"}layer { name: "conv4_p" type: "Convolution" bottom: "conv3_p" top: "conv4_p" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu4_p" type: "ReLU" bottom: "conv4_p" top: "conv4_p"}layer { name: "conv5_p" type: "Convolution" bottom: "conv4_p" top: "conv5_p" param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu5_p" type: "ReLU" bottom: "conv5_p" top: "conv5_p"}layer { name: "pool5_p" type: "Pooling" bottom: "conv5_p" top: "pool5_p" pooling_param { pool: MAX kernel_size: 3 stride: 2 }}layer { name: "concat" type: "Concat" bottom: "pool5" bottom: "pool5_p" top: "pool5_concat" concat_param { axis: 1 }}layer { name: "fc6-new" type: "InnerProduct" bottom: "pool5_concat" top: "fc6" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6"}layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 }}layer { name: "fc7-new" type: "InnerProduct" bottom: "fc6" top: "fc7" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7"}layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 }}layer { name: "fc7-newb" type: "InnerProduct" bottom: "fc7" top: "fc7b" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } }}layer { name: "relu7b" type: "ReLU" bottom: "fc7b" top: "fc7b"}layer { name: "drop7b" type: "Dropout" bottom: "fc7b" top: "fc7b" dropout_param { dropout_ratio: 0.5 }}layer { name: "fc8-shapes" type: "InnerProduct" bottom: "fc7b" top: "fc8" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } }}
- GOTURN(Generic Object Tracking Using Regression Networks ECCV2016)
- GOTURN: Generic Object Tracking Using Regression Networks
- 论文阅读《Re3: Real-Time Recurrent Regression Networks for Object Tracking》
- 论文阅读-Re 3 : Real-Time Recurrent Regression Networks for Object Tracking
- 论文阅读: ECCV2016 Chained Predictions Using Convolutional Neural Networks
- GOTURN——Learning to Track at 100 FPS with Deep Regression Networks
- GOTURN: Learning to Track at 100 FPS with Deep Regression Networks笔记
- 目标跟踪之GOTURN:Learning to Track at 100 FPS with Deep Regression Networks
- Fully-Convolutional Siamese Networks for Object Tracking
- 物体跟踪-Fully-Convolutional Siamese Networks for Object Tracking
- Multi-Object Tracking with Quadruplet Convolutional Neural Networks
- Summary of Visual Object Tracking using Adaptive Correlation Filters
- Visual Object Tracking using Adaptive Correlation Filters 论文笔记
- 带你玩-Object Tracking using OpenCV (一)
- 带你玩-Object Tracking using OpenCV (二)
- Object tracking
- Object Tracking
- Correlation Filter in Visual Tracking系列一:Visual Object Tracking using Adaptive Correlation Filters 论
- bzoj 1021 dp
- [51nod 1292]字符串中的最大值V2
- Listen第二个参数的意义
- java基础学习之Layout
- android studio 传感器之加速度,临近,光线传感器
- GOTURN(Generic Object Tracking Using Regression Networks ECCV2016)
- 图片选择时有图片显示不出,还OOM
- android textview等文本内容中字体颜色的改变和字体背景色的改变
- 虚幻学习笔记_UMG UI
- 注册广播
- IOS面试题大全
- zabbix解决监控项中文乱码
- Ubuntu14.04 eclipse闪退问题
- 符号化Symbol(符号)体系