zynq学习笔记——HLS FAST corner导出keypoints(一)
来源:互联网 发布:男士睡衣品牌 知乎 编辑:程序博客网 时间:2024/05/16 09:12
PC平台:WINDOWS 10 64位
Xilinx设计开发套件:Xilinx_vivado_sdk_2015.4
开发板:Zed Board
之前参照XAPP1167文档,使用HLS Video函数库里的FASTX跑了一下例子。当时的例子是直接把keypoint以mask方式画在了原始视频图像上,应用层并没有获取到keypoint的坐标信息,所以无法开展下一步的图像处理,比如获取keypoint的特征点信息进行图像匹配等。其实HLS FASTX提供了两个函数:一个返回keypoint的mask图像,另一个返回keypoint数组。所以如果需要获取到keypoint的坐标信息,必须使用第二个函数。
可以在Vivado HLS安装目录下找到FASTX的源代码,有两个地方,Xilinx\Vivado_HLS\2015.4\include\hls\hls_video_fast.h和Xilinx\Vivado_HLS\2015.4\common\technology\autopilot\hls\hls_video_fast.h
// FAST corner detector, keypoint-array variant (as quoted from hls_video_fast.h).
//
// Streams every pixel of `_src` through a KERNEL_SIZE x KERNEL_SIZE sliding
// window and stores the coordinates of detected corners in `_keypoints`.
//
// Template parameters:
//   PSize        number of coordinate pairs in `flag`
//   KERNEL_SIZE  side length of the sliding window
//   T, N         coordinate type and capacity of the `_keypoints` array
//   SRC_T, ROWS, COLS  pixel type and maximum dimensions of the input Mat
//
// Parameters:
//   _src               input image stream; exactly rows*cols pixels are read
//   _keypoints         output array; at most N entries are written, further
//                      corners are silently dropped
//   _threshold         threshold forwarded to fast_judge
//   _nonmax_supression when true, corners are reported only after the 3x3
//                      fast_nonmax test on the `core` values
//   flag               coordinate table forwarded to fast_judge
//
// NOTE(review): the number of keypoints written (`index`) is not returned to
// the caller by this function.
template<int PSize,int KERNEL_SIZE,typename T, int N, int SRC_T,int ROWS,int COLS>
void FAST_t_opr(
        Mat<ROWS,COLS,SRC_T> &_src,
        Point_<T> (&_keypoints)[N],
        HLS_TNAME(SRC_T) _threshold,
        bool _nonmax_supression,
        int (&flag)[PSize][2]
        )
{
    typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;
    // k_buf holds the previous KERNEL_SIZE-1 image rows; core_buf holds two
    // rows of `core` values used by the non-maximum-suppression stage.
    LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T> k_buf;
    LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> > core_buf;
    Window<3,3,ap_int<16> > core_win;
    Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T> win;
    Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)> s;
    int rows= _src.rows;
    int cols= _src.cols;
    assert(rows <= ROWS);
    assert(cols <= COLS);
    // Not referenced again in this function.
    int kernel_half=KERNEL_SIZE/2;
    // Scratch arrays handed to fast_judge.
    ap_uint<2> flag_val[PSize+PSize/2+1];
    int flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0
    // Next free slot in _keypoints.
    int index=0;
    // Distance between the current scan position (i, j) and the pixel being
    // reported; one more when non-maximum suppression is enabled.
    int offset=KERNEL_SIZE/2;
    if(_nonmax_supression)
    {
        offset=offset+1;
    }
    // The loops run `offset` iterations past the image in both directions so
    // that the values still held in the windows are processed.
    loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {
        loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {
#pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
            // Only consume a pixel while inside the image.
            if(i<rows&&j<cols) {
                for(int r= 0;r<KERNEL_SIZE;r++) {
                    for(int c=0;c<KERNEL_SIZE-1;c++) {
                        win.val[r][c]=win.val[r][c+1];// shift window one column left
                    }
                }
                // Fill the right-most window column from the line buffer and
                // move each buffered row of column j up by one.
                win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
                for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
                    win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
                    k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
                }
                // Read the new pixel; only channel 0 is used.
                _src>>s;
                win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
                k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
            }
            // Same shift/fill scheme for the 3x3 window of core values.
            for(int r= 0;r<3;r++)
            {
                for(int c=0;c<3-1;c++)
                {
                    core_win.val[r][c]=core_win.val[r][c+1];// shift window one column left
                }
            }
            core_win.val[0][3-1]=core_buf.val[0][j];
            for(int buf_row= 1;buf_row< 3-1;buf_row++)
            {
                core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
                core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
            }
            int core=0;
            // The corner test runs once (i, j) has reached index KERNEL_SIZE-1
            // in both directions.
            if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
            {
                // fast_judge is defined elsewhere; `core` is passed to it and
                // used below, presumably as the corner score - TODO confirm.
                bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
                // Without suppression the corner is stored immediately.
                if(iscorner&&!_nonmax_supression)
                {
                    if(index<N)
                    {
                        _keypoints[index].x=j-offset;
                        _keypoints[index].y=i-offset;
                        index++;
                    }
                }
            }
            // Positions outside the image contribute a zero core value.
            if(i>=rows||j>=cols)
            {
                core=0;
            }
            if(_nonmax_supression)
            {
                core_win.val[3-1][3-1]=core;
                core_buf.val[3-2][j]=core;
                // Test the centre of the 3x3 core window, skipping zero centres.
                if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
                {
                    bool iscorner=fast_nonmax(core_win);
                    if(iscorner)
                    {
                        if(index<N)
                        {
                            _keypoints[index].x=j-offset;
                            _keypoints[index].y=i-offset;
                            index++;
                        }
                    }
                }
            }
        }
    }
}

// Convenience wrapper: runs FAST_t_opr with PSize=16 and KERNEL_SIZE=7 and a
// fixed table of 16 coordinate pairs inside the 7x7 window (presumably the
// radius-3 circle used by FAST - TODO confirm against fast_judge).
template<typename T, int N, int SRC_T,int ROWS,int COLS>
void FASTX(
        Mat<ROWS,COLS,SRC_T> &_src,
        Point_<T> (&_keypoints)[N],
        HLS_TNAME(SRC_T) _threshold,
        bool _nomax_supression
        )
{
#pragma HLS INLINE
    int flag[16][2]={{3,0},{4,0},{5,1},{6,2},{6,3},{6,4},{5,5},{4,6},
        {3,6},{2,6},{1,5},{0,4},{0,3},{0,2},{1,1},{2,0}};
    FAST_t_opr<16,7>(_src,_keypoints,_threshold,_nomax_supression,flag);
}
为了方便在PS端应用层获取到keypoint信息,我对FASTX函数进行了小小的修改:取消了Point数据类型,直接使用int数组。数组的第1个元素记录keypoint的总个数,后面依次是每个keypoint的坐标信息,int的高16位保存X坐标,低16位保存Y坐标。修改后的代码如下,为了不影响原来的函数,可以直接增加以下代码:
//generate array template<int PSize,int KERNEL_SIZE, int SRC_T,int ROWS,int COLS>void FAST_t_opr( Mat<ROWS,COLS,SRC_T> &_src, int *_keypoints, int _len, HLS_TNAME(SRC_T) _threshold, bool _nonmax_supression, int (&flag)[PSize][2] ){ typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T; LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T> k_buf; LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> > core_buf; Window<3,3,ap_int<16> > core_win; Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T> win; Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)> s; int rows= _src.rows; int cols= _src.cols; assert(rows <= ROWS); assert(cols <= COLS); int kernel_half=KERNEL_SIZE/2; ap_uint<2> flag_val[PSize+PSize/2+1]; int flag_d[PSize+PSize/2+1];#pragma HLS ARRAY_PARTITION variable=flag_val dim=0#pragma HLS ARRAY_PARTITION variable=flag_d dim=0 int index = 1; int offset=KERNEL_SIZE/2; int location =0; if(_nonmax_supression) { offset=offset+1; } loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) { loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {#pragma HLS LOOP_FLATTEN off#pragma HLS PIPELINE II=1 if(i<rows&&j<cols) { for(int r= 0;r<KERNEL_SIZE;r++) { for(int c=0;c<KERNEL_SIZE-1;c++) { win.val[r][c]=win.val[r][c+1];//column left shift } } win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j]; for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) { win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j]; k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j]; } //------- _src>>s; win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0]; k_buf.val[KERNEL_SIZE-2][j]=s.val[0]; } //------core for(int r= 0;r<3;r++) { for(int c=0;c<3-1;c++) { core_win.val[r][c]=core_win.val[r][c+1];//column left shift } } core_win.val[0][3-1]=core_buf.val[0][j]; for(int buf_row= 1;buf_row< 3-1;buf_row++) { core_win.val[buf_row][3-1]=core_buf.val[buf_row][j]; core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j]; } int core=0; //output if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1) { //process bool 
iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression); if(iscorner&&!_nonmax_supression) { if(index<_len) { location =j-offset; location <<= 16; location |= i-offset; _keypoints[index] = location; index++; } } } if(i>=rows||j>=cols) { core=0; } if(_nonmax_supression) { core_win.val[3-1][3-1]=core; core_buf.val[3-2][j]=core; if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0) { bool iscorner=fast_nonmax(core_win); if(iscorner) { if(index<_len) { location =j-offset; location <<= 16; location |= i-offset; _keypoints[index] = location; index++; } } } } } } _keypoints[0] = (index-1); // keypoints total count}template<int SRC_T,int ROWS,int COLS>void FASTX( Mat<ROWS,COLS,SRC_T> &_src, int *_keypoints, int _len, HLS_TNAME(SRC_T) _threshold, bool _nomax_supression ){#pragma HLS INLINE int flag[16][2]={{3,0},{4,0},{5,1},{6,2},{6,3},{6,4},{5,5},{4,6}, {3,6},{2,6},{1,5},{0,4},{0,3},{0,2},{1,1},{2,0}}; FAST_t_opr<16,7>(_src,_keypoints,_len,_threshold,_nomax_supression,flag);}
修改完FAST函数后,把原来的例子进行相应的修改
void hls_fast_corner(AXI_STREAM& INPUT_STREAM, AXI_STREAM& OUTPUT_STREAM, int rows, int cols, int threhold, int keypoints[MAX_KEYPOINTS]){#pragma HLS INTERFACE axis port=INPUT_STREAM#pragma HLS INTERFACE axis port=OUTPUT_STREAM#pragma HLS INTERFACE s_axilite port=rows bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=cols bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=threhold bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=keypoints bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=return bundle=BUS_CTRLhls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> _src(rows,cols);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> _dst(rows,cols);#pragma HLS dataflowhls::AXIvideo2Mat(INPUT_STREAM, _src);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> src0(rows,cols);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> src1(rows,cols);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC1> gray(rows,cols);hls::Duplicate(_src,src0,src1);hls::CvtColor<HLS_BGR2GRAY>(src0,gray);hls::FASTX(gray,keypoints, MAX_KEYPOINTS, threhold,true);hls::Mat2AXIvideo(src1, OUTPUT_STREAM);}
没有了paintmask,不能在图像上直接看到keypoint了,该如何验证呢?可以在testbench上使用cvCircle把keypoint画上去,代码如下:
int main (int argc, char** argv) { IplImage* src = cvLoadImage(INPUT_IMAGE); IplImage* dst = cvCreateImage(cvGetSize(src), src->depth, src->nChannels); AXI_STREAM src_axi, dst_axi; IplImage2AXIvideo(src, src_axi); int threhold = 60; int keypoints[MAX_KEYPOINTS]; hls_fast_corner(src_axi, dst_axi, src->height, src->width, threhold, keypoints); AXIvideo2IplImage(dst_axi, dst); int count = keypoints[0]; printf("keypoints count:%d\n", count); for(int i=1;i<count;i++) { int x = keypoints[i] >> 16; int y = (keypoints[i] & 0xFFFF); cvCircle(dst,cvPoint(x,y),2,CV_RGB(0,0,255),2); } cvSaveImage(OUTPUT_IMAGE, dst); return 0;}
C 仿真的效果
导出IP后,打开driver目录下的xhls_fast_corner.h,可以看到比之前多了下面这些函数,通过read keypoints函数就可以获取到keypoint信息了
// Accessors for the `keypoints` array of the exported hls_fast_corner IP, as
// listed in the generated driver header xhls_fast_corner.h.
// NOTE(review): the descriptions below are inferred from the function names;
// confirm against the generated driver sources.

// Getters describing the keypoints memory region (base/high address, total
// size in bytes, element bit width, element count).
u32 XHls_fast_corner_Get_keypoints_BaseAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_HighAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_TotalBytes(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_BitWidth(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_Depth(XHls_fast_corner *InstancePtr);

// Word-wise transfer of `length` ints starting at `offset`.
// The Read variant is the one used to fetch the keypoint results.
u32 XHls_fast_corner_Write_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
u32 XHls_fast_corner_Read_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);

// Byte-wise transfer of `length` chars starting at `offset`.
u32 XHls_fast_corner_Write_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);
u32 XHls_fast_corner_Read_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);
阅读全文
0 0
- zynq学习笔记——HLS FAST corner导出keypoints(一)
- zynq学习笔记——HLS FAST corner导出keypoints(二)
- zynq-7000学习笔记(六)——HLS综合FAST corner并导出IP
- zynq-7000学习笔记(七)——在zedboard上验证hls FAST corner
- zynq-7000学习笔记(十七)——xilinx HLS fast corner与opencv fast detect运算耗时比较
- zynq-7000学习笔记(十)——Linux下通过UIO配置FAST corner寄存器
- zynq-7000学习笔记(十二)——Linux下USB摄像头+VDMA+FAST corner+frame buffer测试
- ZYNQ HLS图像处理加速总结(一)——FPGA硬件部分
- ZYNQ HLS工具系列(一) HLS图像处理入门
- zynq-7000学习笔记(一)——在zedboard上安装linaro图像界面系统
- ZYNQ学习笔记(一) HelloWorld实现
- ZYNQ学习笔记(一): uboot 编译
- Vivado HLS 学习笔记(一)
- 学习OpenCV——通过KeyPoints进行目标定位
- 学习OpenCV——通过KeyPoints进行目标定位
- zynq-7000学习笔记(二)——编译uboot
- zynq-7000学习笔记(三)——编译linux
- zynq-7000学习笔记(十四)——移植openCV
- Lucene(一)hello world
- 数据结构之哈夫曼树(最优二叉树)
- win7下安装dvwa
- BZOJ 3212 Pku3468 A Simple Problem with Integers 线段树
- 迷宫最短路径
- zynq学习笔记——HLS FAST corner导出keypoints(一)
- Maven异常: No compiler is provided in this environment. Perhaps you are running on a JRE rather than a
- English summary in June
- 菜鸟超神之路:按位与|,按位或&,详解分析
- 栈空间和堆空间
- 机器学习算法之线性回归(Linear Regression)
- 分布式系列 单点登录和session共享实现
- Rundll32.exe参数传递问题解决方法
- 简单MD5加密类