zynq学习笔记——HLS FAST corner导出keypoints(一)

来源:互联网 发布:男士睡衣品牌 知乎 编辑:程序博客网 时间:2024/05/16 09:12

PC平台:WINDOWS 10 64位

Xilinx设计开发套件:Xilinx_vivado_sdk_2015.4

开发板:Zed Board

之前参照XAPP1167文档,使用HLS Video函数库里的FASTX跑了一下例子。当时的例子是直接把keypoint以mask方式画在原始视频图像上,应用层并没有获取到keypoint的坐标信息,所以无法开展下一步的图像处理,比如获取keypoint的特征点信息进行图像匹配等。其实HLS FASTX提供了两个函数:一个返回keypoint的mask图像,另一个返回keypoint数组。所以如果需要获取keypoint的坐标信息,就必须使用第二个函数。



可以在Vivado HLS安装目录下找到FASTX的源代码,有两个地方,Xilinx\Vivado_HLS\2015.4\include\hls\hls_video_fast.h和Xilinx\Vivado_HLS\2015.4\common\technology\autopilot\hls\hls_video_fast.h

//generate array template<int PSize,int KERNEL_SIZE,typename T, int N, int SRC_T,int ROWS,int COLS>void FAST_t_opr(        Mat<ROWS,COLS,SRC_T>    &_src,        Point_<T>                    (&_keypoints)[N],        HLS_TNAME(SRC_T)                    _threshold,        bool                    _nonmax_supression,        int                     (&flag)[PSize][2]        ){    typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;    LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T>    k_buf;    LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> >         core_buf;    Window<3,3,ap_int<16> >                            core_win;    Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T>       win;    Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)>             s;    int rows= _src.rows;    int cols= _src.cols;    assert(rows <= ROWS);    assert(cols <= COLS);    int kernel_half=KERNEL_SIZE/2;    ap_uint<2> flag_val[PSize+PSize/2+1];    int  flag_d[PSize+PSize/2+1];#pragma HLS ARRAY_PARTITION variable=flag_val dim=0#pragma HLS ARRAY_PARTITION variable=flag_d dim=0    int index=0;    int offset=KERNEL_SIZE/2;    if(_nonmax_supression)    {        offset=offset+1;    } loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {    loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {#pragma HLS LOOP_FLATTEN off#pragma HLS PIPELINE II=1            if(i<rows&&j<cols) {                for(int r= 0;r<KERNEL_SIZE;r++) {                    for(int c=0;c<KERNEL_SIZE-1;c++) {                        win.val[r][c]=win.val[r][c+1];//column left shift                    }                }                win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];                for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {                    win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];                    k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];                }                //-------                _src>>s;                win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];                
k_buf.val[KERNEL_SIZE-2][j]=s.val[0];            }            //------core            for(int r= 0;r<3;r++)            {                for(int c=0;c<3-1;c++)                {                    core_win.val[r][c]=core_win.val[r][c+1];//column left shift                }            }            core_win.val[0][3-1]=core_buf.val[0][j];            for(int buf_row= 1;buf_row< 3-1;buf_row++)            {                core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];                core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];            }            int core=0;            //output            if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)            {                //process                bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);                if(iscorner&&!_nonmax_supression)                {                    if(index<N)                    {                    _keypoints[index].x=j-offset;                    _keypoints[index].y=i-offset;                    index++;                    }                }            }            if(i>=rows||j>=cols)            {                core=0;            }            if(_nonmax_supression)            {                core_win.val[3-1][3-1]=core;                core_buf.val[3-2][j]=core;                if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)                {                    bool iscorner=fast_nonmax(core_win);                    if(iscorner)                    {                    if(index<N)                    {                        _keypoints[index].x=j-offset;                        _keypoints[index].y=i-offset;                        index++;                    }                    }                }            }        }    }}template<typename T, int N, int SRC_T,int ROWS,int COLS>void FASTX(        Mat<ROWS,COLS,SRC_T>    &_src,        Point_<T> (&_keypoints)[N],        HLS_TNAME(SRC_T)    _threshold,        bool   _nomax_supression   
     ){#pragma HLS INLINE    int flag[16][2]={{3,0},{4,0},{5,1},{6,2},{6,3},{6,4},{5,5},{4,6},        {3,6},{2,6},{1,5},{0,4},{0,3},{0,2},{1,1},{2,0}};    FAST_t_opr<16,7>(_src,_keypoints,_threshold,_nomax_supression,flag);}

为了方便在PS端应用层获取keypoint信息,我对FASTX函数进行了小小的修改:取消了Point数据类型,直接使用int数组。数组的第1个元素记录keypoint的总个数,后面依次是每个keypoint的坐标信息,每个int的高16位保存X坐标,低16位保存Y坐标。修改后的代码如下;为了不影响原来的函数,可以直接增加以下代码:

//generate array template<int PSize,int KERNEL_SIZE, int SRC_T,int ROWS,int COLS>void FAST_t_opr(        Mat<ROWS,COLS,SRC_T>    &_src,        int                    *_keypoints,        int                    _len,        HLS_TNAME(SRC_T)       _threshold,        bool                    _nonmax_supression,        int                     (&flag)[PSize][2]        ){    typedef typename pixel_op_type<HLS_TNAME(SRC_T)>::T INPUT_T;    LineBuffer<KERNEL_SIZE-1,COLS,INPUT_T>    k_buf;    LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> >         core_buf;    Window<3,3,ap_int<16> >                            core_win;    Window<KERNEL_SIZE,KERNEL_SIZE,INPUT_T>       win;    Scalar<HLS_MAT_CN(SRC_T), HLS_TNAME(SRC_T)>             s;    int rows= _src.rows;    int cols= _src.cols;    assert(rows <= ROWS);    assert(cols <= COLS);    int kernel_half=KERNEL_SIZE/2;    ap_uint<2> flag_val[PSize+PSize/2+1];    int  flag_d[PSize+PSize/2+1];#pragma HLS ARRAY_PARTITION variable=flag_val dim=0#pragma HLS ARRAY_PARTITION variable=flag_d dim=0       int index = 1;     int offset=KERNEL_SIZE/2;    int location =0;    if(_nonmax_supression)    {        offset=offset+1;    } loop_height: for(HLS_SIZE_T i=0;i<rows+offset;i++) {    loop_width: for(HLS_SIZE_T j=0;j<cols+offset;j++) {#pragma HLS LOOP_FLATTEN off#pragma HLS PIPELINE II=1            if(i<rows&&j<cols) {                for(int r= 0;r<KERNEL_SIZE;r++) {                    for(int c=0;c<KERNEL_SIZE-1;c++) {                        win.val[r][c]=win.val[r][c+1];//column left shift                    }                }                win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];                for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {                    win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];                    k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];                }                //-------                _src>>s;                win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];                
k_buf.val[KERNEL_SIZE-2][j]=s.val[0];            }            //------core            for(int r= 0;r<3;r++)            {                for(int c=0;c<3-1;c++)                {                    core_win.val[r][c]=core_win.val[r][c+1];//column left shift                }            }            core_win.val[0][3-1]=core_buf.val[0][j];            for(int buf_row= 1;buf_row< 3-1;buf_row++)            {                core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];                core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];            }            int core=0;            //output            if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)            {                //process                bool iscorner=fast_judge<PSize>(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);                if(iscorner&&!_nonmax_supression)                {                    if(index<_len)                    {                     location =j-offset;                    location <<= 16;                    location |= i-offset;                    _keypoints[index] = location;                    index++;                    }                }            }            if(i>=rows||j>=cols)            {                core=0;            }            if(_nonmax_supression)            {                core_win.val[3-1][3-1]=core;                core_buf.val[3-2][j]=core;                if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)                {                    bool iscorner=fast_nonmax(core_win);                    if(iscorner)                    {                    if(index<_len)                    {                        location =j-offset;                        location <<= 16;                        location |= i-offset;                        _keypoints[index] = location;                        index++;                    }                    }                }            }        }    }  _keypoints[0] = (index-1); // keypoints total 
count}template<int SRC_T,int ROWS,int COLS>void FASTX(        Mat<ROWS,COLS,SRC_T>    &_src,        int *_keypoints,        int _len,        HLS_TNAME(SRC_T)    _threshold,        bool   _nomax_supression        ){#pragma HLS INLINE    int flag[16][2]={{3,0},{4,0},{5,1},{6,2},{6,3},{6,4},{5,5},{4,6},        {3,6},{2,6},{1,5},{0,4},{0,3},{0,2},{1,1},{2,0}};    FAST_t_opr<16,7>(_src,_keypoints,_len,_threshold,_nomax_supression,flag);}

修改完FAST函数后,把原来的例子进行相应的修改

void hls_fast_corner(AXI_STREAM& INPUT_STREAM, AXI_STREAM& OUTPUT_STREAM, int rows, int cols, int threhold, int keypoints[MAX_KEYPOINTS]){#pragma HLS INTERFACE axis port=INPUT_STREAM#pragma HLS INTERFACE axis port=OUTPUT_STREAM#pragma HLS INTERFACE s_axilite port=rows bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=cols bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=threhold bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=keypoints  bundle=BUS_CTRL#pragma HLS INTERFACE s_axilite port=return  bundle=BUS_CTRLhls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      _src(rows,cols);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      _dst(rows,cols);#pragma HLS dataflowhls::AXIvideo2Mat(INPUT_STREAM, _src);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      src0(rows,cols);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3>      src1(rows,cols);hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC1>      gray(rows,cols);hls::Duplicate(_src,src0,src1);hls::CvtColor<HLS_BGR2GRAY>(src0,gray);hls::FASTX(gray,keypoints, MAX_KEYPOINTS, threhold,true);hls::Mat2AXIvideo(src1, OUTPUT_STREAM);}

没有了paintmask,不能在图像上直接看到keypoint了,该如何验证呢?可以在testbench里使用cvCircle把keypoint画上去,代码如下:

int main (int argc, char** argv) {    IplImage* src = cvLoadImage(INPUT_IMAGE);    IplImage* dst = cvCreateImage(cvGetSize(src), src->depth, src->nChannels);        AXI_STREAM  src_axi, dst_axi;    IplImage2AXIvideo(src, src_axi);    int threhold = 60;    int keypoints[MAX_KEYPOINTS];    hls_fast_corner(src_axi, dst_axi, src->height, src->width, threhold, keypoints);    AXIvideo2IplImage(dst_axi, dst);    int count = keypoints[0];    printf("keypoints count:%d\n", count);    for(int i=1;i<count;i++)    {    int x = keypoints[i] >> 16;    int y = (keypoints[i] & 0xFFFF);    cvCircle(dst,cvPoint(x,y),2,CV_RGB(0,0,255),2);    }    cvSaveImage(OUTPUT_IMAGE, dst);        return 0;}


C 仿真的效果


导出IP后,打开driver目录下的xhls_fast_corner.h,可以看到比之前多了下面这些函数,通过其中的Read_keypoints系列函数(例如XHls_fast_corner_Read_keypoints_Words)就可以获取到keypoint信息了。

/*
 * Accessors declared in the exported driver header (xhls_fast_corner.h) for the
 * `keypoints` array argument of hls_fast_corner.
 * NOTE(review): only the prototypes are visible here; the per-function notes
 * below are inferred from names and signatures -- confirm against the
 * generated driver source before relying on them.
 */

/* Presumably report where the keypoints memory sits in the register map and how big it is. */
u32 XHls_fast_corner_Get_keypoints_BaseAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_HighAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_TotalBytes(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_BitWidth(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_Depth(XHls_fast_corner *InstancePtr);

/* Word-wise transfer: `data` is an int buffer; offset/length are presumably counted in words. */
u32 XHls_fast_corner_Write_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
u32 XHls_fast_corner_Read_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);

/* Byte-wise transfer: `data` is a char buffer; offset/length are presumably counted in bytes. */
u32 XHls_fast_corner_Write_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);
u32 XHls_fast_corner_Read_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);



阅读全文
0 0
原创粉丝点击