Keypoints+Knn+findHomography进行目标定位

来源：互联网发布：外网代理软件编辑：程序博客网时间：2024/05/16 19:41

本范例的代码主要都是学习OpenCV——通过KeyPoints进行目标定位这篇博客提供的，然后在它的基础上稍加修改，检测keypoints点的检测器是SURF，获取描述子也是用到SURF来描述，而用到的匹配器是FlannBased，匹配的方式是Knn方式，最后通过findHomography寻找单映射矩阵，perspectiveTransform获得最终的目标，在这个过程中还通过单映射矩阵来进一步去除伪匹配，这里只是贴出代码和代码解析，至于原理还没弄得特别明白，希望接下来可以继续学习，学懂了算法原理再来补充。

1、代码实现

#include "stdafx.h"#include "opencv2/opencv.hpp"#include <vector>  #include <iostream>  using namespace cv;  using namespace std;  Mat src,frameImg;  int width;  int height;  vector<Point> srcCorner(4);  vector<Point> dstCorner(4);  static bool createDetectorDescriptorMatcher( const string& detectorType, const string& descriptorType, const string& matcherType,  Ptr<FeatureDetector>& featureDetector,  Ptr<DescriptorExtractor>& descriptorExtractor,  Ptr<DescriptorMatcher>& descriptorMatcher )  {  cout << "< Creating feature detector, descriptor extractor and descriptor matcher ..." << endl;  if (detectorType=="SIFT"||detectorType=="SURF")  initModule_nonfree();  featureDetector = FeatureDetector::create( detectorType );  descriptorExtractor = DescriptorExtractor::create( descriptorType );  descriptorMatcher = DescriptorMatcher::create( matcherType );  cout << ">" << endl;  bool isCreated = !( featureDetector.empty() || descriptorExtractor.empty() || descriptorMatcher.empty() );  if( !isCreated )  cout << "Can not create feature detector or descriptor extractor or descriptor matcher of given types." << endl << ">" << endl;  return isCreated;  }  bool refineMatchesWithHomography(const std::vector<cv::KeyPoint>& queryKeypoints,    const std::vector<cv::KeyPoint>& trainKeypoints,     float reprojectionThreshold,    std::vector<cv::DMatch>& matches,    cv::Mat& homography  )  {  const int minNumberMatchesAllowed = 4;    if (matches.size() < minNumberMatchesAllowed)    return false;    // Prepare data for cv::findHomography    std::vector<cv::Point2f> queryPoints(matches.size());    std::vector<cv::Point2f> trainPoints(matches.size());    for (size_t i = 0; i < matches.size(); i++)    {    queryPoints[i] = queryKeypoints[matches[i].queryIdx].pt;    trainPoints[i] = trainKeypoints[matches[i].trainIdx].pt;    }    // Find homography matrix and get inliers mask    std::vector<unsigned char> inliersMask(matches.size());    homography = cv::findHomography(queryPoints,     trainPoints,     CV_FM_RANSAC,     reprojectionThreshold,     inliersMask);    std::vector<cv::DMatch> inliers;    for (size_t i=0; i<inliersMask.size(); i++)    {    if (inliersMask[i])    inliers.push_back(matches[i]);    }    matches.swap(inliers);  Mat homoShow;  drawMatches(src,queryKeypoints,frameImg,trainKeypoints,matches,homoShow,Scalar::all(-1),CV_RGB(255,255,255),Mat(),2);       imshow("homoShow",homoShow);   return matches.size() > minNumberMatchesAllowed;   }  bool matchingDescriptor(const vector<KeyPoint>& queryKeyPoints,const vector<KeyPoint>& trainKeyPoints,  const Mat& queryDescriptors,const Mat& trainDescriptors,   Ptr<DescriptorMatcher>& descriptorMatcher,  bool enableRatioTest = true)  {  vector<vector<DMatch>> m_knnMatches;  vector<DMatch>m_Matches;  if (enableRatioTest)  {  cout<<"KNN Matching"<<endl;  const float minRatio = 1.f / 1.5f;  descriptorMatcher->knnMatch(queryDescriptors,trainDescriptors,m_knnMatches,2);  for (size_t i=0; i<m_knnMatches.size(); i++)  {  const cv::DMatch& bestMatch = m_knnMatches[i][0];  const cv::DMatch& betterMatch = m_knnMatches[i][1];  float distanceRatio = bestMatch.distance / betterMatch.distance;  if (distanceRatio < minRatio)  {  m_Matches.push_back(bestMatch);  }  }  }  else  {  cout<<"Cross-Check"<<endl;  Ptr<cv::DescriptorMatcher> BFMatcher(new cv::BFMatcher(cv::NORM_HAMMING, true));  BFMatcher->match(queryDescriptors,trainDescriptors, m_Matches );  }  Mat homo;  float homographyReprojectionThreshold = 1.0;  bool homographyFound = refineMatchesWithHomography(  queryKeyPoints,trainKeyPoints,homographyReprojectionThreshold,m_Matches,homo);  if (!homographyFound)  return false;  else  {  if (m_Matches.size()>10){std::vector<Point2f> obj_corners(4);obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( src.cols, 0 );obj_corners[2] = cvPoint( src.cols, src.rows ); obj_corners[3] = cvPoint( 0, src.rows );std::vector<Point2f> scene_corners(4);perspectiveTransform( obj_corners, scene_corners, homo);line(frameImg,scene_corners[0],scene_corners[1],CV_RGB(255,0,0),2);  line(frameImg,scene_corners[1],scene_corners[2],CV_RGB(255,0,0),2);  line(frameImg,scene_corners[2],scene_corners[3],CV_RGB(255,0,0),2);  line(frameImg,scene_corners[3],scene_corners[0],CV_RGB(255,0,0),2);  return true;  }return true;}  }  int main()  {  string filename = "box.png";  src = imread(filename,0);  width = src.cols;  height = src.rows;  string detectorType = "SIFT";  string descriptorType = "SIFT";  string matcherType = "FlannBased";  Ptr<FeatureDetector> featureDetector;  Ptr<DescriptorExtractor> descriptorExtractor;  Ptr<DescriptorMatcher> descriptorMatcher;  if( !createDetectorDescriptorMatcher( detectorType, descriptorType, matcherType, featureDetector, descriptorExtractor, descriptorMatcher ) )  {  cout<<"Creat Detector Descriptor Matcher False!"<<endl;  return -1;  }  //Intial: read the pattern img keyPoint  vector<KeyPoint> queryKeypoints;  Mat queryDescriptor;  featureDetector->detect(src,queryKeypoints);  descriptorExtractor->compute(src,queryKeypoints,queryDescriptor);  VideoCapture cap(0); // open the default camera  cap.set( CV_CAP_PROP_FRAME_WIDTH,320);cap.set( CV_CAP_PROP_FRAME_HEIGHT,240 );if(!cap.isOpened())  // check if we succeeded  {  cout<<"Can't Open Camera!"<<endl;  return -1;  }  srcCorner[0] = Point(0,0);  srcCorner[1] = Point(width,0);  srcCorner[2] = Point(width,height);  srcCorner[3] = Point(0,height);  vector<KeyPoint> trainKeypoints;  Mat trainDescriptor;  Mat frame,grayFrame;char key=0;  //frame = imread("box_in_scene.png");  while (key!=27)  {  cap>>frame;  if (!frame.empty()){frame.copyTo(frameImg);printf("%d,%d\n",frame.depth(),frame.channels());grayFrame.zeros(frame.rows,frame.cols,CV_8UC1);cvtColor(frame,grayFrame,CV_BGR2GRAY);  trainKeypoints.clear();  trainDescriptor.setTo(0);  featureDetector->detect(grayFrame,trainKeypoints);  if(trainKeypoints.size()!=0)  {  descriptorExtractor->compute(grayFrame,trainKeypoints,trainDescriptor);  bool isFound = matchingDescriptor(queryKeypoints,trainKeypoints,queryDescriptor,trainDescriptor,descriptorMatcher);  imshow("foundImg",frameImg);  }  }key = waitKey(1); }  cap.release();return 0;}

2、运行结果

3、用到的类和函数

DescriptorMatcher::knnMatch

功能：对查询集的每个描述子寻找k个最好的匹配子

结构：

 void DescriptorMatcher::knnMatch(const Mat& queryDescriptors, const Mat& trainDescriptors, vector<vector<DMatch>>& matches, int k, const Mat& mask=Mat(), bool compactResult=false ) const

queryDescriptors ：查询描述子集
trainDescriptors ：训练描述子集
mask ：掩码指定查询和训练描述子集中哪些允许被匹配
matches ：每一个matches[i]都有k个或少于k个的匹配子
k ：每个查询描述子最好的匹配子数量，如果查询描述子的总数量少于k，则取总数量
compactResult ：当masks不为空时，使用此参数，如果compactResult为false，那么matches的容量和查询描述子的数量一样多，如果为true，则matches的容量就没有包含在mask中剔除的匹配点

BFMatcher::BFMatcher

功能：蛮力匹配

结构：

 BFMatcher::BFMatcher(int normType=NORM_L2, bool crossCheck=false )

normType ：NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2四个中的一个. L1 和L2 规范对于SIFT和SURF是个更好的选择，而 NORM_HAMMING 应该被用在ORB, BRISK和 BRIEF中, NORM_HAMMING2被用在 ORB 中，当 WTA_K==3 或4 的时候，（详见ORB::ORB描述子）

crossCheck ：如果为false，则默认为BFMatcher行为，对每个查找描述子中寻找k个最接近的邻居，如果为true，则是knnMatch() 方法，而k=1，返回一对匹配子，这个方法只返回最接近的距离的一对匹配子，当有足够多的匹配子的时候，这种方法通常能够产生最好的结果和最小的误差。

findHomography

功能：在两个平面之间寻找单映射变换矩阵

结构：

Mat findHomography(InputArray srcPoints, InputArray dstPoints, int method=0, double ransacReprojThreshold=3, OutputArray mask=noArray() )

srcPoints ：在原平面上点的坐标，CV_32FC2 的矩阵或者vector<Point2f>
dstPoints ：在目标平面上点的坐标，CV_32FC2 的矩阵或者 vector<Point2f> .
method –
用于计算单映射矩阵的方法.
0 - 使用所有的点的常规方法
CV_RANSAC - 基于 RANSAC 的方法

CV_LMEDS - 基于Least-Median 的方法

ransacReprojThreshold：处理一组点对为内部点的最大容忍重投影误差（只在RANSAC方法中使用），其形式为：如果 $\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \| > \texttt{ransacReprojThreshold}$

那么点i则被考虑为内部点，如果srcPoints和dstPoints是以像素为单位，通常把参数设置为1-10范围内

这个函数的作用是在原平面和目标平面之间返回一个单映射矩阵

$s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}$

因此反投影误差 $\sum _i \left ( x'_i- \frac{h_{11} x_i + h_{12} y_i + h_{13}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2+ \left ( y'_i- \frac{h_{21} x_i + h_{22} y_i + h_{23}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2$ 是最小的。

如果参数被设置为0，那么这个函数使用所有的点和一个简单的最小二乘算法来计算最初的单应性估计，但是，如果不是所有的点对都完全符合透视变换，那么这个初始的估计会很差，在这种情况下，你可以使用两个robust算法中的一个。 RANSAC 和LMeDS , 使用坐标点对生成了很多不同的随机组合子集（每四对一组），使用这些子集和一个简单的最小二乘法来估计变换矩阵，然后计算出单应性的质量，最好的子集被用来产生初始单应性的估计和掩码。
RANSAC方法几乎可以处理任何异常，但是需要一个阈值， LMeDS 方法不需要任何阈值，但是只有在inliers大于50%时才能计算正确，最后，如果没有outliers和噪音非常小，则可以使用默认的方法。

PerspectiveTransform

功能：向量数组的透视变换

结构：

void perspectiveTransform(InputArray src, OutputArray dst, InputArray m)

src ：输入两通道或三通道的浮点数组，每一个元素是一个2D/3D 的矢量转换

dst ：输出和src同样的size和type
m ：3x3 或者4x4浮点转换矩阵
转换方法为：

$(x, y, z) \rightarrow (x'/w, y'/w, z'/w)$

$(x', y', z', w') = \texttt{mat} \cdot \begin{bmatrix} x & y & z & 1 \end{bmatrix}$

$w = \fork{w'}{if $w' \ne 0$}{\infty}{otherwise}$

0 0