相似图片搜索原理二(phash—c++实现)

来源:互联网 发布:广西广电网络电视 编辑:程序博客网 时间:2024/05/20 23:04

前段时间介绍过相似图片搜索原理一(ahash)http://blog.csdn.net/lu597203933/article/details/45101859,它是基于内容检索最简单的一种;这里介绍它的增强版本感知哈希算法(perceptual hash, phash)。它主要也是用缩略图搜原图并能达到较好点的效果.

理论部分:

理论部分主要包括以下几个步骤:

<1> 图像缩放—将图像缩放到32*32大小

<2> 灰度化—对32*32大小的图像进行灰度化

<3> 离散余弦变换(DCT)—对32*32大小的图像进行DCT

<4> 计算均值—取DCT结果左上角8*8的部分(低频系数),计算这64个值的均值

<5> 得到8*8图像的phash—8*8的值中大于等于均值的用1表示,小于均值的用0表示,这样就得到一个64位二进制码作为该图像的phash值。

<6> 计算两幅图像phash值的汉明距离,距离越小,表明两幅图像越相似;距离越大,表明两幅图像差异越大。

这样做能够避免伽马校正或者颜色直方图调整带来的影响。

更详细的理论可以参看:

1. http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

2. http://blog.csdn.net/luoweifu/article/details/8220992 (包括java代码实现)

 

下面我给出自己的c++代码实现:

<1>图像灰度化与缩放

      

        Mat img = imread("E:\\algorithmZack\\ImageSearch\\image\\person.jpg", 1);if(!img.data){cout << "the image is not exist" << endl;return 0;}int size = 32;  // 图片缩放后大小resize(img, img, Size(size,size));      // 缩放到32*32cvtColor(img, img, COLOR_BGR2GRAY);       // 灰度化

<2>DCT变换

/*功能:获取DCT系数n:矩阵大小quotient: 系数quotientT: 系数转置*/void coefficient(const int &n, double **quotient, double **quotientT){double sqr = 1.0/sqrt(n+0.0);for(int i = 0; i < n; i++){quotient[0][i] = sqr;quotientT[i][0] =  sqr;}for(int i = 1; i < n; i++){for(int j = 0; j < n; j++){quotient[i][j] = sqrt(2.0/n)*cos(i*(j+0.5)*PI/n);  // 由公式得到quotientT[j][i] = quotient[i][j];}}}/*功能:两矩阵相乘A和B:源输入矩阵result:输出矩阵*/void matrixMultiply(double **A, double **B, int n, double **result){  double t = 0;for(int i = 0; i < n; i++){for(int j = 0; j < n; j++){t = 0;for(int k = 0; k < n; k++)t += A[i][k]*B[k][j];   result[i][j] = t;}}}void DCT(Mat_<uchar> image, const int &n, double **iMatrix){for(int i = 0; i < n; i++){for(int j = 0; j < n; j++){iMatrix[i][j] = (double)image(i,j);}}// 为系数分配空间double **quotient = new double*[n];double **quotientT = new double*[n];double **tmp = new double*[n];for(int i = 0; i < n; i++){quotient[i] = new double[n];quotientT[i] = new double[n]; tmp[i] = new double[n];}// 计算系数矩阵coefficient(n, quotient, quotientT);matrixMultiply(quotient, iMatrix, n, tmp);  // 由公式成绩结果matrixMultiply(tmp, quotientT, n, iMatrix);for(int i = 0; i < n; i++){delete []tmp[i];delete []quotient[i];delete []quotientT[i];}delete []tmp;delete []quotient;delete []quotientT;}

<3>计算均值

/*
Purpose: mean of the top-left size*size entries of iMatrix
         (the article calls it with size = 8).
iMatrix: matrix with at least size rows of at least size doubles
size:    side length of the area to average
Returns the mean as a float; returns 0 when size <= 0 so that an empty
area does not produce a 0/0 division.
*/
float calcAverage(double **iMatrix, const int &size)
{
    if(size <= 0)
        return 0.0f;

    // Accumulate in double: the entries are doubles, and a float running
    // sum would drop low-order bits once the total grows large.
    double sum = 0.0;
    for(int i = 0; i < size; i++)
    {
        for(int j = 0; j < size; j++)
        {
            sum += iMatrix[i][j];
        }
    }
    return static_cast<float>(sum/(size*size));
}

<4>计算hash值

/*
Purpose: derive the hash bits from the top-left size*size entries of iMatrix.
iMatrix:    matrix whose entries are compared with the threshold
size:       side length of the area that is read
phash:      receives the bits; entry (i,j) is stored at bit index i*size + j
averagePix: threshold; a bit is 1 when the entry is >= averagePix, else 0
*/
void fingerPrint(double **iMatrix, const int &size, bitset<hashLength> &phash, const float &averagePix)
{
    for(int row = 0; row < size; ++row)
    {
        const double *line = iMatrix[row];
        const int base = row * size;    // bit index of the first entry in this row
        for(int col = 0; col < size; ++col)
        {
            phash[base + col] = (line[col] >= averagePix);
        }
    }
}


完整源代码:

 

#include <iostream>#include <bitset>#include <string>#include <iomanip>#include <cmath>#include <opencv2\highgui\highgui.hpp>#include <opencv2\imgproc\imgproc.hpp>#include <opencv2\core\core.hpp>using namespace std;using namespace cv;#define PI 3.1415926#define hashLength 64/*功能:获取DCT系数n:矩阵大小quotient: 系数quotientT: 系数转置*/void coefficient(const int &n, double **quotient, double **quotientT){double sqr = 1.0/sqrt(n+0.0);for(int i = 0; i < n; i++){quotient[0][i] = sqr;quotientT[i][0] =  sqr;}for(int i = 1; i < n; i++){for(int j = 0; j < n; j++){quotient[i][j] = sqrt(2.0/n)*cos(i*(j+0.5)*PI/n);  // 由公式得到quotientT[j][i] = quotient[i][j];}}}/*功能:两矩阵相乘A和B:源输入矩阵result:输出矩阵*/void matrixMultiply(double **A, double **B, int n, double **result){  double t = 0;for(int i = 0; i < n; i++){for(int j = 0; j < n; j++){t = 0;for(int k = 0; k < n; k++)t += A[i][k]*B[k][j];   result[i][j] = t;}}}void DCT(Mat_<uchar> image, const int &n, double **iMatrix){for(int i = 0; i < n; i++){for(int j = 0; j < n; j++){iMatrix[i][j] = (double)image(i,j);}}// 为系数分配空间double **quotient = new double*[n];double **quotientT = new double*[n];double **tmp = new double*[n];for(int i = 0; i < n; i++){quotient[i] = new double[n];quotientT[i] = new double[n]; tmp[i] = new double[n];}// 计算系数矩阵coefficient(n, quotient, quotientT);matrixMultiply(quotient, iMatrix, n, tmp);  // 由公式成绩结果matrixMultiply(tmp, quotientT, n, iMatrix);for(int i = 0; i < n; i++){delete []tmp[i];delete []quotient[i];delete []quotientT[i];}delete []tmp;delete []quotient;delete []quotientT;}// 计算8*8图像的平均灰度float calcAverage(double **iMatrix, const int &size){float sum = 0;for(int i = 0 ; i < size; i++){for(int j = 0; j < size; j++){sum += iMatrix[i][j];}}return sum/(size*size);}/* 计算hash值image:8*8的灰度图像size: 图像大小  8*8phash:存放64位hash值averagePix: 灰度值的平均值*/void fingerPrint(double **iMatrix, const int &size, bitset<hashLength> &phash, const float &averagePix){for(int i = 0; i < size; i++){int pos = i * size;for(int j = 0; j < size; j++){phash[pos+j] 
= iMatrix[i][j] >= averagePix ? 1:0;}}}/*计算汉明距离*/int hammingDistance(const bitset<hashLength> &query, const bitset<hashLength> &target){int distance = 0;for(int i = 0; i < hashLength; i++){distance += (query[i] == target[i] ? 0 : 1);}return distance;}string bitTohex(const bitset<hashLength> &target){string str;for(int i = 0; i < hashLength; i=i+4){int sum = 0;string s;sum += target[i] + (target[i+1]<<1) + (target[i+2]<<2) + (target[i+3]<<3);stringstream ss;ss << hex <<sum;    // 以十六进制保存ss >> s;str += s;}return str;}int main(){Mat img = imread("E:\\algorithmZack\\ImageSearch\\image\\person.jpg", 1);if(!img.data){cout << "the image is not exist" << endl;return 0;}int size = 32;  // 图片缩放后大小resize(img, img, Size(size,size));      // 缩放到32*32cvtColor(img, img, COLOR_BGR2GRAY);       // 灰度化double **iMatrix = new double*[size];for(int i = 0; i < size; i++)iMatrix[i] = new double[size];DCT(img, size, iMatrix);   // 离散余弦变换float averagePix = calcAverage(iMatrix, 8);cout << averagePix << endl;bitset<hashLength> phash;fingerPrint(iMatrix, 8, phash, averagePix);//cout << phash << endl;string str = bitTohex(phash);cout << str << endl;/*namedWindow("img");imshow("img", img);waitKey(0);*/string img_dir = "E:\\algorithmZack\\ImageSearch\\image\\";for(int i = 1; i <= 11; i++){string pos;stringstream ss;ss << i;ss >> pos;string img_name = img_dir + "person" + pos +".jpg"; Mat target = imread(img_name, 1);if(!target.data){cout << "the target image" << img_name << " is not exist" << endl;continue;}resize(target, target, Size(size,size));cvtColor(target, target, COLOR_BGR2GRAY);DCT(target, size, iMatrix);float averagePix2 = calcAverage(iMatrix, 8);bitset<hashLength> phash2;fingerPrint(iMatrix, 8, phash2, averagePix2);//cout << averagePix2 << endl;int distance = hammingDistance(phash, phash2);      // 计算汉明距离cout <<"【" << i <<"-" <<  distance << "】 ";}cout << endl;for(int i = 0; i < size; i++)delete []iMatrix[i];delete []iMatrix;return 0;}

测试图片为:


结果为:


其中【i-j】中, i代表第i张测试图片(person<i>.jpg), j代表该图片与原图person.jpg之间的汉明距离。并由结果可见phash对于图片的旋转肯定是无能为力的。

说明:完整的工程文件等这几篇常规图像检索方法写完后再上传,请关注!

参考文献:

1. http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html (英文原始资料)

2. http://blog.csdn.net/luoweifu/article/details/8220992 (包括java代码实现)

0 0