自己琢磨的 图像相似度算法 JAVA版实现

来源:互联网 发布:csgo淘宝买的安全么 编辑:程序博客网 时间:2024/04/28 22:06

     最近有个项目里要整理缩略图和原图是否匹配,然后就去找工具和方法做了,之后很好奇图像相似度是怎么算的,google了下貌似很深奥,而且找不到最简单的 Java 示例源码可供下载,估计图形学的人不屑用java吧。      

     个人从来没有研究过图像学,也没看过什么论文或者相关文档,写这个完全是靠google和百度,自己写了个实验了下,测试用例也少,估计有大BUG的存在,所以看的人权当学习交流,切勿生产使用。


     思路:

      1,二值化,(不做颜色比对了,生成黑白图)

      2 ,取黑色的矩形阵列,(不管图片大小,只取同颜色的矩形区域,判断规则下文讲)

      3,排序取前几个矩阵,(取最大矩阵的 %d 面积以上的矩阵,%d作为参数)。

      4,取得2张图的矩阵,对比矩阵距离面积等,根据面积权重,得到相似度。

  


如这张图,取的矩阵就是红色区域,没画全。


   

好了,开始了,下面的代码要是有人发现问题PM我,我去改正:

     1,二值化:

package cn.my.image.Compare.test;import java.awt.image.BufferedImage;import java.io.IOException;import java.io.InputStream;import javax.imageio.ImageIO;public class Binarization {private static float RP = 0.333f;private static float GP = 0.334f;private static float BP = 0.333f;private static int Threshold = 0;private int h;private int w;public Binarization(){}/** * set your threshold such as 160  这里手工设判断黑白的标准,默认是图像全局的平均值。 * @param threshold */public Binarization(int threshold){Threshold = threshold;}/** * init with r,g,b Proportion  设置R G B的权重比,默认平分 * @param r * @param g * @param b */public Binarization(float r,float g,float b,int threshold){RP = r;GP = g;BP = b;Threshold = threshold;}public int getH(){return h;}public int getW() {return w;}public int getArea(){return w*h;}public int[][] toBinarization(InputStream stream) throws IOException {BufferedImage bi=ImageIO.read(stream);//input imageh=bi.getHeight();//get heightw=bi.getWidth();//get widthint sumRGB = 0;int[][] gray=new int[w][h];for (int x = 0; x < w; x++) {for (int y = 0; y < h; y++) {gray[x][y]=getGray(bi.getRGB(x, y));sumRGB = sumRGB + gray[x][y];}}if(Threshold ==0){Threshold = sumRGB/(h*w); //threshold by avager}int[][] binary=new int[w][h];for (int x = 0; x < w; x++) {for (int y = 0; y < h; y++) {if(getAverageColor(gray, x, y, w, h)>Threshold){binary[x][y] = 1 ;    /// 1 for White}else{binary[x][y] = 0 ;    /// 0 for black  }}}//System.out.print("black " +test1 + " white "+test2);return binary;}private int getGray(int rgb){int r = (rgb & 16711680) >> 16;int g = (rgb & 65280) >> 8;int b = (rgb & 255);int top=(int) (r*RP+g*GP+b*BP);return (int)(top);}private int getAverageColor(int[][] gray, int x, int y, int w, int h)    {        int rs = gray[x][y]                      + (x == 0 ? 255 : gray[x - 1][y])            + (x == 0 || y == 0 ? 255 : gray[x - 1][y - 1])            + (x == 0 || y == h - 1 ? 255 : gray[x - 1][y + 1])            + (y == 0 ? 255 : gray[x][y - 1])            + (y == h - 1 ? 
255 : gray[x][y + 1])            + (x == w - 1 ? 255 : gray[x + 1][ y])            + (x == w - 1 || y == 0 ? 255 : gray[x + 1][y - 1])            + (x == w - 1 || y == h - 1 ? 255 : gray[x + 1][y + 1]);        return rs / 9;    }}

2.取得矩阵

package cn.my.image.Compare.test;import java.util.ArrayList;public class Distribution {private boolean isBlackSquare = true;private int picW;private int picH;public Distribution(){}/**这里设置是否需要逆序,比如一个图旋转 180度,或者图片逆序。需要设置这个属性为false,然后逆序矩形区域, 等于取了白点值。 * is use black to get square  * (some picture need one black and one write to compare) * such as Anti-color picture or 180 flip   * @param isBlackSquare */public Distribution(boolean isBlackSquare){this.isBlackSquare = isBlackSquare;}/** * get DNA * @param binary * @return DNA array */public ArrayList<Square> toDistribution(int binary[][]){picW = binary.length;picH = binary[0].length;//get picture's height widthif(!isBlackSquare){anti(binary);}return getSquareArray(binary);}/** * get image's x, y  * i[0] for x i[1] for y  * before toDistribution it will return 0; * @return */public int[] getWH(){int[] xy = new int[2];xy[0] = picW;xy[1] = picH;return xy;}/** * TODO it's not good  取得图片的矩形List,看到我的英文注释了吧,我觉得这段写的真心烂 , * @param binary * @return */private ArrayList<Square> getSquareArray(int binary[][]){ArrayList<Square> squareList = new ArrayList<Square>();boolean goon = true;int x=0,y=0;int tags = (picW>picH?picW:picH)/100;while(goon) {//Calculate next x,yfor(int j= y; j<picH;j++) {for(int i = 0;i <picW;i++) {if(binary[i][j] == 1) {x = i;y = j;i = picW;j = picH;}}}if(binary[x][y] != 1 || (x ==picW-1)&&(y ==picH-1)){break;}Square square = getSquare(binary,x,y);if(square.l>tags)squareList.add(square);if(x+square.l>=picW&&y+square.l>=picH)goon = false;}return squareList;}/** * create a Square object   取得1个 矩形区域。判断标准是这样的,我取一个矩形的边,然后            如果边上的黑色点数小于取得的矩形内部白点总数,则认为到边界了,保存矩形&退出,
           黑色点数大于取得的矩形内部白点总数,去检测矩形外面一层的边矩形的length +1  * Square:x,y for start point ,l for side length  * @param binary * @param x * @param y * @return */private Square getSquare(int binary[][],int x,int y) {Square square = new Square(x,y);int length = 0;int black=0,write=0,writeSum=0;boolean goon = true;while(goon) {black = getSidePoint(binary,x,y,length);write = 2*length -1 -black;writeSum = writeSum +write;length ++;//quit Standardif(x+length >=picW || y+length>=picH || writeSum>=black){goon = false;}}square.l = length;return square;}/** * get black point in square side  获得矩形的2条边的黑点数量,这2条边每次调用x,y 都会+1。 * @param binary * @param start_X * @param start_Y * @param length * @return */private int getSidePoint(int binary[][],int start_X,int start_Y,int length){/*if(length==0) {//for test!System.out.println("Distribution-->getSidePoint-->legth ==0");System.exit(0);}*/int diffPoint = 0;int x ,y;if(binary[start_X+length][start_Y+length]==1)diffPoint--;for(int i=0;i<=length;i++){x = binary[start_X+i][start_Y+length];y = binary[start_X+length][start_Y+i];if(x==1)diffPoint++;if(y==1)diffPoint++;binary[start_X+i][start_Y+length] = -1;binary[start_X+length][start_Y+i] = -1;  // Mark Read}return diffPoint;}/** * anti list 白黑交换函数 * @param binary */private void anti(int binary[][]) {for (int x = 0; x < picW; x++) {for (int y = 0; y < picH; y++) {if(binary[x][y] == 1 ){binary[x][y] = 0 ;    /// 0 for White }else{binary[x][y] = 1 ;    /// 1 for black }}}}}

3.对比

package cn.my.image.Compare.test;import java.util.ArrayList;public class Compare {private float pr = 0.5f;private double area = 0;public Compare() {}/** * input particle between 0-1 0 for use all squares 1 for use largest square * 英文比较差,这里设置的是取得最大矩形面积百分之几面积以上的矩形,设置1,就取最大,设置0,全取,我命名叫颗粒度,应该算是吧,默认50% * @param particle */public Compare(float particle) {if (particle <= 1 && particle >= 0)this.pr = particle;}public double toCompare(ArrayList<Square> s1, ArrayList<Square> s2, int area) {this.area = area;double fainllyratio = 0;s1 = Sort(s1);   //排序取前几个s2 = Sort(s2);ArrayList<Square> small = s1.size()<s2.size()?s1:s2; ArrayList<Square> large = s1.size()>=s2.size()?s1:s2; int tsize = 0;for(int i=0;i <small.size();i++){tsize = tsize + small.get(i).getArea();}for(int i=0;i <small.size();i++){fainllyratio= fainllyratio + (ratioFrom(small.get(i),large) *small.get(i).getArea() /tsize) ; //根据面积比不同的面积 相似度占的权重不同。}return fainllyratio;}        //算单个矩形的比率(相似度吧),比率这个英文好像拼错了。private double ratioFrom(Square s1, ArrayList<Square> large) {ArrayList<Square> temp = new ArrayList<Square>();int xend = s1.x + s1.l;int yend = s1.y + s1.l;int xm = s1.getCenterX();int ym = s1.getCenterY();Square square = new Square(-1, -1);square.l = 999999999;for (Square s2 : large) {int x1 = s2.getCenterX();int y1 = s2.getCenterY();if (x1 <= xend && x1 >= s1.x && y1 <= yend && y1 >= s1.y) {// in the squaretemp.add(s2);}if ((getAbsolute(x1 - xm) + getAbsolute(y1 - ym)) < square.l)square = s2;}double l1 = 0, l2 = 0, sizeratio = 0,size1=0;if (temp.isEmpty()) {temp.add(square);}for (Square s : temp) { size1 = size1 + s.l * s.l;l1 = l1 + s.x + s.l / 2;l2 = l2 + s.y + s.l / 2;}double size2 = s1.l * s1.l;sizeratio =  getAbsolute(size1 - size2)/((size1 + size2)/2);double l = (l1 / temp.size() - xm) * (l1 / temp.size() - xm)+ (l2 / temp.size() - ym) * (l2 / temp.size() - ym);double ratio = Math.sqrt(l)/Math.sqrt(area);ratio = (ratio + sizeratio)/2;return 1-ratio;}private double getAbsolute(double i) {return i > 0 ? 
i : -i;}private ArrayList<Square> Sort(ArrayList<Square> squares) {ArrayList<Square> newsquares = new ArrayList<Square>();if (squares.size() == 0)return newsquares;while (true) {Square temp = new Square(-1, -1);for (Square s : squares) {if (s.l > temp.l) {temp = s;}}newsquares.add(temp);if (temp.l <= newsquares.get(0).l * pr)break;squares.remove(temp);if (squares.isEmpty())break;}return newsquares;}}



OK:

测试:像素1900*2000的




// Demo: binarize two pictures, extract their squares and compare them in both directions.
Binarization b = new Binarization();

FileInputStream is = new FileInputStream(new File("d:/1.jpg"));
int a[][];
try {
    a = b.toBinarization(is);
} finally {
    is.close();
}
Distribution d = new Distribution();
ArrayList<Square> s = d.toDistribution(a);
System.out.println(s.size());

FileInputStream is2 = new FileInputStream(new File("d:/3.jpg"));
int a2[][];
try {
    a2 = b.toBinarization(is2);
} finally {
    is2.close();
}
d = new Distribution();
ArrayList<Square> s2 = d.toDistribution(a2);
// Report the second picture's square count (not the first one's again).
System.out.println(s2.size());

Compare c = new Compare();
// b.getArea() is the area of the picture binarized last, i.e. d:/3.jpg.
double m1 = c.toCompare(s, s2, b.getArea());
double m2 = c.toCompare(s2, s, b.getArea());
System.out.println("'''''" + m1 + "," + m2 + ",");

结果 :

'''''0.8107870909493387,0.7989862217705622,  相似度约 0.81 和 0.80,平均有 80% 吧

同一张图测试对比

结果

'''''1.0,1.0000000000000002,    基本就是1,超出的 0.0000000000000002 应该是程序里 double 浮点运算(开方、累加之类)的舍入误差。忽略吧
'''''1.0,1.0000000000000002,

同一张图,反色对比

'''''0.21948688097837563,0.3277072822487256,


相似图。设置颗粒度0.1f,貌似就出问题了

'''''0.8188462582289785,0.6802030848688854,

完全不一致啊,平均 75% 相似度,但正反比的结果不一样,说明算法稳定性很差,要改进啊。

同一张图。设置颗粒度0.1f。

'''''0.24759013614791678,0.0, 相似度0.这还差不多,要的就是0不过你也给太夸张了。。0!FT。。。

反正还有问题了,有待改进~!


原创粉丝点击