匹配字符串相似度算法(各个语言版本)

来源:互联网 发布:nginx日志 编辑:程序博客网 时间:2024/05/01 16:41

C++版

#include <iostream>#include <vector>#include <string>using namespace std;//算法int ldistance(const string source,const string target){    //step 1    int n=source.length();    int m=target.length();    if (m==0) return n;    if (n==0) return m;    //Construct a matrix    typedef vector< vector<int> >  Tmatrix;    Tmatrix matrix(n+1);    for(int i=0; i<=n; i++)  matrix[i].resize(m+1);    //step 2 Initialize    for(int i=1;i<=n;i++) matrix[i][0]=i;    for(int i=1;i<=m;i++) matrix[0][i]=i;     //step 3     for(int i=1;i<=n;i++)     {        const char si=source[i-1];        //step 4        for(int j=1;j<=m;j++)        {            const char dj=target[j-1];            //step 5            int cost;            if(si==dj){                cost=0;            }            else{                cost=1;            }            //step 6            const int above=matrix[i-1][j]+1;            const int left=matrix[i][j-1]+1;            const int diag=matrix[i-1][j-1]+cost;            matrix[i][j]=min(above,min(left,diag));        }     }//step7      return matrix[n][m];}int main(){    string s;    string d;    cout<<"source=";    cin>>s;    cout<<"diag=";    cin>>d;    int dist=ldistance(s,d);    cout<<"dist="<<dist<<endl;}

java版

package io;public class Test1 {         private int compare(String str, String target) {                int d[][]; // 矩阵                int n = str.length();                int m = target.length();                int i; // 遍历str的                int j; // 遍历target的                char ch1; // str的                char ch2; // target的                int temp; // 记录相同字符,在某个矩阵位置值的增量,不是0就是1                if (n == 0) {return m;}                  if (m == 0) { return n; }                d = new int[n + 1][m + 1];                for (i = 0; i <= n; i++) { // 初始化第一列                    d[i][0] = i;                }                for (j = 0; j <= m; j++) { // 初始化第一行                    d[0][j] = j;                }                for (i = 1; i <= n; i++) { // 遍历str                    ch1 = str.charAt(i - 1);                    // 去匹配target                    for (j = 1; j <= m; j++) {                        ch2 = target.charAt(j - 1);                        if (ch1 == ch2) {                            temp = 0;                        } else {                            temp = 1;                        }                        // 左边+1,上边+1, 左上角+temp取最小                        d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + temp);                    }                }                return d[n][m];            }            private int min(int one, int two, int three) {                return (one = one < two ? one : two) < three ? 
one : three;            }            /**             * 获取两字符串的相似度             *              * @param str             * @param target             *              * @return             */            public float getSimilarityRatio(String str, String target) {                return 1 - (float) compare(str, target) / Math.max(str.length(), target.length());            }            public static void main(String[] args) {                Test1 t=new Test1();               String tag="MySQL Server 5.6";//正确的               String test1="mysql";               String test2="MySQL";               String test3="MySQL Server";               String test4="MySQL 5.6";               String test5="Server 5.6";               String test6="SQL Server 5.6";               String test7="MySQL Server 5.6";                System.out.println("similarityRatio=" + t.getSimilarityRatio(test1, tag));                System.out.println("similarityRatio=" + t.getSimilarityRatio(test2, tag));                System.out.println("similarityRatio=" + t.getSimilarityRatio(test3, tag));                System.out.println("similarityRatio=" + t.getSimilarityRatio(test4, tag));                System.out.println("similarityRatio=" + t.getSimilarityRatio(test5, tag));                System.out.println("similarityRatio=" + t.getSimilarityRatio(test6, tag));                System.out.println("similarityRatio=" + t.getSimilarityRatio(test7, tag));            }}

JavaScript版(注意:此版本比较的是两个数组中相同元素所占的比例,而不是编辑距离)

/**
 * Measures how much two arrays overlap, as a percentage.
 *
 * Both arrays are sorted (on copies, so the caller's arrays are left
 * untouched) and merged; every pair of strictly equal elements counts as one
 * match. The result is `matches / (x.length + y.length) * 200`, i.e. 100 when
 * the arrays hold exactly the same elements and 0 when they share none.
 *
 * @param {Array} x first list of values (strings or numbers)
 * @param {Array} y second list of values (strings or numbers)
 * @returns {number} overlap percentage in [0, 100]; 100 when both are empty
 */
function compare(x, y) {
    var total = x.length + y.length;
    if (total === 0) {
        // Two empty lists are identical; without this guard 0 / 0 yields NaN.
        return 100;
    }

    // Sort with the same ordering the merge below relies on. The default
    // sort() orders by string form, which disagrees with `<` for numbers.
    var byOperator = function (p, q) {
        return p < q ? -1 : (p > q ? 1 : 0);
    };
    var left = x.slice().sort(byOperator);
    var right = y.slice().sort(byOperator);

    var matches = 0;
    var i = 0;
    var j = 0;
    while (i < left.length && j < right.length) {
        if (left[i] === right[j]) {
            matches++;
            i++;
            j++;
        } else if (left[i] < right[j]) {
            i++;
        } else {
            // Also taken for incomparable pairs (e.g. 1 vs '1', NaN), so the
            // loop always makes progress.
            j++;
        }
    }
    return matches / total * 200;
}

console.log(compare(['123', '中文', 'hello'], ['123', '中文', 'hello']))
console.log(compare(['123', '中文', 'hello'], ['123', '中文', 'hello'].sort()))

VBScript版

' Returns the similarity of two strings based on their Levenshtein distance.
'
' The result is 1 - distance / Len(longer string), formatted by FormatNumber
' with 3 decimal places and a leading digit. Identical strings give 1;
' two empty strings are treated as identical.
'
' str1, str2: the strings to compare.
Function GetLevenshteinDistince(str1, str2)
    Dim x, y, A, B, C, K
    Dim Matrix()

    ' K is the length of the longer string, used to normalise the distance.
    If (Len(str1) > Len(str2)) Then
        K = Len(str1)
    Else
        K = Len(str2)
    End If

    ' Both strings empty: return full similarity instead of dividing by zero.
    If K = 0 Then
        GetLevenshteinDistince = FormatNumber(1, 3, True)
        Exit Function
    End If

    ' Matrix(x, y) = distance between the first x chars of str2 and the first y chars of str1.
    ReDim Matrix(Len(str2), Len(str1))

    ' Initialise the first column and the first row.
    For x = 0 To UBound(Matrix, 1)
        Matrix(x, 0) = x
    Next
    For y = 0 To UBound(Matrix, 2)
        Matrix(0, y) = y
    Next

    ' Fill the matrix.
    For x = 1 To UBound(Matrix, 1)
        For y = 1 To UBound(Matrix, 2)
            ' Diagonal cell, plus 1 when the characters differ (substitution).
            If (Mid(str1, y, 1) = Mid(str2, x, 1)) Then
                C = Matrix(x - 1, y - 1)
            Else
                C = Matrix(x - 1, y - 1) + 1
            End If
            A = Matrix(x - 1, y) + 1
            B = Matrix(x, y - 1) + 1

            ' Keep the smallest of the three candidates.
            Matrix(x, y) = A
            If B < Matrix(x, y) Then Matrix(x, y) = B
            If C < Matrix(x, y) Then Matrix(x, y) = C
        Next
    Next

    ' Convert the distance into a similarity ratio.
    GetLevenshteinDistince = FormatNumber(1 - (Matrix(Len(str2), Len(str1)) / K), 3, True)
End Function
2 0
原创粉丝点击