文本比较算法的实现
来源:互联网 发布:淘宝短款溥针织开衫 编辑:程序博客网 时间:2024/05/27 01:14
由于本人对算法理解的重大失误,造成原来写的程序有重大BUG。特此向各位道歉来了,呵呵
今天又将算法看了一遍,终于理解其精髓
下面是主要的实现代码,献丑了
首先是初始化一个boolean数组,存放各个字符是否匹配的:
接着是一个根据这个boolean数组初始化一个int矩阵的函数,这个函数就对应寻找最大匹配路径的函数
对应寻找最优路径的函数,也是初始化一个int[][]
当然了,上面有调用到一个叫getV的方法,方法很简单
下面就是要计算差异了,基本和我上次写的计算差异的方法差不多
大功告成。。。。。
源码:http://www.blogjava.net/Files/phyeas/src.zip
今天又将算法看了一遍,终于理解其精髓
下面是主要的实现代码,献丑了
首先是初始化一个boolean数组,存放各个字符是否匹配的:
public static boolean[][] getCompareBooleanArray(String source,
String compareTo) {
boolean[][] comps;
if (compareTo == null || source == null) {
throw new IllegalArgumentException("必须设置完两个文本后再进行初始化");
}
comps = new boolean[compareTo.length()][source.length()];
for (int i = 0; i < compareTo.length(); i++) {
for (int j = 0; j < source.length(); j++) {
comps[i][j] = compareTo.charAt(i) == source.charAt(j);
// System.out.print(comps[i][j] + "\t");
}
// System.out.println();
}
return comps;
}
这个实现基本上没什么说的,就这样String compareTo) {
boolean[][] comps;
if (compareTo == null || source == null) {
throw new IllegalArgumentException("必须设置完两个文本后再进行初始化");
}
comps = new boolean[compareTo.length()][source.length()];
for (int i = 0; i < compareTo.length(); i++) {
for (int j = 0; j < source.length(); j++) {
comps[i][j] = compareTo.charAt(i) == source.charAt(j);
// System.out.print(comps[i][j] + "\t");
}
// System.out.println();
}
return comps;
}
接着是一个根据这个boolean数组初始化一个int矩阵的函数,这个函数就对应寻找最大匹配路径的函数
public static int[][] getSetpPathArr(boolean[][] barr) {
int[][] iarr = new int[barr.length][barr[0].length];
for (int i = barr.length - 1; i >= 0; i--) {
for (int j = barr[i].length - 1; j >= 0; j--) {
int v_i_j = barr[i][j] ? 1 : 0;
int n_i_1_j_1 = i + 1 >= iarr.length || j + 1 >= iarr[i].length ? 0
: iarr[i + 1][j + 1];
int n_i_1_j = i + 1 >= iarr.length ? 0 : iarr[i + 1][j];
int n_i_j_1 = j + 1 >= iarr[i].length ? 0 : iarr[i][j + 1];
int v_n_1_1 = v_i_j + n_i_1_j_1;
iarr[i][j] = v_n_1_1 > n_i_1_j ? v_n_1_1 > n_i_j_1 ? v_n_1_1
: n_i_j_1 : n_i_1_j > n_i_j_1 ? n_i_1_j : n_i_j_1;
}
}
return iarr;
}
根据算法的解释,从下往上,从右向左计算每个格子int[][] iarr = new int[barr.length][barr[0].length];
for (int i = barr.length - 1; i >= 0; i--) {
for (int j = barr[i].length - 1; j >= 0; j--) {
int v_i_j = barr[i][j] ? 1 : 0;
int n_i_1_j_1 = i + 1 >= iarr.length || j + 1 >= iarr[i].length ? 0
: iarr[i + 1][j + 1];
int n_i_1_j = i + 1 >= iarr.length ? 0 : iarr[i + 1][j];
int n_i_j_1 = j + 1 >= iarr[i].length ? 0 : iarr[i][j + 1];
int v_n_1_1 = v_i_j + n_i_1_j_1;
iarr[i][j] = v_n_1_1 > n_i_1_j ? v_n_1_1 > n_i_j_1 ? v_n_1_1
: n_i_j_1 : n_i_1_j > n_i_j_1 ? n_i_1_j : n_i_j_1;
}
}
return iarr;
}
对应寻找最优路径的函数,也是初始化一个int[][]
public static int[][] getMinSetpArr(int[][] miarr, boolean[][] barr) {
int[][] iarr = new int[miarr.length][miarr[0].length];
for (int i = iarr.length - 1; i >= 0; i--) {
for (int j = iarr[i].length - 1; j >= 0; j--) {
if (barr[i][j]) {
iarr[i][j] = getV(iarr, i + 1, j + 1) + 1;
} else if (getV(miarr, i, j + 1) >= getV(miarr, i + 1, j)) {
iarr[i][j] = getV(iarr, i, j + 1);
} else {
iarr[i][j] = getV(iarr, i + 1, j) + 1;
}
}
}
return iarr;
}
这样就相应对应了算法中讲的那三个矩阵。int[][] iarr = new int[miarr.length][miarr[0].length];
for (int i = iarr.length - 1; i >= 0; i--) {
for (int j = iarr[i].length - 1; j >= 0; j--) {
if (barr[i][j]) {
iarr[i][j] = getV(iarr, i + 1, j + 1) + 1;
} else if (getV(miarr, i, j + 1) >= getV(miarr, i + 1, j)) {
iarr[i][j] = getV(iarr, i, j + 1);
} else {
iarr[i][j] = getV(iarr, i + 1, j) + 1;
}
}
}
return iarr;
}
当然了,上面有调用到一个叫getV的方法,方法很简单
private static int getV(int[][] arr, int i, int j) {
if (i >= arr.length || j >= arr[i].length) {
return 0;
}
return arr[i][j];
}
分别初始化完这三个数组后对结果进行计算。if (i >= arr.length || j >= arr[i].length) {
return 0;
}
return arr[i][j];
}
boolean[][] barr = getCompareBooleanArray(source, compareTo);
int[][] miarr = getSetpPathArr(barr);
int[][] diarr = getMinSetpArr(miarr, barr);
先找出从顶点出发,下一步该走的那几个点:int[][] miarr = getSetpPathArr(barr);
int[][] diarr = getMinSetpArr(miarr, barr);
List<Point> points = new ArrayList<Point>();
int maxJ = -1;
for (int i = 0; i < barr.length; i++) {
int tempMaxJ = -1;
for (int j = 0; j < barr[i].length; j++) {
if (j > maxJ && maxJ != -1) {
break;
}
if (barr[i][j]) {
if (tempMaxJ == -1) {
tempMaxJ = j;
}
points.add(new Point(i, j));
}
}
if (tempMaxJ != -1) {
maxJ = tempMaxJ;
}
}
找出可能的最大最优匹配路径,这个可能会有几条路径,核心代码:int maxJ = -1;
for (int i = 0; i < barr.length; i++) {
int tempMaxJ = -1;
for (int j = 0; j < barr[i].length; j++) {
if (j > maxJ && maxJ != -1) {
break;
}
if (barr[i][j]) {
if (tempMaxJ == -1) {
tempMaxJ = j;
}
points.add(new Point(i, j));
}
}
if (tempMaxJ != -1) {
maxJ = tempMaxJ;
}
}
int max = 0;
int minSetp = -1;
for (Point point : points) {
if (miarr[point.getX()][point.getY()] > max) {
max = miarr[point.getX()][point.getY()];
}
}
for (Point point : points) {
if (minSetp == -1) {
minSetp = diarr[point.getX()][point.getY()];
} else if (miarr[point.getX()][point.getY()] >= max
&& diarr[point.getX()][point.getY()] < minSetp) {
minSetp = diarr[point.getX()][point.getY()];
}
}
List<Point> willRemove = new ArrayList<Point>();
for (Point point : points) {
if (miarr[point.getX()][point.getY()] < max
|| diarr[point.getX()][point.getY()] > minSetp) {
willRemove.add(point);
}
}
points.removeAll(willRemove);
willRemove.clear();
找到这里,最大匹配并且最优匹配的路径就找到了int minSetp = -1;
for (Point point : points) {
if (miarr[point.getX()][point.getY()] > max) {
max = miarr[point.getX()][point.getY()];
}
}
for (Point point : points) {
if (minSetp == -1) {
minSetp = diarr[point.getX()][point.getY()];
} else if (miarr[point.getX()][point.getY()] >= max
&& diarr[point.getX()][point.getY()] < minSetp) {
minSetp = diarr[point.getX()][point.getY()];
}
}
List<Point> willRemove = new ArrayList<Point>();
for (Point point : points) {
if (miarr[point.getX()][point.getY()] < max
|| diarr[point.getX()][point.getY()] > minSetp) {
willRemove.add(point);
}
}
points.removeAll(willRemove);
willRemove.clear();
下面就是要计算差异了,基本和我上次写的计算差异的方法差不多
List<TextDiff> diffs = new ArrayList<TextDiff>();
if (points.size() >= 1) {
Point startPoint = points.get(0);
points.clear();
if (startPoint.getX() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, 0, startPoint
.getX()));
}
if (startPoint.getY() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, 0, startPoint
.getY()));
}
Point next = getNext(startPoint, miarr, diarr, barr);
while (next != null) {
if (!barr[startPoint.getX()][startPoint.getY()]) {
startPoint = new Point(startPoint.getX() + 1, startPoint
.getY() + 1);
}
if (startPoint.getX() != next.getX() - 1
|| startPoint.getY() != next.getY() - 1) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), next.getX() - startPoint.getX()));
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, startPoint
.getY(), next.getY() - startPoint.getY()));
}
startPoint = next;
next = getNext(startPoint, miarr, diarr, barr);
}
if (startPoint.getX() != barr.length) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), barr.length - startPoint.getX()));
}
if (startPoint.getY() != barr[0].length) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED,
startPoint.getY(), barr[0].length - startPoint.getY()));
}
}
if (points.size() >= 1) {
Point startPoint = points.get(0);
points.clear();
if (startPoint.getX() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, 0, startPoint
.getX()));
}
if (startPoint.getY() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, 0, startPoint
.getY()));
}
Point next = getNext(startPoint, miarr, diarr, barr);
while (next != null) {
if (!barr[startPoint.getX()][startPoint.getY()]) {
startPoint = new Point(startPoint.getX() + 1, startPoint
.getY() + 1);
}
if (startPoint.getX() != next.getX() - 1
|| startPoint.getY() != next.getY() - 1) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), next.getX() - startPoint.getX()));
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, startPoint
.getY(), next.getY() - startPoint.getY()));
}
startPoint = next;
next = getNext(startPoint, miarr, diarr, barr);
}
if (startPoint.getX() != barr.length) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), barr.length - startPoint.getX()));
}
if (startPoint.getY() != barr[0].length) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED,
startPoint.getY(), barr[0].length - startPoint.getY()));
}
}
大功告成。。。。。
源码:http://www.blogjava.net/Files/phyeas/src.zip
- 文本比较算法的实现
- 文本比较算法的实现
- 文本比较算法的实现2
- 文本比较算法再实现-3
- 文本比较算法2
- 文本比较算法1
- 文本比较算法
- 文本比较算法1
- 文本比较算法
- 文本分类常用算法比较
- 最长公共子串的文本比较算法
- 文本比较算法——LD算法
- 文本比较算法--Needleman/Wunsch算法
- 文本比较算法Ⅲ——计算文本的相似度
- 文本比较算法Ⅲ——计算文本的相似度
- simhash算法的原理-用于实现文本判重复算法
- 比较好的一个bresenham算法实现
- 内部排序算法的比较和实现
- zt 终南:JRobin简介
- 系统开发----万能模板
- unity基础开发----NGUI绘制界面
- "PageMethods未定义"或"对象不支持此属性或方法"解决方法分享
- PL/SQL记录和表(oracle type(数组))(最简单!文章下面标黄标题是精华,必看)
- 文本比较算法的实现
- Java 中对类的主动引用和被动引用
- 在UI中显示Bitmap
- Failed to rename directory D:\android-sdk-windows\tools to D:\android-sdk-windows\temp\ToolPackage.o
- Unity3d游戏场景优化杂谈(4)
- c#中连接数据库关于sqlconnection的用户sa无法登录的问题
- 关于SIGPIPE信号
- excel一列数字前面批量加个逗号
- 锁屏快捷方式