Python实现求两个字符串的最短编辑距离

来源:互联网 发布:java小项目 编辑:程序博客网 时间:2024/05/16 00:32

        前几天用C++实现了求两个字符串的最短编辑距离的算法,现在用Python重新实现一遍,基本思路和C++版本是一致的。

1.时间复杂度分析

Python的实现方式与C++实现的理论时间复杂度是一样的,运用动态规划方法都是O(m*n)。从测试结果来看,不同规模的数据耗费的时间基本同问题的规模(m*n)呈线性增长,非递归比递归过程快得多。不过,比较反常的是空间优化后比优化前时间耗费减少,而优化后计算过程中多了必要的取模运算,理论上应该比优化前增加才是,尚未找到原因。

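| 字符串X长度 | 字符串Y长度 | 最短编辑距离 | 递归时间(ms) | 非递归时间(ms) | 非递归(空间优化)时间(ms) |
| --- | --- | --- | --- | --- | --- |
| 20 | 30 | 26 | 7.5 | 5.4 | 3.1 |
| 200 | 300 | 245 | 361 | 209 | 140 |
| 500 | 1000 | 815 | 17224 | 1344 | 1115 |
| 2000 | 1000 | 1635 | 101482 | 5721 | 4675 |
| 2000 | 3000 | 2411 | 299539 | 14503 | 14271 |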


2.源代码

import randomimport datetimeimport syssys.setrecursionlimit(204800)def edit_1(x, y, edit, i, j):    xlen = len(x)    ylen = len(y)    if i >= 0 and j >= 0 and i <= xlen and j <= ylen:        if i == 0 or j == 0 or edit[i][j] != 100000:            return edit[i][j]        else:            if x[i-1] == y[j-1]:                edit[i][j] = min(min(edit_1(x,y,edit,i,j-1)+1, edit_1(x,y,edit,i-1,j)+1), edit_1(x,y,edit,i-1,j-1))                return edit[i][j]            else:                if i >= 2 and j >= 2 and x[i-2] == y[j-1] and x[i-1] == y[j-2]:                    edit[i][j] = min(min(edit_1(x,y,edit,i,j-1)+1, edit_1(x,y,edit,i-1,j)+1),min(edit_1(x,y,edit,i-1,j-1)+1, edit_1(x,y,edit,i-2,j-2)+1))                    return edit[i][j]                else:                    edit[i][j] = min(min(edit_1(x,y,edit,i,j-1)+1, edit_1(x,y,edit,i-1,j)+1), edit_1(x,y,edit,i-1,j-1)+1)                    return edit[i][j]    else:        return 0def edit_length_2(x, y):    edit = [[-1 for i in range(len(y) + 1)] for j in range(len(x) + 1)]    for i in range(len(x) + 1):        edit[i][0] = i    for j in range(len(y) + 1):        edit[0][j] = j    for i in range(1, len(x) + 1):        for j in range(1, len(y) + 1):            if x[i-1] == y[j-1]:                edit[i][j] = min(min(edit[i][j-1]+1, edit[i-1][j]+1), edit[i-1][j-1])            else:                if i >= 2 and j >= 2 and x[i-2] == y[j-1] and x[i-1] == y[j-2]:                    edit[i][j] = min(min(edit[i][j-1]+1, edit[i-1][j]+1), min(edit[i-1][j-1]+1, edit[i-2][j-2]+1))                else:                    edit[i][j] = min(min(edit[i][j-1]+1, edit[i-1][j]+1), edit[i-1][j-1]+1)    return edit[len(x)][len(y)]def edit_length_3(x, y):    edit = [[-1 for i in range(len(y) + 1)] for j in range(3)]    for j in range(len(y) + 1):        edit[0][j] = j    for i in range(1, len(x) + 1):        edit[i%3][0] = edit[(i-1)%3][0] + 1        for j in range(1, len(y) + 1):            if x[i-1] == y[j-1]:                
edit[i%3][j] = min(min(edit[i%3][j-1]+1, edit[(i-1)%3][j]+1), edit[(i-1)%3][j-1])            else:                if i >= 2 and j >= 2 and x[i-2] == y[j-1] and x[i-1] == y[j-2]:                    edit[i%3][j] = min(min(edit[i%3][j-1]+1, edit[(i-1)%3][j]+1), min(edit[(i-1)%3][j-1]+1, edit[(i-2)%3][j-2]+1))                else:                    edit[i%3][j] = min(min(edit[i%3][j-1]+1, edit[(i-1)%3][j]+1), edit[(i-1)%3][j-1]+1)    return edit[len(x)%3][len(y)]def test_1(x, y):    edit = [[100000 for i in range(len(y) + 1)] for j in range(len(x) + 1)]    for i in range(len(x) + 1):        edit[i][0] = i    for j in range(len(y) + 1):        edit[0][j] = j    max_len = edit_1(x, y, edit, len(x), len(y))    print "Edit_Length_1: ",max_lendef test_2(x, y):    max_len = edit_length_2(x, y)    print "Edit_Length_2: ",max_lendef test_3(x, y):    max_len = edit_length_3(x, y)    print "Edit_Length_3: ",max_lendef rand_str(length):    str_0 = []    for i in range(length):        str_0.append(random.choice("abcdefghijklmnopqrstuvwxyz"))    return str_0def main():    x = rand_str(20)    y = rand_str(30)    print "The String X Length is : ", len(x), " String is : ",    for i in range(len(x)):        print x[i],    print ""    print "The String Y Length is : ", len(y), " String is : ",    for i in range(len(y)):        print y[i],    print ""    time_1 = datetime.datetime.now()    test_1(x, y)    time_2 = datetime.datetime.now()    time_3 = datetime.datetime.now()    test_2(x, y)    time_4 = datetime.datetime.now()    time_5 = datetime.datetime.now()    test_3(x, y)    time_6 = datetime.datetime.now()    print "Function 1 spend ", (time_2 - time_1)    print "Function 2 spend ", (time_4 - time_3)    print "Function 3 spend ", (time_6 - time_5)main()



0 0
原创粉丝点击