计算字符串的相似度(VB2005)——思索之二

来源:互联网 发布:网络用语老板什么意思 编辑:程序博客网 时间:2024/05/21 09:13

在完成“计算字符串的相似度(VB2005)——思索之一”之后,照例对程序进行了一番测试。第一次找了两个相似的字符串,长度分别为1517。速度和结果都比较满意。这也印证了算法的正确性。第二次找了两个相似的字符串,长度分别为15001507。嗯,直接跳出错误,说是堆栈错误。实际上是由于递归嵌套出了问题。采用递归算法,只是理论上有效,便于理解,实际应用中会出现各种限制。如本例,嵌套约1000层的时候就超过了系统的限制。必须想一个解决之道。

仔细观察,可以发现用数学性的语言描述就是

Fnm=GFnm),Fn+1m),Fnm+1))

这个可以简化为递推,由于递推可以放在一个函数内,就解决了系统的递归限制。代码赋予其后。用的是VB2005

 

Public Class clsCalculateStringDistanceEx

    Implements IDistance

 

    Private mStringA As String

    Private mStringB As String

    Private mIsSame As Boolean

 

    Private mDic As Dictionary(Of String, Integer)

 

    Private Function CalculateStringDistance(ByVal StartLower As Integer) As Integer

        Dim LA As Integer = mStringA.Length

        Dim LB As Integer = mStringB.Length

 

        Dim i As Integer, j As Integer

        Dim T1 As Integer, T2 As Integer, T3 As Integer

 

        AddToDic(LA + 1, LB + 1, 0)

 

        For i = LA To StartLower Step -1

            AddToDic(i, LB + 1, LA - i + 1)

        Next

 

        For i = LB To StartLower Step -1

            AddToDic(LA + 1, i, LB - i + 1)

        Next

 

        For i = LA To StartLower Step -1

            For j = LB To StartLower Step -1

                If mStringA.Chars(i - 1) = mStringB.Chars(j - 1) Then

                    AddToDic(i, j, GetFromDic(i + 1, j + 1))

                Else

                    T1 = GetFromDic(i + 1, j)

                    T2 = GetFromDic(i, j + 1)

                    T3 = GetFromDic(i + 1, j + 1)

                    AddToDic(i, j, Min(T1, T2, T3) + 1)

                End If

            Next

        Next

 

        Return GetFromDic(StartLower, StartLower)

    End Function

 

    Private Sub AddToDic(ByVal S1 As Integer, ByVal S2 As Integer, ByVal Value As Integer)

        mDic.Add(S1 & "," & S2, Value)

    End Sub

 

    Private Function GetFromDic(ByVal S1 As Integer, ByVal S2 As Integer) As Integer

        Return mDic(S1 & "," & S2)

    End Function

 

 

    Private Function Min(ByVal ParamArray M() As Integer) As Integer

        Dim i As Integer, J As Integer

        J = M(0)

        For i = 1 To M.GetUpperBound(0)

            If M(i) < J Then J = M(i)

        Next

        Return J

    End Function

 

    Public Function CalculateStringDistance() As Integer  _

            Implements IDistance.CalculateStringDistance

        If mStringA.Length = 0 Then Return mStringB.Length

        If mStringB.Length = 0 Then Return mStringA.Length

 

        mDic = New Dictionary(Of String, Integer)

 

        mIsSame = True

 

        Dim i As Integer, j As Integer

 

        For i = 1 To Min(mStringA.Length, mStringB.Length)

            If mStringA.Chars(i - 1) <> mStringB.Chars(i - 1) Then

                mIsSame = False

                j = i

                Exit For

            End If

        Next

 

        If mIsSame = False Then

            Return CalculateStringDistance(j)

        Else

            Return Math.Abs(mStringA.Length - mStringB.Length)

        End If        

    End Function

 

    Public ReadOnly Property DicCount() As Integer  _

            Implements IDistance.DicCount

        Get

            Return mDic.Count

        End Get

    End Property

 

    Public Sub SetString(ByVal S1 As String, ByVal S2 As String)  _

            Implements IDistance.SetString

        mStringA = S1

        mStringB = S2

    End Sub  

End Class

原创粉丝点击