最长公共子序列(Longest Common Subsequence, LCS)

来源:互联网 发布:js style标签 编辑:程序博客网 时间:2024/05/21 15:46

某个序列的子序列是从最初序列通过去除某些元素但不破坏余下元素的相对位置(在前或在后)而形成的新序列,所以子序列不必连续

最长公共子序列的最优子结构性质

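设序列 $X=\langle x_1, x_2, \ldots, x_m\rangle$ 和 $Y=\langle y_1, y_2, \ldots, y_n\rangle$ 的一个最长公共子序列为 $Z=\langle z_1, z_2, \ldots, z_k\rangle$, 则:
(1) 若 $x_m = y_n$, 则 $z_k = x_m = y_n$, 且 $Z_{k-1}$ 是 $X_{m-1}$ 和 $Y_{n-1}$ 的最长公共子序列;
(2) 若 $x_m \neq y_n$ 且 $z_k \neq x_m$, 则 $Z$ 是 $X_{m-1}$ 和 $Y$ 的最长公共子序列;
(3) 若 $x_m \neq y_n$ 且 $z_k \neq y_n$, 则 $Z$ 是 $X$ 和 $Y_{n-1}$ 的最长公共子序列.
其中 $X_{m-1}=\langle x_1, x_2, \ldots, x_{m-1}\rangle$, $Y_{n-1}=\langle y_1, y_2, \ldots, y_{n-1}\rangle$, $Z_{k-1}=\langle z_1, z_2, \ldots, z_{k-1}\rangle$.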

#include <iostream>
#include <string.h>
using namespace std;

// Side length of the DP tables below. The tables are indexed by prefix
// length (0..len inclusive), so each input may hold at most MAX - 1 characters.
#define MAX 100

// c[i][j]: length of the LCS of the first i characters of str1 and the
// first j characters of str2. lcs() never writes row 0 or column 0, so they
// keep their zero initial value (the empty-prefix base case).
int c[MAX][MAX] = { 0 };

// flag[i][j]: the neighbouring cell that c[i][j] was derived from.
//    1 : upper-left (i-1, j-1), taken when str1[i-1] == str2[j-1]
//    0 : up         (i-1, j)
//   -1 : left       (i,   j-1)
short flag[MAX][MAX] = { 0 };

/**
 * Fills the global tables c and flag for the longest common subsequence of
 * str1[0..len1) and str2[0..len2).
 *
 * After the call, c[len1][len2] holds the LCS length and flag records, for
 * every cell (i, j) with 1 <= i <= len1 and 1 <= j <= len2, which neighbour
 * the value came from, so that the subsequence can be recovered by walking
 * back from (len1, len2).
 *
 * @param str1 first sequence; only str1[0..len1) is read
 * @param len1 number of characters of str1 to use; must be < MAX
 * @param str2 second sequence; only str2[0..len2) is read
 * @param len2 number of characters of str2 to use; must be < MAX
 *
 * If either length is MAX or more the tables cannot hold the result; the
 * function then returns without touching c or flag.
 */
void lcs(char *str1, int len1, char *str2, int len2)
{
    // Cells up to [len1][len2] are written, so a length >= MAX would run
    // past the end of the fixed-size tables.
    if (len1 >= MAX || len2 >= MAX)
        return;

    for (int i = 1; i <= len1; ++i)
    {
        for (int j = 1; j <= len2; ++j)
        {
            if (str1[i-1] == str2[j-1])
            {
                // Matching characters extend the LCS of both shorter prefixes.
                c[i][j] = c[i-1][j-1] + 1;
                flag[i][j] = 1;
            }
            else if (c[i-1][j] > c[i][j-1])
            {
                // Dropping the last character of str1 gives the longer result.
                c[i][j] = c[i-1][j];
                flag[i][j] = 0;
            }
            else
            {
                // Ties go to dropping the last character of str2.
                c[i][j] = c[i][j-1];
                flag[i][j] = -1;
            }
        }
    }
}

void print(int i, int j, char *str)
{
        if (0 == i || 0 == j)
                return;
        if (1 == flag[i][j])
        {
                print(i-1, j-1, str);
                cout << str[i-1];
        }
        else if (0 == flag[i][j])
                print(i-1, j, str);
        else if (-1 == flag[i][j])
                print(i, j-1, str);
}

int main()
{
        char str1[] = "GCCCTAGCG";
        char str2[] = "GCGCAATG";
        int len1 = strlen(str1);
        int len2 = strlen(str2);

        lcs(str1, len1, str2, len2);
        for (int i = 0; i <= len1; ++i)
        {
                for (int j = 0; j <= len2; ++j)
                        cout << c[i][j] << "   ";
                cout << endl;
        }

        cout << "length of lcs = " << c[len1][len2] << endl;
        print(len1, len2, str1);
        cout << endl;

        return 0;
}
    
output:

 ---------------------------------> str2
 | 0   0   0   0   0   0   0   0   0  
 | 0   1   1   1   1   1   1   1   1  
 | 0   1   2   2   2   2   2   2   2  
 | 0   1   2   2   3   3   3   3   3  
 | 0   1   2   2   3   3   3   3   3  
 | 0   1   2   2   3   3   3   4   4  
 | 0   1   2   2   3   4   4   4   4  
 | 0   1   2   3   3   4   4   4   5  
 | 0   1   2   3   4   4   4   4   5  
 | 0   1   2   3   4   4   4   4   5  
V

str1

length of lcs = 5
GCGCG

0 0