Hirschberg's algorithm to find string alignment

来源:互联网 发布:网络理财可靠吗 编辑:程序博客网 时间:2024/06/05 12:08
// Based on Hirschberg's algorithm to find the best alignment for two strings.// This algorithm is proved to run in O(mn) time and O(m + n) space// reference:// https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm// http://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/06DynamicProgrammingII.pdf// http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Dynamic/Hirsch/#include <iostream>#include <vector>#include <string>#include <algorithm>#include <Windows.h>using namespace std;vector<int> score(string X, string Y, int alpha, int theta, bool rev){int m = static_cast<int>(X.length());int n = static_cast<int>(Y.length());vector<int> ret(n + 1, 0);for (int j = 0; j <= n; ret[j] = j * theta, ++j);if (rev){reverse(X.begin(), X.end());reverse(Y.begin(), Y.end());}for (int i = 1; i <= m; ++i){int prev = ret[0];ret[0] += theta;for (int j = 1; j <= n; ++j){int cost = 0;if (X[i - 1] == Y[j - 1])cost = prev;elsecost = prev + alpha;cost = min(cost, theta + ret[j]);cost = min(cost, theta + ret[j - 1]);prev = ret[j];ret[j] = cost;}}return ret;}pair<string, string> NeedlemanWunsch(string X, string Y, int alpha, int theta){int m = X.length();int n = Y.length();vector<vector<int>> M(m + 1, vector<int>(n + 1, 0));for (int i = 0; i <= m; ++i)M[i][0] = i * theta;for (int j = 0; j <= n; ++j)M[0][j] = j * theta;for (int i = 1; i <= m; ++i){for (int j = 1; j <= n; ++j){int cost = 0; int kind = 1;if (X[i - 1] == Y[j - 1])cost = M[i - 1][j - 1];elsecost = M[i - 1][j - 1] + alpha;cost = min(cost, theta + M[i - 1][j]);cost = min(cost, theta + M[i][j - 1]);M[i][j] = cost;}}int i = m, j = n;vector<string> align(2);// backtracking to get the solutionwhile (i >= 1 || j >= 1){if ((i > 0 && M[i][j] == theta + M[i - 1][j]) || j == 0){align[0].push_back(X[i - 1]);align[1].push_back('-');--i;}else if ((j > 0 && M[i][j] == theta + M[i][j - 1]) || i == 0){align[0].push_back('-');align[1].push_back(Y[j - 1]);--j;}else if (M[i][j] == M[i - 1][j - 1] + alpha || M[i][j] == M[i - 1][j - 1]){align[0].push_back(X[i - 1]);align[1].push_back(Y[j - 1]);--i, --j;}}reverse(align[0].begin(), align[0].end());reverse(align[1].begin(), align[1].end());return {align[0], align[1]};}int partitionY(vector<int> &scoreL, vector<int> &scoreR){int n = static_cast<int>(scoreL.size());int minimum = INT_MAX, index = 0;reverse(scoreR.begin(), scoreR.end());for (int i = 0; i < n; ++i){if (scoreL[i] + scoreR[i] < minimum){minimum = scoreL[i] + scoreR[i];index = i;}}return index;}pair<string, string> Hirschberg(string X, string Y, int alpha, int theta){int m = static_cast<int>(X.length());int n = static_cast<int>(Y.length());string Z, W;if (m == 0){return{ string(n, '-'), Y };}else if (n == 0){return{ X, string(m, '-') };}else if (m == 1 || n == 1){return NeedlemanWunsch(X, Y, alpha, theta);}else{int xmid = m / 2;string X_l = X.substr(0, xmid);string X_r = X.substr(xmid, m - xmid);int ymid = partitionY(score(X_l, Y, alpha, theta, false), score(X_r, Y, alpha, theta, true));string Y_l = Y.substr(0, ymid);string Y_r = Y.substr(ymid, n - ymid);pair<string, string> ret1 = Hirschberg(X_l, Y_l, alpha, theta);pair<string, string> ret2 = Hirschberg(X_r, Y_r, alpha, theta);Z = ret1.first + ret2.first;W = ret1.second + ret2.second;}return { Z, W };}int main(){string X = "CTACCG";string Y = "TACATG";string X1 = "AGTACGCA";string Y1 = "TATGC";auto ret1 = Hirschberg(X, Y, 1, 2);cout << "X: " << X << endl;cout << "Y: " << Y << endl;cout << "the alignment for X and Y:" << endl;cout << ret1.first << endl;cout << ret1.second << endl;cout << "--------------------------------------------------" << endl;auto ret2 = Hirschberg(X1, Y1, 1, 2);cout << "X1: " << X << endl;cout << "Y1: " << Y << endl;cout << "the alignment for X1 and Y1:" << endl;cout << ret2.first << endl;cout << ret2.second << endl;system("PAUSE");return 0;}

0 0
原创粉丝点击