两个字符串中最大公共字符数(支持汉字)

来源:互联网 发布:淘宝双十二红包 编辑:程序博客网 时间:2024/05/01 06:54

【导读】

    最近遇到一个问题,可以抽象为:求两个字符串中最大公共字符数并输出所有公共字符。


【正文】

    给定字符串str1和str2,借鉴“两个字符串的最长公共子序列(LCS)”问题的思路(公共字符不要求连续,但必须保持相对顺序),给出递推公式:

f(i, j) :  str1中[0-i] 和str2中[0-j] 的最大公共字符数

f(i, j) = max { 

(1). 1 + f(i - 1, j - 1)  // 仅当 str1[i] == str2[j] 时才参与比较

(2). f(i - 1, j)              // 始终参与比较

(3). f(i, j - 1)              // 始终参与比较

}

边界条件:当 i < 0 或 j < 0 时,f(i, j) = 0。


代码实现:

CommonCharSearcher.h

#ifndef COMMON_CHAR_SEARCHER_H
#define COMMON_CHAR_SEARCHER_H

#include <vector>

// One matched character pair reported by CommonCharSearcher.
typedef struct
{
    int pos1;   // byte index of the matched character's first byte in the first buffer
    int pos2;   // byte index of the matched character's first byte in the second buffer
    bool is_en; // true for a single-byte match, false for a two-byte match
} MatchInfo;

// Finds the largest set of characters two byte buffers have in common (in the
// same relative order) and records where each common character sits in both
// buffers. A character is either one byte with the high bit clear or a pair
// of bytes that both have the high bit set.
// NOTE(review): presumably the two-byte form is meant for GBK/GB2312-style
// Chinese text - confirm the expected encoding with the callers.
class CommonCharSearcher
{
public:
    // Result codes returned by is_match_word().
    enum {NOT_MATCH, MATCH_CHAR, MATCH_TWO_CHAR};
    // Character classes returned by get_word_type().
    enum {UNKNOWN, CHAR, TWO_CHAR};

public:
    CommonCharSearcher();

    // Runs the search over buff1[0, len1) and buff2[0, len2). The outcome is
    // then available through get_max_match_num() and get_match_info().
    void search_most_common_char(const char *buff1, int len1, const char* buff2, int len2);

    // Number of common characters found by the search.
    int get_max_match_num();

    // Positions of the common characters found by the search.
    const std::vector<MatchInfo>& get_match_info();

private:
    // Memoised recurrence: number of common characters of buff1[0..i] and buff2[0..j].
    int match(const char* buff1, int i, const char* buff2, int j);

    // Compares the characters at buff1[i] and buff2[j]; returns NOT_MATCH,
    // MATCH_CHAR or MATCH_TWO_CHAR.
    int is_match_word(const char* buff1, int i, const char* buff2, int j);

    // Classifies the byte at buff[i] as UNKNOWN, CHAR or TWO_CHAR.
    int get_word_type(const char* buff, int i);

    // Dumps the memo table.
    void print(int len1, int len2);

    // Walks back from the best cell and fills m_match_infos.
    void back_track_exec(const char* buff1, int i, const char* buff2, int j, int match_num);

private:
    int m_match_num;    // best match count found so far
    int m_max_pos1;     // index in the first buffer where the best count was reached
    int m_max_pos2;     // index in the second buffer where the best count was reached
    char *m_buff1;      // only ever set to NULL by the visible implementation
    char *m_buff2;      // only ever set to NULL by the visible implementation
    int **m_match_arr;  // len1 x len2 memo table, -1 marks "not computed yet"
    std::vector<MatchInfo> m_match_infos; // matched pairs, last match first
};

#endif // COMMON_CHAR_SEARCHER_H

CommonCharSearcher.cpp

#include "stdafx.h"
#include "CommonCharSearcher.h"
#include <iostream>

namespace
{
    // True when the byte has its high bit clear. Testing the bit instead of
    // `c >= 0` gives the same answer where plain `char` is signed and keeps
    // working where it is unsigned.
    inline bool is_single_byte(char c)
    {
        return (static_cast<unsigned char>(c) & 0x80) == 0;
    }

    // True when the characters ending at buff1[i] and buff2[j] have the same
    // class (type1/type2 come from get_word_type) and identical bytes.
    // For TWO_CHAR the caller guarantees i >= 1 and j >= 1.
    inline bool same_character(const char* buff1, int i, int type1,
                               const char* buff2, int j, int type2)
    {
        if (type1 != type2)
        {
            return false;
        }
        if (type1 == CommonCharSearcher::CHAR)
        {
            return buff1[i] == buff2[j];
        }
        if (type1 == CommonCharSearcher::TWO_CHAR)
        {
            return buff1[i] == buff2[j] && buff1[i - 1] == buff2[j - 1];
        }
        return false;
    }
}

// Creates a searcher with no result and no memo table.
CommonCharSearcher::CommonCharSearcher()
    : m_match_num(0),
      m_max_pos1(-1),
      m_max_pos2(-1),
      m_buff1(NULL),
      m_buff2(NULL),
      m_match_arr(NULL)
{
}

// Computes the largest number of common characters of buff1[0, len1) and
// buff2[0, len2), prints the memo table and the matched positions to
// std::cout, and stores the result for get_max_match_num()/get_match_info().
// Results of any previous call are discarded first; with NULL buffers or
// non-positive lengths the searcher is left holding an empty result.
void CommonCharSearcher::search_most_common_char(const char *buff1, int len1, const char* buff2, int len2)
{
    // Without this reset a second search would start from the old maximum
    // and append to the old match list.
    m_match_num = 0;
    m_max_pos1 = -1;
    m_max_pos2 = -1;
    m_match_infos.clear();

    if (buff1 == NULL || len1 <= 0 || buff2 == NULL || len2 <= 0)
    {
        return;
    }

    // Memo table, -1 means "not computed yet".
    m_match_arr = new int*[len1];
    for (int row = 0; row < len1; ++row)
    {
        m_match_arr[row] = new int[len2];
        for (int col = 0; col < len2; ++col)
        {
            m_match_arr[row][col] = -1;
        }
    }

    // Fill every cell and remember the first cell (row-major) holding the maximum.
    for (int row = 0; row < len1; ++row)
    {
        for (int col = 0; col < len2; ++col)
        {
            const int curr = this->match(buff1, row, buff2, col);
            if (curr > m_match_num)
            {
                m_match_num = curr;
                m_max_pos1 = row;
                m_max_pos2 = col;
            }
        }
    }

    print(len1, len2);
    std::cout << "(" << m_max_pos1 << "," << m_max_pos2 << ") = " << m_match_num << std::endl;

    if (m_max_pos1 != -1 && m_max_pos2 != -1)
    {
        this->back_track_exec(buff1, m_max_pos1, buff2, m_max_pos2, m_match_num);
    }

    // Matches were collected from the end backwards; print them front to back.
    // The index is declared here: the original relied on a loop variable from
    // an earlier `for` still being in scope, which standard C++ does not allow.
    for (int k = static_cast<int>(m_match_infos.size()) - 1; k >= 0; --k)
    {
        std::cout << "pos1 = " << m_match_infos[k].pos1 << ", pos2 = " << m_match_infos[k].pos2 << ", is_en = " << m_match_infos[k].is_en << std::endl;
    }

    // Release the memo table and clear the pointer so it is never left dangling.
    for (int row = 0; row < len1; ++row)
    {
        delete [] m_match_arr[row];
    }
    delete [] m_match_arr;
    m_match_arr = NULL;
}

// Returns the match count stored by the last search.
int CommonCharSearcher::get_max_match_num()
{
    return this->m_match_num;
}

// Returns the matched pairs stored by the last search, last match first.
const std::vector<MatchInfo>& CommonCharSearcher::get_match_info()
{
    return this->m_match_infos;
}

// Memoised recurrence: number of common characters of buff1[0..i] and
// buff2[0..j], where i and j index the LAST byte of the character under
// consideration. Returns 0 when an index is negative, a buffer is NULL or no
// memo table is allocated.
int CommonCharSearcher::match(const char* buff1, int i, const char* buff2, int j)
{
    if (m_match_arr == NULL || buff1 == NULL || i < 0 || buff2 == NULL || j < 0)
    {
        return 0;
    }

    // Already computed.
    if (m_match_arr[i][j] != -1)
    {
        return m_match_arr[i][j];
    }

    const int type1 = this->get_word_type(buff1, i);
    const int type2 = this->get_word_type(buff2, j);

    // A byte that does not end a character is skipped, first buffer first.
    if (type1 == CommonCharSearcher::UNKNOWN)
    {
        m_match_arr[i][j] = this->match(buff1, i - 1, buff2, j);
        return m_match_arr[i][j];
    }
    if (type2 == CommonCharSearcher::UNKNOWN)
    {
        m_match_arr[i][j] = this->match(buff1, i, buff2, j - 1);
        return m_match_arr[i][j];
    }

    // Number of bytes the current character occupies in each buffer.
    const int step1 = (type1 == CommonCharSearcher::TWO_CHAR) ? 2 : 1;
    const int step2 = (type2 == CommonCharSearcher::TWO_CHAR) ? 2 : 1;

    int best = 0;
    if (same_character(buff1, i, type1, buff2, j, type2))
    {
        best = 1 + match(buff1, i - step1, buff2, j - step2);
    }

    const int drop_second = match(buff1, i, buff2, j - step2);
    const int drop_first = match(buff1, i - step1, buff2, j);
    if (drop_second > best)
    {
        best = drop_second;
    }
    if (drop_first > best)
    {
        best = drop_first;
    }

    m_match_arr[i][j] = best;
    return best;
}

// Prints the len1 x len2 memo table to std::cout, one tab-separated row per
// line. Does nothing when no table is allocated.
void CommonCharSearcher::print(int len1, int len2)
{
    if (m_match_arr == NULL)
    {
        return;
    }
    for (int row = 0; row < len1; ++row)
    {
        for (int col = 0; col < len2; ++col)
        {
            std::cout << m_match_arr[row][col] << "\t";
        }
        std::cout << std::endl;
    }
}

// Compares the characters STARTING at buff1[i] and buff2[j] and returns
// MATCH_CHAR, MATCH_TWO_CHAR or NOT_MATCH.
// NOTE(review): the two-byte branch reads buff1[i+1] and buff2[j+1] and no
// length is available here, so the caller must guarantee those bytes exist.
// Unlike match()/get_word_type(), the index is the first byte of the
// character. Nothing in the visible code calls this function.
int CommonCharSearcher::is_match_word(const char* buff1, int i, const char* buff2, int j)
{
    if (buff1 == NULL || i < 0 || buff2 == NULL || j < 0)
    {
        return NOT_MATCH;
    }

    const bool single1 = is_single_byte(buff1[i]);
    const bool single2 = is_single_byte(buff2[j]);

    if (single1 && single2)
    {
        if (buff1[i] == buff2[j])
        {
            return MATCH_CHAR;
        }
    }
    else if (!single1 && !single2)
    {
        if (buff1[i] == buff2[j] && buff1[i + 1] == buff2[j + 1])
        {
            return MATCH_TWO_CHAR;
        }
    }
    return NOT_MATCH;
}

// Classifies the byte at buff[i]:
//   CHAR      - high bit clear, a one-byte character;
//   TWO_CHAR  - high bit set and buff[i-1] has it set too, so the pair
//               (i-1, i) is treated as one two-byte character ending at i;
//   UNKNOWN   - high bit set with no high-bit byte before it, or invalid input.
// NOTE(review): inside a run of high-bit bytes every index after the first is
// reported as TWO_CHAR regardless of the real character alignment, so byte
// pairs that straddle two characters can be counted as matches. Fixing this
// needs a decision about the text encoding.
int CommonCharSearcher::get_word_type(const char* buff, int i)
{
    if (buff == NULL || i < 0)
    {
        return CommonCharSearcher::UNKNOWN;
    }
    if (is_single_byte(buff[i]))
    {
        return CommonCharSearcher::CHAR;
    }
    if (i - 1 >= 0 && !is_single_byte(buff[i - 1]))
    {
        return CommonCharSearcher::TWO_CHAR;
    }
    return CommonCharSearcher::UNKNOWN;
}

// Reconstructs the matched character pairs by walking the memo table back
// from cell (i, j) and appends them to m_match_infos, last match first.
// match_num is the number of pairs still expected; the walk stops when it
// reaches zero or either index runs off the front of its buffer.
//
// Every step follows the neighbour that actually produced the current cell's
// value. The previous version tried both neighbours and kept the pushes from
// both, which could report crossing pairs (e.g. "xyaz" vs "axyz"), and it
// stopped dead on an UNKNOWN byte although match() skips such bytes.
void CommonCharSearcher::back_track_exec(const char* buff1, int i, const char* buff2, int j, int match_num)
{
    if (buff1 == NULL || buff2 == NULL)
    {
        return;
    }

    while (i >= 0 && j >= 0 && match_num > 0)
    {
        const int type1 = this->get_word_type(buff1, i);
        const int type2 = this->get_word_type(buff2, j);

        // Mirror match(): skip bytes that do not end a character.
        if (type1 == CommonCharSearcher::UNKNOWN)
        {
            --i;
            continue;
        }
        if (type2 == CommonCharSearcher::UNKNOWN)
        {
            --j;
            continue;
        }

        const int step1 = (type1 == CommonCharSearcher::TWO_CHAR) ? 2 : 1;
        const int step2 = (type2 == CommonCharSearcher::TWO_CHAR) ? 2 : 1;
        const int here = this->match(buff1, i, buff2, j);

        // Take the pair only when it is what gave this cell its value.
        if (same_character(buff1, i, type1, buff2, j, type2) &&
            here == 1 + this->match(buff1, i - step1, buff2, j - step2))
        {
            MatchInfo match_info;
            match_info.is_en = (type1 == CommonCharSearcher::CHAR);
            match_info.pos1 = i - step1 + 1; // first byte of the character
            match_info.pos2 = j - step2 + 1;
            m_match_infos.push_back(match_info);

            i -= step1;
            j -= step2;
            --match_num;
            continue;
        }

        // Otherwise move to the neighbour holding the same count.
        if (this->match(buff1, i, buff2, j - step2) == here)
        {
            j -= step2;
        }
        else
        {
            i -= step1;
        }
    }
}

注意:笔者的问题场景需要支持中文,全角按半角处理。如果仅仅需要支持英文字符串,代码会简洁很多。


0 0
原创粉丝点击