利用 trie 树匹配 user_agent

来源:互联网 发布:数据库云备份 编辑:程序博客网 时间:2024/06/05 19:48

利用 trie 树(字典树)结构匹配 user_agent,使匹配过程可以在 O(n) 时间内完成。

 

#ifndef MATCH_USER_AGENT_H_
#define MATCH_USER_AGENT_H_

#include <cstddef>
#include <string>
#include <vector>

using std::string;
using std::vector;

/// Number of child slots in every trie node.
#define UA_TREE_WIDTH              50
/// Number of entries in the character translation table.
#define UA_TRIETREE_TRANS_ARR_LEN  128

/// One node of the user-agent trie.
class TreeNode {
 public:
  /// Creates a node with no children that does not terminate a keyword, so a
  /// node is always in a defined state no matter how it is created.
  TreeNode() : end_flag_(false) {
    for (int i = 0; i < UA_TREE_WIDTH; ++i) {
      next_[i] = NULL;
    }
  }

  TreeNode *next_[UA_TREE_WIDTH];  ///< Child nodes, one slot per translated character code.
  bool end_flag_;                  ///< True when a keyword ends at this node.
};

/// Matches user-agent strings against a keyword set stored in a trie.
class UserAgentInfo {
 public :
  /// Creates an empty matcher: no root node and an all-zero translation table.
  /// Without this the root pointer would be indeterminate until UAInit() runs
  /// and NULL checks against it would be meaningless.
  UserAgentInfo() : ua_root_(NULL), ua_trans_arr_() {}

  /// Sets up the character translation table, which re-codes characters into
  /// a compact range so that trie nodes need fewer child slots.
  void TrietreeTransInit();

  /// Initialises the matcher (trie root node etc.) and loads the keywords.
  /// @param[in] user_agent_file file the keywords are read from
  /// @return true: initialisation succeeded       false: initialisation failed
  bool UAInit(const string &user_agent_file);

  /// Inserts a keyword into the trie used for user-agent matching.
  /// @param[in] keyword string to insert
  /// @return true: insertion succeeded       false: insertion failed
  bool UATreeNodeInsert(const char* keyword);

  /// Intended to build failure pointers so that matching can run in O(N)
  /// without backtracking.
  /// NOTE(review): TreeNode has no failure-pointer member; confirm that a
  /// definition of this method exists before calling it.
  void UAFaildBuild();

  /// Deletes the UA tree.
  /// @return true: deletion succeeded       false: deletion failed
  bool UATreeDestory();

  /// Recursively deletes UA tree nodes.
  /// @param[in] root root node of the (sub)tree to delete
  /// @return true: deletion succeeded       false: deletion failed
  bool UATreeNodeDestory(TreeNode* root);

  /// Clears the storage used for UA matching.
  /// Only needed when UA data is loaded dynamically.
  /// @return true: clearing succeeded       false: clearing failed
  bool UATreeClean();

  /// Loads the keyword data again.
  /// Only needed when UA data is loaded dynamically.
  /// @param[in] user_agent_file file the keywords are read from
  /// @return true: load succeeded       false: load failed
  bool UATreeDynamicLoad(const std::string &user_agent_file);

  /// Matches a user-agent string against the loaded keywords.
  /// @param[in] user_agent user agent of the requesting client
  /// @return true: a UA keyword matched       false: no UA keyword matched
  bool UAMatch(const char* user_agent);

  /// Reads the user-agent keyword file.
  /// @param[in]  user_agent_file file to read
  /// @param[out] result          receives the data read from the file
  /// @return true: file was read       false: reading failed
  bool ReadUAFile(const std::string &user_agent_file, vector<string> &result);

 private:
  TreeNode* ua_root_;  ///< Root node of the trie; NULL until UAInit() allocates it.
  char ua_trans_arr_[UA_TRIETREE_TRANS_ARR_LEN];  ///< Character -> child-slot code table.
};

#endif


 

 

#include "cpc_user_agent.h"bool UserAgentInfo::ReadUAFile(const std::string &user_agent_file, vector<string> &result) {  if (LoadFile(user_agent_file, 0, &result) == false) {    printf("Failed to load user_agent_file %s\n.", user_agent_file.c_str());    return false;  }  return true;}void UserAgentInfo::TrietreeTransInit() {  memset(ua_trans_arr_, 0, sizeof(char) * UA_TRIETREE_TRANS_ARR_LEN);  ua_trans_arr_[(int)'0'] = 1;  ua_trans_arr_[(int)'1'] = 2;  ua_trans_arr_[(int)'2'] = 3;  ua_trans_arr_[(int)'3'] = 4;  ua_trans_arr_[(int)'4'] = 5;  ua_trans_arr_[(int)'5'] = 6;  ua_trans_arr_[(int)'6'] = 7;  ua_trans_arr_[(int)'7'] = 8;  ua_trans_arr_[(int)'8'] = 9;  ua_trans_arr_[(int)'9'] = 10;  ua_trans_arr_[(int)'a'] = 11;  ua_trans_arr_[(int)'b'] = 12;  ua_trans_arr_[(int)'c'] = 13;  ua_trans_arr_[(int)'d'] = 14;  ua_trans_arr_[(int)'e'] = 15;  ua_trans_arr_[(int)'f'] = 16;  ua_trans_arr_[(int)'g'] = 17;  ua_trans_arr_[(int)'h'] = 18;  ua_trans_arr_[(int)'i'] = 19;  ua_trans_arr_[(int)'j'] = 20;  ua_trans_arr_[(int)'k'] = 21;  ua_trans_arr_[(int)'l'] = 22;  ua_trans_arr_[(int)'m'] = 23;  ua_trans_arr_[(int)'n'] = 24;  ua_trans_arr_[(int)'o'] = 25;  ua_trans_arr_[(int)'p'] = 26;  ua_trans_arr_[(int)'q'] = 27;  ua_trans_arr_[(int)'r'] = 28;  ua_trans_arr_[(int)'s'] = 29;  ua_trans_arr_[(int)'t'] = 30;  ua_trans_arr_[(int)'u'] = 31;  ua_trans_arr_[(int)'v'] = 32;  ua_trans_arr_[(int)'w'] = 33;  ua_trans_arr_[(int)'x'] = 34;  ua_trans_arr_[(int)'y'] = 35;  ua_trans_arr_[(int)'z'] = 36;  ua_trans_arr_[(int)'A'] = 11;  ua_trans_arr_[(int)'B'] = 12;  ua_trans_arr_[(int)'C'] = 13;  ua_trans_arr_[(int)'D'] = 14;  ua_trans_arr_[(int)'E'] = 15;  ua_trans_arr_[(int)'F'] = 16;  ua_trans_arr_[(int)'G'] = 17;  ua_trans_arr_[(int)'H'] = 18;  ua_trans_arr_[(int)'I'] = 19;  ua_trans_arr_[(int)'J'] = 20;  ua_trans_arr_[(int)'K'] = 21;  ua_trans_arr_[(int)'L'] = 22;  ua_trans_arr_[(int)'M'] = 23;  ua_trans_arr_[(int)'N'] = 24;  ua_trans_arr_[(int)'O'] = 25;  ua_trans_arr_[(int)'P'] = 26;  
ua_trans_arr_[(int)'Q'] = 27;  ua_trans_arr_[(int)'R'] = 28;  ua_trans_arr_[(int)'S'] = 29;  ua_trans_arr_[(int)'T'] = 30;  ua_trans_arr_[(int)'U'] = 31;  ua_trans_arr_[(int)'V'] = 32;  ua_trans_arr_[(int)'W'] = 33;  ua_trans_arr_[(int)'X'] = 34;  ua_trans_arr_[(int)'Y'] = 35;  ua_trans_arr_[(int)'Z'] = 36;  ua_trans_arr_[(int)'*'] = 37;    ua_trans_arr_[(int)'.'] = 38;  ua_trans_arr_[(int)'#'] = 39;  ua_trans_arr_[(int)' '] = 39;  ua_trans_arr_[(int)';'] = 40;  ua_trans_arr_[(int)'('] = 41;  ua_trans_arr_[(int)')'] = 42;  ua_trans_arr_[(int)'/'] = 43;  ua_trans_arr_[(int)'-'] = 44;  ua_trans_arr_[(int)'_'] = 45;  }bool UserAgentInfo::UAInit(const string &user_agent_file) {  ua_root_ = new (std::nothrow) TreeNode();   if (NULL == ua_root_) {    printf("new TreeNode err: ua_root_\n");    return false;  }  TrietreeTransInit();  vector<string> keywords;  if (!ReadUAFile(user_agent_file, keywords)) {    return false;  }  vector<string>::const_iterator it = keywords.begin();  for (; it != keywords.end(); ++it) {    if (!UATreeNodeInsert((*it).c_str())) {      printf("in UAInit insert TreeNode err\n");      return false;    }  }  return true;}bool UserAgentInfo::UATreeNodeInsert(const char* keyword) {  TreeNode* curr = ua_root_;  TreeNode* new_node = NULL;  if (NULL == keyword || NULL == curr) {    printf("insert TreeNode err\n");    return false;  }  int str_len = strlen(keyword);  for (int i = 0; i < str_len; ++i) {    if (NULL == curr->next_[ua_trans_arr_[(int)keyword[i]]]) {      new_node = new (std::nothrow) TreeNode();      if (NULL == new_node) {        printf("insert TreeNode err. 
new node err\n");        return false;      }      curr->next_[ua_trans_arr_[(int)keyword[i]]] = new_node;    }    curr = curr->next_[ua_trans_arr_[(int)keyword[i]]];    if (i == str_len - 1) {      if ( 0 == curr->end_flag_) {        curr->end_flag_ = true;      }      else {        printf("insert UA Duplicate: %s\n", keyword);      }    }  }  return true;}/*   void UserAgentInfo::UAFaildBuild() {   TreeNode* temp = NULL;   TreeNode* p = NULL;   ua_root_->fail_ = NULL;   queue_nodes_[queue_head_++] = ua_root_;   while (queue_head_ != queue_tail_) {   temp = queue_nodes_[queue_tail_++];   for(int i = 0; i < UA_TREE_WIDTH; ++i) {   if(NULL != temp->next_[i]) {   if(temp == ua_root_) {   temp->next_[i]->fail_ = ua_root_;   }   else {   p = temp->fail_;   while (NULL != p) {   if(NULL != p->next_[i]) {   temp->next_[i]->fail_ = p->next_[i];   break;   }   p = p->fail_;   }   if(NULL == p) {   temp->next_[i]->fail_ = ua_root_;   }   }   queue_nodes_[queue_head_++] = temp->next_[i];   }   }   }   } */bool UserAgentInfo::UATreeNodeDestory(TreeNode* root) {      if (root == NULL) {    printf("UATreeNodeDestory fail_d: root is null\n");    return false;  }  for (int i = 0; i < UA_TREE_WIDTH; ++i) {    if(NULL != (root->next_[i])) {      int ret = UATreeNodeDestory(root->next_[i]);      if (true != ret) {        printf("UATreeNodeDestory faild\n");        return false;      }    }  }  if (root != ua_root_) {    delete root;  }  return false;}bool UserAgentInfo::UATreeDestory() {  return UATreeNodeDestory(ua_root_);}bool UserAgentInfo::UATreeClean() {  int ret = UATreeDestory();  if (true != ret) {    printf("UATreeDestory faild\n");    return false;  }  /*     memset(queue_nodes_, 0, sizeof(TreeNode*) * (UA_TREE_WIDTH * UA_TRIETREE_TRANS_ARR_LEN));     queue_head_ = 1;     queue_tail_ = 1;   */  return true;}bool UserAgentInfo::UATreeDynamicLoad(const std::string &user_agent_file) {  vector<string> keywords;  if (!ReadUAFile(user_agent_file, keywords)) {    return false;  } 
 vector<string>::const_iterator it = keywords.begin();  for (; it != keywords.end(); ++it) {    if (!UATreeNodeInsert((*it).c_str())) {      printf("in UAInit insert TreeNode err\n");      return false;    }  }  return true;}bool UserAgentInfo::UAMatch(const char* user_agent) {  TreeNode* curr = ua_root_;  if (NULL == ua_root_) {    printf("UAMatch err: ua_root_ is null\n");    return false;  }  if (NULL == user_agent) {    printf("UAMatch err: user_agent is null\n");    return false;  }  for (unsigned int i = 0; i < strlen(user_agent); ++i) {    if (NULL != curr->next_[ua_trans_arr_[(int)user_agent[i]]]) {      curr = curr->next_[ua_trans_arr_[(int)user_agent[i]]];      if (curr->end_flag_) {        return true;      }    }    else {      if (curr->end_flag_) {        return true;      }      else {        curr = ua_root_;      }    }  }  return false;}


 

原创粉丝点击