利用tree匹配user_agent
来源:互联网 发布:数据库云备份 编辑:程序博客网 时间:2024/06/05 19:48
利用 Trie 树(字典树)结构,配合失败指针,可使匹配过程在 O(n) 时间内完成(n 为 user_agent 的长度)
#ifndef MATCH_USER_AGENT_H_
#define MATCH_USER_AGENT_H_

#include <cstddef>
#include <string>
#include <vector>

using std::string;
using std::vector;

/// Number of child slots per trie node. Must be larger than the biggest
/// slot index produced by UserAgentInfo::TrietreeTransInit().
#define UA_TREE_WIDTH 50
/// Size of the byte -> slot translation table (one entry per 7-bit code).
#define UA_TRIETREE_TRANS_ARR_LEN 128

/// One node of the keyword trie.
class TreeNode {
 public:
  /// Starts every node with no children and not marking a keyword end, so a
  /// node is safe to walk no matter how it was allocated (`new TreeNode`
  /// without parentheses would otherwise leave the fields indeterminate).
  TreeNode() : end_flag_(false) {
    for (int i = 0; i < UA_TREE_WIDTH; ++i) {
      next_[i] = NULL;
    }
  }

  TreeNode *next_[UA_TREE_WIDTH];  ///< Child per translated character slot.
  bool end_flag_;                  ///< True when a keyword ends at this node.
};

/// Keyword matcher for HTTP User-Agent strings backed by a trie.
class UserAgentInfo {
 public:
  /// Leaves the object in a defined "not initialised" state: no root node and
  /// an all-zero translation table. UAMatch()/UATreeNodeInsert() check for a
  /// NULL root, which only works if the pointer is not indeterminate.
  UserAgentInfo() : ua_root_(NULL) {
    for (int i = 0; i < UA_TRIETREE_TRANS_ARR_LEN; ++i) {
      ua_trans_arr_[i] = 0;
    }
  }

  /// Fills the translation table that maps characters to compact slot
  /// indices, which keeps each trie node small.
  void TrietreeTransInit();

  /// Initialises the matcher (root node, translation table) and loads the
  /// keywords from a file.
  /// @param[in] user_agent_file path of the keyword file
  /// @return true: initialisation succeeded  false: initialisation failed
  bool UAInit(const string &user_agent_file);

  /// Inserts one keyword into the trie.
  /// @param[in] keyword string to insert
  /// @return true: inserted  false: insertion failed
  bool UATreeNodeInsert(const char* keyword);

  /// Builds failure pointers so matching can run without backtracking.
  /// NOTE(review): the implementation published with this header has the
  /// definition commented out — confirm it is defined before calling.
  void UAFaildBuild();

  /// Destroys the UA trie.
  /// @return true: destroyed  false: destruction failed
  bool UATreeDestory();

  /// Recursively destroys the trie nodes reachable from `root`.
  /// @param[in] root node to start from
  /// @return true: destroyed  false: destruction failed
  bool UATreeNodeDestory(TreeNode* root);

  /// Clears the UA storage structures.
  /// Only needed when keywords are reloaded dynamically.
  /// @return true: cleared  false: clearing failed
  bool UATreeClean();

  /// Loads keyword data again from a file.
  /// Only needed when keywords are reloaded dynamically.
  /// @param[in] user_agent_file path of the keyword file
  /// @return true: loaded  false: loading failed
  bool UATreeDynamicLoad(const std::string &user_agent_file);

  /// Matches a request's User-Agent against the loaded keywords.
  /// @param[in] user_agent User-Agent string of the request
  /// @return true: a keyword matched  false: nothing matched
  bool UAMatch(const char* user_agent);

  /// Reads the keyword file into `result`.
  /// @param[in]  user_agent_file path of the keyword file
  /// @param[out] result          receives the entries that were read
  /// @return true: file read  false: reading failed
  bool ReadUAFile(const std::string &user_agent_file, vector<string> &result);

 private:
  TreeNode* ua_root_;  ///< Trie root; NULL until UAInit() allocates it.
  char ua_trans_arr_[UA_TRIETREE_TRANS_ARR_LEN];  ///< Byte -> slot table.
};

#endif
#include "cpc_user_agent.h"bool UserAgentInfo::ReadUAFile(const std::string &user_agent_file, vector<string> &result) { if (LoadFile(user_agent_file, 0, &result) == false) { printf("Failed to load user_agent_file %s\n.", user_agent_file.c_str()); return false; } return true;}void UserAgentInfo::TrietreeTransInit() { memset(ua_trans_arr_, 0, sizeof(char) * UA_TRIETREE_TRANS_ARR_LEN); ua_trans_arr_[(int)'0'] = 1; ua_trans_arr_[(int)'1'] = 2; ua_trans_arr_[(int)'2'] = 3; ua_trans_arr_[(int)'3'] = 4; ua_trans_arr_[(int)'4'] = 5; ua_trans_arr_[(int)'5'] = 6; ua_trans_arr_[(int)'6'] = 7; ua_trans_arr_[(int)'7'] = 8; ua_trans_arr_[(int)'8'] = 9; ua_trans_arr_[(int)'9'] = 10; ua_trans_arr_[(int)'a'] = 11; ua_trans_arr_[(int)'b'] = 12; ua_trans_arr_[(int)'c'] = 13; ua_trans_arr_[(int)'d'] = 14; ua_trans_arr_[(int)'e'] = 15; ua_trans_arr_[(int)'f'] = 16; ua_trans_arr_[(int)'g'] = 17; ua_trans_arr_[(int)'h'] = 18; ua_trans_arr_[(int)'i'] = 19; ua_trans_arr_[(int)'j'] = 20; ua_trans_arr_[(int)'k'] = 21; ua_trans_arr_[(int)'l'] = 22; ua_trans_arr_[(int)'m'] = 23; ua_trans_arr_[(int)'n'] = 24; ua_trans_arr_[(int)'o'] = 25; ua_trans_arr_[(int)'p'] = 26; ua_trans_arr_[(int)'q'] = 27; ua_trans_arr_[(int)'r'] = 28; ua_trans_arr_[(int)'s'] = 29; ua_trans_arr_[(int)'t'] = 30; ua_trans_arr_[(int)'u'] = 31; ua_trans_arr_[(int)'v'] = 32; ua_trans_arr_[(int)'w'] = 33; ua_trans_arr_[(int)'x'] = 34; ua_trans_arr_[(int)'y'] = 35; ua_trans_arr_[(int)'z'] = 36; ua_trans_arr_[(int)'A'] = 11; ua_trans_arr_[(int)'B'] = 12; ua_trans_arr_[(int)'C'] = 13; ua_trans_arr_[(int)'D'] = 14; ua_trans_arr_[(int)'E'] = 15; ua_trans_arr_[(int)'F'] = 16; ua_trans_arr_[(int)'G'] = 17; ua_trans_arr_[(int)'H'] = 18; ua_trans_arr_[(int)'I'] = 19; ua_trans_arr_[(int)'J'] = 20; ua_trans_arr_[(int)'K'] = 21; ua_trans_arr_[(int)'L'] = 22; ua_trans_arr_[(int)'M'] = 23; ua_trans_arr_[(int)'N'] = 24; ua_trans_arr_[(int)'O'] = 25; ua_trans_arr_[(int)'P'] = 26; ua_trans_arr_[(int)'Q'] = 27; ua_trans_arr_[(int)'R'] = 28; 
ua_trans_arr_[(int)'S'] = 29; ua_trans_arr_[(int)'T'] = 30; ua_trans_arr_[(int)'U'] = 31; ua_trans_arr_[(int)'V'] = 32; ua_trans_arr_[(int)'W'] = 33; ua_trans_arr_[(int)'X'] = 34; ua_trans_arr_[(int)'Y'] = 35; ua_trans_arr_[(int)'Z'] = 36; ua_trans_arr_[(int)'*'] = 37; ua_trans_arr_[(int)'.'] = 38; ua_trans_arr_[(int)'#'] = 39; ua_trans_arr_[(int)' '] = 39; ua_trans_arr_[(int)';'] = 40; ua_trans_arr_[(int)'('] = 41; ua_trans_arr_[(int)')'] = 42; ua_trans_arr_[(int)'/'] = 43; ua_trans_arr_[(int)'-'] = 44; ua_trans_arr_[(int)'_'] = 45; }bool UserAgentInfo::UAInit(const string &user_agent_file) { ua_root_ = new (std::nothrow) TreeNode(); if (NULL == ua_root_) { printf("new TreeNode err: ua_root_\n"); return false; } TrietreeTransInit(); vector<string> keywords; if (!ReadUAFile(user_agent_file, keywords)) { return false; } vector<string>::const_iterator it = keywords.begin(); for (; it != keywords.end(); ++it) { if (!UATreeNodeInsert((*it).c_str())) { printf("in UAInit insert TreeNode err\n"); return false; } } return true;}bool UserAgentInfo::UATreeNodeInsert(const char* keyword) { TreeNode* curr = ua_root_; TreeNode* new_node = NULL; if (NULL == keyword || NULL == curr) { printf("insert TreeNode err\n"); return false; } int str_len = strlen(keyword); for (int i = 0; i < str_len; ++i) { if (NULL == curr->next_[ua_trans_arr_[(int)keyword[i]]]) { new_node = new (std::nothrow) TreeNode(); if (NULL == new_node) { printf("insert TreeNode err. 
new node err\n"); return false; } curr->next_[ua_trans_arr_[(int)keyword[i]]] = new_node; } curr = curr->next_[ua_trans_arr_[(int)keyword[i]]]; if (i == str_len - 1) { if ( 0 == curr->end_flag_) { curr->end_flag_ = true; } else { printf("insert UA Duplicate: %s\n", keyword); } } } return true;}/* void UserAgentInfo::UAFaildBuild() { TreeNode* temp = NULL; TreeNode* p = NULL; ua_root_->fail_ = NULL; queue_nodes_[queue_head_++] = ua_root_; while (queue_head_ != queue_tail_) { temp = queue_nodes_[queue_tail_++]; for(int i = 0; i < UA_TREE_WIDTH; ++i) { if(NULL != temp->next_[i]) { if(temp == ua_root_) { temp->next_[i]->fail_ = ua_root_; } else { p = temp->fail_; while (NULL != p) { if(NULL != p->next_[i]) { temp->next_[i]->fail_ = p->next_[i]; break; } p = p->fail_; } if(NULL == p) { temp->next_[i]->fail_ = ua_root_; } } queue_nodes_[queue_head_++] = temp->next_[i]; } } } } */bool UserAgentInfo::UATreeNodeDestory(TreeNode* root) { if (root == NULL) { printf("UATreeNodeDestory fail_d: root is null\n"); return false; } for (int i = 0; i < UA_TREE_WIDTH; ++i) { if(NULL != (root->next_[i])) { int ret = UATreeNodeDestory(root->next_[i]); if (true != ret) { printf("UATreeNodeDestory faild\n"); return false; } } } if (root != ua_root_) { delete root; } return false;}bool UserAgentInfo::UATreeDestory() { return UATreeNodeDestory(ua_root_);}bool UserAgentInfo::UATreeClean() { int ret = UATreeDestory(); if (true != ret) { printf("UATreeDestory faild\n"); return false; } /* memset(queue_nodes_, 0, sizeof(TreeNode*) * (UA_TREE_WIDTH * UA_TRIETREE_TRANS_ARR_LEN)); queue_head_ = 1; queue_tail_ = 1; */ return true;}bool UserAgentInfo::UATreeDynamicLoad(const std::string &user_agent_file) { vector<string> keywords; if (!ReadUAFile(user_agent_file, keywords)) { return false; } vector<string>::const_iterator it = keywords.begin(); for (; it != keywords.end(); ++it) { if (!UATreeNodeInsert((*it).c_str())) { printf("in UAInit insert TreeNode err\n"); return false; } } return true;}bool 
UserAgentInfo::UAMatch(const char* user_agent) { TreeNode* curr = ua_root_; if (NULL == ua_root_) { printf("UAMatch err: ua_root_ is null\n"); return false; } if (NULL == user_agent) { printf("UAMatch err: user_agent is null\n"); return false; } for (unsigned int i = 0; i < strlen(user_agent); ++i) { if (NULL != curr->next_[ua_trans_arr_[(int)user_agent[i]]]) { curr = curr->next_[ua_trans_arr_[(int)user_agent[i]]]; if (curr->end_flag_) { return true; } } else { if (curr->end_flag_) { return true; } else { curr = ua_root_; } } } return false;}
- 利用tree匹配user_agent
- user_agent
- 利用nginx来屏蔽指定的user_agent的访问
- 利用nginx来屏蔽指定的user_agent的访问
- user_agent.php
- USER_AGENT 知识
- 爬虫-利用urllib爬去网页增加user_agent和proxy(Python)
- 查看浏览器 USER_AGENT
- Nginx HTTP User_agent
- 禁止指定 user_agent
- Android获取User_agent信息
- 移动设备user_agent
- Cookies中的User_Agent获取
- 利用?防止过度匹配
- 利用FLANN SURF匹配
- Trie Tree匹配算法实现
- 修改Titanium的user_agent配置
- PHP伪造user_agent的方法
- 从公交塞车,看C# 多线程同步问题
- Oracle 11gR2 RAC 新特性说明
- C# 动态生成窗口中的控件不显示的解决方法
- 在SSRS中调用web service实现报表多语言
- 通过文件路径获得文件大小
- 利用tree匹配user_agent
- Flex Socket安全策略<policy-file-request/>及应对方法
- Sql 远程连接
- JSP四个作用域
- python学习之数据库操作(mysql_ubuntu版)
- WebKit General Orientation
- Ext.form.TextField基本用法
- zz正则表达式语法
- android 退出activity 转吖转