一种快速的无监督的向量化方法做地标识别
来源:互联网 发布:淘宝网10至20包邮 编辑:程序博客网 时间:2024/05/18 00:08
这是我之前写的一篇文章《Extremely Fast Codebook Learning for Landmark Recognition》。主要是利用《Random projection trees and low dimensional manifolds》这篇论文中的方法进行地标识别。论文的主要思想很简单:random projection tree 随机选择一个方向进行投影,再根据阈值将投影之后的数据划分到左右子树。这里的思想有点类似于 LSH 在欧几里得空间的做法,只不过这里的 bin 只有左右两个。树分裂到一定深度(比如 10)就停止;再利用多棵树构成森林,消除随机性带来的不确定性,提高算法的泛化性。算法的速度非常快,在实际使用中也比其他无监督的做法要快得多。这里主要贴上个人写的主要 C++ 实现代码,完整代码放在 https://github.com/guoyilin/ERPF:
struct TreeNode { vector<int> indices; int index; int height; float thresholds[2]; vector<float> w; TreeNode* left; TreeNode* right;};class SpatialTree {private: vector<vector<float> > *data; void splitF(TreeNode *node); void splitFbyFixed(TreeNode *node); vector<float> dot(const vector<vector<float> > &v1, const vector<float> &v2); float dot(const vector<float> &v1, const vector<float> &v2); float find_percentile(float percentage, const vector<float> &w); void save_rpTree(const string& rptree_file); void save_rpTree_sub(TreeNode *tree, ptree &pt);public: float spill; string rule; TreeNode *root; int leaf_count; int min_items; int samples_rp; int height; int dimension; SpatialTree(); vector<float> stringTovector(string& w); void print_rpTree(TreeNode *tree); SpatialTree(vector<vector<float> > &data, const string &rule, float spill, int height); void create_rpTree(const string& rptree_file); void load_rpTree(const string& rptree_file); vector<int> retrievalLeaf(vector<float> &feature);};#endif
<pre name="code" class="cpp">void SpatialTree::save_rpTree_sub(TreeNode *tree, ptree &pt) { if (tree->index > -1)//// leaf pt.put("xmlattr.index", tree->index); else { ////not leaf string w_string; for (std::vector<float>::const_iterator iter = tree->w.begin(); iter != tree->w.end(); ++iter) if (w_string.empty()) { ostringstream ostr; ostr << *iter; w_string += ostr.str(); w_string += ","; } else { ostringstream ostr; ostr << *iter; w_string += ostr.str(); w_string += ","; } w_string = w_string.substr(0, w_string.size() - 1); pt.put("xmlattr.w", w_string); pt.put("xmlattr.t0", tree->thresholds[0]); pt.put("xmlattr.t1", tree->thresholds[1]); ptree left_child; ptree right_child; left_child.put("xmlattr.height", tree->left->height); save_rpTree_sub(tree->left, left_child); pt.put_child("left", left_child); right_child.put("xmlattr.height", tree->right->height); save_rpTree_sub(tree->right, right_child); pt.put_child("right", right_child); }}void SpatialTree::save_rpTree(const string& rptree_file) { ptree pt; ptree root_tree; root_tree.put("xmlattr.height", root->height); root_tree.put("xmlattr.count", this->leaf_count); root_tree.put("xmlattr.dimension", this->dimension); root_tree.put("xmlattr.min_items", this->min_items); root_tree.put("xmlattr.samples_rp", this->samples_rp); root_tree.put("xmlattr.rule", this->rule); root_tree.put("xmlattr.spill", this->spill); if (root->height != 0) { save_rpTree_sub(root, root_tree); } pt.put_child("root", root_tree); boost::property_tree::write_xml(rptree_file.c_str(), pt);}SpatialTree::SpatialTree() { this->leaf_count = 0; this->min_items = 64; this->samples_rp = 10;}float SpatialTree::find_percentile(float percentage, const vector<float> &w) { vector<float> wx_sort(w); std::sort(wx_sort.begin(), wx_sort.end()); int n = wx_sort.size(); float i = (n + 1) * percentage; int j = (int) floor(i); float g = fmod(i, 1); if (g == 0) return wx_sort[j]; else return (1 - g) * wx_sort[j - 1] + g * wx_sort[j];}vector<int> 
SpatialTree::retrievalLeaf(vector<float> &feature) { vector<int> leafs; queue<TreeNode *> queue; queue.push(root); while (!queue.empty()) { TreeNode *item = queue.front(); if (item->index != -1) leafs.push_back(item->index); else { float wx = this->dot(item->w, feature); if (wx >= item->thresholds[0]) queue.push(item->right); if (wx < item->thresholds[1]) queue.push(item->left); } queue.pop(); } return leafs;}void SpatialTree::print_rpTree(TreeNode *tree) { if (tree->index != -1) { cout << "leaf:" << tree->height << endl; return; } for (int i = 0; i < tree->w.size(); i++) { cout << tree->w[i] << " "; } if (tree->index == -1) { cout << "height:" << tree->height << "\t"; cout << "w size:" << tree->w.size() << endl; print_rpTree(tree->left); print_rpTree(tree->right); }}vector<float> SpatialTree::stringTovector(string& w) { vector<float> result; vector<string> strs; boost::split(strs, w, boost::is_any_of(",")); for (int i = 0; i < strs.size(); i++) { float value = atof(strs[i].c_str()); result.push_back(value); } return result;}void SpatialTree::load_rpTree(const string& rptree_file) { ptree pt; read_xml(rptree_file, pt); this->root = new TreeNode(); root->height = pt.get<int> ("root.xmlattr.height"); this->min_items = pt.get<int> ("root.xmlattr.min_items"); this->samples_rp = pt.get<int> ("root.xmlattr.samples_rp"); this->dimension = pt.get<int> ("root.xmlattr.dimension"); this->leaf_count = pt.get<int> ("root.xmlattr.count"); queue<ptree> q; queue<TreeNode *> q2; ptree root_pt; root_pt = pt.get_child("root"); q.push(root_pt); q2.push(root); while (!q.empty()) { ptree node = q.front(); TreeNode *current = q2.front(); int index = node.get<int> ("xmlattr.index", -1); if (index != -1) { current->index = node.get<int> ("xmlattr.index", -1); } else { current->index = -1; current->thresholds[0] = node.get<float> ("xmlattr.t0"); current->thresholds[1] = node.get<float> ("xmlattr.t1"); string w = node.get<string> ("xmlattr.w"); current->w = stringTovector(w); TreeNode 
*leftNode = new TreeNode(); current->left = leftNode; TreeNode *rightNode = new TreeNode(); current->right = rightNode; leftNode->height = node.get<int> ("left.xmlattr.height"); rightNode->height = node.get<int> ("right.xmlattr.height"); ptree left = node.get_child("left"); ptree right = node.get_child("right"); q.push(left); q.push(right); q2.push(current->left); q2.push(current->right); } q.pop(); q2.pop(); }}vector<float> SpatialTree::dot(const vector<vector<float> > &v1, const vector<float> &v2) { if (v2.size() == 0) cout << "error in compute dot!" << endl; vector<float> result(v1.size()); for (int j = 0; j < v1.size(); j++) for (int i = 0; i < v2.size(); i++) { result[j] += v1[j][i] * v2[i]; } return result;}float SpatialTree::dot(const vector<float> &v1, const vector<float> &v2) { if (v1.size() != v2.size() || v1.size() == 0) { cout << "error in compute dot!" << endl; } float result = 0; for (int i = 0; i < v1.size(); i++) { result += v1[i] * v2[i]; } return result;}void SpatialTree::splitF(TreeNode *node) { std::random_device rd; std::mt19937 gen(rd()); std::normal_distribution<float> distribution(0, 1); vector<vector<float> > W(this->samples_rp); for (int i = 0; i < W.size(); i++) { vector<float> v(this->dimension); W[i] = v; } vector<float> sum(this->samples_rp); for (int i = 0; i < this->samples_rp; i++) { for (int j = 0; j < this->dimension; j++) { W[i][j] = (float) distribution(gen); sum[i] += W[i][j]; } } for (int i = 0; i < this->samples_rp; i++) { for (int j = 0; j < this->dimension; j++) { W[i][j] = W[i][j] / sum[i]; } } vector<float> min_val(this->samples_rp, INFINITY); vector<float> max_val(this->samples_rp, -INFINITY); for (int i = 0; i < node->indices.size(); i++) { vector<float> point = (*this->data)[i]; vector<float> Wx = this->dot(W, point); for (int j = 0; j < min_val.size(); j++) { if (min_val[j] > Wx[j]) min_val[j] = Wx[j]; } for (int j = 0; j < max_val.size(); j++) { if (max_val[j] < Wx[j]) max_val[j] = Wx[j]; } } int max_index = -1; 
float max_value = -INFINITY; for (int i = 0; i < max_val.size(); i++) { max_val[i] = max_val[i] - min_val[i]; if (max_value < max_val[i]) { max_value = max_val[i]; max_index = i; } } node->w = W[max_index];}SpatialTree::SpatialTree(vector<vector<float> > &data, const string &rule, float spill, int height) { this->data = &data; this->rule = rule; this->spill = spill; min_items = 64; samples_rp = 10; this->height = height; this->leaf_count = 0;}void SpatialTree::create_rpTree(const string& rptree_file) { vector<int> indices(data->size()); for (vector<int>::iterator iter = indices.begin(); iter != indices.end(); ++iter) { *iter = leaf_count; leaf_count++; } root = new TreeNode(); root->indices = indices; root->height = this->height - 1; root->index = -1; leaf_count = 0; this->dimension = (*data)[0].size(); queue<TreeNode *> q; q.push(this->root); while (!q.empty()) { TreeNode *node = q.front(); if (node->height == 0 || node->indices.size() < this->min_items) { node->index = leaf_count; leaf_count++; } else { this->splitF(node); vector<float> wx(node->indices.size()); for (int i = 0; i < node->indices.size(); i++) { wx[i] = this->dot((*this->data)[node->indices[i]], node->w); } float low_percent = 0.5 - this->spill / 2; float high_percent = 0.5 + this->spill / 2; node->thresholds[0] = this->find_percentile(low_percent, wx); node->thresholds[1] = this->find_percentile(high_percent, wx); TreeNode *left = new TreeNode(); TreeNode *right = new TreeNode(); node->left = left; node->right = right; left->height = node->height - 1; right->height = node->height - 1; left->index = -1; right->index = -1; for (int i = 0; i < node->indices.size(); i++) { if (wx[i] <= node->thresholds[1]) left->indices.push_back(node->indices[i]); if (wx[i] >= node->thresholds[0]) right->indices.push_back(node->indices[i]); } q.push(left); q.push(right); } q.pop(); } // this->save_rpTree(rptree_file);}
0 0
- 一种快速的无监督的向量化方法做地标识别
- 文本小票的一种无监督聚类方法
- 文本向量化的方法
- 利用向量化对图像做快速赋值
- 基于伪label的一类无监督特征选择方法
- 有监督 与 无监督的区别 何时采用 有监督 or 无监督
- 一种快速文件传输的方法
- 快速排序的一种方法
- 词的向量化表示
- CLPlacemark地标的讲解
- 一种快速的未登陆词识别方法(原理和实现)
- 无监督和有监督算法的区别
- 无监督学习和监督学习的用途
- 监督学习和无监督学习的详细介绍
- 有监督学习 和 无监督学习 的具体含义
- 无监督和有监督算法的区别
- 有监督学习和无监督学习的区别
- 机器学习模型的基本分类--有监督、无监督
- vim之入门篇:安装/使用/配置
- VC使用: vs2003的工程升级到vs2010的问题汇总
- 最大权闭合子图的解法
- hdu 1171 Big Event
- Linux源码学习笔记:syscalls
- 一种快速的无监督的向量化方法做地标识别
- association 的使用
- ActiveX组件开发和使用
- HDU4920 Matrix multiplication (CPU cache对程序的影响)
- Python中的条件选择和循环语句
- rman configure命令
- mysql由浅入深视频-有高可用架构、调优、排错等
- const使用及问题总结
- 匹配一个2个相邻并且相同的字符