Notes on multiclass classification with LR


1. Starting from a probabilistic point of view, the posterior probability that a sample x belongs to class C_k is

$$p(C_k \mid x) = \frac{p(x \mid C_k)\,p(C_k)}{\sum_j p(x \mid C_j)\,p(C_j)} = \frac{\exp(a_k)}{\sum_j \exp(a_j)} \qquad (4.63)$$

where:

$$a_k = \ln\big(p(x \mid C_k)\,p(C_k)\big)$$

Eq. 4.63 (the numbering follows PRML) takes a particularly compact form when a_k is, for example, a linear expression in x.
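
To make 4.63 concrete, here is a tiny worked example (the activations are chosen arbitrarily): for three classes with a = (2, 1, 0),

$$p(C_1 \mid x) = \frac{e^2}{e^2 + e^1 + e^0} \approx \frac{7.389}{11.107} \approx 0.665, \qquad p(C_2 \mid x) \approx 0.245, \qquad p(C_3 \mid x) \approx 0.090$$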


2. Assume p(x | C_k) is Gaussian, with a covariance matrix Σ shared by all classes:

$$p(x \mid C_k) = \mathcal{N}(x \mid \mu_k, \Sigma)$$

Then a_k = ln p(x|C_k)p(C_k) can be expressed as a linear expression:

$$a_k(x) = w_k^T x + w_{k0}, \qquad w_k = \Sigma^{-1}\mu_k, \qquad w_{k0} = -\tfrac{1}{2}\,\mu_k^T \Sigma^{-1} \mu_k + \ln p(C_k)$$
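
The linearity comes from the quadratic term cancelling. Expanding the Gaussian log-density (with the shared Σ assumed above):

$$\ln p(x \mid C_k)\,p(C_k) = -\tfrac{1}{2}\,x^T \Sigma^{-1} x + \mu_k^T \Sigma^{-1} x - \tfrac{1}{2}\,\mu_k^T \Sigma^{-1} \mu_k + \ln p(C_k) + \text{const}$$

The $-\tfrac{1}{2}x^T\Sigma^{-1}x$ term and the constant are identical for every class, so they cancel between the numerator and denominator of 4.63, leaving only the linear part $a_k(x) = w_k^T x + w_{k0}$.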


3. Solving for the model parameters: rather than fitting the Gaussian parameters themselves, multiclass LR learns the weights w_k directly by maximum likelihood, i.e. by minimizing the cross-entropy error over the training set (with 1-of-K target coding t_nk):

$$E(w) = -\sum_n \sum_k t_{nk} \ln y_{nk}, \qquad y_{nk} = p(C_k \mid x_n)$$

The code below minimizes this by batch gradient descent with an added L2 penalty, so each update is $w_k \leftarrow w_k - \alpha(\nabla_{w_k}E + \lambda w_k)$.
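
The gradient has the familiar (y - t) form, which drops out of the softmax derivative. Using $\partial y_{nj}/\partial a_{nk} = y_{nj}(I_{jk} - y_{nk})$ and $\sum_j t_{nj} = 1$:

$$\frac{\partial E_n}{\partial a_{nk}} = -\sum_j \frac{t_{nj}}{y_{nj}}\, y_{nj}\,(I_{jk} - y_{nk}) = -t_{nk} + y_{nk}\sum_j t_{nj} = y_{nk} - t_{nk}$$

and since $a_{nk} = w_k^T x_n$, the chain rule gives $\nabla_{w_k}E = \sum_n (y_{nk} - t_{nk})\,x_n$, which is exactly what grad[k][d] accumulates in the sample code.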


4. Fundamentally, doing classification through a probabilistic model is the sounder approach: intuitively, wherever one class's points are dense, the probability of belonging to that class is high. With a squared-error criterion, the distance involved cannot measure how close a point is to a class.

However, SVC also uses a similar kind of distance, except that SVC relies only on the support vectors, and maps the data into a high-dimensional space.


5. Sample code (multiclass LR trained by batch gradient descent):

#include <string>
#include <vector>
#include <cmath>
#include <map>

#include "base/flags.h"
#include "base/string_util.h"
#include "utils/hash_tables.h"
#include "common/file/simple_line_reader.h"

DEFINE_string(train_path, "./test.txt", "training file");
DEFINE_double(lambda, 0.001, "the weight of the regularization term");
DEFINE_double(alpha, 0.1, "the learning rate");
DEFINE_int32(n, 5000, "iteration count");

// original data
struct DataSample {
  std::string label;
  double predict_prob;
  utils::hash_map<std::string, double> features;

  void AddFeature(const std::string& fn, const double& v) {
    features[fn] = v;
  }
};

// inner class labels are [0, 1, 2, ..., label_count_ - 1]
class TrainningDataSet {
 public:
  TrainningDataSet() {
    label_count_ = 0;
    // feature 0 is the constant bias term, x[0] = 1
    inner_feature_map_["cb"] = 0;
    outer_feature_map_[0] = "cb";
    feature_count_ = 1;
  }

  // line format: "A 1:0.2 2:0.4 3:77 4:0.3"
  bool LoadSamplesFromFile(const std::string& file_path) {
    file::SimpleLineReader line_reader;
    line_reader.OpenOrDie(file_path);
    std::vector<std::string> lines;
    line_reader.ReadLines(&lines);
    for (size_t i = 0; i < lines.size(); ++i) {
      std::vector<std::string> parts;
      DataSample sample;
      SplitString(lines[i], ' ', &parts);
      sample.label = parts[0];
      AddLabel(parts[0]);
      for (size_t j = 1; j < parts.size(); ++j) {
        std::vector<std::string> fn_v;
        SplitString(parts[j], ':', &fn_v);
        if (fn_v.size() != 2) {
          continue;
        }
        double v = 0.0;
        StringToDouble(fn_v[1], &v);
        sample.AddFeature(fn_v[0], v);
        AddFeature(fn_v[0]);
      }
      samples_.push_back(sample);
    }
    return true;
  }

  void Train() {
    AllocAuxParam();
    TrainInternal(FLAGS_n);
    Predict();
    FreeAuxParam();
  }

  void Predict() {
    std::vector<double> prob(label_count_);
    for (auto it = samples_.begin(); it != samples_.end(); ++it) {
      for (int k = 0; k < label_count_; ++k) {
        prob[k] = w[k][0];  // bias, x[0] = 1
        for (auto sit = it->features.begin(); sit != it->features.end(); ++sit) {
          prob[k] += sit->second * w[k][inner_feature_map_[sit->first]];
        }
        prob[k] = exp(prob[k]);
      }
      double total_exp = 0.0;
      for (int k = 0; k < label_count_; ++k) {
        total_exp += prob[k];
      }
      it->predict_prob = prob[inner_label_map_[it->label]] / total_exp;
      VLOG(0) << "sample predict [" << it->label << "]: " << it->predict_prob;
    }
  }

  void TrainInternal(int32 count) {
    for (int32 i = 0; i < count; ++i) {
      // calculate the posterior prob: post_prob[n][k] = P(Ck | xn)
      for (size_t n = 0; n < samples_.size(); ++n) {
        const DataSample& sample = samples_[n];
        for (int32 k = 0; k < label_count_; ++k) {
          // a_k = W[k]*X, with x[0] = 1
          post_prob[n][k] = w[k][0];
          for (auto it = sample.features.begin(); it != sample.features.end(); ++it) {
            int32 feature_idx = inner_feature_map_[it->first];
            post_prob[n][k] += it->second * w[k][feature_idx];
          }
          post_prob[n][k] = exp(post_prob[n][k]);
        }
        double exp_total = 0.0;
        for (int32 k = 0; k < label_count_; ++k) {
          exp_total += post_prob[n][k];
        }
        for (int32 k = 0; k < label_count_; ++k) {
          post_prob[n][k] /= exp_total;
        }
      }
      // calculate the gradient dE/dW[k] and take one descent step
      for (int32 k = 0; k < label_count_; ++k) {
        for (int32 d = 0; d < feature_count_; ++d) {
          grad[k][d] = 0.0;
        }
        // accumulate (Ynk - Tnk)*Xn over every sample
        for (size_t n = 0; n < samples_.size(); ++n) {
          double Tnk = GetTnk(n, k);
          double Ynk = post_prob[n][k];
          for (int32 d = 0; d < feature_count_; ++d) {
            grad[k][d] += GetXnd(n, d) * (Ynk - Tnk);
          }
        }
        for (int32 d = 0; d < feature_count_; ++d) {
          grad[k][d] += w[k][d] * FLAGS_lambda;  // L2 regularization
          w[k][d] -= FLAGS_alpha * grad[k][d];
        }
      }
    }
  }

  void Dump() {
    utils::hash_map<std::string, int>::iterator it;
    for (it = inner_label_map_.begin(); it != inner_label_map_.end(); ++it) {
      VLOG(0) << "label: " << it->first << ", " << it->second;
    }
    for (it = inner_feature_map_.begin(); it != inner_feature_map_.end(); ++it) {
      VLOG(0) << "feature: " << it->first << ", " << it->second;
    }
  }

 private:
  double** w;          // w[k][d],         update: w[k] = w[k] - alpha*grad[k]
  double** grad;       // grad[k][d] = sum_n (Ynk - Tnk)*Xn + lambda*w[k]
  double** post_prob;  // post_prob[n][k] = P(Ck | xn)

  std::vector<DataSample> samples_;

  utils::hash_map<std::string, int> inner_label_map_;  // "A" -> 0, "B" -> 1
  utils::hash_map<int, std::string> outer_label_map_;  //  0 -> "A",  1 -> "B"
  int32 label_count_;

  int AddLabel(const std::string& outer_label) {
    utils::hash_map<std::string, int>::iterator it = inner_label_map_.find(outer_label);
    if (it == inner_label_map_.end()) {
      // a newly seen label gets the next inner id
      inner_label_map_[outer_label] = label_count_;
      outer_label_map_[label_count_] = outer_label;
      return label_count_++;
    }
    return it->second;
  }

  utils::hash_map<std::string, int> inner_feature_map_;  // "cb" -> 0, "1" -> 1, "2" -> 2, ...
  utils::hash_map<int, std::string> outer_feature_map_;
  int32 feature_count_;

  void AddFeature(const std::string& feature_name) {
    utils::hash_map<std::string, int>::iterator it = inner_feature_map_.find(feature_name);
    if (it == inner_feature_map_.end()) {
      inner_feature_map_[feature_name] = feature_count_;
      outer_feature_map_[feature_count_] = feature_name;
      feature_count_++;
    }
  }

  // X(n, d): value of feature d for sample n; d == 0 is the constant bias
  double GetXnd(const int32& n, const int32& d) {
    if (d == 0) {
      return 1.0;
    }
    DataSample& sample = samples_[n];
    std::string& ol = outer_feature_map_[d];
    auto it = sample.features.find(ol);
    return (it != sample.features.end()) ? it->second : 0.0;
  }

  // T(n, k): 1 if sample n belongs to class k, else 0 (1-of-K coding)
  double GetTnk(const int32& n, const int32& k) {
    return (inner_label_map_[samples_[n].label] == k) ? 1.0 : 0.0;
  }

  void FreeAuxParam() {
    for (int k = 0; k < label_count_; ++k) {
      delete[] w[k];
      delete[] grad[k];
    }
    delete[] w;
    delete[] grad;
    for (size_t n = 0; n < samples_.size(); ++n) {
      delete[] post_prob[n];
    }
    delete[] post_prob;
  }

  void AllocAuxParam() {
    // w[k][d], grad[k][d]
    w = new double*[label_count_];
    grad = new double*[label_count_];
    for (int k = 0; k < label_count_; ++k) {
      w[k] = new double[feature_count_];
      grad[k] = new double[feature_count_];
      for (int f = 0; f < feature_count_; ++f) {
        w[k][f] = 0.0;
      }
    }
    // post_prob[n][k]
    post_prob = new double*[samples_.size()];
    for (size_t n = 0; n < samples_.size(); ++n) {
      post_prob[n] = new double[label_count_];
    }
  }
};

int main(int argc, char* argv[]) {
  base::ParseCommandLineFlags(&argc, &argv, false);
  TrainningDataSet tds;
  tds.LoadSamplesFromFile(FLAGS_train_path);
  tds.Dump();
  tds.Train();
  return 0;
}
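
One caveat about the code above: it exponentiates the raw activation with exp(), which can overflow a double once W*X grows large, turning the posteriors into inf/nan. A common hardening (not part of the original program) is to subtract the per-sample maximum activation before exponentiating; the ratios, and hence the softmax, are mathematically unchanged. A minimal standalone sketch:

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable softmax: p[k] = exp(a[k] - max_a) / sum_j exp(a[j] - max_a).
// Subtracting max_a leaves every ratio unchanged but keeps exp() in range.
// Assumes a non-empty activation vector.
std::vector<double> StableSoftmax(const std::vector<double>& a) {
  double max_a = *std::max_element(a.begin(), a.end());
  std::vector<double> p(a.size());
  double total = 0.0;
  for (size_t k = 0; k < a.size(); ++k) {
    p[k] = std::exp(a[k] - max_a);
    total += p[k];
  }
  for (size_t k = 0; k < a.size(); ++k) {
    p[k] /= total;
  }
  return p;
}

With only two classes and small feature values, as in the test data below, the plain exp() version happens to stay in range, which is why the program above still trains correctly.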

Test samples:

A 1:0.20 2:0.70

A 1:0.10 2:0.80

A 1:0.30 2:0.60

A 1:0.05 2:0.94

A 1:0.77 2:0.22

A 1:0.44 2:0.55

B 1:0.20 2:0.81

B 1:0.30 2:0.71

B 1:1.00 2:0.01

B 1:0.50 2:0.51

B 1:0.40 2:0.65

B 1:0.70 2:0.40


Results:

I0930 09:53:14.443656 32046 lr.cc:100] sample predict [A]: 0.926048
I0930 09:53:14.443763 32046 lr.cc:100] sample predict [A]: 0.932346
I0930 09:53:14.443836 32046 lr.cc:100] sample predict [A]: 0.919215
I0930 09:53:14.443896 32046 lr.cc:100] sample predict [A]: 0.592285
I0930 09:53:14.443940 32046 lr.cc:100] sample predict [A]: 0.421599
I0930 09:53:14.443987 32046 lr.cc:100] sample predict [A]: 0.499967
I0930 09:53:14.444035 32046 lr.cc:100] sample predict [B]: 0.569759
I0930 09:53:14.444111 32046 lr.cc:100] sample predict [B]: 0.593065
I0930 09:53:14.444164 32046 lr.cc:100] sample predict [B]: 0.740223
I0930 09:53:14.444211 32046 lr.cc:100] sample predict [B]: 0.638351
I0930 09:53:14.444258 32046 lr.cc:100] sample predict [B]: 0.816627
I0930 09:53:14.444300 32046 lr.cc:100] sample predict [B]: 0.955107

The few points in the middle are very close to each other, which is why their predicted probs are not that high.


