loss.h in difacto
The complete annotated source of loss.h. It implements the logistic loss of a factorization machine: w holds the linear weights and V the feature embeddings; Evaluate() computes predictions and metrics, CalcGrad() computes the gradients, and the inner Data struct maps the sparse model layout onto the data.

/**
 * \brief the loss function
 */
template <typename T>
class Loss {
 public:
  /**
   * create and init the loss function
   *
   * @param data X and Y
   * @param model w and V
   * @param model_siz 1 + length V[i]
   * @param conf difacto conf
   */
  // layout of the parameters inside the model vector
  Loss(const RowBlock<unsigned>& data,
       const std::vector<T>& model,
       const std::vector<int>& model_siz,
       const Config& conf) {
    nt_ = conf.num_threads();
    // init w
    w.Load(0, data, model, model_siz);

    // init V
    if (conf.embedding_size() == 0) return;
    const auto& cf = conf.embedding(0);
    if (cf.dim() == 0) return;
    V.Load(cf.dim(), data, model, model_siz);
    V.dropout = cf.dropout();
    V.grad_clipping = cf.grad_clipping();
    V.grad_normalization = cf.grad_normalization();
  }

  ~Loss() { }

  /**
   * \brief evaluate the progress
   * predict_y
   *   py = X * w + .5 * sum((X*V).^2 - (X.*X)*(V.*V), 2);
   *
   * sum(A, 2) : sum the rows of A
   * .* : element-wise times
   */
  void Evaluate(Progress* prog) {
    // py = X * w
    py_.resize(w.X.size);
    SpMV::Times(w.X, w.weight, &py_, nt_);
    BinClassEval<T> eval(w.X.label, py_.data(), py_.size(), nt_);
    prog->objv_w() = eval.LogitObjv();

    // py += .5 * sum((X*V).^2 - (X.*X)*(V.*V), 2);
    if (!V.weight.empty()) {
      // tmp = (X.*X)*(V.*V)
      std::vector<T> vv = V.weight;
      for (auto& v : vv) v *= v;
      CHECK_EQ(vv.size(), V.pos.size() * V.dim);
      std::vector<T> xxvv(V.X.size * V.dim);
      SpMM::Times(V.XX, vv, &xxvv, nt_);

      // V.XV = X*V
      V.XV.resize(xxvv.size());
      SpMM::Times(V.X, V.weight, &V.XV, nt_);

      // py += .5 * sum((V.XV).^2 - xxvv)
#pragma omp parallel for num_threads(nt_)
      for (size_t i = 0; i < py_.size(); ++i) {
        T* t = V.XV.data() + i * V.dim;
        T* tt = xxvv.data() + i * V.dim;
        T s = 0;
        for (int j = 0; j < V.dim; ++j) s += t[j] * t[j] - tt[j];
        py_[i] += .5 * s;
      }
      prog->objv() = eval.LogitObjv();
    } else {
      prog->objv() = prog->objv_w();
    }

    // auc, acc, logloss, copc
    prog->auc() = eval.AUC();
    prog->new_ex() = w.X.size;
    prog->count() = 1;
    // prog->copc() = eval.Copc();
  }

  /*!
   * \brief compute the gradients
   * p = - y ./ (1 + exp (y .* py));
   * grad_w = X' * p;
   * grad_u = X' * diag(p) * X * V - diag((X.*X)'*p) * V
   */
  void CalcGrad(std::vector<T>* grad) {
    // p = ... (reuse py_)
    CHECK_EQ(py_.size(), w.X.size) << "call *evaluate* first";
#pragma omp parallel for num_threads(nt_)
    for (size_t i = 0; i < py_.size(); ++i) {
      T y = w.X.label[i] > 0 ? 1 : -1;
      py_[i] = - y / ( 1 + exp ( y * py_[i] ));
    }

    // grad_w = ...
    SpMV::TransTimes(w.X, py_, &w.weight, nt_);
    w.Save(grad);

    // grad_u = ...
    if (!V.weight.empty()) {
      int dim = V.dim;
      // xxp = (X.*X)'*p
      size_t m = V.pos.size();
      std::vector<T> xxp(m);
      SpMM::TransTimes(V.XX, py_, &xxp, nt_);

      // V = - diag(xxp) * V
      CHECK_EQ(V.weight.size(), dim * m);
#pragma omp parallel for num_threads(nt_)
      for (size_t i = 0; i < m; ++i) {
        T* v = V.weight.data() + i * dim;
        for (int j = 0; j < dim; ++j) v[j] *= - xxp[i];
      }

      // V.XV = diag(p) * X * V
      size_t n = py_.size();
      CHECK_EQ(V.XV.size(), n * dim);
#pragma omp parallel for num_threads(nt_)
      for (size_t i = 0; i < n; ++i) {
        T* y = V.XV.data() + i * dim;
        for (int j = 0; j < dim; ++j) y[j] *= py_[i];
      }

      // V += X' * V.XV
      SpMM::TransTimes(V.X, V.XV, (T)1, V.weight, &V.weight, nt_);

      // some preprocessing
      if (V.grad_clipping > 0) {
        T gc = V.grad_clipping;
        for (T& g : V.weight) g = g > gc ? gc : (g < -gc ? -gc : g);
      }
      if (V.dropout > 0) {
        for (T& g : V.weight) {
          if ((T)rand() / RAND_MAX > 1 - V.dropout) g = 0;
        }
      }
      if (V.grad_normalization) Normalize(V.weight);
    }
    V.Save(grad);
  }

  void Normalize(std::vector<T>& grad) {
    T norm = 0;
    for (T g : grad) norm += g * g;
    if (norm < 1e-10) return;
    norm = sqrt(norm);
    for (T& g : grad) g = g / norm;
  }

  virtual void Predict(Stream* fo, bool prob_out) {
    if (py_.empty()) {
      py_.resize(w.X.size);
      SpMV::Times(w.X, w.weight, &py_, nt_);
    }
    ostream os(fo);
    if (prob_out) {
      for (auto p : py_) os << 1.0 / (1.0 + exp( - p )) << "\n";
    } else {
      for (auto p : py_) os << p << "\n";
    }
  }

 private:
  /// \brief store data and model w (dim==0) and V (dim >= 1)
  struct Data {
    /// \brief get data and model
    void Load(int d,
              const RowBlock<unsigned>& data,
              const std::vector<T>& model,
              const std::vector<int>& model_siz) {
      // init pos and w
      std::vector<unsigned> col_map;
      dim = d;
      if (dim == 0) {  // w
        pos.resize(model_siz.size());
        weight.resize(model_siz.size());
        unsigned p = 0;
        for (size_t i = 0; i < model_siz.size(); ++i) {
          if (model_siz[i] == 0) {
            pos[i] = (unsigned)-1;
          } else {
            pos[i] = p;
            weight[i] = model[p];
            p += model_siz[i];
          }
        }
        CHECK_EQ((size_t)p, model.size());
      } else {  // V
        col_map.resize(model_siz.size());
        unsigned k = 0, p = 0;
        for (size_t i = 0; i < model_siz.size(); ++i) {
          if (model_siz[i] == dim + 1) {
            pos.push_back(p+1);  // skip the first dim
            col_map[i] = ++ k;
          }
          p += model_siz[i];
        }
        CHECK_EQ((size_t)p, model.size());
        weight.resize(pos.size() * dim);
        for (size_t i = 0; i < pos.size(); ++i) {
          memcpy(weight.data()+i*dim, model.data()+pos[i], dim*sizeof(T));
        }
      }
      if (weight.empty()) return;

      // init X
      if (dim == 0) {  // w
        X = data;
      } else {  // V
        // pick the columns with model_siz = dim + 1
        os.push_back(0);
        for (size_t i = 0; i < data.size; ++i) {
          for (size_t j = data.offset[i]; j < data.offset[i+1]; ++j) {
            unsigned d = data.index[j];
            unsigned k = col_map[d];
            if (k > 0) {
              idx.push_back(k-1);
              if (data.value) val_.push_back(data.value[j]);
            }
          }
          os.push_back(idx.size());
        }
        X.size = data.size;
        X.offset = BeginPtr(os);
        X.value = BeginPtr(val_);
        X.index = BeginPtr(idx);
      }

      // init XX
      XX = X;
      if (X.value) {
        val2_.resize(X.offset[X.size]);
        for (size_t i = 0; i < val2_.size(); ++i) {
          val2_[i] = X.value[i] * X.value[i];
        }
        XX.value = BeginPtr(val2_);
      }
    }

    /// \brief set the gradient
    void Save(std::vector<T>* grad) const {
      if (weight.empty()) return;
      int d = dim == 0 ? 1 : dim;
      CHECK_EQ(weight.size(), pos.size()*d);
      for (size_t i = 0; i < pos.size(); ++i) {
        if (pos[i] == (unsigned)-1) continue;
        memcpy(grad->data()+pos[i], weight.data()+i*d, d*sizeof(T));
      }
    }

    int dim;
    RowBlock<unsigned> X, XX;  // instance data; XX = X.*X
    std::vector<T> weight;     // model weights
    std::vector<unsigned> pos;
    std::vector<T> XV;
    T dropout = 0;
    T grad_clipping = 0;
    T grad_normalization = 0;

   private:
    std::vector<T> val_, val2_;
    std::vector<size_t> os;
    std::vector<unsigned> idx;
  };

  Data w, V;           // hold w and V respectively
  std::vector<T> py_;
  int nt_;             // number of threads
};
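The formula in the Evaluate() comment is the standard factorization-machine identity: the O(d^2 * k) pairwise interaction term sum over i<j of x_i * x_j * <V_i, V_j> collapses into the O(d * k) form .5 * sum((X*V).^2 - (X.*X)*(V.*V), 2). A minimal dense sketch that checks the two forms agree on a single toy instance (the values are made up; difacto does the same over the sparse RowBlock with SpMM):

#include <cstdio>

int main() {
  const int n = 4;   // number of features
  const int k = 2;   // embedding dimension
  double x[n] = {1.0, 0.0, 2.0, -1.0};          // one instance
  double V[n][k] = {{0.1, -0.2}, {0.3, 0.4},    // one embedding per feature
                    {-0.5, 0.6}, {0.7, -0.8}};

  // fast form: .5 * sum_f ((sum_i x_i V_if)^2 - sum_i x_i^2 V_if^2)
  double fast = 0;
  for (int f = 0; f < k; ++f) {
    double xv = 0, xxvv = 0;
    for (int i = 0; i < n; ++i) {
      xv   += x[i] * V[i][f];
      xxvv += x[i] * x[i] * V[i][f] * V[i][f];
    }
    fast += .5 * (xv * xv - xxvv);
  }

  // naive form: sum of all pairwise interactions x_i x_j <V_i, V_j>
  double naive = 0;
  for (int i = 0; i < n; ++i)
    for (int j = i + 1; j < n; ++j)
      for (int f = 0; f < k; ++f)
        naive += x[i] * x[j] * V[i][f] * V[j][f];

  printf("fast = %f, naive = %f\n", fast, naive);  // the two should match
  return 0;
}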
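CalcGrad() first overwrites py_ in place with p = -y ./ (1 + exp(y .* py)), which is the derivative of the logit loss log(1 + exp(-y * py)) with respect to the prediction, then forms grad_w = X' * p. A dense toy sketch of just those two steps (hypothetical values; difacto runs them through SpMV::Times and SpMV::TransTimes on sparse data):

#include <cmath>
#include <cstdio>

int main() {
  const int n = 3, d = 2;                      // 3 instances, 2 features
  double X[n][d] = {{1, 0}, {0, 2}, {1, 1}};
  double y[n]    = {1, -1, 1};                 // labels in {-1, +1}
  double w[d]    = {0.5, -0.25};

  // py = X * w, then p = -y ./ (1 + exp(y .* py)), reusing py's storage
  double py[n], p[n];
  for (int i = 0; i < n; ++i) {
    py[i] = 0;
    for (int j = 0; j < d; ++j) py[i] += X[i][j] * w[j];
    p[i] = -y[i] / (1 + std::exp(y[i] * py[i]));
  }

  // grad_w = X' * p
  double grad_w[d] = {0, 0};
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < d; ++j) grad_w[j] += X[i][j] * p[i];

  printf("grad_w = (%f, %f)\n", grad_w[0], grad_w[1]);
  return 0;
}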