优化算法比较



Below, the first link is a blogger's write-up of conclusions about solving with various optimization algorithms; the second is a summary by a blogger abroad, who has apparently since published it as a survey, and a Chinese translation already exists, so the two can be read together. Saving them here for now. At the moment I have only read up to the momentum section, and my implementation stops there as well: applying each of these optimization algorithms to logistic regression. I will post the results later; this is just a bookmark for now.
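All of the implementations below fit logistic regression by gradient ascent on the log-likelihood; the per-example gradient they share (my notation, not from the linked posts) is

$$\frac{\partial \ell}{\partial w_i} = \big(y - \sigma(w^\top x)\big)\,x_i, \qquad \sigma(z) = \frac{1}{1 + e^{-z}},$$

which is exactly the (instance.getLabel() - predict) * instance.getFeatureIndex(i) term that appears in every method below.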


http://blog.csdn.net/luo123n/article/details/48239963

http://sebastianruder.com/optimizing-gradient-descent/index.html#fn:7

http://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650720663&idx=3&sn=d9f671f77be23a148d1830448154a545&chksm=871b0de9b06c84ffaf260b9ba2a010108cca62d5ce3dcbd8c98c72c9f786f9cd460b27b496ca&mpshare=1&scene=2&srcid=1121mgll9exVL2Gia7trGTn7&from=timeline#wechat_redirect


https://www.52ml.net/21094.html


/**
 * Min-max normalization: rescale every feature into [0, 1].
 */
private void normalization() {
    double[] max = new double[corpus.getFeatureNum()];
    double[] min = new double[corpus.getFeatureNum()];
    for (int i = 0; i < corpus.getFeatureNum(); i++) {
        // Initializing both bounds to 0.0 breaks features that are entirely
        // positive or entirely negative; start from +/- infinity instead.
        max[i] = Double.NEGATIVE_INFINITY;
        min[i] = Double.POSITIVE_INFINITY;
    }
    // Pass 1: find each feature's range.
    for (LRInstance instance : corpus.getInstances().values()) {
        for (int i = 0; i < corpus.getFeatureNum(); i++) {
            double feature = instance.getFeatureIndex(i);
            if (feature > max[i]) max[i] = feature;
            if (feature < min[i]) min[i] = feature; // independent ifs: the same value may set both
        }
    }
    // Pass 2: rescale, skipping constant features to avoid dividing by zero.
    for (LRInstance instance : corpus.getInstances().values()) {
        for (int i = 0; i < corpus.getFeatureNum(); i++) {
            if (max[i] == min[i]) continue;
            double feature = instance.getFeatureIndex(i);
            instance.setFeatureIndex(i, (feature - min[i]) / (max[i] - min[i]));
        }
    }
}

/**
 * Logistic (sigmoid) function: maps any real z into (0, 1).
 */
private double sigmoid(double z) {
    return 1.0 / (1.0 + Math.exp(-z));
}

/**
 * Batch gradient descent: every step uses all examples.
 */
private void batchGradientDescent() {
    for (int k = 0; k < conf.getMaxIter(); k++) {
        // Accumulate the full-batch gradient first, so every feature's
        // gradient is computed against the same weight vector.
        double[] gradient = new double[corpus.getFeatureNum()];
        for (LRInstance instance : corpus.getInstances().values()) {
            double predict = sigmoid(instance.getCurrRTW(weights));
            double residual = instance.getLabel() - predict;
            for (int i = 0; i < corpus.getFeatureNum(); i++) {
                gradient[i] += residual * instance.getFeatureIndex(i);
            }
        }
        // convergence check disabled: the sample is too small for Epsilon to trigger (see note at the end)
        for (int i = 0; i < corpus.getFeatureNum(); i++) {
            weights[i] += conf.getShrinkage() * gradient[i];
        }
        error(k);
    }
}
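The error(k) tracker called at the end of every iteration is not shown in the post. A minimal sketch of what it could be, assuming it just logs the average log-loss over the corpus (the name comes from the original, the body is entirely my guess):

// Hypothetical sketch of error(k); the original implementation is not shown.
private void error(int k) {
    double logLoss = 0.0;
    for (LRInstance instance : corpus.getInstances().values()) {
        double p = sigmoid(instance.getCurrRTW(weights));
        // clamp p away from 0 and 1 so Math.log never returns -Infinity
        p = Math.min(Math.max(p, 1e-15), 1 - 1e-15);
        logLoss -= instance.getLabel() * Math.log(p) + (1 - instance.getLabel()) * Math.log(1 - p);
    }
    System.out.println("iter " + k + " avg log-loss: " + logLoss / corpus.getInstancesNum());
}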

/**
 * Stochastic gradient descent: every step uses one randomly chosen example.
 */
private void stoGradientDescent() {
    Random random = new Random(); // create once, not once per iteration
    for (int k = 0; k < conf.getMaxIter(); k++) {
        int instanceId = random.nextInt(corpus.getInstancesNum()) + 1; // instances are keyed 1..N
        LRInstance instance = corpus.getInstances().get(instanceId);
        // One prediction per step, taken before any weight is changed.
        double predict = sigmoid(instance.getCurrRTW(weights));
        for (int i = 0; i < corpus.getFeatureNum(); i++) {
            double gradient = (instance.getLabel() - predict) * instance.getFeatureIndex(i);
            // convergence check disabled (see note at the end)
            weights[i] += conf.getShrinkage() * gradient;
        }
        error(k);
    }
}
/**
 * Mini-batch gradient descent: every step uses sampleNum randomly chosen examples.
 * @param sampleNum size of the mini-batch
 */
private void miniBatchGradientDescent(int sampleNum) {
    Random random = new Random();
    for (int k = 0; k < conf.getMaxIter(); k++) {
        // Step 1: sample a mini-batch (with replacement).
        List<LRInstance> samples = new ArrayList<LRInstance>();
        for (int n = 0; n < sampleNum; n++) {
            int instanceId = random.nextInt(corpus.getInstancesNum()) + 1;
            samples.add(corpus.getInstances().get(instanceId));
        }
        // Step 2: accumulate the mini-batch gradient against the current weights.
        double[] gradient = new double[corpus.getFeatureNum()];
        for (LRInstance instance : samples) {
            double predict = sigmoid(instance.getCurrRTW(weights));
            double residual = instance.getLabel() - predict;
            for (int i = 0; i < corpus.getFeatureNum(); i++) {
                gradient[i] += residual * instance.getFeatureIndex(i);
            }
        }
        // Step 3: update all weights at once.
        for (int i = 0; i < corpus.getFeatureNum(); i++) {
            weights[i] += conf.getShrinkage() * gradient[i];
        }
        error(k);
    }
}

/**
 * Momentum: SGD plus an exponentially decaying moving average of past gradients.
 */
private void momentum() {
    Random random = new Random();
    // Java zero-fills new arrays, so the velocity starts at v_0 = 0,
    // which is the standard momentum initialization.
    double[] velocity = new double[corpus.getFeatureNum()];
    for (int k = 0; k < conf.getMaxIter(); k++) {
        int instanceId = random.nextInt(corpus.getInstancesNum()) + 1;
        LRInstance instance = corpus.getInstances().get(instanceId);
        double predict = sigmoid(instance.getCurrRTW(weights));
        for (int i = 0; i < corpus.getFeatureNum(); i++) {
            double gradient = (instance.getLabel() - predict) * instance.getFeatureIndex(i);
            velocity[i] = conf.getGamma() * velocity[i] + conf.getShrinkage() * gradient;
            weights[i] += velocity[i];
        }
        error(k);
    }
}
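For reference, the update this loop implements, in the notation of Ruder's survey (with a plus sign, since the gradient here points in the ascent direction of the log-likelihood):

$$v_t = \gamma\, v_{t-1} + \eta\, g_t, \qquad w_t = w_{t-1} + v_t, \qquad v_0 = 0.$$

Initializing the velocity to zero, as the code does, is the standard choice.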

The gradient-based stopping condition in the middle of each method is commented out in these runs: the sample size is so small that the Epsilon setting never actually triggers.
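On a larger dataset one way to restore it is a gradient-norm test; a minimal sketch, assuming conf.getEpsilon() is meant as a threshold on the L2 norm of the accumulated gradient (the converged helper below is mine, not the original code):

// Sketch of a gradient-norm convergence test (hypothetical helper).
private boolean converged(double[] gradient) {
    double normSq = 0.0;
    for (double g : gradient) {
        normSq += g * g;
    }
    return Math.sqrt(normSq) < conf.getEpsilon();
}

In batchGradientDescent, for example, this would sit where the disabled comment is: accumulate the gradient, then break out of the iteration loop when converged(gradient) returns true, before applying the weight update.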
