Nonlinear Least Squares: Gauss-Newton, Levenberg-Marquardt, and Dog-Leg
Steepest Descent
Assume $F(\mathbf{x})$ is differentiable and smooth, so that it admits the Taylor expansion

$$F(\mathbf{x} + \alpha\mathbf{h}) \approx F(\mathbf{x}) + \alpha\,\mathbf{h}^T F'(\mathbf{x}).$$

Now consider along which direction $\mathbf{h}$ (with $\|\mathbf{h}\|$ fixed) $F$ decreases fastest. Since

$$\mathbf{h}^T F'(\mathbf{x}) = \|\mathbf{h}\|\,\|F'(\mathbf{x})\|\cos\theta,$$

where $\theta$ is the angle between $\mathbf{h}$ and the gradient $F'(\mathbf{x})$, the decrease is largest when $\cos\theta = -1$, i.e.

$$\mathbf{h}_{sd} = -F'(\mathbf{x}),$$

the negative gradient, which is called the steepest descent direction.
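As a toy illustration (added here; it is not part of the original post), the following self-contained Eigen snippet runs steepest descent on the simple quadratic $F(\mathbf{x}) = \frac{1}{2}\|\mathbf{A}\mathbf{x}-\mathbf{b}\|^2$. The fixed step size and the toy data are assumptions made just for this sketch; the methods below choose the step far more carefully.

```cpp
#include <eigen3/Eigen/Dense>
#include <iostream>
using namespace Eigen;

// Steepest-descent sketch: minimize F(x) = ||A*x - b||^2 / 2,
// whose gradient is F'(x) = A^T * (A*x - b).
int main() {
    MatrixXd A(3, 2);
    A << 1, 2,
         3, 4,
         5, 6;
    VectorXd b(3);
    b << 7, 8, 9;

    VectorXd x = VectorXd::Zero(2);
    const double alpha = 0.01;  // fixed step size, chosen small enough to converge
    for (int i = 0; i < 2000; i++) {
        VectorXd g = A.transpose() * (A * x - b);  // gradient F'(x)
        if (g.norm() < 1e-10) break;               // stop when the gradient vanishes
        x -= alpha * g;                            // step along -F'(x)
    }
    std::cout << "x = " << x.transpose() << std::endl;  // approaches (-6, 6.5)
    return 0;
}
```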
The Least Squares Problem
A least squares problem can generally be stated as:
find an $\mathbf{x}^* = \arg\min_{\mathbf{x}} F(\mathbf{x})$, where

$$F(\mathbf{x}) = \frac{1}{2}\sum_{i=1}^{m} f_i(\mathbf{x})^2 = \frac{1}{2}\|\mathbf{f}(\mathbf{x})\|^2 = \frac{1}{2}\mathbf{f}(\mathbf{x})^T\mathbf{f}(\mathbf{x}).$$

Assume each $f_i$ is smooth, so that $\mathbf{f}$ has the first-order Taylor expansion

$$\mathbf{f}(\mathbf{x} + \mathbf{h}) \approx \mathbf{f}(\mathbf{x}) + \mathbf{J}(\mathbf{x})\,\mathbf{h},$$

where $\mathbf{J}$ is the Jacobian, $(\mathbf{J})_{ij} = \partial f_i/\partial x_j$. Then

$$F(\mathbf{x}+\mathbf{h}) \approx L(\mathbf{h}) = \frac{1}{2}\mathbf{f}^T\mathbf{f} + \mathbf{h}^T\mathbf{J}^T\mathbf{f} + \frac{1}{2}\mathbf{h}^T\mathbf{J}^T\mathbf{J}\,\mathbf{h}.$$

It is customary to write the gradient as $\mathbf{g} = F'(\mathbf{x})$, so

$$F'(\mathbf{x}) = \mathbf{J}(\mathbf{x})^T\mathbf{f}(\mathbf{x}) = \mathbf{g}, \qquad F''(\mathbf{x}) \approx \mathbf{J}(\mathbf{x})^T\mathbf{J}(\mathbf{x}).$$
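To see where $F'(\mathbf{x}) = \mathbf{J}^T\mathbf{f}$ comes from (a one-line check added here for completeness), differentiate $F$ component by component:

$$\frac{\partial F}{\partial x_j} = \frac{\partial}{\partial x_j}\,\frac{1}{2}\sum_{i=1}^{m} f_i(\mathbf{x})^2 = \sum_{i=1}^{m} f_i(\mathbf{x})\,\frac{\partial f_i}{\partial x_j} = \left(\mathbf{J}^T\mathbf{f}\right)_j.$$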
Gauss-Newton
Choose the step $\mathbf{h}$ that minimizes the local model $L(\mathbf{h})$:

$$\mathbf{h}_{gn} = \arg\min_{\mathbf{h}} L(\mathbf{h}).$$

For $L$ to be minimal, its gradient must vanish, $L'(\mathbf{h}) = \mathbf{J}^T\mathbf{f} + \mathbf{J}^T\mathbf{J}\,\mathbf{h} = \mathbf{0}$, i.e.

$$(\mathbf{J}^T\mathbf{J})\,\mathbf{h}_{gn} = -\mathbf{J}^T\mathbf{f} = -\mathbf{g}.$$

When $\mathbf{J}$ has full column rank, $\mathbf{J}^T\mathbf{J}$ is positive definite, the solution $\mathbf{h}_{gn}$ is unique, and it is a descent direction for $F$. Because the Gauss-Newton solve requires inverting $\mathbf{J}^T\mathbf{J}$, the method breaks down when $\mathbf{J}$ is rank-deficient or badly conditioned.
To summarize, a typical Gauss-Newton iteration runs as follows:

- step1: compute $\mathbf{f}(\mathbf{x}_k)$ and $\mathbf{J}(\mathbf{x}_k)$, and solve $(\mathbf{J}^T\mathbf{J})\,\mathbf{h} = -\mathbf{J}^T\mathbf{f}$ for the step $\mathbf{h}$.
- step2: update $\mathbf{x}_{k+1} = \mathbf{x}_k + \mathbf{h}$.
- step3: if the change in $F$ (or in $\mathbf{x}$) falls below a threshold, stop; otherwise go back to step1.
Gauss-Newton usually converges quickly but is not stable, while steepest descent is stable but converges slowly. The next sections therefore introduce two hybrids of Gauss-Newton and steepest descent.
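One implementation note of mine, not from the original post: the Gauss-Newton code below computes `(Jac.transpose() * Jac).inverse()` explicitly, which is acceptable for a 4-parameter demo but numerically fragile in general. A sketch of the usual alternative, solving the normal equations through a Cholesky-type factorization:

```cpp
#include <eigen3/Eigen/Dense>
using namespace Eigen;

// Solve (J^T J) h = -J^T f without forming an explicit inverse.
// J is the Jacobian of the residual vector f at the current x.
VectorXd gaussNewtonStep(const MatrixXd& J, const VectorXd& f) {
    MatrixXd A = J.transpose() * J;  // approximate Hessian
    VectorXd g = J.transpose() * f;  // gradient of F = ||f||^2 / 2
    // LDL^T factorization: cheaper and better conditioned than A.inverse() * (-g)
    return A.ldlt().solve(-g);
}
```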
Levenberg-Marquardt (Damped Least Squares)
Gauss-Newton uses $\mathbf{J}^T\mathbf{J}$ to approximate the Hessian $F''(\mathbf{x})$. Levenberg-Marquardt adds a damping term $\mu \ge 0$ to the normal equations:

$$(\mathbf{J}^T\mathbf{J} + \mu\mathbf{I})\,\mathbf{h}_{lm} = -\mathbf{g}.$$

This way, even when $\mathbf{J}^T\mathbf{J}$ is singular or ill-conditioned, the damped matrix is positive definite and $\mathbf{h}_{lm}$ is guaranteed to be a descent direction. Consequently, for large $\mu$ the step $\mathbf{h}_{lm} \approx -\frac{1}{\mu}\mathbf{g}$ is a short step along the steepest descent direction, while for small $\mu$ it approaches the Gauss-Newton step $\mathbf{h}_{gn}$.
So how should $\mu$ actually be adjusted during the LM iteration?
Assume the local model $L(\mathbf{h})$ above is used to predict the decrease of $F$. Then for a proposed step $\mathbf{h}_{lm}$ we can compare the actual decrease with the predicted one. We define a gain ratio

$$\varrho = \frac{F(\mathbf{x}) - F(\mathbf{x} + \mathbf{h}_{lm})}{L(\mathbf{0}) - L(\mathbf{h}_{lm})}.$$
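The denominator has a convenient closed form, which the LM code below uses in `linerDeltaL` (this derivation is mine, filling in the step that the code comment `deltaL = h^t * (u * h - g)/2` relies on). Substituting the definition of $L$ and then the damped normal equations $(\mathbf{J}^T\mathbf{J} + \mu\mathbf{I})\,\mathbf{h}_{lm} = -\mathbf{g}$:

$$L(\mathbf{0}) - L(\mathbf{h}_{lm}) = -\mathbf{h}_{lm}^T\mathbf{J}^T\mathbf{f} - \frac{1}{2}\mathbf{h}_{lm}^T\mathbf{J}^T\mathbf{J}\,\mathbf{h}_{lm} = -\mathbf{h}_{lm}^T\mathbf{g} + \frac{1}{2}\mathbf{h}_{lm}^T(\mu\mathbf{h}_{lm} + \mathbf{g}) = \frac{1}{2}\mathbf{h}_{lm}^T(\mu\mathbf{h}_{lm} - \mathbf{g}),$$

which is positive whenever $\mu > 0$ and $\mathbf{g} \ne \mathbf{0}$.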
In practice the first- and second-order approximations we chose do not hold over the whole domain, only in a neighborhood of the current $\mathbf{x}$. When $\varrho$ is large, $L(\mathbf{h})$ is a good approximation of $F(\mathbf{x}+\mathbf{h})$, so we may decrease $\mu$ and behave more like Gauss-Newton on the next step. When $\varrho$ is small or negative (meaning $F$ actually increased), the approximation is poor; we should increase $\mu$, which both shortens the step and pushes it toward the steepest descent direction.
A damping update strategy that works well in practice (due to Nielsen, and used in the code below) is: if $\varrho > 0$, accept the step and set $\mu := \mu\max\{\frac{1}{3},\,1-(2\varrho-1)^3\}$, $\nu := 2$; otherwise reject the step and set $\mu := \mu\nu$, $\nu := 2\nu$. The initial value is taken as $\mu_0 = \tau\max_i(\mathbf{J}^T\mathbf{J})_{ii}$, with e.g. $\tau = 10^{-3}$ (larger if the initial guess is believed to be poor).
To summarize, the LM damped least squares procedure:
- step1: initialize $\mu = \tau\max_i(\mathbf{J}^T\mathbf{J})_{ii}$ and $\nu = 2$; compute $\mathbf{f}$, $\mathbf{J}$, $\mathbf{A} = \mathbf{J}^T\mathbf{J}$ and $\mathbf{g} = \mathbf{J}^T\mathbf{f}$.
- step2: recompute the gradient; if $\|\mathbf{g}\| \le \varepsilon_1$, stop. Otherwise solve $(\mathbf{A} + \mu\mathbf{I})\,\mathbf{h}_{lm} = -\mathbf{g}$.
- step3: if $\|\mathbf{h}_{lm}\| \le \varepsilon_2(\|\mathbf{x}\| + \varepsilon_2)$, stop. Otherwise set $\mathbf{x}_{new} = \mathbf{x} + \mathbf{h}_{lm}$ and compute the gain ratio $\varrho$.
- step4: if $\varrho > 0$, accept the step: $\mathbf{x} := \mathbf{x}_{new}$, $\mu := \mu\max\{\frac{1}{3},\,1-(2\varrho-1)^3\}$, $\nu := 2$;
  otherwise reject it: $\mu := \mu\nu$, $\nu := 2\nu$. Repeat from step2.
For the thresholds, the example code below uses $\varepsilon_1 = \varepsilon_2 = 10^{-12}$.
The Dog-Leg Method
The other Gauss-Newton/steepest-descent hybrid is the dog-leg method, which uses a trust region in place of the damping term.
Recall the steepest descent method from above: the descent direction is $\mathbf{h}_{sd} = -\mathbf{g}$, but the direction alone says nothing about how far to go. Assume we move a distance $\alpha$ along $\mathbf{h}_{sd}$ and use the local model

$$F(\mathbf{x} + \alpha\mathbf{h}_{sd}) \approx F(\mathbf{x}) + \alpha\,\mathbf{h}_{sd}^T\mathbf{g} + \frac{1}{2}\alpha^2\|\mathbf{J}\mathbf{h}_{sd}\|^2.$$

To make this minimal with respect to $\alpha$, set its derivative to zero, which gives

$$\alpha = -\frac{\mathbf{h}_{sd}^T\mathbf{g}}{\|\mathbf{J}\mathbf{h}_{sd}\|^2} = \frac{\|\mathbf{g}\|^2}{\|\mathbf{J}\mathbf{g}\|^2}.$$

The Gauss-Newton step, as before, is $\mathbf{h}_{gn} = -(\mathbf{J}^T\mathbf{J})^{-1}\mathbf{g}$.
Next, what exactly is a trust region? A trust region is a ball of radius $\Delta$ around the current $\mathbf{x}$ inside which we trust the local model $L(\mathbf{h})$ to be a good approximation of $F(\mathbf{x}+\mathbf{h})$; every step must stay inside it.
Continuing, the dog-leg step $\mathbf{h}_{dl}$ is assembled from $\mathbf{h}_{gn}$ and $\alpha\mathbf{h}_{sd}$ in three cases:

$$\mathbf{h}_{dl} = \begin{cases} \mathbf{h}_{gn} & \text{if } \|\mathbf{h}_{gn}\| \le \Delta, \\[2pt] \dfrac{\Delta}{\|\mathbf{h}_{sd}\|}\,\mathbf{h}_{sd} & \text{if } \|\alpha\mathbf{h}_{sd}\| \ge \Delta, \\[2pt] \alpha\mathbf{h}_{sd} + \beta(\mathbf{h}_{gn} - \alpha\mathbf{h}_{sd}) & \text{otherwise, with } \beta \text{ chosen so that } \|\mathbf{h}_{dl}\| = \Delta. \end{cases}$$
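In the third case $\beta$ comes from a scalar quadratic (this derivation is added here; it is what the `beta = ...` expression in the dog-leg code computes). Writing $\mathbf{a} = \alpha\mathbf{h}_{sd}$ and $\mathbf{b} = \mathbf{h}_{gn}$, the condition $\|\mathbf{a} + \beta(\mathbf{b}-\mathbf{a})\|^2 = \Delta^2$ expands to

$$\|\mathbf{b}-\mathbf{a}\|^2\beta^2 + 2c\,\beta + \|\mathbf{a}\|^2 - \Delta^2 = 0, \qquad c = \mathbf{a}^T(\mathbf{b}-\mathbf{a}),$$

whose positive root is

$$\beta = \frac{-c + \sqrt{c^2 + \|\mathbf{b}-\mathbf{a}\|^2\left(\Delta^2 - \|\mathbf{a}\|^2\right)}}{\|\mathbf{b}-\mathbf{a}\|^2}.$$

Since this case only occurs when $\|\mathbf{a}\| < \Delta$, the discriminant is positive and $\beta \in (0,1)$.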
Similar to the damped least squares method, how should the trust-region radius be updated in practice?
We again use the gain ratio $\varrho = \left(F(\mathbf{x}) - F(\mathbf{x}+\mathbf{h}_{dl})\right) / \left(L(\mathbf{0}) - L(\mathbf{h}_{dl})\right)$. The step is accepted if $\varrho > 0$; in addition, if $\varrho > 0.75$ the model fits well and the region is enlarged, $\Delta := \max\{\Delta,\,3\|\mathbf{h}_{dl}\|\}$, while if $\varrho < 0.25$ the model fits poorly and the region is shrunk, $\Delta := \Delta/2$.
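For each of the three dog-leg cases the predicted decrease $L(\mathbf{0}) - L(\mathbf{h}_{dl})$ also has a closed form, which is exactly what the `deltaL` branches in the code below evaluate (the formulas match Madsen, Nielsen and Tingleff's tutorial on nonlinear least squares):

$$L(\mathbf{0}) - L(\mathbf{h}_{dl}) = \begin{cases} F(\mathbf{x}) & \text{if } \mathbf{h}_{dl} = \mathbf{h}_{gn}, \\[2pt] \dfrac{\Delta\left(2\alpha\|\mathbf{g}\| - \Delta\right)}{2\alpha} & \text{if } \mathbf{h}_{dl} = \dfrac{\Delta}{\|\mathbf{h}_{sd}\|}\mathbf{h}_{sd}, \\[2pt] \dfrac{1}{2}\alpha(1-\beta)^2\|\mathbf{g}\|^2 + \beta(2-\beta)F(\mathbf{x}) & \text{otherwise.} \end{cases}$$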
To summarize, the Dog-Leg procedure:
- step1: initialize the trust-region radius $\Delta$ and compute $\mathbf{f}$, $\mathbf{J}$, $\mathbf{g} = \mathbf{J}^T\mathbf{f}$.
- step2: recompute the gradient; if $\|\mathbf{g}\| \le \varepsilon_1$ or $\|\mathbf{f}\| \le \varepsilon_3$, stop.
- step3: if the trust-region radius satisfies $\Delta \le \varepsilon_2(\|\mathbf{x}\| + \varepsilon_2)$, stop.
- step4: compute the Gauss-Newton step $\mathbf{h}_{gn}$ and the steepest-descent step $\alpha\mathbf{h}_{sd}$.
- step5: assemble $\mathbf{h}_{dl}$ from the three cases above; if $\|\mathbf{h}_{dl}\| \le \varepsilon_2(\|\mathbf{x}\| + \varepsilon_2)$, stop.
- step6: compute the gain ratio $\varrho$; if $\varrho > 0$, accept $\mathbf{x} := \mathbf{x} + \mathbf{h}_{dl}$; enlarge or shrink $\Delta$ as described above.
Repeat from step2.
For the thresholds and the initial radius, the example code below uses $\varepsilon_1 = \varepsilon_2 = \varepsilon_3 = 10^{-12}$ and $\Delta_0 = 1$.
Example:
We fit the model $y = A\sin(Bx) + C\cos(Dx)$ to 100 noisy samples generated with the true parameters $A = 5$, $B = 1$, $C = 10$, $D = 2$; the initial guess is $(1.6,\,1.4,\,6.2,\,1.7)$ (see `main()` below).
Gauss-Newton code:
```cpp
// residual: obj = A * sin(B*x) + C * cos(D*x) - F
double func(const VectorXd& input, const VectorXd& output,
            const VectorXd& params, int objIndex)
{
    double x1 = params(0);
    double x2 = params(1);
    double x3 = params(2);
    double x4 = params(3);

    double t = input(objIndex);
    double f = output(objIndex);

    return x1 * sin(x2 * t) + x3 * cos(x4 * t) - f;
}

// return vector made up of func() elements
VectorXd objF(const VectorXd& input, const VectorXd& output, const VectorXd& params)
{
    VectorXd obj(input.rows());
    for (int i = 0; i < input.rows(); i++)
        obj(i) = func(input, output, params, i);
    return obj;
}

// F = (f^T * f) / 2
double Func(const VectorXd& obj)
{
    return obj.squaredNorm() / 2;
}

// numerical partial derivative via central differences
double Deriv(const VectorXd& input, const VectorXd& output, int objIndex,
             const VectorXd& params, int paraIndex)
{
    VectorXd para1 = params;
    VectorXd para2 = params;
    para1(paraIndex) -= DERIV_STEP;
    para2(paraIndex) += DERIV_STEP;

    double obj1 = func(input, output, para1, objIndex);
    double obj2 = func(input, output, para2, objIndex);

    return (obj2 - obj1) / (2 * DERIV_STEP);
}

// Jacobian of the residual vector, computed numerically
MatrixXd Jacobin(const VectorXd& input, const VectorXd& output, const VectorXd& params)
{
    int rowNum = input.rows();
    int colNum = params.rows();

    MatrixXd Jac(rowNum, colNum);
    for (int i = 0; i < rowNum; i++)
        for (int j = 0; j < colNum; j++)
            Jac(i, j) = Deriv(input, output, i, params, j);
    return Jac;
}

void gaussNewton(const VectorXd& input, const VectorXd& output, VectorXd& params)
{
    int errNum = input.rows();   // number of residuals
    int paraNum = params.rows(); // number of parameters

    VectorXd obj(errNum);
    double last_sum = 0;
    int iterCnt = 0;
    while (iterCnt < MAX_ITER)
    {
        obj = objF(input, output, params);
        double sum = Func(obj);

        cout << "Iterator index: " << iterCnt << endl;
        cout << "parameter: " << endl << params << endl;
        cout << "error sum: " << endl << sum << endl << endl;

        // stop when F no longer decreases
        if (fabs(sum - last_sum) <= 1e-12)
            break;
        last_sum = sum;

        MatrixXd Jac = Jacobin(input, output, params);
        VectorXd delta(paraNum);
        // h_gn = -(J^T J)^-1 * J^T * f, applied here as params -= delta
        delta = (Jac.transpose() * Jac).inverse() * Jac.transpose() * obj;
        params -= delta;
        iterCnt++;
    }
}
```
Levenberg-Marquardt code:
```cpp
// largest diagonal element of J^T * J, used to initialize the damping u
double maxMatrixDiagonale(const MatrixXd& Hessian)
{
    double max = 0;
    for (int i = 0; i < Hessian.rows(); i++)
    {
        if (Hessian(i, i) > max)
            max = Hessian(i, i);
    }
    return max;
}

// L(h) = F(x) + h^t*J^t*f + h^t*J^t*J*h/2
// deltaL = L(0) - L(h) = h^t * (u * h - g) / 2, using (J^t*J + u*I)h = -g
double linerDeltaL(const VectorXd& step, const VectorXd& gradient, const double u)
{
    double L = step.transpose() * (u * step - gradient);
    return L / 2;
}

void levenMar(const VectorXd& input, const VectorXd& output, VectorXd& params)
{
    int errNum = input.rows();   // number of residuals
    int paraNum = params.rows(); // number of parameters

    // initial evaluation
    VectorXd obj = objF(input, output, params);
    MatrixXd Jac = Jacobin(input, output, params); // Jacobian
    MatrixXd A = Jac.transpose() * Jac;            // approximate Hessian
    VectorXd gradient = Jac.transpose() * obj;     // gradient

    // parameters tao, v, epsilon1, epsilon2
    double tao = 1e-3;
    long long v = 2;
    double eps1 = 1e-12, eps2 = 1e-12;
    double u = tao * maxMatrixDiagonale(A);
    bool found = gradient.norm() <= eps1;
    if (found) return;

    double last_sum = 0;
    int iterCnt = 0;
    while (iterCnt < MAX_ITER)
    {
        VectorXd obj = objF(input, output, params);
        MatrixXd Jac = Jacobin(input, output, params); // Jacobian
        MatrixXd A = Jac.transpose() * Jac;            // approximate Hessian
        VectorXd gradient = Jac.transpose() * obj;     // gradient

        if (gradient.norm() <= eps1)
        {
            cout << "stop: g(x) = 0 for a local minimizer." << endl;
            break;
        }

        cout << "A: " << endl << A << endl;

        // step = -h_lm, solved from the damped normal equations
        VectorXd step = (A + u * MatrixXd::Identity(paraNum, paraNum)).inverse() * gradient;
        cout << "step: " << endl << step << endl;

        if (step.norm() <= eps2 * (params.norm() + eps2))
        {
            cout << "stop: change in x is small" << endl;
            break;
        }

        VectorXd paramsNew(params.rows());
        paramsNew = params - step; // h_lm = -step

        obj = objF(input, output, params);                 // f(x)
        VectorXd obj_new = objF(input, output, paramsNew); // f(x_new)

        double deltaF = Func(obj) - Func(obj_new);
        double deltaL = linerDeltaL(-1 * step, gradient, u);

        double roi = deltaF / deltaL; // gain ratio
        cout << "roi is : " << roi << endl;
        if (roi > 0)
        {
            // accept the step and relax the damping (Nielsen's rule)
            params = paramsNew;
            u *= max(1.0 / 3.0, 1 - pow(2 * roi - 1, 3));
            v = 2;
        }
        else
        {
            // reject the step and tighten the damping
            u = u * v;
            v = v * 2;
        }
        cout << "u = " << u << " v = " << v << endl;

        iterCnt++;
        cout << "Iterator " << iterCnt << " times" << endl << endl;
    }
}
```
Dog-Leg code:
```cpp
void dogLeg(const VectorXd& input, const VectorXd& output, VectorXd& params)
{
    int errNum = input.rows();   // number of residuals
    int paraNum = params.rows(); // number of parameters

    VectorXd obj = objF(input, output, params);
    MatrixXd Jac = Jacobin(input, output, params); // Jacobian
    VectorXd gradient = Jac.transpose() * obj;     // gradient

    // parameters epsilon1, epsilon2, epsilon3 and initial radius
    double eps1 = 1e-12, eps2 = 1e-12, eps3 = 1e-12;
    double radius = 1.0;

    bool found = obj.norm() <= eps3 || gradient.norm() <= eps1;
    if (found) return;

    double last_sum = 0;
    int iterCnt = 0;
    while (iterCnt < MAX_ITER)
    {
        VectorXd obj = objF(input, output, params);
        MatrixXd Jac = Jacobin(input, output, params); // Jacobian
        VectorXd gradient = Jac.transpose() * obj;     // gradient

        if (gradient.norm() <= eps1)
        {
            cout << "stop: F'(x) = g(x) = 0 for a global minimizer." << endl;
            break;
        }
        if (obj.norm() <= eps3)
        {
            cout << "stop: f(x) is so small";
            break;
        }

        // how far to go along the steepest descent direction
        double alpha = gradient.squaredNorm() / (Jac * gradient).squaredNorm();
        // steepest descent step (alpha already included) and Gauss-Newton step
        VectorXd stepest_descent = -alpha * gradient;
        VectorXd gauss_newton = (Jac.transpose() * Jac).inverse() * Jac.transpose() * obj * (-1);

        double beta = 0;

        // assemble the dog-leg step from the three cases
        // (note: stepest_descent is already alpha * h_sd, so compare its norm directly)
        VectorXd dog_leg(params.rows());
        if (gauss_newton.norm() <= radius)
            dog_leg = gauss_newton;
        else if (stepest_descent.norm() >= radius)
            dog_leg = (radius / stepest_descent.norm()) * stepest_descent;
        else
        {
            VectorXd a = stepest_descent;
            VectorXd b = gauss_newton;
            double c = a.transpose() * (b - a);
            beta = (sqrt(c * c + (b - a).squaredNorm() * (radius * radius - a.squaredNorm())) - c)
                   / (b - a).squaredNorm();
            dog_leg = a + beta * (b - a);
        }

        cout << "dog-leg: " << endl << dog_leg << endl;

        if (dog_leg.norm() <= eps2 * (params.norm() + eps2))
        {
            cout << "stop: change in x is small" << endl;
            break;
        }

        VectorXd new_params(params.rows());
        new_params = params + dog_leg;
        cout << "new parameter is: " << endl << new_params << endl;

        obj = objF(input, output, params);                  // f(x)
        VectorXd obj_new = objF(input, output, new_params); // f(x_new)

        // delta F = F(x) - F(x_new)
        double deltaF = Func(obj) - Func(obj_new);

        // delta L = L(0) - L(dog_leg), one closed form per case
        double deltaL = 0;
        if (gauss_newton.norm() <= radius)
            deltaL = Func(obj);
        else if (stepest_descent.norm() >= radius)
            deltaL = radius * (2 * alpha * gradient.norm() - radius) / (2.0 * alpha);
        else
            deltaL = alpha * (1 - beta) * (1 - beta) * gradient.squaredNorm() / 2.0
                     + beta * (2.0 - beta) * Func(obj);

        double roi = deltaF / deltaL; // gain ratio
        if (roi > 0)
        {
            params = new_params; // accept the step
        }
        if (roi > 0.75)
        {
            radius = max(radius, 3.0 * dog_leg.norm());
        }
        else if (roi < 0.25)
        {
            radius = radius / 2.0;
            if (radius <= eps2 * (params.norm() + eps2))
            {
                cout << "trust region radius is too small." << endl;
                break;
            }
        }

        cout << "roi: " << roi << " dog-leg norm: " << dog_leg.norm() << endl;
        cout << "radius: " << radius << endl;

        iterCnt++;
        cout << "Iterator " << iterCnt << " times" << endl << endl;
    }
}
```
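A side note of mine, following Madsen, Nielsen and Tingleff: when $c > 0$ the single-branch $\beta$ formula used above (both in the math and in the code) can lose precision to cancellation, and the tutorial recommends the algebraically equivalent form $\beta = (\Delta^2 - \|\mathbf{a}\|^2)\,/\,(c + \sqrt{c^2 + \|\mathbf{b}-\mathbf{a}\|^2(\Delta^2-\|\mathbf{a}\|^2)})$ in that case. For a small demo like this one the single-branch formula is fine.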
main()
```cpp
#include <eigen3/Eigen/Dense>
#include <eigen3/Eigen/Sparse>
#include <iostream>
#include <iomanip>
#include <math.h>

using namespace std;
using namespace Eigen;

const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;

#define max(a,b) (((a)>(b))?(a):(b))

int main(int argc, char* argv[])
{
    // obj = A * sin(Bx) + C * cos(D*x) - F
    // there are 4 parameters: A, B, C, D
    int num_params = 4;

    // generate random data using these parameters
    int total_data = 100;

    VectorXd input(total_data);
    VectorXd output(total_data);

    double A = 5, B = 1, C = 10, D = 2;
    // load observation data
    for (int i = 0; i < total_data; i++)
    {
        // generate a random variable in [-10, 10]
        double x = 20.0 * ((random() % 1000) / 1000.0) - 10.0;
        double deltaY = 2.0 * (random() % 1000) / 1000.0;
        double y = A * sin(B * x) + C * cos(D * x) + deltaY;

        input(i) = x;
        output(i) = y;
    }

    // guess the parameters
    VectorXd params_gaussNewton(num_params);
    // initial guess
    params_gaussNewton << 1.6, 1.4, 6.2, 1.7;

    VectorXd params_levenMar = params_gaussNewton;
    VectorXd params_dogLeg = params_gaussNewton;

    gaussNewton(input, output, params_gaussNewton);
    levenMar(input, output, params_levenMar);
    dogLeg(input, output, params_dogLeg);

    cout << "gauss newton parameter: " << endl << params_gaussNewton << endl << endl << endl;
    cout << "Levenberg-Marquardt parameter: " << endl << params_levenMar << endl << endl << endl;
    cout << "dog-leg parameter: " << endl << params_dogLeg << endl << endl << endl;
}
```
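A couple of practical notes on this listing (mine, not the original author's): the snippets above belong in one translation unit, with the `#include`s and constants at the top and the function definitions before `main()`. `random()` is a POSIX function, so on Windows/MSVC you would substitute `rand()`; and the `max(a,b)` macro shadows `std::max`, which is harmless here but worth knowing. With Eigen installed system-wide on Linux, a plain `g++ -O2 demo.cpp -o demo` (file name arbitrary) builds the combined file.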
In general, for Gauss-Newton, LM and Dog-Leg alike, if the initial parameters are close to the true values all three methods converge to the truth; with an initial guess as far off as the one in this example, $(1.6,\,1.4,\,6.2,\,1.7)$, the differences between the methods become visible.
Plotting how F changes as the three methods converge, the results of LM and Dog-Leg are clearly better than Gauss-Newton: Gauss-Newton oscillates back and forth noticeably during convergence, Dog-Leg is the smoothest, and LM goes through three distinct phases.
During the damped least squares (LM) run, the damping factor $\mu$ keeps being adjusted, steering each step between Gauss-Newton and steepest-descent behavior.
During the Dog-Leg run the trust-region radius changes continually, and it tends toward 0 once the method has finally converged.
To summarize:
The result of a least squares optimization depends on the choice of initial values. LM and Dog-Leg converge noticeably better than Gauss-Newton; LM and Dog-Leg usually reach the same accuracy, and on balance Dog-Leg is slightly better than LM.