Next: Nonlinear Least-Squares Large Example, Previous: Nonlinear Least-Squares Geodesic Acceleration Example, Up: Nonlinear Least-Squares Examples [Index]
The following program compares all available nonlinear least squares trust-region subproblem (TRS) methods on the Branin function, a common optimization test problem. The cost function is given by
\Phi(x) &= 1/2 (f_1^2 + f_2^2) f_1 &= x_2 + a_1 x_1^2 + a_2 x_1 + a_3 f_2 &= sqrt(a_4) sqrt(1 + (1 - a_5) cos(x_1))
with a_1 = -{5.1 \over 4 \pi^2}, a_2 = {5 \over \pi}, a_3 = -6, a_4 = 10, a_5 = {1 \over 8\pi}. There are three minima of this function in the range (x_1,x_2) \in [-5,15] \times [-5,15]. The program below uses the starting point (x_1,x_2) = (6,14.5) and calculates the solution with all available nonlinear least squares TRS methods. The program output is shown below.
Method NITER NFEV NJEV Initial Cost Final cost Final cond(J) Final x levenberg-marquardt 20 27 21 1.9874e+02 3.9789e-01 6.1399e+07 (-3.14e+00, 1.23e+01) levenberg-marquardt+accel 27 36 28 1.9874e+02 3.9789e-01 1.4465e+07 (3.14e+00, 2.27e+00) dogleg 23 64 23 1.9874e+02 3.9789e-01 5.0692e+08 (3.14e+00, 2.28e+00) double-dogleg 24 69 24 1.9874e+02 3.9789e-01 3.4879e+07 (3.14e+00, 2.27e+00) 2D-subspace 23 54 24 1.9874e+02 3.9789e-01 2.5142e+07 (3.14e+00, 2.27e+00)
The first row of output above corresponds to standard Levenberg-Marquardt, while the second row includes geodesic acceleration. We see that the standard LM method converges to the minimum at (-\pi,12.275) and also uses the least number of iterations and Jacobian evaluations. All other methods converge to the minimum (\pi,2.275) and perform similarly in terms of number of Jacobian evaluations. We see that J is fairly ill-conditioned at both minima, indicating that the QR (or SVD) solver is the best choice for this problem. Since there are only two parameters in this optimization problem, we can easily visualize the paths taken by each method, which are shown in the figure below. The figure shows contours of the cost function \Phi(x_1,x_2) which exhibits three global minima in the range [-5,15] \times [-5,15]. The paths taken by each solver are shown as colored lines.
The program is given below.
#include <stdlib.h> #include <stdio.h> #include <gsl/gsl_vector.h> #include <gsl/gsl_matrix.h> #include <gsl/gsl_blas.h> #include <gsl/gsl_multifit_nlinear.h> /* parameters to model */ struct model_params { double a1; double a2; double a3; double a4; double a5; }; /* Branin function */ int func_f (const gsl_vector * x, void *params, gsl_vector * f) { struct model_params *par = (struct model_params *) params; double x1 = gsl_vector_get(x, 0); double x2 = gsl_vector_get(x, 1); double f1 = x2 + par->a1 * x1 * x1 + par->a2 * x1 + par->a3; double f2 = sqrt(par->a4) * sqrt(1.0 + (1.0 - par->a5) * cos(x1)); gsl_vector_set(f, 0, f1); gsl_vector_set(f, 1, f2); return GSL_SUCCESS; } int func_df (const gsl_vector * x, void *params, gsl_matrix * J) { struct model_params *par = (struct model_params *) params; double x1 = gsl_vector_get(x, 0); double f2 = sqrt(par->a4) * sqrt(1.0 + (1.0 - par->a5) * cos(x1)); gsl_matrix_set(J, 0, 0, 2.0 * par->a1 * x1 + par->a2); gsl_matrix_set(J, 0, 1, 1.0); gsl_matrix_set(J, 1, 0, -0.5 * par->a4 / f2 * (1.0 - par->a5) * sin(x1)); gsl_matrix_set(J, 1, 1, 0.0); return GSL_SUCCESS; } int func_fvv (const gsl_vector * x, const gsl_vector * v, void *params, gsl_vector * fvv) { struct model_params *par = (struct model_params *) params; double x1 = gsl_vector_get(x, 0); double v1 = gsl_vector_get(v, 0); double c = cos(x1); double s = sin(x1); double f2 = sqrt(par->a4) * sqrt(1.0 + (1.0 - par->a5) * c); double t = 0.5 * par->a4 * (1.0 - par->a5) / f2; gsl_vector_set(fvv, 0, 2.0 * par->a1 * v1 * v1); gsl_vector_set(fvv, 1, -t * (c + s*s/f2) * v1 * v1); return GSL_SUCCESS; } void callback(const size_t iter, void *params, const gsl_multifit_nlinear_workspace *w) { gsl_vector * x = gsl_multifit_nlinear_position(w); double x1 = gsl_vector_get(x, 0); double x2 = gsl_vector_get(x, 1); /* print out current location */ printf("%f %f\n", x1, x2); } void solve_system(gsl_vector *x0, gsl_multifit_nlinear_fdf *fdf, gsl_multifit_nlinear_parameters *params) { const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust; const size_t max_iter = 200; const double xtol = 1.0e-8; const double gtol = 1.0e-8; const double ftol = 1.0e-8; const size_t n = fdf->n; const size_t p = fdf->p; gsl_multifit_nlinear_workspace *work = gsl_multifit_nlinear_alloc(T, params, n, p); gsl_vector * f = gsl_multifit_nlinear_residual(work); gsl_vector * x = gsl_multifit_nlinear_position(work); int info; double chisq0, chisq, rcond; printf("# %s/%s\n", gsl_multifit_nlinear_name(work), gsl_multifit_nlinear_trs_name(work)); /* initialize solver */ gsl_multifit_nlinear_init(x0, fdf, work); /* store initial cost */ gsl_blas_ddot(f, f, &chisq0); /* iterate until convergence */ gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, callback, NULL, &info, work); /* store final cost */ gsl_blas_ddot(f, f, &chisq); /* store cond(J(x)) */ gsl_multifit_nlinear_rcond(&rcond, work); /* print summary */ fprintf(stderr, "%-25s %-6zu %-5zu %-5zu %-13.4e %-12.4e %-13.4e (%.2e, %.2e)\n", gsl_multifit_nlinear_trs_name(work), gsl_multifit_nlinear_niter(work), fdf->nevalf, fdf->nevaldf, chisq0, chisq, 1.0 / rcond, gsl_vector_get(x, 0), gsl_vector_get(x, 1)); printf("\n\n"); gsl_multifit_nlinear_free(work); } int main (void) { const size_t n = 2; const size_t p = 2; gsl_vector *f = gsl_vector_alloc(n); gsl_vector *x = gsl_vector_alloc(p); gsl_multifit_nlinear_fdf fdf; gsl_multifit_nlinear_parameters fdf_params = gsl_multifit_nlinear_default_parameters(); struct model_params params; params.a1 = -5.1 / (4.0 * M_PI * M_PI); params.a2 = 5.0 / M_PI; params.a3 = -6.0; params.a4 = 10.0; params.a5 = 1.0 / (8.0 * M_PI); /* print map of Phi(x1, x2) */ { double x1, x2, chisq; for (x1 = -5.0; x1 < 15.0; x1 += 0.1) { for (x2 = -5.0; x2 < 15.0; x2 += 0.1) { gsl_vector_set(x, 0, x1); gsl_vector_set(x, 1, x2); func_f(x, ¶ms, f); gsl_blas_ddot(f, f, &chisq); printf("%f %f %f\n", x1, x2, chisq); } printf("\n"); } printf("\n\n"); } /* define function to be minimized */ fdf.f = func_f; fdf.df = func_df; fdf.fvv = func_fvv; fdf.n = n; fdf.p = p; fdf.params = ¶ms; /* starting point */ gsl_vector_set(x, 0, 6.0); gsl_vector_set(x, 1, 14.5); fprintf(stderr, "%-25s %-6s %-5s %-5s %-13s %-12s %-13s %-15s\n", "Method", "NITER", "NFEV", "NJEV", "Initial Cost", "Final cost", "Final cond(J)", "Final x"); fdf_params.trs = gsl_multifit_nlinear_trs_lm; solve_system(x, &fdf, &fdf_params); fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel; solve_system(x, &fdf, &fdf_params); fdf_params.trs = gsl_multifit_nlinear_trs_dogleg; solve_system(x, &fdf, &fdf_params); fdf_params.trs = gsl_multifit_nlinear_trs_ddogleg; solve_system(x, &fdf, &fdf_params); fdf_params.trs = gsl_multifit_nlinear_trs_subspace2D; solve_system(x, &fdf, &fdf_params); gsl_vector_free(f); gsl_vector_free(x); return 0; }