Next: Nonlinear Least-Squares Comparison Example, Previous: Nonlinear Least-Squares Exponential Fit Example, Up: Nonlinear Least-Squares Examples [Index]
The following example program minimizes a modified Rosenbrock function, which is characterized by a narrow canyon with steep walls. The starting point is selected high on the canyon wall, so the solver must first find the canyon bottom and then navigate to the minimum. The problem is solved both with and without using geodesic acceleration for comparison. The cost function is given by
\Phi(x) = \frac{1}{2} \left( f_1^2 + f_2^2 \right), \qquad f_1 = 100 \left( x_2 - x_1^2 \right), \qquad f_2 = 1 - x_1
The Jacobian matrix is given by
J = \begin{pmatrix} -200 x_1 & 100 \\ -1 & 0 \end{pmatrix}
In order to use geodesic acceleration, the user must provide the second directional derivative of each residual in the velocity direction, D_v^2 f_i = \sum_{\alpha\beta} v_{\alpha} v_{\beta} \partial_{\alpha} \partial_{\beta} f_i. The velocity vector v is provided by the solver. For this example, these derivatives are given by
f_{vv} = \begin{pmatrix} -200 v_1^2 \\ 0 \end{pmatrix}
The solution of this minimization problem is given by
x^* = \begin{pmatrix} 1 \\ 1 \end{pmatrix}, \qquad \Phi(x^*) = 0
The program output is shown below.
=== Solving system without acceleration ===
NITER = 53
NFEV = 56
NJEV = 54
NAEV = 0
initial cost = 2.250225000000e+04
final cost = 6.674986031430e-18
final x = (9.999999974165e-01, 9.999999948328e-01)
final cond(J) = 6.000096055094e+02
=== Solving system with acceleration ===
NITER = 15
NFEV = 17
NJEV = 16
NAEV = 16
initial cost = 2.250225000000e+04
final cost = 7.518932873279e-19
final x = (9.999999991329e-01, 9.999999982657e-01)
final cond(J) = 6.000097233278e+02
We can see that enabling geodesic acceleration requires less than a third of the number of Jacobian evaluations in order to locate the minimum. The paths taken by both methods are shown in the figure below. The contours show the cost function \Phi(x_1,x_2). We see that both methods quickly find the canyon bottom, but the geodesic acceleration method navigates along the bottom to the solution with significantly fewer iterations.
The program is given below.
#include <stdlib.h> #include <stdio.h> #include <gsl/gsl_vector.h> #include <gsl/gsl_matrix.h> #include <gsl/gsl_blas.h> #include <gsl/gsl_multifit_nlinear.h> int func_f (const gsl_vector * x, void *params, gsl_vector * f) { double x1 = gsl_vector_get(x, 0); double x2 = gsl_vector_get(x, 1); gsl_vector_set(f, 0, 100.0 * (x2 - x1*x1)); gsl_vector_set(f, 1, 1.0 - x1); return GSL_SUCCESS; } int func_df (const gsl_vector * x, void *params, gsl_matrix * J) { double x1 = gsl_vector_get(x, 0); gsl_matrix_set(J, 0, 0, -200.0*x1); gsl_matrix_set(J, 0, 1, 100.0); gsl_matrix_set(J, 1, 0, -1.0); gsl_matrix_set(J, 1, 1, 0.0); return GSL_SUCCESS; } int func_fvv (const gsl_vector * x, const gsl_vector * v, void *params, gsl_vector * fvv) { double v1 = gsl_vector_get(v, 0); gsl_vector_set(fvv, 0, -200.0 * v1 * v1); gsl_vector_set(fvv, 1, 0.0); return GSL_SUCCESS; } void callback(const size_t iter, void *params, const gsl_multifit_nlinear_workspace *w) { gsl_vector * x = gsl_multifit_nlinear_position(w); /* print out current location */ printf("%f %f\n", gsl_vector_get(x, 0), gsl_vector_get(x, 1)); } void solve_system(gsl_vector *x0, gsl_multifit_nlinear_fdf *fdf, gsl_multifit_nlinear_parameters *params) { const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust; const size_t max_iter = 200; const double xtol = 1.0e-8; const double gtol = 1.0e-8; const double ftol = 1.0e-8; const size_t n = fdf->n; const size_t p = fdf->p; gsl_multifit_nlinear_workspace *work = gsl_multifit_nlinear_alloc(T, params, n, p); gsl_vector * f = gsl_multifit_nlinear_residual(work); gsl_vector * x = gsl_multifit_nlinear_position(work); int info; double chisq0, chisq, rcond; /* initialize solver */ gsl_multifit_nlinear_init(x0, fdf, work); /* store initial cost */ gsl_blas_ddot(f, f, &chisq0); /* iterate until convergence */ gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, callback, NULL, &info, work); /* store final cost */ gsl_blas_ddot(f, f, &chisq); /* store cond(J(x)) */ 
gsl_multifit_nlinear_rcond(&rcond, work); /* print summary */ fprintf(stderr, "NITER = %zu\n", gsl_multifit_nlinear_niter(work)); fprintf(stderr, "NFEV = %zu\n", fdf->nevalf); fprintf(stderr, "NJEV = %zu\n", fdf->nevaldf); fprintf(stderr, "NAEV = %zu\n", fdf->nevalfvv); fprintf(stderr, "initial cost = %.12e\n", chisq0); fprintf(stderr, "final cost = %.12e\n", chisq); fprintf(stderr, "final x = (%.12e, %.12e)\n", gsl_vector_get(x, 0), gsl_vector_get(x, 1)); fprintf(stderr, "final cond(J) = %.12e\n", 1.0 / rcond); printf("\n\n"); gsl_multifit_nlinear_free(work); } int main (void) { const size_t n = 2; const size_t p = 2; gsl_vector *f = gsl_vector_alloc(n); gsl_vector *x = gsl_vector_alloc(p); gsl_multifit_nlinear_fdf fdf; gsl_multifit_nlinear_parameters fdf_params = gsl_multifit_nlinear_default_parameters(); /* print map of Phi(x1, x2) */ { double x1, x2, chisq; double *f1 = gsl_vector_ptr(f, 0); double *f2 = gsl_vector_ptr(f, 1); for (x1 = -1.2; x1 < 1.3; x1 += 0.1) { for (x2 = -0.5; x2 < 2.1; x2 += 0.1) { gsl_vector_set(x, 0, x1); gsl_vector_set(x, 1, x2); func_f(x, NULL, f); chisq = (*f1) * (*f1) + (*f2) * (*f2); printf("%f %f %f\n", x1, x2, chisq); } printf("\n"); } printf("\n\n"); } /* define function to be minimized */ fdf.f = func_f; fdf.df = func_df; fdf.fvv = func_fvv; fdf.n = n; fdf.p = p; fdf.params = NULL; /* starting point */ gsl_vector_set(x, 0, -0.5); gsl_vector_set(x, 1, 1.75); fprintf(stderr, "=== Solving system without acceleration ===\n"); fdf_params.trs = gsl_multifit_nlinear_trs_lm; solve_system(x, &fdf, &fdf_params); fprintf(stderr, "=== Solving system with acceleration ===\n"); fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel; solve_system(x, &fdf, &fdf_params); gsl_vector_free(f); gsl_vector_free(x); return 0; }