// Copyright (C) 2008 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #undef DLIB_OPTIMIZATIOn_ABSTRACT_ #ifdef DLIB_OPTIMIZATIOn_ABSTRACT_ #include <cmath> #include <limits> #include "../matrix/matrix_abstract.h" #include "../algs.h" namespace dlib{// ---------------------------------------------------------------------------------------- template < typename funct, typename T > class line_search_funct; /*! This object is a function object that represents a line search function. Moreover, it represents a function with the signature: double l(double x) !*/ template < typename funct, typename T > const line_search_funct<funct,T>make_line_search_function( const funct& f, const T& start, const T& direction ); /*! requires - is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size() (i.e. start and direction should be column vectors of the same size) - f must return either a double or a column vector the same length as start - f(start + 1.5*direction) should be a valid expression ensures - if (f returns a double) then - returns a line search function that computes l(x) == f(start + x*direction) - else - returns a line search function that computes l(x) == dot(f(start + x*direction),direction). That is, we assume f is the derivative of some other function and that what f returns is a gradient vector. So the following two expressions both create the derivative of l(x): - derivative(make_line_search_function(funct,start,direction)) - make_line_search_function(derivative(funct),start,direction) !*/ template < typename funct, typename T > const line_search_funct<funct,T>make_line_search_function( const funct& f, const T& start, const T& direction,double& f_out ); /*! This function is identical to the above three argument version of make_line_search_function() except that, if f() outputs a double, every time f() is evaluated its output is also stored into f_out. !*/ template < typename funct, typename T > const line_search_funct<funct,T>make_line_search_function( const funct& f, const T& start, const T& direction, T& gradient_out ); /*! This function is identical to the above three argument version of make_line_search_function() except that, if f() outputs a column vector, every time f() is evaluated its output is also stored into gradient_out. !*/ // ---------------------------------------------------------------------------------------- inlinedoublepoly_min_extrap(doublef0,doubled0,doublef1,doubled1,doublelimit = 1 ); /*! ensures - let c(x) be a 3rd degree polynomial such that: - c(0) == f0 - c(1) == f1 - derivative of c(x) at x==0 is d0 - derivative of c(x) at x==1 is d1 - returns the point in the range [0,limit] that minimizes the polynomial c(x) !*/ // ---------------------------------------------------------------------------------------- inlinedoublepoly_min_extrap(doublef0,doubled0,doublef1 ); /*! ensures - let c(x) be a 2nd degree polynomial such that: - c(0) == f0 - c(1) == f1 - derivative of c(x) at x==0 is d0 - returns the point in the range [0,1] that minimizes the polynomial c(x) !*/ // ---------------------------------------------------------------------------------------- inlinedoublepoly_min_extrap(doublef0,doubled0,doublex1,doublef_x1,doublex2,doublef_x2 ) /*! requires - 0 < x1 < x2 ensures - let f(x) be a 3rd degree polynomial such that: - f(0) == f0 - derivative of f(x) at x==0 is d0 - f(x1) == f_x1 - f(x2) == f_x2 - returns the point in the range [0,x2] that minimizes the polynomial f(x) !*/ // ---------------------------------------------------------------------------------------- inlinedoublelagrange_poly_min_extrap(doublep1,doublep2,doublep3,doublef1,doublef2,doublef3 ); /*! requires - f1 >= f2 <= f3 - p1 < p2 < p3 ensures - let c(x) be the second order Lagrange polynomial that interpolates the points p1, p2, and p3 where c(p1)==f1, c(p2)==f2, and c(p3)==f3 - this function returns the point in the range [p1,p3] that minimizes the polynomial c(x) !*/ // ---------------------------------------------------------------------------------------- template < typename funct, typename funct_der >doubleline_search( const funct& f, constdoublef0, const funct_der& der, constdoubled0,doublerho,doublesigma,doublemin_f,unsignedlongmax_iter ) /*! requires - 0 < rho < sigma < 1 - f and der are scalar functions of scalars (e.g. line_search_funct objects) - der is the derivative of f - f0 == f(0) - d0 == der(0) - max_iter > 0 ensures - Performs a line search and uses the strong Wolfe conditions to decide when the search can stop. - rho == the parameter of the Wolfe sufficient decrease condition - sigma == the parameter of the Wolfe curvature condition - max_iter == the maximum number of iterations allowable. After this many evaluations of f() line_search() is guaranteed to terminate. - returns a value alpha such that f(alpha) is significantly closer to the minimum of f than f(0). - It is assumed that the minimum possible value of f(x) is min_f. So if an alpha is found such that f(alpha) <= min_f then the search stops immediately. - This function is also optimized for the case where der(0) is negative. I.e. positive values of the argument to f() should be in a descent direction. - When this function makes calls to f() and der() it always does so by first calling f() and then calling der(). That is, these two functions are always called in pairs with f() being called first and then der() being called second. !*/ /* A good discussion of the Wolfe conditions and line search algorithms in general can be found in the book Practical Methods of Optimization by R. Fletcher and also in the more recent book Numerical Optimization by Nocedal and Wright. */ // ---------------------------------------------------------------------------------------- template < typename funct >doublebacktracking_line_search( const funct& f,doublef0,doubled0,doublealpha,doublerho,unsignedlongmax_iter ); /*! requires - 0 < rho < 1 - f is a scalar function of scalars (e.g. a line_search_funct object) - f0 == f(0) - d0 == the derivative of f() at f(0). - max_iter > 0 ensures - Performs a backtracking line search and uses the Armijo sufficient decrease rule to decide when the search can stop. - rho == the parameter of the sufficient decrease condition. - max_iter == the maximum number of iterations allowable. After this many evaluations of f() backtracking_line_search() is guaranteed to terminate. - The line search starts with the input alpha value and then backtracks until it finds a good enough alpha value. Once found, it returns the alpha value such that f(alpha) is significantly closer to the minimum of f than f(0). - The returned value of alpha will always be the last value of alpha which was passed to f(). That is, it will always be the case that the last call to f() made by backtracking_line_search() was f(alpha) where alpha is the return value from this function. !*/ // ---------------------------------------------------------------------------------------- template < typename funct > const negate_function_object<funct>negate_function( const funct& f ); /*! requires - f == a function that returns a scalar ensures - returns a function that represents the negation of f. That is, the returned function object represents g(x) == -f(x) !*/ // ---------------------------------------------------------------------------------------- classoptimize_single_variable_failure: public error; /*! This is the exception class used by the functions defined below. !*/ // ---------------------------------------------------------------------------------------- template < typename funct >doublefind_min_single_variable( const funct& f,double& starting_point, constdoublebegin = -1e200, constdoubleend = 1e200, constdoubleeps = 1e-3, constlongmax_iter = 100, constdoubleinitial_search_radius = 1 ) /*! requires - eps > 0 - max_iter > 1 - begin <= starting_point <= end - f must be a function of a double that returns a double (e.g. f(starting_point) should be a valid expression that evaluates to a double) - initial_search_radius > 0 ensures - Finds a point P such that: - P is a local minimum of the function f(). - begin <= P <= end - Evaluates f() no more than max_iter times - Stops searching when the window around the minimum point is smaller than eps. The search will begin with the given starting_point and expand out to the left and right by initial_search_radius sized steps. So if you think the minimum is likely to be found within X distance from the starting_point then set initial_search_radius to X. - #starting_point == P - returns f(P) throws - optimize_single_variable_failure This exception is thrown if max_iter iterations are performed without determining the min point to the requested accuracy of eps. !*/ // ---------------------------------------------------------------------------------------- template < typename funct >doublefind_max_single_variable( const funct& f,double& starting_point, constdoublebegin = -1e200, constdoubleend = 1e200, constdoubleeps = 1e-3, constlongmax_iter = 100, constdoubleinitial_search_radius = 1 ) /*! requires - eps > 0 - max_iter > 1 - begin <= starting_point <= end - f must be a function of a double that returns a double (e.g. f(starting_point) should be a valid expression that evaluates to a double) - initial_search_radius > 0 ensures - Finds a point P such that: - P is a local maximum of the function f(). - begin <= P <= end - Evaluates f() no more than max_iter times - Stops searching when the window around the maximum point is smaller than eps. The search will begin with the given starting_point and expand out to the left and right by initial_search_radius sized steps. So if you think the maximum is likely to be found within X distance from the starting_point then set initial_search_radius to X. - #starting_point == P - returns f(P) throws - optimize_single_variable_failure This exception is thrown if max_iter iterations are performed without determining the max point to the requested accuracy of eps. !*/ // ----------------------------------------------------------------------------------------}#endif // DLIB_OPTIMIZATIOn_ABSTRACT_