// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

// NOTE: the guard macro is #undef'd immediately before the #ifdef below, so nothing
// in this file is ever compiled.  The declarations exist to document the interface.
#undef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
#ifdef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_

#include "structural_svm_problem_abstract.h"
#include "../optimization/optimization_oca_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class svm_struct_processing_node : noncopyable
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool for distributing the work involved in solving a
                dlib::structural_svm_problem across many computers.  It is used in
                conjunction with the svm_struct_controller_node defined below.
        !*/

    public:

        template <
            typename T,
            typename U
            >
        svm_struct_processing_node (
            const structural_svm_problem<T,U>& problem,
            unsigned short port,
            unsigned short num_threads
        );
        /*!
            requires
                - port != 0
                - problem.get_num_samples() != 0
                - problem.get_num_dimensions() != 0
            ensures
                - This object will listen on the given port for a TCP connection from a
                  svm_struct_controller_node.  Once connected, the controller node will
                  be able to access the given problem.
                - Will use num_threads threads at a time to make concurrent calls to the
                  problem.separation_oracle() routine.  You should set this parameter
                  equal to the number of available processing cores.
                - Note that the following parameters within the given problem are ignored:
                    - problem.get_c()
                    - problem.get_epsilon()
                    - problem.get_cache_based_epsilon()
                    - problem.num_nuclear_norm_regularizers()
                    - whether the problem is verbose or not
                  Instead, they are defined by the svm_struct_controller_node.  Note,
                  however, that the problem.get_max_cache_size() parameter is meaningful
                  and controls the size of the separation oracle cache within a
                  svm_struct_processing_node.
        !*/
    };

// ----------------------------------------------------------------------------------------

    class svm_struct_controller_node : noncopyable
    {
        /*!
            INITIAL VALUE
                - get_num_processing_nodes() == 0
                - get_epsilon() == 0.001
                - get_max_iterations() == 10000
                - get_c() == 1
                - This object will not be verbose

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for distributing the work involved in solving a
                dlib::structural_svm_problem across many computers.  The best way to
                understand its use is via example:

                First, suppose you have defined a structural_svm_problem object by
                inheriting from it and defining the appropriate virtual functions.  You
                could solve it by passing an instance to the oca optimizer.  However, if
                your separation oracle takes a long time to evaluate then the
                optimization will take a long time to solve.  To speed this up we can
                distribute the calls to the separation oracle across many computers.

                To make this concrete, let's imagine you want to distribute the work
                across three computers.  You can accomplish this by creating four
                programs.  One containing a svm_struct_controller_node and three
                containing svm_struct_processing_nodes.

                The programs might look like this:

                Controller program:
                    int main()
                    {
                        svm_struct_controller_node cont;
                        cont.set_c(100);
                        // Tell cont where the processing nodes are on your network.
                        cont.add_processing_node("192.168.1.10:12345");
                        cont.add_processing_node("192.168.1.11:12345");
                        cont.add_processing_node("192.168.1.12:12345");
                        matrix<double> w;
                        oca solver;
                        cont(solver, w); // Run the optimization.
                        // After this finishes w will contain the solution vector.
                    }

                Processing programs (they are all the same, except that each loads a
                different subset of the training data):
                    int main()
                    {
                        // Put one third of your data into this problem object.  How you do this depends on your problem.
                        your_structural_svm_problem problem;
                        svm_struct_processing_node node(problem, 12345, number_of_cores_on_this_computer);
                        cout << "hit enter to terminate this program" << endl;
                        cin.get();
                    }
        !*/

    public:

        svm_struct_controller_node (
        );
        /*!
            ensures
                - this object is properly initialized
        !*/

        void set_epsilon (
            double eps
        );
        /*!
            requires
                - eps > 0
            ensures
                - #get_epsilon() == eps
        !*/

        double get_epsilon (
        ) const;
        /*!
            ensures
                - returns the error epsilon that determines when training should stop.
                  Smaller values may result in a more accurate solution but take longer
                  to execute.  Specifically, the algorithm stops when the average sample
                  risk (i.e. R(w) as defined by the dlib::structural_svm_problem object)
                  is within epsilon of its optimal value.

                  Also note that sample risk is an upper bound on a sample's loss.  So
                  you can think of this epsilon value as saying "solve the optimization
                  problem until the average loss per sample is within epsilon of its
                  optimal value".
        !*/

        double get_cache_based_epsilon (
        ) const;
        /*!
            ensures
                - if (get_max_cache_size() != 0) then
                    - The solver will not stop when the average sample risk is within
                      get_epsilon() of its optimal value.  Instead, it will keep running
                      but will run the optimizer completely on the cache until the
                      average sample risk is within #get_cache_based_epsilon() of its
                      optimal value.  This means that it will perform this additional
                      refinement in the solution accuracy without making any additional
                      calls to the separation_oracle().  This is useful when using a
                      nuclear norm regularization term because it allows you to quickly
                      solve the optimization problem to a high precision, which in the
                      case of a nuclear norm regularized problem means that many of the
                      learned matrices will be low rank or very close to low rank due to
                      the nuclear norm regularizer.  This may not happen without solving
                      the problem to a high accuracy or their ranks may be difficult to
                      determine, so the extra accuracy given by the cache based
                      refinement is very useful.  Finally, note that we include the
                      nuclear norm term as part of the "risk" for the purposes of
                      determining when to stop.
                - else
                    - The value of #get_cache_based_epsilon() has no effect.
        !*/

        void set_cache_based_epsilon (
            double eps
        );
        /*!
            requires
                - eps > 0
            ensures
                - #get_cache_based_epsilon() == eps
        !*/

        void set_max_iterations (
            unsigned long max_iter
        );
        /*!
            ensures
                - #get_max_iterations() == max_iter
        !*/

        unsigned long get_max_iterations (
        );
        /*!
            ensures
                - returns the maximum number of iterations the SVM optimizer is allowed
                  to run before it is required to stop and return a result.
        !*/

        void add_nuclear_norm_regularizer (
            long first_dimension,
            long rows,
            long cols,
            double regularization_strength
        );
        /*!
            requires
                - 0 <= first_dimension < number of dimensions in problem
                - 0 <= rows
                - 0 <= cols
                - first_dimension+rows*cols <= number of dimensions in problem
                - 0 < regularization_strength
            ensures
                - Adds a nuclear norm regularization term to the optimization problem
                  solved by this object.  That is, instead of solving:
                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
                  this object will solve:
                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w)
                  where "part of w" is the part of w indicated by the arguments to this
                  function.  In particular, the part of w included in the nuclear norm is
                  exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols).
                  Therefore, if you think of the w vector as being the concatenation of a
                  bunch of matrices then you can use multiple calls to
                  add_nuclear_norm_regularizer() to add nuclear norm regularization terms
                  to any of the matrices packed into w.
                - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1
        !*/

        unsigned long num_nuclear_norm_regularizers (
        ) const;
        /*!
            ensures
                - returns the number of nuclear norm regularizers that are currently a
                  part of this optimization problem.  That is, returns the number of
                  times add_nuclear_norm_regularizer() has been called since the last
                  call to clear_nuclear_norm_regularizers() or object construction,
                  whichever is most recent.
        !*/

        void clear_nuclear_norm_regularizers (
        );
        /*!
            ensures
                - #num_nuclear_norm_regularizers() == 0
        !*/

        void be_verbose (
        );
        /*!
            ensures
                - This object will print status messages to standard out so that a user
                  can observe the progress of the algorithm.
        !*/

        void be_quiet (
        );
        /*!
            ensures
                - this object will not print anything to standard out
        !*/

        double get_c (
        ) const;
        /*!
            ensures
                - returns the SVM regularization parameter.  It is the parameter that
                  determines the trade off between trying to fit the training data
                  exactly or allowing more errors but hopefully improving the
                  generalization of the resulting classifier.  Larger values encourage
                  exact fitting while smaller values of C may encourage better
                  generalization.
        !*/

        void set_c (
            double C
        );
        /*!
            requires
                - C > 0
            ensures
                - #get_c() == C
        !*/

        void add_processing_node (
            const network_address& addr
        );
        /*!
            requires
                - addr.port != 0
            ensures
                - if (this address hasn't already been added) then
                    - #get_num_processing_nodes() == get_num_processing_nodes() + 1
                    - When operator() is invoked to solve the structural svm problem this
                      object will connect to the svm_struct_processing_node located at
                      the given network address and will include it in the distributed
                      optimization.
        !*/

        void add_processing_node (
            const std::string& ip_or_hostname,
            unsigned short port
        );
        /*!
            requires
                - port != 0
            ensures
                - invokes: add_processing_node(network_address(ip_or_hostname, port))
        !*/

        unsigned long get_num_processing_nodes (
        ) const;
        /*!
            ensures
                - returns the number of remote processing nodes that have been
                  registered with this object.
        !*/

        void remove_processing_nodes (
        );
        /*!
            ensures
                - #get_num_processing_nodes() == 0
        !*/

        class invalid_problem : public error {};

        template <typename matrix_type>
        double operator() (
            const oca& solver,
            matrix_type& w
        ) const;
        /*!
            requires
                - get_num_processing_nodes() != 0
                - matrix_type == a dlib::matrix capable of storing column vectors
            ensures
                - connects to the processing nodes and begins optimizing the structural
                  svm problem using the given oca solver.
                - stores the solution in #w
                - returns the objective value at the solution #w
            throws
                - invalid_problem
                  This exception is thrown if the svm_struct_processing_nodes disagree
                  on the dimensionality of the problem.  That is, if they disagree on
                  the value of structural_svm_problem::get_num_dimensions().
        !*/
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_