yixuan-LBFGSpp-c524a40/0000775000175000017510000000000015001153241014067 5ustar nileshnileshyixuan-LBFGSpp-c524a40/include/0000775000175000017510000000000015001153241015512 5ustar nileshnileshyixuan-LBFGSpp-c524a40/include/LBFGSpp/0000775000175000017510000000000015001153241016707 5ustar nileshnileshyixuan-LBFGSpp-c524a40/include/LBFGSpp/SubspaceMin.h0000664000175000017510000002570415001153241021301 0ustar nileshnilesh// Copyright (C) 2020-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_SUBSPACE_MIN_H #define LBFGSPP_SUBSPACE_MIN_H #include #include #include #include "BFGSMat.h" /// \cond namespace LBFGSpp { // // Subspace minimization procedure of the L-BFGS-B algorithm, // mainly for internal use. // // The target of subspace minimization is to minimize the quadratic function m(x) // over the free variables, subject to the bound condition. // Free variables stand for coordinates that are not at the boundary in xcp, // the generalized Cauchy point. // // In the classical implementation of L-BFGS-B [1], the minimization is done by first // ignoring the box constraints, followed by a line search. Our implementation is // an exact minimization subject to the bounds, based on the BOXCQP algorithm [2]. // // Reference: // [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization. // [2] C. Voglis and I. E. Lagaris (2004). BOXCQP: An algorithm for bound constrained convex quadratic problems. // template class SubspaceMin { private: using Vector = Eigen::Matrix; using Matrix = Eigen::Matrix; using IndexSet = std::vector; // v[ind] static Vector subvec(const Vector& v, const IndexSet& ind) { const int nsub = ind.size(); Vector res(nsub); for (int i = 0; i < nsub; i++) res[i] = v[ind[i]]; return res; } // v[ind] = rhs static void subvec_assign(Vector& v, const IndexSet& ind, const Vector& rhs) { const int nsub = ind.size(); for (int i = 0; i < nsub; i++) v[ind[i]] = rhs[i]; } // Check whether the vector is within the bounds static bool in_bounds(const Vector& x, const Vector& lb, const Vector& ub) { const int n = x.size(); for (int i = 0; i < n; i++) { if (x[i] < lb[i] || x[i] > ub[i]) return false; } return true; } // Test convergence of P set static bool P_converged(const IndexSet& yP_set, const Vector& vecy, const Vector& vecl, const Vector& vecu) { const int nP = yP_set.size(); for (int i = 0; i < nP; i++) { const int coord = yP_set[i]; if (vecy[coord] < vecl[coord] || vecy[coord] > vecu[coord]) return false; } return true; } // Test convergence of L set static bool L_converged(const IndexSet& yL_set, const Vector& lambda) { const int nL = yL_set.size(); for (int i = 0; i < nL; i++) { const int coord = yL_set[i]; if (lambda[coord] < Scalar(0)) return false; } return true; } // Test convergence of L set static bool U_converged(const IndexSet& yU_set, const Vector& mu) { const int nU = yU_set.size(); for (int i = 0; i < nU; i++) { const int coord = yU_set[i]; if (mu[coord] < Scalar(0)) return false; } return true; } public: // bfgs: An object that represents the BFGS approximation matrix. // x0: Current parameter vector. // xcp: Computed generalized Cauchy point. // g: Gradient at x0. // lb: Lower bounds for x. // ub: Upper bounds for x. // Wd: W'(xcp - x0) // newact_set: Coordinates that newly become active during the GCP procedure. // fv_set: Free variable set. // maxit: Maximum number of iterations. // drt: The output direction vector, drt = xsm - x0. 
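    //
    // Implementation note: the iterations below follow the BOXCQP active-set scheme
    // of reference [2]. At each iteration the free coordinates are partitioned,
    // based on the current primal values y and dual variables (lambda, mu), into
    //   L: coordinates clipped to the lower bound (y[i] < l[i], or y[i] == l[i] with lambda[i] >= 0),
    //   U: coordinates clipped to the upper bound (y[i] > u[i], or y[i] == u[i] with mu[i] >= 0),
    //   P: coordinates kept strictly inside the bounds.
    // The KKT system is then re-solved on P, and the process repeats until the
    // P_converged(), L_converged(), and U_converged() tests above are all satisfied.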
static void subspace_minimize( const BFGSMat& bfgs, const Vector& x0, const Vector& xcp, const Vector& g, const Vector& lb, const Vector& ub, const Vector& Wd, const IndexSet& newact_set, const IndexSet& fv_set, int maxit, Vector& drt) { // std::cout << "========================= Entering subspace minimization =========================\n\n"; // d = xcp - x0 drt.noalias() = xcp - x0; // Size of free variables const int nfree = fv_set.size(); // If there is no free variable, simply return drt if (nfree < 1) { // std::cout << "========================= (Early) leaving subspace minimization =========================\n\n"; return; } // std::cout << "New active set = [ "; for(std::size_t i = 0; i < newact_set.size(); i++) std::cout << newact_set[i] << " "; std::cout << "]\n"; // std::cout << "Free variable set = [ "; for(std::size_t i = 0; i < fv_set.size(); i++) std::cout << fv_set[i] << " "; std::cout << "]\n\n"; // Extract the rows of W in the free variable set Matrix WF = bfgs.Wb(fv_set); // Compute F'BAb = -F'WMW'AA'd Vector vecc(nfree); bfgs.compute_FtBAb(WF, fv_set, newact_set, Wd, drt, vecc); // Set the vector c=F'BAb+F'g for linear term, and vectors l and u for the new bounds Vector vecl(nfree), vecu(nfree); for (int i = 0; i < nfree; i++) { const int coord = fv_set[i]; vecl[i] = lb[coord] - x0[coord]; vecu[i] = ub[coord] - x0[coord]; vecc[i] += g[coord]; } // Solve y = -inv(B[F, F]) * c Vector vecy(nfree); bfgs.solve_PtBP(WF, -vecc, vecy); // Test feasibility // If yes, then the solution has been found if (in_bounds(vecy, vecl, vecu)) { subvec_assign(drt, fv_set, vecy); return; } // Otherwise, enter the iterations // Make a copy of y as a fallback solution Vector yfallback = vecy; // Dual variables Vector lambda = Vector::Zero(nfree), mu = Vector::Zero(nfree); // Iterations IndexSet L_set, U_set, P_set, yL_set, yU_set, yP_set; L_set.reserve(nfree / 3); yL_set.reserve(nfree / 3); U_set.reserve(nfree / 3); yU_set.reserve(nfree / 3); P_set.reserve(nfree); yP_set.reserve(nfree); int k; for (k = 0; k < maxit; k++) { // Construct the L, U, and P sets, and then update values // Indices in original drt vector L_set.clear(); U_set.clear(); P_set.clear(); // Indices in y yL_set.clear(); yU_set.clear(); yP_set.clear(); for (int i = 0; i < nfree; i++) { const int coord = fv_set[i]; const Scalar li = vecl[i], ui = vecu[i]; if ((vecy[i] < li) || (vecy[i] == li && lambda[i] >= Scalar(0))) { L_set.push_back(coord); yL_set.push_back(i); vecy[i] = li; mu[i] = Scalar(0); } else if ((vecy[i] > ui) || (vecy[i] == ui && mu[i] >= Scalar(0))) { U_set.push_back(coord); yU_set.push_back(i); vecy[i] = ui; lambda[i] = Scalar(0); } else { P_set.push_back(coord); yP_set.push_back(i); lambda[i] = Scalar(0); mu[i] = Scalar(0); } } /* std::cout << "** Iter " << k << " **\n"; std::cout << " L = [ "; for(std::size_t i = 0; i < L_set.size(); i++) std::cout << L_set[i] << " "; std::cout << "]\n"; std::cout << " U = [ "; for(std::size_t i = 0; i < U_set.size(); i++) std::cout << U_set[i] << " "; std::cout << "]\n"; std::cout << " P = [ "; for(std::size_t i = 0; i < P_set.size(); i++) std::cout << P_set[i] << " "; std::cout << "]\n\n"; */ // Extract the rows of W in the P set Matrix WP = bfgs.Wb(P_set); // Solve y[P] = -inv(B[P, P]) * (B[P, L] * l[L] + B[P, U] * u[U] + c[P]) const int nP = P_set.size(); if (nP > 0) { Vector rhs = subvec(vecc, yP_set); Vector lL = subvec(vecl, yL_set); Vector uU = subvec(vecu, yU_set); Vector tmp(nP); bool nonzero = bfgs.apply_PtBQv(WP, L_set, lL, tmp, true); if (nonzero) rhs.noalias() += 
tmp; nonzero = bfgs.apply_PtBQv(WP, U_set, uU, tmp, true); if (nonzero) rhs.noalias() += tmp; bfgs.solve_PtBP(WP, -rhs, tmp); subvec_assign(vecy, yP_set, tmp); } // Solve lambda[L] = B[L, F] * y + c[L] const int nL = L_set.size(); const int nU = U_set.size(); Vector Fy; if (nL > 0 || nU > 0) bfgs.apply_WtPv(fv_set, vecy, Fy); if (nL > 0) { Vector res; bfgs.apply_PtWMv(L_set, Fy, res, Scalar(-1)); res.noalias() += subvec(vecc, yL_set) + bfgs.theta() * subvec(vecy, yL_set); subvec_assign(lambda, yL_set, res); } // Solve mu[U] = -B[U, F] * y - c[U] if (nU > 0) { Vector negRes; bfgs.apply_PtWMv(U_set, Fy, negRes, Scalar(-1)); negRes.noalias() += subvec(vecc, yU_set) + bfgs.theta() * subvec(vecy, yU_set); subvec_assign(mu, yU_set, -negRes); } // Test convergence if (L_converged(yL_set, lambda) && U_converged(yU_set, mu) && P_converged(yP_set, vecy, vecl, vecu)) break; } // If the iterations do not converge, try the projection if (k >= maxit) { vecy.noalias() = vecy.cwiseMax(vecl).cwiseMin(vecu); subvec_assign(drt, fv_set, vecy); // Test whether drt is a descent direction Scalar dg = drt.dot(g); // If yes, return the result if (dg <= -std::numeric_limits::epsilon()) return; // If not, fall back to the projected unconstrained solution vecy.noalias() = yfallback.cwiseMax(vecl).cwiseMin(vecu); subvec_assign(drt, fv_set, vecy); dg = drt.dot(g); if (dg <= -std::numeric_limits::epsilon()) return; // If still not, fall back to the unconstrained solution subvec_assign(drt, fv_set, yfallback); return; } // std::cout << "** Minimization finished in " << k + 1 << " iteration(s) **\n\n"; // std::cout << "========================= Leaving subspace minimization =========================\n\n"; subvec_assign(drt, fv_set, vecy); } }; } // namespace LBFGSpp /// \endcond #endif // LBFGSPP_SUBSPACE_MIN_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/Param.h0000664000175000017510000003501115001153241020120 0ustar nileshnilesh// Copyright (C) 2016-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_PARAM_H #define LBFGSPP_PARAM_H #include #include // std::invalid_argument namespace LBFGSpp { /// /// \defgroup Enumerations /// /// Enumeration types for line search. /// /// /// \ingroup Enumerations /// /// The enumeration of line search termination conditions. /// enum LINE_SEARCH_TERMINATION_CONDITION { /// /// Backtracking method with the Armijo condition. /// The backtracking method finds the step length such that it satisfies /// the sufficient decrease (Armijo) condition, /// \f$f(x + a \cdot d) \le f(x) + \beta' \cdot a \cdot g(x)^T d\f$, /// where \f$x\f$ is the current point, \f$d\f$ is the current search direction, /// \f$a\f$ is the step length, and \f$\beta'\f$ is the value specified by /// \ref LBFGSParam::ftol. \f$f\f$ and \f$g\f$ are the function /// and gradient values respectively. /// LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1, /// /// The backtracking method with the defualt (regular Wolfe) condition. /// An alias of `LBFGS_LINESEARCH_BACKTRACKING_WOLFE`. /// LBFGS_LINESEARCH_BACKTRACKING = 2, /// /// Backtracking method with regular Wolfe condition. /// The backtracking method finds the step length such that it satisfies /// both the Armijo condition (`LBFGS_LINESEARCH_BACKTRACKING_ARMIJO`) /// and the curvature condition, /// \f$g(x + a \cdot d)^T d \ge \beta \cdot g(x)^T d\f$, where \f$\beta\f$ /// is the value specified by \ref LBFGSParam::wolfe. /// LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2, /// /// Backtracking method with strong Wolfe condition. 
/// The backtracking method finds the step length such that it satisfies /// both the Armijo condition (`LBFGS_LINESEARCH_BACKTRACKING_ARMIJO`) /// and the following condition, /// \f$\vert g(x + a \cdot d)^T d\vert \le \beta \cdot \vert g(x)^T d\vert\f$, /// where \f$\beta\f$ is the value specified by \ref LBFGSParam::wolfe. /// LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3 }; /// /// Parameters to control the L-BFGS algorithm. /// template class LBFGSParam { public: /// /// The number of corrections to approximate the inverse Hessian matrix. /// The L-BFGS routine stores the computation results of previous \ref m /// iterations to approximate the inverse Hessian matrix of the current /// iteration. This parameter controls the size of the limited memories /// (corrections). The default value is \c 6. Values less than \c 3 are /// not recommended. Large values will result in excessive computing time. /// int m; /// /// Absolute tolerance for convergence test. /// This parameter determines the absolute accuracy \f$\epsilon_{abs}\f$ /// with which the solution is to be found. A minimization terminates when /// \f$||g|| < \max\{\epsilon_{abs}, \epsilon_{rel}||x||\}\f$, /// where \f$||\cdot||\f$ denotes the Euclidean (L2) norm. The default value is /// \c 1e-5. /// Scalar epsilon; /// /// Relative tolerance for convergence test. /// This parameter determines the relative accuracy \f$\epsilon_{rel}\f$ /// with which the solution is to be found. A minimization terminates when /// \f$||g|| < \max\{\epsilon_{abs}, \epsilon_{rel}||x||\}\f$, /// where \f$||\cdot||\f$ denotes the Euclidean (L2) norm. The default value is /// \c 1e-5. /// Scalar epsilon_rel; /// /// Distance for delta-based convergence test. /// This parameter determines the distance \f$d\f$ to compute the /// rate of decrease of the objective function, /// \f$f_{k-d}(x)-f_k(x)\f$, where \f$k\f$ is the current iteration /// step. If the value of this parameter is zero, the delta-based convergence /// test will not be performed. The default value is \c 0. /// int past; /// /// Delta for convergence test. /// The algorithm stops when the following condition is met, /// \f$|f_{k-d}(x)-f_k(x)|<\delta\cdot\max(1, |f_k(x)|, |f_{k-d}(x)|)\f$, where \f$f_k(x)\f$ is /// the current function value, and \f$f_{k-d}(x)\f$ is the function value /// \f$d\f$ iterations ago (specified by the \ref past parameter). /// The default value is \c 0. /// Scalar delta; /// /// The maximum number of iterations. /// The optimization process is terminated when the iteration count /// exceeds this parameter. Setting this parameter to zero continues an /// optimization process until a convergence or error. The default value /// is \c 0. /// int max_iterations; /// /// The line search termination condition. /// This parameter specifies the line search termination condition that will be used /// by the LBFGS routine. The default value is `LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE`. /// int linesearch; /// /// The maximum number of trials for the line search. /// This parameter controls the number of function and gradients evaluations /// per iteration for the line search routine. The default value is \c 20. /// int max_linesearch; /// /// The minimum step length allowed in the line search. /// The default value is \c 1e-20. Usually this value does not need to be /// modified. /// Scalar min_step; /// /// The maximum step length allowed in the line search. /// The default value is \c 1e+20. Usually this value does not need to be /// modified. 
/// Scalar max_step; /// /// A parameter to control the accuracy of the line search routine. /// The default value is \c 1e-4. This parameter should be greater /// than zero and smaller than \c 0.5. /// Scalar ftol; /// /// The coefficient for the Wolfe condition. /// This parameter is valid only when the line-search /// algorithm is used with the Wolfe condition. /// The default value is \c 0.9. This parameter should be greater /// the \ref ftol parameter and smaller than \c 1.0. /// Scalar wolfe; public: /// /// Constructor for L-BFGS parameters. /// Default values for parameters will be set when the object is created. /// LBFGSParam() { // clang-format off m = 6; epsilon = Scalar(1e-5); epsilon_rel = Scalar(1e-5); past = 0; delta = Scalar(0); max_iterations = 0; linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE; max_linesearch = 20; min_step = Scalar(1e-20); max_step = Scalar(1e+20); ftol = Scalar(1e-4); wolfe = Scalar(0.9); // clang-format on } /// /// Checking the validity of L-BFGS parameters. /// An `std::invalid_argument` exception will be thrown if some parameter /// is invalid. /// inline void check_param() const { if (m <= 0) throw std::invalid_argument("'m' must be positive"); if (epsilon < 0) throw std::invalid_argument("'epsilon' must be non-negative"); if (epsilon_rel < 0) throw std::invalid_argument("'epsilon_rel' must be non-negative"); if (past < 0) throw std::invalid_argument("'past' must be non-negative"); if (delta < 0) throw std::invalid_argument("'delta' must be non-negative"); if (max_iterations < 0) throw std::invalid_argument("'max_iterations' must be non-negative"); if (linesearch < LBFGS_LINESEARCH_BACKTRACKING_ARMIJO || linesearch > LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) throw std::invalid_argument("unsupported line search termination condition"); if (max_linesearch <= 0) throw std::invalid_argument("'max_linesearch' must be positive"); if (min_step < 0) throw std::invalid_argument("'min_step' must be positive"); if (max_step < min_step) throw std::invalid_argument("'max_step' must be greater than 'min_step'"); if (ftol <= 0 || ftol >= 0.5) throw std::invalid_argument("'ftol' must satisfy 0 < ftol < 0.5"); if (wolfe <= ftol || wolfe >= 1) throw std::invalid_argument("'wolfe' must satisfy ftol < wolfe < 1"); } }; /// /// Parameters to control the L-BFGS-B algorithm. /// template class LBFGSBParam { public: /// /// The number of corrections to approximate the inverse Hessian matrix. /// The L-BFGS-B routine stores the computation results of previous \ref m /// iterations to approximate the inverse Hessian matrix of the current /// iteration. This parameter controls the size of the limited memories /// (corrections). The default value is \c 6. Values less than \c 3 are /// not recommended. Large values will result in excessive computing time. /// int m; /// /// Absolute tolerance for convergence test. /// This parameter determines the absolute accuracy \f$\epsilon_{abs}\f$ /// with which the solution is to be found. A minimization terminates when /// \f$||Pg||_{\infty} < \max\{\epsilon_{abs}, \epsilon_{rel}||x||\}\f$, /// where \f$||x||\f$ denotes the Euclidean (L2) norm of \f$x\f$, and /// \f$Pg=P(x-g,l,u)-x\f$ is the projected gradient. The default value is /// \c 1e-5. /// Scalar epsilon; /// /// Relative tolerance for convergence test. /// This parameter determines the relative accuracy \f$\epsilon_{rel}\f$ /// with which the solution is to be found. 
A minimization terminates when /// \f$||Pg||_{\infty} < \max\{\epsilon_{abs}, \epsilon_{rel}||x||\}\f$, /// where \f$||x||\f$ denotes the Euclidean (L2) norm of \f$x\f$, and /// \f$Pg=P(x-g,l,u)-x\f$ is the projected gradient. The default value is /// \c 1e-5. /// Scalar epsilon_rel; /// /// Distance for delta-based convergence test. /// This parameter determines the distance \f$d\f$ to compute the /// rate of decrease of the objective function, /// \f$f_{k-d}(x)-f_k(x)\f$, where \f$k\f$ is the current iteration /// step. If the value of this parameter is zero, the delta-based convergence /// test will not be performed. The default value is \c 1. /// int past; /// /// Delta for convergence test. /// The algorithm stops when the following condition is met, /// \f$|f_{k-d}(x)-f_k(x)|<\delta\cdot\max(1, |f_k(x)|, |f_{k-d}(x)|)\f$, where \f$f_k(x)\f$ is /// the current function value, and \f$f_{k-d}(x)\f$ is the function value /// \f$d\f$ iterations ago (specified by the \ref past parameter). /// The default value is \c 1e-10. /// Scalar delta; /// /// The maximum number of iterations. /// The optimization process is terminated when the iteration count /// exceeds this parameter. Setting this parameter to zero continues an /// optimization process until a convergence or error. The default value /// is \c 0. /// int max_iterations; /// /// The maximum number of iterations in the subspace minimization. /// This parameter controls the number of iterations in the subspace /// minimization routine. The default value is \c 10. /// int max_submin; /// /// The maximum number of trials for the line search. /// This parameter controls the number of function and gradients evaluations /// per iteration for the line search routine. The default value is \c 20. /// int max_linesearch; /// /// The minimum step length allowed in the line search. /// The default value is \c 1e-20. Usually this value does not need to be /// modified. /// Scalar min_step; /// /// The maximum step length allowed in the line search. /// The default value is \c 1e+20. Usually this value does not need to be /// modified. /// Scalar max_step; /// /// A parameter to control the accuracy of the line search routine. /// The default value is \c 1e-4. This parameter should be greater /// than zero and smaller than \c 0.5. /// Scalar ftol; /// /// The coefficient for the Wolfe condition. /// This parameter is valid only when the line-search /// algorithm is used with the Wolfe condition. /// The default value is \c 0.9. This parameter should be greater /// the \ref ftol parameter and smaller than \c 1.0. /// Scalar wolfe; public: /// /// Constructor for L-BFGS-B parameters. /// Default values for parameters will be set when the object is created. /// LBFGSBParam() { // clang-format off m = 6; epsilon = Scalar(1e-5); epsilon_rel = Scalar(1e-5); past = 1; delta = Scalar(1e-10); max_iterations = 0; max_submin = 10; max_linesearch = 20; min_step = Scalar(1e-20); max_step = Scalar(1e+20); ftol = Scalar(1e-4); wolfe = Scalar(0.9); // clang-format on } /// /// Checking the validity of L-BFGS-B parameters. /// An `std::invalid_argument` exception will be thrown if some parameter /// is invalid. 
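    ///
    /// For example (the values below are purely illustrative):
    /// \code
    /// LBFGSpp::LBFGSBParam<double> param;
    /// param.epsilon = 1e-6;
    /// param.max_iterations = 100;
    /// param.check_param();  // throws std::invalid_argument if any value is out of range
    /// \endcode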
/// inline void check_param() const { if (m <= 0) throw std::invalid_argument("'m' must be positive"); if (epsilon < 0) throw std::invalid_argument("'epsilon' must be non-negative"); if (epsilon_rel < 0) throw std::invalid_argument("'epsilon_rel' must be non-negative"); if (past < 0) throw std::invalid_argument("'past' must be non-negative"); if (delta < 0) throw std::invalid_argument("'delta' must be non-negative"); if (max_iterations < 0) throw std::invalid_argument("'max_iterations' must be non-negative"); if (max_submin < 0) throw std::invalid_argument("'max_submin' must be non-negative"); if (max_linesearch <= 0) throw std::invalid_argument("'max_linesearch' must be positive"); if (min_step < 0) throw std::invalid_argument("'min_step' must be positive"); if (max_step < min_step) throw std::invalid_argument("'max_step' must be greater than 'min_step'"); if (ftol <= 0 || ftol >= 0.5) throw std::invalid_argument("'ftol' must satisfy 0 < ftol < 0.5"); if (wolfe <= ftol || wolfe >= 1) throw std::invalid_argument("'wolfe' must satisfy ftol < wolfe < 1"); } }; } // namespace LBFGSpp #endif // LBFGSPP_PARAM_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/LineSearchNocedalWright.h0000664000175000017510000002653015001153241023556 0ustar nileshnilesh// Copyright (C) 2016-2025 Yixuan Qiu // Copyright (C) 2016-2025 Dirk Toewe // Under MIT license #ifndef LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H #define LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H #include #include #include "Param.h" namespace LBFGSpp { /// /// A line search algorithm for the strong Wolfe condition. Implementation based on: /// /// "Numerical Optimization" 2nd Edition, /// Jorge Nocedal and Stephen J. Wright, /// Chapter 3. Line Search Methods, page 60. /// template class LineSearchNocedalWright { private: using Vector = Eigen::Matrix; // Use {fx_lo, fx_hi, dg_lo} to make a quadratic interpolation of // the function, and the fitted quadratic function is used to // estimate the minimum static Scalar quad_interp(const Scalar& step_lo, const Scalar& step_hi, const Scalar& fx_lo, const Scalar& fx_hi, const Scalar& dg_lo) { using std::abs; // polynomial: p (x) = c0*(x - step)² + c1 // conditions: p (step_hi) = fx_hi // p (step_lo) = fx_lo // p'(step_lo) = dg_lo // We allow fx_hi to be Inf, so first compute a candidate for step size, // and test whether NaN occurs const Scalar fdiff = fx_hi - fx_lo; const Scalar sdiff = step_hi - step_lo; const Scalar smid = (step_hi + step_lo) / Scalar(2); Scalar step_candid = fdiff * step_lo - smid * sdiff * dg_lo; step_candid = step_candid / (fdiff - sdiff * dg_lo); // In some cases the interpolation is not a good choice // This includes (a) NaN values; (b) too close to the end points; (c) outside the interval // In such cases, a bisection search is used const bool candid_nan = !(std::isfinite(step_candid)); const Scalar end_dist = std::min(abs(step_candid - step_lo), abs(step_candid - step_hi)); const bool near_end = end_dist < Scalar(0.01) * abs(sdiff); const bool bisect = candid_nan || (step_candid <= std::min(step_lo, step_hi)) || (step_candid >= std::max(step_lo, step_hi)) || near_end; const Scalar step = bisect ? smid : step_candid; return step; } public: /// /// Line search by Nocedal and Wright (2006). /// /// \param f A function object such that `f(x, grad)` returns the /// objective function value at `x`, and overwrites `grad` with /// the gradient. /// \param param Parameters for the L-BFGS algorithm. /// \param xp The current point. /// \param drt The current moving direction. 
/// \param step_max The upper bound for the step size that makes x feasible. /// Can be ignored for the L-BFGS solver. /// \param step In: The initial step length. /// Out: The calculated step length. /// \param fx In: The objective function value at the current point. /// Out: The function value at the new point. /// \param grad In: The current gradient vector. /// Out: The gradient at the new point. /// \param dg In: The inner product between drt and grad. /// Out: The inner product between drt and the new gradient. /// \param x Out: The new point moved to. /// template static void LineSearch(Foo& f, const LBFGSParam& param, const Vector& xp, const Vector& drt, const Scalar& step_max, Scalar& step, Scalar& fx, Vector& grad, Scalar& dg, Vector& x) { // Check the value of step if (step <= Scalar(0)) throw std::invalid_argument("'step' must be positive"); if (param.linesearch != LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) throw std::invalid_argument("'param.linesearch' must be 'LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE' for LineSearchNocedalWright"); // To make this implementation more similar to the other line search // methods in LBFGSpp, the symbol names from the literature // ("Numerical Optimizations") have been changed. // // Literature | LBFGSpp // -----------|-------- // alpha | step // phi | fx // phi' | dg // The expansion rate of the step size const Scalar expansion = Scalar(2); // Save the function value at the current x const Scalar fx_init = fx; // Projection of gradient on the search direction const Scalar dg_init = dg; // Make sure d points to a descent direction if (dg_init > Scalar(0)) throw std::logic_error("the moving direction increases the objective function value"); const Scalar test_decr = param.ftol * dg_init, // Sufficient decrease test_curv = -param.wolfe * dg_init; // Curvature // Ends of the line search range (step_lo > step_hi is allowed) // We can also define dg_hi, but it will never be used Scalar step_hi, fx_hi; Scalar step_lo = Scalar(0), fx_lo = fx_init, dg_lo = dg_init; // We also need to save x and grad for step=step_lo, since we want to return the best // step size along the path when strong Wolfe condition is not met Vector x_lo = xp, grad_lo = grad; // STEP 1: Bracketing Phase // Find a range guaranteed to contain a step satisfying strong Wolfe. // The bracketing phase exits if one of the following conditions is satisfied: // (1) Current step violates the sufficient decrease condition // (2) Current fx >= previous fx // (3) Current dg >= 0 // (4) Strong Wolfe condition is met // // (4) terminates the whole line search, and (1)-(3) go to the zoom phase // // See also: // "Numerical Optimization", "Algorithm 3.5 (Line Search Algorithm)". 
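        // For example, with an initial step of 1 the bracketing phase tries
        // step = 1, 2, 4, 8, ... (each trial doubles the previous one via the
        // expansion factor above), until one of cases (1)-(3) brackets an interval,
        // case (4) terminates the whole search, or param.max_linesearch trials are used up.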
int iter = 0; for (;;) { // Evaluate the current step size x.noalias() = xp + step * drt; fx = f(x, grad); dg = grad.dot(drt); // Test the sufficient decrease condition if (fx - fx_init > step * test_decr || (Scalar(0) < step_lo && fx >= fx_lo)) { // Case (1) and (2) step_hi = step; fx_hi = fx; // dg_hi = dg; break; } // If reaching here, then the sufficient decrease condition is satisfied // Test the curvature condition if (std::abs(dg) <= test_curv) return; // Case (4) step_hi = step_lo; fx_hi = fx_lo; // dg_hi = dg_lo; step_lo = step; fx_lo = fx; dg_lo = dg; // Move x and grad to x_lo and grad_lo, respectively x_lo.swap(x); grad_lo.swap(grad); if (dg >= Scalar(0)) break; // Case (3) iter++; // If we have used up all line search iterations in the bracketing phase, // it means every new step decreases the objective function. Of course, // the strong Wolfe condition is not met, but we choose not to raise an // exception; instead, we return the best step size so far. This means that // we exit the line search with the most recent step size, which has the // smallest objective function value during the line search if (iter >= param.max_linesearch) { // throw std::runtime_error("the line search routine reached the maximum number of iterations"); // At this point we can guarantee that {step, fx, dg}=={step, fx, dg}_lo // But we need to move {x, grad}_lo back before returning x.swap(x_lo); grad.swap(grad_lo); return; } // If we still stay in the loop, it means we can expand the current step step *= expansion; } // STEP 2: Zoom Phase // Given a range (step_lo,step_hi) that is guaranteed to // contain a valid strong Wolfe step value, this method // finds such a value. // // If step_lo > 0, then step_lo is, among all step sizes generated so far and // satisfying the sufficient decrease condition, the one giving the smallest // objective function value. // // See also: // "Numerical Optimization", "Algorithm 3.6 (Zoom)". for (;;) { // Use {fx_lo, fx_hi, dg_lo} to make a quadratic interpolation of // the function, and the fitted quadratic function is used to // estimate the minimum step = quad_interp(step_lo, step_hi, fx_lo, fx_hi, dg_lo); // Evaluate the current step size x.noalias() = xp + step * drt; fx = f(x, grad); dg = grad.dot(drt); // Test the sufficient decrease condition if (fx - fx_init > step * test_decr || fx >= fx_lo) { if (step == step_hi) throw std::runtime_error("the line search routine failed, possibly due to insufficient numeric precision"); step_hi = step; fx_hi = fx; // dg_hi = dg; } else { // Test the curvature condition if (std::abs(dg) <= test_curv) return; if (dg * (step_hi - step_lo) >= Scalar(0)) { step_hi = step_lo; fx_hi = fx_lo; // dg_hi = dg_lo; } if (step == step_lo) throw std::runtime_error("the line search routine failed, possibly due to insufficient numeric precision"); // If reaching here, then the current step satisfies sufficient decrease condition step_lo = step; fx_lo = fx; dg_lo = dg; // Move x and grad to x_lo and grad_lo, respectively x_lo.swap(x); grad_lo.swap(grad); } iter++; // If we have used up all line search iterations in the zoom phase, // then the strong Wolfe condition is not met. 
We choose not to raise an // exception (unless no step satisfying sufficient decrease is found), // but to return the best step size so far, i.e., step_lo if (iter >= param.max_linesearch) { // throw std::runtime_error("the line search routine reached the maximum number of iterations"); if (step_lo <= Scalar(0)) throw std::runtime_error("the line search routine failed, unable to sufficiently decrease the function value"); // Return everything with _lo step = step_lo; fx = fx_lo; dg = dg_lo; // Move {x, grad}_lo back x.swap(x_lo); grad.swap(grad_lo); return; } } } }; } // namespace LBFGSpp #endif // LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/LineSearchMoreThuente.h0000664000175000017510000004554015001153241023265 0ustar nileshnilesh// Copyright (C) 2020-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_LINE_SEARCH_MORE_THUENTE_H #define LBFGSPP_LINE_SEARCH_MORE_THUENTE_H #include #include // std::invalid_argument, std::runtime_error #include "Param.h" namespace LBFGSpp { /// /// The line search algorithm by Moré and Thuente (1994), currently used for the L-BFGS-B algorithm. /// /// The target of this line search algorithm is to find a step size \f$\alpha\f$ that satisfies the strong Wolfe condition /// \f$f(x+\alpha d) \le f(x) + \alpha\mu g(x)^T d\f$ and \f$|g(x+\alpha d)^T d| \le \eta|g(x)^T d|\f$. /// Our implementation is a simplified version of the algorithm in [1]. We assume that \f$0<\mu<\eta<1\f$, while in [1] /// they do not assume \f$\eta>\mu\f$. As a result, the algorithm in [1] has two stages, but in our implementation we /// only need the first stage to guarantee the convergence. /// /// Reference: /// [1] Moré, J. J., & Thuente, D. J. (1994). Line search algorithms with guaranteed sufficient decrease. 
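///
/// A typical way to select this line search (the scalar type and parameter
/// values below are only one possible choice):
/// \code
/// LBFGSpp::LBFGSBParam<double> param;
/// LBFGSpp::LBFGSBSolver<double, LBFGSpp::LineSearchMoreThuente> solver(param);
/// // solver.minimize(f, x, fx, lb, ub) then invokes this line search internally
/// \endcode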
/// template class LineSearchMoreThuente { private: using Vector = Eigen::Matrix; // Minimizer of a quadratic function q(x) = c0 + c1 * x + c2 * x^2 // that interpolates fa, ga, and fb, assuming the minimizer exists // For case I: fb >= fa and ga * (b - a) < 0 static Scalar quadratic_minimizer(const Scalar& a, const Scalar& b, const Scalar& fa, const Scalar& ga, const Scalar& fb) { const Scalar ba = b - a; const Scalar w = Scalar(0.5) * ba * ga / (fa - fb + ba * ga); return a + w * ba; } // Minimizer of a quadratic function q(x) = c0 + c1 * x + c2 * x^2 // that interpolates fa, ga and gb, assuming the minimizer exists // The result actually does not depend on fa // For case II: ga * (b - a) < 0, ga * gb < 0 // For case III: ga * (b - a) < 0, ga * ga >= 0, |gb| <= |ga| static Scalar quadratic_minimizer(const Scalar& a, const Scalar& b, const Scalar& ga, const Scalar& gb) { const Scalar w = ga / (ga - gb); return a + w * (b - a); } // Local minimizer of a cubic function q(x) = c0 + c1 * x + c2 * x^2 + c3 * x^3 // that interpolates fa, ga, fb and gb, assuming a != b // Also sets a flag indicating whether the minimizer exists static Scalar cubic_minimizer(const Scalar& a, const Scalar& b, const Scalar& fa, const Scalar& fb, const Scalar& ga, const Scalar& gb, bool& exists) { using std::abs; using std::sqrt; const Scalar apb = a + b; const Scalar ba = b - a; const Scalar ba2 = ba * ba; const Scalar fba = fb - fa; const Scalar gba = gb - ga; // z3 = c3 * (b-a)^3, z2 = c2 * (b-a)^3, z1 = c1 * (b-a)^3 const Scalar z3 = (ga + gb) * ba - Scalar(2) * fba; const Scalar z2 = Scalar(0.5) * (gba * ba2 - Scalar(3) * apb * z3); const Scalar z1 = fba * ba2 - apb * z2 - (a * apb + b * b) * z3; // std::cout << "z1 = " << z1 << ", z2 = " << z2 << ", z3 = " << z3 << std::endl; // If c3 = z/(b-a)^3 == 0, reduce to quadratic problem const Scalar eps = std::numeric_limits::epsilon(); if (abs(z3) < eps * abs(z2) || abs(z3) < eps * abs(z1)) { // Minimizer exists if c2 > 0 exists = (z2 * ba > Scalar(0)); // Return the end point if the minimizer does not exist return exists ? (-Scalar(0.5) * z1 / z2) : b; } // Now we can assume z3 > 0 // The minimizer is a solution to the equation c1 + 2*c2 * x + 3*c3 * x^2 = 0 // roots = -(z2/z3) / 3 (+-) sqrt((z2/z3)^2 - 3 * (z1/z3)) / 3 // // Let u = z2/(3z3) and v = z1/z2 // The minimizer exists if v/u <= 1 const Scalar u = z2 / (Scalar(3) * z3), v = z1 / z2; const Scalar vu = v / u; exists = (vu <= Scalar(1)); if (!exists) return b; // We need to find a numerically stable way to compute the roots, as z3 may still be small // // If |u| >= |v|, let w = 1 + sqrt(1-v/u), and then // r1 = -u * w, r2 = -v / w, r1 does not need to be the smaller one // // If |u| < |v|, we must have uv <= 0, and then // r = -u (+-) sqrt(delta), where // sqrt(delta) = sqrt(|u|) * sqrt(|v|) * sqrt(1-u/v) Scalar r1 = Scalar(0), r2 = Scalar(0); if (abs(u) >= abs(v)) { const Scalar w = Scalar(1) + sqrt(Scalar(1) - vu); r1 = -u * w; r2 = -v / w; } else { const Scalar sqrtd = sqrt(abs(u)) * sqrt(abs(v)) * sqrt(1 - u / v); r1 = -u - sqrtd; r2 = -u + sqrtd; } return (z3 * ba > Scalar(0)) ? 
((std::max)(r1, r2)) : ((std::min)(r1, r2)); } // Select the next step size according to the current step sizes, // function values, and derivatives static Scalar step_selection( const Scalar& al, const Scalar& au, const Scalar& at, const Scalar& fl, const Scalar& fu, const Scalar& ft, const Scalar& gl, const Scalar& gu, const Scalar& gt) { using std::abs; if (al == au) return al; // If ft = Inf or gt = Inf, we return the middle point of al and at if (!std::isfinite(ft) || !std::isfinite(gt)) return (al + at) / Scalar(2); // ac: cubic interpolation of fl, ft, gl, gt // aq: quadratic interpolation of fl, gl, ft bool ac_exists; // std::cout << "al = " << al << ", at = " << at << ", fl = " << fl << ", ft = " << ft << ", gl = " << gl << ", gt = " << gt << std::endl; const Scalar ac = cubic_minimizer(al, at, fl, ft, gl, gt, ac_exists); const Scalar aq = quadratic_minimizer(al, at, fl, gl, ft); // std::cout << "ac = " << ac << ", aq = " << aq << std::endl; // Case 1: ft > fl if (ft > fl) { // This should not happen if ft > fl, but just to be safe if (!ac_exists) return aq; // Then use the scheme described in the paper return (abs(ac - al) < abs(aq - al)) ? ac : ((aq + ac) / Scalar(2)); } // as: quadratic interpolation of gl and gt const Scalar as = quadratic_minimizer(al, at, gl, gt); // Case 2: ft <= fl, gt * gl < 0 if (gt * gl < Scalar(0)) return (abs(ac - at) >= abs(as - at)) ? ac : as; // Case 3: ft <= fl, gt * gl >= 0, |gt| < |gl| const Scalar deltal = Scalar(1.1), deltau = Scalar(0.66); if (abs(gt) < abs(gl)) { // We choose either ac or as // The case for ac: 1. It exists, and // 2. ac is farther than at from al, and // 3. ac is closer to at than as // Cases for as: otherwise const Scalar res = (ac_exists && (ac - at) * (at - al) > Scalar(0) && abs(ac - at) < abs(as - at)) ? ac : as; // Postprocessing the chosen step return (at > al) ? std::min(at + deltau * (au - at), res) : std::max(at + deltau * (au - at), res); } // Simple extrapolation if au, fu, or gu is infinity if ((!std::isfinite(au)) || (!std::isfinite(fu)) || (!std::isfinite(gu))) return at + deltal * (at - al); // ae: cubic interpolation of ft, fu, gt, gu bool ae_exists; const Scalar ae = cubic_minimizer(at, au, ft, fu, gt, gu, ae_exists); // Case 4: ft <= fl, gt * gl >= 0, |gt| >= |gl| // The following is not used in the paper, but it seems to be a reasonable safeguard return (at > al) ? std::min(at + deltau * (au - at), ae) : std::max(at + deltau * (au - at), ae); } public: /// /// Line search by Moré and Thuente (1994). /// /// \param f A function object such that `f(x, grad)` returns the /// objective function value at `x`, and overwrites `grad` with /// the gradient. /// \param param An `LBFGSParam` or `LBFGSBParam` object that stores the /// parameters of the solver. /// \param xp The current point. /// \param drt The current moving direction. /// \param step_max The upper bound for the step size that makes x feasible. /// \param step In: The initial step length. /// Out: The calculated step length. /// \param fx In: The objective function value at the current point. /// Out: The function value at the new point. /// \param grad In: The current gradient vector. /// Out: The gradient at the new point. /// \param dg In: The inner product between drt and grad. /// Out: The inner product between drt and the new gradient. /// \param x Out: The new point moved to. 
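    ///
    /// The functor `f` is expected to have the following shape (the name and body
    /// below are illustrative only):
    /// \code
    /// struct MyObjective
    /// {
    ///     double operator()(const Eigen::VectorXd& x, Eigen::VectorXd& grad)
    ///     {
    ///         grad = 2.0 * x;            // write the gradient of f(x) = ||x||^2
    ///         return x.squaredNorm();    // return the objective value
    ///     }
    /// };
    /// \endcode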
/// template static void LineSearch(Foo& f, const SolverParam& param, const Vector& xp, const Vector& drt, const Scalar& step_max, Scalar& step, Scalar& fx, Vector& grad, Scalar& dg, Vector& x) { using std::abs; // std::cout << "========================= Entering line search =========================\n\n"; // Check the value of step if (step <= Scalar(0)) throw std::invalid_argument("'step' must be positive"); if (step > step_max) throw std::invalid_argument("'step' exceeds 'step_max'"); // Save the function value at the current x const Scalar fx_init = fx; // Projection of gradient on the search direction const Scalar dg_init = dg; // std::cout << "fx_init = " << fx_init << ", dg_init = " << dg_init << std::endl << std::endl; // Make sure d points to a descent direction if (dg_init >= Scalar(0)) throw std::logic_error("the moving direction does not decrease the objective function value"); // Tolerance for convergence test // Sufficient decrease const Scalar test_decr = param.ftol * dg_init; // Curvature const Scalar test_curv = -param.wolfe * dg_init; // The bracketing interval Scalar I_lo = Scalar(0), I_hi = std::numeric_limits::infinity(); Scalar fI_lo = Scalar(0), fI_hi = std::numeric_limits::infinity(); Scalar gI_lo = (Scalar(1) - param.ftol) * dg_init, gI_hi = std::numeric_limits::infinity(); // We also need to save x and grad for step=I_lo, since we want to return the best // step size along the path when strong Wolfe condition is not met Vector x_lo = xp, grad_lo = grad; Scalar fx_lo = fx_init, dg_lo = dg_init; // Function value and gradient at the current step size x.noalias() = xp + step * drt; fx = f(x, grad); dg = grad.dot(drt); // std::cout << "max_step = " << step_max << ", step = " << step << ", fx = " << fx << ", dg = " << dg << std::endl; // Convergence test if (fx <= fx_init + step * test_decr && abs(dg) <= test_curv) { // std::cout << "** Criteria met\n\n"; // std::cout << "========================= Leaving line search =========================\n\n"; return; } // Extrapolation factor const Scalar delta = Scalar(1.1); int iter; for (iter = 0; iter < param.max_linesearch; iter++) { // ft = psi(step) = f(xp + step * drt) - f(xp) - step * test_decr // gt = psi'(step) = dg - mu * dg_init // mu = param.ftol const Scalar ft = fx - fx_init - step * test_decr; const Scalar gt = dg - param.ftol * dg_init; // Update step size and bracketing interval Scalar new_step; if (ft > fI_lo) { // Case 1: ft > fl new_step = step_selection(I_lo, I_hi, step, fI_lo, fI_hi, ft, gI_lo, gI_hi, gt); // Sanity check: if the computed new_step is too small, typically due to // extremely large value of ft, switch to the middle point if (new_step <= param.min_step) new_step = (I_lo + step) / Scalar(2); I_hi = step; fI_hi = ft; gI_hi = gt; // std::cout << "Case 1: new step = " << new_step << std::endl; } else if (gt * (I_lo - step) > Scalar(0)) { // Case 2: ft <= fl, gt * (al - at) > 0 // // Page 291 of Moré and Thuente (1994) suggests that // newat = min(at + delta * (at - al), amax), delta in [1.1, 4] new_step = std::min(step_max, step + delta * (step - I_lo)); // We can also consider the following scheme: // First let step_selection() decide a value, and then project to the range above // // new_step = step_selection(I_lo, I_hi, step, fI_lo, fI_hi, ft, gI_lo, gI_hi, gt); // const Scalar delta2 = Scalar(4) // const Scalar t1 = step + delta * (step - I_lo); // const Scalar t2 = step + delta2 * (step - I_lo); // const Scalar tl = std::min(t1, t2), tu = std::max(t1, t2); // new_step = std::min(tu, 
std::max(tl, new_step)); // new_step = std::min(step_max, new_step); I_lo = step; fI_lo = ft; gI_lo = gt; // Move x and grad to x_lo and grad_lo, respectively x_lo.swap(x); grad_lo.swap(grad); fx_lo = fx; dg_lo = dg; // std::cout << "Case 2: new step = " << new_step << std::endl; } else { // Case 3: ft <= fl, gt * (al - at) <= 0 new_step = step_selection(I_lo, I_hi, step, fI_lo, fI_hi, ft, gI_lo, gI_hi, gt); I_hi = I_lo; fI_hi = fI_lo; gI_hi = gI_lo; I_lo = step; fI_lo = ft; gI_lo = gt; // Move x and grad to x_lo and grad_lo, respectively x_lo.swap(x); grad_lo.swap(grad); fx_lo = fx; dg_lo = dg; // std::cout << "Case 3: new step = " << new_step << std::endl; } // Case 1 and 3 are interpolations, whereas Case 2 is extrapolation // This means that Case 2 may return new_step = step_max, // and we need to decide whether to accept this value // 1. If both step and new_step equal to step_max, it means // step will have no further change, so we accept it // 2. Otherwise, we need to test the function value and gradient // on step_max, and decide later // In case step, new_step, and step_max are equal, directly return the computed x and fx if (step == step_max && new_step >= step_max) { // std::cout << "** Maximum step size reached\n\n"; // std::cout << "========================= Leaving line search =========================\n\n"; // Move {x, grad}_lo back before returning x.swap(x_lo); grad.swap(grad_lo); return; } // Otherwise, recompute x and fx based on new_step step = new_step; if (step < param.min_step) throw std::runtime_error("the line search step became smaller than the minimum value allowed"); if (step > param.max_step) throw std::runtime_error("the line search step became larger than the maximum value allowed"); // Update parameter, function value, and gradient x.noalias() = xp + step * drt; fx = f(x, grad); dg = grad.dot(drt); // std::cout << "step = " << step << ", fx = " << fx << ", dg = " << dg << std::endl; // Convergence test if (fx <= fx_init + step * test_decr && abs(dg) <= test_curv) { // std::cout << "** Criteria met\n\n"; // std::cout << "========================= Leaving line search =========================\n\n"; return; } // Now assume step = step_max, and we need to decide whether to // exit the line search (see the comments above regarding step_max) // If we reach here, it means this step size does not pass the convergence // test, so either the sufficient decrease condition or the curvature // condition is not met yet // // Typically the curvature condition is harder to meet, and it is // possible that no step size in [0, step_max] satisfies the condition // // But we need to make sure that its psi function value is smaller than // the best one so far. If not, go to the next iteration and find a better one if (step >= step_max) { const Scalar ft_bound = fx - fx_init - step * test_decr; if (ft_bound <= fI_lo) { // std::cout << "** Maximum step size reached\n\n"; // std::cout << "========================= Leaving line search =========================\n\n"; return; } } } // If we have used up all line search iterations, then the strong Wolfe condition // is not met. 
We choose not to raise an exception (unless no step satisfying // sufficient decrease is found), but to return the best step size so far if (iter >= param.max_linesearch) { // throw std::runtime_error("the line search routine reached the maximum number of iterations"); // First test whether the last step is better than I_lo // If yes, return the last step const Scalar ft = fx - fx_init - step * test_decr; if (ft <= fI_lo) return; // If not, then the best step size so far is I_lo, but it needs to be positive if (I_lo <= Scalar(0)) throw std::runtime_error("the line search routine is unable to sufficiently decrease the function value"); // Return everything with _lo step = I_lo; fx = fx_lo; dg = dg_lo; // Move {x, grad}_lo back x.swap(x_lo); grad.swap(grad_lo); return; } } }; } // namespace LBFGSpp #endif // LBFGSPP_LINE_SEARCH_MORE_THUENTE_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/LineSearchBracketing.h0000664000175000017510000001126715001153241023076 0ustar nileshnilesh// Copyright (C) 2016-2025 Yixuan Qiu // Copyright (C) 2016-2025 Dirk Toewe // Under MIT license #ifndef LBFGSPP_LINE_SEARCH_BRACKETING_H #define LBFGSPP_LINE_SEARCH_BRACKETING_H #include #include // std::runtime_error #include "Param.h" namespace LBFGSpp { /// /// The bracketing line search algorithm for L-BFGS. Mainly for internal use. /// template class LineSearchBracketing { private: using Vector = Eigen::Matrix; public: /// /// Line search by bracketing. Similar to the backtracking line search /// except that it actively maintains an upper and lower bound of the /// current search range. /// /// \param f A function object such that `f(x, grad)` returns the /// objective function value at `x`, and overwrites `grad` with /// the gradient. /// \param param Parameters for the L-BFGS algorithm. /// \param xp The current point. /// \param drt The current moving direction. /// \param step_max The upper bound for the step size that makes x feasible. /// Can be ignored for the L-BFGS solver. /// \param step In: The initial step length. /// Out: The calculated step length. /// \param fx In: The objective function value at the current point. /// Out: The function value at the new point. /// \param grad In: The current gradient vector. /// Out: The gradient at the new point. /// \param dg In: The inner product between drt and grad. /// Out: The inner product between drt and the new gradient. /// \param x Out: The new point moved to. 
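    ///
    /// To use this line search, pass it as the second template argument of the
    /// solver (shown here with `double` as an example scalar type):
    /// \code
    /// LBFGSpp::LBFGSParam<double> param;
    /// param.linesearch = LBFGSpp::LBFGS_LINESEARCH_BACKTRACKING_WOLFE;
    /// LBFGSpp::LBFGSSolver<double, LBFGSpp::LineSearchBracketing> solver(param);
    /// \endcode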
/// template static void LineSearch(Foo& f, const LBFGSParam& param, const Vector& xp, const Vector& drt, const Scalar& step_max, Scalar& step, Scalar& fx, Vector& grad, Scalar& dg, Vector& x) { // Check the value of step if (step <= Scalar(0)) throw std::invalid_argument("'step' must be positive"); // Save the function value at the current x const Scalar fx_init = fx; // Projection of gradient on the search direction const Scalar dg_init = grad.dot(drt); // Make sure d points to a descent direction if (dg_init > 0) throw std::logic_error("the moving direction increases the objective function value"); const Scalar test_decr = param.ftol * dg_init; // Upper and lower end of the current line search range Scalar step_lo = 0, step_hi = std::numeric_limits::infinity(); int iter; for (iter = 0; iter < param.max_linesearch; iter++) { // x_{k+1} = x_k + step * d_k x.noalias() = xp + step * drt; // Evaluate this candidate fx = f(x, grad); if (fx > fx_init + step * test_decr || (fx != fx)) { step_hi = step; } else { dg = grad.dot(drt); // Armijo condition is met if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) break; if (dg < param.wolfe * dg_init) { step_lo = step; } else { // Regular Wolfe condition is met if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) break; if (dg > -param.wolfe * dg_init) { step_hi = step; } else { // Strong Wolfe condition is met break; } } } assert(step_lo < step_hi); if (step < param.min_step) throw std::runtime_error("the line search step became smaller than the minimum value allowed"); if (step > param.max_step) throw std::runtime_error("the line search step became larger than the maximum value allowed"); // continue search in mid of current search range step = std::isinf(step_hi) ? 2 * step : step_lo / 2 + step_hi / 2; } if (iter >= param.max_linesearch) throw std::runtime_error("the line search routine reached the maximum number of iterations"); } }; } // namespace LBFGSpp #endif // LBFGSPP_LINE_SEARCH_BRACKETING_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/LineSearchBacktracking.h0000664000175000017510000001046415001153241023406 0ustar nileshnilesh// Copyright (C) 2016-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_LINE_SEARCH_BACKTRACKING_H #define LBFGSPP_LINE_SEARCH_BACKTRACKING_H #include #include // std::runtime_error #include "Param.h" namespace LBFGSpp { /// /// The backtracking line search algorithm for L-BFGS. Mainly for internal use. /// template class LineSearchBacktracking { private: using Vector = Eigen::Matrix; public: /// /// Line search by backtracking. /// /// \param f A function object such that `f(x, grad)` returns the /// objective function value at `x`, and overwrites `grad` with /// the gradient. /// \param param Parameters for the L-BFGS algorithm. /// \param xp The current point. /// \param drt The current moving direction. /// \param step_max The upper bound for the step size that makes x feasible. /// Can be ignored for the L-BFGS solver. /// \param step In: The initial step length. /// Out: The calculated step length. /// \param fx In: The objective function value at the current point. /// Out: The function value at the new point. /// \param grad In: The current gradient vector. /// Out: The gradient at the new point. /// \param dg In: The inner product between drt and grad. /// Out: The inner product between drt and the new gradient. /// \param x Out: The new point moved to. 
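    ///
    /// The acceptance criterion is selected by \ref LBFGSParam::linesearch,
    /// for example:
    /// \code
    /// LBFGSpp::LBFGSParam<double> param;
    /// // Stop as soon as the sufficient decrease (Armijo) condition holds
    /// param.linesearch = LBFGSpp::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO;
    /// // Alternatively, require the regular or strong Wolfe condition:
    /// // param.linesearch = LBFGSpp::LBFGS_LINESEARCH_BACKTRACKING_WOLFE;
    /// // param.linesearch = LBFGSpp::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
    /// \endcode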
/// template static void LineSearch(Foo& f, const LBFGSParam& param, const Vector& xp, const Vector& drt, const Scalar& step_max, Scalar& step, Scalar& fx, Vector& grad, Scalar& dg, Vector& x) { // Decreasing and increasing factors const Scalar dec = 0.5; const Scalar inc = 2.1; // Check the value of step if (step <= Scalar(0)) throw std::invalid_argument("'step' must be positive"); // Save the function value at the current x const Scalar fx_init = fx; // Projection of gradient on the search direction const Scalar dg_init = grad.dot(drt); // Make sure d points to a descent direction if (dg_init > 0) throw std::logic_error("the moving direction increases the objective function value"); const Scalar test_decr = param.ftol * dg_init; Scalar width; int iter; for (iter = 0; iter < param.max_linesearch; iter++) { // x_{k+1} = x_k + step * d_k x.noalias() = xp + step * drt; // Evaluate this candidate fx = f(x, grad); if (fx > fx_init + step * test_decr || (fx != fx)) { width = dec; } else { dg = grad.dot(drt); // Armijo condition is met if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) break; if (dg < param.wolfe * dg_init) { width = inc; } else { // Regular Wolfe condition is met if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) break; if (dg > -param.wolfe * dg_init) { width = dec; } else { // Strong Wolfe condition is met break; } } } if (step < param.min_step) throw std::runtime_error("the line search step became smaller than the minimum value allowed"); if (step > param.max_step) throw std::runtime_error("the line search step became larger than the maximum value allowed"); step *= width; } if (iter >= param.max_linesearch) throw std::runtime_error("the line search routine reached the maximum number of iterations"); } }; } // namespace LBFGSpp #endif // LBFGSPP_LINE_SEARCH_BACKTRACKING_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/Cauchy.h0000664000175000017510000002476215001153241020307 0ustar nileshnilesh// Copyright (C) 2020-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_CAUCHY_H #define LBFGSPP_CAUCHY_H #include #include #include "BFGSMat.h" /// \cond namespace LBFGSpp { // // Class to compute the generalized Cauchy point (GCP) for the L-BFGS-B algorithm, // mainly for internal use. // // The target of the GCP procedure is to find a step size t such that // x(t) = x0 - t * g is a local minimum of the quadratic function m(x), // where m(x) is a local approximation to the objective function. // // First determine a sequence of break points t0=0, t1, t2, ..., tn. // On each interval [t[i-1], t[i]], x is changing linearly. // After passing a break point, one or more coordinates of x will be fixed at the bounds. // We search the first local minimum of m(x) by examining the intervals [t[i-1], t[i]] sequentially. // // Reference: // [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization. // template class ArgSort { private: using Vector = Eigen::Matrix; using IndexSet = std::vector; const Scalar* values; public: ArgSort(const Vector& value_vec) : values(value_vec.data()) {} inline bool operator()(int key1, int key2) { return values[key1] < values[key2]; } inline void sort_key(IndexSet& key_vec) const { std::sort(key_vec.begin(), key_vec.end(), *this); } }; template class Cauchy { private: typedef Eigen::Matrix Vector; typedef Eigen::Matrix IntVector; typedef Eigen::Matrix Matrix; typedef std::vector IndexSet; // Find the smallest index i such that brk[ord[i]] > t, assuming brk[ord] is already sorted. 
// If the return value equals n, then all values are <= t. static int search_greater(const Vector& brk, const IndexSet& ord, const Scalar& t, int start = 0) { const int nord = ord.size(); int i; for (i = start; i < nord; i++) { if (brk[ord[i]] > t) break; } return i; } public: // bfgs: An object that represents the BFGS approximation matrix. // x0: Current parameter vector. // g: Gradient at x0. // lb: Lower bounds for x. // ub: Upper bounds for x. // xcp: The output generalized Cauchy point. // vecc: c = W'(xcp - x0), used in the subspace minimization routine. // newact_set: Coordinates that newly become active during the GCP procedure. // fv_set: Free variable set. static void get_cauchy_point( const BFGSMat& bfgs, const Vector& x0, const Vector& g, const Vector& lb, const Vector& ub, Vector& xcp, Vector& vecc, IndexSet& newact_set, IndexSet& fv_set) { // std::cout << "========================= Entering GCP search =========================\n\n"; // Initialization const int n = x0.size(); xcp.resize(n); xcp.noalias() = x0; vecc.resize(2 * bfgs.num_corrections()); vecc.setZero(); newact_set.clear(); newact_set.reserve(n); fv_set.clear(); fv_set.reserve(n); // Construct break points Vector brk(n), vecd(n); // If brk[i] == 0, i belongs to active set // If brk[i] == Inf, i belongs to free variable set // Others are currently undecided IndexSet ord; ord.reserve(n); const Scalar inf = std::numeric_limits::infinity(); for (int i = 0; i < n; i++) { if (lb[i] == ub[i]) brk[i] = Scalar(0); else if (g[i] < Scalar(0)) brk[i] = (x0[i] - ub[i]) / g[i]; else if (g[i] > Scalar(0)) brk[i] = (x0[i] - lb[i]) / g[i]; else brk[i] = inf; const bool iszero = (brk[i] == Scalar(0)); vecd[i] = iszero ? Scalar(0) : -g[i]; if (brk[i] == inf) fv_set.push_back(i); else if (!iszero) ord.push_back(i); } // Sort indices of break points ArgSort sorting(brk); sorting.sort_key(ord); // Break points `brko := brk[ord]` are in increasing order // `ord` contains the coordinates that define the corresponding break points // brk[i] == 0 <=> The i-th coordinate is on the boundary const int nord = ord.size(); const int nfree = fv_set.size(); if ((nfree < 1) && (nord < 1)) { /* std::cout << "** All coordinates at boundary **\n"; std::cout << "\n========================= Leaving GCP search =========================\n\n"; */ return; } // First interval: [il=0, iu=brk[ord[0]]] // In case ord is empty, we take iu=Inf // p = W'd, c = 0 Vector vecp; bfgs.apply_Wtv(vecd, vecp); // f' = -d'd Scalar fp = -vecd.squaredNorm(); // f'' = -theta * f' - p'Mp Vector cache; bfgs.apply_Mv(vecp, cache); // cache = Mp Scalar fpp = -bfgs.theta() * fp - vecp.dot(cache); // Theoretical step size to move Scalar deltatmin = -fp / fpp; // Limit on the current interval Scalar il = Scalar(0); // We have excluded the case that max(brk) <= 0 int b = 0; Scalar iu = (nord < 1) ? inf : brk[ord[b]]; Scalar deltat = iu - il; /* int iter = 0; std::cout << "** Iter " << iter << " **\n"; std::cout << " fp = " << fp << ", fpp = " << fpp << ", deltatmin = " << deltatmin << std::endl; std::cout << " il = " << il << ", iu = " << iu << ", deltat = " << deltat << std::endl; */ // If deltatmin >= deltat, we need to do the following things: // 1. Update vecc // 2. Since we are going to cross iu, the coordinates that define iu become active // 3. Update some quantities on these new active coordinates (xcp, vecd, vecp) // 4. 
Move to the next interval and compute the new deltatmin bool crossed_all = false; const int ncorr = bfgs.num_corrections(); Vector wact(2 * ncorr); while (deltatmin >= deltat) { // Step 1 vecc.noalias() += deltat * vecp; // Step 2 // First check how many coordinates will be active when we cross the previous iu // b is the smallest number such that brko[b] == iu // Let bp be the largest number such that brko[bp] == iu // Then coordinates ord[b] to ord[bp] will be active const int act_begin = b; const int act_end = search_greater(brk, ord, iu, b) - 1; // If nfree == 0 and act_end == nord-1, then we have crossed all coordinates // We only need to update xcp from ord[b] to ord[bp], and then exit if ((nfree == 0) && (act_end == nord - 1)) { // std::cout << "** [ "; for (int i = act_begin; i <= act_end; i++) { const int act = ord[i]; xcp[act] = (vecd[act] > Scalar(0)) ? ub[act] : lb[act]; newact_set.push_back(act); // std::cout << act + 1 << " "; } // std::cout << "] become active **\n\n"; // std::cout << "** All break points visited **\n\n"; crossed_all = true; break; } // Step 3 // Update xcp and d on active coordinates // std::cout << "** [ "; fp += deltat * fpp; for (int i = act_begin; i <= act_end; i++) { const int act = ord[i]; xcp[act] = (vecd[act] > Scalar(0)) ? ub[act] : lb[act]; // z = xcp - x0 const Scalar zact = xcp[act] - x0[act]; const Scalar gact = g[act]; const Scalar ggact = gact * gact; wact.noalias() = bfgs.Wb(act); bfgs.apply_Mv(wact, cache); // cache = Mw fp += ggact + bfgs.theta() * gact * zact - gact * cache.dot(vecc); fpp -= (bfgs.theta() * ggact + 2 * gact * cache.dot(vecp) + ggact * cache.dot(wact)); vecp.noalias() += gact * wact; vecd[act] = Scalar(0); newact_set.push_back(act); // std::cout << act + 1 << " "; } // std::cout << "] become active **\n\n"; // Step 4 // Theoretical step size to move deltatmin = -fp / fpp; // Update interval bound il = iu; b = act_end + 1; // If we have visited all finite-valued break points, and have not exited earlier, // then the next iu will be infinity. 
Simply exit the loop now if (b >= nord) break; iu = brk[ord[b]]; // Width of the current interval deltat = iu - il; /* iter++; std::cout << "** Iter " << iter << " **\n"; std::cout << " fp = " << fp << ", fpp = " << fpp << ", deltatmin = " << deltatmin << std::endl; std::cout << " il = " << il << ", iu = " << iu << ", deltat = " << deltat << std::endl; */ } // In some rare cases fpp is numerically zero, making deltatmin equal to Inf // If this happens, force fpp to be the machine precision const Scalar eps = std::numeric_limits::epsilon(); if (fpp < eps) deltatmin = -fp / eps; // Last step if (!crossed_all) { deltatmin = std::max(deltatmin, Scalar(0)); vecc.noalias() += deltatmin * vecp; const Scalar tfinal = il + deltatmin; // Update xcp on free variable coordinates for (int i = 0; i < nfree; i++) { const int coord = fv_set[i]; xcp[coord] = x0[coord] + tfinal * vecd[coord]; } for (int i = b; i < nord; i++) { const int coord = ord[i]; xcp[coord] = x0[coord] + tfinal * vecd[coord]; fv_set.push_back(coord); } } // std::cout << "\n========================= Leaving GCP search =========================\n\n"; } }; } // namespace LBFGSpp /// \endcond #endif // LBFGSPP_CAUCHY_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/BKLDLT.h0000664000175000017510000004065615001153241020047 0ustar nileshnilesh// Copyright (C) 2020-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_BK_LDLT_H #define LBFGSPP_BK_LDLT_H #include #include #include /// \cond namespace LBFGSpp { enum COMPUTATION_INFO { SUCCESSFUL = 0, NOT_COMPUTED, NUMERICAL_ISSUE }; // Bunch-Kaufman LDLT decomposition // References: // 1. Bunch, J. R., & Kaufman, L. (1977). Some stable methods for calculating inertia and solving symmetric linear systems. // Mathematics of computation, 31(137), 163-179. // 2. Golub, G. H., & Van Loan, C. F. (2012). Matrix computations (Vol. 3). JHU press. Section 4.4. // 3. Bunch-Parlett diagonal pivoting // 4. Ashcraft, C., Grimes, R. G., & Lewis, J. G. (1998). Accurate symmetric indefinite linear equation solvers. // SIAM Journal on Matrix Analysis and Applications, 20(2), 513-561. 
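//
// Usage sketch (illustration only, not part of the public API; the scalar type,
// matrix contents and vector b below are assumptions made for the example):
// factorize once, then solve one or more right-hand sides.
//
//     Eigen::MatrixXd A(n, n);                 // symmetric (possibly indefinite) matrix
//     Eigen::VectorXd b(n);                    // right-hand side
//     // ... fill A and b ...
//     LBFGSpp::BKLDLT<double> ldlt(A);         // computes P * A * P' = L * D * L'
//     if (ldlt.info() == LBFGSpp::SUCCESSFUL)
//     {
//         Eigen::VectorXd x = ldlt.solve(b);   // solves A * x = b
//     }
//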
template class BKLDLT { private: using Index = Eigen::Index; using Matrix = Eigen::Matrix; using Vector = Eigen::Matrix; using MapVec = Eigen::Map; using MapConstVec = Eigen::Map; using IntVector = Eigen::Matrix; using GenericVector = Eigen::Ref; using GenericMatrix = Eigen::Ref; using ConstGenericMatrix = const Eigen::Ref; using ConstGenericVector = const Eigen::Ref; Index m_n; Vector m_data; // storage for a lower-triangular matrix std::vector m_colptr; // pointers to columns IntVector m_perm; // [-2, -1, 3, 1, 4, 5]: 0 <-> 2, 1 <-> 1, 2 <-> 3, 3 <-> 1, 4 <-> 4, 5 <-> 5 std::vector > m_permc; // compressed version of m_perm: [(0, 2), (2, 3), (3, 1)] bool m_computed; int m_info; // Access to elements // Pointer to the k-th column Scalar* col_pointer(Index k) { return m_colptr[k]; } // A[i, j] -> m_colptr[j][i - j], i >= j Scalar& coeff(Index i, Index j) { return m_colptr[j][i - j]; } const Scalar& coeff(Index i, Index j) const { return m_colptr[j][i - j]; } // A[i, i] -> m_colptr[i][0] Scalar& diag_coeff(Index i) { return m_colptr[i][0]; } const Scalar& diag_coeff(Index i) const { return m_colptr[i][0]; } // Compute column pointers void compute_pointer() { m_colptr.clear(); m_colptr.reserve(m_n); Scalar* head = m_data.data(); for (Index i = 0; i < m_n; i++) { m_colptr.push_back(head); head += (m_n - i); } } // Copy mat - shift * I to m_data void copy_data(ConstGenericMatrix& mat, int uplo, const Scalar& shift) { if (uplo == Eigen::Lower) { for (Index j = 0; j < m_n; j++) { const Scalar* begin = &mat.coeffRef(j, j); const Index len = m_n - j; std::copy(begin, begin + len, col_pointer(j)); diag_coeff(j) -= shift; } } else { Scalar* dest = m_data.data(); for (Index i = 0; i < m_n; i++) { for (Index j = i; j < m_n; j++, dest++) { *dest = mat.coeff(i, j); } diag_coeff(i) -= shift; } } } // Compute compressed permutations void compress_permutation() { for (Index i = 0; i < m_n; i++) { // Recover the permutation action const Index perm = (m_perm[i] >= 0) ? 
(m_perm[i]) : (-m_perm[i] - 1); if (perm != i) m_permc.push_back(std::make_pair(i, perm)); } } // Working on the A[k:end, k:end] submatrix // Exchange k <-> r // Assume r >= k void pivoting_1x1(Index k, Index r) { // No permutation if (k == r) { m_perm[k] = r; return; } // A[k, k] <-> A[r, r] std::swap(diag_coeff(k), diag_coeff(r)); // A[(r+1):end, k] <-> A[(r+1):end, r] std::swap_ranges(&coeff(r + 1, k), col_pointer(k + 1), &coeff(r + 1, r)); // A[(k+1):(r-1), k] <-> A[r, (k+1):(r-1)] Scalar* src = &coeff(k + 1, k); for (Index j = k + 1; j < r; j++, src++) { std::swap(*src, coeff(r, j)); } m_perm[k] = r; } // Working on the A[k:end, k:end] submatrix // Exchange [k+1, k] <-> [r, p] // Assume p >= k, r >= k+1 void pivoting_2x2(Index k, Index r, Index p) { pivoting_1x1(k, p); pivoting_1x1(k + 1, r); // A[k+1, k] <-> A[r, k] std::swap(coeff(k + 1, k), coeff(r, k)); // Use negative signs to indicate a 2x2 block // Also minus one to distinguish a negative zero from a positive zero m_perm[k] = -m_perm[k] - 1; m_perm[k + 1] = -m_perm[k + 1] - 1; } // A[r1, c1:c2] <-> A[r2, c1:c2] // Assume r2 >= r1 > c2 >= c1 void interchange_rows(Index r1, Index r2, Index c1, Index c2) { if (r1 == r2) return; for (Index j = c1; j <= c2; j++) { std::swap(coeff(r1, j), coeff(r2, j)); } } // lambda = |A[r, k]| = max{|A[k+1, k]|, ..., |A[end, k]|} // Largest (in magnitude) off-diagonal element in the first column of the current reduced matrix // r is the row index // Assume k < end Scalar find_lambda(Index k, Index& r) { using std::abs; const Scalar* head = col_pointer(k); // => A[k, k] const Scalar* end = col_pointer(k + 1); // Start with r=k+1, lambda=A[k+1, k] r = k + 1; Scalar lambda = abs(head[1]); // Scan remaining elements for (const Scalar* ptr = head + 2; ptr < end; ptr++) { const Scalar abs_elem = abs(*ptr); if (lambda < abs_elem) { lambda = abs_elem; r = k + (ptr - head); } } return lambda; } // sigma = |A[p, r]| = max {|A[k, r]|, ..., |A[end, r]|} \ {A[r, r]} // Largest (in magnitude) off-diagonal element in the r-th column of the current reduced matrix // p is the row index // Assume k < r < end Scalar find_sigma(Index k, Index r, Index& p) { using std::abs; // First search A[r+1, r], ..., A[end, r], which has the same task as find_lambda() // If r == end, we skip this search Scalar sigma = Scalar(-1); if (r < m_n - 1) sigma = find_lambda(r, p); // Then search A[k, r], ..., A[r-1, r], which maps to A[r, k], ..., A[r, r-1] for (Index j = k; j < r; j++) { const Scalar abs_elem = abs(coeff(r, j)); if (sigma < abs_elem) { sigma = abs_elem; p = j; } } return sigma; } // Generate permutations and apply to A // Return true if the resulting pivoting is 1x1, and false if 2x2 bool permutate_mat(Index k, const Scalar& alpha) { using std::abs; Index r = k, p = k; const Scalar lambda = find_lambda(k, r); // If lambda=0, no need to interchange if (lambda > Scalar(0)) { const Scalar abs_akk = abs(diag_coeff(k)); // If |A[k, k]| >= alpha * lambda, no need to interchange if (abs_akk < alpha * lambda) { const Scalar sigma = find_sigma(k, r, p); // If sigma * |A[k, k]| >= alpha * lambda^2, no need to interchange if (sigma * abs_akk < alpha * lambda * lambda) { if (abs_akk >= alpha * sigma) { // Permutation on A pivoting_1x1(k, r); // Permutation on L interchange_rows(k, r, 0, k - 1); return true; } else { // There are two versions of permutation here // 1. A[k+1, k] <-> A[r, k] // 2. 
A[k+1, k] <-> A[r, p], where p >= k and r >= k+1 // // Version 1 and 2 are used by Ref[1] and Ref[2], respectively // Version 1 implementation p = k; // Version 2 implementation // [r, p] and [p, r] are symmetric, but we need to make sure // p >= k and r >= k+1, so it is safe to always make r > p // One exception is when min{r,p} == k+1, in which case we make // r = k+1, so that only one permutation needs to be performed /* const Index rp_min = std::min(r, p); const Index rp_max = std::max(r, p); if(rp_min == k + 1) { r = rp_min; p = rp_max; } else { r = rp_max; p = rp_min; } */ // Right now we use Version 1 since it reduces the overhead of interchange // Permutation on A pivoting_2x2(k, r, p); // Permutation on L interchange_rows(k, p, 0, k - 1); interchange_rows(k + 1, r, 0, k - 1); return false; } } } } return true; } // E = [e11, e12] // [e21, e22] // Overwrite E with inv(E) void inverse_inplace_2x2(Scalar& e11, Scalar& e21, Scalar& e22) const { // inv(E) = [d11, d12], d11 = e22/delta, d21 = -e21/delta, d22 = e11/delta // [d21, d22] const Scalar delta = e11 * e22 - e21 * e21; std::swap(e11, e22); e11 /= delta; e22 /= delta; e21 = -e21 / delta; } // Return value is the status, SUCCESSFUL/NUMERICAL_ISSUE int gaussian_elimination_1x1(Index k) { // D = 1 / A[k, k] const Scalar akk = diag_coeff(k); // Return NUMERICAL_ISSUE if not invertible if (akk == Scalar(0)) return NUMERICAL_ISSUE; diag_coeff(k) = Scalar(1) / akk; // B -= l * l' / A[k, k], B := A[(k+1):end, (k+1):end], l := L[(k+1):end, k] Scalar* lptr = col_pointer(k) + 1; const Index ldim = m_n - k - 1; MapVec l(lptr, ldim); for (Index j = 0; j < ldim; j++) { MapVec(col_pointer(j + k + 1), ldim - j).noalias() -= (lptr[j] / akk) * l.tail(ldim - j); } // l /= A[k, k] l /= akk; return SUCCESSFUL; } // Return value is the status, SUCCESSFUL/NUMERICAL_ISSUE int gaussian_elimination_2x2(Index k) { // D = inv(E) Scalar& e11 = diag_coeff(k); Scalar& e21 = coeff(k + 1, k); Scalar& e22 = diag_coeff(k + 1); // Return NUMERICAL_ISSUE if not invertible if (e11 * e22 - e21 * e21 == Scalar(0)) return NUMERICAL_ISSUE; inverse_inplace_2x2(e11, e21, e22); // X = l * inv(E), l := L[(k+2):end, k:(k+1)] Scalar* l1ptr = &coeff(k + 2, k); Scalar* l2ptr = &coeff(k + 2, k + 1); const Index ldim = m_n - k - 2; MapVec l1(l1ptr, ldim), l2(l2ptr, ldim); Eigen::Matrix X(ldim, 2); X.col(0).noalias() = l1 * e11 + l2 * e21; X.col(1).noalias() = l1 * e21 + l2 * e22; // B -= l * inv(E) * l' = X * l', B = A[(k+2):end, (k+2):end] for (Index j = 0; j < ldim; j++) { MapVec(col_pointer(j + k + 2), ldim - j).noalias() -= (X.col(0).tail(ldim - j) * l1ptr[j] + X.col(1).tail(ldim - j) * l2ptr[j]); } // l = X l1.noalias() = X.col(0); l2.noalias() = X.col(1); return SUCCESSFUL; } public: BKLDLT() : m_n(0), m_computed(false), m_info(NOT_COMPUTED) {} // Factorize mat - shift * I BKLDLT(ConstGenericMatrix& mat, int uplo = Eigen::Lower, const Scalar& shift = Scalar(0)) : m_n(mat.rows()), m_computed(false), m_info(NOT_COMPUTED) { compute(mat, uplo, shift); } void compute(ConstGenericMatrix& mat, int uplo = Eigen::Lower, const Scalar& shift = Scalar(0)) { using std::abs; m_n = mat.rows(); if (m_n != mat.cols()) throw std::invalid_argument("BKLDLT: matrix must be square"); m_perm.setLinSpaced(m_n, 0, m_n - 1); m_permc.clear(); // Copy data m_data.resize((m_n * (m_n + 1)) / 2); compute_pointer(); copy_data(mat, uplo, shift); const Scalar alpha = (1.0 + std::sqrt(17.0)) / 8.0; Index k = 0; for (k = 0; k < m_n - 1; k++) { // 1. 
Interchange rows and columns of A, and save the result to m_perm bool is_1x1 = permutate_mat(k, alpha); // 2. Gaussian elimination if (is_1x1) { m_info = gaussian_elimination_1x1(k); } else { m_info = gaussian_elimination_2x2(k); k++; } // 3. Check status if (m_info != SUCCESSFUL) break; } // Invert the last 1x1 block if it exists if (k == m_n - 1) { const Scalar akk = diag_coeff(k); if (akk == Scalar(0)) m_info = NUMERICAL_ISSUE; diag_coeff(k) = Scalar(1) / diag_coeff(k); } compress_permutation(); m_computed = true; } // Solve Ax=b void solve_inplace(GenericVector b) const { if (!m_computed) throw std::logic_error("BKLDLT: need to call compute() first"); // PAP' = LDL' // 1. b -> Pb Scalar* x = b.data(); MapVec res(x, m_n); Index npermc = m_permc.size(); for (Index i = 0; i < npermc; i++) { std::swap(x[m_permc[i].first], x[m_permc[i].second]); } // 2. Lz = Pb // If m_perm[end] < 0, then end with m_n - 3, otherwise end with m_n - 2 const Index end = (m_perm[m_n - 1] < 0) ? (m_n - 3) : (m_n - 2); for (Index i = 0; i <= end; i++) { const Index b1size = m_n - i - 1; const Index b2size = b1size - 1; if (m_perm[i] >= 0) { MapConstVec l(&coeff(i + 1, i), b1size); res.segment(i + 1, b1size).noalias() -= l * x[i]; } else { MapConstVec l1(&coeff(i + 2, i), b2size); MapConstVec l2(&coeff(i + 2, i + 1), b2size); res.segment(i + 2, b2size).noalias() -= (l1 * x[i] + l2 * x[i + 1]); i++; } } // 3. Dw = z for (Index i = 0; i < m_n; i++) { const Scalar e11 = diag_coeff(i); if (m_perm[i] >= 0) { x[i] *= e11; } else { const Scalar e21 = coeff(i + 1, i), e22 = diag_coeff(i + 1); const Scalar wi = x[i] * e11 + x[i + 1] * e21; x[i + 1] = x[i] * e21 + x[i + 1] * e22; x[i] = wi; i++; } } // 4. L'y = w // If m_perm[end] < 0, then start with m_n - 3, otherwise start with m_n - 2 Index i = (m_perm[m_n - 1] < 0) ? (m_n - 3) : (m_n - 2); for (; i >= 0; i--) { const Index ldim = m_n - i - 1; MapConstVec l(&coeff(i + 1, i), ldim); x[i] -= res.segment(i + 1, ldim).dot(l); if (m_perm[i] < 0) { MapConstVec l2(&coeff(i + 1, i - 1), ldim); x[i - 1] -= res.segment(i + 1, ldim).dot(l2); i--; } } // 5. x = P'y for (i = npermc - 1; i >= 0; i--) { std::swap(x[m_permc[i].first], x[m_permc[i].second]); } } Vector solve(ConstGenericVector& b) const { Vector res = b; solve_inplace(res); return res; } int info() const { return m_info; } }; } // namespace LBFGSpp /// \endcond #endif // LBFGSPP_BK_LDLT_H yixuan-LBFGSpp-c524a40/include/LBFGSpp/BFGSMat.h0000664000175000017510000005332315001153241020251 0ustar nileshnilesh// Copyright (C) 2020-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_BFGS_MAT_H #define LBFGSPP_BFGS_MAT_H #include #include #include #include "BKLDLT.h" /// \cond namespace LBFGSpp { // // An *implicit* representation of the BFGS approximation to the Hessian matrix // // B = theta * I - W * M * W' -- approximation to Hessian matrix, see [2] // H = inv(B) -- approximation to inverse Hessian matrix, see [2] // // Reference: // [1] D. C. Liu and J. Nocedal (1989). On the limited memory BFGS method for large scale optimization. // [2] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization. 
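//
// Internal usage sketch (illustration only; this class is an implementation detail
// driven by the solver classes, and the names s, y, grad, drt below are assumptions
// made for the example). One solver iteration roughly does:
//
//     BFGSMat<double> bfgs;
//     bfgs.reset(n, m);                    // n variables, at most m correction pairs
//     // ... after each accepted step ...
//     bfgs.add_correction(s, y);           // s = x_new - x_old, y = g_new - g_old
//     bfgs.apply_Hv(grad, -1.0, drt);      // drt = -H * grad via the two-loop recursion
//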
// template class BFGSMat { private: using Vector = Eigen::Matrix; using Matrix = Eigen::Matrix; using RefConstVec = Eigen::Ref; using IndexSet = std::vector; int m_m; // Maximum number of correction vectors Scalar m_theta; // theta * I is the initial approximation to the Hessian matrix Matrix m_s; // History of the s vectors Matrix m_y; // History of the y vectors Vector m_ys; // History of the s'y values Vector m_alpha; // Temporary values used in computing H * v int m_ncorr; // Number of correction vectors in the history, m_ncorr <= m int m_ptr; // A Pointer to locate the most recent history, 1 <= m_ptr <= m // Details: s and y vectors are stored in cyclic order. // For example, if the current s-vector is stored in m_s[, m-1], // then in the next iteration m_s[, 0] will be overwritten. // m_s[, m_ptr-1] points to the most recent history (if ncorr > 0), // and m_s[, m_ptr % m] points to the location that will be // overwritten next time. //========== The following members are only used in L-BFGS-B algorithm ==========// Matrix m_permMinv; // Permutated M inverse BKLDLT m_permMsolver; // Represents the permutated M matrix public: // Constructor BFGSMat() {} // Reset internal variables // n: dimension of the vector to be optimized // m: maximum number of corrections to approximate the Hessian matrix inline void reset(int n, int m) { m_m = m; m_theta = Scalar(1); m_s.resize(n, m); m_y.resize(n, m); m_ys.resize(m); m_alpha.resize(m); m_ncorr = 0; m_ptr = m; // This makes sure that m_ptr % m == 0 in the first step if (LBFGSB) { m_permMinv.resize(2 * m, 2 * m); m_permMinv.setZero(); m_permMinv.diagonal().setOnes(); } } // Add correction vectors to the BFGS matrix inline void add_correction(const RefConstVec& s, const RefConstVec& y) { const int loc = m_ptr % m_m; m_s.col(loc).noalias() = s; m_y.col(loc).noalias() = y; // ys = y's = 1/rho const Scalar ys = m_s.col(loc).dot(m_y.col(loc)); m_ys[loc] = ys; m_theta = m_y.col(loc).squaredNorm() / ys; if (m_ncorr < m_m) m_ncorr++; m_ptr = loc + 1; if (LBFGSB) { // Minv = [-D L'] // [ L theta*S'S] // Copy -D // Let S=[s[0], ..., s[m-1]], Y=[y[0], ..., y[m-1]] // D = [s[0]'y[0], ..., s[m-1]'y[m-1]] m_permMinv(loc, loc) = -ys; // Update S'S // We only store S'S in Minv, and multiply theta when LU decomposition is performed Vector Ss = m_s.leftCols(m_ncorr).transpose() * m_s.col(loc); m_permMinv.block(m_m + loc, m_m, 1, m_ncorr).noalias() = Ss.transpose(); m_permMinv.block(m_m, m_m + loc, m_ncorr, 1).noalias() = Ss; // Compute L // L = [ 0 ] // [ s[1]'y[0] 0 ] // [ s[2]'y[0] s[2]'y[1] ] // ... // [s[m-1]'y[0] ... ... ... ... ... s[m-1]'y[m-2] 0] // // L_next = [ 0 ] // [s[2]'y[1] 0 ] // [s[3]'y[1] s[3]'y[2] ] // ... // [s[m]'y[1] ... ... ... ... ... 
s[m]'y[m-1] 0] const int len = m_ncorr - 1; // First zero out the column of oldest y if (m_ncorr >= m_m) m_permMinv.block(m_m, loc, m_m, 1).setZero(); // Compute the row associated with new s // The current row is loc // End with column (loc + m - 1) % m // Length is len int yloc = (loc + m_m - 1) % m_m; for (int i = 0; i < len; i++) { m_permMinv(m_m + loc, yloc) = m_s.col(loc).dot(m_y.col(yloc)); yloc = (yloc + m_m - 1) % m_m; } // Matrix LDLT factorization m_permMinv.block(m_m, m_m, m_m, m_m) *= m_theta; m_permMsolver.compute(m_permMinv); m_permMinv.block(m_m, m_m, m_m, m_m) /= m_theta; } } // Explicitly form the B matrix inline Matrix get_Bmat() const { // Initial approximation theta * I const int n = m_s.rows(); Matrix B = m_theta * Matrix::Identity(n, n); if (m_ncorr < 1) return B; // Construct W matrix, W = [Y, theta * S] // Y = [y0, y1, ..., yc] // S = [s0, s1, ..., sc] // We first set W = [Y, S], since later we still need Y and S matrices // After computing Minv, we rescale the S part in W Matrix W(n, 2 * m_ncorr); // r = m_ptr - 1 points to the most recent element, // (r + 1) % m_ncorr points to the oldest element int j = m_ptr % m_ncorr; for (int i = 0; i < m_ncorr; i++) { W.col(i).noalias() = m_y.col(j); W.col(m_ncorr + i).noalias() = m_s.col(j); j = (j + 1) % m_m; } // Now Y = W[:, :c], S = W[:, c:] // Construct Minv matrix, Minv = [-D L' ] // [ L theta * S'S] // D = diag(y0's0, ..., yc'sc) Matrix Minv(2 * m_ncorr, 2 * m_ncorr); Minv.topLeftCorner(m_ncorr, m_ncorr).setZero(); Vector ys = W.leftCols(m_ncorr).cwiseProduct(W.rightCols(m_ncorr)).colwise().sum().transpose(); Minv.diagonal().head(m_ncorr).noalias() = -ys; // L = [ 0 ] // [ s[1]'y[0] 0 ] // [ s[2]'y[0] s[2]'y[1] ] // ... // [s[c-1]'y[0] ... ... ... ... ... s[c-1]'y[c-2] 0] Minv.bottomLeftCorner(m_ncorr, m_ncorr).setZero(); for (int i = 0; i < m_ncorr - 1; i++) { // Number of terms for this column const int nterm = m_ncorr - i - 1; // S[:, -nterm:]'Y[:, j] Minv.col(i).tail(nterm).noalias() = W.rightCols(nterm).transpose() * W.col(i); } // The symmetric block Minv.topRightCorner(m_ncorr, m_ncorr).noalias() = Minv.bottomLeftCorner(m_ncorr, m_ncorr).transpose(); // theta * S'S Minv.bottomRightCorner(m_ncorr, m_ncorr).noalias() = m_theta * W.rightCols(m_ncorr).transpose() * W.rightCols(m_ncorr); // Set the true W matrix W.rightCols(m_ncorr).array() *= m_theta; // Compute B = theta * I - W * M * W' Eigen::PartialPivLU M_solver(Minv); B.noalias() -= W * M_solver.solve(W.transpose()); return B; } // Explicitly form the H matrix inline Matrix get_Hmat() const { // Initial approximation 1/theta * I const int n = m_s.rows(); Matrix H = (Scalar(1) / m_theta) * Matrix::Identity(n, n); if (m_ncorr < 1) return H; // Construct W matrix, W = [1/theta * Y, S] // Y = [y0, y1, ..., yc] // S = [s0, s1, ..., sc] // We first set W = [Y, S], since later we still need Y and S matrices // After computing M, we rescale the Y part in W Matrix W(n, 2 * m_ncorr); // p = m_ptr - 1 points to the most recent element, // (p + 1) % m_ncorr points to the oldest element int j = m_ptr % m_ncorr; for (int i = 0; i < m_ncorr; i++) { W.col(i).noalias() = m_y.col(j); W.col(m_ncorr + i).noalias() = m_s.col(j); j = (j + 1) % m_m; } // Now Y = W[:, :c], S = W[:, c:] // Construct M matrix, M = [ 0 -inv(R) ] // [ -inv(R)' inv(R)'(D + 1/theta * Y'Y)inv(R) ] // D = diag(y0's0, ..., yc'sc) Matrix M(2 * m_ncorr, 2 * m_ncorr); // First use M[:c, :c] to store R // R = [s[0]'y[0] s[0]'y[1] ... s[0]'y[c-1] ] // [ 0 s[1]'y[1] ... s[1]'y[c-1] ] // ... // [ 0 0 ... 
s[c-1]'y[c-1] ] for (int i = 0; i < m_ncorr; i++) { M.col(i).head(i + 1).noalias() = W.middleCols(m_ncorr, i + 1).transpose() * W.col(i); } // Compute inv(R) Matrix Rinv = M.topLeftCorner(m_ncorr, m_ncorr).template triangularView().solve(Matrix::Identity(m_ncorr, m_ncorr)); // Zero out the top left block M.topLeftCorner(m_ncorr, m_ncorr).setZero(); // Set the top right block M.topRightCorner(m_ncorr, m_ncorr).noalias() = -Rinv; // The symmetric block M.bottomLeftCorner(m_ncorr, m_ncorr).noalias() = -Rinv.transpose(); // 1/theta * Y'Y Matrix block = (Scalar(1) / m_theta) * W.leftCols(m_ncorr).transpose() * W.leftCols(m_ncorr); // D + 1/theta * Y'Y Vector ys = W.leftCols(m_ncorr).cwiseProduct(W.rightCols(m_ncorr)).colwise().sum().transpose(); block.diagonal().array() += ys.array(); // The bottom right block M.bottomRightCorner(m_ncorr, m_ncorr).noalias() = Rinv.transpose() * block * Rinv; // Set the true W matrix W.leftCols(m_ncorr).array() *= (Scalar(1) / m_theta); // Compute H = 1/theta * I + W * M * W' H.noalias() += W * M * W.transpose(); return H; } // Recursive formula to compute a * H * v, where a is a scalar, and v is [n x 1] // H0 = (1/theta) * I is the initial approximation to H // Algorithm 7.4 of Nocedal, J., & Wright, S. (2006). Numerical optimization. inline void apply_Hv(const Vector& v, const Scalar& a, Vector& res) { res.resize(v.size()); // L-BFGS two-loop recursion // Loop 1 res.noalias() = a * v; int j = m_ptr % m_m; for (int i = 0; i < m_ncorr; i++) { j = (j + m_m - 1) % m_m; m_alpha[j] = m_s.col(j).dot(res) / m_ys[j]; res.noalias() -= m_alpha[j] * m_y.col(j); } // Apply initial H0 res /= m_theta; // Loop 2 for (int i = 0; i < m_ncorr; i++) { const Scalar beta = m_y.col(j).dot(res) / m_ys[j]; res.noalias() += (m_alpha[j] - beta) * m_s.col(j); j = (j + 1) % m_m; } } //========== The following functions are only used in L-BFGS-B algorithm ==========// // Return the value of theta inline Scalar theta() const { return m_theta; } // Return current number of correction vectors inline int num_corrections() const { return m_ncorr; } // W = [Y, theta * S] // W [n x (2*ncorr)], v [n x 1], res [(2*ncorr) x 1] // res preserves the ordering of Y and S columns inline void apply_Wtv(const Vector& v, Vector& res) const { res.resize(2 * m_ncorr); res.head(m_ncorr).noalias() = m_y.leftCols(m_ncorr).transpose() * v; res.tail(m_ncorr).noalias() = m_theta * m_s.leftCols(m_ncorr).transpose() * v; } // The b-th row of the W matrix // Preserves the ordering of Y and S columns // Return as a column vector inline Vector Wb(int b) const { Vector res(2 * m_ncorr); for (int j = 0; j < m_ncorr; j++) { res[j] = m_y(b, j); res[m_ncorr + j] = m_s(b, j); } res.tail(m_ncorr) *= m_theta; return res; } // Extract rows of W inline Matrix Wb(const IndexSet& b) const { const int nb = b.size(); const int* bptr = b.data(); Matrix res(nb, 2 * m_ncorr); for (int j = 0; j < m_ncorr; j++) { const Scalar* Yptr = &m_y(0, j); const Scalar* Sptr = &m_s(0, j); Scalar* resYptr = res.data() + j * nb; Scalar* resSptr = resYptr + m_ncorr * nb; for (int i = 0; i < nb; i++) { const int row = bptr[i]; resYptr[i] = Yptr[row]; resSptr[i] = Sptr[row]; } } return res; } // M is [(2*ncorr) x (2*ncorr)], v is [(2*ncorr) x 1] inline void apply_Mv(const Vector& v, Vector& res) const { res.resize(2 * m_ncorr); if (m_ncorr < 1) return; Vector vpadding = Vector::Zero(2 * m_m); vpadding.head(m_ncorr).noalias() = v.head(m_ncorr); vpadding.segment(m_m, m_ncorr).noalias() = v.tail(m_ncorr); // Solve linear equation 
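        // The factorization is stored in a fixed (2m x 2m) layout whose S-block starts
        // at row/column m, so v is first copied into the zero-padded vector above,
        // solved in place below, and the two active segments are then read back into res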
m_permMsolver.solve_inplace(vpadding); res.head(m_ncorr).noalias() = vpadding.head(m_ncorr); res.tail(m_ncorr).noalias() = vpadding.segment(m_m, m_ncorr); } // Compute W'Pv // W [n x (2*ncorr)], v [nP x 1], res [(2*ncorr) x 1] // res preserves the ordering of Y and S columns // Returns false if the result is known to be zero inline bool apply_WtPv(const IndexSet& P_set, const Vector& v, Vector& res, bool test_zero = false) const { const int* Pptr = P_set.data(); const Scalar* vptr = v.data(); int nP = P_set.size(); // Remove zeros in v to save computation IndexSet P_reduced; std::vector v_reduced; if (test_zero) { P_reduced.reserve(nP); for (int i = 0; i < nP; i++) { if (vptr[i] != Scalar(0)) { P_reduced.push_back(Pptr[i]); v_reduced.push_back(vptr[i]); } } Pptr = P_reduced.data(); vptr = v_reduced.data(); nP = P_reduced.size(); } res.resize(2 * m_ncorr); if (m_ncorr < 1 || nP < 1) { res.setZero(); return false; } for (int j = 0; j < m_ncorr; j++) { Scalar resy = Scalar(0), ress = Scalar(0); const Scalar* yptr = &m_y(0, j); const Scalar* sptr = &m_s(0, j); for (int i = 0; i < nP; i++) { const int row = Pptr[i]; resy += yptr[row] * vptr[i]; ress += sptr[row] * vptr[i]; } res[j] = resy; res[m_ncorr + j] = ress; } res.tail(m_ncorr) *= m_theta; return true; } // Compute s * P'WMv // Assume that v[2*ncorr x 1] has the same ordering (permutation) as W and M // Returns false if the result is known to be zero inline bool apply_PtWMv(const IndexSet& P_set, const Vector& v, Vector& res, const Scalar& scale) const { const int nP = P_set.size(); res.resize(nP); res.setZero(); if (m_ncorr < 1 || nP < 1) return false; Vector Mv; apply_Mv(v, Mv); // WP * Mv Mv.tail(m_ncorr) *= m_theta; for (int j = 0; j < m_ncorr; j++) { const Scalar* yptr = &m_y(0, j); const Scalar* sptr = &m_s(0, j); const Scalar Mvy = Mv[j], Mvs = Mv[m_ncorr + j]; for (int i = 0; i < nP; i++) { const int row = P_set[i]; res[i] += Mvy * yptr[row] + Mvs * sptr[row]; } } res *= scale; return true; } // If the P'W matrix has been explicitly formed, do a direct matrix multiplication inline bool apply_PtWMv(const Matrix& WP, const Vector& v, Vector& res, const Scalar& scale) const { const int nP = WP.rows(); res.resize(nP); if (m_ncorr < 1 || nP < 1) { res.setZero(); return false; } Vector Mv; apply_Mv(v, Mv); // WP * Mv Mv.tail(m_ncorr) *= m_theta; res.noalias() = scale * (WP * Mv); return true; } // Compute F'BAb = -(F'W)M(W'AA'd) // W'd is known, and AA'+FF'=I, so W'AA'd = W'd - W'FF'd // Usually d contains many zeros, so we fist compute number of nonzero elements in A set and F set, // denoted as nnz_act and nnz_fv, respectively // If nnz_act is smaller, compute W'AA'd = WA' (A'd) directly // If nnz_fv is smaller, compute W'AA'd = W'd - WF' * (F'd) inline void compute_FtBAb( const Matrix& WF, const IndexSet& fv_set, const IndexSet& newact_set, const Vector& Wd, const Vector& drt, Vector& res) const { const int nact = newact_set.size(); const int nfree = WF.rows(); res.resize(nfree); if (m_ncorr < 1 || nact < 1 || nfree < 1) { res.setZero(); return; } // W'AA'd Vector rhs(2 * m_ncorr); if (nact <= nfree) { // Construct A'd Vector Ad(nfree); for (int i = 0; i < nact; i++) Ad[i] = drt[newact_set[i]]; apply_WtPv(newact_set, Ad, rhs); } else { // Construct F'd Vector Fd(nfree); for (int i = 0; i < nfree; i++) Fd[i] = drt[fv_set[i]]; // Compute W'AA'd = W'd - WF' * (F'd) rhs.noalias() = WF.transpose() * Fd; rhs.tail(m_ncorr) *= m_theta; rhs.noalias() = Wd - rhs; } apply_PtWMv(WF, rhs, res, Scalar(-1)); } // Compute inv(P'BP) * v // P 
represents an index set // inv(P'BP) * v = v / theta + WP * inv(inv(M) - WP' * WP / theta) * WP' * v / theta^2 // // v is [nP x 1] inline void solve_PtBP(const Matrix& WP, const Vector& v, Vector& res) const { const int nP = WP.rows(); res.resize(nP); if (m_ncorr < 1 || nP < 1) { res.noalias() = v / m_theta; return; } // Compute the matrix in the middle (only the lower triangular part is needed) // Remember that W = [Y, theta * S], but we do not store theta in WP Matrix mid(2 * m_ncorr, 2 * m_ncorr); // [0:(ncorr - 1), 0:(ncorr - 1)] for (int j = 0; j < m_ncorr; j++) { mid.col(j).segment(j, m_ncorr - j).noalias() = m_permMinv.col(j).segment(j, m_ncorr - j) - WP.block(0, j, nP, m_ncorr - j).transpose() * WP.col(j) / m_theta; } // [ncorr:(2 * ncorr - 1), 0:(ncorr - 1)] mid.block(m_ncorr, 0, m_ncorr, m_ncorr).noalias() = m_permMinv.block(m_m, 0, m_ncorr, m_ncorr) - WP.rightCols(m_ncorr).transpose() * WP.leftCols(m_ncorr); // [ncorr:(2 * ncorr - 1), ncorr:(2 * ncorr - 1)] for (int j = 0; j < m_ncorr; j++) { mid.col(m_ncorr + j).segment(m_ncorr + j, m_ncorr - j).noalias() = m_theta * (m_permMinv.col(m_m + j).segment(m_m + j, m_ncorr - j) - WP.rightCols(m_ncorr - j).transpose() * WP.col(m_ncorr + j)); } // Factorization BKLDLT midsolver(mid); // Compute the final result Vector WPv = WP.transpose() * v; WPv.tail(m_ncorr) *= m_theta; midsolver.solve_inplace(WPv); WPv.tail(m_ncorr) *= m_theta; res.noalias() = v / m_theta + (WP * WPv) / (m_theta * m_theta); } // Compute P'BQv, where P and Q are two mutually exclusive index selection operators // P'BQv = -WP * M * WQ' * v // Returns false if the result is known to be zero inline bool apply_PtBQv(const Matrix& WP, const IndexSet& Q_set, const Vector& v, Vector& res, bool test_zero = false) const { const int nP = WP.rows(); const int nQ = Q_set.size(); res.resize(nP); if (m_ncorr < 1 || nP < 1 || nQ < 1) { res.setZero(); return false; } Vector WQtv; bool nonzero = apply_WtPv(Q_set, v, WQtv, test_zero); if (!nonzero) { res.setZero(); return false; } Vector MWQtv; apply_Mv(WQtv, MWQtv); MWQtv.tail(m_ncorr) *= m_theta; res.noalias() = -WP * MWQtv; return true; } // If the Q'W matrix has been explicitly formed, do a direct matrix multiplication inline bool apply_PtBQv(const Matrix& WP, const Matrix& WQ, const Vector& v, Vector& res) const { const int nP = WP.rows(); const int nQ = WQ.rows(); res.resize(nP); if (m_ncorr < 1 || nP < 1 || nQ < 1) { res.setZero(); return false; } // Remember that W = [Y, theta * S], so we need to multiply theta to the second half Vector WQtv = WQ.transpose() * v; WQtv.tail(m_ncorr) *= m_theta; Vector MWQtv; apply_Mv(WQtv, MWQtv); MWQtv.tail(m_ncorr) *= m_theta; res.noalias() = -WP * MWQtv; return true; } }; } // namespace LBFGSpp /// \endcond #endif // LBFGSPP_BFGS_MAT_H yixuan-LBFGSpp-c524a40/include/LBFGSB.h0000664000175000017510000002460215001153241016626 0ustar nileshnilesh// Copyright (C) 2020-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_LBFGSB_H #define LBFGSPP_LBFGSB_H #include // std::invalid_argument #include #include #include "LBFGSpp/Param.h" #include "LBFGSpp/BFGSMat.h" #include "LBFGSpp/Cauchy.h" #include "LBFGSpp/SubspaceMin.h" #include "LBFGSpp/LineSearchMoreThuente.h" namespace LBFGSpp { /// /// L-BFGS-B solver for box-constrained numerical optimization /// template class LineSearch = LineSearchMoreThuente> class LBFGSBSolver { private: using Vector = Eigen::Matrix; using Matrix = Eigen::Matrix; using MapVec = Eigen::Map; using IndexSet = std::vector; const LBFGSBParam& m_param; // Parameters to control 
the LBFGS algorithm BFGSMat m_bfgs; // Approximation to the Hessian matrix Vector m_fx; // History of the objective function values Vector m_xp; // Old x Vector m_grad; // New gradient Scalar m_projgnorm; // Projected gradient norm Vector m_gradp; // Old gradient Vector m_drt; // Moving direction // Reset internal variables // n: dimension of the vector to be optimized inline void reset(int n) { const int m = m_param.m; m_bfgs.reset(n, m); m_xp.resize(n); m_grad.resize(n); m_gradp.resize(n); m_drt.resize(n); if (m_param.past > 0) m_fx.resize(m_param.past); } // Project the vector x to the bound constraint set static void force_bounds(Vector& x, const Vector& lb, const Vector& ub) { x.noalias() = x.cwiseMax(lb).cwiseMin(ub); } // Norm of the projected gradient // ||P(x-g, l, u) - x||_inf static Scalar proj_grad_norm(const Vector& x, const Vector& g, const Vector& lb, const Vector& ub) { return ((x - g).cwiseMax(lb).cwiseMin(ub) - x).cwiseAbs().maxCoeff(); } // The maximum step size alpha such that x0 + alpha * d stays within the bounds static Scalar max_step_size(const Vector& x0, const Vector& drt, const Vector& lb, const Vector& ub) { const int n = x0.size(); Scalar step = std::numeric_limits::infinity(); for (int i = 0; i < n; i++) { if (drt[i] > Scalar(0)) { step = std::min(step, (ub[i] - x0[i]) / drt[i]); } else if (drt[i] < Scalar(0)) { step = std::min(step, (lb[i] - x0[i]) / drt[i]); } } return step; } public: /// /// Constructor for the L-BFGS-B solver. /// /// \param param An object of \ref LBFGSParam to store parameters for the /// algorithm /// LBFGSBSolver(const LBFGSBParam& param) : m_param(param) { m_param.check_param(); } /// /// Minimizing a multivariate function subject to box constraints, using the L-BFGS-B algorithm. /// Exceptions will be thrown if error occurs. /// /// \param f A function object such that `f(x, grad)` returns the /// objective function value at `x`, and overwrites `grad` with /// the gradient. /// \param x In: An initial guess of the optimal point. Out: The best point /// found. /// \param fx Out: The objective function value at `x`. /// \param lb Lower bounds for `x`. /// \param ub Upper bounds for `x`. /// /// \return Number of iterations used. 
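    ///
    /// A minimal usage sketch (the dimension `n`, the bounds, and the functor `f` are
    /// illustrative assumptions; see the bundled box-constrained Rosenbrock example
    /// for a complete program):
    /// \code
    /// const int n = 10;
    /// LBFGSpp::LBFGSBParam<double> param;
    /// LBFGSpp::LBFGSBSolver<double> solver(param);
    /// Eigen::VectorXd lb = Eigen::VectorXd::Constant(n, 2.0);
    /// Eigen::VectorXd ub = Eigen::VectorXd::Constant(n, 4.0);
    /// Eigen::VectorXd x = Eigen::VectorXd::Constant(n, 3.0);
    /// double fx;
    /// int niter = solver.minimize(f, x, fx, lb, ub);
    /// \endcode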
/// template inline int minimize(Foo& f, Vector& x, Scalar& fx, const Vector& lb, const Vector& ub) { using std::abs; // Dimension of the vector const int n = x.size(); if (lb.size() != n || ub.size() != n) throw std::invalid_argument("'lb' and 'ub' must have the same size as 'x'"); // Check whether the initial vector is within the bounds // If not, project to the feasible set force_bounds(x, lb, ub); // Initialization reset(n); // The length of lag for objective function value to test convergence const int fpast = m_param.past; // Evaluate function and compute gradient fx = f(x, m_grad); m_projgnorm = proj_grad_norm(x, m_grad, lb, ub); if (fpast > 0) m_fx[0] = fx; // std::cout << "x0 = " << x.transpose() << std::endl; // std::cout << "f(x0) = " << fx << ", ||proj_grad|| = " << m_projgnorm << std::endl << std::endl; // Early exit if the initial x is already a minimizer if (m_projgnorm <= m_param.epsilon || m_projgnorm <= m_param.epsilon_rel * x.norm()) { return 1; } // Compute generalized Cauchy point Vector xcp(n), vecc; IndexSet newact_set, fv_set; Cauchy::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set); /* Vector gcp(n); Scalar fcp = f(xcp, gcp); Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub); std::cout << "xcp = " << xcp.transpose() << std::endl; std::cout << "f(xcp) = " << fcp << ", ||proj_grad|| = " << projgcpnorm << std::endl << std::endl; */ // Initial direction m_drt.noalias() = xcp - x; m_drt.normalize(); // Tolerance for s'y >= eps * (y'y) constexpr Scalar eps = std::numeric_limits::epsilon(); // s and y vectors Vector vecs(n), vecy(n); // Number of iterations used int k = 1; for (;;) { // Save the curent x and gradient m_xp.noalias() = x; m_gradp.noalias() = m_grad; Scalar dg = m_grad.dot(m_drt); // Maximum step size to make x feasible Scalar step_max = max_step_size(x, m_drt, lb, ub); // In some cases, the direction returned by the subspace minimization procedure // in the previous iteration is pathological, leading to issues such as // step_max~=0 and dg>=0. If this happens, we use xcp-x as the search direction, // and reset the BFGS matrix. This is because xsm (the subspace minimizer) // heavily depends on the BFGS matrix. If xsm is corrupted, then we may suspect // there is something wrong in the BFGS matrix, and it is safer to reset the matrix. 
// In contrast, xcp is obtained from a line search, which tends to be more robust if (dg >= Scalar(0) || step_max <= m_param.min_step) { // Reset search direction m_drt.noalias() = xcp - x; // Reset BFGS matrix m_bfgs.reset(n, m_param.m); // Recompute dg and step_max dg = m_grad.dot(m_drt); step_max = max_step_size(x, m_drt, lb, ub); } // Line search to update x, fx and gradient step_max = std::min(m_param.max_step, step_max); Scalar step = Scalar(1); step = std::min(step, step_max); LineSearch::LineSearch(f, m_param, m_xp, m_drt, step_max, step, fx, m_grad, dg, x); // New projected gradient norm m_projgnorm = proj_grad_norm(x, m_grad, lb, ub); /* std::cout << "** Iteration " << k << std::endl; std::cout << " x = " << x.transpose() << std::endl; std::cout << " f(x) = " << fx << ", ||proj_grad|| = " << m_projgnorm << std::endl << std::endl; */ // Convergence test -- gradient if (m_projgnorm <= m_param.epsilon || m_projgnorm <= m_param.epsilon_rel * x.norm()) { return k; } // Convergence test -- objective function value if (fpast > 0) { const Scalar fxd = m_fx[k % fpast]; if (k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1))) return k; m_fx[k % fpast] = fx; } // Maximum number of iterations if (m_param.max_iterations != 0 && k >= m_param.max_iterations) { return k; } // Update s and y // s_{k+1} = x_{k+1} - x_k // y_{k+1} = g_{k+1} - g_k vecs.noalias() = x - m_xp; vecy.noalias() = m_grad - m_gradp; if (vecs.dot(vecy) > eps * vecy.squaredNorm()) m_bfgs.add_correction(vecs, vecy); force_bounds(x, lb, ub); Cauchy::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set); /*Vector gcp(n); Scalar fcp = f(xcp, gcp); Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub); std::cout << "xcp = " << xcp.transpose() << std::endl; std::cout << "f(xcp) = " << fcp << ", ||proj_grad|| = " << projgcpnorm << std::endl << std::endl;*/ SubspaceMin::subspace_minimize(m_bfgs, x, xcp, m_grad, lb, ub, vecc, newact_set, fv_set, m_param.max_submin, m_drt); /*Vector gsm(n); Scalar fsm = f(x + m_drt, gsm); Scalar projgsmnorm = proj_grad_norm(x + m_drt, gsm, lb, ub); std::cout << "xsm = " << (x + m_drt).transpose() << std::endl; std::cout << "f(xsm) = " << fsm << ", ||proj_grad|| = " << projgsmnorm << std::endl << std::endl;*/ k++; } return k; } /// /// Returning the gradient vector on the last iterate. /// Typically used to debug and test convergence. /// Should only be called after the `minimize()` function. /// /// \return A const reference to the gradient vector. /// const Vector& final_grad() const { return m_grad; } /// /// Returning the infinity norm of the final projected gradient. /// The projected gradient is defined as \f$P(x-g,l,u)-x\f$, where \f$P(v,l,u)\f$ stands for /// the projection of a vector \f$v\f$ onto the box specified by the lower bound vector \f$l\f$ and /// upper bound vector \f$u\f$. 
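    /// Componentwise, \f$P(v,l,u)_i = \min\{\max\{v_i, l_i\}, u_i\}\f$, which matches the
    /// clamping the solver performs when projecting a point onto the feasible box.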
/// Scalar final_grad_norm() const { return m_projgnorm; } }; } // namespace LBFGSpp #endif // LBFGSPP_LBFGSB_H yixuan-LBFGSpp-c524a40/include/LBFGS.h0000664000175000017510000001476215001153241016532 0ustar nileshnilesh// Copyright (C) 2016-2025 Yixuan Qiu // Under MIT license #ifndef LBFGSPP_LBFGS_H #define LBFGSPP_LBFGS_H #include #include "LBFGSpp/Param.h" #include "LBFGSpp/BFGSMat.h" #include "LBFGSpp/LineSearchBacktracking.h" #include "LBFGSpp/LineSearchBracketing.h" #include "LBFGSpp/LineSearchNocedalWright.h" #include "LBFGSpp/LineSearchMoreThuente.h" namespace LBFGSpp { /// /// L-BFGS solver for unconstrained numerical optimization /// template class LineSearch = LineSearchNocedalWright> class LBFGSSolver { private: using Vector = Eigen::Matrix; using Matrix = Eigen::Matrix; using MapVec = Eigen::Map; const LBFGSParam& m_param; // Parameters to control the LBFGS algorithm BFGSMat m_bfgs; // Approximation to the Hessian matrix Vector m_fx; // History of the objective function values Vector m_xp; // Old x Vector m_grad; // New gradient Scalar m_gnorm; // Norm of the gradient Vector m_gradp; // Old gradient Vector m_drt; // Moving direction // Reset internal variables // n: dimension of the vector to be optimized inline void reset(int n) { const int m = m_param.m; m_bfgs.reset(n, m); m_xp.resize(n); m_grad.resize(n); m_gradp.resize(n); m_drt.resize(n); if (m_param.past > 0) m_fx.resize(m_param.past); } public: /// /// Constructor for the L-BFGS solver. /// /// \param param An object of \ref LBFGSParam to store parameters for the /// algorithm /// LBFGSSolver(const LBFGSParam& param) : m_param(param) { m_param.check_param(); } /// /// Minimizing a multivariate function using the L-BFGS algorithm. /// Exceptions will be thrown if error occurs. /// /// \param f A function object such that `f(x, grad)` returns the /// objective function value at `x`, and overwrites `grad` with /// the gradient. /// \param x In: An initial guess of the optimal point. Out: The best point /// found. /// \param fx Out: The objective function value at `x`. /// /// \return Number of iterations used. 
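    ///
    /// A minimal usage sketch (the dimension `n` and the functor `f` are illustrative
    /// assumptions; see the bundled Rosenbrock and quadratic examples for full programs):
    /// \code
    /// const int n = 10;
    /// LBFGSpp::LBFGSParam<double> param;
    /// LBFGSpp::LBFGSSolver<double> solver(param);
    /// Eigen::VectorXd x = Eigen::VectorXd::Zero(n);
    /// double fx;
    /// int niter = solver.minimize(f, x, fx);
    /// \endcode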
/// template inline int minimize(Foo& f, Vector& x, Scalar& fx) { using std::abs; // Dimension of the vector const int n = x.size(); reset(n); // The length of lag for objective function value to test convergence const int fpast = m_param.past; // Evaluate function and compute gradient fx = f(x, m_grad); m_gnorm = m_grad.norm(); if (fpast > 0) m_fx[0] = fx; // std::cout << "x0 = " << x.transpose() << std::endl; // std::cout << "f(x0) = " << fx << ", ||grad|| = " << m_gnorm << std::endl << std::endl; // Early exit if the initial x is already a minimizer if (m_gnorm <= m_param.epsilon || m_gnorm <= m_param.epsilon_rel * x.norm()) { return 1; } // Initial direction m_drt.noalias() = -m_grad; // Initial step size Scalar step = Scalar(1) / m_drt.norm(); // Tolerance for s'y >= eps * (y'y) constexpr Scalar eps = std::numeric_limits::epsilon(); // s and y vectors Vector vecs(n), vecy(n); // Number of iterations used int k = 1; for (;;) { // std::cout << "Iter " << k << " begins" << std::endl << std::endl; // Save the curent x and gradient m_xp.noalias() = x; m_gradp.noalias() = m_grad; Scalar dg = m_grad.dot(m_drt); const Scalar step_max = m_param.max_step; // Line search to update x, fx and gradient LineSearch::LineSearch(f, m_param, m_xp, m_drt, step_max, step, fx, m_grad, dg, x); // New gradient norm m_gnorm = m_grad.norm(); // std::cout << "Iter " << k << " finished line search" << std::endl; // std::cout << " x = " << x.transpose() << std::endl; // std::cout << " f(x) = " << fx << ", ||grad|| = " << m_gnorm << std::endl << std::endl; // Convergence test -- gradient if (m_gnorm <= m_param.epsilon || m_gnorm <= m_param.epsilon_rel * x.norm()) { return k; } // Convergence test -- objective function value if (fpast > 0) { const Scalar fxd = m_fx[k % fpast]; if (k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1))) return k; m_fx[k % fpast] = fx; } // Maximum number of iterations if (m_param.max_iterations != 0 && k >= m_param.max_iterations) { return k; } // Update s and y // s_{k+1} = x_{k+1} - x_k // y_{k+1} = g_{k+1} - g_k vecs.noalias() = x - m_xp; vecy.noalias() = m_grad - m_gradp; if (vecs.dot(vecy) > eps * vecy.squaredNorm()) m_bfgs.add_correction(vecs, vecy); // Recursive formula to compute d = -H * g m_bfgs.apply_Hv(m_grad, -Scalar(1), m_drt); // Reset step = 1.0 as initial guess for the next line search step = Scalar(1); k++; } return k; } /// /// Returning the gradient vector on the last iterate. /// Typically used to debug and test convergence. /// Should only be called after the `minimize()` function. /// /// \return A const reference to the gradient vector. /// const Vector& final_grad() const { return m_grad; } /// /// Returning the Euclidean norm of the final gradient. /// Scalar final_grad_norm() const { return m_gnorm; } /// /// Returning the approximate Hessian matrix on the last iterate. /// Matrix final_approx_hessian() const { return m_bfgs.get_Bmat(); } /// /// Returning the approximate inverse Hessian matrix on the last iterate. 
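    /// The matrix is reconstructed explicitly from the compact representation
    /// \f$H = \frac{1}{\theta} I + W M W'\f$ used internally, so calling it forms a
    /// dense \f$n \times n\f$ matrix and is mainly intended for debugging or small problems.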
/// Matrix final_approx_inverse_hessian() const { return m_bfgs.get_Hmat(); } }; } // namespace LBFGSpp #endif // LBFGSPP_LBFGS_H yixuan-LBFGSpp-c524a40/examples/0000775000175000017510000000000015001153241015705 5ustar nileshnileshyixuan-LBFGSpp-c524a40/examples/example-rosenbrock.cpp0000664000175000017510000000246515001153241022220 0ustar nileshnilesh#include #include #include using Eigen::VectorXf; using Eigen::MatrixXf; using namespace LBFGSpp; class Rosenbrock { private: int n; public: Rosenbrock(int n_) : n(n_) {} float operator()(const VectorXf& x, VectorXf& grad) { float fx = 0.0; for(int i = 0; i < n; i += 2) { float t1 = 1.0 - x[i]; float t2 = 10 * (x[i + 1] - x[i] * x[i]); grad[i + 1] = 20 * t2; grad[i] = -2.0 * (x[i] * grad[i + 1] + t1); fx += t1 * t1 + t2 * t2; } return fx; } }; int main() { const int n = 10; LBFGSParam param; LBFGSSolver solver(param); Rosenbrock fun(n); VectorXf x = VectorXf::Zero(n); float fx; int niter = solver.minimize(fun, x, fx); std::cout << niter << " iterations" << std::endl; std::cout << "x = \n" << x.transpose() << std::endl; std::cout << "f(x) = " << fx << std::endl; std::cout << "grad = " << solver.final_grad().transpose() << std::endl; std::cout << "||grad|| = " << solver.final_grad_norm() << std::endl; std::cout << "approx_hess = \n" << solver.final_approx_hessian() << std::endl; std::cout << "approx_inv_hess = \n" << solver.final_approx_inverse_hessian() << std::endl; return 0; } yixuan-LBFGSpp-c524a40/examples/example-rosenbrock-comparison.cpp0000664000175000017510000000611615001153241024365 0ustar nileshnilesh#include #include #include using Eigen::VectorXd; using Eigen::MatrixXd; using namespace LBFGSpp; class Rosenbrock { private: int n; ptrdiff_t ncalls; public: Rosenbrock(int n_) : n(n_), ncalls(0) {} double operator()(const VectorXd& x, VectorXd& grad) { // std::cout << x << std::endl; ncalls += 1; double fx = 0.0; for(int i = 0; i < n; i += 2) { double t1 = 1.0 - x[i]; double t2 = 10 * (x[i + 1] - x[i] * x[i]); grad[i + 1] = 20 * t2; grad[i] = -2.0 * (x[i] * grad[i + 1] + t1); fx += t1 * t1 + t2 * t2; } assert( ! std::isnan(fx) ); return fx; } const ptrdiff_t get_ncalls() { return ncalls; } }; int main() { LBFGSParam param; param. 
linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE; param.max_linesearch = 256; LBFGSSolver solver_backtrack(param); LBFGSSolver solver_bracket (param); LBFGSSolver solver_nocedal (param); LBFGSSolver solver_more (param); const int tests_per_n = 1024; for( int n=2; n <= 24; n += 2 ) { std::cout << "n = " << n << std::endl; Rosenbrock fun_backtrack(n), fun_bracket (n), fun_nocedal (n), fun_more (n); int niter_backtrack = 0, niter_bracket = 0, niter_nocedal = 0, niter_more = 0; for( int test=0; test < tests_per_n; test++ ) { VectorXd x, x0 = VectorXd::Random(n); double fx; x = x0; niter_backtrack += solver_backtrack.minimize(fun_backtrack, x, fx); assert( ( (x.array() - 1.0).abs() < 1e-4 ).all() ); x = x0; niter_bracket += solver_bracket .minimize(fun_bracket , x, fx); assert( ( (x.array() - 1.0).abs() < 1e-4 ).all() ); x = x0; niter_nocedal += solver_nocedal .minimize(fun_nocedal , x, fx); assert( ( (x.array() - 1.0).abs() < 1e-4 ).all() ); x = x0; niter_more += solver_more .minimize(fun_more , x, fx); assert( ( (x.array() - 1.0).abs() < 1e-4 ).all() ); } std::cout << " Average #calls:" << std::endl; std::cout << " LineSearchBacktracking : " << (fun_backtrack.get_ncalls() / tests_per_n) << " calls, " << (niter_backtrack / tests_per_n) << " iterations" << std::endl; std::cout << " LineSearchBracketing : " << (fun_bracket .get_ncalls() / tests_per_n) << " calls, " << (niter_bracket / tests_per_n) << " iterations" << std::endl; std::cout << " LineSearchNocedalWright: " << (fun_nocedal .get_ncalls() / tests_per_n) << " calls, " << (niter_nocedal / tests_per_n) << " iterations" << std::endl; std::cout << " LineSearchMoreThuente: " << (fun_more .get_ncalls() / tests_per_n) << " calls, " << (niter_more / tests_per_n) << " iterations" << std::endl; } return 0; } yixuan-LBFGSpp-c524a40/examples/example-rosenbrock-bracketing.cpp0000664000175000017510000000227215001153241024323 0ustar nileshnilesh#include #include #include using Eigen::VectorXd; using Eigen::MatrixXd; using namespace LBFGSpp; class Rosenbrock { private: int n; public: Rosenbrock(int n_) : n(n_) {} double operator()(const VectorXd& x, VectorXd& grad) { double fx = 0.0; for(int i = 0; i < n; i += 2) { double t1 = 1.0 - x[i]; double t2 = 10 * (x[i + 1] - x[i] * x[i]); grad[i + 1] = 20 * t2; grad[i] = -2.0 * (x[i] * grad[i + 1] + t1); fx += t1 * t1 + t2 * t2; } assert( ! std::isnan(fx) ); return fx; } }; int main() { LBFGSParam param; LBFGSSolver solver(param); for( int n=2; n <= 16; n += 2 ) { std::cout << "n = " << n << std::endl; Rosenbrock fun(n); for( int test=0; test < 1024; test++ ) { VectorXd x = VectorXd::Random(n); double fx; int niter = solver.minimize(fun, x, fx); assert( ( (x.array() - 1.0).abs() < 1e-4 ).all() ); } std::cout << "Test passed!" << std::endl << std::endl; } return 0; } yixuan-LBFGSpp-c524a40/examples/example-rosenbrock-box.cpp0000664000175000017510000000352315001153241023002 0ustar nileshnilesh#include #include #include using namespace LBFGSpp; typedef double Scalar; typedef Eigen::Matrix Vector; // Example from the roptim R package // f(x) = (x[0] - 1)^2 + 4 * (x[1] - x[0]^2)^2 + ... 
+ 4 * (x[end] - x[end - 1]^2)^2 class Rosenbrock { private: int n; public: Rosenbrock(int n_) : n(n_) {} Scalar operator()(const Vector& x, Vector& grad) { Scalar fx = (x[0] - 1.0) * (x[0] - 1.0); grad[0] = 2 * (x[0] - 1) + 16 * (x[0] * x[0] - x[1]) * x[0]; for(int i = 1; i < n; i++) { fx += 4 * std::pow(x[i] - x[i - 1] * x[i - 1], 2); if(i == n - 1) { grad[i] = 8 * (x[i] - x[i - 1] * x[i - 1]); } else { grad[i] = 8 * (x[i] - x[i - 1] * x[i - 1]) + 16 * (x[i] * x[i] - x[i + 1]) * x[i]; } } return fx; } }; int main() { const int n = 25; LBFGSBParam param; LBFGSBSolver solver(param); Rosenbrock fun(n); // Variable bounds Vector lb = Vector::Constant(n, 2.0); Vector ub = Vector::Constant(n, 4.0); // The third variable is unbounded lb[2] = -std::numeric_limits::infinity(); ub[2] = std::numeric_limits::infinity(); // Initial values Vector x = Vector::Constant(n, 3.0); // Make some initial values at the bounds x[0] = x[1] = 2.0; x[5] = x[7] = 4.0; Scalar fx; int niter = solver.minimize(fun, x, fx, lb, ub); std::cout << niter << " iterations" << std::endl; std::cout << "x = \n" << x.transpose() << std::endl; std::cout << "f(x) = " << fx << std::endl; std::cout << "grad = " << solver.final_grad().transpose() << std::endl; std::cout << "projected grad norm = " << solver.final_grad_norm() << std::endl; return 0; } yixuan-LBFGSpp-c524a40/examples/example-quadratic.cpp0000664000175000017510000000135315001153241022021 0ustar nileshnilesh#include #include #include using Eigen::VectorXd; using Eigen::MatrixXd; using namespace LBFGSpp; double foo(const VectorXd& x, VectorXd& grad) { const int n = x.size(); VectorXd d(n); for(int i = 0; i < n; i++) d[i] = i; double f = (x - d).squaredNorm(); grad.noalias() = 2.0 * (x - d); return f; } int main() { const int n = 10; LBFGSParam param; LBFGSSolver solver(param); VectorXd x = VectorXd::Zero(n); double fx; int niter = solver.minimize(foo, x, fx); std::cout << niter << " iterations" << std::endl; std::cout << "x = \n" << x.transpose() << std::endl; std::cout << "f(x) = " << fx << std::endl; return 0; } yixuan-LBFGSpp-c524a40/examples/CMakeLists.txt0000664000175000017510000000061415001153241020446 0ustar nileshnileshforeach (source example-quadratic.cpp example-rosenbrock-box.cpp example-rosenbrock-bracketing.cpp example-rosenbrock-comparison.cpp example-rosenbrock.cpp) get_filename_component(example ${source} NAME_WLE) add_executable(${example} ${source}) set_property(TARGET ${example} PROPERTY CXX_STANDARD 17) target_link_libraries(${example} PRIVATE lbfgspp Eigen3::Eigen) endforeach () yixuan-LBFGSpp-c524a40/doxygen/0000775000175000017510000000000015001153241015544 5ustar nileshnileshyixuan-LBFGSpp-c524a40/doxygen/Doxyfile0000664000175000017510000036053415001153241017265 0ustar nileshnilesh# Doxyfile 1.9.6 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). 
# # Note: # # Use doxygen to compare the used configuration file with the template # configuration file: # doxygen -x [configFile] # Use doxygen to compare the used configuration file with the template # configuration file without replacing the environment variables or CMake type # replacement variables: # doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "LBFGS++" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = ../doc # If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 # sub-directories (in 2 levels) under the output directory of each output format # and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to # control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO # Controls the number of sub-directories that will be created when # CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every # level increment doubles the number of directories, resulting in 4096 # directories at level 8 which is the default and also the maximum value. The # sub-directories are organized in 2 levels, the first level always has a fixed # number of 16 directories. # Minimum value: 0, maximum value: 8, default value: 8. # This tag requires that the tag CREATE_SUBDIRS is set to YES. 
CREATE_SUBDIRS_LEVEL = 8 # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, # Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English # (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, # Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with # English messages), Korean, Korean-en (Korean with English messages), Latvian, # Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, # Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, # Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. 
The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = ../include # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line # such as # /*************** # as being the beginning of a Javadoc-style comment "banner". If set to NO, the # Javadoc-style will behave just like regular comments and it will not be # interpreted by doxygen. # The default value is: NO. JAVADOC_BANNER = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # By default Python docstrings are displayed as preformatted text and doxygen's # special commands cannot be used. By setting PYTHON_DOCSTRING to NO the # doxygen's special commands can be used and the contents of the docstring # documentation blocks is shown as doxygen documentation. # The default value is: YES. PYTHON_DOCSTRING = YES # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. 
Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". Note that you cannot put \n's in the value part of an alias # to insert newlines (in the resulting output). You can put ^^ in the value part # of an alias to insert a newline as if a physical newline was in the original # file. When you need a literal { or } or , in the value part of an alias you # have to escape them by means of a backslash (\), this can lead to conflicts # with the commands \{ and \} for these it is advised to use the version @{ and # @} or use a double escape (\\{ and \\}) ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, # Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, # VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files # as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. When specifying no_extension you should add # * to the FILE_PATTERNS. 
# # Note see also the list of default file extension mappings. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. 
GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 # The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use # during processing. When set to 0 doxygen will based this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, # which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. 
NUM_PROC_THREADS = 1 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. EXTRACT_PRIV_VIRTUAL = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If this flag is set to YES, the name of an unnamed parameter in a declaration # will be determined by the corresponding definition. By default unnamed # parameters remain unnamed in the output. # The default value is: YES. RESOLVE_UNNAMED_PARAMS = YES # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # will also hide undocumented C++ concepts if enabled. This option has no effect # if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. 
HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # With the correct setting of option CASE_SENSE_NAMES doxygen will better be # able to match the capabilities of the underlying filesystem. In case the # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. For filesystems that # are not case sensitive the option should be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. # Possible values are: SYSTEM, NO and YES. # The default value is: SYSTEM. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_HEADERFILE tag is set to YES then the documentation for a class # will show which file needs to be included to use the class. # The default value is: YES. SHOW_HEADERFILE = YES # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = NO # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. 
Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. 
MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. See also section "Changing the # layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. 
WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as documenting some parameters in # a documented function twice, or documenting parameters that don't exist or # using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete # function parameter documentation. If set to NO, doxygen will accept that some # parameters have no documentation without warning. # The default value is: YES. WARN_IF_INCOMPLETE_DOC = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong parameter # documentation, but not about the absence of documentation. If EXTRACT_ALL is # set to YES then this flag will automatically be disabled. See also # WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO # If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about # undocumented enumeration values. If set to NO, doxygen will accept # undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: NO. WARN_IF_UNDOC_ENUM_VAL = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but # at the end of the doxygen process doxygen will return with a non-zero status. # Possible values are: NO, YES and FAIL_ON_WARNINGS. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # In the $text part of the WARN_FORMAT command it is possible that a reference # to a more specific place is given. To make it easier to jump to this place # (outside of doxygen) the user can define a custom "cut" / "paste" string. # Example: # WARN_LINE_FORMAT = "'vi $file +$line'" # See also: WARN_FORMAT # The default value is: at line $line of file $file. WARN_LINE_FORMAT = "at line $line of file $file" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). In case the file specified cannot be opened for writing the # warning and error messages are written to standard error. When as file - is # specified the warning and error messages are written to standard output # (stdout). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. 
Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = ../include ../include/LBFGSpp ../README.md # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. # See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # This tag can be used to specify the character encoding of the source files # that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify # character encoding on a per file pattern basis. Doxygen will compare the file # name with each pattern and apply the encoding instead of the default # INPUT_ENCODING) if there is a match. The character encodings are a list of the # form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding # "INPUT_ENCODING" for further information on supported encodings. INPUT_FILE_ENCODING = # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # Note the list of default checked file patterns might differ from the list of # default file extension mappings. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, # *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C # comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, # *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.l \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.py \ *.pyw \ *.f90 \ *.f95 \ *.f03 \ *.f08 \ *.f18 \ *.f \ *.for \ *.vhd \ *.vhdl \ *.ucf \ *.qsf \ *.ice # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
# # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # # # where is the value of the INPUT_FILTER tag, and is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that doxygen will use the data processed and written to standard output # for further processing, therefore nothing else, like debug statements or used # commands (so in case of a Windows batch file always use @echo OFF), should be # written to standard output. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. 
when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = ../README.md # The Fortran standard specifies that for fixed formatted Fortran code all # characters from position 72 are to be considered as comment. A common # extension is to allow longer lines before the automatic comment starts. The # setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can # be processed before the automatic comment starts. # Minimum value: 7, maximum value: 10000, default value: 72. FORTRAN_COMMENT_AFTER = 72 #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: # http://clang.llvm.org/) for more accurate parsing at the cost of reduced # performance. This can be particularly helpful with template rich C++ code for # which doxygen's built-in parser lacks the necessary type information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS # tag is set to YES then doxygen will add the directory of each input to the # include path. # The default value is: YES. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the # path to the directory containing a file called compile_commands.json. This # file is the compilation database (see: # http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the # options used when the source files were built. This is equivalent to # specifying the -p option to a clang tool, such as clang-check. These options # will then be passed to the parser. Any options specified with CLANG_OPTIONS # will be added as well. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. CLANG_DATABASE_PATH = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. 
ALPHABETICAL_INDEX = NO # The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) # that should be ignored while generating the index headers. The IGNORE_PREFIX # tag works for classes, function and member names. The entity will be placed in # the alphabetical list under the first letter of the entity name that remains # after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). # Note: Since the styling of scrollbars can currently not be overruled in # Webkit/Chromium, the styling will be left out of the default doxygen.css if # one or more extra stylesheets have been specified. So if scrollbar # customization is desired it has to be added explicitly. For an example see the # documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE tag can be used to specify if the generated HTML output # should be rendered with a dark or light theme. # Possible values are: LIGHT always generate light mode output, DARK always # generate dark mode output, AUTO_LIGHT automatically set the mode according to # the user preference, use light mode if no preference is set (the default), # AUTO_DARK automatically set the mode according to the user preference, use # dark mode if no preference is set and TOGGLE allow to user to switch between # light and dark mode via a button. # The default value is: AUTO_LIGHT. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE = AUTO_LIGHT # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. 
The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will # consists of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have JavaScript, # like the Qt help browser. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: # https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To # create a documentation set, doxygen will generate a Makefile in the HTML # output directory. Running make will produce the docset in that directory and # running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag determines the URL of the docset feed. 
# This tag determines the URL of the docset feed. A documentation feed
# provides an umbrella under which multiple documentation sets from a single
# provider (such as a company or product suite) can be grouped.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_FEEDURL         =

# This tag specifies a string that should uniquely identify the documentation
# set bundle. This should be a reverse domain-name style string, e.g.
# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_BUNDLE_ID       = org.doxygen.Project

# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
# the documentation publisher. This should be a reverse domain-name style
# string, e.g. com.mycompany.MyDocSet.documentation.
# The default value is: org.doxygen.Publisher.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_PUBLISHER_ID    = org.doxygen.Publisher

# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
# The default value is: Publisher.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_PUBLISHER_NAME  = Publisher

# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help
# Workshop on Windows. In the beginning of 2021 Microsoft took the original
# page, including the download links, offline (the HTML Help Workshop was
# already many years in maintenance mode). You can download the HTML Help
# Workshop from the web archives at Installation executable (see:
# http://web.archive.org/web/20160201063255/http://download.microsoft.com/download/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe).
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
# files are now used as the Windows 98 help format, and will replace the old
# Windows help format (.hlp) on all Windows platforms in the future.
# Compressed HTML files also contain an index, a table of contents, and you
# can search for words in the documentation. The HTML workshop also contains a
# viewer for compressed HTML files.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_HTMLHELP      = NO

# The CHM_FILE tag can be used to specify the file name of the resulting .chm
# file. You can add a path in front of the file if the result should not be
# written to the html output directory.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_FILE               =

# The HHC_LOCATION tag can be used to specify the location (absolute path
# including file name) of the HTML help compiler (hhc.exe). If non-empty,
# doxygen will try to run the HTML help compiler on the generated index.hhp.
# The file has to be specified with full path.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

HHC_LOCATION           =

# The GENERATE_CHI flag controls whether a separate .chi index file is
# generated (YES) or whether it should be included in the main .chm file (NO).
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

GENERATE_CHI           = NO
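# Example (hypothetical Windows paths and file name): produce a compiled .chm
# file, assuming the HTML Help Workshop compiler is installed at the location
# shown.
# GENERATE_HTMLHELP = YES
# CHM_FILE          = LBFGSpp.chm
# HHC_LOCATION      = "C:/Program Files (x86)/HTML Help Workshop/hhc.exe"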
# The CHM_INDEX_ENCODING tag is used to encode the HtmlHelp index (hhk),
# content (hhc) and project file content.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_INDEX_ENCODING     =

# The BINARY_TOC flag controls whether a binary table of contents is generated
# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
# enables the Previous and Next buttons.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

BINARY_TOC             = NO

# The TOC_EXPAND flag can be set to YES to add extra items for group members
# to the table of contents of the HTML help documentation and to the tree
# view.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

TOC_EXPAND             = NO

# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed
# Help (.qch) of the generated HTML documentation.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_QHP           = NO

# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to
# specify the file name of the resulting .qch file. The path specified is
# relative to the HTML output folder.
# This tag requires that the tag GENERATE_QHP is set to YES.

QCH_FILE               =

# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_NAMESPACE          = org.doxygen.Project

# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project /
# Virtual Folders (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_VIRTUAL_FOLDER     = doc

# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
# Filters (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_NAME   =

# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project /
# Custom Filters (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_ATTRS  =

# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
# project's filter section matches. For more information please see Qt Help
# Project / Filter Attributes (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_SECT_FILTER_ATTRS  =

# The QHG_LOCATION tag can be used to specify the location (absolute path
# including file name) of Qt's qhelpgenerator. If non-empty, doxygen will try
# to run qhelpgenerator on the generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHG_LOCATION           =
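# Example (illustrative names): generate a Qt Compressed Help file, assuming
# qhelpgenerator is available on the PATH.
# GENERATE_QHP       = YES
# QHP_NAMESPACE      = org.example.lbfgspp
# QHP_VIRTUAL_FOLDER = lbfgspp
# QHG_LOCATION       = qhelpgenerator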
# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will
# be generated; together with the HTML files, they form an Eclipse help
# plugin. To install this plugin and make it available under the help contents
# menu in Eclipse, the contents of the directory containing the HTML and XML
# files need to be copied into the plugins directory of Eclipse. The name of
# the directory within the plugins directory should be the same as the
# ECLIPSE_DOC_ID value. After copying, Eclipse needs to be restarted before
# the help appears.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_ECLIPSEHELP   = NO

# A unique identifier for the Eclipse help plugin. When installing the plugin
# the directory name containing the HTML and XML files should also have this
# name. Each documentation set should have its own identifier.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.

ECLIPSE_DOC_ID         = org.doxygen.Project

# If you want full control over the layout of the generated HTML pages it
# might be necessary to disable the index and replace it with your own. The
# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at
# the top of each HTML page. A value of NO enables the index and the value YES
# disables it. Since the tabs in the index contain the same information as the
# navigation tree, you can set this option to YES if you also set
# GENERATE_TREEVIEW to YES.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

DISABLE_INDEX          = NO

# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
# structure should be generated to display hierarchical information. If the
# tag value is set to YES, a side panel will be generated containing a
# tree-like index structure (just like the one that is generated for HTML
# Help). For this to work a browser that supports JavaScript, DHTML, CSS and
# frames is required (i.e. any modern browser). Windows users are probably
# better off using the HTML help feature. Via custom style sheets (see
# HTML_EXTRA_STYLESHEET) one can further fine-tune the look of the index (see
# "Fine-tuning the output"). For example, the default style sheet generated by
# doxygen shows how to put an image at the root of the tree instead of the
# PROJECT_NAME. Since the tree basically has the same information as the tab
# index, you could consider setting DISABLE_INDEX to YES when enabling this
# option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_TREEVIEW      = YES

# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the
# FULL_SIDEBAR option determines if the side bar is limited to only the
# treeview area (value NO) or if it should extend to the full height of the
# window (value YES). Setting this to YES gives a layout similar to
# https://docs.readthedocs.io with more room for contents, but less room for
# the project logo, title, and description. If either GENERATE_TREEVIEW or
# DISABLE_INDEX is set to NO, this option has no effect.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

FULL_SIDEBAR           = NO
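# Example (illustrative only; this Doxyfile keeps the tab index enabled):
# combine the tree view with a full-height sidebar for a readthedocs-style
# layout.
# DISABLE_INDEX     = YES
# GENERATE_TREEVIEW = YES
# FULL_SIDEBAR      = YES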
# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
# that doxygen will group on one line in the generated HTML documentation.
#
# Note that a value of 0 will completely suppress the enum values from
# appearing in the overview section.
# Minimum value: 0, maximum value: 20, default value: 4.
# This tag requires that the tag GENERATE_HTML is set to YES.

ENUM_VALUES_PER_LINE   = 1

# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
# to set the initial width (in pixels) of the frame in which the tree is
# shown.
# Minimum value: 0, maximum value: 1500, default value: 250.
# This tag requires that the tag GENERATE_HTML is set to YES.

TREEVIEW_WIDTH         = 250

# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
# external symbols imported via tag files in a separate window.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

EXT_LINKS_IN_WINDOW    = NO

# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email
# addresses.
# The default value is: YES.
# This tag requires that the tag GENERATE_HTML is set to YES.

OBFUSCATE_EMAILS       = YES

# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the
# pdf2svg tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
# the HTML output. These images will generally look nicer at scaled
# resolutions.
# Possible values are: png (the default) and svg (looks nicer but requires the
# pdf2svg or inkscape tool).
# The default value is: png.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_FORMULA_FORMAT    = png

# Use this tag to change the font size of LaTeX formulas included as images in
# the HTML documentation. When you change the font size after a successful
# doxygen run, you need to manually remove any form_*.png images from the HTML
# output directory to force them to be regenerated.
# Minimum value: 8, maximum value: 50, default value: 10.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_FONTSIZE       = 10

# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand
# commands to create new LaTeX commands to be used in formulas as building
# blocks. See the section "Including formulas" for details.

FORMULA_MACROFILE      =

# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
# https://www.mathjax.org) which uses client side JavaScript for the rendering
# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
# installed or if you want the formulas to look prettier in the HTML output.
# When enabled, you may also need to install MathJax separately and configure
# the path to it using the MATHJAX_RELPATH option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

USE_MATHJAX            = YES

# With MATHJAX_VERSION it is possible to specify the MathJax version to be
# used. Note that the different versions of MathJax have different
# requirements with regards to the different settings, so it is possible that
# also other MathJax settings have to be changed when switching between the
# different MathJax versions.
# Possible values are: MathJax_2 and MathJax_3.
# The default value is: MathJax_2.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_VERSION        = MathJax_2
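# Example (illustrative alternative to the setting above): switch to MathJax
# version 3; other MathJax-related tags may then need to be adjusted, as noted
# above.
# MATHJAX_VERSION = MathJax_3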
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. For more details about the output format see MathJax
# version 2 (see: http://docs.mathjax.org/en/v2.7-latest/output.html) and
# MathJax version 3 (see:
# http://docs.mathjax.org/en/latest/web/components/output.html).
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility. This is the name for MathJax version 2; for MathJax version 3
# this will be translated into chtml), NativeMML (i.e. MathML. Only supported
# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml
# (This is the name for MathJax version 3; for MathJax version 2 this will be
# translated into HTML-CSS) and SVG.
# The default value is: HTML-CSS.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_FORMAT         = HTML-CSS

# When MathJax is enabled you need to specify the location relative to the
# HTML output directory using the MATHJAX_RELPATH option. The destination
# directory should contain the MathJax.js script. For instance, if the mathjax
# directory is located at the same level as the HTML output directory, then
# MATHJAX_RELPATH should be ../mathjax. The default value points to the
# MathJax Content Delivery Network so you can quickly see the result without
# installing MathJax. However, it is strongly recommended to install a local
# copy of MathJax from https://www.mathjax.org before deployment. The default
# value is:
# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2
# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_RELPATH        = https://cdn.jsdelivr.net/npm/mathjax@2

# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering. For example
# for MathJax version 2 (see
# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions):
# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# For example for MathJax version 3 (see
# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html):
# MATHJAX_EXTENSIONS = ams
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_EXTENSIONS     =

# The MATHJAX_CODEFILE tag can be used to specify a file with javascript
# pieces of code that will be used on startup of the MathJax code. See the
# MathJax site (see: http://docs.mathjax.org/en/v2.7-latest/output.html) for
# more details. For an example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_CODEFILE       =

# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
# the HTML output. The underlying search engine uses javascript and DHTML and
# should work on any modern browser. Note that when using HTML help
# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
# there is already a search function, so this one should typically be
# disabled. For large projects the javascript-based search engine can be slow;
# in that case enabling SERVER_BASED_SEARCH may provide a better solution. It
# is possible to search using the keyboard; to jump to the search box use
# <access key> + S (what the <access key> is depends on the OS and browser,
# but it is typically <CTRL>, <ALT>/