Source code for triqs_maxent.minimizers.levenberg_minimizer

# TRIQS application maxent
# Copyright (C) 2018 Gernot J. Kraberger
# Copyright (C) 2018 Simons Foundation
# Authors: Gernot J. Kraberger and Manuel Zingl
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.



import numpy as np
from .minimizer import Minimizer
from .convergence_methods import (OrConvergenceMethod,
                                  MaxDerivativeConvergenceMethod,
                                  RelativeFunctionChangeConvergenceMethod)


class LevenbergMinimizer(Minimizer):
    r""" The Levenberg minimization algorithm.

    The task of this algorithm is to minimize a function :math:`Q` with
    respect to a quantity :math:`H`. This is equivalent to searching
    for a solution of :math:`f := \partial Q/\partial H = 0`. We assume
    that the equation is parametrized by a solution vector :math:`v`
    (i.e., we are looking for a solution in :math:`H(v)`). We then
    calculate the matrix
    :math:`J = \frac{\partial}{\partial v} \frac{\partial Q}{\partial H}`.

    Depending on the settings of the parameters, the solution is
    searched for in the following way:

    * ``J_squared=False, marquardt=False``:
      :math:`(J + \mu 1)\,\delta v = f`
    * ``J_squared=False, marquardt=True``:
      :math:`(J + \mu\,\mathrm{diag}\,J)\,\delta v = f`
    * ``J_squared=True, marquardt=False``:
      :math:`(J^T J + \mu 1)\,\delta v = J^T f`
    * ``J_squared=True, marquardt=True``:
      :math:`(J^T J + \mu\,\mathrm{diag}\,J^T J)\,\delta v = J^T f`

    (A small numerical sketch of these four variants is given at the
    end of this file.)

    Then, :math:`v` is updated by subtracting :math:`\delta v`. The
    step length is determined by the damping parameter :math:`\mu`,
    which is chosen so that :math:`Q` is minimal.

    Parameters
    ==========
    convergence : :py:mod:`ConvergenceMethod <.convergence_methods>`
        method to check convergence; the default is
        ``MaxDerivativeConvergenceMethod(1.e-4) | RelativeFunctionChangeConvergenceMethod(1.e-16)``
    maxiter : int
        the maximum number of iterations
    miniter : int
        the minimum number of iterations
    J_squared : bool
        if ``True``, the algorithm solves
        :math:`(J^T J + \mu 1)\,\delta v = J^T f`,
        else :math:`(J + \mu 1)\,\delta v = f`
    marquardt : bool
        if ``True``, the algorithm uses :math:`\mathrm{diag}(J)` or
        :math:`\mathrm{diag}(J^T J)` (depending on the parameter
        ``J_squared``) instead of the identity matrix
    mu0 : float
        the initial value of the Levenberg damping parameter
    nu : float
        the relative increase/decrease of mu when necessary
    max_mu : float
        the maximum value of mu, to prevent infinite loops
    verbose_callback : function
        a function used to print verbosity information,
        e.g. the print function

    Attributes
    ----------
    n_iter : int
        the total number of iterations performed since the creation
        of the class
    n_iter_last : int
        the number of iterations performed when ``minimize`` was
        last called
    converged : bool
        whether the algorithm converged in fewer than ``maxiter``
        loops when ``minimize`` was last called
    """

    def __init__(self, convergence=None,
                 maxiter=1000,
                 miniter=0,
                 J_squared=False,
                 marquardt=False,
                 mu0=1.e-18,
                 nu=1.3,
                 max_mu=1.e20,
                 verbose_callback=None):

        if convergence is None:
            self.convergence = OrConvergenceMethod(
                MaxDerivativeConvergenceMethod(1.e-4),
                RelativeFunctionChangeConvergenceMethod(1.e-16))
        else:
            self.convergence = convergence
        self.maxiter = maxiter
        self.miniter = miniter
        self.J_squared = J_squared
        self.marquardt = marquardt
        self.mu0 = mu0
        self.nu = nu
        self.max_mu = max_mu
        self.verbose_callback = verbose_callback
        # the number of iterations performed in total
        self.n_iter = 0
        # the number of iterations performed in the last call
        self.n_iter_last = 0
    def minimize(self, function, v0):
        """ Perform the minimization.

        Parameters
        ==========
        function : DoublyDerivableFunction
            the function to be minimized
        v0 : array
            the initial function argument :math:`v`

        Returns
        =======
        v : array
            the vector :math:`v` at the minimum
        """

        if self.nu <= 1.0:
            raise Exception('If nu <= 1, there will be an infinite loop.')

        # obviously, we are not converged yet
        self.converged = False
        # initialize the damping parameter
        mu = self.mu0
        # initialize v
        v = v0
        # we set the initial function value
        func_val = function(v)
        Q1 = func_val.f()
        Q0 = np.nan

        # the main iteration
        for i in range(self.maxiter):
            # here, f is the Jacobian of the function w.r.t. v
            f = func_val.d()
            # the Jacobian of f
            J = func_val.dd()

            # check convergence
            conv_status, self.converged = \
                self.convergence(func_val, v, Q0=Q0, Q1=Q1)
            if self.verbose_callback is not None:
                msg = '{:6d} '.format(i + 1)
                msg += 'Q: {:12.6e}, '.format(func_val.f(v))
                msg += 'max_f: {:12.6e}, '.format(np.max(np.abs(f)))
                msg += 'conv: {:12.6e}'.format(conv_status)
                self.verbose_callback(msg)

            # declare converged
            if self.converged and i >= self.miniter:
                break

            # change J and f by multiplying the equation with J^T
            if self.J_squared:
                f = np.dot(J.transpose(), f)
                J = np.dot(J.transpose(), J)

            # define the identity matrix
            if self.marquardt:
                Id = np.diag(np.diag(J))
            else:
                Id = np.eye(len(J))

            # adjust mu until the function value Q is minimal;
            # let Q0 be the last Q-value
            Q0 = Q1

            # the change in v to the next iteration
            dv = np.linalg.solve(J + mu * Id, f)

            # in the following, there might be some unphysical values of
            # v that are supplied to the function. Turn off warnings.
            old_seterr = np.seterr(all='ignore')

            # first, we check what the Q of the new iteration will be
            Q1 = function(v - dv).f()
            # as Q has to decrease in every iteration, we increase mu
            # until this is the case (Q0 is the function value from the
            # last iteration)
            while (Q1 > Q0 or np.isnan(Q1)) and mu < self.max_mu:
                mu *= self.nu
                dv = np.linalg.solve(J + mu * Id, f)
                Q1 = function(v - dv).f()

            # we check what increasing mu does to Q
            dv2 = np.linalg.solve(J + self.nu * mu * Id, f)
            Q2 = function(v - dv2).f()
            # if Q decreased when mu was increased, we will try to
            # further decrease Q by further increasing mu
            if Q2 < Q1:
                nuf = self.nu
                mu *= self.nu
                Q2 = Q1
                dvnew = dv2
            # if Q increased when mu was increased, we will try to
            # further decrease Q by decreasing mu
            else:
                nuf = 1.0 / self.nu
                mu /= nuf
                dvnew = dv
                # just to make sure the loop runs at least once
                Q1 = np.inf

            while (Q2 < Q1 and mu < self.max_mu and
                   mu > self.nu * np.finfo(float).eps):
                Q1 = Q2
                dv = dvnew
                mu *= nuf
                dvnew = np.linalg.solve(J + mu * Id, f)
                Q2 = function(v - dvnew).f()

            # here, we require v to be physical again.
            # Reset the seterr status.
            np.seterr(**old_seterr)

            # update v
            v -= dv
            func_val = function(v)
            # just to be sure that Q1 is accurate (will be used in the
            # next loop)
            Q1 = func_val.f()

        self.n_iter_last = i + 1
        self.n_iter += i + 1

        return v
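
# ----------------------------------------------------------------------
# Editorial sketch, not part of the original module: it spells out, for
# a small fixed system, the four linear systems for delta v listed in
# the class docstring above. The helper name ``_damping_variants_demo``
# and the numbers in it are hypothetical, chosen for illustration only.
# ----------------------------------------------------------------------
def _damping_variants_demo(mu=0.1):
    J = np.array([[4.0, 1.0, 0.0],
                  [1.0, 3.0, 1.0],
                  [0.0, 1.0, 2.0]])
    f = np.array([1.0, 0.0, -1.0])

    # J_squared=False, marquardt=False: (J + mu*1) dv = f
    dv_a = np.linalg.solve(J + mu * np.eye(len(J)), f)
    # J_squared=False, marquardt=True: (J + mu*diag(J)) dv = f
    dv_b = np.linalg.solve(J + mu * np.diag(np.diag(J)), f)
    # for J_squared=True, the equation is first multiplied with J^T
    JtJ = np.dot(J.transpose(), J)
    Jtf = np.dot(J.transpose(), f)
    # J_squared=True, marquardt=False: (J^T J + mu*1) dv = J^T f
    dv_c = np.linalg.solve(JtJ + mu * np.eye(len(JtJ)), Jtf)
    # J_squared=True, marquardt=True: (J^T J + mu*diag(J^T J)) dv = J^T f
    dv_d = np.linalg.solve(JtJ + mu * np.diag(np.diag(JtJ)), Jtf)
    return dv_a, dv_b, dv_c, dv_d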
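
# ----------------------------------------------------------------------
# Editorial usage sketch, not part of the original module. It assumes
# only the interface visible in ``minimize`` above: ``function(v)``
# returns an object with ``f()`` (the value Q), ``d()`` (the gradient f)
# and ``dd()`` (the Jacobian J), and the convergence method is called as
# ``convergence(func_val, v, Q0=Q0, Q1=Q1)``, returning a tuple
# ``(conv_status, converged)``. The quadratic test function and the
# convergence function below are hypothetical stand-ins for the real
# DoublyDerivableFunction and ConvergenceMethod classes.
# Run with: python -m triqs_maxent.minimizers.levenberg_minimizer
# (the relative imports at the top prevent running the file directly).
# ----------------------------------------------------------------------
class _QuadraticEval(object):
    """ Evaluation of Q(v) = 1/2 v^T A v - b^T v at a fixed v. """

    def __init__(self, A, b, v):
        self._A, self._b, self._v = A, b, np.asarray(v, dtype=float)

    def f(self, v=None):
        # the optional argument mirrors the ``func_val.f(v)`` call in
        # the verbose output of ``minimize``; it is ignored here
        return (0.5 * np.dot(self._v, np.dot(self._A, self._v))
                - np.dot(self._b, self._v))

    def d(self):
        return np.dot(self._A, self._v) - self._b

    def dd(self):
        return self._A.copy()


class _QuadraticFunction(object):
    """ Hypothetical stand-in for a DoublyDerivableFunction. """

    def __init__(self, A, b):
        self.A, self.b = A, b

    def __call__(self, v):
        return _QuadraticEval(self.A, self.b, v)


if __name__ == '__main__':
    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    b = np.array([1.0, 1.0])

    # a gradient-based convergence check with the call signature used
    # in ``minimize`` above
    def conv(func_val, v, Q0, Q1):
        max_f = np.max(np.abs(func_val.d()))
        return max_f, max_f < 1.e-10

    minimizer = LevenbergMinimizer(convergence=conv,
                                   verbose_callback=print)
    # note that ``minimize`` updates v in place, so pass a fresh array
    v = minimizer.minimize(_QuadraticFunction(A, b), np.zeros(2))
    print('minimum at v =', v, '; exact:', np.linalg.solve(A, b))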