
    ϑiC                        S SK Jr  S SKJr  S SKJr  S SKJrJrJ	r	J
r
  S SKrS SKJr  S SKJr  SS	KJr  \(       a'  S S
KJrJr  S SKJr  S SKJr  S SKJr  S SKJr  \
" SSS9r\" SSSS9 " S S\5      5       rg)    )annotations)defaultdict)reduce)TYPE_CHECKINGAnyLiteralTypeVarN)	Optimizer)
deprecated   )_strong_wolfe)CallableSequence)Tensor)GradientClipBase)_ParameterConfig)WeightDecayRegularizer_T_coT)	covariantz2.5.0zpaddle.optimizer.LBFGS)since	update_tolevelc                    ^  \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'              S                       SU 4S jjjrSS jrS rS r	S r
S rS rS rSS jrSrU =r$ )LBFGS%   a  
The L-BFGS is a quasi-Newton method for solving an unconstrained optimization problem over a differentiable function.
Closely related is the Newton method for minimization. Consider the iterate update formula:

.. math::
    x_{k+1} = x_{k} + H_k \nabla{f_k}

If :math:`H_k` is the inverse Hessian of :math:`f` at :math:`x_k`, then it's the Newton method.
If :math:`H_k` is symmetric and positive definite, used as an approximation of the inverse Hessian, then
it's a quasi-Newton. In practice, the approximated Hessians are obtained
by only using the gradients, over either whole or part of the search
history, the former is BFGS, the latter is L-BFGS.

Reference:
    Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006. pp179: Algorithm 7.5 (L-BFGS).

Args:
    learning_rate (float, optional): learning rate .The default value is 1.
    max_iter (int, optional): maximal number of iterations per optimization step.
        The default value is 20.
    max_eval (int, optional): maximal number of function evaluations per optimization
        step. The default value is max_iter * 1.25.
    tolerance_grad (float, optional): termination tolerance on first order optimality
        The default value is 1e-5.
    tolerance_change (float, optional): termination tolerance on function
        value/parameter changes. The default value is 1e-9.
    history_size (int, optional): update history size. The default value is 100.
    line_search_fn (string, optional): either 'strong_wolfe' or None. The default value is strong_wolfe.
    parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \
        This parameter is required in dygraph mode. The default value is None.
    weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
        It canbe a float value as coeff of L2 regularization or \
        :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
        If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
        the regularization setting here in optimizer will be ignored for this parameter. \
        Otherwise, the regularization setting here in optimizer will take effect. \
        Default None, meaning there is no regularization.
    grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of \
        some derived class of ``GradientClipBase`` . There are three clipping strategies \
        ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` , \
        :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
    name (str, optional): Normally there is no need for user to set this property.
        For more information, please refer to :ref:`api_guide_Name`.
        The default value is None.

Return:
    loss (Tensor): the final loss of closure.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> import numpy as np
        >>> from paddle.incubate.optimizer import LBFGS

        >>> paddle.disable_static()
        >>> np.random.seed(0)
        >>> np_w = np.random.rand(1).astype(np.float32)
        >>> np_x = np.random.rand(1).astype(np.float32)

        >>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)]
        >>> # y = 2x
        >>> targets = [2 * x for x in inputs]

        >>> class Net(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...         w = paddle.to_tensor(np_w)
        ...         self.w = paddle.create_parameter(shape=w.shape, dtype=w.dtype, default_initializer=paddle.nn.initializer.Assign(w))
        ...     def forward(self, x):
        ...         return self.w * x

        >>> net = Net()
        >>> opt = LBFGS(learning_rate=1, max_iter=1, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn='strong_wolfe', parameters=net.parameters())
        >>> def train_step(inputs, targets):
        ...     def closure():
        ...         outputs = net(inputs)
        ...         loss = paddle.nn.functional.mse_loss(outputs, targets)
        ...         print('loss: ', loss.item())
        ...         opt.clear_grad()
        ...         loss.backward()
        ...         return loss
        ...     opt.step(closure)

        >>> for input, target in zip(inputs, targets):
        ...     input_tensor = paddle.to_tensor(input)
        ...     target_tensor = paddle.to_tensor(target)
        ...     train_step(input_tensor, target_tensor)

floatlearning_rateintmax_itermax_evaltolerance_gradtolerance_changehistory_sizeLiteral['strong_wolfe'] | Noneline_search_fndict[str, dict[str, Any]]statec                  > Uc  US-  S-  nXl         X l        X0l        X@l        XPl        X`l        Xpl        [        U[        R                  5      (       a  [        S[        U5      -   5      e[        [        5      U l        [        TU ]A  SUU	U
US9  [        U R"                  S   [        5      (       d  U R"                  U l        O(['        U R(                  5       H  u  pUS   U l        M     S U l        g )N      z^parameters argument given to the optimizer should be an iterable of Tensors or dicts, but got       ?)r   
parametersweight_decay	grad_clipnamer   params)r   r   r    r!   r"   r#   r%   
isinstancepaddler   	TypeErrortyper   dictr'   super__init___parameter_list_params	enumerate_param_groups_numel_cache)selfr   r   r    r!   r"   r#   r%   r,   r-   r.   r/   idxparam_group	__class__s                 _/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/optimizer/lbfgs.pyr7   LBFGS.__init__   s     !|q(H*  , 0(,j&--00<>B:>NO 
 !&
!% 	 	
 $..q1488//DL$-d.@.@$A *84 %B !    c                x    0 nU R                   R                  5        H  u  p#UR                  X#05        M     SU0$ )zReturns the state of the optimizer as a :class:`dict`.

Return:
    state, a dict holding current optimization state. Its content
        differs between optimizer classes.
r'   )r'   itemsupdate)r=   packed_statekvs       rA   
state_dictLBFGS.state_dict   s?     JJ$$&DA' ' &&rC   c                n    U R                   c  [        S U R                  S5      U l         U R                   $ )Nc                &    XR                  5       -   $ N)numel)totalps     rA   <lambda>LBFGS._numel.<locals>.<lambda>   s    !2rC   r   )r<   r   r9   )r=   s    rA   _numelLBFGS._numel   s4    $ &2DLL!!D    rC   c                   / nU R                    Hd  nUR                  c'  [        R                  " U5      R	                  S/5      nOUR                  R	                  S/5      nUR                  U5        Mf     [        R                  " USS9$ )Nr   )axis)r9   gradr2   
zeros_likereshapeappendconcat)r=   viewsrQ   views       rA   _gather_flat_gradLBFGS._gather_flat_grad   sn    Avv~((+33RD9vv~~rd+LL  }}U++rC   c           	        SnU R                    Hd  n[        S UR                  5      n[        R                  " UR                  X#X5-    R                  UR                  5      U-  5      U5      nX5-  nMf     X0R                  5       :X  d   eg )Nr   c                
    X-  $ rN    )xys     rA   rR   !LBFGS._add_grad.<locals>.<lambda>   s    rC   )r9   r   shaper2   assignaddr[   rT   )r=   alpha	directionoffsetrQ   rO   s         rA   	_add_gradLBFGS._add_grad   s    A-qww7Ev~6>>qwwG%O 	A OF  &&&rC   c                `    U R                    Vs/ s H  oR                  5       PM     sn$ s  snf rN   )r9   clone)r=   rQ   s     rA   _clone_paramLBFGS._clone_param   s"    #'<<0<a	<000s   +c                n    [        U R                  U5       H  u  p#[        R                  " X25        M     g rN   )zipr9   r2   ri   )r=   params_datarQ   pdatas       rA   
_set_paramLBFGS._set_param   s%    DLL+6HAMM%# 7rC   c                    U R                  X45        [        U" 5       5      nU R                  5       nU R                  U5        XV4$ rN   )rn   r   r`   rx   )r=   closurere   rk   dloss	flat_grads          rA   _directional_evaluateLBFGS._directional_evaluate   s<    u WY**,	rC   c           
     .  ^ ^ [         R                  " 5          [         R                  " 5       " T5      mT R                  nT R                  nT R
                  nT R                  nT R                  nT R                  nT R                  nT R                  n	U	R                  SS5        U	R                  SS5        T" 5       n
[        U
5      nSnU	S==   S-  ss'   T R                  5       nUR                  5       R                  5       U:*  nU(       a  U
sSSS5        $ U	R!                  S5      nU	R!                  S5      nU	R!                  S5      nU	R!                  S	5      nU	R!                  S
5      nU	R!                  S5      nU	R!                  S5      nU	R!                  S5      nSnUU:  Ga  US-  nU	S==   S-  ss'   U	S   S:X  a7  UR#                  5       n/ n/ n/ n[         R$                  " SU
R&                  S9nGOUR)                  U5      nUR+                  [         R$                  " UUR&                  S95      nUR-                  U5      nUS:  a  [/        U5      U:X  a3  UR1                  S5        UR1                  S5        UR1                  S5        UR3                  U5        UR3                  U5        UR3                  SU-  5        UUR-                  U5      -  n[/        U5      nSU	;  a	  S/U-  U	S'   U	S   nUR#                  5       n[5        US-
  SS5       HP  nUU   R-                  U5      UU   -  UU'   [         R6                  " UR9                  UU   UU   * -  5      U5        MR     [         R*                  " UU5      =nn[5        U5       HO  nUU   R-                  U5      UU   -  n [         R6                  " UR9                  UU   UU   U -
  -  5      U5        MQ     Uc  UR;                  5       nO[         R6                  " UU5        UnU	S   S:X  a/  [=        SSUR                  5       R?                  5       -  5      U-  nOUnUR-                  U5      n!U!U* :  a  GOWSn"Ubp  US:w  a  [A        S5      eT RC                  5       n#UU 4S jn$[E        U$U#UXUU!5      u  pnn"T RG                  UU5        UR                  5       R                  5       U:*  nOyT RG                  UU5        UU:w  aa  [         R                  " 5          [        T" 5       5      nSSS5        T R                  5       nUR                  5       R                  5       U:*  nSn"UU"-  nU	S==   U"-  ss'   U(       a  OOUU-  R                  5       R                  5       U::  a  O)[        UU-
  5      U:  a  OX:  a  OUU:X  a  O	UU:  a  GM  XS'   UU	S'   UU	S'   UU	S	'   UU	S
'   UU	S'   UU	S'   UU	S'   SSS5        U
$ ! , (       d  f       N= f! , (       d  f       W
$ = f)z
Performs a single optimization step.

Args:
    closure (callable): A closure that reevaluates the model
        and returns the loss.


func_evalsr   n_iterr   Nr|   rk   old_ykold_skroH_diagprev_flat_grad	prev_lossr+   )dtypeg|=alrW   strong_wolfez only 'strong_wolfe' is supportedc                *   > TR                  TXU5      $ rN   )r   )re   rk   r|   r{   r=   s      rA   obj_funcLBFGS.step.<locals>.obj_func  s    #'#=#= '1$ rC   )$r2   no_gradenable_gradr   r   r    r!   r"   r%   r#   r'   
setdefaultr   r`   absmaxgetneg	to_tensorr   subtractmultiplydotlenpopr\   rangeri   rj   rq   minsumRuntimeErrorrr   r   rn   )%r=   r{   r   r   r    r!   r"   r%   r#   r'   	orig_lossr}   current_evalsr~   opt_condr|   rk   r   r   r   r   r   r   r   rf   sysnum_oldr   qirbe_igtdls_func_evalsx_initr   s%   ``                                   rA   step
LBFGS.step   s    ^^((*73G ..M}}H}}H!00N#44!00N,,LJJE\1-Xq)  	I#DM,1$..0I }}**,>H  7 < 		#AIIg&EYYx(FYYx(F4BYYx(F"YY'78N		+.IF8#!h1$
 ?a'!AFFB#--cIF "**>:A

6#3#3E#IJAqBEzv;,6"JJqM"JJqMFF1I a(a(		#(+ "$aeeAh "&kG5('+f|&;dtB "A"7Q;B7 &q	a 02a5 81aeeF1I"Q%,@&A1E 8 #OOAv66A"7^%ay}}Q/"Q%7aeeF1IA,F&GK , ")%.__%6NMM)^< 	 ?a'Cy}}':':'<!<=M  *E  mmA& *** !"!-%7*+MNN!%!2!2!4
 AN$feQiA= NN5!,(}}224FH NN5!,)#//1#(#3D 2$($:$:$<	#,==?#6#6#8N#J() .l#}4#  I??$((*.>>ti'(+;; !,X%C 8#F #J"E'N$E(O$E(OE$K$E(O&4E"#!*E+g j K 21a j s2   C:XO2XW4B!X'X4
X	>X
X)
r<   r9   r#   r   r%   r    r   r'   r"   r!   )r+      NgHz>g&.>d   NNNNN)r   r   r   r   r    z
int | Noner!   r   r"   r   r#   r   r%   r$   r,   z4Sequence[Tensor] | Sequence[_ParameterConfig] | Noner-   z%float | WeightDecayRegularizer | Noner.   zGradientClipBase | Noner/   z
str | Nonereturnr   )r   r&   )r{   zCallable[[], _T_co]r   r   )__name__
__module____qualname____firstlineno____doc____annotations__r7   rJ   rT   r`   rn   rr   rx   r   r   __static_attributes____classcell__)r@   s   @rA   r   r   %   s   Yv MM22$$  ## $"&9=KO>B-1/!/! /! 	/!
 /!  /! /! 7/! I/! </! +/! /! 
/! /!b'!,'1$ rC   r   )
__future__r   collectionsr   	functoolsr   typingr   r   r   r	   r2   paddle.optimizerr
   paddle.utilsr   line_search_dygraphr   collections.abcr   r   r   paddle.nn.clipr   paddle.optimizer.optimizerr   paddle.regularizerr   r   r   rd   rC   rA   <module>r      si    # #  7 7  & # .2/;9Gt,E '%=QGRI R HRrC   