
    ϑi5                        S SK Jr  S SKJrJr  S SKrS SKrSSKJ	r	  SSK
JrJrJr  \(       a  S SKJr  S SKJr            S
                         SS	 jjrg)    )annotations)TYPE_CHECKINGLiteralN   )strong_wolfe)_value_and_gradient&check_initial_inverse_hessian_estimatecheck_input_type)Callable)Tensorc                  ^ ^^^^^^^	^
^^ T
S;  a  [        ST
 S35      eSn[        USU5        Uc#  [        R                  " UR                  S   T
S9mO[        USU5        [        U5        Um[        R                  " UR                  5       5      n[        T U5      u  p[        R                  " S	/SS
S9n[        R                  " S	/SSS9n[        R                  " S	/SSS9n[        R                  " S	/S	S
S9n[        R                  " / TS
S9m[        R                  " S	/S	S
S9n[        R                  " S	/SS
S9nUR                  S   n[        R                  " TS	-   U4T
S9n[        R                  " TS	-   U4T
S9n[        R                  " TS	-   S	4T
S9n[        R                  " TS	-   S	4T
S9mU4S jnUUU
UU	UUU UU4
S jn[        R                  R                  R                  UUUUUUUUUUUUUU/S9  UUXU4$ )ah  
Minimizes a differentiable function `func` using the L-BFGS method.
The L-BFGS is a quasi-Newton method for solving an unconstrained optimization problem over a differentiable function.
Closely related is the Newton method for minimization. Consider the iterate update formula:

.. math::
    x_{k+1} = x_{k} + H_k \nabla{f_k}

If :math:`H_k` is the inverse Hessian of :math:`f` at :math:`x_k`, then it's the Newton method.
If :math:`H_k` is symmetric and positive definite, used as an approximation of the inverse Hessian, then
it's a quasi-Newton. In practice, the approximated Hessians are obtained
by only using the gradients, over either whole or part of the search
history, the former is BFGS, the latter is L-BFGS.

Reference:
    Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006. pp179: Algorithm 7.5 (L-BFGS).

Args:
    objective_func: the objective function to minimize. ``objective_func`` accepts a 1D Tensor and returns a scalar.
    initial_position (Tensor): the starting point of the iterates, has the same shape with the input of ``objective_func`` .
    history_size (Scalar): the number of stored vector pairs {si,yi}. Default value: 100.
    max_iters (int, optional): the maximum number of minimization iterations. Default value: 50.
    tolerance_grad (float, optional): terminates if the gradient norm is smaller than this. Currently gradient norm uses inf norm. Default value: 1e-7.
    tolerance_change (float, optional): terminates if the change of function value/position/parameter between two iterations is smaller than this value. Default value: 1e-9.
    initial_inverse_hessian_estimate (Tensor, optional): the initial inverse hessian approximation at initial_position. It must be symmetric and positive definite. If not given, will use an identity matrix of order N, which is size of ``initial_position`` . Default value: None.
    line_search_fn (str, optional): indicate which line search method to use, only support 'strong wolfe' right now. May support 'Hager Zhang' in the future. Default value: 'strong wolfe'.
    max_line_search_iters (int, optional): the maximum number of line search iterations. Default value: 50.
    initial_step_length (float, optional): step length used in first iteration of line search. different initial_step_length may cause different optimal result. For methods like Newton and quasi-Newton the initial trial step length should always be 1.0. Default value: 1.0.
    dtype ('float32' | 'float64', optional): data type used in the algorithm, the data type of the input parameter must be consistent with the dtype. Default value: 'float32'.
    name (str, optional): Name for the operation. For more information, please refer to :ref:`api_guide_Name`. Default value: None.

Returns:
    output(tuple):

        - is_converge (bool): Indicates whether found the minimum within tolerance.
        - num_func_calls (int): number of objective function called.
        - position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function regarding to the initial position.
        - objective_value (Tensor): objective function value at the `position`.
        - objective_gradient (Tensor): objective function gradient at the `position`.

Examples:
    .. code-block:: python
        :name: code-example1

        >>> # Example1: 1D Grid Parameters
        >>> import paddle
        >>> # Randomly simulate a batch of input data
        >>> inputs = paddle. normal(shape=(100, 1))
        >>> labels = inputs * 2.0
        >>> # define the loss function
        >>> def loss(w):
        ...     y = w * inputs
        ...     return paddle.nn.functional.square_error_cost(y, labels).mean()
        >>> # Initialize weight parameters
        >>> w = paddle.normal(shape=(1,))
        >>> # Call the bfgs method to solve the weight that makes the loss the smallest, and update the parameters
        >>> for epoch in range(0, 10):
        ...     # Call the bfgs method to optimize the loss, note that the third parameter returned represents the weight
        ...     w_update = paddle.incubate.optimizer.functional.minimize_bfgs(loss, w)[2]
        ...     # Use paddle.assign to update parameters in place
        ...     paddle.assign(w_update, w)

    .. code-block:: python
        :name: code-example2

        >>> # Example2: Multidimensional Grid Parameters
        >>> import paddle
        >>> def flatten(x):
        ...     return x. flatten()
        >>> def unflatten(x):
        ...     return x.reshape((2,2))
        >>> # Assume the network parameters are more than one dimension
        >>> def net(x):
        ...     assert len(x.shape) > 1
        ...     return x.square().mean()
        >>> # function to be optimized
        >>> def bfgs_f(flatten_x):
        ...     return net(unflatten(flatten_x))
        >>> x = paddle.rand([2,2])
        >>> for i in range(0, 10):
        ...     # Flatten x before using minimize_bfgs
        ...     x_update = paddle.incubate.optimizer.functional.minimize_bfgs(bfgs_f, flatten(x))[2]
        ...     # unflatten x_update, then update parameters
        ...     paddle.assign(unflatten(x_update), x)

)float32float64z?The dtype must be 'float32' or 'float64', but the specified is .minimize_lbfgsinitial_positionr   dtype initial_inverse_hessian_estimater   int64shape
fill_valuer   Fboolc                   > U T:  U) -  $ N )kdoneis_convergenum_func_callsvaluexkg1sk_vecyk_vecrhok_vecheadtail	max_iterss               j/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/optimizer/functional/lbfgs.pycondminimize_lbfgs.<locals>.cond   s     I$&&    c                4  >
^^^	^
^^^ [         R                  " U5      n[         R                  " / T
S-
  R                  T5      SS9nU4S jnUU	UU4S jn[         R                  R
                  R                  XXT/S9  [         R                  " TU5      n[         R                  " / TS-   SS9nU
4S jnUUU	UU4S jn[         R                  R
                  R                  XUU/S9  U* nT!S	:X  a  [        T#UUT"T TS
9u  nnnnO[        ST! S35      e[         R                  " UU-   U5        UU-  nUU-
  n[         R                  " UU5      m[         R                  R
                  R                  TS:H  U4S jU4S j5      n[         R                  " 5       (       a  UTT
'   UTT
'   UT	T
'   Oc[         R                  R                  TT
U5      m[         R                  R                  TT
U5      m[         R                  R                  T	T
U5      m	T
S-   T-  m
S m[         R                  R
                  R                  T
T:H  UU4S jS 5        UU-   nUnU S-  n [         R                  R                  U[         R"                  S9n[         R                  R                  U[         R"                  S9n[         R                  " UUT%:  -  UT$:  -  U5        [         R                  " X5        [         R                  " UUS:H  -  U5        U UUUUUUTTT	T
T/$ )Nr   r   r   c                   > U T:g  $ r   r   )iqai_vecr)   s      r+   r,   *minimize_lbfgs.<locals>.body.<locals>.cond       9r.   c           	     D  > [         R                  " 5       (       a#  TU    [         R                  " TU    U5      -  X '   O>[         R                  R	                  X TU    [         R                  " TU    U5      -  5      nXU    TU    -  -
  nU S-
  R                  T5      n XU4$ Nr   )paddlein_dynamic_modedotstaticsetitemmod)r1   r2   r3   history_sizer'   r%   r&   s      r+   body*minimize_lbfgs.<locals>.body.<locals>.body   s    %%''$QK&**VAY*BB	..x{VZZq	1-EE 1Iq	))AQL)A<r.   r,   r?   	loop_varsc                   > U T:g  $ r   r   )r1   rr(   s     r+   r,   r4      r5   r.   c                   > TU    [         R                  " TU    U5      -  nUTU    TU    U-
  -  -   nU S-   R                  T5      n X4$ r7   )r8   r:   r=   )r1   rD   betar3   r>   r'   r%   r&   s      r+   r?   r@      sU    A;F1Iq!99DF1IT!122AQL)A4Kr.   r   )fr#   pkr*   initial_step_lengthr   zNCurrently only support line_search_fn = 'strong_wolfe', but the specified is ''g        c                 2   > [         R                  " S/ST S9$ )Nr   g     @@r   )r8   fullr   s   r+   <lambda>.minimize_lbfgs.<locals>.body.<locals>.<lambda>  s    FKKqcfEJr.   c                    > ST -  $ )N      ?r   )rhok_invs   r+   rM   rN     s	    C(Nr.   c                8    [         R                  " U S-   U 5        g r7   )r8   assign)r)   s    r+   true_fn-minimize_lbfgs.<locals>.body.<locals>.true_fn   s    MM$(D)r.   c                    > T" T 5      $ r   r   )r)   rT   s   r+   rM   rN   $  s	    GDMr.   )p)r8   rS   rL   r=   r;   nn
while_loopmatmulr   NotImplementedErrorr:   r,   r9   r<   linalgnormnpinf)&r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r2   r1   r,   r?   rD   rH   alphag2ls_func_callsskykrhokgnormpk_normrQ   rT   H0r3   r   r>   rI   line_search_fnmax_line_search_itersobjective_functolerance_changetolerance_grads&          `````              @@r+   r?   minimize_lbfgs.<locals>.body   s   " MM"KK$(!=W
			  		  	##Q6N 	$ 	
 MM"a KKbTAXWE		 	 	##QF#KR ^+.: /$7/+E5"m &`ao`ppqr  	n}4nE RZ"W::b"%}}$$OJ"

 !!##F4LF4L!HTN]]**64<F]]**64<F}},,XtTBHqL(	* 	ddl,A4H"W	Q ""2"0--$$R266$2EN*+w9I/IJD	
 	d(desl+T2 
 	
r.   rA   )
ValueErrorr
   r8   eyer   r	   rS   detachr   rL   zerosr;   rX   rY   )rk   r   r>   r*   rm   rl   r   ri   rj   rI   r   nameop_namer#   r"   r$   r   r   r    r!   r(   r)   r   r%   r&   r'   r,   r?   rh   r3   s   ` ```` ````                 @@r+   r   r   $   s   H **MeWTUV
 	
 G%'97C'/ZZ(..q1?,.	

 	//OP- 
'..0	1B#NB7IE1#!7;A;;aSU&AD++QCEHK[[sqHN;;RLPL;;aSQg>D;;aSQg>D""1%E
 \\<!+U35AF\\<!+U35AF||\A-q1?H\\<!+Q/u=F' 
 
B MM
   $ 255r.   )
d   2   :0yE>rw   Nr   rv   rP   r   N)rk   zCallable[[Tensor], Tensor]r   r   r>   intr*   rx   rm   floatrl   ry   r   zTensor | Noneri   zLiteral['strong_wolfe']rj   rx   rI   rx   r   zLiteral['float32', 'float64']rs   z
str | Nonereturnz(tuple[bool, int, Tensor, Tensor, Tensor])
__future__r   typingr   r   numpyr^   r8   line_searchr   utilsr   r	   r
   collections.abcr   r   r   r   r.   r+   <module>r      s    # )   %  (  "6:.<!#"+4q6.q6q6 q6 	q6
 q6 q6 '4q6 ,q6 q6 q6 )q6 q6 .q6r.   