
    x-j5                        d dl mZ d dlmZmZ d dlZd dlZddlm	Z	 ddl
mZmZmZ erd dlmZ d dlmZ 	 	 	 	 	 	 	 	 	 	 d&d'd%ZdS )(    )annotations)TYPE_CHECKINGLiteralN   )strong_wolfe)_value_and_gradient&check_initial_inverse_hessian_estimatecheck_input_type)Callable)Tensord   2   :0yE>r         ?float32objective_funcCallable[[Tensor], Tensor]initial_positionr   history_sizeint	max_iterstolerance_gradfloattolerance_change initial_inverse_hessian_estimateTensor | Noneline_search_fnLiteral['strong_wolfe']max_line_search_itersinitial_step_lengthdtypeLiteral['float32', 'float64']name
str | Nonereturn(tuple[bool, int, Tensor, Tensor, Tensor]c                D   	
 
dvrt          d
 d          d}t          |d|           |"t          j        |j        d         
          n"t          |d	|           t          |           |t          j        |                                          }t           |          \  }}t          j	        d
gdd          }t          j	        d
gdd          }t          j	        d
gdd          }t          j	        d
gd
d          }t          j	        g d          t          j	        d
gd
d          }t          j	        d
gdd          }|j        d         }t          j
        d
z   |f
          }t          j
        d
z   |f
          }t          j
        d
z   d
f
          }t          j
        d
z   d
f
          fd}
	 f
d}t          j        j                            ||||||||||||||g           |||||fS )a  
    Minimizes a differentiable function `func` using the L-BFGS method.
    The L-BFGS is a quasi-Newton method for solving an unconstrained optimization problem over a differentiable function.
    Closely related is the Newton method for minimization. Consider the iterate update formula:

    .. math::
        x_{k+1} = x_{k} + H_k \nabla{f_k}

    If :math:`H_k` is the inverse Hessian of :math:`f` at :math:`x_k`, then it's the Newton method.
    If :math:`H_k` is symmetric and positive definite, used as an approximation of the inverse Hessian, then
    it's a quasi-Newton. In practice, the approximated Hessians are obtained
    by only using the gradients, over either whole or part of the search
    history, the former is BFGS, the latter is L-BFGS.

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006. pp179: Algorithm 7.5 (L-BFGS).

    Args:
        objective_func: the objective function to minimize. ``objective_func`` accepts a 1D Tensor and returns a scalar.
        initial_position (Tensor): the starting point of the iterates, has the same shape with the input of ``objective_func`` .
        history_size (Scalar): the number of stored vector pairs {si,yi}. Default value: 100.
        max_iters (int, optional): the maximum number of minimization iterations. Default value: 50.
        tolerance_grad (float, optional): terminates if the gradient norm is smaller than this. Currently gradient norm uses inf norm. Default value: 1e-7.
        tolerance_change (float, optional): terminates if the change of function value/position/parameter between two iterations is smaller than this value. Default value: 1e-9.
        initial_inverse_hessian_estimate (Tensor, optional): the initial inverse hessian approximation at initial_position. It must be symmetric and positive definite. If not given, will use an identity matrix of order N, which is size of ``initial_position`` . Default value: None.
        line_search_fn (str, optional): indicate which line search method to use, only support 'strong wolfe' right now. May support 'Hager Zhang' in the future. Default value: 'strong wolfe'.
        max_line_search_iters (int, optional): the maximum number of line search iterations. Default value: 50.
        initial_step_length (float, optional): step length used in first iteration of line search. different initial_step_length may cause different optimal result. For methods like Newton and quasi-Newton the initial trial step length should always be 1.0. Default value: 1.0.
        dtype ('float32' | 'float64', optional): data type used in the algorithm, the data type of the input parameter must be consistent with the dtype. Default value: 'float32'.
        name (str, optional): Name for the operation. For more information, please refer to :ref:`api_guide_Name`. Default value: None.

    Returns:
        output(tuple):

            - is_converge (bool): Indicates whether found the minimum within tolerance.
            - num_func_calls (int): number of objective function called.
            - position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function regarding to the initial position.
            - objective_value (Tensor): objective function value at the `position`.
            - objective_gradient (Tensor): objective function gradient at the `position`.

    Examples:
        .. code-block:: python
            :name: code-example1

            >>> # Example1: 1D Grid Parameters
            >>> import paddle
            >>> # Randomly simulate a batch of input data
            >>> inputs = paddle. normal(shape=(100, 1))
            >>> labels = inputs * 2.0
            >>> # define the loss function
            >>> def loss(w):
            ...     y = w * inputs
            ...     return paddle.nn.functional.square_error_cost(y, labels).mean()
            >>> # Initialize weight parameters
            >>> w = paddle.normal(shape=(1,))
            >>> # Call the bfgs method to solve the weight that makes the loss the smallest, and update the parameters
            >>> for epoch in range(0, 10):
            ...     # Call the bfgs method to optimize the loss, note that the third parameter returned represents the weight
            ...     w_update = paddle.incubate.optimizer.functional.minimize_bfgs(loss, w)[2]
            ...     # Use paddle.assign to update parameters in place
            ...     paddle.assign(w_update, w)

        .. code-block:: python
            :name: code-example2

            >>> # Example2: Multidimensional Grid Parameters
            >>> import paddle
            >>> def flatten(x):
            ...     return x. flatten()
            >>> def unflatten(x):
            ...     return x.reshape((2,2))
            >>> # Assume the network parameters are more than one dimension
            >>> def net(x):
            ...     assert len(x.shape) > 1
            ...     return x.square().mean()
            >>> # function to be optimized
            >>> def bfgs_f(flatten_x):
            ...     return net(unflatten(flatten_x))
            >>> x = paddle.rand([2,2])
            >>> for i in range(0, 10):
            ...     # Flatten x before using minimize_bfgs
            ...     x_update = paddle.incubate.optimizer.functional.minimize_bfgs(bfgs_f, flatten(x))[2]
            ...     # unflatten x_update, then update parameters
            ...     paddle.assign(unflatten(x_update), x)

    )r   float64z?The dtype must be 'float32' or 'float64', but the specified is .minimize_lbfgsr   Nr   r!   r   r   int64shape
fill_valuer!   Fboolc                    | k     | z  S N )kdoneis_convergenum_func_callsvaluexkg1sk_vecyk_vecrhok_vecheadtailr   s               j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/incubate/optimizer/functional/lbfgs.pycondzminimize_lbfgs.<locals>.cond   s     I$&&    c                  
	
 t          j        |          }t          j        g 
dz
                                d          }fd}	fd}t           j        j                            ||||g           t          j        |          }t          j        g dz   d          }
fd}	fd}t           j        j                            ||||g           | }!d	k    rt          #||" 
          \  }}}}nt          d! d          t          j        ||z   |           ||z  }||z
  }t          j
        ||          t           j        j                            dk    fdfd          }t          j                    r|
<   |
<   |	
<   nct           j                            
|          t           j                            
|          t           j                            	
|          	
dz   z  
d t           j        j                            
k    fdd            ||z   }|}| dz  } t           j                            |t           j                  }t           j                            |t           j                  }t          j        ||%k     z  |$k     z  |           t          j        ||           t          j        ||dk    z  |           | ||||||	
gS )Nr   r,   r-   c                    | k    S r2   r3   )iqai_vecr?   s      r@   rA   z*minimize_lbfgs.<locals>.body.<locals>.cond       9rB   c           	     b   t          j                    r(|          t          j        |          |          z  || <   nCt           j                            || |          t          j        |          |          z            }|||          |          z  z
  }| dz
                                } | ||fS Nr   )paddlein_dynamic_modedotstaticsetitemmod)rE   rF   rG   r   r=   r;   r<   s      r@   bodyz*minimize_lbfgs.<locals>.body.<locals>.body   s    %'' $QK&*VAY*B*BBq		..Ax{VZq	1-E-EE  F1Iq	))AQL))Aa<rB   rA   rQ   	loop_varsc                    | k    S r2   r3   )rE   rr>   s     r@   rA   z*minimize_lbfgs.<locals>.body.<locals>.cond   rH   rB   c                    |          t          j        |          |          z  }||          |          |z
  z  z   }| dz                                 } | |fS rJ   )rK   rM   rP   )rE   rU   betarG   r   r=   r;   r<   s      r@   rQ   z*minimize_lbfgs.<locals>.body.<locals>.body   s[    A;F1Iq!9!99DF1IT!122AQL))Aa4KrB   r   )fr9   pkr   r    r!   zNCurrently only support line_search_fn = 'strong_wolfe', but the specified is ''g        c                 4    t          j        dgd           S )Nr   g     @@r-   )rK   fullr+   s   r@   <lambda>z.minimize_lbfgs.<locals>.body.<locals>.<lambda>  s    FKqcfEJJJ rB   c                     d z  S )Nr   r3   )rhok_invs   r@   r]   z.minimize_lbfgs.<locals>.body.<locals>.<lambda>  s    C(N rB   c                6    t          j        | dz   |            d S rJ   )rK   assign)r?   s    r@   true_fnz-minimize_lbfgs.<locals>.body.<locals>.true_fn   s    M$(D)))))rB   c                                 S r2   r3   )r?   rb   s   r@   r]   z.minimize_lbfgs.<locals>.body.<locals>.<lambda>$  s    GGDMM rB   )p)rK   ra   r\   rP   rN   nn
while_loopmatmulr   NotImplementedErrorrM   rA   rL   rO   linalgnormnpinf)&r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   rF   rE   rA   rQ   rU   rY   alphag2ls_func_callsskykrhokgnormpk_normr_   rb   H0rG   r!   r   r    r   r   r   r   r   s&          `````              @@r@   rQ   zminimize_lbfgs.<locals>.body   s   " M"K$(!=!=W
 
 
	 	 	 	 			  		  		  		  		  		  		  		  	##DQ6N 	$ 	
 	
 	
 M"a  KbTAXWEEE	 	 	 	 		 	 	 	 	 	 	 	 	 	##DQF#KKKR ^++.: /$7/ / /+E5"mm &raorrr   	n}4nEEE RZ"W:b"%%}$$OJJJJ""""
 

 !## 	CF4LF4L!HTNN]**64<<F]**64<<F},,XtTBBHqL(	* 	* 	* 	ddl,A,A,A,A,A4HHH"W	Q ""2"00-$$R26$22EN*+w9I/IJD	
 	
 	
 	dK(((desl+T222 
 	
rB   rR   )
ValueErrorr
   rK   eyer.   r	   ra   detachr   r\   zerosrN   re   rf   )r   r   r   r   r   r   r   r   r   r    r!   r#   op_namer9   r8   r:   r4   r5   r6   r7   r>   r?   r.   r;   r<   r=   rA   rQ   ru   rG   s   ` ```` ````                 @@r@   r*   r*   $   s   H ***VeVVV
 
 	
 G%'97CCC'/Z(.q1???,.	
 	
 	

 	//OPPP- 
'..00	1	1B#NB77IE21#!7;;;A;aSU&AAAD+QCEHHHK[sqHHHN;RLPPPL;aSQg>>>D;aSQg>>>D"1%E
 \<!+U35AAAF\<!+U35AAAF|\A-q1???H\<!+Q/u===F' ' ' ' ' 
 
 
 
 
 
 
 
 
 
 
 
 
 
B M
     $ E255rB   )
r   r   r   r   Nr   r   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r!   r"   r#   r$   r%   r&   )
__future__r   typingr   r   numpyrk   rK   line_searchr   utilsr   r	   r
   collections.abcr   r   r*   r3   rB   r@   <module>r      s   # " " " " " ) ) ) ) ) ) ) )      % % % % % %           ((((((  "6:.<!#"+4q6 q6 q6 q6 q6 q6 q6rB   