
    ϑi)                     (    S SK r SS jr    SS jrg)    Nc                 :   Ub  Uu  pxOX::  a  X4OX04u  pxX%-   SX-
  -  X-
  -  -
  n	U	S-  X%-  -
  n
U
S:  a_  U
R                  5       nX::  a  X3U -
  X[-   U	-
  XR-
  SU-  -   -  -  -
  nOX U-
  X+-   U	-
  X%-
  SU-  -   -  -  -
  n[        [        X5      U5      $ Xx-   S-  $ )a-  Cubic interpolation between (x1, f1, g1) and (x2, f2, g2).
    Use two points and their gradient to determine a cubic function and get the minimum point
    between them in the cubic curve.

Reference:
    Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
    pp59: formula 3.59

Args:
    x1, f1, g1: point1's position, value and gradient.
    x2, f2, g2: point2's position, value and gradient.
    bounds: bounds of interpolation area

Returns:
    min_pos: the minimum point between the specified points in the cubic curve.
      r   g       @)sqrtminmax)x1f1g1x2f2g2bounds
xmin_bound
xmax_boundd1	d2_squared2min_poss                m/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/optimizer/line_search_dygraph.py_cubic_interpolater      s    $ !'
J-/X"B8
	1=BG,	,BAIA~^^8G2"'AF:J(KLLGG2"'AF:J(KLLG3w+Z88'3..    c           
         UR                  5       R                  5       nUR                  5       nU " XU5      u  pSn[        R                  " X5      n[        R
                  " SUR                  S9UUU4u  nnnnSnSnUU
:  a  XXr-  U-  -   :  d  US:  a%  UU:  a  UU/nUU/nUUR                  5       /nUU/nO[        R                   " U5      U* U-  ::  a  U/nU/nU/nSnOUS:  a  UU/nUU/nUUR                  5       /nUU/nOgUSUU-
  -  -   nUS-  nUn[        UUUUUUUU4S9nUnUnUR                  5       nUnU " XU5      u  pUS-  nUR	                  U5      nUS-  nUU
:  a  M  UU
:X  a
  SU/nXL/nX]/nSnWS   US	   ::  a  S
OSu  nnU(       Gd  UU
:  Ga  [        R                   " WS   US   -
  5      U-  U	:  a  GO[        US   US   WS   US   US   US   5      nS[        U5      [        U5      -
  -  n [        [        U5      U-
  U[        U5      -
  5      U :  a  U(       d  U[        U5      :  d  U[        U5      ::  af  [        R                   " U[        U5      -
  5      [        R                   " U[        U5      -
  5      :  a  [        U5      U -
  nO[        U5      U -   nSnOSnOSnU " XU5      u  pUS-  nUR	                  U5      nUS-  nXXr-  U-  -   :  d	  UUU   :  a6  UUU'   UUU'   UR                  5       WU'   UUU'   US   US   ::  a  S
OSu  nnOu[        R                   " U5      U* U-  ::  a  SnO2UUU   UU   -
  -  S:  a   UU   UU'   UU   UU'   WU   UU'   UU   UU'   UUU'   UUU'   UR                  5       WU'   UUU'   U(       d	  UU
:  a  GM  WU   nUU   nWU   nXX.4$ )aL  Implements of line search algorithm that satisfies the strong Wolfe conditions using double zoom.

Reference:
    Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
    pp60: Algorithm 3.5 (Line Search Algorithm).

Args:
    obj_func: the objective function to minimize. ```` accepts a multivariate input and returns a scalar.
    xk (Tensor): the starting point of the iterates.
    alpha (Scalar): the initial step size.
    d (Tensor): search direction.
    loss (scalar): the initial loss
    grad (Tensor): the initial grad
    c1 (Scalar): parameter for sufficient decrease condition.
    c2 (Scalar): parameter for curvature condition.
    tolerance_change (Scalar): terminates if the change of function value/position/parameter between
        two iterations is smaller than this value.
    max_ls(int): max iteration of line search.
    alpha_max (float): max step length.

Returns:
    loss_new (Scaler): loss of obj_func at final alpha.
    grad_new, (Tensor): derivative of obj_func at final alpha.
    alpha(Tensor): optimal step length, or 0. if the line search algorithm did not converge.
    ls_func_evals (Scaler): number of objective function called in line search process.

Following summarizes the essentials of the strong Wolfe line search algorithm.
Some notations used in the description:

    - `func` denotes the objective function.
    - `obi_func` is a function of step size alpha, restricting `obj_func` on a line.

        obi_func = func(xk + alpha * d),
        where xk is the position of k'th iterate, d is the line search direction(decent direction),
        and a is the step size.
    - alpha : substitute of alpha
    - a1 is alpha of last iteration, which is alpha_(i-1).
    - a2 is alpha of current iteration, which is alpha_i.
    - a_lo is alpha in left position when calls zoom, which is alpha_low.
    - a_hi is alpha in right position when calls zoom, which is alpha_high.

Line Search Algorithm:
    repeat
        Compute obi_func(a2) and derphi(a2).
        1. If obi_func(a2) > obi_func(0) + c_1 * a2 * obi_func'(0) or [obi_func(a2) >= obi_func(a1) and i > 1],
            alpha= zoom(a1, a2) and stop;

        2. If |obi_func'(a2)| <= -c_2 * obi_func'(0),
            alpha= a2 and stop;

        3. If obi_func'(a2) >= 0,
            alpha= zoom(a2, a1) and stop;

        a1 = a2
        a2 = min(2 * a2, a2)
        i = i + 1
    end(repeat)

zoom(a_lo, a_hi) Algorithm:
    repeat
        aj = cubic_interpolation(a_lo, a_hi)
        Compute obi_func(aj) and derphi(aj).
        1. If obi_func(aj) > obi_func(0) + c_1 * aj * obi_func'(0) or obi_func(aj) >= obi_func(a_lo),
            then a_hi <- aj;
        2.
            2.1. If |obi_func'(aj)| <= -c_2 * obi_func'(0), then alpha= a2 and stop;

            2.2. If obi_func'(aj) * (a2 - a1) >= 0, then a_hi = a_lo

            a_lo = aj;
    end(repeat)
   r   )dtypeFTg{Gz?
   )r   )r   r   )r   r   g?)	absr   clonepaddledot	to_tensorr   r   r   )!obj_funcxkalphadlossgradgtdc1c2tolerance_changemax_lsd_normloss_newgrad_newls_func_evalsgtd_newt_prevf_prevg_prevgtd_prevdonels_iterbracket	bracket_f	bracket_gbracket_gtdmin_stepmax_steptmpinsuf_progresslow_poshigh_posepss!                                    r   _strong_wolferD   6   s   l UUW[[]F::<D!"Q/HMjj%G 	$**-	($FFFH DG
F
bj3../aKH.uoG*I!12I#W-K::g2#)+gG!
I!
IDa<uoG*I!12I#W-K 456>222:"h'
 !%b3,,q/1a F
f &e*$	$	
 N"+A,)B-"?VGXw'::gaj71:-.7:JJ #AJaLNAJaLN
" S\CL01s7|e#US\%9:S@#g,!6%3w<:O::ec'l23fjjCL(7   L3.EL3.E!&!%"N%b3,,q/1 rzC//09W-- !&GH"*Ih"*.."2Ih$+K!#A,)A,6F GX zz'"rcCi/GH-0@@AQF$+G$4!&/&8	(#&/&8	(#(3G(<H%  %GG!)Ig!)!1Ig#*K S w'X GE!H!Hu33r   )N)g-C6?g?g&.>   )r    r   rD    r   r   <module>rG      s$    !/X 
s4r   