
    x-j)                     &    d dl Z ddZ	 	 	 	 d	dZdS )
    Nc                 t   ||\  }}n| |k    r| |fn|| f\  }}||z   d||z
  z  | |z
  z  z
  }	|	dz  ||z  z
  }
|
dk    rs|
                                 }| |k    r||| z
  ||z   |	z
  ||z
  d|z  z   z  z  z
  }n| | |z
  ||z   |	z
  ||z
  d|z  z   z  z  z
  }t          t          ||          |          S ||z   dz  S )a]  Cubic interpolation between (x1, f1, g1) and (x2, f2, g2).
        Use two points and their gradient to determine a cubic function and get the minimum point
        between them in the cubic curve.

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
        pp59: formula 3.59

    Args:
        x1, f1, g1: point1's position, value and gradient.
        x2, f2, g2: point2's position, value and gradient.
        bounds: bounds of interpolation area

    Returns:
        min_pos: the minimum point between the specified points in the cubic curve.
    N      r   g       @)sqrtminmax)x1f1g1x2f2g2bounds
xmin_bound
xmax_boundd1	d2_squared2min_poss                m/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/incubate/optimizer/line_search_dygraph.py_cubic_interpolater      s   $ !'
JJ-/2XX"bB8
J	b1R=BG,	,BARIA~~^^88BGb2"r'AF:J(KLLGGBGb2"r'AF:J(KLLG3w
++Z888Z'3..    -C6??&.>   c           
         |                                                                 }|                                } | |||          \  }}d}t          j        ||          }t          j        d|j                  |||f\  }}}}d}d}||
k     r	||||z  |z  z   k    s|dk    r)||k    r#||g}||g}||                                g}||g}nt          j         |          | |z  k    r|g}|g}|g}d}n|dk    r#||g}||g}||                                g}||g}nz|d||z
  z  z   }|dz  }|}t          ||||||||f          }|}|}|                                }|} | |||          \  }}|dz  }|                    |          }|dz  }||
k     	||
k    rd|g}||g}||g}d}|d         |d	         k    rd
nd\  }}|s|||
k     rut          j         |d         |d         z
            |z  |	k     rnIt          |d         |d         |d         |d         |d         |d                   }dt          |          t          |          z
  z  } t          t          |          |z
  |t          |          z
            | k     r|s&|t          |          k    s|t          |          k    rrt          j         |t          |          z
            t          j         |t          |          z
            k     rt          |          | z
  }nt          |          | z   }d}nd}nd} | |||          \  }}|dz  }|                    |          }|dz  }||||z  |z  z   k    s|||         k    r@|||<   |||<   |                                ||<   |||<   |d         |d         k    rd
nd\  }}nt          j         |          | |z  k    rd}nD|||         ||         z
  z  dk    r,||         ||<   ||         ||<   ||         ||<   ||         ||<   |||<   |||<   |                                ||<   |||<   |s||
k     u||         }||         }||         }||||fS )a4  Implements of line search algorithm that satisfies the strong Wolfe conditions using double zoom.

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
        pp60: Algorithm 3.5 (Line Search Algorithm).

    Args:
        obj_func: the objective function to minimize. ```` accepts a multivariate input and returns a scalar.
        xk (Tensor): the starting point of the iterates.
        alpha (Scalar): the initial step size.
        d (Tensor): search direction.
        loss (scalar): the initial loss
        grad (Tensor): the initial grad
        c1 (Scalar): parameter for sufficient decrease condition.
        c2 (Scalar): parameter for curvature condition.
        tolerance_change (Scalar): terminates if the change of function value/position/parameter between
            two iterations is smaller than this value.
        max_ls(int): max iteration of line search.
        alpha_max (float): max step length.

    Returns:
        loss_new (Scaler): loss of obj_func at final alpha.
        grad_new, (Tensor): derivative of obj_func at final alpha.
        alpha(Tensor): optimal step length, or 0. if the line search algorithm did not converge.
        ls_func_evals (Scaler): number of objective function called in line search process.

    Following summarizes the essentials of the strong Wolfe line search algorithm.
    Some notations used in the description:

        - `func` denotes the objective function.
        - `obi_func` is a function of step size alpha, restricting `obj_func` on a line.

            obi_func = func(xk + alpha * d),
            where xk is the position of k'th iterate, d is the line search direction(decent direction),
            and a is the step size.
        - alpha : substitute of alpha
        - a1 is alpha of last iteration, which is alpha_(i-1).
        - a2 is alpha of current iteration, which is alpha_i.
        - a_lo is alpha in left position when calls zoom, which is alpha_low.
        - a_hi is alpha in right position when calls zoom, which is alpha_high.

    Line Search Algorithm:
        repeat
            Compute obi_func(a2) and derphi(a2).
            1. If obi_func(a2) > obi_func(0) + c_1 * a2 * obi_func'(0) or [obi_func(a2) >= obi_func(a1) and i > 1],
                alpha= zoom(a1, a2) and stop;

            2. If |obi_func'(a2)| <= -c_2 * obi_func'(0),
                alpha= a2 and stop;

            3. If obi_func'(a2) >= 0,
                alpha= zoom(a2, a1) and stop;

            a1 = a2
            a2 = min(2 * a2, a2)
            i = i + 1
        end(repeat)

    zoom(a_lo, a_hi) Algorithm:
        repeat
            aj = cubic_interpolation(a_lo, a_hi)
            Compute obi_func(aj) and derphi(aj).
            1. If obi_func(aj) > obi_func(0) + c_1 * aj * obi_func'(0) or obi_func(aj) >= obi_func(a_lo),
                then a_hi <- aj;
            2.
                2.1. If |obi_func'(aj)| <= -c_2 * obi_func'(0), then alpha= a2 and stop;

                2.2. If obi_func'(aj) * (a2 - a1) >= 0, then a_hi = a_lo

                a_lo = aj;
        end(repeat)
       r   )dtypeFTg{Gz?
   )r   )r   r   )r   r   g?)	absr   clonepaddledot	to_tensorr   r   r   )!obj_funcxkalphadlossgradgtdc1c2tolerance_changemax_lsd_normloss_newgrad_newls_func_evalsgtd_newt_prevf_prevg_prevgtd_prevdonels_iterbracket	bracket_f	bracket_gbracket_gtdmin_stepmax_steptmpinsuf_progresslow_poshigh_posepss!                                    r   _strong_wolferH   6   s   l UUWW[[]]F::<<D!"eQ//HhMj1%%G 	$*---	($FFFH DG
F

tb5j3..//aKKH..uoG*I!1!12I#W-K:g2#)++gG!
I!
IDa<<uoG*I!1!12I#W-K 456>222:"h'
 
 
 !!%Xb%33(,,q//1a F

f &e*8$	8$	
 N"+A,)B-"?"?VGX I+w'':gaj71:-..7:JJJ #AJaLNAJaLN
 
" S\\CLL01s7||e#US\\%9::S@@ 
&#g,,!6!6%3w<<:O:O:ec'll233fjCLL(7 7    LL3.EELL3.E!&!%"N%Xb%33(,,q//1 rEzC//009W--- !&GH"*Ih"*.."2"2Ih$+K!#A,)A,66F GXX z'""rcCi//GH-0@@AQFF$+G$4!&/&8	(#&/&8	(#(3G(<H%  %GG!)Ig!)!1!1Ig#*K S  I+w''X GE!H!HXum33r   )N)r   r   r   r   )r$   r   rH    r   r   <module>rJ      sW    !/ !/ !/ !/X 
s4 s4 s4 s4 s4 s4r   