
    ϑio0                     6    S SK r SSKJr  S r       SS jrg)    N   )_value_and_gradientc                 J  ^ ^^^^	^
^^ [         R                  R                  R                  T T:*  U U4S jU U4S j5      u  mmTT-   SX-
  -  T T-
  -  -
  m	T	S-  TT-  -
  m
U	U
UUU UUU4S jnUU4S jn[         R                  R                  R                  T
S:  Xg5      nU$ )a  Cubic interpolation between (x1, f1, g1) and (x2, f2, g2).
    Use two points and their gradient to determine a cubic function and get the minimum point
    between them in the cubic curve.

Reference:
    Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
    pp59: formula 3.59

Args:
    x1, f1, g1: point1's position, value and gradient.
    x2, f2, g2: point2's position, value and gradient.
Returns:
    min_pos: the minimum point between the specified points in the cubic curve.
c                     > T T4$ N x1x2s   p/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/incubate/optimizer/functional/line_search.py<lambda>&cubic_interpolation_.<locals>.<lambda>$   s	    2r(    c                     > TT 4$ r   r   r	   s   r   r   r   $   s	    RHr         c                  (  >^ TR                  5       mUUUUU	U
4S jn UUUUU	U
4S jn[        R                  " T	T
S9n[        R                  R                  R                  X U5      n[        R                  " [        R                  " UT5      T5      $ )Nc                  >   > TTT-
  TT-   T -
  TT-
  ST-  -   -  -  -
  $ Nr   r   d1d2g1g2r
   r   s   r   true_fn2:cubic_interpolation_.<locals>.true_func1.<locals>.true_fn2,   1    bb2glrBwR7G%HIIIr   c                  >   > TTT-
  TT-   T -
  TT-
  ST-  -   -  -  -
  $ r   r   r   s   r   	false_fn2;cubic_interpolation_.<locals>.true_func1.<locals>.false_fn2/   r   r   )xy)sqrtpaddle
less_equalstaticnncondminimummaximum)r   r   predmin_posr   r   	d2_squarer   r   r
   r   xmaxxmins       @r   
true_func1(cubic_interpolation_.<locals>.true_func1)   ss    ^^	J 	J	J 	J   2,--""''	B~~fnnWd;TBBr   c                     > TT -   S-  $ )Ng       @r   )r.   r/   s   r   false_func1)cubic_interpolation_.<locals>.false_func17   s    ts""r           )r$   r&   r'   r(   )r
   f1r   r   f2r   r0   r3   r,   r   r-   r.   r/   s   ` `` `   @@@@r   cubic_interpolation_r8      s     !!&&
b"$4JD$ 
b1=BG,	,BARIC C# mm##I$4jNGNr   c
                   ^ ^^^^^^^^^^^^^^^ U UU4S jmUUUUU4S jm[         R                  " S/TU	S9m[         R                  " S/SU	S9n
[         R                  " S/XYS9nT" U
5      u  pm[         R                  " U5      m[         R                  " T5      m[         R                  " S/SSS9n[         R                  " S/SU	S9m[         R                  " U5      m[         R                  " U5      m[         R                  " S/SSS9n[         R                  " S/SS	S9nU4S
 jnUUUUUUUUUUU4S jn[         R                  R                  R                  UUXXXU/S9  TTTU4$ )aH  Implements of line search algorithm that satisfies the strong Wolfe conditions using double zoom.

Reference:
    Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
    pp60: Algorithm 3.5 (Line Search Algorithm).

Args:
    f: the objective function to minimize. ``f`` accepts a multivariate input and returns a scalar.
    xk (Tensor): the starting point of the iterates.
    pk (Tensor): search direction.
    max_iters (Scalar): the maximum number of iterations.
    tolerance_grad (Scalar): terminates if the gradient norm is smaller than
        this. Currently gradient norm uses inf norm.
    tolerance_change (Scalar): terminates if the change of function value/position/parameter between
        two iterations is smaller than this value.
    initial_step_length (Scalar): step length used in first iteration.
    c1 (Scalar): parameter for sufficient decrease condition.
    c2 (Scalar): parameter for curvature condition.
    alpha_max (float): max step length.
    dtype ('float32' | 'float64'): the datatype to be used.

Returns:
    num_func_calls (float): number of objective function called in line search process.
    a_star(Tensor): optimal step length, or 0. if the line search algorithm did not converge.
    phi_star (Tensor): phi at a_star.
    derphi_star (Tensor): derivative of phi at a_star.

Following summarizes the essentials of the strong Wolfe line search algorithm.
Some notations used in the description:

    - `f` denotes the objective function.
    - `phi` is a function of step size alpha, restricting `f` on a line.

        phi = f(xk + a * pk),
        where xk is the position of k'th iterate, pk is the line search direction(decent direction),
        and a is the step size.
    - a : substitute of alpha
    - a1 is a of last iteration, which is alpha_(i-1).
    - a2 is a of current iteration, which is alpha_i.
    - a_lo is a in left position when calls zoom, which is alpha_low.
    - a_hi is a in right position when calls zoom, which is alpha_high.

Line Search Algorithm:
    repeat
        Compute phi(a2) and derphi(a2).
        1. If phi(a2) > phi(0) + c_1 * a2 * phi'(0) or [phi(a2) >= phi(a1) and i > 1],
            a_star= zoom(a1, a2) and stop;

        2. If |phi'(a2)| <= -c_2 * phi'(0),
            a_star= a2 and stop;

        3. If phi'(a2) >= 0,
            a_star= zoom(a2, a1) and stop;

        a1 = a2
        a2 = min(2 * a2, a2)
        i = i + 1
    end(repeat)

zoom(a_lo, a_hi) Algorithm:
    repeat
        aj = cubic_interpolation(a_lo, a_hi)
        Compute phi(aj) and derphi(aj).
        1. If phi(aj) > phi(0) + c_1 * aj * phi'(0) or phi(aj) >= phi(a_lo),
            then a_hi <- aj;
        2.
            2.1. If |phi'(aj)| <= -c_2 * phi'(0), then a_star= a2 and stop;

            2.2. If phi'(aj) * (a2 - a1) >= 0, then a_hi = a_lo

            a_lo = aj;
    end(repeat)
c                 b   > [        TTU T-  -   5      u  p[        R                  " UT5      nXU4$ )zgCompute function value and derivative of phi at a.
phi = f(xk + a * pk)
phi'(a) = f'(xk + a * pk) * pk
)r   r$   dot)a	phi_valuef_gradphi_gradfpkxks       r   phi_and_derphi$strong_wolfe.<locals>.phi_and_derphi   s7    
 02B;?	::fb)(**r   c	                    >^^^ Tm[         R                  " S/SSS9n	[         R                  " S/SSS9n
UU4S jnUUUUU4S jn[         R                  R                  R	                  UUU	U
U UUUUUU/	S	9  U	$ )
Nr   r   int64shape
fill_valuedtypeFboolc	                 ~   > [         R                  " Xb-
  5      T:  n	[         R                  " X-  U5        U T
:  U) -  $ r   )r$   absassign)j	done_zooma_lophi_lo	derphi_loderf_loa_hiphi_hi	derphi_hir+   max_zoom_iterstolerance_changes             r   	cond_zoom-strong_wolfe.<locals>.zoom.<locals>.cond_zoom   s>     ::dk*-==DMM)*I6&9*44r   c	                   >^ ^^^^^^^^^^^^^ [        TTTTTT5      mS[        R                  " TT-
  5      -  n	[        R                  " [        R                  " TT-
  5      [        R                  " TT-
  5      5      U	:  n
[        R                  R
                  R                  U
UU4S jU4S j5      mT" T5      u  mmmUUUUUU4S jnUUUUUUUUUUUU4S jmTTTT-  T-  -   :  TT:  -  n[        R                  R
                  R                  XUUU4S j5        [        R                  R
                  R                  TU 4S jU 4S j5      m T TTTTTTTT/	$ )	Ng?c                     > STT -   -  $ )Ng      ?r   )rU   rQ   s   r   r   ?strong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.<lambda>   s    cTD[1r   c                     > T $ r   r   )ajs   r   r   r^      s    2r   c                     > [         R                  " TT 5        [         R                  " TT5        [         R                  " TT5        g r   r$   rN   )rU   r`   rW   derphi_jrV   phi_js   r   true_fn>strong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.true_fn   s-    b$'eV,h	2r   c                   >^  [         R                  " T5      T* T
-  :*  n[         R                  " X!5        UU UUUU4S jnU) TTT -
  -  S:  -  n[         R                  R                  R                  XCS 5        [         R                  " TT 5        [         R                  " TT5        [         R                  " TT5        [         R                  " TT	5        g )Nc                     > [         R                  " TT 5        [         R                  " TT5        [         R                  " TT5        g r   rb   )rU   rQ   rW   rS   rV   rR   s   r   re   Pstrong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.false_fn.<locals>.true_fn   s-    MM$-MM&&1MM)Y7r   r   )r$   rM   rN   r&   r'   r(   )rQ   rP   pred3re   pred4rU   r`   c2derf_jrT   derphi_0rW   rc   rS   rV   rd   rR   s   `    r   false_fn?strong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.false_fn   s    

8,h>e/8 8
 #
h$+&>!&CD  %%ed;b$'eV,h	2fg.r   c                     > T" T T5      $ r   r   )rQ   rP   ro   s   r   r   r^      s    y(Ar   c                     > T $ r   r   rO   s   r   r   r^      s    r   c                     > T S-   $ )Nr   r   rs   s   r   r   r^      s	    AEr   )r8   r$   rM   r)   r&   r'   r(   )rO   rP   rQ   rR   rS   rT   rU   rV   rW   
min_changer+   re   pred2r`   rm   rc   ro   rd   c1rl   rn   phi_0rC   s   `````````    @@@@@r   	body_zoom-strong_wolfe.<locals>.zoom.<locals>.body_zoom   sA    &fivyB vzz$+66Jvzz"t)4fjjd6KL  !!&&1:B '5R&8#E683 3/ / /" UR"Wx%777EVOLEMM!! A   %%iMJA
 
r   r(   body	loop_vars)r$   fullr&   r'   
while_loop)rQ   rR   rS   rT   rU   rV   rW   rx   rn   rO   rP   rZ   ry   rX   rw   rl   	max_itersrC   rY   s          ``    @r   zoomstrong_wolfe.<locals>.zoom   s     #KKqcaw?KKqce6J		5?	 ?	B 	##
 	$ 	
  r   r   rG   r5   rF   r   FrK   c                    > U T:  U) -  $ r   r   )ils_func_callsa1a2phi_1derf_1doner   s          r   r(   strong_wolfe.<locals>.cond#  s    I$&&r   c                   >^ ^^^^^^^^ T" T5      u  mmm[         R                  " TS-   T5        [         R                  " U[         R                  " [         R                  " T5      5      -  U5        UUUUUUUUUUUUUU4S jnU) TTTT-  T-  -   :  TT:  T S:  -  -  -  n[         R                  " Xh-  U5        [         R                  R
                  R                  XS 5        UUUUUU4S jn	U) [         R                  " T5      T* T-  :*  -  n
[         R                  " Xj-  U5        [         R                  R
                  R                  XS 5        UUUUUUUUUUUUUU4S jnU) TS:  -  n[         R                  " Xl-  U5        [         R                  R
                  R                  XS 5        UUUUUU UU4S jn[         R                  R
                  R                  US U5        T TTTTTU/$ )Nr   c                     > T" TTTTTTTT
T5	      n [         R                  " TT5        [         R                  " TT5        [         R                  " TT5        [         R                  " T	U -   T	5        g r   rb   )rO   r   r   a_starr   	derf_starrn   derphi_1derphi_2r   rx   r   phi_2phi_starr   s    r   true_fn1,strong_wolfe.<locals>.body.<locals>.true_fn1+  g    
A MM"f%MM%*MM&),MM-!+];r   c                     > [         R                  " T T5        [         R                  " TT5        [         R                  " TT5        g r   rb   )r   r   derf_2r   r   r   s   r   r   ,strong_wolfe.<locals>.body.<locals>.true_fn2B  s-    MM"f%MM%*MM&),r   c                     > T" TTTTTTTT
T5	      n [         R                  " TT5        [         R                  " TT5        [         R                  " TT5        [         R                  " T	U -   T	5        g r   rb   )rO   r   r   r   r   r   rn   r   r   r   rx   r   r   r   r   s    r   true_fn3,strong_wolfe.<locals>.body.<locals>.true_fn3K  r   r   r   c                  "  > [         R                  " TT 5        [         R                  " TT5        [         R                  " TT5        [         R                  " [         R                  " ST-  T5      T5        [         R                  " TS-   T5        g )Nr   r   )r$   rN   r)   )r   r   	alpha_maxr   r   r   r   r   s   r   ro   ,strong_wolfe.<locals>.body.<locals>.false_fn`  s\    MM"b!MM%'MM&&)MM&..R;R@MM!a%#r   )r$   rN   anyisinfr&   r'   r(   rM   )r   r   r   r   r   r   r   r   pred1r   rv   r   rj   ro   r   r   r   r   r   rw   rl   r   rn   r   rx   rC   r   r   s   ``````        @@@r   r|   strong_wolfe.<locals>.body&  s   "0"4vxma'7dVZZU(;<<dC	< 	< 	<" UR"Wx///Ue^A4NO
 	dlD)et4	- 	-
 H-"x?@dlD)et4	< 	< 	<" Q'dlD)et4	$ 	$ 	dD(3="b%>>r   r{   )r$   r~   rN   r&   r'   r   )r@   rB   rA   r   rY   initial_step_lengthrw   rl   r   rJ   r   r   r   r   r   r   r   r(   r|   r   r   rn   r   rx   rC   r   r   s   ````` ```          @@@@@@@@r   strong_wolfer   >   sA   l+p pd 1#)5II	A33e	<B	A3+>	LB,R0E8MM% E}}X&HKKqcawGM [[sq>F}}U#Hf%I1#!7;A;;aSU&AD'B? B?H MMRUDA    8Y55r   )   g:0yE>g      ?g-C6?g?
   float32)r$   utilsr   r8   r   r   r   r   <module>r      s0     &'\ 

r6r   