
    Αi`                    Z   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SK	r	S SK	J
r
  S SKJr  S SKJr  S SKJr  \(       a  S SKJr  \R&                  R(                  S	\R&                  R*                  S
\R&                  R,                  S
\R&                  R.                  S\R&                  R0                  S\R&                  R2                  S	\R&                  R4                  S	\R&                  R6                  S	\R&                  R8                  S
\R&                  R:                  S\R&                  R<                  S\R&                  R>                  S	\R&                  R@                  S\R&                  RB                  S0r"/ SQr#S\$S'   SS/r%S\$S'   / r&S\$S'   / SQr'S\$S'   / SQr(S\$S'   S/r)S\$S'   Sr*Sr+Sr,S r- " S S 5      r. " S! S"5      r/ S3               S4S# jjr0                S5S$ jr1S% r2S& r30 S0 4S' jr4S( r5S) r6S* r7S+ r8S, r9S- r:S. r;S/ r<S0 r=S1 r>S2 r?g)6    )annotationsN)TYPE_CHECKING)pir)backward_utils)core)in_cinn_debug_mode)Sequence               )Mpd_op.full_int_array
pd_op.fullzpd_op.dividezpd_op.subtractz	pd_op.addzpd_op.multiplyzpd_op.elementwise_powzpd_op.rsqrtzpd_op.reshapezpd_op.full_likezpd_op.assignzpd_op.expandzpd_op.scalez	pd_op.expz	pd_op.sinz	pd_op.coszpd_op.add_nz
pd_op.castzpd_op.concatzpd_op.full_with_tensorzpd_op.gather_ndzpd_op.logical_andzpd_op.logical_notzpd_op.wherez	pd_op.powzpd_op.shapezpd_op.shape64zpd_op.slicezpd_op.squeezezpd_op.unsqueezezpd_op.transposez	pd_op.logzpd_op.log1pzpd_op.logitzpd_op.expand_aszpd_op.splitzpd_op.arangezpd_op.put_along_axisz
pd_op.tanhz
pd_op.atanzpd_op.atanhz
pd_op.sinhz
pd_op.asinzpd_op.asinhz
pd_op.coshz
pd_op.acoszpd_op.acoshz	pd_op.absz
pd_op.signzpd_op.expm1z	pd_op.erfzpd_op.erfinvz
pd_op.ceilzpd_op.floorz
pd_op.fraczpd_op.roundzpd_op.trunczpd_op.anglezpd_op.as_complexzpd_op.as_realzpd_op.complexz
pd_op.realz
pd_op.imagz
pd_op.conjzpd_op.greater_equalzpd_op.greater_thanzpd_op.not_equalzpd_op.equalzpd_op.less_equalzpd_op.less_thanzpd_op.bitwise_andzpd_op.bitwise_orzpd_op.bitwise_xorzpd_op.bitwise_notzpd_op.isinfzpd_op.isnanzpd_op.sigmoidz	list[str]DEFAULT_RECOMPUTABLE_OPSr   r   TENDING_TO_RECOMPUTE_OPSVIEW_OPS)zpd_op.randintzpd_op.uniformzpd_op.dropout
RANDOM_OPS)zpd_op.matmulzpd_op.conv2dzpd_op.layer_normzpd_op.batchnormzpd_op.softmaxzpd_op.all_reduce_zpd_op.c_broadcast_zpd_op.reduce_COMPUTE_INTENSIVE_OPSzcf.stack_create
IGNORE_OPSF   g?c                     [         R                  " S5      nU(       a*  [        U5      R                  5       S;   a  [	        U SS06  g g g )N FLAGS_print_auto_recompute_debug)1trueflushT)osgetenvstrlowerprint)argsflags     ^/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/decomposition/recompute.py
DebugPrintr%      s:    9978DD	!]2t 4  3t    c                  &    \ rS rSrS rS rS rSrg)JudgeFusionLoop   c                *   UR                  5       R                  U l        X l        U R                   Vs0 s H  o3[        5       _M     snU l        U R                   Vs0 s H  o3[        5       _M     snU l        U R                  5         g s  snf s  snf N)global_blockopsunrecomputable_opsset!downstream_unrecomputable_ops_mapupstream_unrecomputable_ops_map_set_has_unfusible_on_path_map)selfprogramr.   ops       r$   __init__JudgeFusionLoop.__init__   sq    '')--"4FJhh1Ohce)h1O.DHHH/MHbCE	H/M,++- 2P/Ms   BBc                &  ^ ^^^^ U4S jmU4S jmU4S jmS mUU 4S jnUU 4S jnT R                    H   nT R                  U==   U" U5      -  ss'   M"     [        T R                   5       H   nT R                  U==   U" U5      -  ss'   M"     g )Nc                2   > [        5       n/ nT" XU 5        U$ r+   )r/   )r5   defined_valuesused_values_get_used_external_value_impls      r$   _get_used_external_valuePJudgeFusionLoop._set_has_unfusible_on_path_map.<locals>._get_used_external_value   s     UNK).rJr&   c                  > UR                  5        H,  nX0;  d  M
  UR                  U5        U R                  U5        M.     UR                  5        HU  nUR	                  5        H  nU R                  U5        M     UR                  5        H  u  peU R                  U5        M     MW     UR                  5        H  nUR                   H  nT	" XU5        M     M!     UR                  5        H  nU R                  U5        M     g r+   )operands_sourceappendaddblocksr"   kwargsr-   results)
r:   r;   r5   operandblockvalue_inner_opresult_valuer<   s
            r$   r<   UJudgeFusionLoop._set_has_unfusible_on_path_map.<locals>._get_used_external_value_impl   s    --/0&&w/"&&w/ 0 "ZZ\E"&&u- * %HA"&&u- !/ %
  %		H1&X !* %
 !#

""<0 !-r&   c                   > [        5       nT" U 5       H[  nUR                  5       c  M  UR                  5       nUR                  5       U R                  5       :X  d  MJ  UR                  U5        M]     U$ r+   )r/   get_defining_opget_parent_blockrB   )r5   	producersrF   	source_opr=   s       r$   _get_producer_opsIJudgeFusionLoop._set_has_unfusible_on_path_map.<locals>._get_producer_ops   sg    I3B7**,4#335	--/23F3F3HHMM), 8 r&   c                    [        5       nU R                  5        H0  nUR                  5        H  nUc  M  UR                  U5        M     M2     U$ r+   )r/   rE   all_used_ops_in_same_blockrB   )r5   	consumersresult	parent_ops       r$   _get_consumer_opsIJudgeFusionLoop._set_has_unfusible_on_path_map.<locals>._get_consumer_ops   sG    I**,!'!B!B!DI ,!i0 "E ' r&   c                   > [        5       nT" U 5       H  nUTR                  U   -  nM     U R                  5       TR                  ;   a  UR	                  U 5        U$ r+   )r/   r1   namer.   rB   )curupstream_unrecomputable_opsnew_oprR   r3   s      r$   _get_upstream_ops_recursivelyUJudgeFusionLoop._set_has_unfusible_on_path_map.<locals>._get_upstream_ops_recursively   s]    *-%'+C0+88@+ 1 xxzT444+//4..r&   c                   > [        5       nT" U 5       H  nUTR                  U   -  nM     U R                  5       TR                  ;   a  UR	                  U 5        U$ r+   )r/   r0   r\   r.   rB   )r]   downstream_unrecomputable_opsr_   rY   r3   s      r$   _get_downstream_ops_recursivelyWJudgeFusionLoop._set_has_unfusible_on_path_map.<locals>._get_downstream_ops_recursively   s]    ,/E)+C0-::6B- 1 xxzT444-11#600r&   )r-   r1   reversedr0   )r3   r`   rd   r5   rY   rR   r=   r<   s   `   @@@@r$   r2   .JudgeFusionLoop._set_has_unfusible_on_path_map   s    		1$			/	1 ((B004-b14  488$B2226/36 %r&   c                    [        U R                  U   U R                  U   -  5      S:H  =(       a+    [        U R                  U   U R                  U   -  5      S:H  nUb
  Ub  U(       + $ S$ )Nr   F)lenr0   r1   )r3   op1op2no_unfusible_op_on_paths       r$   _has_unfusible_op_on_any_path-JudgeFusionLoop._has_unfusible_op_on_any_path   s    66s;66s;< 	 	
 66s;66s;< 	 	  3? ('	
 	
r&   )r0   r-   r.   r1   N)__name__
__module____qualname____firstlineno__r6   r2   rm   __static_attributes__ r&   r$   r(   r(      s    .FP
r&   r(   c                       \ rS rSrS rS rSrg)	Op2IdxMapi  c                    0 U l         [        UR                  5       R                  5       H  u  p#X R                   U'   M     g r+   )op_to_idx_map	enumerater,   r-   )r3   r4   idxop_iters       r$   r6   Op2IdxMap.__init__  s9    %g&:&:&<&@&@ALC*-w' Br&   c                x    U R                   R                  US 5      (       a  U R                   U   $ [        S5      e)Nzop not found in program)rx   getRuntimeError)r3   r5   s     r$   get_idxOp2IdxMap.get_idx  s8    !!"d++%%b))455r&   )rx   N)ro   rp   rq   rr   r6   r   rs   rt   r&   r$   rv   rv     s    .
6r&   rv   c           	     |  ^^&^'^(^)^*^+^,^-^.^/^0^1^2^3 [        SU 5        SSKn[        R                  " 5       n[        XU5      u  m/n	n
[	        U	5      S:X  d'  U[	        U R                  5       R                  5      :  a  X4$ U R                  5       R                  n[        UT/5      m*[        n[        m3UT3-  nTb  [        T5      O
[        U5      m[        n[        n[        m0X-   m2T[        U5      -  m,[        R                  " U5      n[        R                  " U5      nX-  n0 nUR!                  5       m.[#        U T25      n[        U R                  5       R                  SUS-    5      m+S nU" X5      m)U,4S jm&U&U)U+U/U34S jm(U&U)U+4S jm'U'U*U04S	 jnU(U*UU0U24S
 jnT/U	-  U
-   GHO  nUR%                  5       (       d  M  UR'                  5       R)                  5       S:X  a  M?  UR'                  5       R)                  5       [*        ;   a  Mg  [	        UR-                  5       5      S:X  a'  UR-                  5       S   R)                  5       S;   a  M  UU	;   aW  [        SUR.                  SSS5        T.R1                  UR.                  S-   S[2        R4                  S9  UUUR.                  '   GM  UU;   aT  [        SSSUR.                  S5        T.R1                  SUR.                  S-   [2        R4                  S9  UUUR.                  '   U" U5      (       aZ  UT/;   aT  [        SSSUR.                  S5        T.R1                  SUR.                  S-   [2        R4                  S9  UUUR.                  '   U" UU5      nT.R1                  UR.                  S-   UR.                  S-   US9  UUUR.                  '   [7        UT)ST+5      nU H_  n[        SUR.                  SUR.                  S5        T.R1                  UR.                  S-   UR.                  S-   [2        R4                  S9  Ma     UR-                  5        H  nUT+;   d  M  UR9                  UR'                  5       U5      (       d  M2  [        SSSUR.                  S5        T.R1                  SUR.                  S-   [2        R4                  S9  [        SUR.                  SSS5        T.R1                  UR.                  S-   S[2        R4                  S9  M     GMR     UR;                  T.SS5      u  nn[        SU5        Uu  nm-[        5       nU.4S jU 5        H"  u  m1nUR=                  U-U14S jU 5       5        M$     [        R                  " 5       nU H-  u  nn USS U SS :X  d   eUUSS    nUR?                  U5        M/     Un!Un![A        U U!UUT)UU5      u  n"n#[        SU"5        [        R                  " 5       n$[C        5       (       aM  [D        RF                  " S 5      n%U%RI                  [D        RJ                  5        U%RM                  S!U$U-
   S"35        U"U#4$ )#a  
Considering the compiler fuse strategy, we model the pir graph.
Convert the pir calculation graph into a networkx calculation
graph. Find the cut point through the min-cut algorithm,
which is the value to be saved in pir forward calculation graph.

Recompute the forward computation graph to replace intermediate
variables in the forward graph held by the backward graph.

.. warning::
    This API is experimental and likely to change.

Args:
    program (Program): The program to be recomputed.
    inputs:(list[Value]|tuple(Value)): The input Values
        of the forward graph.
    outputs:(list[Value]|tuple(Value)): The out Values
        of the forward graph.
    grad_outputs:(list[Value]|tuple(Value)): initial gradient values
        of `outputs` .
    forward_op_end_idx(int): The index of the last forward op.
    backward_op_start_idx(int): The index of the start backward op.
    recomputable_ops(list[str]|tuple(str)|None): The op names that can
        be recomputed. If 'recompute_ops' is None, we will use the
        default recomputable_ops. Default None.
Returns:
    recomputed_program(Program): The recomputed program.
    fwd_op_end_idx(int): The index of the last forward op in recomputed program.

Examples:
    .. code-block:: python

    >>> import numpy as np
    >>> import paddle
    >>> from paddle.autograd.ir_backward import grad as ir_grad
    >>> from paddle.base import core
    >>> from paddle.decomposition import decompose
    >>> def forward(x):
    ...     y = paddle.sin(x)
    ...     z = paddle.cos(y)
    ...     return z

    >>> np_x = np.random.random(size=[4096, 4096]).astype("float32")
    >>> paddle.enable_static()
    >>> core._set_prim_all_enabled(True)
    >>> main_program = paddle.static.Program()
    >>> with paddle.static.program_guard(main_program):
    >>>     x = paddle.static.data(
    >>>         name="x", shape=[4096, 4096], dtype="float32"
    >>>     )
    >>>     x.stop_gradient = False
    >>>     out = forward(x)
    >>>     out_grad = paddle.full(
    >>>         shape=out.shape, fill_value=3, dtype="float32"
    >>>     )
    >>>     [out] = decompose(main_program, [out])
    >>>     [dx] = ir_grad(out, [x], out_grad)
    >>>     main_program, _ = paddle.decomposition.auto_recompute(
    >>>         main_program,
    >>>         [x],
    >>>         [out],
    >>>         grad_outputs=[out_grad],
    >>>         fwd_op_end_idx=2,
    >>>         backward_op_start_idx=4
    >>>     )
    >>>     exe = paddle.static.Executor(paddle.CUDAPlace(0))
    >>>     res = exe.run(
    >>>         feed={'x': np_x},
    >>>         fetch_list=[dx],
    >>>     )
    >>>     print(main_program)
    {
        (%0) = "pd_op.data" () {dtype:(pd_op.DataType)float32,name:"x",place:(pd_op.Place)Place(undefined:0),shape:(pd_op.IntArray)[4096,4096],stop_gradient:[false]} : () -> pd_op.tensor<4096x4096xf32>
        (%1) = "pd_op.sin" (%0) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%2) = "pd_op.cos" (%1) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%3) = "pd_op.full" () {dtype:(pd_op.DataType)float32,place:(pd_op.Place)Place(undefined:0),shape:(pd_op.IntArray)[4096,4096],stop_gradient:[true],value:(Float)3} : () -> pd_op.tensor<4096x4096xf32>
        (%4) = "pd_op.sin" (%0) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%5) = "pd_op.sin" (%4) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%6) = "pd_op.full" () {dtype:(pd_op.DataType)float32,place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[1],stop_gradient:[true],value:(Float)-1} : () -> pd_op.tensor<1xf32>
        (%7) = "pd_op.scale" (%5, %6) {bias:(Float)0,bias_after_scale:true,stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>, pd_op.tensor<1xf32>) -> pd_op.tensor<4096x4096xf32>
        (%8) = "pd_op.multiply" (%7, %3) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>, pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%9) = "pd_op.cos" (%0) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%10) = "pd_op.multiply" (%9, %8) {stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>, pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
        (%11) = "pd_op.fetch" (%10) {col:(Int32)0,is_persistable:[true],name:"fetch0",stop_gradient:[false]} : (pd_op.tensor<4096x4096xf32>) -> pd_op.tensor<4096x4096xf32>
    }
zprogram before recompute:r   Nr
   c                L   [         R                  " 5       n[         R                  " 5       nU R                  5       R                  US   HT  nUR	                  5        H=  nUR                  U5        UR                  U5      (       a  M,  UR                  U5        M?     MV     X2-
  nU$ r+   )r   ValueSetr,   r-   r@   rB   is_no_need_buffer)r4   backward_op_start_idxneed_buffer_values
all_valuesr5   op_operand_sourcebw_no_need_buffer_valuess          r$   _get_bw_no_need_buffer_values5auto_recompute.<locals>._get_bw_no_need_buffer_values  s    +446#,,.
&&(,,-B-CDB%'%7%7%9!01''(9::"&&'89	 &: E $.#B ''r&   c                   > U R                  5       R                  5       T;   =(       a!    UR                  5       R                  5       T;   $ r+   )rN   r\   )value_node1value_node2fusible_opss     r$   _is_fusible#auto_recompute.<locals>._is_fusible  sB    '')..0K? D++-224C	
r&   c                ^  > [         R                  " 5       nUR                  U 5        [        U5      S:  aw  UR	                  5       n[        UTST5      nU HB  nUT;  a  T" X$5      (       d    gUT;  d  M   [        U5      T	;   d  M1  UR                  U5        MD     [        U5      S:  a  Mw  g)Nr   TF)r   r   rB   ri   popfind_value_node_usersget_real_define_op_name)

value_nodecur_value_nodescur_value_nodeusersuserr   r   forward_opsrequired_fw_value_nodesview_opss
        r$   _is_materialized_backwards2auto_recompute.<locals>._is_materialized_backwards  s    (113J'/"Q&,002N) 8$E 66{"@ @   77/5A#''-  /"Q& r&   c                f   >^  T U;   a  g[        T TST5      n[        UU 4S jU 5       5      (       + $ )NTc              3  6   >#    U  H  nT" TU5      v   M     g 7fr+   rt   ).0r   r   r   s     r$   	<genexpr>;auto_recompute.<locals>._is_materialized.<locals>.<genexpr>  s     G{:t44s   )r   all)r   placeholder_value_nodesr   r   r   r   s   `  r$   _is_materialized(auto_recompute.<locals>._is_materialized  s9    00%0$
 GGGGGr&   c           
        > [        U 5      nU R                  5       R                  5       T;   a  US:X  a  [        $ [	        US[        [        TU    S5      S5      -  -  5      nT" X5      (       a  U$ US-  $ )Nr   g?d   r
   r   )cal_value_node_sizerN   r\   MINIMUM_WEIGHTintmaxmin)r   r   mem_szr   dist_from_bwtending_to_recompute_opss      r$   _get_node_weight(auto_recompute.<locals>._get_node_weight  s    $Z0 &&(--/3KK!!! cS\*%=s!CQGGH
 J@@MA:r&   c                  > [         (       a!  U R                  5       R                  5       T;   $ U R                  5       R                  5       T;   a  gU R                  5       R                  5       T;  a  gT" U 5      (       a  gTU    [        :  a  g[	        U 5      n[        U 5      n[        S U 5       5      nUS-  U:  $ )NFTc              3  8   #    U  H  n[        U5      v   M     g 7fr+   )r   )r   is     r$   r   =auto_recompute.<locals>._ban_recomputation.<locals>.<genexpr>  s     Ef1!44fs   r   )AGGRESSIVE_RECOMPUTATIONrN   r\   MAX_DIST_FROM_BWr   get_real_input_nodessum)	r   output_sizeinputsinputs_sizer   r   recomputable_opsr   r.   s	       r$   _ban_recomputation*auto_recompute.<locals>._ban_recomputation  s    ##--/446:LLL))+0026NN))+002:JJ **55J'*:: .j9K)*5FEfEEK?[00r&   builtin.combinezbuiltin.splitzbuiltin.slicezadd edge link from: z -> sinkz (inf) _in)capacityz source z (inf)sourcez(inf)_outTz sink z
Cut Value:c              3  0   >#    U  H  oTU   4v   M     g 7fr+   rt   )r   nnx_graphs     r$   r   !auto_recompute.<locals>.<genexpr>  s     8i$is   c              3  :   >#    U  H  oT;   d  M
  TU4v   M     g 7fr+   rt   )r   vnon_reachableus     r$   r   r     s     Ad=.@fq!fds   	zprogram after recompute:zauto-recomputez(Time of auto recompute program: ***** [ z ] ***** seconds.)'r%   networkxtimeclassify_value_noderi   r,   r-    cal_value_nodes_dist_to_backwardr   r   r/   r   r   r   r   r   DiGraphr(   initializedrN   r\   r   rU   idadd_edgemathinfr   rm   minimum_cutupdaterB   partition_joint_graphr   logging	getLoggersetLevelINFOinfo)4r4   r   outputsgrad_outputsfwd_op_end_idxr   r   nx
start_timerequired_bw_value_nodesunclaimed_value_nodesall_opsdefault_recomputable_ops
random_opscompute_intensive_opsr   value_id_dictjudge_fusion_loopr   r   r   r   weightr   r   	cut_value	partition	reachablecutsetnbrscut_value_nodesvalue_node_invalue_node_outsaved_valuesprogram_after_recomputefwd_op_end_idx_after_recomputeend_timeloggerr   r   r   r   r   r   r   r   r   r   r   r   r.   r   s4         `                               @@@@@@@@@@@@@@r$   auto_recomputer    s\   ~ *G4 J 	G>B	 "#q(,AS""F - &&""$((G3(L
  8H( ' 	)*  J17#;"S_4K %%g.G$$V,F$.Mzz|H'1CDg**,001E>A3EFGK
(  = 
 (H&1 18 	 
!	"
	  	
 %%''%%',,.2CC%%',,.*<113
88:1=BBD I
 
 00&
vvy jmme3VdhhO+5M*--(& *--%/$((   ,6M*--( z**55& *--%/$((   ,6M*--(!#
 	MME!:==6#9F 	 	
 (2jmm$%0$
 D& &%$((    99;D{"$BB..0$  ."" %% *--%"7$(( &  ."  %%". & 1 <m	 f >>(HfEIy|Y'(I}UF8i84AdAA 9 %--/O)/%~Sb!^CR%8888"="#56
J' *0
 #L #L
 	 	& )+BCyy{H""#34%6x*7L6MM^_	
 #$BBBr&   c           
     >   [         R                  " U5      n[         R                  " U5      n[        U UUUUUU5      n[        S5        [        U Vs/ s H(  nSU SUR	                  5       R                  5        S3PM*     sn5        [        S5        [        U Vs/ s H(  nSU SUR	                  5       R                  5        S3PM*     sn5        Sn	U H  n
U	[        U
5      -  n	M     [        SU	S-  S-  S-  S	5        [        U UUUU5      u  pX4$ s  snf s  snf )
a  
Partition the joint graph, recompute the intermediate values
by saved values to save memory.
Args:
    program(Program): The program to be recomputed.
    saved_values(list[valueiable]): The saved values
        of forward graph which used by backward graph.
    inputs:(list[Value]|tuple(Value)): The input Values
        of the forward graph.
    outputs(list[valueiable]): The out values
        of the forward graph.
    forward_op_end_idx(int): The index of the last forward op.
    backward_op_start_idx(int): The index of the start backward op.
Returns:
    recomputed_program(Program): The recomputed program.
    fwd_op_end_idx(int): The index of the last forward op in
        recomputed program.
zsaved values: (z, )zmid values: r   zSaved Memory is: i   GB)r   r   analyze_mid_hold_valuesr%   rN   r   r   (replace_mid_values_with_forward_subgraph)r4   r   r   r   r   r   r   mid_hold_valuesr   memmids              r$   r   r     s7   6 "**<8L%%g.G . O  lKl!A3b**,//12!4lKL~oNo!A3b**,//12!4oNO
C"3'' "C$J$5$<dC GG ""% LNs   /D/Dc                   S n[        U 5      n[        U R                  5       R                  S US-    5      n[        U R                  5       R                  US  5      nU R                  5       R                  U   n	U" X5      n
U
S   nU
S   nU
S   n[	        U UUU	UU5      u  pnU H  nUR                  SS5        M     U H  nUR                  SS5        M     [        R                  " 5       nU H7  nUR                  U5      nUR                  UU5        UR                  U5        M9     [        U5       H  nUR                  US 5      nUR                  5        Hd  nUR                  5        HM  nUR                  US	5      (       d  M  Ub)  UR                  UU   5      UR                  U5      :  d  MH  UU   nMO     Mf     Uc   eUUU'   M     U H  nUR!                  UU   5        M     X4$ )
Nc                   ^ U4S jm[        5       n[        R                  " 5       nUnU H  nT" UU UU/ 5        M     [        S[	        U5      5        [        SU5        UUUS.nU$ )Nc                  > [        U5      nUR                  U 5        U R                  5       nXb;   d  Uc  g UR                  5       S;   a  X;  a  UR	                  U 5        g UR                  5       n[        U5      S:X  a6  UR                  5       S;  a"  [        SU  SUR                  5        SU 35      eU H,  nX;   a  X;  a  UR	                  U5        M   T	" UUUUU5        M.     UR	                  U5        g )N)zbuiltin.parameterz
pd_op.datar   )r   r   zEvery path to recompute value zr must have saved value or starting point of the path is one of op in [pd_op.full, pd_op.full_int_array], but find z op, op ir is )listrA   rN   r\   rB   r@   ri   	Exception)
recompute_valuer   marked_recompute_opsneeded_saved_valueschain	new_chain	define_op	op_inputsop_input_find_recompute_opss
            r$   r  replace_mid_values_with_forward_subgraph.<locals>._extract_forward_recompute_subgraph_for_backward.<locals>._find_recompute_ops  sB    UI_-'779I0I4E~~ $  #='++O<!113I9~"y~~'7 @ (  4_4E  Fx  yB  yG  yG  yI  xJ  JX  Yb  Xc  d  &+:+//9# (' & !$$Y/r&   zRecompute Ops: )r   recompute_opsr   )r/   r   r   r%   ri   )r   
mid_valuesrecompute_subgraph_opsrecompute_subgraph_inputs*recompute_subgraph_outputs_backward_neededr  recompute_subgraphr  s          @r$   0_extract_forward_recompute_subgraph_for_backwardbreplace_mid_values_with_forward_subgraph.<locals>._extract_forward_recompute_subgraph_for_backward  s    )	V "%$2$;$;$=!5?2)O&)  * 	$c*@&AB$&<=/3A

 "!r&   r
   r  r   r   is_recompute_opTis_recompute_bw_opr   )rv   r/   r,   r-   clone_graphset_bool_attrr   r   look_upreplace_grad_users_withrB   rf   r~   rE   rU   r   move_before)r4   r   r  r   r   r!  op_2_id_mapr   backward_opsfirst_backward_oprecompute_forward_subgraph
origin_opsorigin_subgraph_inputsorigin_subgraph_outputs
cloned_ops	value_mapcloned_op_first_grad_user_map	origin_op	cloned_opcloned_subgraph_outputsorigin_valuecloned_valuer5   first_subgraph_grad_user
op_outputschilds                             r$   r  r    s+   B"H G$Kg**,001E>A3EFGKw++-112G2HIJL,,.223HI 	9	
  ,O<J7A8C;F<8J8  	 148  	 4d;   -557/ ((6,,\<H##L1 0 z"#@#D#DR#N **,J#>>@044UA>>/7;;N;N5e<<#++,DE<F :%@ 1 A ' (333,D%b) #  	;IFG  ""r&   c                   U R                  5       R                  n[        US US-    5      n[        [	        SUS-   5      5      n[
        R                  " U R                  5       R                  U5      5      n[        [	        US-   [        U5      5      5      n[
        R                  " U R                  5       R                  U5      5      nUU[
        R                  " 5       4$ )Nr
   r   )	r,   r-   r/   r  ranger   r   get_values_by_op_idxri   )	r4   r   r   r   required_fw_opsrequired_fw_op_idxsr   required_bw_op_idxsr   s	            r$   r   r   q  s    ""$((G'"6NQ$678OuQ(:;<,55334GH u^a%7WFG,55334GH( 	 ! r&   c                   [         R                  " 5       nU R                  5       nU(       a  X;   a  U Vs/ s H  ofU;   d  M
  UPM     nnU GH  nUR                  5       S:X  a  UR	                  5       S   nUR                  5        H  nUR	                  5       n	U	 H  n
[        U
R                  5       5      S:X  aa  U
R                  5       S   R                  5       S;   a<  U
R                  5       S   R	                  5       nU[         R                  " U5      -  nM  UR                  U
5        M     M     M  UR	                  5       n	U	 H  n
[        U
R                  5       5      S:X  aa  U
R                  5       S   R                  5       S;   a<  U
R                  5       S   R	                  5       nU[         R                  " U5      -  nM  UR                  U
5        M     GM     U$ s  snf )zH
Find all the value nodes which use the same value node to be computed.
r   r   r
   r   )r   r   rU   r\   rE   ri   rB   )r   r   without_no_need_bufferr   r   r-   r5   combine_resultcombine_res_used_oprE   rW   split_resultss               r$   r   r     s    ##%E

/
/
1C1 #9"['82C9779))ZZ\!_N  ::< $-557%F99;%@@B df !  )/(I(I(K)!') & !8!8!GG		&) & =$ jjlG!557!<<>qAFFH M  %+$E$E$G%gi " ^44]CCEIIf% "1 J LM :s   	G(G(c                   [         R                  " 5       nU R                  5       nUR                  5       S;   a4  UR	                  5       S   nUR                  5       nUR	                  5       nOUR	                  5       nU H  nUR                  5       (       aY  UR                  5       R                  5       S:X  a7  U[         R                  " UR                  5       R	                  5       5      -  nMq  UR                  U5        M     U$ )Nr   r   r   )r   r   rN   r\   r@   rB   )output_value_nodereal_input_nodesr  r  real_define_opinput_value_nodesinput_value_nodes          r$   r   r     s    %..0!113I~~==,,.q1!113*::<%557-,,.. 002779=NN 7 7 002BBD!    !12 . r&   c                    U R                  5       nUR                  5       S;   a1  UR                  5       S   nUR                  5       R                  5       $ UR                  5       $ )Nr   r   )rN   r\   r@   )r   r  r  s      r$   r   r     sX    **,I~~==,,.q1'')..00~~r&   c                J     SU R                   ;   $ !   [        SU  S35      e= f)Nz!value node not found in program:  )shape
ValueErrorr   s    r$   is_dynamic_value_noderT    s6    LZ%%%%L<ZLJKKs    "c                l     U R                  5       R                  5       S L$ !   [        SU  S35      e= f)Nzvalue node illegal: rP  )typeas_vec_typerR  rS  s    r$   is_vector_value_noderX    s>    ? ,,.d::?/
|1=>>s   " 3c                    [        U 5      (       a$  U R                   Vs/ s H  oS:w  d  M
  UPM     nnOU R                  n[        R                  " S US5      [        U R
                     -  $ s  snf )NrO  c                
    X-  $ r+   rt   )xys     r$   <lambda>*cal_value_node_size_impl.<locals>.<lambda>  s    aer&   r
   )rT  rQ  	functoolsreduce_PADDLE_DTYPE_2_NBYTESdtype)r   r   value_node_shapes      r$   cal_value_node_size_implrd    so    Z(('1'7'7C'7!7A'7C%+++-=qA
 !1!1
2	3 Ds
   	A.A.c                    [        U 5      (       aG  U R                  5       R                  5       R                  5       nSnU H  nU[	        U5      -  nM     U$ [	        U 5      $ Nr   )rX  rV  rW  as_listrd  )r   	value_vecsum_res
child_nodes       r$   r   r     sZ    J''OO%113;;=	#J/
;;G $#J//r&   c                   [         R                  " 5       n[        U 5       H  nUR                  5       S:X  a  M  UR	                  5       nU H|  nUR                  5       n[        U5      S:X  a  US   R                  5       S;   a  M;  [        U5      nXQ;  a  SX%'   MQ  [        S5      X%'   U H  n[        X%   X(   S-   5      X%'   M     M~     M     U$ )Nr   r
   r   r   g    eA)
r   	ValueDictrf   r\   rE   rU   ri   r   r   r   )	r   r   r   r5   
op_results	op_resultused_ops
real_usersr   s	            r$   r   r     s    !++-Lw779))ZZ\
#I ;;=H8}!hqk&6&6&8 = ' .y9J7*+'*-c('&D.1$/1Ca1G/L+ ' $	  ( r&   c                L    S n[        [        X!R                  5       5      5      $ )Nc                    U R                  5       S:X  a-  [        U R                  S5      R                  5       5      S:X  a  gg)Nr   r   FT)r\   ri   rW   rU   )r5   s    r$   filter_unused_combine;all_used_op_consider_combine.<locals>.filter_unused_combine)  s5    GGI**BIIaL;;=>!Cr&   )r  filterrU   )r4   rH   rs  s      r$   all_used_op_consider_combinerv  (  s(     $&F&F&HI r&   c                  ^ [        U R                  5       R                  S US-    5      n[        U R                  5       R                  US  5      m[        R                  " 5       nU H  n	U	R                  5        Hq  n
[        X
5      n[        U4S jU 5       5      (       d  M*  X;  d  M1  X;  d  M8  X;  d  M?  X;  d  MF  U	R                  5       [        ;  d  M`  UR                  U
5        Ms     M     U$ )Nr
   c              3  ,   >#    U  H	  oT;   v   M     g 7fr+   rt   )r   used_opr+  s     r$   r   *analyze_mid_hold_values.<locals>.<genexpr>F  s     H<|+<s   )r/   r,   r-   r   r   rE   rv  anyr\   r   rB   )r4   r   r   r   no_need_buffer_valuesr   r   r   r	  r5   rW   all_used_opsr+  s               @r$   r  r  6  s     g**,001E>A3EFGKw++-112G2HIJL$--/OjjlF7HLH<HHH.)(7GGIZ/##F+ #  r&   c                    S nU R                  5       S   R                  5        H5  nXA;   d  M
  Ub&  UR                  U5      UR                  U5      :  d  M3  UnM7     U$ rf  )rE   rU   r   )fwd_opr+  r*  first_backward_use_opuser_ops        r$   get_first_backward_use_opr  Q  sf     >>#A&AAC"!)""7+!!"789 %,! D ! r&   c           	        [         R                  " U5        U R                  5       R                  n[        R                   R                  5       n[        U5      n/ n0 n	U H  n
UR                  X5        M     U H  nX;   d  M
  UR                  U[        R                   R                  SSS5      5      n[        XU5      nUbd  UR                  S5      (       aN  UR                  S5      (       a8  UR                  SUR                  5        UR                  SUR                  5        UR                  U5        Uc  M  XU'   M     [         R                   " U R                  5       5        XU	4$ )NFTop_rolechunk_id)r   set_insertion_pointr,   r-   paddle	IrMappingr/   rB   cloneCloneOptionsr  has_attrset_int_attrr  r  rA    set_insertion_point_to_block_end)r4   r.  graph_inputsclone_insertion_opr+  r*  r   r2  r1  r3  input_valuer5   r_   r  s                 r$   r%  r%  ]  sD    ./""$((G

$$&IZJJ$&!#k/ $XX6::225$EF %>+%! &1)229==)22:>>##I/D/L/LM##J0E0N0NOf%$08Mf5# $ (()=)=)?@"???r&   r+   )r4   paddle.static.Programr   Sequence[pir.Value]r   r  r   r  r   r   r   r   r   zSequence[str] | Nonereturn!tuple[paddle.static.Program, int])r4   r  r   list[pir.Value]r   r  r   r  r   r  r   r   r   r   r  r  )@
__future__r   r_  r   r   r   r   typingr   r  r   paddle.autogradr   paddle.baser   paddle.base.frameworkr   collections.abcr	   DataTypeBOOLFLOAT16BFLOAT16FLOAT32FLOAT64FLOAT8_E4M3FNFLOAT8_E5M2INT8INT16INT32INT64UINT8	COMPLEX64
COMPLEX128ra  r   __annotations__r   r   r   r   r   r   r   r   r%   r(   rv   r  r   r  r   r   r   r   rT  rX  rd  r   r   rv  r  r  r%  rt   r&   r$   <module>r     s   #    	      *  4( 	MMMM1MMAMM1MM1MMMMqMMMMMMMMMMMMQMMb $T' ) Tp ' ) 
 ) K
I K	$ y 	 
I  !  !a
 a
H	6 	6& .2QC"QCQC !QC &	QC
 QC QC +QC 'QCh;#";#!;# ;# 	;#
 .;# ;# ;# ';#|~#B!N   	3l, L?046	!#@r&   