
    Αi                        S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SKJrJ	r	  S SK
JrJrJrJrJr  S SKJrJrJr  S SKJrJr  S SKJr  S S	KJrJr  S S
KJr  SSKJr  \ R@                  " \!5      r"\S 5       r#S r$S r%S r&S r' S5S jr(\)" 5       \)" 5       S S4S jr*S r+S\R,                  S\R,                  4S jr,S r- S6S\S\R,                  S\.S\/4S jjr0S r1S r2S  r3    S7S! jr4S\S\R,                  S\R,                  S\.S\/4
S" jr5S\S\R,                  S\.S#\6S$\/S\/4S% jr7S\S\R,                  S\.4S& jr8S' r9S( r:S) r;S* r<S+ r=S, r>S- r?S. r@S/ rAS0 rBS1 rCS2 rDS3 rES6S4 jrFg)8    N)pir)ir_backward)	ValueDictValueSet)call_decomp_rulecall_decomp_vjp decomp_ops_contain_unused_outputhas_decomp_rulehas_decomp_vjp)pir_chunk_id_guardpir_op_name_guardpir_op_role_guard)Block	Operation)signature_safe_contextmanager)
DebugPrintauto_recompute)core   )registerc               #      #    [         R                  " 5       n  U (       d  [         R                  " S5        S v   U (       d  [         R                  " S5        g g ! U (       d  [         R                  " S5        f f = f7f)NTF)r   _is_all_prim_enabled_set_prim_all_enabled)
prim_states    [/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/decomposition/decomp.py
prim_guardr   ,   sX     **,J.&&t,&&u- z&&u- s   A<!A A< A99A<c                     [        U [        R                  5      (       a  U 4$ [        U [        R                  5      (       a  [        U 5      $ [        S[        U 5       S35      $ )NzType z is not supported.)
isinstancer   ValuetypingSequencetuple	TypeErrortype)xss    r   _build_tensor_tupler&   8   sN    "cii  u	B	(	(RyuT"XJ&89::    c                    [        U 5      [        U5      :X  d   e/ n[        U5       H  u  pE[        X   [        R                  5      (       a  UR                  5       [        R                  " 5       ;   a$  U[        UR                  5          ;   a	  US   b   eO3[        U5      S:X  a"  [        US   [        R                  5      (       d   eUR                  US   5        M  UR                  U5        M     U$ )Nr   r   )	len	enumerater   r   r   namer	   keysappend)	orig_outsdecomp_outsopresidxvalues         r   _analyse_decomp_resultsr4   @   s    y>S----
C,
incii00	=BBDD;BGGIFFQx'''5zQ:eAh		+J+JJJJJuQx JJu - Jr'   c                 T   Sn/ nU R                  5        H  nUR                  5       nUR                  5       (       ay  UR                  5       n[	        U[
        5      (       aA  UR                  5       U:X  a-  UR                  5        Vs/ s H  ofR                  5       PM     nnUR                  U5        M  UR                  S5        M     U R                  5       U:X  a  U4$ X R                  5        Vs/ s H  o0R                  5       U   PM     sn-   n[        U5      $ s  snf s  snf )z
For standard api of operator, its inputs should keep consistent with organization of its inputs and attrs.

Args:
op (Operator): The target operator.
builtin.combineN)operandssourceinitializedget_defining_opr   r   r+   r-   get_attr_namesattrsr"   )r0   combine_op_nameinputsxinputprev_opitemapi_argumentss           r   _prepare_python_api_argumentsrD   R   s     (OF[[]
++-G7I..LLNo53:3C3C3EF3E43EFMM% 
 MM$ " 
wwyO#y5F5F5HI5Hhhjm5HIIM G Js   D 4D%c           	         Sn/ nU R                  5        GH+  nUR                  5       nUR                  5       (       d  M+  UR                  5       n[	        U[
        5      (       a  UR                  5       U:X  a  UR                  5        Hi  nUR                  5       R                  nSU;   d  M%  [        R                  " SUR                  5       R                   SU R                  5        S35            g   M  UR                  nSU;   d  M  [        R                  " SUR                   SU R                  5        S35          g   g )Nr6   z;Decomp op does not support dynamic shape -1, but got shape z in inputs of op  Tz in op )
r7   r8   r9   r:   r   r   r+   shapewarningswarn)r0   r=   r>   r?   r@   rA   rB   rH   s           r   _check_prim_dynamicrK   s   s0   'OF[[]
++-G7I..LLNo5#,,.D KKM//EU{ YZ^ZeZeZgZmZmYnn  AC  AH  AH  AJ  @K  KL  M  $ / ;MMUV[VaVaUbbijljqjqjsittuv  + r'   c           	      0   [        U5      [        U5      :X  d"   SU  S[        U5       S[        U5       35       e[        UU5       H  u  pVUb  Uc(  U [        R                  ;  a  [	        SU  SU SU 35      eUc  M8  Ub  Ub  Ub  XS;   a  XdX5   '   UR
                  nUR
                  nUR                  n	UR                  n
Xx:X  d   SU  SU S	U 35       eS
U
;  d   SU  S35       eX:X  d   SU  SU	 SU
 35       eUSL USL -  (       a   S5       e  g   g)aZ  
Check whether the replaced outputs are consistent with origin outputs.

Args:
op_name (str): The name of operator.
orig_outs (tuple): The outputs of original operator.
new_outs (tuple): The outputs of replaced operator.
orig_vars (dict): Origin variables of original block.
dst_vars (list): Corresponding replaced variables of Origin variables.
zwhen replace origin op z[ with composite rule, num of origin outs should be equal to new outs, but len(orig_outs) = z and len(new_outs) = Nzop z2 should not contain any None value. original outs=z and its composite rule outs=z\ with composite rule, origin out dtype should be equal to new out dtype, but orig_out dtype=z and new_out dtype=rF   z1 with composite rule, composite out shape has -1.z\ with composite rule, origin out shape should be equal to new out shape, but orig_out shape=z and new_out shape=z"orig_out and new_out should match.)r)   zipr   ops_contain_none
ValueErrordtyperH   )op_namer.   new_outs	orig_varsdst_varsorig_outnew_out
orig_dtype	new_dtype
orig_shape	new_shapes              r   _check_op_resultsr[      s    y>S]* 
!' +  #I//DS]O	U*
 ! 4000gYPQZP[[x  zB  yC  D   $)=(4;Y01!JI!JI* )' 3&&0\1DYKQ* Y& )'2cd& * )' 3&&0\1DYKQ* !D(W_= 4= 	Gr'   rF   c                     [         R                  S   U-  n[        U[        5      (       d   e[        U[        5      (       d   e[         R                  " XX#XE5      $ )a  
Search nonbasic ops which have be registered composite rules and replace them with primitive ops.
The operators in blacklist will be excluded from program when decomposed into primitives, and only the
operators in whitelist will be decomposed. The priority of blacklist is higher than whitelist, it means
an operator both in blacklist and whitelist will not be decomposed.

The finally set that will be decomposed is:
    (block.ops & ops have decomposite rule & whitelist) - blacklist

Note:
    All variables must be contained inside the given program.

Args:
    program (Program): The program to be processed.
    src_vars (list[Value]): In program, once some operator is decomposed, its vars will be replaced by new ones. This argument means some vars will be used later and corresponding vars will be returned for later usage.
    blacklist (frozenset): The Operators that will be exclude when decomposed into primitives.
    whitelist (frozenset): Only the operators in whitelist will be decomposed into primitives.
    start_index (int): The start index of decomposed operator in global block, default 0;
    end_index (int): The end index of decomposed operator in global block, default -1 means all ops will be composed. start_index and end_index follow the principle of left closed and right open, that is [start_index, end_index).

Returns:
    dst_vars (list): A list contains all vars which replace origin ones in src_vars.
forward_blacklist)r   prim_configr   intsinking_decomp)programsrc_vars	blacklist	whiteliststart_index	end_indexs         r   	decomposerg      sZ    >   !45	AIk3''''i%%%%9 r'   c                 6   U R                  5       nUR                  5       nUR                  5       UR                  5       :w  a  g[        UR                  5       5       H9  nUR                  U5      R	                  UR                  U5      5      (       a  M9    g   g)zAcheck whether the inputs of two builtins.combine ops are the sameFT)r:   num_operandsrangeoperand_sourceis_same)input1input2builtin_combine_op1builtin_combine_op2is        r   _check_combine_inputsrr      s     002 002'')-@-M-M-OO*779:A#2215=='66q9   ; r'   fwd_opbwd_opc                 z   U b%  U R                  5       S-   UR                  5       :w  a  gUR                  5       nUR                  5        Vs/ s H  o3R                  5       PM     nn[	        U5      [	        U5      :X  d   S5       e/ n[        U5       H   u  pgSU;  d  M  UR                  XF   5        M"     U R                  5        Vs/ s H  o3R                  5       PM     nnU R                  5       n	U R                  5        Vs/ s Hj  nUR                  5       R                  5       (       d  M(  UR                  5       R                  5       R                  5       S:X  d  MZ  UR                  5       PMl     n
nSS/nU H  nUR                  5       (       aL  UR                  5       R                  5       S:X  a*  SnU
 H  n[        X5      (       d  M  Sn  O   U(       d    gMd  U[        U5      ;   a  Mu  U[        U	5      ;   a  M  UR                  5       R                  5       U;   a  M    g   gs  snf s  snf s  snf )z3check whether the bwd_op is corresponding to fwd_op_gradFz1backward op names do not match backward op inputsr6   zpd_op.full_int_arrayz
pd_op.fullT)r+   get_input_namesr7   r8   r)   r*   r-   resultsr9   r:   rr   r   )rs   rt   bwd_op_input_namesr?   
bwd_inputsfwd_op_related_inputs_outputsr2   r+   
fwd_inputsfwd_outputsfwd_vec_inputsinserted_op_name_listoperandin_fwd	vec_inputs                  r   	_check_opr      s   
 ~0FKKMA//1&,oo&78&7((*&7J8!"c*o5 ;5 %'!12	$)00A 3 '-oo&78&7((*&7J8.."K ""A88:!!# 	 HHJ&&(--/3DD 	
"   4\B0!!'')..04EEF+	(<<!F ,   8J//h{33**,1137LL% 1( M 9 9s   H.	H3'H81.H8#H8c                     U R                  5       n/ SQn[        U5       HH  u  pEXS;   d  M  U R                  U5      R                  5       nXa;   d  M2  X   nUR	                  5       nUs  $    g )N)out_gradOut_grad	loss_grad)rw   r*   r   r8   r:   )	rt   grad_var_to_varry   out_grad_namer2   
input_namer   outrs   s	            r   _get_fwd_opr   0  sk    //19M$%78&~~c*113H*%/,,. 9 r'   blockr   returnc                    [         R                  R                  U R                  5         UR	                  5       nUR                  5       n[        R                  " U5      n[        U5      nU=(       d    UnU(       Ga  [        U5      (       a
   SSS5        gUb  [         R                  " U5        O[         R                  " U5        [        U5      n	U(       a  [        U5      n
[        XZU5      nO[        U" U	6 5      n[        XEU5        [!        X%US9  UR	                  5       ["        R$                  " 5       ;   aN  ['        [)        U5      5       H5  nU["        UR	                  5          ;  d  M   X\   R+                  X   5        M7     OPUR	                  5       ["        R$                  " 5       ;   a  US   R+                  US   5        OUR+                  U5        U R-                  U5        UbL  SnUR                  5        H  nUR/                  5       (       d  M  Sn  O   U(       a  U R-                  U5        SnUS4sSSS5        $ [1        U5      S4sSSS5        $ ! , (       d  f       g= f)a  
Decompose the forward op into a list of primitive ops.
Args:
    block (Block): the block to which the forward op belongs.
    fwd_op (pir.Operation): the forward op to be decomposed.
    grad_var_to_var (dict): a dict obtained from distributed processing,
        which maps the backward grad variable to its corresponding forward variable.
    prev_op (pir.Operation): the previous op of fwd_op in the block. If prev_op is builtin.combine, insertion point when decomposing fwd_op will be set to prev_op.
Returns:
    new_outputs (tuple(Value)): the new outputs after decomposing.
    has_decomposed: whether the forward op has been successfully decomposed.
NNF)r.   rR   r   TF)r   r   program_guardra   r+   rx   r   get_decomp_ruler
   rK   set_insertion_pointrD   r   r4   r&   r[   _upgrade_grad_var_to_varr	   r,   rj   r)   replace_all_uses_with	remove_ophas_one_user"   )r   rs   r   rA   rQ   r.   
decom_rulehas_sink_decomp_rulelower
input_argsr/   rR   r2   r   rB   s                  r   _decomp_fwd_opr   =  s      
			.++-NN$	--g6
.v622"6**" 
/	. "''0''/ 7v>J#.v62F /z:/FGg(; %x
 {{} @ E E GG Y0C?NO "<<X]K 1 ;;=$D$I$I$KKaL66x{C00:OOF# " 	#OO-D''))$)	 . OOG,T>w 
/	.| #U*} 
/	.	.s&   A&ICI2B&I#I	I
I-c                    / nU R                  5        H  nUR                  5       R                  5       (       a  UR                  5       R                  5       R	                  5       S:X  aj  UR                  5       R                  5       n[        SUR                  5       5       Vs/ s H  nUR                  U5      PM     nnUR                  U5        M  UR                  UR                  5       /5        M     U$ s  snf )Nr6   r   )	r7   r8   r9   r:   r+   rj   ri   rk   r-   )rs   
new_inputsr@   builtin_combine_oprq   	new_inputs         r   _prepare_inputsr     s    J"LLN&&((..0557;LL!&!?!?!A q"4"A"A"CDDA #11!4D   i(u||~./ # s   #C9c                 4   U R                  5       nU R                  5       n[        U5      [        U5      :X  d   S5       eUR                  5        Vs/ s H  oDR	                  5       PM     nnUR                  5       n[        U5      [        U5      :X  d   S5       eU R                  5        Vs/ s H  oDR	                  5       PM     nnU R                  5        Vs/ s Hj  nUR	                  5       R                  5       (       d  M(  UR	                  5       R                  5       R                  5       S:X  d  MZ  UR	                  5       PMl     nn/ n	/ n
[        U5       H  u  pUR                  5       (       aq  UR                  5       R                  5       S:X  aO  SnU H  n[        X5      (       d  M  Sn  O   U(       d'  U	R                  U/5        U
R                  Xk   5        M  M  U[        U5      ;   a  M  U[        U5      ;   a  M  U	R                  U/5        U
R                  Xk   5        M     / nSnU HL  nUS-   U
;   a  UR                  U	U   5        US-  nM'  UR                  [        R                  " 5       /5        MN     U$ s  snf s  snf s  snf )	Nz7forward op output names do not match forward op outputsz7backward op input names do not match backward op inputsr6   FTr   rv   r   )rx   get_output_namesr)   r7   r8   rw   r9   r:   r+   r*   rr   r-   r   r   
fake_value)rs   rt   r}   fwd_output_namesr?   rz   bwd_input_namesr|   r~   grad_outputsgrad_output_namesrq   	bwd_inputr   r   new_grad_outputsindexfwd_output_names                     r   _prepare_grad_outputsr     sc   .."K..0 C$44 A4 '-oo&78&7((*&7J8,,.O3z?2 A2
 '-oo&78&7((*&7J8 ""A88:!!# 	 HHJ&&(--/3DD 	
"   L!*-!!##))+0026GGF+	(>>!F , ##YK0!(();< 
 Xj11 55##YK0!(();<' .. E+g%*;;##L$78QJE##S^^%5$67 , _ 9 9s   J.J'J.J8Jc                     / n[        U5       HN  u  p4UR                  5       (       a  S/[        X   5      -  nOS/[        X   5      -  nUR                  U5        MP     U$ )NFT)r*   r9   r)   r-   )r|   bwd_outputsstop_gradientsr2   
bwd_outputstop_gradients         r   _prepare_stop_gradientsr     sc    N$[1!!##"Gc*/&::M!FS%99Mm, 2 r'   c                 ,   U c   S5       eUb4  Ub1  [        U5       H"  u  pVX`;   d  M  U R                  U5      XU   '   M$     UbP  UbL  U R                  5        H7  u  px[        U5       H#  u  pUR                  U
5      (       d  M  XI   X'   M%     M9     g g g )Nz"grad_var_to_var should not be None)r*   popitemsrl   )r   
orig_grads	new_gradsr.   rR   r2   
grad_inputgrad_varvarrq   orin_vars              r   r   r     s     &L(LL&)"7(4OC,2A2E2E3#/  5
 !5,224MH(3;;x((08O-  4 5 "6r'   c                    [        U5      nUR                  5        Vs/ s H  oU/PM     nn[        X5      n[        XBR                  5       5      nU R                  R                  U5      n	[        U R                  5      n
[        R                  " XXgU5      n[        U R                  5      nX-
  nUS:X  aN  U R                  S   R                  5       UR                  5       :X  a  U R                  U R                  S   5        gU R                  S   R                  5       S:X  a.  U R                  S   R                  S5      R                  5       //n/ nU H[  nUS   b.  US   R                  5       (       a  UR                  US   5        M7  UR                  [        R                   " 5       5        M]     [        U5      [        UR                  5       5      :X  d   S5       e[#        X2R                  5       US9  U	n[%        X5       H'  nU R'                  U R                  U   U5        US-  nM)     UR)                  U5        U R                  U5        [+        U5      S4$ s  snf )	z
Decompose the backward op into a list of primitive ops.
If forward op has composite vjp rules (including custom vjp), call call_vjp() to get a list of primitive operators in backward graph, then replace backward op.
r   rF   r   builtin.splitr   zNresults of original backward op do not match results of decomposed backward opr   r   T)r   rx   r   r   opsr   r)   r   call_vjpr+   r   r   r8   r9   r-   r   r   r   rj   move_opr   r"   )r   rs   rt   r   fwd_inputs_
fwd_outputfwd_outputs_grad_outputs_stop_gradients_
bwd_op_idxbefore_num_opsnew_grad_inputsafter_num_opsnum_appended_opsr1   r   
insert_idxrq   s                     r   _decomp_bwd_with_vjpr     s
    "&)K39>>3CD3CZL3CLD)&9M-k>>;KLO (J^Nmm\/O 		NM$5 12!3!3!5!F		"& 99R=?2 %		" 5 5a 8 ? ? ABCO)J!}(Z]-F-F-H-H

:a=)

3>>+,	 *
 3x3v~~/00 	
\	
0
 	!(8C	

  
~5AMM%))A,
3!OJ 6 	$$S)Sz4Y Es   I#r|   fwd_outputs_after_decomposec                   ^^^ Tc  [        S5      eUR                  5        Vs/ s H  oUR                  5       PM     nnUR                  5       n[	        UU4S jU 5       5      n[	        U4S jU 5       5      n	[	        U4S jU 5       5      n
U R
                  R                  U5      n[        U R
                  5      n[        R                  " XU5      n[        U R
                  5      n/ nSn[        U5       HZ  u  nnUR                  5       (       a  UR                  UU   5        US-  nM6  UR                  [        R                  " 5       5        M\     [        TXS9  Un[!        X5       H'  nU R#                  U R
                  U   U5        US-  nM)     UR%                  U5        U R'                  U5        Sn[	        U5      U4$ s  snf )	aF  
Decompose the backward op into a list of primitive ops.
If forward op has no composite vjp rules, and forward op has been decomposed to a list of primitive operators in forward graph previously,
call grad() for the decomposed forward subgraph to get a list of primitive operators in backward graph, then replace backward op.
z=To decompose backward op, please decompose forward op firstlyc              3   l   >#    U  H)  nU[        T5      ;   a  M  U[        T5      ;   a  M%  Uv   M+     g 7fN)r   ).0r   r|   r   s     r   	<genexpr>*_decomp_bwd_without_vjp.<locals>.<genexpr>I  s<      #I*-- 	 H%@AA	 		#s   44	4c              3   .   >#    U  H
  nTU   v   M     g 7fr    )r   grad_outputr   s     r   r   r   Q  s      8D$s   c              3   \   >#    U  H!  nUR                  5       (       d  M  TU   v   M#     g 7fr   )r9   )r   r   r   s     r   r   r   T  s+      %J!!# 	$
#%s   ,,r   r   r   T)RuntimeErrorr7   r8   rx   r"   r   r   r)   r   gradr*   r9   r-   r   r   r   rj   r   r   r   )r   rt   r   r|   r   r?   rz   grad_inputsr   r   r   r   r   r   r   r1   input_grads_idxr2   r   r   rq   has_decomposeds     ```                 r   _decomp_bwd_without_vjpr   4  s    #*K
 	

 '-oo&78&7((*&7J8.."K # L  8D L  % K (J^N!&&|,OO		NM CO$[1Z!!##JJ78q OJJs~~'( 2 K
 J>1eiilJ/a
 2   %	OOFN:~%%c 9s   Gc                    [        X5      n[        X15      (       d'  [        R                  UR	                  5        S35        g[        U5      (       d  [        U5      (       a  g[        U UUU5      u  nnU(       dV  UR                  5        Vs/ s H  ofR                  5       PM     nn[        U UU5      u  nn	U	(       a  [        U UUUU5      u  nnXE4$ s  snf )a  
Decompose a backward op in pir program.
Get the corresponding forward op according to grad_var_to_var firstly, then
(1) try to decompose backward op by calling _decompose_bwd_with_vjp, if forward op has composite vjp rules (including custom vjp),
_decompose_bwd_with_vjp will call call_vjp() to get a list of primitive operators in backward graph, then replace backward op successfully and return True;
(2) when _decompose_bwd_with_vjp return False, means there is no composite vjp rules,
try to decompose forward op firstly by calling _decomp_fwd_op firstly and get corresponding primitive operators in backward graph by calling _decompose_bwd_without_vjp secondly, then replace backward op successfully and return True;
(3) if the backward op is still not decomposed by the above two steps, returns False.

Args:
    block (Block): the block to which the backward op belongs.
    bwd_op (pir.Operation): the backward op to be decomposed.
    grad_var_to_var (dict): a dict obtained from distributed processing,
        which maps the backward grad variable to its corresponding forward variable.
Return:
    new_input_grads (tuple(Value)): new results of backward op after decomposing.
    has_decomposed: whether the backward op has been successfully decomposed.
zM can not be decomposed due to the mismatch between forward op and backward opr   )r   r   loggerdebugr+   rK   r   r7   r8   r   r   )
r   rt   r   rs   r   bwd_has_decomposedr?   r|   new_fwd_outputsfwd_has_decomposeds
             r   _decomp_bwd_opr   {  s    4 1FV$${{}ojk	
 6""&9&&A&A 			 *0//*;<*;Qhhj*;
< 
	
 
 (" ((+ =s   Cc                 N   / nU R                  5       nUR                   H  nUR                  5       R                  S5      (       d&  UR                  5       R                  S5      (       d  MM  UR                  5       U;  d  Mc  UR	                  UR                  5       5        M     U$ )Nrv   _grad_)global_blockr   r+   endswithr-   )pir_programbwd_opsr   r0   s       r   _get_all_bwd_opsr     sy    G++-LGGIw''2779+=+=h+G+Gggiw&NN2779%	 
 Nr'   c                     / n [         R                  " 5       n[         R                  " 5       nU R                  U5        U R                  U5        [         R                  " S5        [         R
                  " S5        [        R                  R                  R                  S5      S   n[        R                  R                  SS05        S[        R                  R                  R                  l        U R                  U5        U $ )NTFLAGS_enable_pir_api)r   _is_fwd_prim_enabled_is_bwd_prim_enabledr-   _set_prim_forward_enabled_set_prim_backward_enabledpaddlebase	framework	get_flags	set_flags
global_var_use_pir_api_)stateprev_fwd_prim_stateprev_bwd_prim_stateprev_pir_api_flags       r   _set_prim_stater     s    E335335	LL$%	LL$%""4(##D)--778NO 	& 6:FKK$$2	LL"#Lr'   c                 4   [        U 5      S:X  d   S5       e[        R                  " U S   5        [        R                  " U S   5        [        R
                  R                  SU S   05        U S   [        R                  R
                  R                  l	        g )N   zEstate should contain fwd_prim_state, bwd_prim_state and pir_api_stater   r   r      )
r)   r   r   r   r   r   r   r   r   r   )r   s    r   _reset_prim_stater    s|    u:? O? 	""58,##E!H-
 6aAB5:1XFKK$$2r'   c                 r   [        5       nUR                  5        GH  u  p4X0R                  5       ;   d  M  X@R                  5       ;   d  M0  [        X   5      S:X  a%  [        X   5      S:X  a  X   S   nX   S   nXbU'   Mf  / n/ n[        X   5      S:X  a  UR	                  X   S   5        O[        X   5      S:X  a>  X   S   R                  5       R                  5       S:X  a  UR	                  X   S   5        O4[        S[        X   5      5       H  n	UR	                  X   U	   5        M     [        X   5      S:X  a  UR	                  X   S   5        O[        X   5      S:X  a>  X   S   R                  5       R                  5       S:X  a  UR	                  X   S   5        OOX   S   R                  5       n
U
R                  5       R                  S5      (       a  UR	                  X   S   5        [        U5      S:X  d   S5       e[        S[        U5      5       H  n	US   X'U	   '   M     GM     U$ )zbtranslate grad_var_to_var (mapping VarDesc->VarDesc) to pir_grad_var_to_var (mapping Value->Value)r   r   r  zbuiltin.slicerF   _z#translate pir_grad_var_to_var error)	r   r   r,   r)   r-   r:   r+   rj   r   )param_mappingr   pir_grad_var_to_varr   r   new_grad_varnew_varnew_grad_varsnew_varsrq   last_ops              r   _translate_gradvartovar_to_pirr    s'   #+(..0))++7I7I7K0KM+,1*+q0,6q9',Q/4;L1 "}./14!(()@)CD/0A5%/2BBDIIK&' "(()@)CD"1c-*A&BC%,,]-DQ-GH D })*a/OOM$6q$9:*+q0%*1-==?DDF&' OOM$6q$9:+04DDFG||~..s33 (:1(=>8})P+PP)q#m"45A<DQK'a(89 6M 1P r'   c                    [         R                  R                  R                  U 5         [	        U 5      n/ nU R                  5       R                  nU Hf  nUR                  5       nUR                  5       U;   d  M)  [        U R                  5       XQ5      u  pxU(       a  MN  Xc;  d  MU  UR                  U5        Mh     SSS5        [        R                  SW 35        g! , (       d  f       N'= f)z2Traverse and decompose all backward OPs in programNz.Following backward ops can not be decomposed: )r   r   r   r   r   r   r   r+   r   r-   r   r   )	r   r  r   undecomposed_bwd_opsr   r0   bwd_op_namer  r   s	            r   _decomp_bwd_programr    s    		&	&{	3";/!&&(,,B'')KwwyG#(6,,.)% +*#?(//< 	 
4  LL
89M8NO! 
4	3s   AC=!C"C)C
C-c                 j   [         R                  R                  R                  U 5         U R	                  5       R
                  n[        U 5      nSS/n/ nSnU H  nUR                  5       nUR                  5       U;  af  UR                  5       U;  a<  [        U R	                  5       UUU5      u  pU
(       d  X;  a  UR                  U5        OX;  a  UR                  U5        UR                  5       S:X  a  UOSnM     SSS5        [        R                  SW 35        g! , (       d  f       N'= f)z1Traverse and decompose all forward OPs in programzpd_op.stackzpd_op.squeezeNr6   z-Following forward ops can not be decomposed: )r   r   r   r   r   r   r   r+   r   r-   r   r   )r   r  r   r   black_fwd_opsundecomposed_fwd_opsrA   r0   fwd_op_namer  r   s              r   _decomp_fwd_programr  0  s	   		&	&{	3&&(,,";/&8!B'')Kwwy'779M1,:#002+	-)A /'C,33K@">,33K@GGI)::bG%  
48 LL
78L7MN9 
4	3s   CD$$
D2c                    [        U / 5        [        R                  S   nU R                  5       nSn[        R
                  R                  R                  U 5         U R                  5       R                  nU GHz  nUR                  5       nUR                  S5      S   U;   a  M.  Sn[        U5      (       d  MB  [        R                  " 5       (       d  [        U5      (       a  SnU(       Gd  [        UR                  5       5         [        UR                  5         [!        UR"                  5         [
        R$                  " U5        UR'                  5       nSn	[)        U5      n
[+        [-        U5      5       H  nX   R/                  5       (       d  M  X   R1                  5       R3                  5       nUR                  5       S:X  d  MR  Sn	[5        UR                  5       UR'                  5       X   5        UR7                  X   5        UR9                  U5        M     SSS5        SSS5        SSS5        W	(       d8  [;        WW
U5      n[5        UR                  5       X5        UR7                  U5        UR9                  U5        UR                  5       S:X  a  UnUc  GM.  SnUR'                  5        H  nUR/                  5       (       d  M  Sn  O   U(       a  UR9                  U5        SnGM}     SSS5        [        R
                  R=                  U5        g! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       Nd= f)	zs
Decompose all non-primitive ops into primitive ops in a pir program. It may contain forward ops and backward ops.
backward_blacklistN.rF   FTr   r6   )rg   r   r^   r   r   r   r   r   r+   splitr   _enable_prim_dynamic_shaperK   r   r   op_roler   chunk_idr   rx   r   rj   r)   r   	first_useownerr[   r   r   r4    set_insertion_point_to_block_end)r   rc   r   pre_combine_opr   r0   r  skip_decompr.   is_next_splitr/   rq   next_oprR   r   rB   s                   r   decompose_dist_programr%  S  s   
 k2   !56I$$&EN		&	&{	3&&(,,B'')K  %b)Y6Kb!!7799)"--"&K")"'')4)"**5*2;;7//3$&JJL	(-&5b&9!&s9~!6A(|7799*3,*@*@*B*H*H*J#*<<>_#D48M$5(/(/(9(3%&
 %,$A$A(3%& %*OOG$< "7 8 6 50 )#:%{B$ *"'')YI00:OOB'779 11%'N!- $I . 6 6 8++--(-I! !9 !7%)Nq  
4v JJ//6W 87 65 54 
4	3sw   AM:AMML1	3A L2LAL"L1	*M2A/M&(M%M
L.)L1	1
M ;M
MM
M#c                 r    [        5       n[        X5      n[        X5        [        X5        [	        U5        U$ )a^  
Decompose all PHI ops into prim ops in a pir program.
Args:
    pir_program (Program): the program to be decomposed
    param_mapping (dict): a map of program variables to pir program values
    grad_var_to_var (dict): a dict obtained from distributed processing,
        which maps the backward grad variable to its corresponding forward variable.
)r   r  r  r  r  )r   r  r   r   r  s        r   decompose_pir_programr'    s:     E8 99er'   c                 (   / nU R                  5       R                   Hq  nUR                  5       S:X  a"  UR                  UR	                  5       S   5        UR                  5       S:X  d  MO  UR                  UR	                  5       S   5        Ms     U$ )Nz
pd_op.datar   zbuiltin.parameter)r   r   r+   r-   rx   r   rx   r0   s      r   "get_inputs_from_data_and_parameterr*    sp    G&&(,,779$NN2::<?+779++NN2::<?+	 -
 Nr'   c                     / nU R                  5       R                   HG  nUR                  5       S:X  d  M  UR                  UR	                  S5      R                  5       5        MI     U$ )Nzpd_op.fetchr   )r   r   r+   r-   r   r8   r)  s      r   get_outputs_from_fetch_opr,    sR    G&&(,,779%NN2::a=//12 - Nr'   c                     / n[        5       nUR                  5        H	  u  pEXCU'   M     U  H  nUR                  X6   5        M     U$ r   )r   r   r-   )outputsr  rx   var2grad_varkvoutputs          r   get_grad_var_for_listr3    sH    G;L#))+Q ,|+, Nr'   c                 t   ^  U 4S jn/ nU H(  nUR                  U" UR                  5       5      5        M*     U$ )Nc                    > [        TR                  5       R                  5       H  u  pX:X  d  M  Us  $    [        S5      eNzop not found in programr*   r   r   r   r0   r2   op_iterra   s      r   getIdx'get_defining_op_indices.<locals>.getIdx  <    %g&:&:&<&@&@ALC}
 B 455r'   )r-   r:   )ra   output_valuesr:  rx   r2  s   `    r   get_defining_op_indicesr>    s8    6 Gvf44678  Nr'   c                    ^  U 4S jn/ nT R                  5       R                   H)  nU" U5      (       d  M  UR                  U" U5      5        M+     U$ )Nc                    > [        TR                  5       R                  5       H  u  pX:X  d  M  Us  $    [        S5      er6  r7  r8  s      r   r:  #get_forward_op_idxs.<locals>.getIdx  r<  r'   )r   r   r-   )ra   is_forward_op_funcr:  rx   r0   s   `    r   get_forward_op_idxsrC    sJ    6 G""$((b!!NN6":& ) Nr'   c                    [        S5        [        SU 5        [        U 5      n[        U 5      nSn[        U5      (       a  [	        [        X5      5      n[        R                  " S5      nUR                  [        R                  5        Ub   [	        [        X5      5      nUS:X  a  UR                  S5        U $ US-   n[        U UU/ UU5      u  pxU$ !   UR                  S5         NG= f)NzStart Recompute Pir Program:zBefore Recompute: rF   zauto-recomputezNo Forward Ops Found!zSkip Auto Recompute!r   )r   r*  r,  r)   maxr>  logging	getLoggersetLevelINFOrC  infor   )	r   rB  r>   r.  fwd_op_end_idxr   backward_op_start_idxra   r  s	            r   auto_recompute_pir_programrM    s    -.#[1/<F'4GN
7||4[JK/0F
OOGLL!%	1 #KDN *+*Q.
JG N#	1KK/0s   C C()NNr   )NNNN)GrF  r    rI   r   r   paddle.autogradr   paddle.autograd.backward_utilsr   r   paddle.base.corer   r   r	   r
   r   paddle.base.frameworkr   r   r   paddle.base.libpaddle.pirr   r   paddle.base.wrapped_decoratorr   paddle.decomposition.recomputer   r   paddle.frameworkr    r   rG  __name__r   r   r&   r4   rD   rK   r[   	frozensetrg   rr   r   r   dictr"   r   r   r   r   r   r   listr   r   r   r   r  r  r  r  r%  r'  r*  r,  r3  r>  rC  rM  r   r'   r   <module>r[     s        ' >  
 7 G E ! 			8	$ . .;$ B 8 <@5v kk$N"/MM/MM/d
 IMN+N+--N+:>N+
N+b$6r <*8 8 MM8  MM8  	8 
 8 vD&D&MMD& D& 	D&
 "'D& D&NE)E)MME) E)P&>+\. FG7T.
#r'   