
    Αie                     6   S SK r S SKrS SKrS SKJrJrJrJrJr  S SK	J
r
Jr  S SKJr  S SKJr  SSKJr  SSKJrJrJrJrJrJrJrJr  SS	KJr  S
SKJrJr  \" \ R@                  5      r! " S S\5      r"S r#SS jr$S r%SS jr&\" S5       " S S\5      5       r'g)    N)ProgramStats_append_grad_suffix__find_op_path__get_no_grad_set_name_rename_arg_)OP_ROLE_KEYOpRole)core)unique_name   )OperatorDistAttr)get_loss_opinsert_dependencies_for_two_opsis_backward_opis_recompute_exclude_opis_recompute_op6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_dist_op_desc_original_idset_var_dist_attr)
get_logger   )PassBaseregister_passc                   f   ^  \ rS rSrU 4S jr\S 5       r\S 5       rS rS r	/ 4S jr
S rS	rU =r$ )
RecomputeState/   c                 J   > [         TU ]  XS9  0 U l        / U l        / U l        g )N)blockops)super__init__seg_op_deps_checkpoints_reserved_vars)selfr   r   	__class__s      q/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/passes/auto_parallel_recompute.pyr!   RecomputeState.__init__0   s+    u.     c                     U R                   $ N)r#   r%   s    r'   checkpointsRecomputeState.checkpoints6   s       r)   c                     U R                   $ r+   )r$   r,   s    r'   reserved_varsRecomputeState.reserved_vars:   s    """r)   c                 :    [        S U R                   5       5      $ )Nc              3   8   #    U  H  n[        U5      v   M     g 7fr+   )r   ).0ops     r'   	<genexpr>.RecomputeState.is_recompute.<locals>.<genexpr>?   s     :2?2&&s   )anyr   r,   s    r'   is_recomputeRecomputeState.is_recompute>   s    ::::r)   c                    [        U R                  5       GH  u  p[        U5      (       a    g UR                   Hj  nX0R                  ;   a$  U R                  U   S   R                  U/5        M6  0 U R                  U'   U/U R                  U   S'   / U R                  U   S'   Ml     UR                   Hj  nX0R                  ;   a$  U R                  U   S   R                  U/5        M6  0 U R                  U'   / U R                  U   S'   U/U R                  U   S'   Ml     [        U5      (       d8  U R                  R                  UR                  5        [        U5      (       d  GMT  UR                  S5      n[        R                  " SU5      nUR                  S5      nX@R                  ;  a  U/U R                  U'   GM  U R                  U   S   S-   U:X  d   S5       eU R                  U   R                  U/5        GM     g )	Nvar_as_input_opsvar_as_output_opsop_namescopez/auto_parallel/rc_[0-9]*r   r   z0The recompute segment's ops should be continuous)	enumerater   r   input_arg_namesvar_op_depsextendoutput_arg_namesr   r#   r   attrresearchgroupr"   )r%   ir5   nameseg_nameress         r'   build_statesRecomputeState.build_statesA   s   txx(EAb!!**+++$$T*+=>EEqcJ-/D$$T*BCD$$T*+=>BDD$$T*+>? + +++++$$T*+>?FFsK-/D$$T*ACD$$T*+=>CD#D$$T*+>? , #2&&!!(()<)<=.r22ww~.H))6ACyy|H///./S  *''1"59Q> F>   *111#6E )r)   c                    / nU R                   R                  5        He  n[        U5      S:X  a  M  UR                  US   US   S-   /5        U R                  R                  U R                  US      R                  5        Mg     [        USS9 H:  nU[        U5      :  d   SU S[        U5       S35       eUR                  U5        M<     U$ )	Nr   r   r?   T)reversezthe no_recompute_segments idx [z)] should be lower the number of segment [])
r"   valueslenappendr#   rC   r   rD   sortedpop)r%   no_recompute_segmentssegmentssegment_idxrI   s        r'   get_recompute_segments%RecomputeState.get_recompute_segmentsf   s    ++224K;1$OO[^[_q-@AB$$TXXk"o%>%O%OP	 5 -t<As8}$ 1!4]^abj^k]llmn$ LLO	 = r)   c           	      z   U R                    Vs/ s H  o"R                  PM     nnSU;  a  SU;  a  gSnU[        U R                   5      :  Ga  U R                   U   nSUR                  ;   a  GOUR                  S:X  a,  U R                  R	                  UR
                  5        US-  nMw  UR                  S;  a  US-  nM  UR                  S:X  a  S	OS
nUR                  U5      b&  [        UR                  U5      5      (       a  US-  nM  UR                  U5      n[        R                  " S5      n[        R                  " SR                  US/5      5      n	U R                  R                  U	5        U R                  R                  U	S[        R                   R"                  R$                  SSS9n
S/nUR&                  n[)        UU
UUUR*                  S9nUR-                  S5      SL a  SO[/        UR-                  S5      5      nU R                  R1                  UR2                  S0 SU
0USS.S9nUR5                  SUR-                  S5      5        [7        UUUUUR*                  S9  U R                   R9                  XO5        UR:                  R=                  Xi/5        UR:                  R5                  SS5        UR:                  R5                  SS5        UR?                  U
R@                  U5        US-  nU[        U R                   5      :  a  GM  U R                  RC                  5         gs  snf )z
If program's forward part has 'dropout' op, this function will insert
a seed op before it to guarantee that two dropout op have the same outputs.
dropoutfused_dropout_addNr   gradseedr   )r]   r^   seed_tensorSeedrc_seed.tmpint32F)rJ   dtypetypepersistablestop_gradientr?   chunk_idfix_seedOutT)r`   	force_cpu)indexrh   inputsoutputsattrsr>   r   )"r   rh   rS   r$   rC   rD   inputget_op_dist_attr_for_programr   generategenerate_with_ignorable_keyjoinrT   r   
create_varr
   VarDescVarTypeDENSE_TENSORprocess_meshr   rl   rE   int_insert_op_without_syncidx	_set_attrr   insertdesc	set_inputset_input_dist_attrrJ   _sync_with_cpp)r%   dist_contextr5   op_typesop_idxcur_opseed_tensor_namecur_op_dist_attrop_unique_namevar_unique_nameseed_varref_dims_mappingref_process_meshseed_var_dist_attrr`   seed_ops                   r'   !modify_forward_desc_for_recompute0RecomputeState.modify_forward_desc_for_recomputev   s   
 '+hh/hGGh/H$)<H)Ls488}$XXf%F${{f$##**6+B+BC!{{"BB!!'0C!C  ||,-9c-.? ? !+HHP )11)<N)EE.%01O &&7zz,,$\\))66!# - H !#t/<<!2  )22" ;;z*e3 V,-  jj88jj)#$7 9 G nfkk..IJB  )22 HHOOF,KK!!"24EFKK!!*e4KK!!&!,001 aKF_ s488}$b 	

!!#m 0s   L8)r#   r$   r"   )__name__
__module____qualname____firstlineno__r!   propertyr-   r0   r9   rM   rZ   r   __static_attributes____classcell__r&   s   @r'   r   r   /   sQ    ! ! ! # #;#7J <>  [$ [$r)   r   c                     [        U R                  R                  5       5       H0  nUR                  U R                  R                  U5      :X  d  M.  Us  $    g)Nr?   )ranger   op_sizer5   )r   r   r   s      r'   _find_op_indexr      sA    UZZ'')*;;%**--,,J + r)   c                 \   Uc  [        5       nO[        U5      n[        5       nU R                  5        HL  nSUR                  ;   a    O;UR                  (       d  M(  UR                  [        UR                  5      5        MN     UR                  [        [        [        U5      5      5        U$ )zget no grad varz@GRAD)
setr   	list_varsrJ   rj   addr   updatelistmap)programno_grad_setno_grad_set_namevars       r'   _get_stop_gradientsr      s    e+K8u  "chh  !5chh!?@	 #
 D%9;!GHIr)   c                 ~   [        U 5      S:X  a  / $ / nU  GH"  n[        U[        R                  R                  5      (       a  UR
                  n[        U[        5      (       a  US   nSnUR                  5        HD  nUR                  U5      (       a"  UR                  U5      R                  (       a  M;  X;  d  MB  SnMF     U(       d  M  UR
                  R                  5       n	U	R                  U5        [        XU5        U	R                  [        [         R"                  5        UR%                  U	5        GM%     U$ )z<
Get the recomputed ops which will insert the backward part
r   FT)rS   
isinstancepaddlestaticOperatorr   tuplerD   has_varr   ri   	append_op	copy_fromr   r   r   r	   BackwardrT   )
descsr   
main_blockvars_should_be_holdr   result_descsr   	is_neededrJ   new_op_descs
             r'   _add_needed_descs_to_blockr      s     5zQ	LdFMM223399DdE""7D	))+D!!$''JNN4,@,L,L. 		 ,
 9**..0K!!$'(LI!!+v?,# $ r)   c                 V    [        X5      n[        U R                  5       U// U5      nU$ r+   )r   r   global_block)main_programlossr   r   op_paths        r'   _find_op_pathr     s3    *<E!!#dVR1AG Nr)   auto_parallel_recomputec                   P   ^  \ rS rSrU 4S jrS rS rS
S jrS rS r	S r
S	rU =r$ )RecomputePassi  c                    > [         TU ]  5         U R                  SS 5        U R                  SS 5        U R                  SS 5        U R                  S/ 5        g )Nr   r   r   rW   )r    r!   set_attr)r%   r&   s    r'   r!   RecomputePass.__init__  sG    fd#nd+mT*-r2r)   c                 P    U R                  S5      c  gU R                  S5      c  gg)Nr   Fr   T)get_attrr,   s    r'   _check_selfRecomputePass._check_self  s)    ==(0== (r)   c                     gNT )r%   
other_passs     r'   _check_conflictRecomputePass._check_conflict#  s    r)   c                    S n[        U5      n[        U5       Vs/ s H  n/ PM     nn[        U5       Vs/ s H  n/ PM     nnSn	Sn
Sn[        U5       H  u  pX-  U:  a  U
S-  n
U" U5        U[        U5      S-
  :  a8  UR                  R                  XS-      R                  R                  :w  a  US-  nX-  U:  a  Mn  [        U5       HV  u  pUR                  R                  U:X  d  M!  U	S-  n	X~   R                  U5        X   R                  UR                  5        MX     M     [        U5      X-   :X  d   SX-    S[        U5       35       eXx4$ s  snf s  snf )zV
Get ops and op_names of each process mesh excluding ops within the first "sr" chunks
c                 j    [        U 5      (       d  [        U 5      (       a  U R                  SS5        g g )Nr>    )r   r   r   )r5   s    r'   reset_recompute_op<RecomputePass.get_ops_per_device.<locals>.reset_recompute_op+  s,    r""&=b&A&A^R0 'Br)   r   r   zbThe sum of pushed_ops_count and reset_ops_count must be the same as length of ops, but the sum is z while length of ops is )rS   r   r@   	dist_attrr}   rT   rh   )r%   r   all_ops_process_meshessrr   all_process_meshes_count_ops_of_stagesop_names_of_stagespushed_ops_countreset_ops_countrl   op_idr5   idr}   s                   r'   get_ops_per_device RecomputePass.get_ops_per_device&  s   
	1 $''=#> %*+C%DE%D%DE*/0H*IJ*IQb*IJ"3IE3b81$"2&C1$LL--qy>++889 A3b8$-.D$E <<,,<$)$!%,,R0&*11"'':	 %F ($ 3x?== 	
p  rA  rT  qU  Um  nq  ru  nv  mw  x	
= 005 FJs
   EEc                    U R                  S5      nU R                  S5      nU R                  S5      nU R                  S5      U l        U R                  SS5      U l        U R                  S/ 5      U l        UR	                  5       n[        XU5      n/ n	U HD  n
U
R                  R                  U	;  d  M  U	R                  U
R                  R                  5        MF     U R                  XU R                  5      u  p[        U5      nU Vs/ s H  n/ PM     nnU R                   H  nUS   nUS:  a  UOUnUS	   nUS
   nUS   n[        U5      n[        U5      nUU-   U-   n[        U5      n[        U5       Hv  u  nnSn[        U5      n[        UU-
  S-   5       HN  nUUUU-    U:X  d  M  UU:  d  M  US-  nUU   R                  [        [        UU-   UU-   U-   5      5      5        MP     Mx     M     [        R!                  SU 35        [        U5       HY  u  nnU HM  n[#        UU   U   5      (       d  M  UU   U   R%                  S5      n UU   U   R'                  SU S-   5        MO     M[     [)        Xx5      n!U!R+                  5       (       d  g U!R-                  U R                  5        U!R/                  5         U!R1                  U5      n"U"/ :X  a  g [        U"5       GH  u  nu  n#n$[        R3                  SUS-    S[        U"5       S35        [        R3                  SU!R4                  U#   R6                   SU!R4                  U#   R8                   SU!R4                  U#   R:                   S35        [        R3                  SU!R4                  U$S-
     R6                   SU!R4                  U$S-
     R8                   SU!R4                  U$S-
     R:                   S35        GM     / n%U" H*  n&U%R                  U!R=                  U&S   U&S   5      5        M,     [?        U%5      [?        U!R@                  5      -
  n'[        R3                  S[        U'5       SU' S35        U%R                  U!RB                  5        U%R                  U!RE                  5       5        [        [?        U%5      [?        U!R@                  5      -  5      n%0 n(0 n)URF                  RI                  5       n*[        U"S S S2   5       GH  u  nn&UU&S   U&S    n+SU 3n,U+ GHW  n
/ n-U-R                  U
R8                  5        U-R                  U
R:                  5        U R                  RK                  U
5      n.U.c   eU- H  n/URM                  U/5      RN                  (       d  U/U%;   a  M+  U/U(;  d  M3  U.R                  n0U/U
R8                  ;   a  U.RQ                  U/5      n1OU.RS                  U/5      n1U/U,-   U(U/'   URM                  U/5      n2URU                  U(U/   U2RV                  U2RX                  U2R6                  U2RN                  U2RZ                  S9n3[]        U R                  U3U1U0U.R^                  S9  M     GMZ     [a        U+U*UU%U R                  5      n4U( H  n5[c        U4U5U(U5   5        M     UU&S   S-
     n6SU4/U)U6Rd                  Rg                  5       '   GM     UR4                  n7[i        U5      n8[k        UU85      n9U R                  Rl                  n:U9S:w  d   e[        [        U75      S-
  U9S5       GH}  nU7U   n;/ n-U-R                  U;R8                  5        U-R                  U;R:                  5        U( H8  n<U<U-;  a  M  U Ro                  U;U(5        [c        U;Rd                  /U<U(U<   5        M:     U;Rd                  Rg                  5       n=U=U:Rp                  ;   d  M  U:Rp                  U=   n>U>U);   d  M  U)U>   S   (       d  M  U;Rr                  n?U?S-
  S:  a<  U7U?S-
     R6                  S:X  a&  U?S-  n?U?S-
  S:  a  U7U?S-
     R6                  S:X  a  M&  U)U>   S   n4S n@[u        [        [        U45      5      5       H  u  nnAURw                  U?S S!9n@U@Rd                  nBUBRy                  UA5        UBR{                  UBR}                  5       5        U R                  R                  UARg                  5       5      nCUCc   eU R                  W@WCU(5        M     S"U)U>   S'   W@(       d  GM  UR4                  W@Rr                  S-
     nDU@nEU R                  RK                  UD5      R                  nFU R                  RK                  UE5      R                  nGUFUG:X  d  GMc  [        UU?WDWEU R                  SS"S#S$9  GM     UR                  5         g s  snf )%Nr   r   rW   r   r   r   refined_ops_patternsnummain_opspre_opssuf_opsr   z,The excluded ops in recompute segments are:
r>   _exclude_rczrecompute segment[/rQ   zsegment start op: [z]: [z] [zsegment end op: [zfound [z'] vars which cross recompute segment: [z6],better checkpoints might be set to reduce those varsr?   z	.subprog_)rJ   shaperg   rh   ri   rj   rk   Tsumnop)rh   Frecompute_segment_dep)r9   syncr>   )Cr   _dist_context_sr_refined_ops_patternsr   r   r   r}   rT   r   rS   r@   r   rC   r   loggerinfor   rE   r   r   r9   r   rM   rZ   debugr   rh   rA   rD   get_out_of_subgraph_varsr   r-   r0   get_input_nodesr   _create_blockru   r   ri   get_input_dims_mappingget_output_dims_mappingry   r   rg   rj   r   rl   r   r   r   original_idr   r   dist_op_contextreset_op_dist_attrgrad_op_id_to_op_idr   reversedr   r   set_original_idr   $get_op_dist_attr_for_program_with_idset_op_dist_attrr   r   )Hr%   r   startup_programcontextr   r   rW   r   r   r   r5   ops_devicesop_names_devicesall_ops_lenr   all_exclude_ops_idsrefined_ops_patternr   r   r   r   main_start_idmain_ops_lenpattern_opspattern_ops_lenr   op_names_devicepattern_countops_len_devicerI   exclude_ops_idsr   rc_mark_strrc_staterX   idx1idx2r   segment
cross_varsvar_name_dictckpt_ops_dictbuffer_blockfwd_ops
var_suffixinput_and_output_namesr   rJ   r   r   ref_varrc_varsegment_descskeyckpt_opr   loss_oploss_op_idxr   grad_opvarnamer   	fwd_op_idr   rc_opop_descrc_descfwd_op_dist_attrprior_opposterior_op
prior_meshposterior_meshsH                                                                           r'   _apply_single_impl RecomputePass._apply_single_implL  s
   }}V$mmM2 $.E F!]]>:==q)%)]]3I2%N" "..0
K@ "$B||((0FF&--bll.G.GH 
 )-(?(?TXX)
% 'l+;<+;ar+;<#'#=#=%e,Cax[  +:6H))4G))4GLMx=L!H,w6K!+.O'01A'B#O !!$_!5~?!CDA'A,?@KO)C/%*+B/66  %$%$5$%$5$D!" E (C $>< 	;<O;PQ	
 $--@#AB(";r?5#9::"-b/%"8"="=n"MKOE*44&m(C ) $B "*6$$&& 	2243E3EF223HIr>(2OA|dLL-a!eWAc(m_AFGLL%hll4&8&=&=%>d8<<PTCUCeCeBffijrjvjvw{j|  kN  kN  jO  OP  Q LL#HLL$:$?$?#@X\\RVYZRZE[EkEkDllopxp|p|  ~B  EF  ~F  qG  qX  qX  pY  YZ  [  3 !G&&11'!*gajI   ,-H4H4H0II
c*o&&Mj\ ZC C	
 	""8#9#9:""8#;#;#=>"#$s8+?+?'@@

 !))779#HTrTN3JAwgaj71:6G$QCJ)+&&--b.@.@A&--b.A.AB &&CCBG ! (3332D"t,88#66 =0+;+H+H(2#5#55 0 G G M -
 !1 H H N - /3Z.?d+",.."6!+!6!6!.t!4")--")--!((/(;(;*1*?*? "7 " * ..",,%5%>%>= 3 ` 7#""M %]Cs1CD % gaj1n-G9=}8MM',,2245E 4J nnj)$Z9,,<<b    s3x!|["5A!fG%'""))'*A*AB"))'*B*BC("88''?gll^WmG6LM	 ) ",,224KoAAA+??L	--	2J12M2M!++C'Q,3sQw<+<+<+Eq 'Q,3sQw<+<+<+E$1)$<Q$?M E&.tIm4L/M&N
7 * B Be !C ! #(**))'2//

=+/+=+=+b+b#//1,(  0;;;--!#3] 'O  38M),Q/u#->>%))a-#@', ..KK (*l # !..KK ,*l ' &7; * # ( , $ 2 2-1%*-D	q 6D 	##%K =s   f?c                 x   U R                   R                  U5      nUc   eUR                   H=  nXBR                  5       ;   d  M  UR	                  U5      nUR                  X$   U5        M?     UR                   H=  nXbR                  5       ;   d  M  UR                  U5      nUR                  X&   U5        M?     g r+   )	r   ru   rA   keysget_input_dist_attrr   rD   get_output_dist_attrset_output_dist_attr)r%   r5   r  op_dist_attrrt   in_dist_attroutputout_dist_attrs           r'   r   RecomputePass.reset_op_dist_attrK  s    ))FFrJ'''''E**,,+??F00!(, ( ))F++-- , A A& I11!)= *r)   c                    [        5       nSUl        UR                  Ul        UR                  Ul        UR                  Ul        UR
                  Ul        UR                  R                  5        H[  nXSR                  5       ;   a%  UR                  U   nUR                  X5   U5        M;  UR                  U   nUR                  XV5        M]     UR                  R                  5        H[  nXsR                  5       ;   a%  UR                  U   nUR                  X7   U5        M;  UR                  U   nUR                  Xx5        M]     U R                  R                  X5        g r   )r   r9   impl_idx	impl_typer}   rl   inputs_dist_attrsr7  r   outputs_dist_attrsr:  r   set_op_dist_attr_for_program)	r%   r5   old_dist_attrr  new_dist_attrrt   r<  r=  r>  s	            r'   r  RecomputePass.set_op_dist_attr[  s>   (*%)"!.!7!7"/"9"9%2%?%?"!.!7!7"4499;E**,,,>>uE11!(,  ->>uE11%F < $66;;=F++-- - @ @ H22!)= !. @ @ H226I > 	77Jr)   )r   r   r   )r   )r   r   r   r   r!   r   r   r   r4  r  r  r   r   r   s   @r'   r   r     s0    3$1L}&~ K Kr)   r   r+   )(loggingrF   r   paddle.base.backwardr   r   r   r   r   /paddle.distributed.fleet.meta_optimizers.commonr   r	   paddle.frameworkr
   paddle.utilsr   #auto_parallel.static.dist_attributer   auto_parallel.static.utilsr   r   r   r   r   r   r   r   utils.log_utilsr   	pass_baser   r   INFOr   r   r   r   r   r   r   r   r)   r'   <module>rS     s     	   P ! $ B	 	 	 ) .	GLL	!b$\ b$J"> ()`KH `K *`Kr)   