
    Αi                     F   S SK r S SKJrJr  S SKJr  S SKJr  S SKJ	r	J
r
  S SKJrJr  S SKJr  S SKJrJrJrJrJrJrJrJrJr  S S	KJr  S
SKJr  S
SKJrJ r J!r!J"r"J#r#  SSK$J%r%J&r&  \" 5       r'/ SQr(S r)S r* " S S5      r+ " S S5      r,\&" S5       " S S\%5      5       r-g)    N)
check_typecheck_variable_and_dtype)OperatorDistAttr)get_world_process_group)6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_var_dist_attr)OP_ROLE_KEYOpRole)core)	AutoMixedPrecisionLists_is_in_black_varnames_keep_fp32_input_keep_fp32_output_rename_arg_valid_typesfind_op_indexfind_true_post_opfind_true_prev_op)unique_name   )ProcessMesh)is_backward_opis_forward_opis_loss_grad_op
is_loss_opis_optimize_op   )PassBaseregister_pass)create_py_readercreate_double_buffer_readerwhilec                 X    U [         R                  :X  a  gU [         R                  :X  a  gg)Nfp16bf16fp32)paddlefloat16bfloat16dtypes    k/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/passes/auto_parallel_amp.py_dtype_to_strr-   =   s#    	&//	!    c                     U S:X  a$  [         R                  R                  R                  $ U S:X  a$  [         R                  R                  R                  $ [         R                  R                  R
                  $ )Nr(   r)   )r   VarDescVarTypeFP16BF16FP32)dstrs    r,   _str_to_dtyper6   F   sT    y||##(((		||##(((||##(((r.   c                       \ rS rSr    SS jr\S 5       r\S 5       r\S 5       r\S 5       r	\S 5       r
\S	 5       rS
 rS rS rSrg)AMPListsO   Nc                 h    [        [        U5      [        U5      [        U5      US9U l        X@l        g )Nr*   )r   set	_amp_list_dtype)self
white_list
black_listblack_varnamesr+   s        r,   __init__AMPLists.__init__P   s-     1
OS_c..A
 r.   c                 .    U R                   R                  $ N)r<   r?   r>   s    r,   r?   AMPLists.white_list\       ~~(((r.   c                 .    U R                   R                  $ rE   )r<   r@   rF   s    r,   r@   AMPLists.black_list`   rH   r.   c                 .    U R                   R                  $ rE   )r<   	gray_listrF   s    r,   rL   AMPLists.gray_listd   s    ~~'''r.   c                 .    U R                   R                  $ rE   )r<   rA   rF   s    r,   rA   AMPLists.black_varnamesh   s    ~~,,,r.   c                     U R                   $ rE   )r=   rF   s    r,   r+   AMPLists.dtypel   s    {{r.   c                     U R                   $ rE   )r<   rF   s    r,   amp_listAMPLists.amp_listp   s    ~~r.   c                 ,    [        XR                  5      $ rE   )r   r<   )r>   ops     r,   _is_in_black_fp32_varnames#AMPLists._is_in_black_fp32_varnamest   s    $R88r.   c                 P    UR                   R                  (       d  g[        X5      $ NT)amp_optionsenabler   )r>   rV   in_names      r,   _op_keep_fp32_inputAMPLists._op_keep_fp32_inputw   s    ~~$$,,r.   c                 P    UR                   R                  (       d  g[        X5      $ rZ   )r[   r\   r   )r>   rV   out_names      r,   _op_keep_fp32_outputAMPLists._op_keep_fp32_output|   s    ~~$$ ..r.   )r<   r=   )NNNr(   )__name__
__module____qualname____firstlineno__rB   propertyr?   r@   rL   rA   r+   rS   rW   r^   rb   __static_attributes__ r.   r,   r8   r8   O   s     
 ) ) ) ) ( ( - -    9-
/r.   r8   c                   >    \ rS rSrS rS rS rS rS rS r	S r
S	rg
)AMPState   c                     Xl         X@l        X l        X0l        UR                  R
                  U l        0 U l        0 U l        0 U l	        g rE   )
programdist_context	amp_lists	amp_dtypedist_op_contextgrad_op_id_to_op_idgrad_op_to_op_map_op_fp16_dict_var_name_dictout_var_op_deps)r>   ro   rq   rr   rp   s        r,   rB   AMPState.__init__   sH    (""((<< 	
   !r.   c                 :    U R                   R                  US 5      $ rE   )rv   get)r>   op_ids     r,   _is_fp16_opAMPState._is_fp16_op   s    !!%%eT22r.   c                    SnU R                   R                   GH  nUR                   GH  nUR                   Hs  nX@R                  ;  a*  UR
                  R                  5       /U R                  U'   M<  U R                  U   R                  UR
                  R                  5       /5        Mu     [        U5      (       a  SnUR                  [        ;   a  M  [        U5      (       a  U R                  X2R                  U5        M  [        U5      (       a  UR
                  R                  5       U R                  ;   a  U R                  UR
                  R                  5          nXPR                  ;   d   [!        U5      5       eU R#                  U5      U R                  UR
                  R                  5       '   GM  GM  [%        U5      (       d  GM    GM     GM     U R                   R                   H  nU R'                  U5        M     U$ )NFT)ro   blocksopsoutput_arg_namesrx   descoriginal_idextendr   type__amp_skip_ops__r   _mark_black_white_opsr   ru   rv   strr}   r   _cast_block)r>   is_trainblockrV   name	fwd_op_ids         r,   build_stateAMPState.build_state   s   \\((Eii//D#7#7768gg6I6I6K5L,,T2,,T299WW0023	 0 #2&&#H77.. $$..r99eD#B''ww**,0F0FF$($:$:GG//1%	  ),>,>>GBG> ,,Y7 **277+>+>+@A G $B''9   )@ \\((EU# ) r.   c                 $   UR                   R                  (       d(  SU R                  UR                  R	                  5       '   g UR
                  S:X  a;  SUR                  S   ;   a(  SU R                  UR                  R	                  5       '   g UR
                  S:X  a  UR                  S   n[        U R                  U   5      S:  av  U R                  U R                  U   S   5      (       d(  SU R                  UR                  R	                  5       '   g SU R                  UR                  R	                  5       '   g U R                  R                  bH  U R                  R                  U5      (       a(  SU R                  UR                  R	                  5       '   g UR
                  U R                  R                  ;   a(  SU R                  UR                  R	                  5       '   g UR
                  U R                  R                  ;   a(  SU R                  UR                  R	                  5       '   g UR
                  U R                  R                   ;   Ga  SnSnUR"                   GH  nU(       d  M  UR%                  U5       H  nUR'                  U5      n	U	R(                  c  M#  U	R(                  UL a  [+        X!U5      n
U
c  MC  OU	R(                  n
U R                  U
R                  R	                  5       5      SL d$  U
R
                  U R                  R                  ;   a  SnM  U R                  U
R                  R	                  5       5      SL d&  U
R
                  U R                  R                  ;   d  M  SnM     GM     U(       a(  SU R                  UR                  R	                  5       '   g U(       a(  SU R                  UR                  R	                  5       '   g g SU R                  UR                  R	                  5       '   g )NFassignarray_r   r   T)r[   r\   rv   r   r   r   input_arg_namesr   lenrx   r}   rq   rA   rW   r@   r?   rL   input_namesinput_var_recursiverV   r   )r>   rV   r   r   ra   is_black_opis_white_opr]   in_var_namein_varprev_ops              r,   r   AMPState._mark_black_white_ops   s*   ~~$$8=Drww2245 77h8r/A/A!/D#D8=Drww2245 77h**1-H4''12Q6''(<(<X(Fq(IJJ@ED&&rww':':'<=  AED&&rww':':'<= NN))599"==8=Drww224577dnn///8=Drww2245WW1118<Drww2245WW000KK>>7')xx'8!&!5!5k!B!99,$#YY"_&7&MG& (  / '-iiG !,,W\\-E-E-GH$%&||t~~/H/HH*.K ,,W\\-E-E-GHDP&||t~~/H/HH*.K- (9 *4 <A""277#6#6#89<@""277#6#6#89 9>Drww2245r.   c           	      r
   SnSnU[        UR                  5      :  Ga	  UR                  U   nSnUR                  [        ;   a  US-  nMF  [	        U5      (       Gad  U R                  UR                  R                  5       5      SL aW  U R                  UUU[        U R                  5      [        R                  R                  R                  U R                  5      nGO&U R                  UR                  R                  5       5      SL a  UR!                  S5      (       aH  UR#                  S5      [$        R&                  :X  a%  UR)                  S[        U R                  5      5        U R                  UUU[        R                  R                  R                  [        U R                  5      U R                  5      nGOE[+        U5      (       Ga4  U R                  R-                  U5      n[+        U5      (       aV  [	        UR                  US-
     5      (       d   [/        UR                  US-
     5      (       a  UR0                  (       d  US-  nUR                  R                  5       U R2                  ;   Gaf  U R                  UR                  R                  5       5      SL aX  U R5                  UUU[        U R                  5      [        R                  R                  R                  U R                  U5      nGOU R                  UR                  R                  5       5      SL a  UR!                  S5      (       aH  UR#                  S5      [$        R&                  :X  a%  UR)                  S[        U R                  5      5        U R5                  UUU[        R                  R                  R                  [        U R                  5      U R                  U5      nGO$UR                  S:X  a  UR                  R7                  5       S   nUR                  R9                  5       S   nUR;                  U5      n	UR=                  U5      n
UR8                   HJ  nU
R>                  UR;                  U5      R>                  :X  a  M.   U
 SUR;                  U5       SU 35       e   U	R                  RA                  U
R>                  5        O8[C        UR#                  S5      5      S	:X  a  O[E        S
UR                   S35      eX%S-   -  nU[        UR                  5      :  a  GM	  URG                  5         g )Nr   r   FTr+   sumz, op_rolei  'z/' op is not supported in the complete amp pass.)$r   r   r   r   r   r}   r   r   _insert_cast_op_forwardr6   rr   r   r0   r1   r4   rp   has_attrattrr'   float32	_set_attrr   get_op_dist_attr_for_programr   is_recomputeru   _insert_cast_op_backwardr   r   var_find_var_recursiver+   	set_dtypeint
ValueError_sync_with_cpp)r>   r   idxappended_grad_timesrV   num_cast_opsop_dist_attrout_var_namer   out_varr   s              r,   r   AMPState._cast_block  s!   C		N"3BLww**qr""##BGG$7$7$9:eC#'#?#?%dnn5,,11))$L %%bgg&9&9&;<D G,,GGG,>WmDNN.KL#'#?#?,,11%dnn5))$L  ##  $00MM  ""%%!%))C!G"455!%))C!G"455'44+q0+77&&(D,B,BB''(;(;(=>%G'+'D'D!)$..9 LL0055 --/( ))"''*=*=*?@DH KK00 " 0FNN BLL-2OP'+'D'D! LL0055)$..9 --/( WW%#%77#;#;#=a#@L"$''"9"9";A">K#ii5G"66{CF')'9'9%||uyy/E/K/KK %hb;)?(@2$GK (: LL**6<<8+,3$BGG9$ST  !##Cu C		N"v 	r.   c                 8	   Sn0 nUR                   S:X  a  UR                  UR                  S5      S   5      n	UR                  UR                  S5      S   5      n
UR	                  SU	R
                  5        U
R                  R                  [        R
                  " UR                  S5      5      5        U$ UR                   GH}  nU[        R                  :X  a"  U R                  R                  X+5      (       a  M:  UR                  U5       GH-  nUR                  U5      n	U	R                   [        ;  d  U	R
                  U:X  a  M;  U	R
                  U:X  Ga  U	R                  S-   [!        U5      -   nUR"                  R%                  U5      nXU	R                  '   UR'                  U5      nUc   eUb  UR
                  U:w  Ga  UR)                  U	R                  5      nUc   eUR*                  nUR,                  nUR.                  nUUl        UR1                  UU5        UR3                  UUSU	R4                  S	9n[7        UUUUUS
9  SnUR9                  S5      (       a  UR                  S5      nUR;                  USSU	0SU0U	R
                  UR
                  S.S9nUR	                  SU5        [=        UUUUUS
9  US-  nO-UR)                  U	R                  5      nUR1                  UU5        [?        X)R                  U5        GM  UR9                  S5      (       d  GM  UR	                  SU5        GM0     GM     XR@                  UR                  RC                  5       '   U[        R                  :X  Ga  U[E        U RF                  5      :X  Ga   URH                   H  nU R                  RK                  UU5      (       a  M&  UR                  U5       H  nURM                  U5      n
U
R                   [        ;  a  M*  U
R
                  [        R                  :X  d  MJ  U
R                  R                  [E        U RF                  5      5        UR9                  S5      (       d  M  UR	                  S[E        U RF                  5      5        M     M     U$ )z7
only for forward cast
modified from paddle.static.amp
r   castXOutin_dtype	out_dtypez.cast_F)r   r+   persistablestop_gradientchunk_id/op_namescope)r   r   r   inputsoutputsattrsr   )'r   r   r   outputr   r+   r   r   r'   r   r   r   rq   r^   r   r   r-   varsr{   r   get_input_dist_attrprocess_meshdims_mappingr   set_input_dist_attr
create_varr   r   r   _insert_op_without_syncr   r   rw   r   r6   rr   output_namesrb   r   )r>   r   rV   r   	src_dtype	dst_dtyperp   r   var_name_dictr   r   r]   r   	cast_namecast_varconsume_op_attrin_var_dist_attrref_meshref_mappingref_chunk_idr   cast_opra   r   s                           r,   r    AMPState._insert_cast_op_forwardd  s    77f..rxx}Q/?@F//		%0@0CDGLLV\\2LL""6<<0D#EF~~GV^^+NN66rCC!xx022;?;;l2flli6O<<9,h.y1II   %zz~~i8H1:&++.&2&O&O'O +666'8>>Y+F ,;+N+N"KK,(  0;;;#3#@#@&6&C&C'6'?'?4@(1';;%'7 $)#3#3!*"+(-*0*>*>	 $4 $ *($'$%1 (+;;~66+-77>+BL"'"?"?!'$'=%*H$5,2LL-5^^# #@ 	#  ))*L O#$'(%1 %)+:+N+N"KK,( (;;%'7  KK;{{:..Z;]  1 &j 6CBGG//12&9NN9
 ,
 OO>>66r8DD$&IIh$7L#22<@G||<7 }}6..}T^^/LM;;{33LL +]4>>-J %8 , r.   c                 `   S nS n	Sn
UR                   R                  5       nUR                  nU R                  U   nUR                  S:X  Ga  UR                  S5      S   nUR                  S5      S   nUR                  U5      nUR                  U5      nUR                  USUR                  S5       5      nUR                  USUR                  S5       5      nUR                  S	UR                  5        UR                  S
UR                  5        UR                   R                  UR                  5        UR                   R                  UR                  5        U
$ UR                   GH\  nU[        R                  :X  aX  U" X.5      (       aK  UR                  U5       H4  nUR                  U5      nUR                  [        R                  :X  a  M4   e   Mp  UR                  U5       H  nUR                  U5      nUR                  U:X  d  M&  UR!                  U5      nUU R"                  U   ;   aS  U R"                  U   U   nUR                   R%                  UU5        UR'                  U5      nUR)                  UU5        M  UR                  U:X  a  M   SUR                   SU SU SUR                   SU 3
5       e   GM_     UR*                   GH  nU[        R                  :X  aX  U	" X/5      (       aK  UR                  U5       H4  nUR                  U5      nUR                  [        R                  :X  a  M4   e   Mp  UR                  U5       GHi  nUR                  U5      nUSUR                  S5       nUR                  U5      nUR                  UR                  :w  a%  UR                   R                  UR                  5        UR                  U:X  Ga  UU R"                  U   ;   Ga  UR!                  U5      nU R"                  U   U   nSnSU;   a  UUR                  S5      S nUS-   U-   nUR,                  R/                  U5      nUb  UR                  U:w  Ga=  UR                   R1                  UU5        UR3                  U5      nUR4                  nUR6                  n UR8                  n!U!Ul        UR;                  UU5        U c   eUR=                  UUR>                  USUR@                  S9n[C        UUU UU!S9  UURD                  U   U'   URG                  US-   SSU0SU0UR                  UR                  [H        RJ                  S.S9n"U"RM                  S5        U"RM                  S5        U"RM                  S5        [O        U"UU UU!S9  U
S-  n
GMQ  GMT  GMW  UR                  U:X  a  GMj   e   GM     URQ                  S5      (       aH  URS                  S5      [        R                  :X  a%  UR                  S[U        U RV                  5      5        U
$ )zonly for backward castc                 2    U R                   nUS;   a  US;  $ g)Nlayer_norm_grad>   r   Y@GRADFr   )rV   r]   op_types      r,   r   ;AMPState._insert_cast_op_backward.<locals>._keep_fp32_input  s"    ggG--o55r.   c                 2    U R                   nUS;   a  US:g  $ g)Nr   X@GRADFr   )rV   ra   r   s      r,   r   <AMPState._insert_cast_op_backward.<locals>._keep_fp32_output  s"    ggG--8++r.   r   r   r   r   N@r   r   zop [z] expect input [z] to be dtype [z] BUT got [z].  z@RENAME@GRADF)r   shaper+   r   r   r   r   r   r   r   r   op_role_varr   with_quant_attrr+   ),r   r   rs   ru   r   r   r   r   findr   r+   r   r   r'   r   r   r   rw   _rename_inputr   r   r   r   r{   _rename_outputget_output_dist_attrr   r   r   set_output_dist_attrr   r   r   r   grad_var_to_var
_insert_opr
   Backward_remove_attrr   r   r   r6   rr   )#r>   r   rV   r   r   r   rp   r   r   r   r   r   rs   r   r]   ra   r   r   	in_var_fw
out_var_fwr   r   r   r   r   out_var_name_prefixfwd_varfwd_cast_namesuffixr   out_var_dist_attrr   r   r   r   s#                                      r,   r   !AMPState._insert_cast_op_backward  s   		 gg))+&66**;7	77fhhsmA&Gyy'*H..w7F//9G11':MGLL<M2NOI22-8==-.J LLY__5LLj&6&67KK!!)//2LL"":#3#34~~GFNN*/?/L/L#%88G#4K"11+>F!<<6>>999 $5 !xx0--k:<<9,&2&O&O'O #d&9&9)&DD %)$7$7	$B;$O	--k9E+:+N+N',( (;;%'7  &||y8 "277)+;G9OT]S^^ijpjvjviwwz{}z~8%  1 &: HFNN*/@/N/N$&IIh$7L#22<@G"==FNN::: %8  "		( 3..|<&23K\5F5Fs5K&L#../BC==GMM1LL**7==9==I-*d.A.A).LL
 )EEbI ( )-(;(;I(F/) "$$4%1 , 1 1) < >&F %2G$;f$D	#(::>>)#<#+x~~/JGG22<K / D D$0!" .
 (9'E'EH*;*H*HK+:+C+CL9E-6+@@ )+< $/#::#:','7'7%.&-mm&/,1.5.C.C (8 (H . , ( + ()5 ,9 ,;; 3') ',&6&6 #a%+(+X).(80818/5'" '7 
'G $00?#00@#001BCR ' ( + ,)5 )A-Lo 0K# MT #==I555g !4 (x ;;wBGGG$4$FLL-"?@r.   )rv   rw   rr   rq   rp   ru   rx   ro   N)rd   re   rf   rg   rB   r}   r   r   r   r   r   ri   rj   r.   r,   rl   rl      s-    " 3%NE>N^@yvhr.   rl   auto_parallel_ampc                   d   ^  \ rS rSrU 4S jrS rS rS rS rS r	S r
S	 rS
 rS rS rSrU =r$ )AMPPassi  c                 v  > [         TU ]  5         U R                  SS5        U R                  SS 5        U R                  SS 5        U R                  SS 5        U R                  SS 5        U R                  SS 5        U R                  SS	5        U R                  S
S5        U R                  SS5        U R                  SS5        U R                  SS5        U R                  SS5        U R                  S/ 5        U R                  S/ 5        U R                  SS5        S U l        S U l        S U l        S U l        g )Nr+   r   lossrp   custom_white_listcustom_black_listcustom_black_varnamesinit_loss_scalingg      @incr_every_n_stepsi  decr_every_n_nan_or_infr   
incr_ratiog       @
decr_ratiog?use_dynamic_loss_scalingF
input_dataparams_grads)superrB   set_attr_loss_loss_scaling_num_good_steps_num_bad_steps)r>   	__class__s    r,   rB   AMPPass.__init__  s   gr"fd#nd+)40)40-t4)73*D1/3lC(lC(0%8lB'nb)gr"
!#"r.   c                 2   U R                  S5      S;  a  gU R                  S5      S:  a  gU R                  S5      S:  a  gU R                  S5      S:  a  gU R                  S5      S:  a  gU R                  S	5      S:  a  gU R                  S
5      c  gg)Nr+   )r(   r)   Fr  r   r	  r
  r  r  rp   T)get_attrrF   s    r,   _check_selfAMPPass._check_self  s    ==!)@@==,-1==-.2==23a7==&*==&*==(0r.   c                     grZ   rj   )r>   
other_passs     r,   _check_conflictAMPPass._check_conflict  s    r.   c           	         U R                  S5      U l        U R                  S5      U l        U R                  S5      U l        [	        [        U R                  S5      5      [        U R                  S5      5      [        U R                  S5      5      U R                  5      n[        R                  R                  X5         [        XU R                  U R                  5      nUR                  5       nU(       a+  U R                  5         U R                  U R                  5        U(       a  U R                  S:X  a  U R                  5         U R                  5         U R                  S5      (       d  U R                  S	5      S
:w  a  U R                  5       u  pxU R                  S5      (       a  U R!                  WW5        S S S 5        g ! , (       d  f       g = f)Nrp   r  r+   r  r  r  r(   r  r        ?)r  rp   r  rr   r8   r;   r'   staticprogram_guardrl   r   _update_backward_cast_ops
_cast_loss_init_amp_var_scale_loss_check_and_update_gradient_update_loss_scaling)	r>   main_programstartup_programcontextrq   	amp_stater   grads	found_infs	            r,   _apply_single_implAMPPass._apply_single_impl  s_    MM.9 MM.9w/123123567NN	
	 ]]((G 9J9JI !,,.H..0/DNNi7""$  "MM"<==}}%89S@'+'F'F'H$E==!;<<--eY?) HGGs   D G
Gc           
         [         R                  R                  5       R                  5       nUR	                  5         U R
                   GHj  u  p#UR                  nUR                  [         R                  :X  d  M2  UR                  S:X  d  MD  [        UR                  S5      5      [        [        R                  5      :X  a'  UR                  S5      (       a  UR                  S5        [!        UR"                  XCR$                  5      nU(       a  ['        SU SUS    35      eXAR"                  S   :X  a  M  UR(                  R+                  5       nUR-                  UR(                  5        [         R                  R/                  UUSSSSS	9nUR"                  R1                  U5        U R2                  R5                  U5      nU R2                  R5                  UR7                  UR8                  S   5      5      n	Uc   eU	c   e[;        UUR<                  UR>                  U R2                  UR@                  S
9  UR<                  U	l        UR>                  U	l        UR@                  U	l         [C        UR(                  UR(                  5      n
U
S:X  a  ['        SU S35      eURE                  U
SS9  GMm     UR	                  5         g)zW
move param grad cast to the end of backward segment
in order to enable fp16 allreduce
r   r   r   zThe cast op zH's output should not beused by a non-optimize op, however, itis used by r   N)r   r   r   r   r   r   r   zThe op z is not in programF)sync)#r'   r"  default_main_programglobal_blockr   r  rV   r+   r   r   r   r   r
   r   r   r   r   r   r   r   r   	append_op	copy_fromOperatorappendrp    get_tensor_dist_attr_for_programr   r   r   r   r   r   r   
_remove_op)r>   
main_blockpgrV   post_opsnew_op_descnew_opparam_dist_attroutput_dist_attrop_idxs              r,   r$  !AMPPass._update_backward_cast_ops  sp    ]]779FFH
!!#%%DABww&..(RWW->rwwy)*cOO/ kk-00OOM2,Z^^RH$&rd +&&.qk]4  ++ )oo779%%bgg.//$$  0  %%f- %%FFqI   %%FF"r':':1'=> !
 '222'333F#00#00%%,55 1@0L0L -0?0L0L -,;,D,D )&z@R<$wrd2D%EFF%%f5%9u &x 	!!#r.   c           	         [         R                  R                  5       R                  5       nUR	                  5         U R
                   VVs/ s H  u  p#UPM	     nnn[        US[        [        4S5        U H  n[        US/ SQS5        M     UR                  [        R                  " SR                  SS/5      5      S/S[        R                  R                   R"                  S	S	S
9n[%        U R&                  US/[(        R*                  SS9  X@R,                  S.nXFS.nS[.        R0                  0n	UR3                  SUUU	S9n
[5        U
R6                  5      n[9        [(        R*                  5      Ul        SUl        SUl        [A        [(        R*                  5      S:  a  SUl!        U Ho  nU R&                  RE                  U5      nUc   eURG                  URH                  URJ                  5        URM                  URH                  URJ                  5        Mq     U R&                  RO                  X5        XF4$ s  snnf )Nxcheck_finite_and_unscaler(   r   float64.find_infinite_scaletmpr   boolF)r   r   r+   r   r   r   r3  r   r   )r   Scale)r   FoundInfiniter   r   )(r'   r"  r5  r6  r   r  r   tuplelistr   r   r   generate_with_ignorable_keyjoinr   r0   r1   DENSE_TENSORr   rp   world_process_groupranksr  r
   Optimizer7  r   r   r   r   impl_idxr   r   	impl_typer;  set_input_dims_mappingr   r   set_output_dims_mappingset_op_dist_attr_for_program)r>   r=  _r?  r.  er/  r   r   r   rB  new_op_dist_attrg_dist_attrs                r,   r(  "AMPPass._check_and_update_gradient#  s"   ]]779FFH
!!##0010tq015#t}.HIA$1*	  ))88/78 #%%22 * 	
	 	D%%	
 '9'9:<FOO,%%+	 & 
 ,FKK8(34G4M4M(N%$%!$%!"(()A-)C&A++LLQOK***3300 4400  	66vPq 2s   H>c                    [         R                  R                  [        R                  " S5      S/U R                  S5      SSS9U l        [        U R                  U R                  S/[        R                  SS	9  U R                  S
5      (       a  [         R                  R                  [        R                  " S5      S/SSSS9U l        [        U R                  U R                  S/[        R                  SS	9  [         R                  R                  [        R                  " S5      S/SSSS9U l        [        U R                  U R                  S/[        R                  SS	9  g g )Nloss_scalingr   r  r   T)r   r   valuer+   r   r3  r   r   r  num_good_stepsint32num_bad_steps)r'   r"  create_global_varr   generater  r  r   rp   rW  rX  r  r  rF   s    r,   r&  AMPPass._init_amp_vara  sW   #]]<<%%n5#-- 34 = 
 	D%%	
 ==344#)==#B#B ))*:;c  $C $D  !!$$#)) #)--"A"A ))/:c  #B #D !!###))/ 5r.   c           
      ~   [         R                  R                  5       R                  5       nUR	                  5         U R                  S5      nUc   eUR                  nU R                  R                  U5      nUR                  [        R                  R                  R                  :w  Gaq  [        R                  " UR                   S-   5      nUR#                  U[        R                  R                  R                  S9nU R                  R%                  U5      nUR&                  n	UR(                  n
Xl        U R                  R+                  Xx5        [-        UR.                  UR.                  5      nUR1                  US-   SSU/0SU/0UR                  [        R                  R                  R                  UR3                  5       [4           S.S	9nUR7                  [4        [8        R:                  5        [=        UU	UR>                   Vs/ s H  nS
PM     snU R                  U
S9  S nSn[A        URB                  US  5       HA  u  nnURD                  S:X  a  [G        U5      (       a	  UnUS-   n  O[I        U5      (       d  MA    O   Uc   S5       eUR#                  [        R                  " US-   5      UR>                  [        R                  R                  R                  URJ                  S9n[M        U R                  US
/[O        UR>                  5      -  U	U
S9  URP                  S   nURS                  UUR                   5        [=        UU	S
/[O        UR>                  5      -  U R                  U
S9  UR1                  X-   SSU/0SU/0[        R                  R                  R                  [U        U5      [8        RV                  S.S	9n[=        UU	UR>                   Vs/ s H  nS
PM     snU R                  U
S9  UnUnU RY                  SU5        X0l-        UR	                  5         g s  snf s  snf )Nr  z
.cast_fp32)r   r+   r   r   r   r   r   r   r3  r      fill_constantThere is not loss_grad op.r   r   r   r+   r   r   ).r'   r"  r5  r6  r   r  rV   rp   r   r+   r   r0   r1   r4   r   rk  r   r   r;  r   r    set_tensor_dist_attr_for_programr   r   r   	all_attrsr	   r   r
   Forwardr   r   	enumerater   r   r   r   r   r   r   r   r   r6   r   r  r  )r>   target_dtyper=  r  loss_oploss_op_dist_attrtmp_name	cast_lossloss_dist_attrr   r   loss_op_idxr   ifirst_backward_opinsert_op_offsetr   rV   cast_loss_gradpre_grad_namecast_grad_ops                        r,   r%  AMPPass._cast_loss  s   ]]779FFH
!!#}}V$'' --JJ
 ::--222"++DII,DEH"--T\\%9%9%>%> . I "..OON )55H,55L&2#>>
 (
FK ++adV}, $

!%!5!5!:!:&002;? , 
G k6>>:B!ZZ(ZZ(!!% !% $Z^^KL%ABR77o-/"2E2E(*%'*Qw$!"%% C %0N2NN0'22 ))(W*<=jjll**// ,,	 3 N !!s4::&% .>>qAM,,]N<O<OPB!s4::&!!% &00.n-.0 $ 4 4 9 9!.|!<% 1 
L C!ZZ(ZZ(!!% GDMM&$'
!!# )n )s   P5+P:c           	      	   [         R                  R                  5       R                  5       nU R	                  S5      nUc   eUR
                  nU R                  R                  U5      nU R	                  S5      (       d  U R	                  S5      S:w  Ga  [        UR                  UR                  5      nUR                  nUR                  nUR                  [        R                  " S5      UR                  UR                   UR"                  S9n[%        U R                  UUR                   V	s/ s H  n	SPM     sn	UUS9  UR'                  US	-   S
U/U R(                  /S.SU/0SUR+                  5       [,           0S9n
UR/                  [,        [0        R2                  5        [5        U
UUR                   V	s/ s H  n	SPM     sn	U R                  US9  S nUR6                  US   H9  nUR8                  S:X  a  [;        U5      (       a  Un  O[=        U5      (       d  M9    O   Uc   S5       eUR                  [        R                  " S5      S-   UR                  UR                   UR"                  S9n[%        U R                  US/[?        UR                  5      -  UUS9  UR@                  S   nURC                  XRD                  5        [5        UUS/[?        UR                  5      -  U R                  US9  Xl        URG                  5         UR                  R'                  US-   5      nURI                  S5        URK                  SURD                  /5        URK                  SURD                  /5        URK                  SU R(                  RD                  /5        URM                  SU/5        URM                  S/ 5        UR/                  [,        [0        RN                  5        UR/                  SS5        [         R                  RQ                  X5      nUR6                  RS                  US-   U5        URG                  5         UR6                  US-      nUR8                  S:X  d   e[5        UUUR                   V	s/ s H  n	SPM     sn	U R                  US9  OUnXl*        URG                  5         g s  sn	f s  sn	f s  sn	f )Nr  r  r  r!  scaled_lossrq  r3  r   r   elementwise_mul)r   Yr   r   r   ro  rp  r   r   rn  elementwise_mul_gradzOut@GRADr   r  r   r   axis)+r'   r"  r5  r6  r  rV   rp   r   r   r   r   r   r   r   rk  r   r+   r   r   r   r  rs  r	   r   r
   rt  r   r   r   r   r   r   r   r   r   r   set_type	set_input
set_outputr   r9  insertr  )r>   r=  r  rw  rx  r|  r   r   r  r}  elementwise_mul_opr~  rV   scaled_loss_gradr  elementwise_mul_grad_op_descelementwise_mul_grad_ops                    r,   r'  AMPPass._scale_loss  sw   ]]779FFH
}}V$'' --JJ

 MM455}}01S8'
FK )55H,55L$// ))-8jjjj ,,	 0 K !!!ZZ(ZZ(% ",!6!6a&"V4+=+=*>?.w002;? "7 " k6>>:B"!ZZ(ZZ(!!% !% nn[\277o-/"2E2E(*%!"%% 3 %0N2NN0)44 ))-87Bjjjj ,,	  5   !! s4::&% .>>qAM,,44 C!s4::&!!% #4%%'+5??+E+Ea,( )112HI(22-223 )223D(22d((--. )33H}oN(33HbA(22;P(2262>&,mm&<&<'# NN!!+/3JK%%'&0nn[1_&E#*//3IIIIB'!ZZ(ZZ(!!% K 
!!#{ )$ )H )s   1R=<SSc                    [         R                  R                  5       R                  5       nUR	                  5         [        U R                  SSS/S5        [        US[        [        4S5        U H  n[        US/ SQS5        UR                  [         R                  :X  a1  U R                  R                  [         R                  :X  d   S5       eMb  U R                  R                  UR                  :X  a  M   S5       e   UUU R                  U R                  U R                  S	.nUU R                  U R                  U R                  S
.nU R                  S5      U R                  S5      U R                  S5      U R                  S5      U R                  S5      [         R"                  S.nUR%                  SUUUS9n['        UR(                  5      n	[+        [,        R.                  5      U	l        SU	l        SU	l        [7        [,        R.                  5      S:  a  SU	l        U Ho  n
U R:                  R=                  U
5      nUc   eU	R?                  U
R@                  URB                  5        U	RE                  U
R@                  URB                  5        Mq     U R:                  RG                  X5        UR	                  5         g )Nprev_loss_scalingr   rK  update_loss_scalingrH  rJ  zPThe dtype of prev_loss_scaling should be float32 when the dtype of x is float16.zAThe dtype of prev_loss_scaling should be equal to the dtype of x.)r   rQ  PrevLossScalingInGoodSteps
InBadSteps)r   LossScalingOutGoodStepsOutBadStepsr	  r
  r  r  stop_update)r	  r
  r  r  r  r   r   r   r   )$r'   r"  r5  r6  r   r   r  r   rR  rS  r+   r(   r   r  r  r  r
   rY  r7  r   r   r   rW  rX  r   rZ  r   r   r[  rp   r;  r\  r   r   r]  r^  )r>   r.  r/  r=  r`  r   r   r   rB  ra  r?  rb  s               r,   r)  AMPPass._update_loss_scalingy  s   ]]779FFH
!!# 	"!		
 	5#t}.CDA$39;P ww&..())//6>>A fA ))//177: W:  &#11//--
 -- 00..	
 #'--0D"E'+}}5N'O--5--5==7
 %%&	 & 
 ,FKK8(34G4M4M(N%$%!$%!"(()A-)>&A++LLQOK***3300 4400  	66vP!!#r.   c                 ^    U R                   (       a  U R                   $ U R                  S5      $ )Nr  )r  r  rF   s    r,   get_lossAMPPass.get_loss  s$    
 ::::==((r.   )r  r  r  r  rr   rp   r  )rd   re   rf   rg   rB   r  r  r0  r$  r(  r&  r%  r'  r)  r  ri   __classcell__)r  s   @r,   r  r    sK    #," @DF$P< |-^l$\y$vJ$X) )r.   r  ).r'   paddle.base.data_feederr   r   6paddle.distributed.auto_parallel.static.dist_attributer   5paddle.distributed.auto_parallel.static.process_groupr   -paddle.distributed.auto_parallel.static.utilsr   r   /paddle.distributed.fleet.meta_optimizers.commonr	   r
   paddle.frameworkr   paddle.static.amp.fp16_utilsr   r   r   r   r   r   r   r   r   paddle.utilsr   auto_parallel.process_meshr   auto_parallel.static.utilsr   r   r   r   r   	pass_baser   r   rW  r   r-   r6   r8   rl   r  rj   r.   r,   <module>r     s     H P !
 
 
 % 4  /-/  )0/ 0/fE EP "#B	)h B	) $B	)r.   