
    ёi              	         S SK Jr  S SKrS SKJr  S SKrS SKrS SKJ	r	J
r
Jr  S SKJr  S SKJr  S SKJr  SS	KJrJrJr  \" \\R.                  S
S9r\	R2                  R4                  R6                  \	R2                  R4                  R8                  \	R2                  R4                  R:                  /rSr\ " S S5      5       r \ " SSSSSSS9r!S r"S r#S r$Sq%S r&S r'S r(S r)S r*S/S jr+S r,S r-S r.\S  5       r/\	R2                  R4                  R`                  \	R2                  R4                  Rb                  \	R2                  R4                  Rd                  \	R2                  R4                  Rf                  1r4\	R2                  R4                  R`                  \	R2                  R4                  Rb                  \	R2                  R4                  Rd                  1r5S! r6S" r7S# r8S$ r9S% r:S& r;S0S' jr<      S1S( jr=SS\	R2                  R4                  Rb                  S)S4S* jr>S+ r?S, r@SS\	R2                  R4                  Rb                  S0 4S- jrAS. rBg)2    )annotationsN)	dataclass)core	frameworkglobal_scope)in_pir_mode)
get_logger)signature_safe_contextmanager   )AutoMixedPrecisionLists
black_listget_low_precision_dtypestrz&%(asctime)s-%(levelname)s: %(message)s)fmt__use_fp16__c                  R    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S\S	'   S\S
'   Srg)
AmpOptions/   boolenablezlist[str] | Nonecustom_white_listcustom_black_liststrleveldtypeuse_promote N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r       \/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/static/amp/fp16_utils.pyr   r   /   s#    L''''JJr#   r   TO1float16)r   r   r   r   r   r   c                    U R                   n[        U[        5      (       a  US   nUR                  X5        UR	                  X5        g)z
If an op has old_name input and output, rename these input
args new_name.

Args:
    op (Operator): Current operator.
    old_name (str): The old name of input args.
    new_name (str): The new name of input args.
r   N)desc
isinstancetuple_rename_input_rename_output)opold_namenew_nameop_descs       r$   _rename_argr1   C   s?     ggG'5!!!*(-8.r#   c                    U R                    Hd  nUR                  nUR                  nU HC  nXr;  d  Xs;   a  M  UR                   H$  nXU   ;   d  M  UR	                  XU   U   5        M&     ME     Mf     g N)blocksopsidxinput_arg_namesr+   )	programop_var_rename_map
origin_opskeep_fp32_opsblockr5   block_idr-   names	            r$   _rename_op_inputr?   T   sm    ii99B#r':**X66$$TX+Ft+LM +   r#   c                    U [         R                  R                  R                  [         R                  R                  R                  4;   a  gg)zh
Convert specific variable type to its corresponding string.

Args:
    dtype (VarType): Variable type.
fp16fp32)r   VarDescVarTypeFP16BF16)r   s    r$   _dtype_to_strrG   `   s9     %%**DLL,@,@,E,EFF r#   c                     [        U 5      S:X  a  [        $ [        U 5      S:X  a  [        U S   [        5      (       d   e[        nU S   qU$ )Nr   r   )len(_keep_layer_norm_scale_bias_to_fp32_flagr)   r   )args	old_values     r$   #_keep_layer_norm_scale_bias_to_fp32rM   r   sI    
4yA~774yA~*T!Wd";";;;<	3770r#   c                    U R                   nUS:X  a  US:g  $ US:X  a  [        5       (       a  US:g  $ US:X  a  US:g  $ US:X  a  US;  $ US:X  a  US;  $ US	;   a  US
;   $ US:X  a  US;   $ g)N
batch_normX
layer_norminstance_normfused_bn_add_activation>   rP   Zresnet_unit>   rP   rT   FilterXFilterZfused_attentionfused_feedforward>   LnBiasLn1BiasLn2BiasLnScaleLn1ScaleLn2Scalefused_multi_transformer>   r[   r^   	FFNLnBias
FFNLnScaleFtyperM   )r-   in_nameop_types      r$   _keep_fp32_inputrh   }   s    ggG,#~,#F#H#H#~/!#~++j((->>>:: 
 
 	
 ++JJJr#   c                    U R                   nUS;   a  US:g  $ US:X  a  [        5       (       a  US:g  $ US:X  a  US;  $ US;   a  US;   $ g)	N)rO   rS   YrQ   rU   >   rj   ConvXConvZrX   >   LnMeanLn1MeanLn2Mean
LnVarianceLn1VarianceLn2VarianceFrd   )r-   out_namerg   s      r$   _keep_fp32_outputrt      sm    ggG;;3,#F#H#H3-666:: 
 
 	
 r#   c                   SnUR                    GH~  nU[        R                  :X  a  [        X5      (       a  M*  UR	                  U5       GH>  nU R                  U5      nUR                  [        ;  d  UR                  U:X  a  M;  UR                  S-   [        U5      -   n	U R                  U	5      n
U
(       a2  U
R                  U:X  a"  [        XR                  U
R                  5        M  UR                  U:X  d  M  U R                  R                  U	5      nUb  UR                  U:w  Ga=  UR                  S5      nU[        R                  :X  as  UR                  (       ab  SnUR                   UL a  [#        U R$                  X5      nOUR                   b  UR                   nSnUb  UR                  S5      nUb  SU;   a  UnU R'                  U	USUR                  S9nUR)                  S5      (       d-  [+        [,        R.                  R0                  R2                  5      OUR                  S5      nU R5                  US	S
U0SU0UR                  UR                  UUS.S9  US-  n[        XR                  UR                  5        GMA     GM     S HH  nUR)                  U5      (       d  M  UR                  U5      [6        ;   d  M6  UR9                  UU5        MJ     U$ )a  
Insert cast op and rename op's input.

Args:
    block (Program): The block in which the operator is.
    op (Operator): The operator to insert cast op.
    idx (int): The index of current operator.
    src_dtype (VarType): The input variable dtype of cast op.
    dest_dtype (VarType): The output variable dtype of cast op.

Returns:
    num_cast_op (int): The number of cast ops that have been inserted.
r   z.cast_N	op_deviceallF)r>   r   persistablestop_gradientop_rolecastrP   Out)in_dtype	out_dtyperv   rz   )re   inputsoutputsattrsr   )r}   r~   r   )input_namespaddlefloat32rh   input_find_var_recursivere   _valid_typesr   r>   rG   r1   varsgetattrry   r-   find_true_prev_opr5   
create_varhas_attrintr   op_proto_and_checker_makerOpRoleForward_insert_op_without_syncFLOAT_TYPES	_set_attr)r<   r-   r6   	src_dtype
dest_dtypenum_cast_opsrf   in_var_namein_var	cast_name
casted_varout_varrv   prev_opprev_op_devicerz   	attr_names                    r$   _insert_cast_opr      s~    L>>&+;B+H+H88G,K..{;F{{,.&,,*2Lh.z1JJI229=Jj..*<BZ__= ||y(**..3?gmmz&A " 4I !FNN2v7K7K"&!99?&7 %		2'G $YY2&,iiG)-".-4\\+-FN +6 % 7(6I#..&($)&,&:&:	 / G  "{{955 D;;BBJJKWWY/ 
 11# #V}!& 0(.)0)2'.	 2  !A%LBW\\:G - "P 8	;;y!!bggi&8K&GLLJ/ 8 r#   c                   / nU  HM  nXA:X  a    OGUR                    H3  nUR                  U5       H  nXb:X  d  M
  UR                  U5        M     M5     MO     U(       a#  [        U5      S:X  d  [	        SU S35      eUS   $ g)z
Find the true prev op that outputs var_name variable.

Args:
    ops (list): A list of ops.
    cur_op (Operator): Current operator which has var_name variable.
    var_name (string): Variable name.
r   z0There must be only one previous op that outputs z	 variabler   N)output_namesoutputappendrI   
ValueError)r5   cur_opvar_namer   r-   rs   out_var_names          r$   r   r     s     G<H "		( 3+NN2& !4 (  7|q   (z4 
 1:r#   Fc                "   / nU(       a   SnO[        U 5       H  u  pVXa:X  d  M    O   [        WS-   [        U 5      5       HJ  nX   nUR                   H3  nUR	                  U5       H  n	X:X  d  M
  UR                  U5        M     M5     ML     U$ )a?  
if there are post ops, return them, if there is no post op,
return None instead.
Args:
    ops (list): A list of ops.
    cur_op (Operator): Current operator which has var_name variable.
    var_name (string): Variable name.
    search_all (bool): The type of operator search. Use if "cur_op" is not in the "ops" set.
r   )	enumeraterangerI   r   r   r   )
r5   r   r   
search_allpost_opr6   r-   irf   r   s
             r$   find_true_post_opr   '  s     G	  ~GC| & 37CH%V~~G!xx0*NN2&  1 & & Nr#   c                t    [        U R                  5       5       H  nXR                  U5      :X  d  M  Us  $    g) r   )r   op_sizer-   )
block_desccur_op_descr6   s      r$   find_op_indexr   J  s3    Z'')*--,,J + r#   c                    U R                    H  nX!R                  ;   d  M    g   U R                   H  nX1R                  ;   d  M    g   gNTF)r7   black_varnamesoutput_arg_names)r-   	amp_listsrf   rs   s       r$   _is_in_black_varnamesr   R  sG    %%... & ''/// ( r#   c                L   U R                   U;   a  g/ nUR                  [        U R                  5      5        UR                  [        U R                  5      5        U H  nSU;   d  M    g   U(       a1  U R                  S5      (       a  [        U R                  S5      ;   a  ggg)NTlearning_rateop_namescopeF)re   extendlistr7   r   r   _fp16_guard_patternr   )r-   unsupported_op_listuse_fp16_guardin_out_arg_namesr>   s        r$   _need_keep_fp32r   ^  s    	ww%%  D!3!345D!4!456 d" ! ;;~&&277>#::  r#   c               #  x   #    [         R                  " [        S9   Sv   SSS5        g! , (       d  f       g= f7f)a-  
As for the pure fp16 training, if users set `use_fp16_guard` to True,
only those ops created in the context manager `fp16_guard` will be
transformed as float16 type.

Examples:
    .. code-block:: python

        >>> import numpy as np
        >>> import paddle
        >>> import paddle.nn.functional as F
        >>> paddle.enable_static()
        >>> data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
        >>> conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)

        >>> with paddle.static.amp.fp16_guard():
        ...     bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
        ...     pool = F.max_pool2d(bn, kernel_size=2, stride=2)
        ...     hidden = paddle.static.nn.fc(pool, size=10)
        ...     loss = paddle.mean(hidden)
)prefixN)r   
name_scoper   r   r#   r$   
fp16_guardr   y  s$     . 
		%8	9 
:	9	9s   :)	:
7:c           
     4   [        5       nU H  nS n UR                  U5      nUb  UR                  [        ;  a  M0  UR                  [        ;   a3  UR                  U5        U(       a  UR                  R                  U5        [        R	                  SU R                   SU SUR                   S35        M     U$ ! [         aP  n	[        R	                  SU	 S35        UR                  U5      nUb  [        R	                  SU S35         S n	A	NS n	A	ff = f)	Nz-- z&, try to get it in the global block --z-- var z is got in the global block --z---- op type: z, var name: z, var dtype: z ----)set_var_recursiver   _loggerdebugvarre   r   r   r   addr(   	set_dtype)
r-   	var_namesr<   global_blockr   need_set_dtypelow_precision_var_namesr   r   es
             r$   set_var_dst_dtyper     s     "e	&&x0C ;#((,699##''1""5)RWWI\(=SXY	
) 0 #")  	MMCs"HIJ""8,ChZ'EF		s   B==
DADDc                   [        5       nUS:X  d  US:X  a  U$ / nU R                   GH  nUR                  UR                  5       5        UR                  nU H  n	U	R
                  UR                  ;   a  M  [        XR                  U5      (       a5  U	R                   H#  n
UR                  U	R                  U
5      5      nM%     Mo  U	R                   HQ  n
[        R                  " 5       (       a  M  [        X5      (       d  M1  UR                  U	R                  U
5      5      nMS     M     GM     U HV  nUR                  U;  d  M  [         R#                  SUR                   SU S35        UR$                  R'                  U5        MX     U$ )Nr%   ODz-- set param  to z --.)r   r4   r   all_parametersr5   re   r   r   unsupported_listr   unionr   r   is_compiled_with_ipurh   r>   r   r   r(   r   )r8   r   r   r   r   keep_fp32_var_namesr   r<   r5   r-   rf   params               r$   set_param_dtyper     sP   %}""Ne2245iiB ww)...r#=#=~NN!~~G*=*C*C)+'  .
  "~~G4466;K< < /B.G.GHHW-/+	  .   2  ::00MMM%**T%EFJJ  '   r#   c                   Sn[        5       n[        U UR                  U5      (       a  SnXE4$ UR                  b  [	        X5      (       a  SnXE4$ U R
                  UR                  ;   a[  SnU R                   HI  nU H@  nUR                  U R                  U5      ;   d  M$  UR                  UR                  /5      nMB     MK     XE4$ )NFT)r   r   r   r   r   re   r   r   r>   r   r   )r-   r   r   params_listneed_keep_fp32fp16_varname_list_in_fp32_oprf   paramss           r$   op_need_keep_fp32r     s    N#&5 
"" 
  77 
	!	!	-2G
3 3  77 
I((	(~~G%;;"((7"334::FKK=I 1 & & 77r#   c           	     (   UnU R                    H  n[        R                  " 5       (       d=  [        X5      (       a-  [        R                  SU SU R                  U5       S35        MZ  U(       ay  U R                  U5       Hb  nUR                  U5      nU(       d  M  UR                  [        R                  :X  d  M=  [        R                  R                  R                  n  M     M  [        R                  R                  R                  nGM     U$ )Nz---- Input  z should be kept fp32 ----)r   r   r   rh   r   r   r   r   r   r   r   rC   rD   FP32)r-   	amp_dtyper<   	dst_dtyperf   r   r   s          r$   get_promote_dtyper     s    I>>((**/?/L/LMMgYa(9'::ST !xx022;?6fllfnn< $ 4 4 9 9I	  1 ,,11I "" r#   c                   US:X  a  U$ UR                   nUnU R                  UR                  ;   a  Sn	Sn
U R                   H  nU(       d  M  U R	                  U5       H  nUR                  U5      nUR                  c  M#  UR                  U L a  [        XpU5      nUc  MC  OUR                  nX;   d  UR                  UR                  ;   a&  [        R                  R                  R                  nM  X;   d  UR                  UR                  ;   d  M  UnM     M     U$ [        R                  R                  R                  nU$ )NO2F)r5   re   	gray_listr   r   r   r-   r   r   r   rC   rD   r   
white_list)r-   r   r   r<   r   r;   keep_fp16_opsr5   r   	keep_fp32	keep_fp16rf   r   r   r   s                  r$   get_amp_dst_dtyper     s    }
))CI	ww)%%%		~~Gw#%88G#4K"66{CFyy( b"3C["I"?$ + #)))  0"<<9+?+??$(LL$8$8$=$=	0"<<9+?+??$-	- $5 &<  LL((--	r#   c           
        [        5       nU R                   H]  n[        R                  " 5       (       d  [	        X5      (       a  M/  [        U U R                  U5      UUUSS9nUR                  U5      nM_     U R                   HL  n[        R                  " 5       (       d  [        X5      (       a  M/  [        U U R                  U5      UUUSS9  MN     U$ )NF)r   T)r   r   r   r   rh   r   r   r   r   rt   r   )r-   r<   r   r   r   rf   in_varsrs   s           r$   process_op_input_and_outputsr   A  s    !e >>((**/?/L/L#HHW 
 #:"?"?"H " OO((**/@/N/NIIh	
	 $ #"r#   c                    U" X5        U R                   nU R                   HO  nUR                  S5      (       d  M  UR                  UR	                  S5      R
                     n[        XQU5        MQ     g )N	sub_block)r8   r5   r   r4   r   id	map_block)r<   fn	parent_opr8   r-   r   s         r$   r   r   c  sZ    ummGii{{;''NN277;#7#:#:;	)$	 r#   c                   ^^^ 0 mUUU4S jn[        U R                  5       U5        TR                  5        H  u  pEUR                  U5        M     g )Nc                   > U R                   nU R                  nU HL  nTR                  UT
5      nUT	;   a,  T	U    H#  u  pgnUR                   [        Xx5      ;   d  M!  Un  O   UTU'   MN     g r3   )r6   r5   r   r   )r<   r   	block_idxr5   r-   current_op_amp_optionsamp_optionsstartendamp_recordsglobal_amp_optionsop_amp_options_maps            r$   fill_amp_enable_op_map6prepare_op_amp_options.<locals>.fill_amp_enable_op_mapt  s|    II	iiB%7%;%;-&" K'/:9/E+Kvvu!221<. 0F &<r" r#   )r   r   itemsset_amp_options)r8   r  r  r
  r-   r   r	  s    ``   @r$   prepare_op_amp_optionsr  m  sK    
 DF< g""$&<=(..0

6" 1r#   r   c                
   [         R                  S5        [         R                  U 5        Uc  [        U5      n[        U5      nUS:X  a  UR                  [        -
  Ul        US:X  a7  Ub  [        U5      n[        U5      nUR
                  UR                  -
  Ul        U R                  5       n[        5       n[        5       n	[        5       n
[        5       n[        U UUUUS9nUR                  U5      nS nU R                   GHO  nUR                  nU GH8  n[         R                  SU S35        U" U5      (       d  [         R                  S5        MA  UR                  5       n[        UXU5      u  nnU
R                  U5      n
U(       aW  UR                  U5        [!        UX["        R$                  R&                  R(                  5        [         R                  S	5        M  UR*                  UR                  ;   aG  U	R                  U5        [!        UXU5      nU
R                  U5      n
[         R                  S
5        GM1  UR*                  S:X  Ga-  UR-                  UR/                  S5      S   5      nUR-                  UR1                  S5      S   5      nUR3                  SUR4                  5        UR6                  R9                  [:        R4                  " UR=                  S5      5      5        [         R                  SR?                  UR*                  UR/                  S5      S   UR4                  UR1                  S5      S   UR4                  UR=                  S5      UR=                  S5      5      5        GMo  UnU(       d  [A        UUUUUUU	5      nO[C        UX>5      nUU:X  aG  U	R                  U5        [!        UXU5      nU
R                  U5      n
[         R                  S5        GM  UR                  U5        [!        UX["        R$                  R&                  R(                  5        [         R                  S5        GM;     GMR     U R                   H  nUR                  nSnU[E        U5      :  d  M"  UU   nSnUU	;   a6  [G        UUU["        R$                  R&                  R(                  U5      nUU-  nUU;   a6  [G        UUUU["        R$                  R&                  R(                  5      nUU-  nUUS-   -  nU[E        U5      :  a  M  M     [         R                  S5        [         R                  U 5        U
RI                  U5        U
$ )aY  
Traverse all ops in the whole model and set their inputs and outputs
to the fp16 data type. This function will do some special process for
the batch normalization, which keeps the computational process of
batchnorms in FP32.
Args:
    program (Program): The used program.
    amp_lists (AutoMixedPrecisionLists): An AutoMixedPrecisionLists object.
    use_fp16_guard(bool): Determine whether to use `fp16_guard` when
                          constructing the program. Default True.
    dest_type(core.VarDesc.VarType): the cast type. such as core.VarDesc.VarType.FP16 and core.VarDesc.VarType.BF16.
z#---- before cast model to fp16 ----r   r   )r   r   r   r   c                  ^ ^ SnU 4S jm[        T R                  5      S:  a%  [        U4S jT R                   5       5      (       a  gT R                  S;   a  U$ T R                  S;   a  SnU$ S HP  nT R	                  S	5      (       a  M  T R	                  U5      (       d  M3  T R                  U5      [        ;   d  MN  SnMR     U$ )
NTc                6  > TR                   R                  U 5      (       d  gTR                   R                  U 5      R                  [        R
                  R                  R                  :w  a  gTR                   R                  U 5      R                  [        ;   $ r   )
r<   r   r   re   r   rC   rD   DENSE_TENSORr   SUPPORT_FLOAT_TYPES)r>   r-   s    r$   is_support_typeAcast_model_to_fp16.<locals>.need_process.<locals>.is_support_type  sx    88//  ''-22<<''445 88**4066:MMMr#   r   c              3  >   >#    U  H  nT" U5      (       + v   M     g 7fr3   r   ).0r>   r  s     r$   	<genexpr>;cast_model_to_fp16.<locals>.need_process.<locals>.<genexpr>  s       /
2D$%%%2Ds   F)	set_value)create_py_readerread)r~   r   r}   )rI   r7   rw   re   r   r   r   )r-   need_processr   r  s   `  @r$   r  (cast_model_to_fp16.<locals>.need_process  s    
	N r!!"Q&3 /
242D2D/
 ,
 ,
  77m#  7722 L  4	 {{:..KK	**rwwy/A[/P#(L 4 r#   z-- process op: z  --z/---- The op does not need to be processed ----.zE---- Add into keep_fp32_ops because the op needs to be kept fp32 ----z=---- Add into keep_fp16_ops because the op in white_list ----r{   rP   r   r|   r}   r~   zq---- op type: {}, in var [name: {} dtype: {}], out var [name: {} dtype: {}], attr [in_dtype {} out_dtype {}] ----zG----  Add into keep_fp16_ops because it should be promoted to fp16 ----zG----  Add into keep_fp32_ops because it should be promoted to fp32 ----r   z"---- after cast model to fp16 ----)%r   r   r   r   r   all_listr   r   r   r   r   r4   r5   r   r   r   r   r   rC   rD   r   re   r   r   r   r   r   r(   r   r   r   formatr   r   rI   r   difference_update)r8   r   r   	dest_typer   r   r   r   r;   r   to_fp16_var_namesr   fp32_var_namesr  r<   r5   r-   
all_paramsop_keep_fp32fp16_var_names_in_fp32_opfp16_var_namesr   r   r   r6   r   in_var_cast_nums                              r$   cast_model_to_fp16r*    s   ( MM78MM'*95+E2	 }(33j@	} .y9E/6I(11I4H4HH	'')LEMEM% %%N .33NC$N iiBMMOB4t45##OP%446J6GIz73L3 !2 7 7)! !!"%,T\\-A-A-F-F [ I000!!"%!=Y" %6$;$;N$K!S
 77f$"66rxx}Q7GHF#77		%8H8KLGLLV\\:LL**6<<8L+MNMM L  S  SGGHHSM!,"LLIIe,Q/#MMGGJ/GGK0
 %	" 1!!%%!I !2"i GI	)!%%b)%AE&N ):(?(?(O%MMa "%%b)0E1E1E1J1J MMa]   j iiCHnSBL]""1LL((--# /]""1LL((--# /<!##C- CHn  4 MM67MM'''(;<r#   c                   [         R                  " 5         [        R                  " U 5        [         R                  " U5      n[         R
                  " U[         R                  5      R                  5       n[         R                  " 5         U$ r3   )	r   disable_staticr   _set_expected_place	to_tensorr{   bfloat16numpyenable_static)place
fp32_arrayfp32_tensor
bf16_arrays       r$   _convert_float_to_bfloat16r6  Y  s[    
!!%("":.K[&//:@@BJ
r#   c                v   [         R                  " 5         [        R                  " U 5        [         R                  " U5      n[         R
                  " U[         R                  5      R                  5       n[        5       (       a   UR                  5       R                  SS5        [         R                  " 5         U$ )Nmaster_grad_castT)r   r,  r   r-  r.  r{   r   r0  r   get_defining_opset_bool_attrr1  )r2  	org_array
org_tensorr3  s       r$   _convert_to_floatr=  b  s|    
!!%(!!),JZ8>>@J}}""$223EtL
r#   c           	        / nUR                    H"  nUR                  UR                  5       5        M$     [        U5      n	U(       a  UO	[	        5       n
U(       a  UO	[        5       nU GH  nUR                  U
;   d  M  [        R                  SUR                   SU	 SU  35        UR                  UR                  5      (       Ga  UR                  UR                  5      R                  5       n[        R                  " U5      nU[        R                  :X  a  [        X5      nUR	                  X5        O'[        R                   " U5      nUR	                  X5        U(       aj  UR                  U;   aW  UR                  XlR                     R                  5      nUR                  5       n[#        X5      nUR	                  UU 5        GMd  GMg  GMj  [        R%                  SUR                   35        GM     g)a!  
Traverse all parameters in the whole model and set them to the FP16 data type.
Whereas, this function will keep parameters of batchnorms in FP32.
Args:
    place(base.CPUPlace|base.CUDAPlace): `place` is used to restore the FP16 weight tensors.
    program (Program): The used program.
    scope(base.Scope, optional): `scope` is used to get the FP32 weight tensor values.
                                  Default is None.
    to_fp16_var_names(set|list, optional): The data types of vars in `to_fp16_var_names`
                                           will be set to FP16. Usually, it is the returned
                                           value of `cast_model_to_fp16` API.
    dest_type(core.VarDesc.VarType): the cast type. such as core.VarDesc.VarType.FP16 and core.VarDesc.VarType.BF16.
z-- cast r   z, place is zCannot find N)r4   r   r   r   r   r   r>   r   r   find_var
get_tensornparrayr   r/  r6  r&   r=  warning)r2  r8   scoper#  r"  rewrite_master_weightmaster_weightsr   r<   	dtype_strr(  	var_scoper   param_tdatap_arraymaster_p_var
master_p_tmaster_p_arrays                      r$   cast_parameters_to_fp16rO  m  s   , Ne2245   +95I*;&NLNI::'MM5::,d9+[H !!%**--#,,UZZ8CCExx(/8EGKK/ jj.GKK/(UZZ>-I#,#5#5&zz277$L ".!8!8!:J%6u%FNNN>59 .J( ,uzzl ;</  r#   c           
        U R                  5       nUR                  5         [        R                  R                  R
                  n[        R                  R                  R                  nU GHY  u  pVUR                  nUR                  [        R                  :X  d  M2  UR                  S:X  d  MD  UR                  S5      nU[        U5      -  (       a(  UR                  S5      (       a  UR                  S5        O[!        SU S35      eUR#                  UR$                  S   5      S   n	['        UR(                  Xy5      n
[        R                  R+                  5       nUR,                  U	/nU
R                  U5      (       a   UR/                  U
R                  U5      5        U
R1                  X5        UR1                  SU5        XrR(                  S   :X  a  GMp  [3        UR(                  XvR,                  5      nU(       a  [!        SU SUS    35      eUR4                  R7                  5       nUR9                  UR4                  5        [:        R<                  " UUS	S	S	S	S
9nUR(                  R?                  U5        [A        UR4                  UR4                  5      nUS:X  a  [!        SU S35      eURC                  USS9  GM\     UR                  5         g	)a  
Update op_role_var attr for some ops to make sure the gradients
transferred across GPUs is FP16.
1. Check whether the op that outputs gradient is cast or not.
2. If op is cast and gradient is FP32, remove the op_role_var
   and find the prev op which outputs FP16 gradient
3. Update the op_role_var of the prev op.

Args:
    main_prog (Program): The main program for training.
    params_grads (list): A list of params and grads.
r{   rz   op_role_varzThe cast op z4 must be in BACKWARD role and have op_role_var attr.r   r   zH's output should not beused by a non-optimize op, however, itis used by N)r<   r(   re   r   r   r   zThe op z is not in programF)sync)"r   _sync_with_cppr   r   r   BackwardOptimizer-   r   r   r   re   r   r   r   _remove_attrr   r   r   r   r5   kOpRoleVarAttrNamer>   r   r   r   r(   	append_op	copy_fromr   Operatorr   r   
_remove_op)	main_progparams_gradsr<   BACKWARDOPTIMIZEpgr-   rolefp16_grad_nameop_for_fp16_gradop_role_var_attr_nameattr_valpost_opsnew_op_descnew_opop_idxs                    r$   update_role_var_gradrk    sl    ""$E	..55>>H..55>>HTT77fnn$F):779%Dc(m#M(B(B. "2$ '1 1 
  XXbnnQ&78;N0BO//BBD " /H(()>?? 0 5 56K LM&&'<G LLH-YYr]"(B?H "2$ '""*1+0   **..0K!!"''*'' F IIV$"5::rww7F| 72$.@!ABBV%0e f 
r#   )Fr3   )r8   zpaddle.static.Programr  z,dict[int, list[tuple[AmpOptions, int, int]]]r  r   )C
__future__r   loggingdataclassesr   r0  rA  r   paddle.baser   r   r   paddle.base.frameworkr   paddle.base.log_helperr	   paddle.base.wrapped_decoratorr
   
fp16_listsr   r   r   r   INFOr   rC   rD   r  SELECTED_ROWSDENSE_TENSOR_ARRAYr   r   r   DEFAULT_AMP_OPTIONSr1   r?   rG   rJ   rM   rh   rt   r   r   r   r   r   r   r   r   rE   rF   FP64r   r  r   r   r   r   r   r   r   r  r*  r6  r=  rO  rk  r   r#   r$   <module>ry     sa   #  !   5 5 - - G  gll H
 	LL%%LL&&LL++ %     !

 /"	N ,0 (6(\~8 F	6  6 	LLLLLLLL	 	LLLLLL #>"J82,)X#D%#"#=# ##: ll""''
Nb ll""''4=nDr#   