
    ϑi                     <   S SK Jr  S SKrS SKrS SKJr  S SKJrJr  S SK	J
r
JrJrJr  S SKJr  S SKJrJr  S SKJr  S S	KJr  S S
KJr  SSKJr  SSKJrJrJr  / r " S S5      r  " S S5      r! " S S5      r" " S S5      r# " S S5      r$SS jr%\ SS j5       r&g)    )deepcopyN)_legacy_C_ops)_in_amp_guard_in_pure_fp16_guard)backwardcore	frameworkprogram_guard)BuildStrategy)
check_typeconvert_dtype)switch_to_static_graph	get_flags)LRScheduler   )logging_utils)RETURN_NO_VALUE_MAGIC_NUMbackend_guardconstruct_grad_namesc                   P    \ rS rSrSrSS jrS rS rS rS r	\
S 5       rS	 rS
rg)NestSequence'   zZ
A wrapper class that easily to flatten and restore the nest structure of
given sequence.
c                     Xl         U R                  5       U l        U R                  5       U l        U R                  U5        g N)_NestSequence__raw_inputtolist_NestSequence__input_list_get_var_ids_NestSequence__var_ids_check_non_variable)self	raw_input
need_checks      d/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/jit/dy2static/partial_program.py__init__NestSequence.__init__-   s4    $ KKM**,  ,    c                 T    [         R                  R                  U R                  5      $ )z1
Flattens the nested sequences into single list.
)paddleutilsflattenr   r"   s    r%   r   NestSequence.tolist3   s     ||##D$4$455r(   c                     [        U R                  5      [        U5      :X  d   e[        R                  R	                  U R
                  U5      $ )z/
Restores the nested sequence from value list.
)lenr   r*   r+   pack_sequence_asr   )r"   
value_lists     r%   restoreNestSequence.restore9   s>     4$$%Z888||,,T-=-=zJJr(   c                     / n[        U R                  5       HQ  u  p#[        U[        R                  [
        R                  R                  45      (       d  M@  UR                  U5        MS     U$ r   )		enumerater   
isinstancer	   Variabler   eagerTensorappend)r"   var_idsidxvars       r%   r   NestSequence._get_var_ids@   sS    !$"3"34HC#	 2 2DJJ4E4EFGGs# 5 r(   c                 N   U(       a  [        5       nU R                   HX  n[        U[        R                  [
        R                  R                  45      (       a  M>  UR                  [        U5      5        MZ     U(       a$  [        R                  " S[        U5       S35        ggg)zN
Raises warning if output of traced function contains non-tensor type values.
z;Output of traced function contains non-tensor type values: z. Currently, We don't support to update them while training and will return what we first saw. Please try to return them as tensor.N)setr   r7   r	   r8   r   r9   r:   addtyper   warnlist)r"   r$   warning_typesr>   s       r%   r!    NestSequence._check_non_variableH   s     EM((!#	(:(:DJJ<M<M'NOO!%%d3i0 ) ""QRVWdReQf gN N  r(   c                     U R                   $ r   )r    r-   s    r%   r<   NestSequence.var_idsX   s    ~~r(   c                      U R                   U   $ r   )r   )r"   items     r%   __getitem__NestSequence.__getitem__\   s      &&r(   )__input_list__raw_input	__var_idsNF)__name__
__module____qualname____firstlineno____doc__r&   r   r3   r   r!   propertyr<   rL   __static_attributes__ r(   r%   r   r   '   s:    
-6K   'r(   r   c                   $    \ rS rSrSrS rS rSrg)LazyInitialized`   z:
Descriptor to implement lazy initialization of property.
c                     Xl         g r   function)r"   r_   s     r%   r&   LazyInitialized.__init__e   s     r(   c                 h    U R                  U5      n[        XR                   R                  U5        U$ r   )r_   setattrrR   )r"   instanceclsvals       r%   __get__LazyInitialized.__get__h   s)    mmH%--00#6
r(   r^   N)rR   rS   rT   rU   rV   r&   rf   rX   rY   r(   r%   r[   r[   `   s    !r(   r[   c                   $    \ rS rSrSrS rS rSrg)ProgramInfon   z.
A helper class to record Program information
c                 6    SSSS.U l         0 U l        SU l        g )Nfp32ampfp16infer)op_sizeprogramsmoder-   s    r%   r&   ProgramInfo.__init__s   s$    

 	r(   c                     US;   d   eXR                   ;  aJ  U" SS9nX0R                   U'   UR                  R                  S5      R                  5       U R                  U'   U R                   U   U R                  U   4$ )z#
Record infer program and op size.
rm   T)is_infer_moder   )rs   descblockrr   )r"   keyprog_creator
infer_progs       r%   __call__ProgramInfo.__call__|   sv     ----mm#%D9J!+MM# * 5 5a 8 @ @ BDLL}}S!4<<#444r(   )rt   rr   rs   N)rR   rS   rT   rU   rV   r&   r}   rX   rY   r(   r%   ri   ri   n   s    
5r(   ri   c                   &    \ rS rSrS rS rS rSrg)PartialProgramLayerHook   c                     g r   rY   )r"   forward_programs     r%   before_append_backward.PartialProgramLayerHook.before_append_backward   s    sr(   c                     g r   rY   )r"   whole_programbackward_start_idxs      r%   after_append_backward-PartialProgramLayerHook.after_append_backward   s    r(   c                     g r   rY   r"   infer_programs     r%   after_infer#PartialProgramLayerHook.after_infer   s    #r(   rY   N)rR   rS   rT   rU   r   r   r   rX   rY   r(   r%   r   r      s    :K-r(   r   c                   
  ^  \ rS rSrSr S@U 4S jjrS rS rS rS r	SAS jr
\SBS	 j5       r\SBS
 j5       r\SBS j5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r \S 5       r!\S 5       r"S r#\$S  5       r%\$S! 5       r&\$S" 5       r'\$S# 5       r(\$S$ 5       r)\$S% 5       r*S& r+S' r,\S( 5       r-S) r.S* r/\$S+ 5       r0\$S, 5       r1S- r2\S. 5       r3\S/ 5       r4S0 r5\S1 5       r6S@S2 jr7S3 r8S4 r9SAS5 jr:S6 r;S7 r<S8 r=\S9 5       r>S: r?S; r@S< rAS= rBS> rCS?rDU =rE$ )CPartialProgramLayer   a'  
PartialProgramLayer wraps all the ops from layers decorated by `@to_static`
and execute them as a static subgraph.

.. note::
    **1. This is a very low level API. Users should not use this API
         directly. Please use `partial_program_from(concrete_program)`
         to create it.
    **2. DenseTensorArray is not currently supported in the output.

Args:
    main_program(Program): The main program that contains ops need to be executed.
    inputs(list[Variable]): The input list of the decorated function by `@to_static`.
    outputs(list[Variable]): The output list of the decorated function by `@to_static`.
    parameters(list[Tensor]|None): All trainable parameters included in the program. Default None.

Returns:
    Layer: A Layer object that run all ops internally in static graph mode.
c                 ,  > [         TU ]  5         [        U5      U l        [        USS9U l        Ub  UO/ U l        UR                  S[        5       5      U l        [        U R                  [        5      (       d   eU R                  U5      U l        [        R                  R                  R                  [        R                  R                   R#                  5       5         U R%                  5       U l        S S S 5        SU l        [+        5       U l        0 U l        Su  pgn[        R0                  " 5       n	U	(       a  U	R3                  5       u  pxU	R4                  nUb>  US;   a8  [        R6                  R8                  R:                  R=                  UUUS9U l        0 U l         0 U l!        S U l"        UR                  SS 5      U l#        0 U l$        / U l%        U R                   HW  n
[        U
[        RL                  5      (       d  M$  U RJ                  RO                  U
RP                  RS                  5       5        MY     U R                  RT                   Vs/ s H  oR                  U   RP                  PM     snU l+        g ! , (       d  f       GN= fs  snf )NT)r$   build_strategy)NNN)float16bfloat16)custom_white_listcustom_black_listdtypebackend),superr&   r   _inputs_outputs_paramsgetr   _build_strategyr7   _verify_program_origin_main_programr*   baser	   _dygraph_guarddygraphTracer_create_cuda_graph_vec_cuda_graph_vectrainingri   _infer_info_forward_end_index_map_dygraph_tracer_get_amp_op_list
_amp_dtypestaticro   
fp16_listsAutoMixedPrecisionLists	_amp_list_pir_scope_cache_legacy_scope_cache_hooker_backend_grad_var_names_in_var_namesr8   r;   rx   namer<   _out_var_descs)r"   main_programinputsoutputs
parameterskwargs	amp_dtyper   r   tracerr>   var_id	__class__s               r%   r&   PartialProgramLayer.__init__   s    	#F+$W>%/%;z%zz*:MOL$..>>>>$($8$8$F![[""11&++2E2E2L2L2NO#'#>#>#@D  P &=&(#:J7	&7**,393J3J3L0))I Y2I%I !!,,DD&7&7# E  N !##% 

9d3!<<C#y1122""))#((--/:   6:]]5J5J
5J6MM&!&&5J
C POB
s    I?"J?
Jc           
          U R                  U5      u  p#U R                  5       nU R                  U5        U R                  5       nUR	                  SU/5        U R                  5         [        R                  " U R                  U5      U R                  U R                  5      U R                  U5      U R                  U R                  SS9U R                  /UQ76   U R                  U5      nU R                  U5      nU$ )zA
Execute static graph by Interpreter and Return dynamic Tensors.
x_namesT
program_iduse_scope_cache)_prepare_inputs_prepare_outputs_cast_fp16_if_pure_fp16_prepare_attributesextend_sync_lr_value_with_schedulerr   run_program_valid_varsr   _create_scope_vecr   r   _restore_out_remove_no_value)r"   r   in_varsin_var_namesout_varsattrsrestored_nest_outs          r%   r}   PartialProgramLayer.__call__   s     !% 4 4V <((*$$W-((*i./**,!!W%T\\*X&""??D #    		
 		
 !--h7 112CD  r(   c           
         U R                  5       nU R                  U5        U R                  5       nUR                  SU R                  /5        U R                  5         [        R                  " U R                  U5      U R                  U R                  5      U R                  U5      U R                  U R                  SS9U R                  /UQ76   U$ )zj
In sot, inputs and outputs of partial program only contain tensors, so we can skip some step to speed up
r   Tr   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r"   r   r   r   s       r%   sot_callPartialProgramLayer.sot_call   s     ((*$$V,((*i!3!345**,!!V$T\\*X&""??D #    		
 		
 r(   c                    U R                   n[        US5      (       a  [        US5      (       a  UR                  nUR                  n[	        U[
        5      (       d   S5       eU R                   R                  nU" 5       n[        R                  " U5      R                  [        UR                  5      5      nUR                  U5        ggg)z4Update lr_var value with calculated by lr_scheduler.lr_schedulerlr_varzmust be LRSchedulerN)r   hasattrr   r   r7   r   nparrayastyper   r   	set_value)r"   r   r   r   lr_valuedatas         r%   r   1PartialProgramLayer._sync_lr_value_with_scheduler  s    00<00W(6
 6
 (44L!((FlK88O:OO844AAL#~H88H%,,]6<<-HIDT"6
0r(   c                     Xl         g r   )r   )r"   hookers     r%   
set_hookerPartialProgramLayer.set_hooker  s    r(   c                 V   U R                   (       d  U R                  (       a  U R                  nOU R                  nU(       d  [        R
                  " 5       $ X;  a  / X1'   X1   nU H  nUR                  (       d  M  Us  $    [        R
                  " 5       nUR                  U5        U$ r   )_in_pir_pt_mode_enable_pir_in_executorr   r   r   Scope_can_reusedr;   )r"   r   r   _scope_cachecached_scopesscopes         r%   
_get_scopePartialProgramLayer._get_scope  s    4#?#?00L33L::<)')L$$0"E    # 

U#r(   c                    U(       aG  U R                   R                  US9nU R                  (       a  U R                  R                  U5      nU$ U R	                  U R                   5      nU R                  U R                  U5        U$ )Nfor_test)r   cloner   r   _append_backward_desc_set_grad_typer   )r"   rw   r   train_programs       r%   _create_program#PartialProgramLayer._create_program/  s{     55;;& < M || $ 8 8 G   66))M m<  r(   c                    U R                   R                  US9n[        U5         [        R                  R
                  R                  R                  X R                  SSS9  S S S 5        U(       a.  U R                  (       a  U R                  R                  U5      nU$ U R                  U5      nU R                  U R                  U5        U$ ! , (       d  f       Nr= f)Nr   FO1)use_fp16_guardlevelr   r   r
   r*   r   ro   
fp16_utilscast_model_to_fp16r   r   r   r   r   r   )r"   rw   amp_programtrain_amp_programs       r%   _create_amp_program'PartialProgramLayer._create_amp_program@  s    //55}5M;'MM((;;^^E <  ( ||"ll66{C $ : :; G.?@$$ ('s   >C
Cc                    U R                   R                  US9n[        U5         [        R                  R
                  R                  R                  X R                  SS9  S S S 5        U(       a.  U R                  (       a  U R                  R                  U5      nU$ U R                  U5      nU R                  U R                  U5        U$ ! , (       d  f       Nr= f)Nr   F)r  r  )r"   rw   pure_fp16_programtrain_pure_fp16_programs       r%   _create_pure_fp16_program-PartialProgramLayer._create_pure_fp16_programP  s     55;;" < 
 ,-MM((;;!>>% <  .
 ||$(LL$<$<=N$O!$$&*&@&@!'# .EF** .-s   =C
Cc                 n    U R                   nU R                  U5      nUS:  d   eU R                  X5      $ Nr   )_train_programget_forward_end_op_idx"_get_forward_backward_program_formr"   r   forward_end_op_indexs      r%   &_create_forward_backward_train_program:PartialProgramLayer._create_forward_backward_train_programe  sB    ++#::=I#q(((66
 	
r(   c                 n    U R                   nU R                  U5      nUS:  d   eU R                  X5      $ r  )_train_amp_programr  r  r  s      r%   *_create_forward_backward_train_amp_program>PartialProgramLayer._create_forward_backward_train_amp_programo  sB    //#::=I#q(((66
 	
r(   c                 n    U R                   nU R                  U5      nUS:  d   eU R                  X5      $ r  )_train_pure_fp16_programr  r  r  s      r%   0_create_forward_backward_train_pure_fp16_programDPartialProgramLayer._create_forward_backward_train_pure_fp16_programy  sB    55#::=I#q(((66
 	
r(   c                 "    U R                  5       $ r   )r   r-   s    r%   r  "PartialProgramLayer._train_program  s    ##%%r(   c                 `    U R                  SU R                  5      u  pU R                  X5      $ )Nrn   )r   r   _build_infer_programr"   programrr   s      r%   _infer_program"PartialProgramLayer._infer_program  s.    ++FD4H4HI((::r(   c                 "    U R                  5       $ r   )r	  r-   s    r%   r  &PartialProgramLayer._train_amp_program  s    ''))r(   c                 `    U R                  SU R                  5      u  pU R                  X5      $ )Nro   )r   r	  r$  r%  s      r%   _infer_amp_program&PartialProgramLayer._infer_amp_program  s.    ++E43K3KL((::r(   c                 "    U R                  5       $ r   )r  r-   s    r%   r  ,PartialProgramLayer._train_pure_fp16_program  s    --//r(   c                 `    U R                  SU R                  5      u  pU R                  X5      $ )Nrp   )r   r  r$  r%  s      r%   _infer_pure_fp16_program,PartialProgramLayer._infer_pure_fp16_program  s3    ++D22
 ((::r(   c                 &    U R                  5       nU$ r   )r  r"   r&  s     r%   _train_forward_backward_program3PartialProgramLayer._train_forward_backward_program  s    ==?r(   c                 &    U R                  5       nU$ r   )r  r4  s     r%   #_train_amp_forward_backward_program7PartialProgramLayer._train_amp_forward_backward_program  s    AACr(   c                 >    [         R                  R                  5       $ r   )r*   r   Programr-   s    r%    _empty_backward_program_for_eval4PartialProgramLayer._empty_backward_program_for_eval  s    }}$$&&r(   c                 &    U R                  5       nU$ r   )r  r4  s     r%   )_train_pure_fp16_forward_backward_program=PartialProgramLayer._train_pure_fp16_forward_backward_program  s    GGIr(   c                 Z    [         R                  R                  U R                  U 5      nU$ r   )r*   r+   _hash_with_idr  r"   r   s     r%   _train_program_id%PartialProgramLayer._train_program_id  s$    \\//0C0CTJ
r(   c                 V    [         R                  R                  U R                  U 5      $ r   )r*   r+   rB  r'  r-   s    r%   _infer_program_id%PartialProgramLayer._infer_program_id  s    ||))$*=*=tDDr(   c                 Z    [         R                  R                  U R                  U 5      nU$ r   )r*   r+   rB  r  rC  s     r%   _train_amp_program_id)PartialProgramLayer._train_amp_program_id  s$    \\//0G0GN
r(   c                 V    [         R                  R                  U R                  U 5      $ r   )r*   r+   rB  r,  r-   s    r%   _infer_amp_program_id)PartialProgramLayer._infer_amp_program_id  s    ||))$*A*A4HHr(   c                 Z    [         R                  R                  U R                  U 5      nU$ r   )r*   r+   rB  r  rC  s     r%   _train_pure_fp16_program_id/PartialProgramLayer._train_pure_fp16_program_id  s)    \\//))4

 r(   c                 V    [         R                  R                  U R                  U 5      $ r   )r*   r+   rB  r1  r-   s    r%   _infer_pure_fp16_program_id/PartialProgramLayer._infer_pure_fp16_program_id  s    ||))$*G*GNNr(   c                 Z    U R                   [        R                  R                  X5         $ r   )r   r*   r+   rB  r4  s     r%   r  *PartialProgramLayer.get_forward_end_op_idx  s'    **LL&&w5
 	
r(   c                 T    U R                   (       a  U R                  $ U R                  $ )z'
Return current train or eval program.
)r   r   r   r-   s    r%   r&  PartialProgramLayer.program  s#    
 ==%%%%%%r(   c                 ,   U R                   (       aB  [        5       (       a  U R                  $ [        5       (       a  U R                  $ U R
                  $ [        5       (       a  U R                  $ [        5       (       a  U R                  $ U R                  $ )z/
Return current train or eval program hash id.
)	r   r   rJ  r   rP  rD  rM  rS  rG  r-   s    r%   r   PartialProgramLayer.program_id  ss    
 ==111$&&777---111$&&777---r(   c                     [        5       (       a  U R                  $ [        5       (       a  U R                  $ U R                  $ r   )r   r  r   r  r  r-   s    r%   r   !PartialProgramLayer.train_program  s6    ??*** ""000&&&r(   c                     [        5       (       a  U R                  nU$ [        5       (       a  U R                  nU$ U R                  nU$ r   )r   r,  r   r1  r'  r   s     r%   r   !PartialProgramLayer.infer_program  sM    ?? 33M
 	 !"" 99M  !//Mr(   c                     S nU R                   (       aQ  [        5       (       a  U R                  nUS   $ [        5       (       a  U R                  nUS   $ U R
                  nUS   $ U R                  nU$ r  )r   r   r8  r   r?  r5  r   )r"   r   progss      r%   r   #PartialProgramLayer.forward_program  ss    ==@@
 8O	 %&&FF 8O <<8O"00Or(   c                     U R                   (       aQ  [        5       (       a  U R                  nUS   $ [        5       (       a  U R                  nUS   $ U R
                  nUS   $  U R                  $ Nr   )r   r   r8  r   r?  r5  r<  )r"   r`  s     r%   backward_program$PartialProgramLayer.backward_program  sp    ==@@
 8O	 %&&FF 8O <<8O 888r(   c                 J    U R                  U5        U R                  U5        U$ )zp
Verify that the program parameter is initialized, prune some unused params,
and remove redundant op callstack.
)_check_params_all_inited_prune_unused_paramsr"   r   s     r%   r   #PartialProgramLayer._verify_program+  s&     	%%l3!!,/r(   c                    ^^ U4S jnU4S jn[        [        X@R                  R                  5       5      5      nU H4  nUR	                  5       R                  UR                  5      nU" X85        M6     g)ak  
Why we need add gradient aggregation operation ?
In some cases, if non leaf nodes are used as output, gradient overwriting will occur, such as
def forward(self, in):
    x = 2 * in  # <---- x is a non-leaf node in program.
    y = x + 3
    return x, y

loss = forward(in)[0].sum()
loss.backward()  # <----- x@grad will be overwritten by elementwise_add_grad Op
c                   > [        U [        R                  5      (       aV  U R                  [        R
                  R                  R                  [        R
                  R                  R                  4;  a  gU R                  [        R                  [        R                  4;  a  gTR                  S5      R                   H(  nUR                   H  nX R                   :X  d  M      g   M*     g)z5
if exist a op whose inputs is var, then return True
Fr   T)r7   r	   r8   rC   r   VarDescVarTypeDENSE_TENSORSELECTED_ROWSr   r*   float32float64ry   opsinput_arg_namesr   )r>   opin_argr   s      r%   _need_aggregationKPartialProgramLayer.prepare_gradient_aggregation.<locals>._need_aggregationF  s     c9#5#566#(($$11$$22K ; yy @@"((+// 00F)# 1 0 r(   c           
        >^ SnUR                   mUR                  U-   S-   n[        [        UU4S j[	        U R                  S5      R                  5      5      5      n[        U5      S:X  a  g U R                  S5      R                  UUR                  UR                  UR                  S9  U H)  u  pVUR                  TU5        UR                  TU5        M+     U R                  S5      R                  US   S   S-   SS	TU/0S
T0S9  g )Nz
@dy2staticz@GRADc                 f   > U S   T:  =(       a"    [        U4S jU S   R                   5       5      $ )Nr   c              3   .   >#    U  H
  nUT:H  v   M     g 7fr   rY   ).0out_argvar_grad_names     r%   	<genexpr>~PartialProgramLayer.prepare_gradient_aggregation.<locals>._insert_aggregation_ops_for_var.<locals>.<lambda>.<locals>.<genexpr>^  s      '<G  =0'<s   r   )anyoutput_arg_names)x	start_idxr~  s    r%   <lambda>kPartialProgramLayer.prepare_gradient_aggregation.<locals>._insert_aggregation_ops_for_var.<locals>.<lambda>]  s7    adi/  '(t'<'< r(   r   )r   rC   r   shaperl   r   sumXOut)rC   r   r   )	grad_namer   rE   filterr6   ry   rs  r0   
create_varrC   r   r  _rename_input_rename_output
_insert_op)	target_programr>   suffixnew_grad_name	found_opsr=   ru  r~  r  s	          @r%   _insert_aggregation_ops_for_varYPartialProgramLayer.prepare_gradient_aggregation.<locals>._insert_aggregation_ops_for_varW  s   !FMMMHHv-7M
 n221599:	I 9~"  #.."XXiiii	 /  %  >!!-? %
   #.."a 1$m];<.	 /  r(   N)rE   r  r   r   global_blockr>   r   )	r"   r  r   r  rw  r  to_processed_vars_var
target_vars	    ``      r%   prepare_gradient_aggregation0PartialProgramLayer.prepare_gradient_aggregation7  sb    	"&	P !$mm&:&:&<=
 &D'446::499EJ+NG &r(   c                    UR                  SS9nU R                  (       a  U R                  R                  U5      n/ nU R                  R	                  5        H\  n[        U[        R                  5      (       d  M$  UR                  UR                  5       R                  UR                  5      5        M^     [        UR                  S5      R                  5      [        U R                  R	                  5       5      -   nU(       Ga  [        UR                  S5      R                  5      [        U R                  R	                  5       5      -   n[        U R                   5         [#        US[        R                  [$        [&        4S5        [(        R*                  " U/ S9nU R,                   Vs/ s HN  n[        U[        R                  5      (       d  M$  UR                  S5      R                  UR                  5      PMP     nnU R.                   V	s/ s H,  oR                  S5      R                  U	R                  5      PM.     n
n	U R                   Vs/ s HN  n[        U[        R                  5      (       d  M$  UR                  S5      R                  UR                  5      PMP     nn[1        XhX5      U l        S S S 5        U R                  (       a  U R                  R5                  X%5      u  p%U R7                  US-   X5        U[        U R                  R	                  5       5      -
  U R8                  [:        R<                  R?                  X 5      '   U$ s  snf s  sn	f s  snf ! , (       d  f       N= f)NFr   r   targetszpaddle.static.gradients)r  r   r   ) r   r   r   r   r   r7   r	   r8   r;   r  r>   r   r0   ry   rs  r   r   r   rE   tupler   calc_gradient_helperr   r   r   r   r   r  r   r*   r+   rB  )r"   r   r&  r  outr  grad_info_mapr>   x_varsparam
param_varsr   s               r%   r   )PartialProgramLayer._append_backward_desc  s   $$e$4<<ll99'BG=='')C#y1122w33599#((CD * a(,,-DMM4H4H4J0KK	GMM!,001C8L8L8N4OOIt}}-''u5-	 !) = =#B!  $||+!#y'9'9: 3GMM!$((2+   CG,,BNMM!$((4,  
  $}},!#y'9'9: 3GMM!$((2,   (<!:($3 .: ||%)\\%G%G&" --A| DMM00233 	##LL&&w5	
 ;
' .-sC   %AM,0#M.MM,3M"M,#M'?.M'-M,M,,
M:c                 0   / nU R                    H~  nSnUR                   Hi  nUR                   HL  nUR                  UR                  ;   d  UR                  UR
                  ;   d  M9  UR                  U5        Sn  O   U(       d  Mh    M|     M     X l         g)z
Prune the parameters not used anywhere in the program.
The `@to_static` may only decorated a sub function which
contains some unused parameters created in `__init__`.
So prune these parameters to avoid unnecessary operations in
`run_program_op`.
FTN)r   blocksrs  r   rt  r  r;   )r"   r&  required_paramsr  found_paramry   ru  s          r%   rh  (PartialProgramLayer._prune_unused_params  s     \\EK ))B

b&8&88 ::)<)<<'..u5&* $ ; ( " 'r(   c                    [        5       (       a  [        U5       H  u  p#UR                  nU R                  R	                  5       R                  U5      (       d  MA  U R                  R	                  5       R                  U5      R                  [        R                  :X  d  M  UR                  S5      X'   XAU   l        M     g g )Nr   )r   r6   r   r&  r  has_varr>   r   r*   r   r   )r"   r   ir>   r   s        r%   r   +PartialProgramLayer._cast_fp16_if_pure_fp16  s      #G,xxLL--/77==11377=CC~~& "%I!6GJ&*AJO - !r(   c                    Sn[        U5      U   n[        R                  " 5       =(       d    [        R                  " 5       nU R                  R                  5       nU R                  R                  nU(       d  U(       d  U(       a  SnU$ )N!FLAGS_enable_pir_with_pt_in_dy2stF)r   r   _is_fwd_prim_enabled_is_bwd_prim_enabledr   is_cinnr   build_cinn_pass)r"   pir_dy2st_flagin_pir_pt_modeis_prim_enabledin_cinn_backendis_cinn_enableds         r%   r   #PartialProgramLayer._in_pir_pt_mode  sl    <">2>B%%'F4+D+D+F 	 --//1..>>o"Nr(   c                 &    Sn[        U5      U   nU$ )NFLAGS_enable_pir_in_executorr   )r"   enable_pir_in_executor_flagenable_pir_in_executors      r%   r   +PartialProgramLayer._enable_pir_in_executor  s$    &D#!*+F!G'"
 &%r(   c                    SU R                   R                  R                  S5      SU R                  R                  R                  S5      SU R                  (       + SU R
                  /nU R                  (       ae  UR                  SU R                  R                  S/ 5      SU R                  R                  S	/ 5      S
U R                  R                  S/ 5      45        U R                  nUR                  SU/5        U$ )Nforward_global_blockr   backward_global_blockis_testr   param_grad_namesr  out_grad_namesr  x_grad_namesr  r  )
r   rx   ry   rd  r   r   r   r   r   r   )r"   r   r  s      r%   r   'PartialProgramLayer._prepare_attributes  s    "  %%++A.#!!&&,,Q/OO	
 == LL&((,,Wb9$((,,UB7"((,,S"5	 --&78r(   c                 ~    U R                  U5      n[        USUU R                  U5      nU R                  US 5        U$ r  )_parse_skip_gc_varsadd_build_strategy_forr   _apply_inplace_pass)r"   r   r  forward_skip_varsbuilt_infer_programs        r%   r$  (PartialProgramLayer._build_infer_program  sL     44]C4   
 	  !4d;""r(   c                    U[        U R                  R                  5      -   nUR                  R	                  S5      R                  5       nU R                  U5      U R                  R                  S/ 5      -   n[        UUUU R                  U5      nU R                  X5      n[        USUU R                  U5      nU R                  X5        X/$ )Nr   r  )r0   r   r<   rx   ry   rr   r  r   r   r  r   r  )	r"   r   r  backward_start_op_indexbackward_end_op_indexbackward_skip_varsbackward_built_programr  forward_built_programs	            r%   r  6PartialProgramLayer._get_forward_backward_program_form%  s     #7MM!!:
 #
 !. 2 2 8 8 ; C C E "55
  $$Wb12 "8#!  "
 !44
 !7   !
 	  !6O%>>r(   c                    SSSS.n[         R                  R                  5       n[        R                  " 5       (       a  SOSnU R                  X5      nU R                  U5      nU(       a  UUSS.nU(       a  UUSS.ng g )Nboolz	list[str])use_cudamem_opt_skip_varsfor_partial_blockTF)r*   r   r;  r   is_compiled_with_cudar  )	r"   r   rd  
attr_typesempty_startup_programr  forward_mem_opt_skip_varsbackward_mem_opt_skip_varsr   s	            r%   r  'PartialProgramLayer._apply_inplace_passK  s    !,!'


 !' 5 5 755774U$($<$<%
! &*%=%=o%N"$%>%)E $%?%)E r(   c                    / nU R                    Ha  n[        U[        R                  R                  R
                  5      (       d  M8  UR                  UR                  R                  5       5        Mc     U R                   Ha  n[        U[        R                  R                  R
                  5      (       d  M8  UR                  UR                  R                  5       5        Mc     U$ )z;
Returns Variable Names from self._inputs and self.outputs
)
r   r7   r*   r   r	   r8   r;   rx   r   r   )r"   	var_namesr>   s      r%   _inout_var_names$PartialProgramLayer._inout_var_namesu  s    
 	<<C#v{{44==>>  1   ==C#v{{44==>>  1 ! r(   c                 Z   [        U R                  5      nUR                  5       R                  R	                  5        H)  u  pEUR
                  (       d  M  UR                  U5        M+     U(       a9  [        R                  " UR                  S5       H  nUR                  U5        M     U$ )z
Parse variables that need to skip GC after execute it.
If specify backward_program, it will keep the variables used in backward.
T)
r   r  r  varsitemsis_datar;   r   #parse_safe_eager_deletion_skip_varsrx   )r"   r&  rd  	skip_varsvar_namer>   s         r%   r  'PartialProgramLayer._parse_skip_gc_vars  s     T223	$11388>>@MH{{{  * A  DD %%t   * r(   c           	      .   [        U[        [        45      (       d   e[        R                  R                  U5      n/ n/ n[        R                  " 5       n[        U5       GH.  u  pg[        U[        R                  5      (       aI  Sn[        R                  R                  UU R                  U   R                  R!                  5       SUSS9nOy[        U[        R                  R                  5      (       aN  UR"                  (       a:  UR$                  R'                  U5      (       d  UR)                  US5      nSUl        OUnOM  UR+                  U R                  U   R                  R!                  5       5        UR+                  U5        GM1     X44$ )z!
Prepare inputs, outputs, attrs.
NFT)valuer   persistableplace	zero_copy)r7   r  rE   r*   r+   r,   r	   _current_expected_placer6   r   ndarrayr   r9   r:   r   rx   r   stop_gradientr  _equals_copy_tor;   )	r"   r   flatten_inputs
input_varsinput_var_namesexpected_placer  r  r>   s	            r%   r   #PartialProgramLayer._prepare_inputs  sJ    &5$-0000--f5
"::<!.1HA%,,jj''a--224 %(" (  E4::#4#455 &&u{{/B/B"0 0  ..?C(,C%C""4<<?#7#7#<#<#>?c"1 24 **r(   c                 h    [         R                  R                  R                  U R                  5      $ r   )r*   r	   r   #create_empty_tensors_with_var_descsr   r-   s    r%   r   $PartialProgramLayer._prepare_outputs  s*    $$HH
 	
r(   c                 &    U R                  XS9nU/$ )Nr   )r   )r"   r   r   inner_scopes       r%   r   %PartialProgramLayer._create_scope_vec  s"    oo! & 
 }r(   c                     [         R                  R                  [         R                  R                  R
                  / S[         R                  R                  R                  S5      nSUl        U$ )N
cuda_graphT)r   r9   r:   rm  rn  FP32RAWr  r"   r>   s     r%   r   *PartialProgramLayer._create_cuda_graph_vec  sT    jjLL  %%LL  $$
 !
r(   c                 t   ^  U 4S jn[        T R                  R                  U5       H  u  p4U" X45        M     g )Nc                    > TR                   U    n[        U[        R                  5      (       d   eUR                  Ul        g r   )r   r7   r	   r8   r  )r   eager_tensorr>   r"   s      r%   set_stop_gradientDPartialProgramLayer._update_stop_gradient.<locals>.set_stop_gradient  s8    --'Cc9#5#56666),):):L&r(   )zipr   r<   )r"   r   r  r=   r>   s   `    r%   _update_stop_gradient)PartialProgramLayer._update_stop_gradient  s/    	;
 DMM118<HCc' =r(   c                     U R                   R                  5       n[        U R                   R                  5       H  u  p4X   X$'   M     U R                   R	                  U5      nUb  [        U5      S:X  a  US   nU$ )zJ
Restores same nested outputs by only replacing the Variable with Tensor.
r   r   )r   r   r6   r<   r3   r0   )r"   r   flatten_outputsr  r=   outss         r%   r    PartialProgramLayer._restore_out  sm    
 --..0 5 56FA#+;O  7}}$$_5D	Q7Dr(   c                      UR                  SS9$ )NTr   )r   ri  s     r%   _clone_for_test#PartialProgramLayer._clone_for_test  s    !!4!00r(   c                     [        U[        R                  R                  5      (       a-  UR                  S/:X  a  UR                  5       S   [        :X  a  gg)Nr   r   TF)r7   r   r9   r:   r  numpyr   r  s     r%   _is_no_value PartialProgramLayer._is_no_value  s?    c4::,,--#))s2Byy{1~!::r(   c                   ^  [        U[        R                  R                  5      (       a  T R	                  U5      (       a  gU$ [        U[
        [        45      (       a  [        U[
        5      (       a  [        U 4S jU 5       5      nO*U Vs/ s H  nT R	                  U5      (       a  M  UPM     nn[        U5      [        U5      :  n[        U5      S:X  a  U(       a  g[        U5      S:X  a  U(       a  US   $ U$ U$ s  snf )z;
Removes invalid value for various-length return statement
Nc              3   X   >#    U  H  nTR                  U5      (       a  M  Uv   M!     g 7fr   )r  )r|  r>   r"   s     r%   r  7PartialProgramLayer._remove_no_value.<locals>.<genexpr>  s#      #+C43D3DS3ICC8s   *	*r   r   )r7   r   r9   r:   r  r  rE   r0   )r"   r   resr>   has_removeds   `    r%   r   $PartialProgramLayer._remove_no_value  s     h

 1 122  **O5$-00(E** #+ 
 '/Mhsd6G6G6LshMh-#c(2K 3x1}SQ;1vJ Ns   D,Dc                    U H  nUR                   [        R                  " 5       -   nUR                  R	                  S5      R                  UR                  5       5      nUc  Mb  UR                  UR                  5       5        M     g r  )	r   r   grad_var_suffixrx   ry   find_varencoder   rC   )r"   paramsr   r  r  grad_vars         r%   r   "PartialProgramLayer._set_grad_type  sl     E

T%9%9%;;I$))//2;;I<L<L<NOH  1 r(   c           	      v   [        U R                  [        [        45      (       d"  [	        S[        U R                  5       S35      e[        5       n[        U R                  5       Hd  u  p4[        U[        R                  R                  5      (       d  [	        SU S[        U5       S35      eUR                  UR                  5        Mf     UR                   H[  nUR                  R                  5        H:  u  pd[        U[         R"                  5      (       d  M&  Xb;  d  M-  [%        SU S35      e   M]     g)a]  
Check all params from main program are already initialized, see details as follows:
    1. all parameters in self._params should be type `framework.EagerParamBase` which are created in dygraph.
    2. all parameters from transformed program can be found in self._params.
       Because they share same data with EagerParamBase of original dygraph.
zRType of self._params in PartialProgramLayer should be list or tuple, but received .zType of self._params[zG] in PartialProgramLayer should be Parameter or Variable, but received zv
	We don't support to define layer with parameters in the function decorated by `@to_static`.
	But we found parameter(z) was created in the decorated function.

	Revise suggestion: 
		1. Please ensure all your sublayers are inherited from nn.Layer.
		2. Please use nn.ParameterList and nn.LayerList as container instead of using a native Python container such as ListN)r7   r   rE   r  	TypeErrorrC   rA   r6   r   r9   r:   rB   r   r  r  r  r	   	Parameter
ValueError)r"   r   param_and_buffer_names_setr  r>   ry   r   s          r%   rg  ,PartialProgramLayer._check_params_all_inited   s'    $,,u66deijnjvjvewdxxyz  &)U"-FAc4::#4#455+A3.uvz{~v  vA  AB  C  '**3884 . "((E"ZZ--/	c9#6#677=(::> @YY  0 )r(   c                     U(       a  U$ S $ r   rY   )r"   r  s     r%   r   PartialProgramLayer._valid_varsB  s    t%%r(   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )NFrQ   )FrR   rS   rT   rU   rV   r&   r}   r   r   r   r   r   r   r	  r  r  r  r  r[   r  r'  r  r,  r  r1  r5  r8  r<  r?  rD  rG  rJ  rM  rP  rS  r  rW   r&  r   r   r   r   rd  r   r  r   rh  r   r   r   r   r$  r  r  r  r  r   r   r   r   r  r   r  r  r   r   rg  r   rX   __classcell__)r   s   @r%   r   r      s[   * 9=/
b!80#$ ! !  % % + +( 
 
 
 
 
 
 & & ; ; * * ; ; 0 0 ; ;     ' '     E E   I I   O O

 & & . .& ' '     9 9*
NH` 4 4l'2
+ 
 
 & &@ 
# 
# #? #?J T  $%+N

	( 1 182 D& &r(   r   c                     U R                   nU(       a  U(       a  USS  n[        U R                  UU R                  U R                  40 U R
                  D6$ rc  )r   r   r   r   r   r   )concrete_programfrom_methodr   s      r%   partial_program_fromr6  F  sZ    $$F +%%  ##	
 
!
! r(   c                    X:  Ga  [         R                  R                  [        R                  " U R
                  X5      US9nU(       a%  UR                  R                  S[        U5      5        UR                  [        R                  " 5       [        R                  " 5       5        [        R                  " UR                  5      nUR                  5       n[        UR                  S5      (       a  UR                  R                   Ul        Oo[         R                  R#                  5       nU R%                  S5      R&                  R)                  5        H$  nUR%                  S5      R+                  US5        M&     [-        U R.                  UR.                  5       H4  u  pU
R
                  R1                  U	R
                  R2                  5        M6     U$ )N)r   skip_gc_varsr   r   F)r*   r   CompiledProgramr   Graphrx   _graphrA   _compiler   r	   r  IrGraph
to_programr   _programr   r;  ry   r  values_clone_variabler  r  set_parent_idxparent)r&  start_op_indexend_op_indexr   r  compiled_programir_graphbuilt_programr>   origincurrents              r%   r  r  V  s_    $!==88JJw||^B) 9 
 ##''IG!!JJL);;=	
 $$%5%<%<= ++-#,,n==)9)B)B)O)OM& --/==#((//1C"223> 2 w~~}/C/CD##FKK$6$67 E r(   rQ   )NN)'copyr   r  r   r*   r   paddle.amp.auto_castr   r   paddle.baser   r   r	   r
   paddle.base.compilerr   paddle.base.data_feederr   r   paddle.base.dygraph.baser   paddle.base.frameworkr   paddle.optimizer.lrr    r   r+   r   r   r   __all__r   r[   ri   r   r   r6  r  rY   r(   r%   <module>rU     s         C @ @ . = ; + +   6' 6'r 5 56. .r& r&j  JN r(   