
    x-j                     ^   d dl mZ d dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ ddlmZ ddlmZmZmZ g Z G d d          Z  G d d          Z! G d d          Z" G d d          Z# G d d          Z$ddZ%e	 dd            Z&dS )    )deepcopyN)_legacy_C_ops)_in_amp_guard_in_pure_fp16_guard)backwardcore	frameworkprogram_guard)BuildStrategy)
check_typeconvert_dtype)switch_to_static_graph	get_flags)LRScheduler   )logging_utils)RETURN_NO_VALUE_MAGIC_NUMbackend_guardconstruct_grad_namesc                   N    e Zd ZdZddZd Zd Zd Zd Ze	d             Z
d	 Zd
S )NestSequencezf
    A wrapper class that easily to flatten and restore the nest structure of
    given sequence.
    Fc                     || _         |                                 | _        |                                 | _        |                     |           d S N)_NestSequence__raw_inputtolist_NestSequence__input_list_get_var_ids_NestSequence__var_ids_check_non_variable)self	raw_input
need_checks      d/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/jit/dy2static/partial_program.py__init__zNestSequence.__init__-   sG    $ KKMM**,,  ,,,,,    c                 J    t           j                            | j                  S )zA
        Flattens the nested sequences into single list.
        )paddleutilsflattenr   r!   s    r$   r   zNestSequence.tolist3   s     |##D$4555r&   c                     t          | j                  t          |          k    sJ t          j                            | j        |          S )z?
        Restores the nested sequence from value list.
        )lenr   r(   r)   pack_sequence_asr   )r!   
value_lists     r$   restorezNestSequence.restore9   sA     4$%%Z8888|,,T-=zJJJr&   c                     g }t          | j                  D ]E\  }}t          |t          j        t
          j        j        f          r|                    |           F|S r   )		enumerater   
isinstancer	   Variabler   eagerTensorappend)r!   var_idsidxvars       r$   r   zNestSequence._get_var_ids@   s\    !$"344 	$ 	$HC#	 2DJ4EFGG $s###r&   c                 *   |rt                      }| j        D ]O}t          |t          j        t
          j        j        f          s"|                    t          |                     P|r)t          j        dt          |           d           dS dS dS )z^
        Raises warning if output of traced function contains non-tensor type values.
        z;Output of traced function contains non-tensor type values: z. Currently, We don't support to update them while training and will return what we first saw. Please try to return them as tensor.N)setr   r3   r	   r4   r   r5   r6   addtyper   warnlist)r!   r#   warning_typesr:   s       r$   r    z NestSequence._check_non_variableH   s      
	EEM( 1 1!#	(:DJ<M'NOO 1!%%d3ii000 "NRVWdReRe N N N    
	 
	
 r&   c                     | j         S r   )r   r+   s    r$   r8   zNestSequence.var_idsX   s
    ~r&   c                     | j         |         S r   )r   )r!   items     r$   __getitem__zNestSequence.__getitem__\   s     &&r&   NF)__name__
__module____qualname____doc__r%   r   r0   r   r    propertyr8   rE    r&   r$   r   r   '   s         
- - - -6 6 6K K K        X' ' ' ' 'r&   r   c                       e Zd ZdZd Zd ZdS )LazyInitializedzB
    Descriptor to implement lazy initialization of property.
    c                     || _         d S r   )function)r!   rP   s     r$   r%   zLazyInitialized.__init__e   s     r&   c                 f    |                      |          }t          || j         j        |           |S r   )rP   setattrrG   )r!   instanceclsvals       r$   __get__zLazyInitialized.__get__h   s/    mmH%%$-0#666
r&   N)rG   rH   rI   rJ   r%   rV   rL   r&   r$   rN   rN   `   s<         ! ! !    r&   rN   c                       e Zd ZdZd Zd ZdS )ProgramInfoz6
    A helper class to record Program information
    c                 8    dddd| _         i | _        d| _        d S )Nfp32ampfp16infer)op_sizeprogramsmoder+   s    r$   r%   zProgramInfo.__init__s   s-    
 

 			r&   c                     |dv sJ || j         vrJ |d          }|| j         |<   |j                            d                                          | j        |<   | j         |         | j        |         fS )z3
        Record infer program and op size.
        r[   T)is_infer_moder   )ra   descblockr`   )r!   keyprog_creator
infer_progs       r$   __call__zProgramInfo.__call__|   s     -----dm##%D999J!+DM# * 5 5a 8 8 @ @ B BDL}S!4<#444r&   N)rG   rH   rI   rJ   r%   rj   rL   r&   r$   rX   rX   n   s<           
5 
5 
5 
5 
5r&   rX   c                        e Zd Zd Zd Zd ZdS )PartialProgramLayerHookc                     d S r   rL   )r!   forward_programs     r$   before_append_backwardz.PartialProgramLayerHook.before_append_backward         r&   c                     d S r   rL   )r!   whole_programbackward_start_idxs      r$   after_append_backwardz-PartialProgramLayerHook.after_append_backward   rp   r&   c                     d S r   rL   r!   infer_programs     r$   after_inferz#PartialProgramLayerHook.after_infer   rp   r&   N)rG   rH   rI   ro   rt   rx   rL   r&   r$   rl   rl      s+        :::KKK-----r&   rl   c                       e Zd ZdZ	 d@ fd	Zd Zd Zd Zd ZdAd	Z	e
dBd
            Ze
dBd            Ze
dBd            Ze
d             Ze
d             Ze
d             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Z ed             Z!d  Z"e#d!             Z$e#d"             Z%e#d#             Z&e#d$             Z'e#d%             Z(e#d&             Z)d' Z*d( Z+e
d)             Z,d* Z-d+ Z.e#d,             Z/e#d-             Z0d. Z1e
d/             Z2e
d0             Z3d1 Z4ed2             Z5d@d3Z6d4 Z7d5 Z8dAd6Z9d7 Z:d8 Z;d9 Z<e
d:             Z=d; Z>d< Z?d= Z@d> ZAd? ZB xZCS )CPartialProgramLayerac  
    PartialProgramLayer wraps all the ops from layers decorated by `@to_static`
    and execute them as a static subgraph.

    .. note::
        **1. This is a very low level API. Users should not use this API
             directly. Please use `partial_program_from(concrete_program)`
             to create it.
        **2. DenseTensorArray is not currently supported in the output.

    Args:
        main_program(Program): The main program that contains ops need to be executed.
        inputs(list[Variable]): The input list of the decorated function by `@to_static`.
        outputs(list[Variable]): The output list of the decorated function by `@to_static`.
        parameters(list[Tensor]|None): All trainable parameters included in the program. Default None.

    Returns:
        Layer: A Layer object that run all ops internally in static graph mode.
    Nc                     t                                                       t          |           _        t          |d           _        ||ng  _        |                    dt                                 _        t           j        t                    sJ  
                    |           _        t          j        j                            t          j        j                                                  5                                    _        d d d            n# 1 swxY w Y   d _        t+                       _        i  _        d\  }}}t          j                    }	|	r|	                                \  }}|	j        }|5|dv r1t          j        j        j                            |||           _        i  _         i  _!        d  _"        |                    dd            _#        i  _$        g  _%         j        D ]M}
t          |
t          j&                  r1 j%        '                    |
j(        )                                           N fd j        j*        D              _+        d S )	NT)r#   build_strategy)NNN)float16bfloat16)custom_white_listcustom_black_listdtypebackendc                 4    g | ]}j         |         j        S rL   )_outputsre   ).0var_idr!   s     r$   
<listcomp>z0PartialProgramLayer.__init__.<locals>.<listcomp>   s0     
 
 
+1DM&!&
 
 
r&   ),superr%   r   _inputsr   _paramsgetr   _build_strategyr3   _verify_program_origin_main_programr(   baser	   _dygraph_guarddygraphTracer_create_cuda_graph_vec_cuda_graph_vectrainingrX   _infer_info_forward_end_index_map_dygraph_tracer_get_amp_op_list
_amp_dtypestaticr]   
fp16_listsAutoMixedPrecisionLists	_amp_list_pir_scope_cache_legacy_scope_cache_hooker_backend_grad_var_names_in_var_namesr4   r7   re   namer8   _out_var_descs)r!   main_programinputsoutputs
parameterskwargs	amp_dtyper   r   tracerr:   	__class__s   `          r$   r%   zPartialProgramLayer.__init__   s    	#F++$W>>>%/%;zz%zz*:MOOLL$.>>>>>$($8$8$F$F!["11&+2E2L2L2N2NOO 	A 	A#'#>#>#@#@D 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A &==&(#:J7	$&7*,, 	*393J3J3L3L00)I Y2I%I%I !,DD&7&7# E   N !##% 

9d33!< 	; 	;C#y122 ;"))#(--//:::
 
 
 
59]5J
 
 
s   :D  D$'D$c           	      @   |                      |          \  }}|                                 }|                     |           |                                 }|                    d|g           |                                  t          j        |                     |          |                     | j	                  |                     |          | 
                    | j        d          | j        g|R   |                     |          }|                     |          }|S )zQ
        Execute static graph by Interpreter and Return dynamic Tensors.
        x_namesT
program_iduse_scope_cache)_prepare_inputs_prepare_outputs_cast_fp16_if_pure_fp16_prepare_attributesextend_sync_lr_value_with_schedulerr   run_program_valid_varsr   _create_scope_vecr   r   _restore_out_remove_no_value)r!   r   in_varsin_var_namesout_varsattrsrestored_nest_outs          r$   rj   zPartialProgramLayer.__call__   s,    !% 4 4V < <((**$$W---((**i.///**,,,!W%%T\**X&&""?D #    		
 		
 		
 		
 		
 !--h77 112CDD  r&   c           	         |                                  }|                     |           |                                 }|                    d| j        g           |                                  t          j        |                     |          |                     | j	                  |                     |          | 
                    | j        d          | j        g|R   |S )zz
        In sot, inputs and outputs of partial program only contain tensors, so we can skip some step to speed up
        r   Tr   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r!   r   r   r   s       r$   sot_callzPartialProgramLayer.sot_call   s     ((**$$V,,,((**i!34555**,,,!V$$T\**X&&""?D #    		
 		
 		
 		
 		
 r&   c                 ~   | j         }t          |d          rt          |d          r|j        }|j        }t	          |t
                    s
J d            | j         j        } |            }t          j        |                              t          |j
                            }|                    |           dS dS dS )z4Update lr_var value with calculated by lr_scheduler.lr_schedulerlr_varzmust be LRSchedulerN)r   hasattrr   r   r3   r   nparrayastyper   r   	set_value)r!   r   r   r   lr_valuedatas         r$   r   z1PartialProgramLayer._sync_lr_value_with_scheduler  s    0<00 
	#W(6
 6
 
	# (4L!(FlK88OO:OOO84AL#|~~H8H%%,,]6<-H-HIIDT"""""
	# 
	# 
	# 
	#r&   c                     || _         d S r   )r   )r!   hookers     r$   
set_hookerzPartialProgramLayer.set_hooker  s    r&   Fc                     | j         s| j        r| j        }n| j        }|st	          j                    S ||vrg ||<   ||         }|D ]}|j        r|c S t	          j                    }|                    |           |S r   )_in_pir_pt_mode_enable_pir_in_executorr   r   r   Scope_can_reusedr7   )r!   r   r   _scope_cachecached_scopesscopes         r$   
_get_scopezPartialProgramLayer._get_scope  s     	44#? 	40LL3L 	 :<<\))')L$$Z0" 	 	E  
U###r&   c                     |r>| j                             |          }| j        r| j                            |          }|S |                     | j                   }|                     | j        |           |S )Nfor_test)r   cloner   rx   _append_backward_desc_set_grad_typer   )r!   rd   rw   train_programs       r$   _create_programz#PartialProgramLayer._create_program/  s     	! 5;;& <  M | H $ 8 8 G G   66) M m<<<  r&   c                    | j                             |          }t          |          5  t          j        j        j                            || j        dd           d d d            n# 1 swxY w Y   |r#| j	        r| j	        
                    |          }|S |                     |          }|                     | j        |           |S )Nr   FO1)use_fp16_guardlevelr   r   r
   r(   r   r]   
fp16_utilscast_model_to_fp16r   r   rx   r   r   r   )r!   rd   amp_programtrain_amp_programs       r$   _create_amp_programz'PartialProgramLayer._create_amp_program@  s   /55}5MM;'' 	 	M(;;T^E <   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	  	%| D"l66{CC $ : :; G G.?@@@$$s   3A**A.1A.c                    | j                             |          }t          |          5  t          j        j        j                            || j        d           d d d            n# 1 swxY w Y   |r#| j	        r| j	        
                    |          }|S |                     |          }|                     | j        |           |S )Nr   F)r   r   )r!   rd   pure_fp16_programtrain_pure_fp16_programs       r$   _create_pure_fp16_programz-PartialProgramLayer._create_pure_fp16_programP  s#    5;;" < 
 
 ,-- 	 	M(;;!4>% <   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
  		+| P$(L$<$<=N$O$O!$$&*&@&@!' '# .EFFF**s   2A))A-0A-c                 v    | j         }|                     |          }|dk    sJ |                     ||          S Nr   )_train_programget_forward_end_op_idx"_get_forward_backward_program_formr!   rr   forward_end_op_indexs      r$   &_create_forward_backward_train_programz:PartialProgramLayer._create_forward_backward_train_programe  sM    +#::=II#q((((66/
 
 	
r&   c                 v    | j         }|                     |          }|dk    sJ |                     ||          S r   )_train_amp_programr   r   r   s      r$   *_create_forward_backward_train_amp_programz>PartialProgramLayer._create_forward_backward_train_amp_programo  sM    /#::=II#q((((66/
 
 	
r&   c                 v    | j         }|                     |          }|dk    sJ |                     ||          S r   )_train_pure_fp16_programr   r   r   s      r$   0_create_forward_backward_train_pure_fp16_programzDPartialProgramLayer._create_forward_backward_train_pure_fp16_programy  sM    5#::=II#q((((66/
 
 	
r&   c                 *    |                                  S r   )r   r+   s    r$   r   z"PartialProgramLayer._train_program  s    ##%%%r&   c                 j    |                      d| j                  \  }}|                     ||          S )Nr\   )r   r   _build_infer_programr!   programr`   s      r$   _infer_programz"PartialProgramLayer._infer_program  s5    ++FD4HII((':::r&   c                 *    |                                  S r   )r   r+   s    r$   r   z&PartialProgramLayer._train_amp_program  s    '')))r&   c                 j    |                      d| j                  \  }}|                     ||          S )Nr]   )r   r   r  r  s      r$   _infer_amp_programz&PartialProgramLayer._infer_amp_program  s5    ++E43KLL((':::r&   c                 *    |                                  S r   )r   r+   s    r$   r   z,PartialProgramLayer._train_pure_fp16_program  s    --///r&   c                 j    |                      d| j                  \  }}|                     ||          S )Nr^   )r   r   r  r  s      r$   _infer_pure_fp16_programz,PartialProgramLayer._infer_pure_fp16_program  s<    ++D2
 
 ((':::r&   c                 .    |                                  }|S r   )r   r!   r  s     r$   _train_forward_backward_programz3PartialProgramLayer._train_forward_backward_program  s    ==??r&   c                 .    |                                  }|S r   )r   r  s     r$   #_train_amp_forward_backward_programz7PartialProgramLayer._train_amp_forward_backward_program  s    AACCr&   c                 >    t           j                                        S r   )r(   r   Programr+   s    r$    _empty_backward_program_for_evalz4PartialProgramLayer._empty_backward_program_for_eval  s    }$$&&&r&   c                 .    |                                  }|S r   )r   r  s     r$   )_train_pure_fp16_forward_backward_programz=PartialProgramLayer._train_pure_fp16_forward_backward_program  s    GGIIr&   c                 P    t           j                            | j        |           }|S r   )r(   r)   _hash_with_idr   r!   r   s     r$   _train_program_idz%PartialProgramLayer._train_program_id  s"    \//0CTJJ
r&   c                 L    t           j                            | j        |           S r   )r(   r)   r  r  r+   s    r$   _infer_program_idz%PartialProgramLayer._infer_program_id  s    |))$*=tDDDr&   c                 P    t           j                            | j        |           }|S r   )r(   r)   r  r   r  s     r$   _train_amp_program_idz)PartialProgramLayer._train_amp_program_id  s"    \//0GNN
r&   c                 L    t           j                            | j        |           S r   )r(   r)   r  r  r+   s    r$   _infer_amp_program_idz)PartialProgramLayer._infer_amp_program_id  s    |))$*A4HHHr&   c                 P    t           j                            | j        |           }|S r   )r(   r)   r  r   r  s     r$   _train_pure_fp16_program_idz/PartialProgramLayer._train_pure_fp16_program_id  s)    \//)4
 

 r&   c                 L    t           j                            | j        |           S r   )r(   r)   r  r  r+   s    r$   _infer_pure_fp16_program_idz/PartialProgramLayer._infer_pure_fp16_program_id  s    |))$*GNNNr&   c                 X    | j         t          j                            ||                    S r   )r   r(   r)   r  r  s     r$   r   z*PartialProgramLayer.get_forward_end_op_idx  s'    *L&&w55
 	
r&   c                 ,    | j         r| j        S | j        S )z7
        Return current train or eval program.
        )r   r   rw   r+   s    r$   r  zPartialProgramLayer.program  s     
 = 	&%%%%r&   c                     | j         r1t                      r| j        S t                      r| j        S | j        S t                      r| j        S t                      r| j        S | j        S )z?
        Return current train or eval program hash id.
        )	r   r   r  r   r!  r  r  r#  r  r+   s    r$   r   zPartialProgramLayer.program_id  sx    
 = 	. .11$&& .77-- .11$&& .77--r&   c                 d    t                      r| j        S t                      r| j        S | j        S r   )r   r   r   r   r   r+   s    r$   r   z!PartialProgramLayer.train_program  s7    ?? 	'** "" 	'00&&r&   c                 l    t                      r| j        }nt                      r| j        }n| j        }|S r   )r   r  r   r  r  rv   s     r$   rw   z!PartialProgramLayer.infer_program  s>    ?? 	0 3MM "" 	0 9MM /Mr&   c                     d }| j         r;t                      r| j        }nt                      r| j        }n| j        }|d         S | j        }|S r   )r   r   r  r   r  r  rw   )r!   rn   progss      r$   rn   z#PartialProgramLayer.forward_program  s^    = 		1 =@$&& =F<8O"0Or&   c                     | j         r;t                      r| j        }nt                      r| j        }n| j        }|d         S 	 | j        S Nr   )r   r   r  r   r  r  r  )r!   r*  s     r$   backward_programz$PartialProgramLayer.backward_program  s[    = 	9 =@$&& =F<8O 88r&   c                 Z    |                      |           |                     |           |S )z
        Verify that the program parameter is initialized, prune some unused params,
        and remove redundant op callstack.
        )_check_params_all_inited_prune_unused_paramsr!   r   s     r$   r   z#PartialProgramLayer._verify_program+  s2     	%%l333!!,///r&   c                     fd}fd}t          t          || j                                                            }|D ]:}|                                                    |j                  } |||           ;dS )a  
        Why we need add gradient aggregation operation ?
        In some cases, if non leaf nodes are used as output, gradient overwriting will occur, such as
        def forward(self, in):
            x = 2 * in  # <---- x is a non-leaf node in program.
            y = x + 3
            return x, y

        loss = forward(in)[0].sum()
        loss.backward()  # <----- x@grad will be overwritten by elementwise_add_grad Op
        c                 T   t          | t          j                  r3| j        t          j        j        j        t          j        j        j        fvrdS | j	        t          j        t          j        fvrdS                     d          j        D ]}|j        D ]}|| j        k    r  dS dS )zM
            if exist a op whose inputs is var, then return True
            Fr   T)r3   r	   r4   r>   r   VarDescVarTypeDENSE_TENSORSELECTED_ROWSr   r(   float32float64rf   opsinput_arg_namesr   )r:   opin_argr   s      r$   _need_aggregationzKPartialProgramLayer.prepare_gradient_aggregation.<locals>._need_aggregationF  s     c9#566 #($1$2K ; ; uy @@@u"((++/ $ $ 0 $ $F))#ttt *$ 5r&   c           
      L   d}|j         |j        |z   dz   }t          t          fdt	          |                     d          j                                      }t          |          dk    rd S |                     d                              ||j	        |j
        |j                   |D ]1\  }}|                    |           |                    |           2|                     d                              |d         d         dz   dd	|gid
i           d S )Nz
@dy2staticz@GRADc                 h    | d         k    o%t          fd| d         j        D                       S )Nr   c              3   $   K   | ]
}|k    V  d S r   rL   )r   out_argvar_grad_names     r$   	<genexpr>z~PartialProgramLayer.prepare_gradient_aggregation.<locals>._insert_aggregation_ops_for_var.<locals>.<lambda>.<locals>.<genexpr>^  s<        #  =0     r&   r   )anyoutput_arg_names)x	start_idxrC  s    r$   <lambda>zkPartialProgramLayer.prepare_gradient_aggregation.<locals>._insert_aggregation_ops_for_var.<locals>.<lambda>]  sR    adi/     '(t'<     r&   r   )r   r>   r   shaperZ   r   sumXOut)r>   r   r   )	grad_namer   r@   filterr2   rf   r:  r-   
create_varr>   r   rJ  _rename_input_rename_output
_insert_op)	target_programr:   suffixnew_grad_name	found_opsr9   r<  rC  rH  s	          @r$   _insert_aggregation_ops_for_varzYPartialProgramLayer.prepare_gradient_aggregation.<locals>._insert_aggregation_ops_for_varW  sl   !FMMHv-7M    
 n221559:: 	 	I 9~~""t  ##.."Xii	 /    % @ @R  >>>!!-????   ##.."a 1$m];<.	 /    4r&   N)r@   rO  r   r   global_blockr:   r   )	r!   rH  r   rT  r>  rX  to_processed_vars_var
target_vars	    ``      r$   prepare_gradient_aggregationz0PartialProgramLayer.prepare_gradient_aggregation7  s    	 	 	 	 	"&	 &	 &	 &	 &	P !$dm&:&:&<&<==
 
 & 	H 	HD'4466::49EEJ++NJGGGG	H 	Hr&   c                 4  	 |                     d          	| j        r| j                            	          	g }| j                                        D ][}t          |t          j                  r?|                    		                                
                    |j                             \t          	                    d          j                  t          | j                                                  z   }|rHt          	                    d          j                  t          | j                                                  z   }t          | j                  5  t#          |dt          j        t$          t&          fd           t)          j        |g           }	fd| j        D             }	fd| j        D             }	fd	| j        D             }t1          ||||          | _        d d d            n# 1 swxY w Y   | j        r| j                            	|          \  	}|                     |d
z   |	           |t          | j                                                  z
  | j        t:          j                            	|           <   	S )NFr   r   targetszpaddle.static.gradients)r_  r   c                     g | ]I}t          |t          j                                      d                               |j                  JS r   r3   r	   r4   rf   r:   r   r   r:   r  s     r$   r   z=PartialProgramLayer._append_backward_desc.<locals>.<listcomp>  sX       !#y'9::MM!$$((22  r&   c                 j    g | ]/}                     d                               |j                  0S ra  )rf   r:   r   )r   paramr  s     r$   r   z=PartialProgramLayer._append_backward_desc.<locals>.<listcomp>  sB       9>GMM!$$((44  r&   c                     g | ]I}t          |t          j                                      d                               |j                  JS ra  rb  rc  s     r$   r   z=PartialProgramLayer._append_backward_desc.<locals>.<listcomp>  sX       !#y'9::MM!$$((22  r&   r   ) r   r   ro   r   r   r3   r	   r4   r7   rY  r:   r   r-   rf   r:  r   r   r   r@   tupler   calc_gradient_helperr   r   r   r   rt   r]  r   r(   r)   r  )
r!   r   r_  outrH  grad_info_mapx_vars
param_varsr   r  s
            @r$   r   z)PartialProgramLayer._append_backward_desc  s   $$e$44< 	Cl99'BBG='')) 	E 	EC#y122 Ew335599#(CCDDDa((,--DM4H4H4J4J0K0KK	 %	GMM!,,011C8L8L8N8N4O4OOIt}--  'u5-	   !) =#B! ! !   #|  
   BF,  
   #}   (<!6:x( ($3              : | %)\%G%GY& &" --A|W   DM0022333 	#L&&w55	
 s   #BG??HHc                     g }| j         D ]Q}d}|j        D ]E}|j        D ]7}|j        |j        v s|j        |j        v r|                    |           d} n8|r nFR|| _         dS )a'  
        Prune the parameters not used anywhere in the program.
        The `@to_static` may only decorated a sub function which
        contains some unused parameters created in `__init__`.
        So prune these parameters to avoid unnecessary operations in
        `run_program_op`.
        FTN)r   blocksr:  r   r;  rF  r7   )r!   r  required_paramsre  found_paramrf   r<  s          r$   r0  z(PartialProgramLayer._prune_unused_params  s     \ 	 	EK  
 
)  B
b&888 :)<<<'..u555&*	 =
  E 'r&   c                 ~   t                      rt          |          D ]\  }}|j        }| j                                                            |          rd| j                                                            |          j        t          j	        k    r%|
                    d          ||<   |||         _        d S d S )Nr}   )r   r2   r   r  rY  has_varr:   r   r(   r}   r   )r!   r   ir:   r   s        r$   r   z+PartialProgramLayer._cast_fp16_if_pure_fp16  s       		+#G,, + +3xL--//77==+113377==C~& & "%I!6!6GAJ&*GAJO		+ 		++ +r&   c                     d}t          |          |         }t          j                    pt          j                    }| j                                        }| j        j        }|s|s|rd}|S )N!FLAGS_enable_pir_with_pt_in_dy2stF)r   r   _is_fwd_prim_enabled_is_bwd_prim_enabledr   is_cinnr   build_cinn_pass)r!   pir_dy2st_flagin_pir_pt_modeis_prim_enabledin_cinn_backendis_cinn_enableds         r$   r   z#PartialProgramLayer._in_pir_pt_mode  sy    <">22>B%''F4+D+F+F 	 -//11.> 	#o 	# 	#"Nr&   c                 4    d}t          |          |         }|S )NFLAGS_enable_pir_in_executorr   )r!   enable_pir_in_executor_flagenable_pir_in_executors      r$   r   z+PartialProgramLayer._enable_pir_in_executor  s'    &D#!*+F!G!G'"
 &%r&   c                    d| j         j                            d          d| j        j                            d          d| j         d| j        g}| j        rf|                    d| j                            dg           d| j                            d	g           d
| j                            dg           f           | j	        }|                    d|g           |S )Nforward_global_blockr   backward_global_blockis_testr   param_grad_namesre  out_grad_namesri  x_grad_namesrG  r{  )
rn   re   rf   r-  r   r   r   r   r   r   )r!   r   r{  s      r$   r   z'PartialProgramLayer._prepare_attributes  s    " %++A..#!&,,Q//O	
 = 	 LL&(,,Wb99$(,,UB77"(,,S"55	 	 	 -&7888r&   c                     |                      |          }t          |d|| j        |          }|                     |d            |S r   )_parse_skip_gc_varsadd_build_strategy_forr   _apply_inplace_pass)r!   rw   r   forward_skip_varsbuilt_infer_programs        r$   r  z(PartialProgramLayer._build_infer_program  sW     44]CC4  
 
 	  !4d;;;""r&   c                    |t          | j        j                  z   }|j                            d                                          }|                     |          | j                            dg           z   }t          |||| j
        |          }|                     ||          }t          |d|| j
        |          }|                     ||           ||gS )Nr   re  )r-   r   r8   re   rf   r`   r  r   r   r  r   r  )	r!   rr   r   backward_start_op_indexbackward_end_op_indexbackward_skip_varsbackward_built_programr  forward_built_programs	            r$   r   z6PartialProgramLayer._get_forward_backward_program_form%  s
    #7M!:
 :
 #
 !. 2 8 8 ; ; C C E E "55
 
 $$Wb112 "8#! "
 "
 !441
 
 !7  !
 !
 	  !68NOOO%'=>>r&   c                     dddd}t           j                                        }t          j                    rdnd}|                     ||          }|                     |          }|r||dd}|r||dd}d S d S )Nboolz	list[str])use_cudamem_opt_skip_varsfor_partial_blockTF)r(   r   r  r   is_compiled_with_cudar  )	r!   rn   r-  
attr_typesempty_startup_programr  forward_mem_opt_skip_varsbackward_mem_opt_skip_varsr   s	            r$   r  z'PartialProgramLayer._apply_inplace_passK  s    !,!'
 


 !' 5 5 7 7577B44U$($<$<-%
 %
! &*%=%=o%N%N" 	$%>%) E  	$%?%) EEE	 	r&   c                 r   g }| j         D ]R}t          |t          j        j        j                  r,|                    |j                                                   S| j	        D ]R}t          |t          j        j        j                  r,|                    |j                                                   S|S )zK
        Returns Variable Names from self._inputs and self.outputs
        )
r   r3   r(   r   r	   r4   r7   re   r   r   )r!   	var_namesr:   s      r$   _inout_var_namesz$PartialProgramLayer._inout_var_namesu  s    
 	< 	2 	2C#v{4=>> 2  111= 	2 	2C#v{4=>> 2  111r&   c                 0   t          | j                  }|                                j                                        D ]!\  }}|j        r|                    |           "|r2t          j        |j	        d          D ]}|                    |           |S )z
        Parse variables that need to skip GC after execute it.
        If specify backward_program, it will keep the variables used in backward.
        T)
r   r  rY  varsitemsis_datar7   r   #parse_safe_eager_deletion_skip_varsre   )r!   r  r-  	skip_varsvar_namer:   s         r$   r  z'PartialProgramLayer._parse_skip_gc_vars  s     T233	$11338>>@@ 	+ 	+MHc{ +  *** 	+ D %t  + +   ****r&   c                    t          |t          t          f          sJ t          j                            |          }g }g }t          j                    }t          |          D ]\  }}t          |t          j
                  rId}t          j                            || j        |         j                                        d|d          }nbt          |t          j        j                  rB|j        r8|j                            |          s|                    |d          }d|_        n|}n|                    | j        |         j                                                   |                    |           ||fS )z1
        Prepare inputs, outputs, attrs.
        NFT)valuer   persistableplace	zero_copy)r3   rg  r@   r(   r)   r*   r	   _current_expected_placer2   r   ndarrayr   r5   r6   r   re   r   stop_gradientr  _equals_copy_tor7   )	r!   r   flatten_inputs
input_varsinput_var_namesexpected_placers  r  r:   s	            r$   r   z#PartialProgramLayer._prepare_inputs  sx    &5$-00000--f55
":<<!.11 	# 	#HAu%,, j''a-2244 %(" (   E4:#455  &  u{/B/B"0 0    ..??C(,C%%CC""4<?#7#<#<#>#>???c""""?**r&   c                 T    t           j        j                            | j                  S r   )r(   r	   r   #create_empty_tensors_with_var_descsr   r+   s    r$   r   z$PartialProgramLayer._prepare_outputs  s&    $HH
 
 	
r&   c                 6    |                      ||          }|gS )Nr   )r   )r!   r   r   inner_scopes       r$   r   z%PartialProgramLayer._create_scope_vec  s)    oo!? & 
 
 }r&   c                     t           j                            t           j        j        j        g dt           j        j        j        d          }d|_        |S )N
cuda_graphT)r   r5   r6   r4  r5  FP32RAWr  r!   r:   s     r$   r   z*PartialProgramLayer._create_cuda_graph_vec  sG    jL %L $
 
 !
r&   c                 j      fd}t           j        j        |          D ]\  }} |||           d S )Nc                 r    j         |          }t          |t          j                  sJ |j        |_        d S r   )r   r3   r	   r4   r  )r   eager_tensorr:   r!   s      r$   set_stop_gradientzDPartialProgramLayer._update_stop_gradient.<locals>.set_stop_gradient  s9    -'Cc9#566666),):L&&&r&   )zipr   r8   )r!   r   r  r9   r:   s   `    r$   _update_stop_gradientz)PartialProgramLayer._update_stop_gradient  s`    	; 	; 	; 	; 	;
 DM18<< 	( 	(HCc3''''	( 	(r&   c                     | j                                         }t          | j         j                  D ]\  }}||         ||<   | j                             |          }|t          |          dk    r|d         }|S )zZ
        Restores same nested outputs by only replacing the Variable with Tensor.
        Nr   r   )r   r   r2   r8   r0   r-   )r!   r   flatten_outputsrs  r9   outss         r$   r   z PartialProgramLayer._restore_out  s    
 -..00 566 	/ 	/FAs#+A;OC  }$$_55D		Q7Dr&   c                 .    |                     d          S )NTr   )r   r1  s     r$   _clone_for_testz#PartialProgramLayer._clone_for_test  s    !!4!000r&   c                     t          |t          j        j                  r1|j        dgk    r%|                                d         t          k    rdS dS )Nr   r   TF)r3   r   r5   r6   rJ  numpyr   r  s     r$   _is_no_valuez PartialProgramLayer._is_no_value  sI    c4:,-- 	#)s2B2Byy{{1~!:::tur&   c                     t          |t          j        j                  r                     |          rdS |S t          |t
          t          f          rt          |t
                    rt           fd|D                       }n fd|D             }t          |          t          |          k    }t          |          dk    r|rdS t          |          dk    r
|r|d         S |S |S )zK
        Removes invalid value for various-length return statement
        Nc              3   F   K   | ]}                     |          |V  d S r   r  r   r:   r!   s     r$   rD  z7PartialProgramLayer._remove_no_value.<locals>.<genexpr>  sJ        43D3DS3I3I     r&   c                 >    g | ]}                     |          |S rL   r  r  s     r$   r   z8PartialProgramLayer._remove_no_value.<locals>.<listcomp>  s,    MMMsd6G6G6L6LMsMMMr&   r   r   )r3   r   r5   r6   r  rg  r@   r-   )r!   r   reshas_removeds   `   r$   r   z$PartialProgramLayer._remove_no_value  s    h
 122 	  ** tO5$-00 	(E** N    #+    
 NMMMhMMMh--#c((2K 3xx1}}}tSQ;1vJr&   c                    |D ]}|j         t          j                    z   }|j                            d                              |                                          }|_|                    |                                           d S r   )	r   r   grad_var_suffixre   rf   find_varencoder   r>   )r!   paramsr   re  rN  grad_vars         r$   r   z"PartialProgramLayer._set_grad_type  s      	2 	2E
T%9%;%;;I$)//22;;I<L<L<N<NOOH  1111	2 	2r&   c           	      N   t          | j        t          t          f          s%t	          dt          | j                   d          t                      }t          | j                  D ]a\  }}t          |t          j	        j
                  s#t	          d| dt          |           d          |                    |j                   b|j        D ]R}|j                                        D ]6\  }}t          |t           j                  r||vrt%          d| d          7SdS )a  
        Check all params from main program are already initialized, see details as follows:
            1. all parameters in self._params should be type `framework.EagerParamBase` which are created in dygraph.
            2. all parameters from transformed program can be found in self._params.
               Because they share same data with EagerParamBase of original dygraph.
        zRType of self._params in PartialProgramLayer should be list or tuple, but received .zType of self._params[zG] in PartialProgramLayer should be Parameter or Variable, but received zv
	We don't support to define layer with parameters in the function decorated by `@to_static`.
	But we found parameter(z) was created in the decorated function.

	Revise suggestion: 
		1. Please ensure all your sublayers are inherited from nn.Layer.
		2. Please use nn.ParameterList and nn.LayerList as container instead of using a native Python container such as ListN)r3   r   r@   rg  	TypeErrorr>   r<   r2   r   r5   r6   r=   r   rn  r  r  r	   	Parameter
ValueError)r!   r   param_and_buffer_names_setrs  r:   rf   r   s          r$   r/  z,PartialProgramLayer._check_params_all_inited   sy    $,u66 	zeijnjvewewzzz   &)UU"-- 	5 	5FAsc4:#455  CA  C  Cvz{~vv  C  C  C   '**384444!( 	 	E"Z--// 
 
	cc9#677 	#===(Y:>Y Y Y  
	 	r&   c                     |r|nd S r   rL   )r!   r  s     r$   r   zPartialProgramLayer._valid_varsB  s    %tt%r&   r   )NFrF   )DrG   rH   rI   rJ   r%   rj   r   r   r   r   r   r   r   r   r   r   r   rN   r   r  r   r  r   r  r  r  r  r  r  r  r  r  r!  r#  r   rK   r  r   r   rw   rn   r-  r   r]  r   r0  r   r   r   r   r  r   r  r  r  r   r   r   r   r  r   r  r  r   r   r/  r   __classcell__)r   s   @r$   rz   rz      s        * 9=/
 /
 /
 /
 /
 /
b! ! !8  0# # #     $ ! ! ! !  % % % % + + + +( 
 
 
 
 
 
 
 
 
 & & _& ; ; _; * * _* ; ; _; 0 0 _0 ; ; _;   _   _ ' ' _'   _   _ E E _E   _ I I _I   _ O O _O
 
 

 & & X& . . X.& ' ' X'   X   X 9 9 X9*
 
 
NH NH NH` 4 4 4l' ' '2
+ 
+ 
+ 
 
 X
 & & X&  @ 
# 
# 
# #? #? #?J     T   _   $%+ %+ %+N
 
 

   	 	 	( ( (   1 1 1    82 2 2     D& & & & & & &r&   rz   Fc                 t    | j         }|r|r
|dd          }t          | j        || j        | j        fi | j        S r,  )r   rz   r   r   r   r   )concrete_programfrom_methodr   s      r$   partial_program_fromr  F  sa    $F  + % #	 
 
!  r&   c                 f   ||k     rt           j                            t          j        | j        ||          |          }|r(|j                            dt          |                     |                    t          j	                    t          j                               t          j        |j                  }|                                }t          |j        d          r|j        j        |_        nvt           j                                        }|                     d          j                                        D ]+}|                    d                              |d           ,t-          | j        |j                  D ])\  }	}
|
j                            |	j        j                   *|S )N)r|   skip_gc_varsr   r   F)r(   r   CompiledProgramr   Graphre   _graphr<   _compiler   r	   r  IrGraph
to_programr   _programr   r  rf   r  values_clone_variabler  rn  set_parent_idxparent)r  start_op_indexend_op_indexr|   r  compiled_programir_graphbuilt_programr:   origincurrents              r$   r  r  V  s    $$!=88Jw|^\BB) 9 
 
  	H#''IGGG!!JLL);==	
 	
 	
 $%5%<== ++--#,n== 	P)9)B)OM& --//==##(//11 	? 	?C""223>>>> w~}/CDD 8 8##FK$67777r&   rF   )NN)'copyr   r  r   r(   r   paddle.amp.auto_castr   r   paddle.baser   r   r	   r
   paddle.base.compilerr   paddle.base.data_feederr   r   paddle.base.dygraph.baser   paddle.base.frameworkr   paddle.optimizer.lrr    r   r)   r   r   r   __all__r   rN   rX   rl   rz   r  r  rL   r&   r$   <module>r     s;                          C C C C C C C C @ @ @ @ @ @ @ @ @ @ @ @ . . . . . . = = = = = = = = ; ; ; ; ; ; + + + + + + + + + + + +                6' 6' 6' 6' 6' 6' 6' 6'r       5 5 5 5 5 5 5 56. . . . . . . .r& r& r& r& r& r& r& r&j     JN     r&   