
    x-je                     H   d dl Z d dlZd dlZd dlmZmZmZmZmZ d dl	m
Z
mZ d dlmZ d dlmZ ddlmZ ddlmZmZmZmZmZmZmZmZ dd	lmZ d
dlmZmZ  ee j                   Z! G d de          Z"d Z#ddZ$d Z%ddZ& ed           G d de                      Z'dS )    N)ProgramStats_append_grad_suffix__find_op_path__get_no_grad_set_name_rename_arg_)OP_ROLE_KEYOpRole)core)unique_name   )OperatorDistAttr)get_loss_opinsert_dependencies_for_two_opsis_backward_opis_recompute_exclude_opis_recompute_op6naive_set_dist_op_attr_for_program_by_mesh_and_mappingset_dist_op_desc_original_idset_var_dist_attr)
get_logger   )PassBaseregister_passc                   f     e Zd Z fdZed             Zed             Zd Zd Zg fdZ	d Z
 xZS )RecomputeStatec                 x    t                                          ||           i | _        g | _        g | _        d S )N)blockops)super__init__seg_op_deps_checkpoints_reserved_vars)selfr   r   	__class__s      q/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/passes/auto_parallel_recompute.pyr    zRecomputeState.__init__0   s>    u#...     c                     | j         S N)r"   r$   s    r&   checkpointszRecomputeState.checkpoints6   s      r'   c                     | j         S r)   )r#   r*   s    r&   reserved_varszRecomputeState.reserved_vars:   s    ""r'   c                 >    t          d | j        D                       S )Nc              3   4   K   | ]}t          |          V  d S r)   )r   .0ops     r&   	<genexpr>z.RecomputeState.is_recompute.<locals>.<genexpr>?   s*      ::2?2&&::::::r'   )anyr   r*   s    r&   is_recomputezRecomputeState.is_recompute>   s!    ::::::::r'   c                    t          | j                  D ]\  }}t          |          r d S |j        D ]^}|| j        v r(| j        |         d                             |g           3i | j        |<   |g| j        |         d<   g | j        |         d<   _|j        D ]^}|| j        v r(| j        |         d                             |g           3i | j        |<   g | j        |         d<   |g| j        |         d<   _t          |          s0| j                            |j                   t          |          s#|
                    d          }t          j        d|          }|                    d          }|| j        vr|g| j        |<   x| j        |         d         dz   |k    s
J d            | j        |                             |g           d S )	Nvar_as_input_opsvar_as_output_opsop_namescopez/auto_parallel/rc_[0-9]*r   r   z0The recompute segment's ops should be continuous)	enumerater   r   input_arg_namesvar_op_depsextendoutput_arg_namesr   r"   r   attrresearchgroupr!   )r$   ir2   nameseg_nameress         r&   build_stateszRecomputeState.build_statesA   s#   tx(( "	7 "	7EArb!! * E E4+++$T*+=>EEqcJJJJ-/D$T*BCD$T*+=>BDD$T*+>??+ F F4+++$T*+>?FFsKKKK-/D$T*ACD$T*+=>CD#D$T*+>??"2&& !(()<===.r22 ww~..H)6AACyy||Ht///./S **'1"59Q>>>F ?>>  *111#6666E"	7 "	7r'   c                    g }| j                                         D ]l}t          |          dk    r|                    |d         |d         dz   g           | j                            | j        |d                  j                   mt          |d          D ]H}|t          |          k     sJ d| dt          |           d            |	                    |           I|S )	Nr   r   r:   T)reversezthe no_recompute_segments idx [z)] should be lower the number of segment [])
r!   valueslenappendr"   r>   r   r?   sortedpop)r$   no_recompute_segmentssegmentssegment_idxrD   s        r&   get_recompute_segmentsz%RecomputeState.get_recompute_segmentsf   s    +2244 	Q 	QK;1$$OO[^[_q-@ABBB$$TXk"o%>%OPPPP-t<<< 	 	As8}}$$$n!nn^abj^k^knnn %$$ LLOOOOr'   c           	         d | j         D             }d|vrd|vrdS d}|t          | j                   k     r| j         |         }d|j        v rn|j        dk    r%| j                            |j                   |dz  }a|j        d	vr|dz  }p|j        dk    rd
nd}|                    |          (t          |                    |                    r|dz  }|                    |          }t          j	        d          }t          j
        d                    |dg                    }| j                            |           | j                            |dt          j        j        j        dd          }	dg}
|j        }t)          ||	|
||j                  }|                    d          du rdn!t/          |                    d                    }| j                            |j        di d|	i|dd          }|                    d|                    d                     t7          |||
||j                   | j                             ||           |j                            ||g           |j                            dd           |j                            dd           |                    |	j         |           |dz  }|t          | j                   k     | j        !                                 dS )z
        If program's forward part has 'dropout' op, this function will insert
        a seed op before it to guarantee that two dropout op have the same outputs.
        c                     g | ]	}|j         
S  typer0   s     r&   
<listcomp>zDRecomputeState.modify_forward_desc_for_recompute.<locals>.<listcomp>{   s    ///BG///r'   dropoutfused_dropout_addNr   gradseedr   )r[   r\   seed_tensorSeedrc_seed.tmpint32F)rE   dtyperY   persistablestop_gradientr:   chunk_idfix_seedOutT)r^   	force_cpu)indexrY   inputsoutputsattrsr9   r   )"r   rM   rY   r#   r>   r?   inputget_op_dist_attr_for_programr   generategenerate_with_ignorable_keyjoinrN   r   
create_varr
   VarDescVarTypeDENSE_TENSORprocess_meshr   ri   r@   int_insert_op_without_syncidx	_set_attrr   insertdesc	set_inputset_input_dist_attrrE   _sync_with_cpp)r$   dist_contextop_typesop_idxcur_opseed_tensor_namecur_op_dist_attrop_unique_namevar_unique_nameseed_varref_dims_mappingref_process_meshseed_var_dist_attrr^   seed_ops                  r&   !modify_forward_desc_for_recomputez0RecomputeState.modify_forward_desc_for_recomputev   si   
 0/dh///H$$)<H)L)LFs48}}$$Xf%F$${f$$#**6+BCCC!{"BBB!!'0C!C!C  ||,--9c-..? ?9 !+HHPP )1)<<N)E.%011 O &&777z,,$\)6!# -  H !#t/<!2  )2" " " ;;z**e33 V,,--  j88j)#$77 9  G nfkk..I.IJJJB  )2    HOOFG,,,K!!"2_4EFFFK!!*e444K!!&!,,,001   aKF_ s48}}$$b 	
!!#####r'   )__name__
__module____qualname__r    propertyr+   r-   r5   rH   rT   r   __classcell__r%   s   @r&   r   r   /   s        ! ! ! ! ! ! ! X! # # X#; ; ;#7 #7 #7J <>     [$ [$ [$ [$ [$ [$ [$r'   r   c                     t          | j                                                  D ])}|j        | j                            |          k    r|c S *dS )Nr:   )ranger   op_sizer2   )r   r   r}   s      r&   _find_op_indexr      sU    UZ''))**  ;%*--,,,,JJJ -2r'   c                 l   |t                      }nt          |          }t                      }|                                 D ];}d|j        v r n/|j        r'|                    t          |j                             <|                    t          t          t          |                               |S )zget no grad varNz@GRAD)
setr   	list_varsrE   rg   addr   updatelistmap)programno_grad_setno_grad_set_namevars       r&   _get_stop_gradientsr      s    ee+K88uu  "" A AchE 	A  !5ch!?!?@@@D%9;!G!GHHIIIr'   c                 \   t          |           dk    rg S g }| D ]}t          |t          j        j                  r|j        }t          |t                    r|d         }d}|                                D ]8}|                    |          r|	                    |          j
        r2||vrd}9|ry|j                                        }	|	                    |           t          |	||           |	                    t          t           j                   |                    |	           |S )zD
    Get the recomputed ops which will insert the backward part
    r   FT)rM   
isinstancepaddlestaticOperatorr   tupler?   has_varr   rf   	append_op	copy_fromr   r~   r   r	   BackwardrN   )
descsr   
main_blockvars_should_be_holdr   result_descsr   	is_neededrE   new_op_descs
             r&   _add_needed_descs_to_blockr      s:    5zzQ	L - -dFM233 	9DdE"" 	7D	))++ 	! 	!D!!$'' JNN4,@,@,L ... 	 	-*..00K!!$'''(dLIII!!+v???,,,r'   c                 p    t          | |          }t          |                                 |gg |          }|S r)   )r   r   global_block)main_programlossr   r   op_paths        r&   _find_op_pathr     s@    *<EE!!##dVR1A G Nr'   auto_parallel_recomputec                   D     e Zd Z fdZd Zd Zd	dZd Zd Zd Z	 xZ
S )
RecomputePassc                     t                                                       |                     dd            |                     dd            |                     dd            |                     dg            d S )Nr   r   r   rQ   )r   r    set_attr)r$   r%   s    r&   r    zRecomputePass.__init__  sp    fd###nd+++mT***-r22222r'   c                 b    |                      d          dS |                      d          dS dS )Nr   Fr   T)get_attrr*   s    r&   _check_selfzRecomputePass._check_self  s5    ==((05==  (5tr'   c                     dS NTrW   )r$   
other_passs     r&   _check_conflictzRecomputePass._check_conflict#  s    tr'   r   c                    d }t          |          }d t          |          D             }d t          |          D             }d}d}	d}
t          |          D ]\  }}|
|z  |k     r|	dz  }	 ||           |t          |          dz
  k     r(|j        j        ||dz            j        j        k    r|
dz  }
|
|z  |k     rft          |          D ]U\  }}|j        j        |k    r@|dz  }||                             |           ||                             |j                   Vt          |          |	|z   k    s J d|	|z    dt          |                       ||fS )zf
        Get ops and op_names of each process mesh excluding ops within the first "sr" chunks
        c                 r    t          |           st          |           r|                     dd           d S d S )Nr9    )r   r   r~   )r2   s    r&   reset_recompute_opz<RecomputePass.get_ops_per_device.<locals>.reset_recompute_op+  sF    r"" 1&=b&A&A 1^R000001 1r'   c                     g | ]}g S rW   rW   r1   _s     r&   rZ   z4RecomputePass.get_ops_per_device.<locals>.<listcomp>0  s    EEEEEEr'   c                     g | ]}g S rW   rW   r   s     r&   rZ   z4RecomputePass.get_ops_per_device.<locals>.<listcomp>1  s    JJJQbJJJr'   r   r   zbThe sum of pushed_ops_count and reset_ops_count must be the same as length of ops, but the sum is z while length of ops is )rM   r   r;   	dist_attrrz   rN   rY   )r$   r   all_ops_process_meshessrr   all_process_meshes_countops_of_stagesop_names_of_stagespushed_ops_countreset_ops_countri   op_idr2   idrz   s                  r&   get_ops_per_devicez RecomputePass.get_ops_per_device&  s   
	1 	1 	1 $''=#>#> EEU+C%D%DEEEJJ%0H*I*IJJJ"3 	; 	;IE233b881$""2&&&C1$$L-uqy>+89 9 A33b88$-.D$E$E ; ; L<,<<$)$!"%,,R000&r*11"':::	;
 3xx?-===== x  rA  DT  rT  x  x  nq  ru  nv  nv  x  x >== 000r'   c                    |                      d          }|                      d          }|                      d          }|                      d          | _        |                      dd          | _        |                      dg           | _        |                                }t          |||          }g }	|D ]/}
|
j        j        |	vr|	                    |
j        j                   0| 	                    ||	| j                  \  }}t          |          }d |D             }| j        D ]}|d	         }|dk    r|n|}|d
         }|d         }|d         }t          |          }t          |          }||z   |z   }t          |          }t          |          D ]\  }}d}t          |          }t          ||z
  dz             D ]]}||||z            |k    rJ||k     rD|dz  }||                             t          t          ||z   ||z   |z                                  ^t                              d|            t          |          D ]k\  }}|D ]c}t#          ||         |                   rF||         |                             d          }||         |                             d|dz              dlt)          ||          } |                                 sd S |                     | j                   |                                  |                     |          }!|!g k    rd S t          |!          D ]\  }\  }"}#t                              d|dz    dt          |!           d           t                              d| j        |"         j         d| j        |"         j         d| j        |"         j         d           t                              d| j        |#dz
           j         d| j        |#dz
           j         d| j        |#dz
           j         d           g }$|!D ]7}%|$                    |                     |%d         |%d                              8t?          |$          t?          | j                   z
  }&t                              dt          |&           d|& d           |$                    | j!                   |$                    | "                                           t          t?          |$          t?          | j                   z            }$i }'i }(|j#        $                                })t          |!d d d                   D ]\  }}%||%d         |%d                  }*d| }+|*D ]/}
g },|,                    |
j                   |,                    |
j                   | j        %                    |
          }-|-J |,D ]}.|&                    |.          j'        s|.|$v r!|.|'vr|-j        }/|.|
j        v r|-(                    |.          }0n|-)                    |.          }0|.|+z   |'|.<   |&                    |.          }1|*                    |'|.         |1j+        |1j,        |1j        |1j'        |1j-                  }2t]          | j        |2|0|/|-j/                   ֐1ta          |*|)||$| j                  }3|'D ]}4tc          |3|4|'|4                    ||%d         dz
           }5d|3g|(|5j2        3                                <   |j        }6ti          |          }7tk          ||7          }8| j        j6        }9|8dk    sJ t          t          |6          dz
  |8d          D ]l}|6|         }:g },|,                    |:j                   |,                    |:j                   |'D ]:};|;|,vr| 7                    |:|'           tc          |:j2        g|;|'|;                    ;|:j2        3                                }<|<|9j8        v r|9j8        |<         }=|=|(v r|(|=         d         r|:j9        }>|>dz
  dk    r6|6|>dz
           j        d k    r"|>dz  }>|>dz
  dk    r|6|>dz
           j        d k    "|(|=         d         }3d }?tu          t          t          |3                              D ]\  }@}A|;                    |>d!"          }?|?j2        }B|B<                    |A           |B=                    |B>                                           | j        ?                    |A3                                          }C|CJ | @                    |?|C|'           d#|(|=         d<   |?rw|j        |?j9        dz
           }D|?}E| j        %                    |D          j        }F| j        %                    |E          j        }G|F|Gk    rt          ||>|D|E| j        dd#d$%           n|B                                 d S )&Nr   r   rQ   r   r   r   refined_ops_patternsc                     g | ]}g S rW   rW   r   s     r&   rZ   z4RecomputePass._apply_single_impl.<locals>.<listcomp>d  s    <<<ar<<<r'   nummain_opspre_opssuf_opsr   z,The excluded ops in recompute segments are:
r9   _exclude_rczrecompute segment[/rK   zsegment start op: [z]: [z] [zsegment end op: [zfound [z'] vars which cross recompute segment: [z6],better checkpoints might be set to reduce those varsr:   z	.subprog_)rE   shapere   rY   rf   rg   rh   TsumnoprX   Frecompute_segment_dep)r5   syncr9   )Cr   _dist_context_sr_refined_ops_patternsr   r   r   rz   rN   r   rM   r;   r   r>   r   loggerinfor   r@   r~   r   r5   r   rH   rT   debugr   rY   r<   r?   get_out_of_subgraph_varsr   r+   r-   get_input_nodesr   _create_blockrr   r   rf   get_input_dims_mappingget_output_dims_mappingrv   r   re   rg   r   ri   r   r   r   original_idr   r   dist_op_contextreset_op_dist_attrgrad_op_id_to_op_idr}   reversedr|   r   set_original_idr   $get_op_dist_attr_for_program_with_idset_op_dist_attrr   r   )Hr$   r   startup_programcontextr   r   rQ   r   r   r   r2   ops_devicesop_names_devicesall_ops_lenall_exclude_ops_idsrefined_ops_patternr   r   r   r   main_start_idmain_ops_lenpattern_opspattern_ops_lenr   op_names_devicepattern_countops_len_devicerD   exclude_ops_idsr   rc_mark_strrc_staterR   idx1idx2r   segment
cross_varsvar_name_dictckpt_ops_dictbuffer_blockfwd_ops
var_suffixinput_and_output_namesr   rE   r   r   ref_varrc_varsegment_descskeyckpt_opr   loss_oploss_op_idxr   grad_opvarnamer   	fwd_op_idr}   rc_opr   op_descrc_descfwd_op_dist_attrprior_opposterior_op
prior_meshposterior_meshsH                                                                           r&   _apply_single_implz RecomputePass._apply_single_implL  s   }}V$$mmM22 $.E F F!]]>::==q))%)]]3I2%N%N" "..00
dK@@ "$ 	I 	IB|(0FFF&--bl.GHHH )-(?(?+TX)
 )
%% 'll<<+;<<<#'#= 	 	%e,Caxx[  +:6H))4G))4GLLMx==L!H,w6K!+..O'01A'B'B  #O !!$_!5!5~?!CDD  A'A,?(?@KOO)C//%*+B/66  %$%$5$%$5$D!" !"   " 	Q<OQQ	
 	
 	
 $--@#A#A 	 	B(  ";r?5#9:: "-b/%"8"="=n"M"MKOE*44&m(C   "*g66$$&& 	F 	2243EFFF223HIIr>>F(22 	 	OA|dLLFa!eFFc(mmFFFGGGLL Qhl4&8&=  Q  Q8<PTCUCe  Q  Qjrjvw{j|  kN  Q  Q  Q   LL [HL$:$?  [  [X\RVYZRZE[Ek  [  [pxp|  ~B  EF  ~F  qG  qX  [  [  [    ! 	 	G&&11'!*gajII    ,--H4H0I0II
Cc*oo C Cj C C C	
 	
 	
 	""8#9:::""8#;#;#=#=>>>"#$$s8+?'@'@@
 

 !)7799#HTTrTN33 B	N B	NJAwgaj71:56G(QJ . .)+&&--b.@AAA&--b.ABBB &CCBGG ! (3332 $ $D"t,,8!#666 =00+;+H(2#555 0 G G M M -,
 !1 H H N N - /3Z.?d+",.."6"6!+!6!6!.t!4")-")-!((/(;*1*? "7 " " * .",,%5%>   =$L 7#" M % E E]Cs1CDDDD
 gaj1n-G9=}8MM',224455 nj))$Z99,<b     s3xx!|["55 A	 A	A!fG%'""))'*ABBB"))'*BCCC( N N"888''???gl^WmG6LMMMM ",2244KoAAA+?L	---	2J12M-!+C'Q,,3sQw<+<+E+Eq 'Q,,3sQw<+<+E+E$1)$<Q$?M E&.tIm4L4L/M/M&N&N  
7 * B Be !C ! ! #(*))'222//

===+/+=+b+b#//11, ,(  0;;;--!#3]    38M),Q/ #->%)a-#@', .KK ( * # !.KK , * ' &77; * # ( , $ 2-1%*-D	 	 	 	 	##%%%%%r'   c                    | j                             |          }|J |j        D ]I}||                                v r1|                    |          }|                    ||         |           J|j        D ]I}||                                v r1|                    |          }|                    ||         |           Jd S r)   )	r   rr   r<   keysget_input_dist_attrr   r?   get_output_dist_attrset_output_dist_attr)r$   r2   r  op_dist_attrrq   in_dist_attroutputout_dist_attrs           r&   r   z RecomputePass.reset_op_dist_attrK  s    )FFrJJ'''' 	 	E**,,,,+??FF00!%(,   ) 	 	F++---- , A A& I I11!&)=  	 	r'   c                    t                      }d|_        |j        |_        |j        |_        |j        |_        |j        |_        |j                                        D ]e}||                                v r*|j        |         }|                    ||         |           B|j        |         }|                    ||           f|j	                                        D ]e}||                                v r*|j	        |         }|
                    ||         |           B|j	        |         }|
                    ||           f| j                            ||           d S r   )r   r5   impl_idx	impl_typerz   ri   inputs_dist_attrsr.  r   outputs_dist_attrsr1  r   set_op_dist_attr_for_program)	r$   r2   old_dist_attrr  new_dist_attrrq   r3  r4  r5  s	            r&   r   zRecomputePass.set_op_dist_attr[  s   (**%)"!.!7"/"9%2%?"!.!7"499;; 	G 	GE**,,,,,>uE11!%(,     ->uE11%FFFF#6;;== 	J 	JF++---- - @ H22!&)=    !. @ H226=IIII77MJJJJJr'   )r   )r   r   r   r    r   r   r   r,  r   r   r   r   s   @r&   r   r     s        3 3 3 3 3    $1 $1 $1 $1L}& }& }&~   K K K K K K Kr'   r   r)   )(loggingrA   r   paddle.base.backwardr   r   r   r   r   /paddle.distributed.fleet.meta_optimizers.commonr   r	   paddle.frameworkr
   paddle.utilsr   #auto_parallel.static.dist_attributer   auto_parallel.static.utilsr   r   r   r   r   r   r   r   utils.log_utilsr   	pass_baser   r   INFOr   r   r   r   r   r   r   rW   r'   r&   <module>rH     s3    				               P O O O O O O O ! ! ! ! ! ! $ $ $ $ $ $ B B B B B B	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ) ( ( ( ( ( . . . . . . . .	GL	!	!b$ b$ b$ b$ b$\ b$ b$ b$J     "  >    ())`K `K `K `K `KH `K `K *)`K `K `Kr'   