
    IЦiu                    z
   % S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
JrJr  S SKJrJrJrJrJrJrJrJr  S SKrS SKrS SKJr  S SKJs  Jr  S SKJr  S SK J!r!J"r"  S SK#J$r$J%r%  S SK&J'r'J(r(J)r)J*r*  S S	K+J,r,  S S
K-J.r.  SSK/J0r0  SSK1J2r2J3r3J4r4  SSK5J6r6  SSK7J8r8  SSK9J:r:J;r;  \(       a  S SK<r<\0Rz                  r>\?\@S'   \R                  " \B5      rC\R                  \@S'   \R                  R                  rF\R                  R                  rG\ " S S5      5       rH\ " S S5      5       rI\ " S S5      5       rJS\R                  S\?4S jrLS\R                  S\?4S jrNS\R                  S\?4S jrOS\R                  S\P4S jrQ " S  S!5      rR\R" 5       rS S_S"\R                  S#\\R                     S$\\R                     S%\\U   S\R                  4
S& jjrVS\R                  S\?4S' jrWS\R                  S\?4S( jrXS\R                  S\?4S) jrYS\R                  S\?4S* jrZS\R                  S\?4S+ jr[S\R                  S\?4S, jr\S\R                  S\?4S- jr]S\R                  S\?4S. jr^S/\R                  S\\\R                     \\R                     4   4S0 jr_S1\\R                     S2\U4S3 jr`S/\R                  S1\\R                     S4\\R                     S5\PS\\R                  \R                  4   4
S6 jraS/\R                  S\\R                  \R                  4   4S7 jrb\P" S85      rcS9\PS\P4S: jrdS\R                  S\P4S; jreS<\R                  4S= jrf\R                  " S5      S> 5       rhS?\\R                  \P4   S\\\R                  \P4      4S@ jriSA\R                  S\R                  4SB jrjS/\R                  SC\R                  SD\R                  SE\PS\\R                  \R                  4   4
SF jrkS/\R                  S\R                  4SG jrl S_S"\R                  SH\ISI\J4SJ jjrmSK rnS\H4SL jroS<\R                  4SM jrpS"\R                  SN\\q   SO\\q   SP\qSH\ISQ\\R                     S\\q\\P   \\P   4   4SR jrrS SSKsJtrt  ST ru S`S"\R                  SH\IS\\R                     4SU jjrv SaS/\R                  S\\R                  \R                  4   4SV jjrw     SbSW\R2                  R                  SX\USY\USZ\?S[\\\U\\U   4      S\\?S]\\U   SS4S^ jjrxg)c    Ndefaultdict)	dataclassreplace)CallableDictListOptionalSetTupleTYPE_CHECKINGUnion)BackwardState)is_sym_nodepy_sym_types)magic_methodsmethod_to_operator)find_symbol_binding_fx_nodesfree_symbolshint_intis_symbol_binding_fx_node)graph_drawer)CheckpointPolicy   )config)dp_knapsackgreedy_knapsackilp_knapsack)get_aot_graph_name)is_with_effects)fx_graph_cseget_aten_targetAOT_PARTITIONER_DEBUGlogc                      \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   \\   \S'   \\   \S'   S\R                  4S	 jr
S\R                  4S
 jrS\R                  4S jrS\R                  4S jrS\R                  4S jrSrg)OpTypes4   z8Class for keeping track of different operator categoriesfusible_opscompute_intensive_ops
random_opsview_opsrecomputable_opsnodec                 2    [        U5      U R                  ;   $ N)r"   r(   selfr-   s     \/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torch/_functorch/partitioners.py
is_fusibleOpTypes.is_fusible>   s    t$(8(888    c                 2    [        U5      U R                  ;   $ r/   )r"   r)   r0   s     r2   is_compute_intensiveOpTypes.is_compute_intensiveA   s    t$(B(BBBr5   c                 2    [        U5      U R                  ;   $ r/   )r"   r*   r0   s     r2   	is_randomOpTypes.is_randomD   s    t$77r5   c                 2    [        U5      U R                  ;   $ r/   )r"   r+   r0   s     r2   is_viewOpTypes.is_viewG   s    t$55r5   c                 2    [        U5      U R                  ;   $ r/   )r"   r,   r0   s     r2   is_recomputableOpTypes.is_recomputableJ   s    t$(=(===r5    N)__name__
__module____qualname____firstlineno____doc__r   r   __annotations__fxNoder3   r7   r:   r=   r@   __static_attributes__rB   r5   r2   r&   r&   4   s    BXx=(H(m(m#9rww 9C C8bgg 86BGG 6>BGG >r5   r&   c                      \ rS rSr% \\R                     \S'   \\R                     \S'   \\R                     \S'   \\R                     \S'   \	\R                  \
4   \S'   \R                  S\\R                     4S j5       rS	\R                  S\4S
 jrS	\R                  S\4S jrS	\R                  S\4S jrS	\R                  S\
4S jrSrg)NodeInfoN   inputs_required_fw_nodesrequired_bw_nodesunclaimed_nodesfw_orderreturnc                 B   ^  [        S T R                   5       U 4S jS9$ )Nc              3   $   #    U  H  ov   M     g 7fr/   rB   .0ns     r2   	<genexpr>-NodeInfo.required_fw_nodes.<locals>.<genexpr>[   s     0/1Q/s   c                 "   > TR                   U    $ r/   )rS   )rY   r1   s    r2   <lambda>,NodeInfo.required_fw_nodes.<locals>.<lambda>[   s    a@Pr5   key)sortedrP   r1   s   `r2   required_fw_nodesNodeInfo.required_fw_nodesX   s!    0//06P
 	
r5   rY   c                     XR                   ;   $ r/   )rP   r1   rY   s     r2   is_required_fwNodeInfo.is_required_fw^   s    ++++r5   c                     XR                   ;   $ r/   )rQ   rf   s     r2   is_required_bwNodeInfo.is_required_bwa   s    ****r5   c                     XR                   ;   $ r/   )rR   rf   s     r2   is_unclaimedNodeInfo.is_unclaimedd   s    ((((r5   c                 T    XR                   ;   d   SU S35       eU R                  U   $ )NNode z not in fw nodes!)rP   rS   rf   s     r2   get_fw_orderNodeInfo.get_fw_orderg   s2    +++IuQC7H-II+}}Qr5   rB   N)rC   rD   rE   rF   r	   rI   rJ   rH   r   r   int	functoolscached_propertyrc   boolrg   rj   rm   rq   rK   rB   r5   r2   rM   rM   N   s     MBGG$277|#\!277C<  
4= 
 

, ,D ,+ +D +)bgg )$ ) bgg  #  r5   rM   c                   H    \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   Srg)	MinCutOptionsl   ban_if_used_far_apartban_if_long_fusible_chainsban_if_materialized_backwardban_if_not_in_allowlistban_if_reductionrB   N)rC   rD   rE   rF   rv   rH   rK   rB   r5   r2   rx   rx   l   s      $$"&&!!r5   rx   r-   rT   c                 |    U R                   R                  SS 5      [        R                  [        R                  4;   $ )N	recompute)metagetr   MUST_RECOMPUTEPREFER_RECOMPUTEr-   s    r2   must_recomputer   u   s5    99==d+''))0  r5   fx_gc                 f    SnU R                   R                   H  n[        U5      (       d  M    g   g)NFT)graphnodesr   )r   foundr-   s      r2   has_recomputable_opsr   |   s.    E

  $ ! r5   c                    U R                   R                   Hf  n[        U5      (       d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf    g   g)NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   )r   r-   s     r2   has_recomputable_rng_opsr      sV    

  4  V,,		11T[[5E5EE ! r5   c                     [        U R                  S   [        R                  [        R                  45      (       a  g[        U R                  S   [        R
                  5      (       d   eg)Nvalr      )
isinstancer   r   SymIntSymBoolSymFloatr   s    r2   sym_node_sizer      sK    $))E"U\\5==$ABBdii&7777r5   c                       \ rS rSrS rSrg)InvalidNodeBase   c                     g)NzInvalid NoderB   rb   s    r2   __repr__InvalidNodeBase.__repr__   s    r5   rB   N)rC   rD   rE   rF   r   rK   rB   r5   r2   r   r      s    r5   r   joint_graphrO   outputssubgraphc                   ^
 [         R                  " 5       n0 m
U H4  nUR                  UR                  5      nUR                  Ul        UT
U'   M6     U R
                   GH;  n[        U5      (       a  US:w  a  [        T
U'   M%  UT
;   a  M-  UR                  S:X  a  [        T
U'   MH  UR                  S:X  a  [        R                  " UR                  0 UR                  D6nU Vs/ s H7  n[        U[         R                  5      (       d  M$  [        T
U   [        5      PM9     nn[!        U5      (       a  [        T
U'   M  UR#                  UU
4S j5      T
U'   M  UR                  S:X  a  UR#                  UU
4S j5      T
U'   GM(  UR                  S:X  d  GM;  GM>     / n	U H  n[        U[         R                  5      (       aN  UT
;  a  [%        SU S	35      e[        T
U   [        5      (       a   SU S
35       eU	R'                  T
U   5        Mp  U	R'                  U5        M     UR)                  [+        U	5      5        UR-                  5         UR/                  5         U$ s  snf )au  
Given a graph, extracts out a subgraph that takes the specified nodes as
inputs and returns the specified outputs.

This includes specifying non-placeholder nodes as inputs.

The general strategy is to initialize all inputs with proxies as we
encounter them, and trace through the graph, only keeping values which take
in valid proxies. Then, all dead code is eliminated.
backwardplaceholdercall_functionc                    > TU    $ r/   rB   xenvs    r2   r]   4_extract_graph_with_inputs_outputs.<locals>.<lambda>   	    CFr5   get_attrc                    > TU    $ r/   rB   r   s    r2   r]   r      r   r5   outputrp   z couldn't be found in envz was invalid, but is output)rI   Graphr   namer   r   _must_be_in_backwardInvalidNodeoppytreearg_tree_leavesargskwargsr   rJ   r   any	node_copyRuntimeErrorappendr   tupleeliminate_dead_codelint)r   rO   r   r   	new_graphr-   new_nodeall_argsr   output_valuesr   s             @r2   "_extract_graph_with_inputs_outputsr      s(     
I
C ((3		D		  !!%%(j*@#CI3; WW%#CIWW'--tyyHDKKHH "!Aa) 4
3q6?3!  
 8}}'D	!++D2BCCIWW
"!++D2BCCIWW 5 "6 Ma!!|"U1#-F#GHH!A  6qc456    Q(  #  U=)*!!#NN9s   %#I$I$c                     U R                   S:H  =(       aF    S[        U R                  5      ;  =(       a'    [        U 5      (       + =(       a    [	        U 5      (       + $ Nr   tangents)r   strr   _is_bwd_seed_offset_is_fwd_seed_offsetr   s    r2   
_is_primalr      sK    =  	*c$++..	*#D))	* $D))	r5   c                 ^    U R                   S:H  =(       a    S[        U R                  5      ;   $ r   r   r   r   r   s    r2   _is_tangentr      s$    77m#F
c$++6F(FFr5   c                     U R                   S:H  =(       a7    S[        U R                  5      ;   =(       d    S[        U R                  5      ;   $ )Nr   bwd_seedbwd_base_offsetr   r   s    r2   r   r      =    77m# c$++&&O*;s4;;?O*Or5   c                     U R                   S:H  =(       a7    S[        U R                  5      ;   =(       d    S[        U R                  5      ;   $ )Nr   fwd_seedfwd_base_offsetr   r   s    r2   r   r      r   r5   c                     U R                   S:H  =(       a)    [        U R                  R                  S5      [        5      $ )Nr   r   )r   r   r   r   r   r   s    r2   _is_backward_stater      s*    77m#W
499==3G(WWr5   c                 @    U R                   R                  SS 5      S:H  $ )Npartitioner_tagis_backwardr   r   r   s    r2   _has_tag_is_backwardr     s    99==*D1]BBr5   c                 @    U R                   R                  SS 5      S:H  $ )Nr   must_be_in_backwardr   r   s    r2   _has_tag_must_be_in_backwardr     s    99==*D15JJJr5   c                 `    [        U 5      =(       d    [        U 5      =(       a    [        U 5      $ r/   )r   r   r    r   s    r2   r   r   
  s&    '- T"<t'<r5   joint_modulec                |    [         R                  " S U R                  R                  SS9 5       6 nUS U nX!S  nX44$ )Nc              3   8   #    U  H  oR                   v   M     g 7fr/   )r   )rX   r-   s     r2   rZ   +_extract_fwd_bwd_outputs.<locals>.<genexpr>  s     	K J)) J   r   r   )r   r   r   
find_nodes)r   num_fwd_outputsr   fwd_outputsbwd_outputss        r2   _extract_fwd_bwd_outputsr     sS     $$	K 2 2 = = = J	KG *?+K*+K##r5   saved_valuesr   c                 \    U  H&  nUR                   U:X  d  M  U R                  U5          g    g r/   )r   remove)r   r   saved_values      r2   _remove_by_namer     s+    #t#, $r5   saved_sym_nodesr   c                z   [        XS9u  pEU R                  R                  SS9n/ [        [        U5      Qn/ [        [
        U5      Qn/ [        [        U5      Qn	/ [        [        U5      Qn
/ [        [        U5      Qn[        U R                  X!-   U-   U
-   US5      nUR                  SS9 Hp  nUR                  (       d,  [        XR                  5        [        X-R                  5        M@  [        U5      (       d  MR  [        XR                  5        U(       a  Mp   e   [        5       n/ n/ nU HJ  n[        U5      nU(       a$  UR                  U5        UR!                  U5        M9  UR!                  U5        ML     [#        U R                  5      n[$        R&                  " UX5       Hc  nSUR(                  ;  a  M  [+        UR(                  S   5      U-
  n[-        US S9 H  nUU;  a  M  UR!                  UU   5        M!     UU-  nMe     UR/                  5         UR1                  UU-   5        [        U R                  Xy-   XA-   U-   S5      n[        U R                  UU-   U-   U
-   U-   US5      n[2        R4                  R7                  U U5      n[2        R4                  R7                  X5      nUU4$ )	Nr   r   r   r   r   c                     U R                   $ r/   r   )ss    r2   r]   *_extract_fwd_bwd_modules.<locals>.<lambda>^  s    166r5   r_   forward)r   r   r   filterr   r   r   r   r   r   usersr   r   setr   addr   r   	itertoolschainr   r   ra   clearextendrI   _lazy_graph_module_make_graph_module)r   r   r   r   r   r   placeholdersprimal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputsbackward_state_inputs	bwd_graphr-   saved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsr   	fwd_graph
fwd_module
bwd_modules                           r2   _extract_fwd_bwd_modulesr  "  s     8 K  %%00M0BL7fZ67M9vk<89NIv&9<HIIv&9<HIGf%7FG2&7:PP	I $$$6zzL))4OYY7%%L))4(((( 7 (+uM    *40f%#**40#**40   3<3E3EFO 7V		!"499U#34}D)9:A '#**?1+=> ; 	$ W" 25LLM 3."_4	I 3
	
	 !	!  		 
 		I &&99,	RJ&&99,RJz!!r5   c                   [        U 5      (       a
  [        XUS9$ [        [        [        U R
                  R                  5      5      n[        [        [        U R
                  R                  5      5      nX4-   n[        XS9u  pg[        U R
                  XVS5      nUR                   V	s1 s H   oR                  S:w  d  M  U	R                  iM"     n
n	/ n/ nU R
                  R                   GH	  n	U	R                  U
;  a  M  [        U	5      (       a  UR                  U	5        M9  SU	R                  ;  aH  U	R                  S:X  a8  U	R                  n[!        S U 5       5      (       d   eUR#                  U5        M  U	R                   Vs/ s H  oR                  U
;  d  M  UPM     nnSU	R                  ;   a*  [!        S U 5       5      (       a  UR#                  U5        M  UR                  U	5        GM     [        [$        R'                  U5      R)                  5       5      n[        [$        R'                  U5      R)                  5       5      n[+        U UUUS9$ s  sn	f s  snf )	a  
Partitions the :attr:`joint_module` in a manner that closely resembles the
behavior observed in the original ``.forward()`` and ``.backward()`` of the
callable, i.e., the resulting forward graph contains those operators that
are executed in the original ``.forward()`` callable passed to
:func:`aot_function`.

The default partitioner collects the operators that are between the forward
inputs and the forward outputs. This helps in finding the tensors which have
to be stashed for the backward pass. These stashed tensors become the output
of the generated forward graph. The remaining operators are then placed in
the backward graph.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.

Returns:
    Returns the generated forward and backward Fx graph modules.
r   r   r   tensor_metar   c              3   Z   #    U  H!  oR                   [        R                  :H  v   M#     g 7fr/   )r   operatorgetitemrX   users     r2   rZ   $default_partition.<locals>.<genexpr>  s     I54{{h&6&665s   )+c              3   8   #    U  H  n[        U5      v   M     g 7fr/   r   rW   s     r2   rZ   r    s      2(71Ar   r   r   )r   #min_cut_rematerialization_partitionlistr   r   r   r   r   r   r   r   r   r   r   r   r   allr  dictfromkeyskeysr  )r   _joint_inputsr   r  r	  rO   r   r   forward_only_graphr-   forward_node_namesr   r   r   rY   backward_usagess                   r2   default_partitionr,    s&   4 L))2
 	
 
L,>,>,D,DEFM!&)<l>P>P>V>V"WX3F7 K <F 1666d''X:M			6   LO""((99..t ""4($))+?0JJJEI5IIIII&  ::%a7I)I:   		)c 2(72 / /  &&7##D); )< l388:;L4==9>>@AO#''	 O&s   )I II#"I#g    .Anumelc                     XR                   -  $ r/   )itemsize)r-  dtypes     r2   _tensor_nbytesr1    s    >>!!r5   c                   ^ S[         4S jmSU R                  ;   a  U R                  S   n[        U[        5      (       a  g[        U[        [
        45      (       a  [        U4S jU 5       5      $ [        U[        5      (       a#  [        U4S jUR                  5        5       5      $ [        U[        R                  5      (       a  T" U5      $ [        S[        U5       SU  35      eU R                  S	:X  a  g
[        SU  S35      e)NrT   c                     [        U [        R                  5      (       d  g[        [	        U R                  5       SS9U R                  5      $ )Nr      fallback)r   r   Tensorr1  r   r-  r0  r   s    r2   object_nbytes_size_of.<locals>.object_nbytes  s4    !U\\**hqwwy4@!''JJr5   r   r   c              3   4   >#    U  H  nT" U5      v   M     g 7fr/   rB   )rX   rY   r9  s     r2   rZ   _size_of.<locals>.<genexpr>  s     5A}Q''s   c              3   8   >#    U  H  u  pT" U5      v   M     g 7fr/   rB   )rX   _rY   r9  s      r2   rZ   r<    s     @KDA}Q''Ks   zUnknown metadata type z	 on node r   r   rp   zO didn't have `val` metadata; we should always have `val` metadata on the nodes.)rs   r   r   r   r#  r   sumr%  itemsr   r7  r   typer   )r-   r   r9  s     @r2   _size_ofrB    s    KC K
 		iic<(( dE]++5555T""@CIIK@@@U\\** %%3DI;ivNOOww*

vde r5   r   c           	         SSK Jn  U" [        5      nU R                   H5  nUR                  S:X  d  M  X#R
                  R                  ==   S-  ss'   M7     [        R                  S[        UR                  5       S SS95        g )	Nr   r   r   r   z%sc                     U S   $ Nr   rB   r8  s    r2   r]   _count_ops.<locals>.<lambda>  s    QqTr5   Tr`   reverse)collectionsr   rs   r   r   r   rC   r$   infora   r@  )r   r   cntr-   s       r2   
_count_opsrL    s`    '%c*C77o%$$%*%  HHT6#))+>4HIr5   c                     / n [        [        R                  R                  5       H  n[	        [        R                  R                  U5      n[        U[        R                  R                  5      (       d  MR  UR                  5        HJ  n[	        X#5      n[        R                  R                  UR                  ;   d  M8  U R                  U5          M     M     U $ r/   )dirr   opsatengetattrr   _opsOpOverloadPacket	overloadsr   	pointwiser   r   )rO  	attr_nameopoverloadpacketoverloadop_overloads        r2   pointwise_opsrZ  	  s    
C(	"599>>9=*EJJ,G,GHH(224H!"2=Kyy""k&6&66

+, 5 ) Jr5   	depth_mapc                     U  Vs0 s H=  n[        U[        R                  R                  R                  5      (       d  M8  X!U   _M?     nn[        UR                  5       S SS9$ s  snf )Nc                     U S   $ rE  rB   r8  s    r2   r]   sort_depths.<locals>.<lambda>  s    AaDr5   TrG  )r   r   rI   r-   rJ   ra   r@  )r   r[  arg
arg_depthss       r2   sort_depthsra    s^    '+'+z#uxx}}?Q?Q/Rs^t   *""$.$GGs   7A% 	A%gmc                   ^
^^ [         R                  " 5       m0 m
U R                  R                  SS9 H  nTR	                  UU
4S j5      T
U'   M     0 m[        U R                  R                  5       H
  u  p!UTU'   M     U
UU4S jn[        [        [        U R                  R                  5      5      nSn[        R                  nU H(  nUR                   H  nTU   U:  d  M  TU   nUnM     M*     Uc  U $ [        U R                  R                  5      TU   S  H  nU" U5        M     [        R                   R                  U T5      n	U	$ )a{  
This pass finds the first bwd node in the graph (by looking at users of
tangents) and then reorders the graph by walking from this node to all the
way to the end of the graph. At each op in this traveral, we insert this op
in a new graph and try to bring only the relevant subgraph from the other
non-bwd edges relevant for this op. This closely mimics the behavior of
autograd engine.

Why is this pass required in the first place?

This is an artifact of how partitioners work today. The starting point of
partitioner is a joint graph, which is fwd and then bwd graph. In the case
of checkpointing, we keep portions of fwd graph in their original place in
the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
graph has copies of recomputed fwd subgraphs followed by the original bwd
graph. If we run this naively, this leads to bad memory footprint, because
the fwd subgraphs are live for way longer duration than necessary. This pass
reorders the operations such that we prioritize the ops for the original bwd
graph while only realizing those ops from the fwd graph that are necessary
at any given point in the graph.
r   r   c                    > TU    $ r/   rB   r   s    r2   r]   5reordering_to_mimic_autograd_engine.<locals>.<lambda>>  s	    Ar5   c                 8  > U /n[        5       n[        U5      S:  aM  UR                  5       n X;   d  U T;   a  M,  UR                  U 5        XR                  -  n[        U5      S:  a  MM  [        UU4S jS9nU H  n TR                  U U4S j5      TU '   M     g )Nr   c                    > TU    $ r/   rB   )rY   orders    r2   r]   Sreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>Q  s	    %(r5   r_   c                    > TU    $ r/   rB   r   s    r2   r]   ri  S  r   r5   )r   lenpopr   all_input_nodesra   r   )r-   	cur_nodesinsertable_nodesr   r   rh  s      r2   insert_node_in_graphAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graphD  s    F	5)nq ==?D'43;  & ---I )nq  ""28JK$D!++D2BCCI %r5   N)rI   r   r   r   r   	enumerater   r#  r   r   mathinfr   r   GraphModule)rb  r-   idxrp  r  first_node_in_bwdminimum_ordertangentr  new_gmr   r   rh  s             @@@r2   #reordering_to_mimic_autograd_enginer{  "  s0   . 
I"$C ##}#5''.>?D	 6 Erxx~~.	d /D$ &bhhnn=>NHHM!MMDT{]* %d$(! " "  	 RXX^^$U+<%=%?@T" A XX!!"i0FMr5   	fw_module	bw_modulenum_sym_nodesc           	      \   [         R                  " 5       nS nS nS nU" U 5      nU" U5      n	U" U5      n
0 nU R                  R                   H  n[	        U5      (       d  M  [        UR                  S5      (       d  M2  [        R                  R                  UR                  R                  ;   d  Mf  XR                     nXR                     nXR                     nXS.X'   M     [        R                  R                  R                  n[        R                  R                  R                  nS nUR                  R!                  SS9 H  nSUR                  ;   d  M  Un  O   Uc  [#        S	5      e/ nUR%                  5        GH  u  nnUS
   nUS   nUR                  nUR'                  U5         UR)                  SUUR                  /UR*                  Q7UR,                  S9nUR)                  S[.        R0                  US40 S9nUR)                  S[.        R0                  US40 S9nUR3                  U5        UR5                  U5        UR7                  U5        S S S 5        UR                  nUR'                  U5         S[9        U5       3nUR;                  U5      nU" U" U5      5      UR<                  S'   S S S 5        UR'                  U5         UR)                  SUWUR                  /UR*                  Q7UR,                  S9nUR3                  U5        UR5                  U5        S S S 5        GM     [9        [?        UR                  R!                  SS95      5      nUR*                  S   n[A        U5      U-
  nUS U [C        U5      -   UUS  -   nUR                  RE                  U5        UR                  R5                  U5        URG                  5         URG                  5         X4$ ! , (       d  f       GN= f! , (       d  f       GNO= f! , (       d  f       GM  = f)Nc                 &   0 nU R                   R                   Ht  nUR                  S:X  d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf  X!UR                  '   Mv     U$ )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )gmodrandom_nodesr-   s      r2   get_rng_ops*functionalize_rng_ops.<locals>.get_rng_ops  sh    JJ$$D?*DKK00II559I9II*.TYY' % r5   c                     SU R                   ;  a  gU R                   S   n[        U[        5      (       d  U4nU H@  n[        U[        R                  5      (       d  M$  UR
                  R                  S:X  d  M@    g   g)zF
Check the example value of the node outputs to find the device type.
r   Ncudacpu)r   r   r   r   r7  devicerA  )r-   
candidates	candidates      r2   
get_device)functionalize_rng_ops.<locals>.get_device  sl     		!YYu%
*e,,$J#I)U\\22##((F2! $
 r5   c                 t    U S:X  a  [         R                  R                  5       $ [         R                  " 5       $ )Nr  )r   r  get_rng_state)r  s    r2   get_sample_rng_state3functionalize_rng_ops.<locals>.get_sample_rng_state  s,    V::++--""$$r5   r   )fwdbwdr   r   ry  zaCouldn't find tangent node in graph inputs. This is unexpected, please file a bug if you see thisr  r  r   )r   r   r   r   rng_state_output_r   r   )$r   countr   r   r   r   r   r   r   r   r   r   _prims	rng_primsrun_and_save_rng_staterun_with_rng_stater   r   r@  inserting_beforecreate_noder   r   r  r  replace_all_uses_with
erase_noder   nextr   r   iterrk  r   r   	recompile) r   r|  r}  r~  uidr  r  r  joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_mapr-   	base_nodefw_nodebw_noderun_and_save_rngr  bw_tangent_start_nodefw_rng_state_outputs	node_pairfw_graphfunctional_fw_nodestate
rng_outputbw_graph
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxr   s                                    r2   functionalize_rng_opsr  l  s   2 //
C	$% &l3"9-"9-!""((4  V,,		11T[[5E5EE+II6I&yy1G&yy1G:A2R$/ ) ||--DD//BB **m*<		!$(! = $o
 	
  8 > > @	9E"E"??&&w/!)!5!5 nn4w||4~~	 "6 " ((  (!,	 ) E "--  &  . J ))*5( ''.1 06 ??&&'<=,T#YK8J ( 4 4Z @,@GAT,U""5) >
 &&w/!--"'G',,G~~	 . J ))*5( 0/M !Ah $y99X9FGHN$$Q'JZ=8&&'
$
%	&
'(
)	* 
 OO7#OO~.w 0/8 >=
 0/s&   5B,O7;P	AP7
P		
P	
P+	c                    U R                   R                   H  n[        U5      (       d  M  UR                   HT  n[        U5      (       d  M  UR                  S   UR                  S   :  d  M7  [
        R                  UR                  S'   MV     UR                  R                  SS5      (       d  M  [        S UR                   5       5      (       a  M  [
        R                  UR                  S'   M     U $ )z
If there are two consecutive checkpointed blocks with no operator in
between, we would still want to stash the tensor at the boundary of
checkpointed blocks. The following pass makes the last output node
non-recomputable to allow for that.
ac_graph_idr   has_backward_hookFc              3   8   #    U  H  n[        U5      v   M     g 7fr/   )r   r  s     r2   rZ   )cleanup_recompute_tags.<locals>.<genexpr>  s      E1;t$$r   )	r   r   r   r   r   r   	MUST_SAVEr   r   )r   r-   r  s      r2   cleanup_recompute_tagsr  	  s     ""(($

"4((		-0499]3KK-=-G-GDIIk* # yy}}0%88 E15E B B& *:)C)C		+&7 )8 r5   	node_infomin_cut_optionsc                   ^^^^&^'^(^)^*^+^,^-^.^/^0^1 Tc
  [        5       m[        5       m0[        (       a  U R                   Vs1 s HQ  nUR                  S:X  d  M  [        UR                  S5      (       d  M2  [        UR                  R                  5      iMS     nnUT0R                   Vs1 s H  n[        U5      iM     sn-
  n[        R                  SU5        S m'S m(U'U(U04S jm) SS KnU)UU04S	 jm+U+UU04S
 jn
U)4S jm*S[        4U*U04S jjnUR!                  5       m/[        5       m&U&UU/U04S jnU R                   GH{  nUR                  S:X  a  M  UTR"                  ;   aj  UTR$                  ;  a.  T/R'                  UR(                  S-   S[*        R,                  S9  Md  T/R'                  UR(                  S-   S[*        R,                  S9  [/        U5      (       a.  T/R'                  UR(                  S-   S[*        R,                  S9  M  [1        U5      (       d  [3        U5      (       a  U" U5        TR5                  U5      (       a  U
" U5      (       a  U" U5        SUR6                  ;  =(       a    SUR6                  ;  =(       dB    SUR6                  ;   =(       a,    [9        UR6                  S   [:        R<                  5      (       + n[?        U5      (       a  [        [A        U5      5      nOPU(       aA  [9        UR6                  RC                  S5      [D        5      (       a  SO[*        R,                  nOU" U5      nT/R'                  UR(                  S-   UR(                  S-   US9  URF                   H<  nT/R'                  UR(                  S-   UR(                  S-   [*        R,                  S9  M>     GM~     S[H        [J        RL                     S[N        S[N        4U)U4S jjnTRP                  (       GaR  TRR                   GHA  nURF                   Vs/ s H,  nTR5                  U5      (       d  M  TRU                  U5      PM.     nnURF                   Vs/ s H  nTR5                  U5      (       d  M  UPM     nn[W        U5      S:  d  M  U" U[Y        U5      5      n[[        URF                  5       H  nTR5                  U5      (       d  M  TRU                  U5      U:  d  M2  T)" UU5      (       d  MB  UT&;   a  MJ  [        R                  SUTRU                  U5      UUTRU                  U5      5        U" U5        M     GMD     TR\                  (       Ga  [        5       nU R                   GHi  nTR5                  U5      (       d  M  TRU                  U5      U4/nTRU                  U5      n[W        U5      S:  d  MR  [^        R`                  " U5      u  nnUU;   a  M2  URc                  U5        TRU                  U5      US-   :  aP  [W        U5      S:X  aA  [        R                  SUUTRU                  U5      TRU                  U5      5        U" U5        M  URF                   H[  nTR5                  U5      (       d  M  T)" UU5      (       d  M+  UT&;  d  M3  [^        Rd                  " UTRU                  U5      U45        M]     [W        U5      S:  a  GM  GMl      URg                  T/SS5      u  nnUu  nm.[        5       nU/4S  jU 5        H"  u  m1nURu                  U.U14S! jU 5       5        M$     [        5       n U H*  u  n!n"U!S S" U"S S# :X  d   eU!S S" n#U Rc                  U#5        M,     [w        U 5      m,[y        U R                  5       V$Vs0 s H  u  n$oDU$_M
     snn$m-[{        U,4S$ jU  5       U-4S% jS&9n%U%T&4$ s  snf s  snf ! [         a  n	[        S5      U	eS n	A	ff = fs  snf s  snf ! [h         ai    [        R                  S5        [        R                  SRk                  URl                  Rn                  Rq                  T/5      5      5        [s        T/5        e f = fs  snn$f )'Nr   _overloadpacketz&Ops banned from re-materialization: %sc                 X   UR                   [        R                  R                  R                  :w  a  gUR
                  S   n[        R                  R                  R                  U5      u  nnU H6  nUR                  U   nXL a    g[        U[        5      (       d  M/  X;   d  M6    g   gNFr   T)r   r   rO  higher_orderauto_functionalizedr   _higher_order_opsauto_functionalizeget_mutable_argsr   r   r#  )ab
mutable_opmutable_arg_namesr>  r   r_  s          r2   !can_fuse_into_auto_functionalized8solve_min_cut.<locals>.can_fuse_into_auto_functionalizedB  s    88uyy--AAAVVAY
 ##66GG
S	
%D((4.Cx#t$$8 & r5   c                     UR                   [        R                  R                  R                  :w  a  gUR
                  S   nU H  nUR
                  S   U   nXL d  M    g   g)NFtensors_to_cloner   T)r   r   rO  r   triton_kernel_wrapper_functionalr   )r  r  r  r   r_  s        r2   .can_fuse_into_triton_kernel_wrapper_functionalEsolve_min_cut.<locals>.can_fuse_into_triton_kernel_wrapper_functionalS  s[    88uyy--NNNHH%78%D((8$T*Cx & r5   c                   > [        U5      [        R                  :X  a  gT" X5      (       a  gT" X5      (       a  gU R                  [        R
                  L a?  U R                  S   R                  [        R                  R                  R                  L a  gTR                  U 5      =(       a    TR                  U5      $ )NTr   F)r"   rP  catr   r  r  r   r   rO  r  r  r3   )r  r  r  r  op_typess     r2   r3   !solve_min_cut.<locals>.is_fusible]  s     1),Q229!??HH(((q	  yy%%FFG
 ""1%@(*=*=a*@@r5   r   zANeed networkx installed to perform smart recomputation heuristicsc                 \  > TR                  U 5      (       a  gU 1n[        U5      S:  a  UR                  5       nUR                   HQ  nTR	                  U5      (       d  T" X#5      (       d    gTR                  U5      (       d  M@  UR                  U5        MS     [        U5      S:  a  M  gr  )r=   rk  rl  r   rg   r   )r-   rn  curr  r3   r  r  s       r2   is_materialized_backwards0solve_min_cut.<locals>.is_materialized_backwardsw  s    D!!F	)nq --/C		 //55j>S>S##D))MM$'	 " )nq  r5   c                   > U R                   S:w  a  gU R                  [        R                  :X  a  gU R                  R                  SS 5      [        R                  :X  a  g[        R                  (       a  TR                  U 5      (       a  gU R                  [        R                  R                  [        R                  R                  4;   a  gTR                  (       a  TR!                  U 5      (       d  gO-TR#                  U 5      (       d  TR%                  U 5      (       a  gTR&                  (       a8  T" U 5      (       a+  [(        R+                  SU [-        U R.                  5      5        gU R0                  S:  a  U R0                  [        R2                  :  a  gTR4                  (       a/  [7        S U R8                   5       5      n[;        U 5      nUS-  U:  $ g)	Nr   Fr   Tzmaterialized backwards: %s %si  c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fr/   )r   rI   rJ   rB  rX   is     r2   rZ   Bsolve_min_cut.<locals>.should_ban_recomputation.<locals>.<genexpr>  s(      %%.*Q2HYs   #;;r   )r   r   r  r  r   r   r   r  r   recompute_viewsr=   rP  lift_fresh_copydefault
lift_freshr}   r@   r:   r7   r|   r$   debugr   r   dist_from_bwmax_dist_from_bwr~   r?  r   rB  )r-   input_tensors_sizeoutput_sizer  r  r  s      r2   should_ban_recomputation/solve_min_cut.<locals>.should_ban_recomputation  s{   77o%;;(***99==d+/?/I/II!!h&6&6t&<&<;;4//779P9PQQ22++D11 2 !!$''8+H+H+N+N 77<U=
 =
 II5tU4::=NO t#(9(9F<S<S(S ++!$ %%)YY% " #4.K?%777r5   c                 r   >^  T R                   S:X  a  g[        UU 4S jT R                   5       5      (       + $ )Nr   Tc              3   6   >#    U  H  nT" TU5      v   M     g 7fr/   rB   )rX   r  r3   r-   s     r2   rZ   9solve_min_cut.<locals>.is_materialized.<locals>.<genexpr>  s     E*$z$--*s   )r   r$  r   )r-   r3   s   `r2   is_materialized&solve_min_cut.<locals>.is_materialized  s*    77m#E$**EEEEr5   rT   c           
        > [        U 5      n[        R                  (       a&  TR                  U 5      (       a  [        R
                  $ [        U R                  S   [        5      (       a2  [        U R                  S   [        R                  5      (       d  [        $ [        US[        [        U R                  S5      S5      -  -  5      nT" U 5      (       a  U$ US-  $ )Nr   g?d   r      )rB  r   r  r=   rs  rt  r   r   r   r   r   INT_INFrs   maxminr  )r-   mem_szr  r  s     r2   get_node_weight&solve_min_cut.<locals>.get_node_weight  s    $!!h&6&6t&<&< 88Odii&55dii.== Vsc#d.?.?*Eq&IIJK4  MA:r5   c                 X  > TR                  U 5      (       a  gU T;   a  g[        U 5      (       a  gSU R                  ;   a-  [        U R                  S   [        R
                  5      (       a  gTR                  U 5        TR                  SU R                  S-   [        R                  S9  g)NFr   source_incapacityT)r=   r   r   r   r   r   r   add_edger   rs  rt  )r-   banned_nodesdont_bannx_graphr  s    r2   ban_recomputation_if_allowed3solve_min_cut.<locals>.ban_recomputation_if_allowed  s    D!!8 $DII*TYYu-=u~~"N"N
 	(DII$5Ir5   r   r  sinkr  _outr   r          start_nodes	max_rangec           
        > / nU  H,  n[         R                  " UT	R                  U5      US45        M.     [        U5      S:  a  [         R                  " U5      u  pEnU(       d  T	R                  U5      $ UR
                   Ha  nT	R                  U5      (       d  M  T	R                  U5      U:  a  M2  [         R                  " UT	R                  U5      UT" XW5      45        Mc     [        U5      S:  a  M  U$ )zl
Finds the first unfusible node in the chain of nodes starting from
`start_nodes` and returns its position.
Tr   )heapqheappushrq   rk  heappopr   rg   )
r  r	  sorted_nodesrY   r>  r-   node_is_fusibler  r3   r  s
           r2   find_first_unfusible+solve_min_cut.<locals>.find_first_unfusible4  s    
 9;ANN<)*@*@*CQ)MN  ,!#',}}\'B$A_" --d33

++D11 --d3i? NN$"//5tZ=ST	 #	 ,!# r5   z1used above/below fusible %s:(%s) -> %s -> %s:(%s)r  ztoo long %s %s %s %sr  z-Failed to compute min-cut on following graph:
c              3   0   >#    U  H  oTU   4v   M     g 7fr/   rB   )rX   rY   r  s     r2   rZ    solve_min_cut.<locals>.<genexpr>  s     8i$is   c              3   :   >#    U  H  oT;   d  M
  TU4v   M     g 7fr/   rB   )rX   vnon_reachableus     r2   rZ   r    s     Ad=.@fq!fds   	c              3   .   >#    U  H
  nTU   v   M     g 7fr/   rB   )rX   r-   name_to_nodes     r2   rZ   r    s     2	d		s   c                    > TU    $ r/   rB   )r   node_idxs    r2   r]   solve_min_cut.<locals>.<lambda>  s	    (1+r5   r_   )>r   get_default_op_listr#   r   r   r   r   r   r  r,   r$   rJ  networkxImportErrorr   floatDiGraphrQ   rO   r  r   rs  rt  r   r   r   rg   r   r   r   r7  r   r   r   r   r   r	   rI   rJ   rs   rz   rc   rq   rk  r  r   r{   r  r  r   r  minimum_cut	Exceptionjoin	readwriteedgelistgenerate_edgelistvisualize_min_cut_graphupdateget_name_to_noderr  ra   )2r   r  r  r  r-   joint_module_opsr  ops_ignorednxer  r  r  is_non_tensor_nodeweightr  r  	used_nodeordersfw_usersfirst_unfusible_usevisited
start_nodefusiblestart_orderr>  r  	cut_value	partition	reachablecutsetnbrs	cut_nodesnode_innode_out	node_namerv  r   r   r  r  r3   r  r  r  r  r  r  r  r  s2    ```                                  @@@@@@@@@@@@r2   solve_min_cutrE  /  sM    5"$H $))
)ww/) -.5dkkCT.U -C++,) 	 

 '(:S:S)T:SQ#a&:S)TT9;G"A&0dF  2 zz|H5L * !!77h9...9+++!!$))e"3Vdhh!O dii&0&488L$
 dii%/$((Kd2488(.
 ##D)).Ft.L.L(. "E}DII'EUtyy SDIIe4Dell)S%S 	 t=./F!$))--"6FFDHH  %T*F$))e+TYY-?&QJJDdii&0$))e2CdhhW e "L$rww- C C  . ,,,"44I &OO+D++D1 -	&&t,+   "+!0I4L4LT4R   6{Q&:8S[&Q#!)//2D!0066%2248;NN&y$77</$O%%229=/ %2248 5T:! 3 5P 111%%++J++J77!..z:JGHG#00<Kg,"w/3'>C  **3/+2CCG)HH."!..s3!..z: 15IID!0066&sD11 4w1G1G1Mt0TU &) g,"" ,B!~~h&I	9  )I}#&5F8i84AdAA 9 I#s|x},,,CRL	i  $
 $K0L+4[5F5F+GH+Gic4c	+GHH2	28ML %%m

 *Uf  R
	v
N  @A2<<00BB8LMN)	& Is[    _
_
5#_
)_(_ _2:_2_7=_7'_< a2
_/_**_/<A3a/c                    SS K nSS KnUR                  R                  U 5      R	                  5       nUR                  U5      S   nUR                  5        He  nXR                  5          UR                  5          S   nUR                  [        U5      5        U[        S5      :X  d  MT  UR                  S5        Mg     [        R                  S5        UR                  S5        g )Nr   r  rt  redz2Visualizing the failed graph to min_cut_failed.svgzmin_cut_failed.svg)r!  pydotnx_pydotto_pydot	to_stringgraph_from_dot_data	get_edges
get_sourceget_destination	set_labelr   r#  	set_colorr$   rJ  	write_svg)r  r0  rH  
dot_format	dot_graphedger3  s          r2   r+  r+    s    %%h/99;J))*5a8I##%//+,T-A-A-CDZPs6{#U5\!NN5! & HHAB,-r5   c                  n   / [         R                  P[         R                  P[         R                  P[         R                  P[         R
                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                   P[         R"                  P[         R$                  P[         R&                  P[         R(                  P[         R*                  P[         R,                  P[         R.                  P[         R0                  P[         R2                  P[         R4                  P[         R6                  P[         R8                  P[         R:                  P[         R<                  P[         R>                  P[         R@                  P[         RB                  P[         RD                  P[         RF                  P[         RH                  P[         RJ                  P[         RL                  P[         RN                  P[         RP                  P[         RR                  P[         RT                  P[         RV                  P[         RX                  P[         RZ                  P[         R\                  P[         R^                  P[         R`                  P[         Rb                  P[         Rd                  P[         Rf                  P[         Rh                  P[         Rj                  P[         Rl                  P[         Rn                  P[         Rp                  P[         Rr                  P[         Rt                  P[         Rv                  P[         Rx                  P[         Rz                  P[         R|                  P[         R~                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  Pn [         R                  [         R                  [         R                  /nU[         R                  [         R                  [         R                  [        R                  [         R                  [         R                  [         R                  [         R                  /-  nUnU / [        R                  P[        R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P-  n U [         R                  [         R                  /-  n X-  n U [        5       -  n U [         R                  /-  n U [         Vs/ s H  n[        U5      PM     sn-  n [        U 5      n[         R                  [         R                  [         R                  /n[         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         GR                   [         GR                  [         GR                  [         GR                  [         GR                  /nU[        U5      -  nG[        [        U5      [        U5      [        U5      [        U5      [        U5      5      $ s  snf r/   )rP  r   subdivatan2mulr  r  pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltabsbitwise_notceilfloorfracnegreluroundsilutruncr$   log10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrt
reciprocalsigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardr?  mean_grad_sum_to_sizesum_to_sizeamaxtotype_asr  r  squeeze	unsqueezersub_to_copyaliasviewslicetprimsbroadcast_in_dimexpand
as_stridedpermuteselectconvert_element_typeclone	full_likevarstd_unsafe_viewreshapebroadcast_tensorsscalar_tensorones	new_zerosr  arangetriuvar_meanisinfr   fullzerosempty
empty_likeargmaxmaximumiota)_low_memory_max_pool2d_offsets_to_indicesindexgatherrZ  
zeros_liker   r   r   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmm#_scaled_dot_product_flash_attention'_scaled_dot_product_efficient_attention_flash_attention_forward_efficient_attention_forwardupsample_bilinear2d
_scaled_mmr&   )default_recomputable_opsrecomputable_view_opsr+   mr,   r*   r)   r(   s           r2   r   r     s   L0L0L0 	L0 	

	L0
 	L0 	L0 	L0 	L0 	L0 			L0 	L0 	L0 	L0 	L0 	L0  	!L0" 	#L0$ 	%L0& 	'L0( 	)L0* 	+L0, 	-L0. 	/L00 			1L02 	

3L04 			5L06 	7L08 			9L0: 	

;L0< 			=L0> 	

?L0@ 	AL0B 	

CL0D 	

EL0F 			GL0H 	IL0J 	KL0L 	

ML0N 	OL0P 			QL0R 	SL0T 			UL0V 			WL0X 	YL0Z 			[L0\ 			]L0^ 	_L0` 			aL0b 			cL0d 	

eL0f 			gL0h 	

iL0j 	kL0l 	mL0n 	oL0p 	qL0r 	sL0t 	

uL0v 	

wL0x 			yL0z 	{L0| 			}L0~ 	L0@ 	AL0B 			CL0D 	EL0F 	GL0H 			IL0J 	KL0L 	ML0N 	OL0P 	QL0R 	SL0T 			UL0V 	WL0Z "\\4>>4::F		

	 	 %H $!		$!""$! 	

$! 		$!
 	$! 			$! 			$! 	$! 	$! 	$! 	$! 	$! 			$! 	$! 	

$!  	!$!" 	#$!$ 	%$!& 			'$!( 	)$!* 	+$!, 	-$!. 			/$!0 	1$!2 	

3$!4 	5$!6 			7$!8 	9$!: 	

;$!< 	

=$!> 	?$!@ 	A$!B 	C$!D 	

E$!F 	77G$! $L T[[ 99(/!   N1!3A!6 NN34%%t~~tGJ!!

0044%%))   #S_4KK!"JH ' !Os   'd2c                 L    0 nU R                    H  nX!UR                  '   M     U$ r/   )r   r   )r   r  r-   s      r2   r-  r-  i  s'    L"&TYY r5   memoryruntimes
max_memoryall_recomputable_banned_nodesc                     [         R                  nUS:X  a  [        XU5      $ US:X  a  [        XU5      $ US:X  a  [	        XU5      $ [        U5      (       a  U" XX4U5      u  pxSXx4$ [        SU 35      e)Ngreedyilpdpr  z,Not aware of memory budget knapsack solver: )r   activation_memory_budget_solverr   r   r   callabler   )	r   r  r  r  r  r  SOLVERsaved_node_idxrecomp_node_idxs	            r2   #_optimize_runtime_with_given_memoryr  p  s     33Fv<<	5Fj99	46Z88	&		*08U+
' ^55I&RSSr5   no_dispatchc                 `  ^ ^^	 [         R                  nS nUS:X  a  gUS:X  ac  [        5          SSKJn  [
        R                  " UT R                  T R                  45      u  mm	UR                  UU	U 4S j5      nUsS S S 5        $ US:X  as  SS	K
Jn  [
        R                  " UT R                  T R                  45      u  mm	U" S
S9 nT R                  " T0 T	D6  S S S 5        WR                  5       n[        US5      $ [        SU 35      e! , (       d  f       g = f! , (       d  f       NI= f)Nc                 x   [        U [        R                  5      (       a  [        U R                  S   [        R
                  5      (       ao  [        U R                  S   R                  5      nS nU Vs/ s H
  o2" U5      PM     nnU R                  S   R                  XR                  S   R                  S9$ [        U [        R                  5      (       aC  [        U R                  S   [        R                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  g[        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  gU $ s  snf )	Nr   c                     [        U SS9$ )Nr4  r5  )r   )ds    r2   realize_symbolAestimate_runtime.<locals>.materialize_arg.<locals>.realize_symbol  s    D11r5   r  )strider4  r5  g      ?T)r   rI   rJ   r   r   r7  r#  shapenew_empty_stridedr  r   r   r   r   )r   r  r  r   s       r2   materialize_arg)estimate_runtime.<locals>.materialize_arg  s7   a!!j&M&M,,-E2 1661^A&E666%=22ff]3:: 3   277##
166%=%,,(O(OAFF5MD99277##
166%=%..(Q(Q277##
166%=%--(P(PH 7s   5F7testingr   profiler   )benchmarkerc                  (   > TR                   " T 0 TD6$ r/   )r   )r   r   r-   s   r2   r]   "estimate_runtime.<locals>.<lambda>  s    4;;3O3Or5   flops)FlopCounterModeF)displayz Not aware of runtime estimator: )r   *activation_memory_budget_runtime_estimatorr  $torch._inductor.runtime.benchmarkingr  r   tree_mapr   r   benchmark_gputorch.utils.flop_counterr  r   get_total_flopsr  r   )
r-   RUNTIME_MODEr  r  msr  modecounted_flopsr   r   s
   `       @@r2   estimate_runtimer    s    DDL( y 		"]H!???TYY<TULD&**+OPB ] 
	 <DKK8PQfU+tKK(( ,,,.=!$$=l^LMM# ] ,+s   AD	D
D
D-c           	        ^^^^^ ^!^"^#^$^% US:  d  US:  a  [        SU 35      e[        [        R                  [        R                  [        R
                  [        R                  [        R                  S9n[        R                  (       a  [        USSSSS9nUS:X  a  UR                  $ [        U UU5      u  pEUS:X  a  U$ S[        [        R                     S[        4S	 jmT" UR                  5      m#T" U5      m!T!T#::  a  U$ U!U#4S
 jnS[        [        R                     4UU!U#4S jjn[        USSSS9n[        XU5      u  pU" U	5      U:  a  U	$ [        USS9m[        XT5      u  pU" U
5      U:  a  U
$ SSKJm  UR                   Vs1 s H  nT" U5      iM     snm S[$        [        R                     S[        [        R                     4UU 4S jjnU" U5      n['        US S9n['        U[(        SS9m[+        T5      S:X  a  UR                  $ T Vs/ s H  o" [)        U5      5      PM     snm"T Vs/ s H  n[-        U5      PM     snm%SSKJm$  UUU"U$U%4S jn[        R2                  (       Gaa  / n[5        SSS5       H6  nU" US-  XS9u  nnUR7                  U[9        T%5      U-
  U" U5      45        M8     SS KJn  U Vs/ s H  nUS   PM
     nnU Vs/ s H  nUS   PM
     nnUR?                  SS9  URA                  UUSS 9  [C        U5       H   u  nnURE                  US! UUU   4S"S#S$S%9  M"     URG                  S&5        URI                  S'5        URK                  S(5        URM                  S5        URO                  5       nURQ                  5         S)[S        5        S*3nURU                  U5        [V        RY                  S+U5        U" X!U S,9S   $ s  snf s  snf s  snf s  snf s  snf )-Nr   r   zJThe valid ranges for memory budget are 0 <= m <= 1. The provided value is )rz   r{   r|   r}   r~   F)rz   r{   r|   r}   r   rT   c                 :    [        [        [        U 5      5      S-  $ N    eA)r?  maprB  )r   s    r2   estimate_activations_size:choose_saved_values_set.<locals>.estimate_activations_size  s    3x./#55r5   c                    > U S-  TT-
  -  $ r  rB   )szmax_act_sizemin_act_sizes    r2   get_normalized_size4choose_saved_values_set.<locals>.get_normalized_size  s    S\L899r5   activationsc                 &   > T" U 5      T-
  TT-
  -  $ r/   rB   )r  r  r	  r
  s    r2   get_mem_ratio.choose_saved_values_set.<locals>.get_mem_ratio  s"    )+6E<'
 	
r5   )rz   r{   r|   )r}   )get_node_storager   c                    > U  Vs/ s H.  nUR                   [        S5      :  d  M  T" U5      T;  d  M,  UPM0     sn$ s  snf r  )r  rs   )r   r  r  input_storagess     r2   get_recomputable_banned_nodes>choose_saved_values_set.<locals>.get_recomputable_banned_nodes  sP     "
! S)	 
 %Q'~= !
 	
 
s   >
>>c                     U R                   $ r/   r   r8  s    r2   r]   )choose_saved_values_set.<locals>.<lambda>  s    PQPVPVr5   r_   TrG  r  c                 n  >^ T" 5          [        UTT[        U S5      UT5      u  nnn[        (       Gap  [        T5      n[        [	        TTT5      5       VVV	V
s/ s HJ  u  nu  pn
SU SU SX-   SU
R
                   SU
R                   SU
R                   SU
R                   3PML     nn	nnn
UR                   V
s/ s H  oR                  PM     nn
UR                   V
Vs/ s H/  n
U
R                    H  nUR                  U
R                  4PM     M1     nn
nS[        R                   S[        [        R                  S5       SU SU SS	R                  U5       S
U SU SU S3m[         R"                  R%                  SU4S jS9  [&        R)                  T5        S S S 5        [+        5       nW H  n UR-                  TU   5        M     UR1                  T5      (       d   e[3        UUTU5      u  nnUW4$ s  sn
n	nnf s  sn
f s  snn
f ! , (       d  f       Nv= f! [.         a     Mz  f = f)Nr   z
			z, zQ
Activation Checkpointing - Knapsack Problem Summary:
    Input:
        Solver: z
        Max Memory: z
        Graph Nodes: z
        Graph Edges: zC
        (Index, Memory, Runtime, Node.Op, Node.Target, Metadata):  z'
    Output:
        Expected Runtime: z
        Saved Nodes: z
        Recomputable Nodes: z
            artifactc                     > T $ r/   rB   )knapsack_summarys   r2   r]   Lchoose_saved_values_set.<locals>.get_saved_values_knapsack.<locals>.<lambda>\  s    '7r5   )r   
payload_fn)r  r  r#   rr  zipr   r   r   r   r   r   rm  r   r  activation_memory_budgetr'  r   _loggingtrace_structuredr$   rJ  r   r   BaseExceptionissubsetrE  )memory_budgetr  r   expected_runtimesaved_node_idxsrecomputable_node_idxsmax_runtimer  r  runtimer-   input_summaryjoint_graph_nodesinpjoint_graph_edgesr  rv  r   r>  r  aggressive_optionsr  memories_banned_nodesr  runtimes_banned_nodess                      @r2   get_saved_values_knapsack:choose_saved_values_set.<locals>.get_saved_values_knapsack+  s   ]
 4%%M1%-	 & %$!)
 ;D119;	!;66 ugRxr'2G1H477)SUVZVaVaUbbdeienendooqrvr{r{q|};  	! <G;L;L$M;L4YY;L!$M !, 1 1% 1#33 XXtyy)3 * 1 " %
( 778 988!<= >'( )'( )CCE77=CYBZ [+, -%& '34 5$  //#7 0  )*e f 5)C:3?@ *   !>????'	
a ---e	! %N%9 ]p ! sJ   AHAH,H?H
H%6HB
H=H&H
H#&
H43H4r  )r  r   r  )
      )figsizeo)markerz.2fzoffset points)r   r6  center)
textcoordsxytexthazMemory Budgetz Runtime of Recomputed Componentsz:Pareto Frontier of Memory Budget vs. Recomputation Runtimememory_budget_pareto_z.pngz%Generated Pareto frontier curve at %s)r%  r  r   )-r   rx   r   ban_recompute_used_far_apart!ban_recompute_long_fusible_chains#ban_recompute_materialized_backwardban_recompute_not_in_allowlistban_recompute_reductionsaggressive_recomputationr   rO   rE  r	   rI   rJ   r#  torch._inductor.fx_utilsr  r   ra   rB  rk  r  torch.utils._mode_utilsr  visualize_memory_budget_paretoranger   r?  matplotlib.pyplotpyplotfigureplotrr  annotatexlabelylabeltitlegridgcfshowr   savefigr$   warning)&r   r  r%  r  runtime_optimized_saved_valuesr>  r  r  more_aggressive_optionsmore_aggressive_saved_values%aggressive_recomputation_saved_valuesr   r-   r  recomputable_banned_nodesr  r2  optionssweep_memory_budgetr   r&  pltitemx_valuesy_valuestxtfigfig_namer/  r  r  r  r  r	  r0  r
  r  r1  s&                               @@@@@@@@@@r2   choose_saved_values_setre    sU   
 qMA-XYfXgh
 	
 $$AA#)#K#K%+%O%O & E E88O &&!"'',).$)
 (5)%" --6RWW 6% 6 -Y-=-=>L,-KLL|#--:
4= 
 

 &##(%*	 '4 7'#  12]B++  % ;H 2;7) :;mK4499B9I9IJ9I&t,9IJN	
CL 	
T"''] 	
 	
 !>l K &'@FV W %+!x%! ()Q.2O2OQHQK(2O ,I+H4+H 4D. D.L ,,,#(b"#5-F#c)Y.*L* NN'-.1AA!,/	 $6 	((/0DG0(/0DG0 	

7
#8C0  )FAsLLs)hqk"*   * 	

?#

56		NOggi
*+=+?*@EH;XF %#k	 	s K4v 10s   O(O-?O2O7%O<c          	        ^ U R                   R                  5         U R                  5         U R                   n[        R                  (       a  [        U5      nXPl         U R                   n[        U 5      n[        U 5      nU(       a  [        U 5      n U4S jn	U	" U 5      n
[        U
R                  5      S:X  a
  [        XTS9$ [        U R                   R                  5       H  nUR                  S:X  a  [        S5      Ul        M%  U
R#                  U5      (       d	  SUl        MD  [        S5      Ul        UR$                   H+  n['        UR                   UR                   S-   5      Ul        M-     M     [        R(                  nUR                   HC  n[+        UR,                  R/                  SS5      [0        5      (       d  M4  UR,                  S   n  O   [3        UU
US	9n[5        [7        [8        U5      5      n[5        [7        S
 U5      5      n[;        U UUTS9u  nnU(       a!  U(       a  [=        U UU[        U5      5      u  nn[?        U5      n[@        (       Ga  [C        U Vs/ s H  n[E        U5      [G        U5      4PM     sn5      n[I        S U 5       5      S-  n[J        RM                  SU5        [J        RM                  SU5        UR                   R                   Vs1 s H   oR                  S:X  d  M  URN                  iM"     nnUR                   R                   Vs1 s H   oR                  S:X  d  M  URN                  iM"     nnUU-  n[Q        [        5      nUR                   R                   H\  nURN                  U;   d  M  [S        URT                  S5      (       d  M2  U[G        URT                  RV                  5      ==   S-  ss'   M^     [J        RM                  S[        U5      [        U5      [        U5      5        [C        URY                  5       S SS9n[J        RM                  SU5        UU4$ s  snf s  snf s  snf )a(  
Partitions the joint graph such that the backward recomputes the forward.
Recomputing helps in trading off memory bandwidth with computation.

To create the fwd and bwd graph, we copy the joint graph, manually set the
outputs to just original forward or backward outputs. And then we run the
resulting graphs through dead code elimination.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.
    _joint_inputs: The inputs to the joint graph. This is unused.
    compiler: This option determines the default set of recomputable ops.
        Currently, there are two options: ``nvfuser`` and ``inductor``.
    recomputable_ops: This is an optional set of recomputable ops. If this
        is not None, then this set of ops will be used instead of the
        default set of ops.
    num_fwd_outputs: The number of outputs from the forward graph.

Returns:
    Returns the generated forward and backward Fx graph modules.
c                   > [        U R                  5      n[        5       nU R                  R                   H  nUR                  S:X  a"  SUR
                  ;   a  UR                  U5        O![        U5      (       a  UR                  U5        X2;   d  M]  UR                   H  nUR                  U5        M     M     [        [        [        U R                  R                  5      5      n[        [        [        U R                  R                  5      5      nXV-   n[        U TS9u  pUR                  S U	 5       5        [        U R                  XxS5      n
U
R                   Vs1 s H#  nUR                  S:w  d  M  XR                      iM%     nnU R                  R                   Vs1 s H  nX;;  d  M
  X2;  d  M  UiM     nnSn0 nU R                  R                   H  nX;;   d  M
  XU'   US-  nM     [#        X{X,U5      $ s  snf s  snf )	Nr   r   r   c              3   R   #    U  H  oc  M  UR                   S:w  d  M  Uv   M     g 7f)Nr   r   )rX   r9  s     r2   rZ   Nmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>  s"      !
"!A8HAA{s   ''	'r   r   r   r   )r-  r   r   r   r   r   r   r   r   r#  r   r   r   r   r,  r   r   rM   )r   r  rQ   r-   r  r  r	  rO   r   r   r)  rc   rR   fw_cntrS   r   s                  r2   classify_nodes;min_cut_rematerialization_partition.<locals>.classify_nodes  s   '(:(:;E &&,,Dww-'J$++,E!%%d+%d++!%%d+( JJD%))$/ ' - VJ0B0B0H0HIJ!%&(:(:(@(@A"
 7#;/$
  	   !
"!
 	
 @Y

 +00+
0ww(" $L#0 	 +
 %**00
0, 151N 0 	 

  &&,,D(!'! - '88
 	
!+


s   #H;H'	H4H;Hr   r   r   r  r   r%  N)r%  c                 "    [        U 5      (       + $ r/   r   )rY   s    r2   r]   5min_cut_rematerialization_partition.<locals>.<lambda>'  s    [^);r5   r!  c              3   8   #    U  H  n[        U5      v   M     g 7fr/   )rB  r  s     r2   rZ   6min_cut_rematerialization_partition.<locals>.<genexpr>=  s     'J\\r   z'Theoretical Activations Stored: %.2f GBz,Theoretical Per Activation Storage Sizes: %sr   r  z# remat/fw/bw: %d/%d/%dc                     U S   $ rE  rB   r8  s    r2   r]   rn  T  s    !A$r5   TrG  zCount of Ops Rematerialized: %s)-r   r   r  r   cser!   r   r   r  rk  rQ   r,  reversedr   r   rs   r  rg   r   r  r   r   r   r   r#  re  r#  r   r   r  r  r{  r#   ra   rB  r   r?  r$   r  r   r   r   r   r  r@  )r   r(  compilerr   r   	cse_graphr   graph_has_recomputable_opsgraph_has_recomputable_rng_opsrk  r  r-   r  r%  r   r   r|  r}  r  sorted_sizestotal_activations_size_gbfw_module_nodesbw_module_nodesremat_nodescountsrematerialized_opss      `                      r2   r"  r"    s   B **,D zz &	&$$K!5l!C%=l%K"!-l;-
^ |,I
 9&&'1, 
 	
 ++11277h #CD))$// !D #CD

$'(9(94;L;Lq;P$Q! # 3 33M!!diimmOT:EBB IIo6M " +#L 6+|<=O;\JKL 4''	Iy ")#8iC4H$ Iy 4I>IlKlSV4lKL %('J\'J$JS$P!		;=VW 			@,O"+//"7"7
"7$77o;UIDII"7 	 
 #,//"7"7
"7$77o;UIDII"7 	 
 &7!,S!1OO))DyyK'GDKKAR,S,Ss4;;6678A=8 * 			%  		
 $FLLNPTU		35GHi9 L

s   ) Q)Q	 Q	*QQtracedfnamefigname
clear_metaprogparse_stack_tracedot_graph_shapec                 "   U(       aZ  [         R                  " U R                  5      n[        R                  " X5      n U R                  R
                   H
  n0 Ul        M     [        R                  R                  U5      u  pU
(       d  S[        R                  -   n
[        R                  SX5        [        R                  " U UUUS9nUR!                  5       n[#        USU
R%                  S5      -   5      nU	 U
 3nUc	  U" U5        g U" XS9  g )N.zWriting FX graph to file: %s%s)r  r  write_)r  )copydeepcopyr   rI   ru  r   r   ospathsplitextr   torch_compile_graph_formatr$   rJ  r   FxGraphDrawerget_main_dot_graphrQ  lstrip)r  r  r  r  r  r  r  r   r-   baseextgr   write_methods                 r2   
draw_graphr  Y  s     MM&,,/	2LL&&DDI '  'IDF555HH-t9""+'		A 	
A1hC89LfSENE|UU&r5   r/   )r   )inductor)fx_graphTNFN)yr  rt   r  r   loggingrs  r  r  rI  r   dataclassesr   r   typingr   r   r	   r
   r   r   r   r   r   torch._inductor.inductor_primstorch.fxrI   torch.utils._pytreeutils_pytreer   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r   torch.fx.passesr   torch.utils.checkpointr   r  r   "_activation_checkpointing.knapsackr   r   r   _aot_autograd.logging_utilsr   _aot_autograd.utilsr    compile_utilsr!   r"   sympydebug_partitionerr#   rv   rH   	getLoggerrC   r$   LoggerrO  rP  r  r&   rM   rx   rJ   r   ru  r   r   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r,  r  r1  rB  rL  	lru_cacherZ  ra  r{  r  r  rE  r+  r   r-  r#  r  rG  r  r  re  r"  r  rB   r5   r2   <module>r     sb          	 # * S S S  %  $ $ ? H L  ) 3  
 < 0 8  %66 t 6''1W^^ 1yy~~		 > > >2      :    T r~~ $ 2>> d  C  
  #	DDMD "'']D sm	D
 XXDNRWW  Gbgg G$ Gbgg $ bgg $ XRWW X XCrww C4 CKrww K4 Krww 4 $..$
4=$rww-'($$rww- s `"..`"rww-`" "'']`"
 `" 2>>2>>)*`"FS..S
2>>2>>)*Sl c("# " "277 s :Jbhh J T "Hbggsl!3 HU277C<=P8Q HGBNN Gr~~ GTZ ..Z ~~Z  ~~Z  	Z 
 2>>2>>)*Z z# #BNN #T 	A&A&A& #A&H."bW bJBHH TTKT 5kT 	T
 T $(=T 5$s)T#Y&'T0 0,Nd j	j	j	 
"'']	j	` o ..o  2>>2>>)*o j ,0#%)'HH  '' ' 	'
 5d3i(
)' ' c]' 
'r5   