
    IЦi*                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SKJ
r
  S SKJr  S SKJr  S SK	JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  S SKJ r J!r!J"r"  S SK#J$r$  S SK%r%S S	K%J&r&J'r'J(r(  S SK)J*s  J+s  J,r-  S SK.r/S SK0r/S SK1J2s  J3r4  S S
K5J6r6  S SK7J8r8  S SK9J:r:  S SK;J<r<  S SK=J>r>J?r?J@r@JArAJBrB  S SKCJDrD  S SKEJFrFJGrGJHrHJIrIJJrJJKrKJLrLJMrM  S SKNJOrO  S SKPJQrQJRrRJSrS  S SKTJUrU  SSKVJWrWJXrX  SSKYJZrZJ[r[  SSKXJ\r\J]r]J^r^J_r_J`r`  SSKaJbrb  SSKcJdrdJere  SSKfJgrg  SSKhJiriJjrj  SSK2JkrkJlrlJmrmJnrnJoroJprpJqrqJrrrJsrsJtrtJuruJvrvJwrwJxrxJyryJzrz  SSK{J|r|J}r}J~r~  \(       a  S SKJr  SSKJr  SS KJr  SS!K2Jr  O\rS"\S#'   \" S$5      r\" S%5      r\" S&5      r\\\&4   rS"\S''   \\\\&4   rS"\S('   \GR                  " \5      r\R                  " \GR$                  S)S*9r\/R                  GR&                  r \\S+\\S+4   S,S-\\\\\\S+4   S+S,S-4         4   rS"\S.'   SS/ jrSS0 jrSS1 jrSS2 jr      SS3 jr/ S4Qr/ S5Qr S     SS6 jjrGS S7 jr S     SS8 jjr\GSGSS: jj5       r\GSGSS; jj5       r GS     GSS< jjr    GSS= jr    GSS> jrGSS? jrGSS@ jr " SA S-5      r\s" SBSC9 " SD SE5      5       r\s " SF SG\5      5       rGSSH jr\s " SI SJ\5      5       r\s " SK SL\5      5       r\" SM5      \" SN5      \" SO5      \" SP5      \" SQ5      \" SR5      SS.rST\SU'    GS       GS	SV jjr        GS
SW jr\s " SX SY\5      5       r " SZ S[\5      r\s " S\ S]\5      5       r\s " S^ S_\5      5       r\s " S` Sa\5      5       rGSSb jrGSSc jr     GS             GSSd jjr\R                  " \S9Se9r      GSSf jr\s " Sg Sh\5      5       r\s " Si Sj\5      5       r\s " Sk Sl\5      5       r\s " Sm Sn\5      5       r\s " So Sp\5      5       r\s " Sq Sr\5      5       r\s " Ss St\5      5       r\s " Su Sv\5      5       r " Sw Sx\5      r\s " Sy Sz\5      5       r\s " S{ S|\5      5       r\s " S} S~\5      5       r      GSS jrGSS jr " S S5      r\s " S S\5      5       r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r\s " S S\5      5       r " S S\5      r\s" SBSC9 " S S\5      5       r\s" SBSC9 " S S\\5      5       r " S S\5      r " S S\5      r " S S\5      r\s " S S\5      5       r\s " S S\5      5       r\s" SBSC9 " S S\5      5       r " S S\5      r " S S\5      r\\\\\\\\\\\4      4   r " S S5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r\s" SBSC9 " S S\5      5       r " S S\5      r " S S\5      r\s" SBSC9 " S S\5      5       r\s" SBSC9 " S S\5      5       r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r\s" SBSC9 " S S5      5       r " S S\5      r\s" SBSC9 " S S\5      5       r\s " S S\5      5       r " S S\5      r\GR                   " S S\5      5       r " S S+\5      r " S S\5      r\s" SBSC9 " S S\5      5       rGSS jr\s" SBSC9 " S S\5      5       r\s" SBSC9 " S S\5      5       Gr \s" SBSC9 " S S\5      5       Gr " S S\5      Gr\s " S S\5      5       Gr " S S\5      Gr " S SG\5      GrGSS jGrg(      )annotationsN)nullcontext)Enum)partial)AnyCallableClassVarContextManagerDict	GeneratorIterableListLiteralOptionaloverloadSequenceSetTupleTYPE_CHECKINGTypeVarUnion)assert_neverNever	TypeAlias)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metrics)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)CallMethodKeycompute_unbacked_bindingsDivideByKeyfree_unbacked_symbolsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureindex_prevent_reordering)Depextract_free_unbacked_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResult)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningget_kernel_metadatair_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_subs)opsOpsValueV)Node)CUDATemplate)GraphLowering)IndentedBufferr   r[   _T_U_V_IntLike_NumLikez  prefix	TensorBoxr   IRNode_NodeOrNodesc                &   ^ SU4S jjmT" U 5        g )Nc                  > U c  g [        U [        [        45      (       a  U  H  nT" U5        M     g [        U [        5      (       a   U R	                  5        H  nT" U5        M     g [        U [
        R                  R                  R                  [        [        [        [        R                  R                  R                  [         ["        [$        45      (       d   S['        U 5       S35       eg )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])
isinstancelisttupledictvaluestorch	_inductorir
ExpandViewDynamicScalarAssertScalarre   sympylogicboolalgBooleanr   intEffectfulKerneltype)nodesnode_check_tensorboxs     Q/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torch/_inductor/ir.pyr~   %validate_ir.<locals>._check_tensorbox   s     =e}-- & t$$ & ' OO&&11! KK''//#	  k U}$ijk     )r|   Optional[_NodeOrNodes]returnNone )node_or_nodesr~   s    @r   validate_irr      s    k6 ]#r   c                F   ^  [        T [        5      (       d   eSU 4S jjnU$ )Nc                 0   > [        [        T5      " U 0 UD6$ N)getattrrW   )argskwargsnames     r   fnops_wrapper.<locals>.fn   s    sD!42622r   )r   objectr   r   r   rX   )rj   str)r   r   s   ` r   ops_wrapperr      s"    dC    3 Ir   c           
     f   ^ [        [        U [        [        U 5      5      5      5      mSU4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   lenrange)indexi	inv_orders     r   reindex inverse_reorder.<locals>.reindex   sC    5zS^+++-23u:->?->il#->???   Ar   Sequence[_T]r   r   )rm   zipr   r   )orderr   r   s     @r   inverse_reorderr      s*    Sc%j 123I@ Nr   c                   ^  SU 4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   r   )r   r   r   s     r   r   same_reorder.<locals>.reindex   sB    5zSZ''').s5z):;):AeAh):;;;r   r   r   )r   r   s   ` r   same_reorderr      s    < Nr   c                   ^ ^ SU U4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   )r   reindex1reindex2s    r   r    fuse_reindexing.<locals>.reindex   s    ((r   )r   r   r   zSequence[_V]r   )r   r   r   s   `` r   fuse_reindexingr      s    ) ) Nr   )   r      r7   )   r   r   r   r7   c                <    Uc  [        U 5      nU$ [        X5      nU$ )z)
Convert strides to fill order (argsort)
)rG   rH   )seq	shape_env
sorted_idxs      r   get_fill_orderr      s-     $+CL
  !0
r   c                    [        U 5       VVs0 s H  u  pX!_M	     nnn[        [        U 5      5       Vs/ s H  oCU   PM	     nnU$ s  snnf s  snf )zx
Convert stride order to fill order
For channel last format,

stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
)	enumerater   r   )r   idxposlookupr   
fill_orders         r   stride_order2fill_orderr   	  sR     (1'78'783ch'7F8%*3u:%67%6)%6J7 97s
   AAc                    [        X5      n[        [        U 5      5       Vs/ s H  nSPM     nn[        U5       H	  u  pVXTU'   M     U$ s  snf )z!
Convert strides to stride order
r   )r   r   r   r   )r   r   r   _outr   elems          r   get_stride_orderr     sL     !/s >JCHo
&o1oC
&Z(D	 )J 's   A
Tc                    g r   r   xguard_shapes     r   ir_node_to_tensorr   "      r   c                    g r   r   r   s     r   r   r   '  r   r   c                   U c  g U(       d%  [         R                  R                  R                  nO[        nU R                  5        Vs/ s H
  o2" U5      PM     nn[        U 5      (       a0  U R                  5       R                   Vs/ s H
  o2" U5      PM     nnO[        R                  U5      nU R                  5       nU R                  5       n[        U5      n[        U5      n[         R                  R                  R                  R                  5          [         R"                  " XEXgS9R%                  5       nS S S 5        U$ s  snf s  snf ! , (       d  f       W$ = f)N)sizestridedtypedevice)rY   graphsizevars	size_hintr   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerL   r   suppress_guardsro   empty_stridedzero_)	r   r   shape_fnsr   r   r   r   ts	            r   r   r   ,  s    	y 77##--!".AHQKD.Q'(||~'<'<='<!(1+'<=2248KKME\\^F"4(D$V,F	
			#	#	3	3	5E

%' 	
 
6 H / > 
6	5 Hs   	EE$E
E c                D    [        U [        5      (       a
  U (       d  S /$ U $ r   )rj   rk   values    r   may_convert_to_optionalr   H  s!     %u vLr   c                @   [        U [        5      (       d  U c  U $ [        U [        R                  5      (       a  U R                  $ [        U [
        [        45      (       a  [        U R                  5       5      $ [        SU  S[	        U 5      R                   S35        g )Nzget_device_type(: ))rj   r   ro   r   r{   rf   
OutputSpecget_device_typer   r   __name__r   s    r   r   r   R  sz     !SQY	Au||	$	$vv	A
+	,	,q||~..#A3ba)9)9(:!<=r   c                *    [        [        U 5      5      $ r   )rQ   r   r   s    r   	is_tritonr   ^  s    /!$%%r   c                    [        U 5      S:H  $ )Ncpu)r   r   s    r   is_cpur   b  s    1&&r   c                  b   \ rS rSr% \" 5       rS\S'   \R                  " SS9r	S\S'   \R                  " SS9r
S\S	'   \R                  " SS9rS
\S'   \\R                  SES j5       5       rSFS jrSGS jrSHS jrSIS jrSJS jrSKS jrSLSMS jjr SN       SOS jjrSPS jrSQS jrSRS jrSSS jrSTS jrSUS jrSVS jrSWS jrSXS jr \!SYS j5       r"SZS jr#SVS  jr$S[S! jr%S\S]S# jjr&S^S$ jr'S_S% jr(SVS& jr)S`S' jr*SaS( jr+SbS) jr,SXS* jr-ScS+ jr.S[S, jr/S\SdS- jjr0SeS. jr1SGS/ jr2SfS0 jr3SGS1 jr4 Sg     ShS2 jjr5SiS3 jr6SjS4 jr7 Sg     SkS5 jjr8SlS6 jr9SmS7 jr:SnS8 jr;SoS9 jr<SpS: jr=S[S; jr>SqS< jr?SVS= jr@SVS> jrASrS? jrBSsS@ jrCScSA jrDSsSB jrE\F(       a  \!SPSC j5       rGSDrHg"SDrHg")trf   if  zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[List[str]]	tracebackOptional[torch.fx.Node]origin_nodec              #     #    [         R                  nX-  [         l         S v   U[         l        g ! U[         l        f = f7fr   )rf   r   )r   olds     r   current_originsIRNode.current_originsn  s4      %%"%-	*&)F#cF#s   A1 A>Ac                0    [         R                  XU5        g r   )r   __setattr__)selfattrr   s      r   _post_init_setattrIRNode._post_init_setattrx  s     	4u-r   c                    U R                  S[        U R                  5      5        U R                  S[        R                  (       a  [
        R                  " 5       OS 5        U R                  SS 5        g )Nr   r   r   )r   r2   r   r8   debug_ir_tracebackr   format_stackr   s    r   __post_init__IRNode.__post_init__~  sV    	:d6K6K+LMV5N5N//1TX	
 	t4r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r   .0deps     r   	<genexpr>(IRNode.get_read_names.<locals>.<genexpr>       ?.>s((.>   r2   	get_readsr  s    r   get_read_namesIRNode.get_read_names      ?dnn.>???r   c                    U R                   $ r   )r   r  s    r   get_tracebackIRNode.get_traceback  s    ~~r   c                    U R                   $ r   r   r  s    r   get_origin_nodeIRNode.get_origin_node      r   c                    g r   r   r  s    r   get_defining_opIRNode.get_defining_op      r   c                d    S[        U SS5       3nU(       a  [        U5      S:  a  US S  S3nU/$ )Nzorigins=r    @   =   z...)r   r   )r   shortenr   s      r   common_reprIRNode.common_repr  s@    WT9b9:;s7|b( "c*Gyr   c                .   [        U5      [        U R                  U5      5      -   n[        [        [        U5      5      nU(       a5  [	        SR                  U5      5      n[        U 5      R                   SU S3$ [        U 5      R                   SU S3$ )Nz,
z(
z
)(r   )rk   r'  mapr   indentjoinr{   r   )r   linesr&  	multiline	new_liness        r   
str_helperIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44r   c                    U R                   $ r   r   r  s    r   r   IRNode.get_dtype  s    zzr   c                D     U R                  5       $ ! [         a     g f = fr   )r   NotImplementedErrorr  s    r   maybe_get_dtypeIRNode.maybe_get_dtype  s&    	>>##" 		    
c                2    [        S[        U 5       S35      e)Nz#get_layout() is not implemented by !r7  r{   r  s    r   r   IRNode.get_layout  s    !$GT
|ST"UVVr   c                D     U R                  5       $ ! [         a     g f = fr   )r   r7  r  s    r   maybe_get_layoutIRNode.maybe_get_layout  &    	??$$" 		r:  c                "    U R                  5       $ r   )r   r  s    r   get_output_specIRNode.get_output_spec  s      r   c                D     U R                  5       $ ! [         a     g f = fr   )rD  r7  r  s    r   maybe_get_output_specIRNode.maybe_get_output_spec  s(    	''))" 		r:  c                >    [        U R                  5       [        5      $ )z4True for single tensor output (excludes MultiOutput))rj   rG  Layoutr  s    r   has_tensor_outputIRNode.has_tensor_output  s    $446??r   c                2    [        S[        U 5       S35      e)Nz!get_size() is not implemented by r<  r=  r  s    r   r   IRNode.get_size  s    !$Ed4j\QR"STTr   c                D     U R                  5       $ ! [         a     g f = fr   )r   r7  r  s    r   maybe_get_sizeIRNode.maybe_get_size  %    	==?"" 		r:  c                "    U R                  5       $ r   r   r  s    r   shapeIRNode.shape  s    }}r   c                4    [        U R                  5       5      $ r   )rU   r   r  s    r   	get_numelIRNode.get_numel  s    T]]_--r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ Nr   rY   r   r   is_expr_static_and_trueru   EqrX  r  s    r   is_zero_elementsIRNode.is_zero_elements  0    ww77AQST8UVVr   c                0    [        S[        U 5       35      e)a  
If the IRNode refers to data which has not been materialized (e.g.,
it is a Pointwise/Reduction that could potentially have more
compute fused into it), realize the IRNode into physical memory,
ending the possibility of fusing into it, but allowing, e.g., multiple
users to access the data without having to recompute.

Check StorageBox.realize for a particularly notable implementation.

TODO(ezyang): I think, in principle, every IRNode should have an
implementation of this, and most of the time no-op is OK, but you
really do have to audit each IRNode for this, so for now, raise
an error if it's not implemented.  Note that some code in graph.py
will catch this thrown error and suppress it with a warning.
zrealize NYI on r=  r  s    r   realizeIRNode.realize  s      "ODJ<"@AAr   Nc                0    [        S[        U 5       35      e)Nzcodegen_reference NYI on r=  r   writers     r   codegen_referenceIRNode.codegen_reference  s    !$=d4j\"JKKr   c                    g r   r   r  s    r   r   IRNode.get_device  r!  r   c                0    U R                  5       nUc   eU$ r   )r   r   r   s     r   get_device_or_errorIRNode.get_device_or_error  s    "!!!r   c                    gNFr   r  s    r   has_exceeded_max_readsIRNode.has_exceeded_max_reads      r   c                >    [        [        U 5      R                  5      er   r7  r{   r   r  s    r   make_loaderIRNode.make_loader      !$t*"5"566r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   make_indexerIRNode.make_indexer  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   
get_strideIRNode.get_stride  ry  r   c                D     U R                  5       $ ! [         a     g f = fr   )r~  r7  r  s    r   maybe_get_strideIRNode.maybe_get_stride  rB  r:  c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_nameIRNode.get_name  ry  r   c                D     U R                  5       $ ! [         a     g f = fr   )r  r7  r  s    r   maybe_get_nameIRNode.maybe_get_name  rR  r:  c                    grq  r   r   	thresholds     r   has_large_inner_fnIRNode.has_large_inner_fn  rt  r   c                    g r   r   r   userss     r   
mark_reuseIRNode.mark_reuse      r   c                    g r   r   r  s    r   realize_hintIRNode.realize_hint  r  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   unwrap_viewIRNode.unwrap_view  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   freeze_layoutIRNode.freeze_layout  ry  r   c                >    [        [        U 5      R                  5      er   rv  r   r   allow_paddings      r   freeze_layout_with_stride_order&IRNode.freeze_layout_with_stride_order       "$t*"5"566r   c                >    [        [        U 5      R                  5      er   rv  r   r   s     r   freeze_layout_with_fill_order$IRNode.freeze_layout_with_fill_order!  ry  r   c                >    [        [        U 5      R                  5      er   rv  r   r   s     r   freeze_layout_with_same_order$IRNode.freeze_layout_with_same_order$  ry  r   c                >    [        [        U 5      R                  5      er   rv  r   exact_stridesr  s      r    freeze_layout_with_exact_strides'IRNode.freeze_layout_with_exact_strides'  r  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_read_writesIRNode.get_read_writes,  ry  r   c                6    U R                  5       R                  $ r   r  readsr  s    r   r  IRNode.get_reads/      ##%+++r   c                4    [        U R                  5       5      $ r   )r   r  r  s    r   	num_readsIRNode.num_reads2  s    4>>#$$r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_storage_numelIRNode.get_storage_numel5  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_unbacked_symbol_usesIRNode.get_unbacked_symbol_uses8  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_reduction_typeIRNode.get_reduction_type;  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_reduction_sizeIRNode.get_reduction_size>  ry  r   c                    grq  r   r  s    r   	is_externIRNode.is_externA  rt  r   c                    grq  r   r  s    r   is_no_opIRNode.is_no_opD  rt  r   c                >    [        [        U 5      R                  5      er   rv  rm  s     r   constant_to_deviceIRNode.constant_to_deviceG  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_mutation_namesIRNode.get_mutation_namesJ  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_operation_nameIRNode.get_operation_nameM  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   get_inputs_that_alias_output#IRNode.get_inputs_that_alias_outputP  ry  r   c                    g r   r   r  s    r   r   IRNode.dtypeU  s    r   r   )r   zOrderedSet[Node]r   zGenerator[None, None, None])r   r   r   r   r   r   r   r   r   zOrderedSet[str])r   r   r   r   r   zOptional[Operation]T)r&  boolr   Sequence[str])TT)r.  zSequence[object]r&  r  r/  r  r   r   r   torch.dtype)r   zOptional[torch.dtype]r   rJ  )r   zOptional[Layout]r   r   )r   zOptional[OutputSpec]r   r  r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r   r   Optional[str]r   rg  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r  Optional[int]r   r  r  ry   r   r   r   rf   Fr   	List[int]r  r  r   r   r   r  r   r   r   List[_IntLike]r   r   r  r  r  r  r   r   r   zdependencies.ReadWritesr   zOrderedSet[Dep]r   ry   r   ra   r   OrderedSet[Symbol]r   Sequence[sympy.Expr]r   r  r   rf   r   r  )Ir   
__module____qualname____firstlineno__r2   r   __annotations__dataclassesfieldr   r   r   staticmethod
contextlibcontextmanagerr   r   r  r  r  r  r  r'  r1  r   r8  r   r@  rD  rG  rK  r   rP  propertyrU  rX  r_  rc  rh  r   rn  rr  rw  r{  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   __static_attributes__r   r   r   rf   rf   f  s   2<,/>  +00e<G_<%0%6%6E%BI"B+6+<+<%+HK(H*  *.5@  PT	5%	504	5HL	5		5W!@U  .WB$L
777777 7<77/37	7
77 DI7+7<@7	7
7,%77777777 		 
	 r   F)frozenc                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jrSS jrSS jrSrg) 	OperationiZ  c                    S U l         g r   operation_namer  s    r   r  Operation.__post_init__\  s
    -1r   c                    [         er   r7  r  s    r   r   Operation.get_device_      !!r   c                @    [        U S5      (       d   eU R                  $ Nr   )hasattrr   r  s    r   r  Operation.get_origin_nodeb  s!    t]++++r   c                @    [        U S5      (       d   eU R                  $ )Nr   )r&  r   r  s    r   get_originsOperation.get_originsf  s    tY''''||r   c                8    U R                   c   eU R                   $ r   r  r  s    r   r  Operation.get_operation_namej  s     ""..."""r   c                    grq  r   r  s    r   r  Operation.is_externn  rt  r   c                    grq  r   r  s    r   r  Operation.is_no_opq  rt  r   c                    [         er   r!  r  s    r   r  Operation.get_read_writest  r#  r   c                &    XR                  5       ;   $ r   )r  )r   r   s     r   
is_user_ofOperation.is_user_ofw  s    **,,,r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r	  r
  s     r   r  +Operation.get_read_names.<locals>.<genexpr>{  r  r  r  r  s    r   r  Operation.get_read_namesz  r  r   c                6    U R                  5       R                  $ r   r  r  s    r   r  Operation.get_reads}  r  r   c                    [         er   r!  r  s    r   get_outputsOperation.get_outputs  r#  r   c                    [        5       $ r   r1   r  s    r   get_unbacked_symbol_defs"Operation.get_unbacked_symbol_defs  
    |r   c                    [        5       $ )a}  
Returns the unbacked symbols which are required to be in scope in
order to successfully perform codegen for this buffer.  For example,
a buffer that corresponds to an extern kernel call that takes i0 as
an argument would return {i0} here.  This is used to generate necessary
dependencies that ensure we actually bind i0 in codegen before you
try to use it.

Note that this is NOT transitive; in particular, if this buffer takes
in as input another buffer with dynamic shape (e.g., (i0,)), we will
not report it here, because you will already have a dependency
on that buffer, which will eventually have a dependency on i0 if
necessary.
r1   r  s    r   r  "Operation.get_unbacked_symbol_uses  s     |r   c                    g)z
Gets extra global memory size needed by this buffer.
Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
r   r   r  s    r   get_workspace_sizeOperation.get_workspace_size  s    
 r   r  Nr  r  r  )r   r   r  r  r  )r   r   r   r  r  r  r   List[Buffer]r   zOrderedSet[sympy.Symbol]r  )r   r  r  r  r  r   r  r)  r  r  r  r  r4  r  r  r=  r@  r  rF  r  r   r   r   r  r  Z  sN    2" #"-@,""r   r  c                  h  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S S
 jrS!S jrS"U 4S jjrS#S jr\r	S$S jr
S%S jrS&S jrS&S jr\S'S j5       r\\R$                  4S(S jj5       r\S)S j5       rS*S jr\S#S j5       rS+S,S jjrS-S jrS.S jrS/S jrS0S jrS1S jrS2S jrS3S jrSr U =r!$ )4Loopsi  r  r   r  r   Callable[..., Any]inner_fnr  rangesc                x    [        5       R                  " / S U R                   5       QU R                  5       P76 $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   r,   r  es     r   r  1Loops.get_unbacked_symbol_uses.<locals>.<genexpr>  s     <1#A&&r  )r2   unionrO  inner_fn_free_unbacked_symbolsr  s    r   r  Loops.get_unbacked_symbol_uses  s9    |!! 
<<
//1
 	
r   c                   U R                  SU R                  R                   S3[        U R                  5      U R                  5       /U Vs/ s H  o" S[        X5       3PM     sn-   SU R                  < 3/-   5      $ s  snf )N'=origin_node=)r1  r   r{   r   r   inner_fn_strr   r   )r   namesr   s      r   _to_strLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA54q,-.5AB d..1234
 	
 Bs   B
c                "   > [         TU ]  5         g r   )superr  r   	__class__s    r   r  Loops.__post_init__  s    r   c                $    U R                  S5      $ )NrO  r_  r  s    r   __str__Loops.__str__  s    ||K((r   c                    U R                   $ r   r   r  s    r   r   Loops.get_device      {{r   c                    U R                   $ r   r  r  s    r   r  Loops.get_origin_node  r  r   c                    U R                   $ r   rg  r  s    r   r   Loops.get_size  rn  r   c                    U R                   $ r   rg  r  s    r   get_pointwise_sizeLoops.get_pointwise_size  rn  r   c                    UR                  SS 5      nUR                  SS 5      nU " U0 UD6nUR                  SU5        UR                  SU=(       d    UR                  5        [        R	                  U5      $ )Nr   r   )popr   r   re   create)clsr   r   r   tbrs         r   rx  Loops.create  so    jj5ZZT*    	
]K8	["*;<""r   c                    [        U 5       VVs/ s H0  u  p#US:X  a  [        R                  R                  O
[	        X5      PM2     snn$ s  snnf Nr7   )r   ru   SZerorT   )rO  rd   nr   s       r   _indexLoops._index  sI     "&)
) FEGGLL(Fv(QQ)
 	
 
s   7A
c                z   [        [        R                  " 5       5      n[        R                  " U5         [        R
                  " [        SS5         U R                  " U R                  5       6   UR                  5       sS S S 5        sS S S 5        $ ! , (       d  f       O= fS S S 5        g ! , (       d  f       g = fNallow_indexingT)
rB   rY   MockHandlerset_ops_handlerr   r   r   rN  inner_fn_argsgetvalue)r   	opcounters     r   inner_fn_opcountLoops.inner_fn_opcount  s{     1	y)5<<,d,
 MM4--/0%%'	,
 ,
)) ,
 ,
)))s#   B,-B?	B,
B 	B,,
B:c                :    U R                  U R                  5      4$ r   )r  rO  r  s    r   r  Loops.inner_fn_args  s    DKK(**r   c                t    [         R                  R                  " U R                  /U R	                  5       Q76 $ r   )rY   KernelFormatterHandlerir_to_stringrN  r  r  s    r   r]  Loops.inner_fn_str  s3    ''44MM
 ..0
 	
r   c                z    Uc  Sn[        U[        R                  5      nU R                  5       R                  U:  $ r[  )maxr8   realize_opcount_thresholdr  num_opsr  s     r   r  Loops.has_large_inner_fn  s9    I	6#C#CD	$$&..::r   c                d    U R                  U R                  5      n[        U R                  U5      $ r   )r  rO  r=   rN  )r   r   s     r   rW  $Loops.inner_fn_free_unbacked_symbols  s%    DKK(,T]]EBBr   c                   [         R                  " [        SS5         U R                  5       (       aJ  [	        U R                  5       U R                  5       U R                  5       5      R                  sS S S 5        $ [	        U R                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )	r   r   r   r  r?   rw  r   r  r  r  s    r   r  Loops.get_reads  s    \\.*:DA&&((*$$&MMO++- % BA +$$&MMO % BAAs   AB8=1B88
Cc                H    [        U R                  5       R                  5      $ r   )r2   r  read_buffersr  s    r   r  Loops.get_read_names
  s    $//1>>??r   c                H    [        U R                  5       R                  5      $ r   )r   r  r  r  s    r   r  Loops.num_reads  s    4((*7788r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_size() is not implemented by r<  r=  r  s    r   r  Loops.get_reduction_size      !9$t*QG
 	
r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_type() is not implemented by r<  r=  r  s    r   r  Loops.get_reduction_type  r  r   c                2    [        S[        U 5       S35      e)Nz+constant_to_device() is not implemented by r<  r=  rm  s     r   r  Loops.constant_to_device  r  r   r   r  )r^  r  r   r   r  r  r  r  r  )r   r   r   r   r   re   )rO  r  rd   r6   r   r  )r   rC   r   zSequence[Sequence[_IntLike]]r   r  r   zSet[Symbol]r  r  r  r
  r  r  )"r   r  r  r  r  r  r_  r  ri  __repr__r   r  r   rt  classmethodrx  r  r6   INDEXr  rI   r  r  r]  r  rW  r  r  r  r  r  r  r  __classcell__rd  s   @r   rL  rL    s      
	
 ) H  # # :>** 
 
 ( (+ 
 

;C@9




 
r   rL  c                   UR                   (       a   [        R                  " [        S5      U5      $ [        R                  " SU5      $ )Nnanr   )is_floating_pointrW   constantfloat)r   r   s     r   nop_loader_fnr     s1    ||E%L%00||Au%%r   c                  V    \ rS rSrS	S jrS
S jrSS jr        SS jrSS jrSr	g)	Pointwisei'  c                t    U R                  5       (       a  [        [        U R                  S9$ U R                  $ Nr4  )r_  r   r  r   rN  r  s    r   rw  Pointwise.make_loader)  s,      ""=

;;}}r   c                    / $ r   r   r  s    r   r  Pointwise.get_reduction_size0  s    	r   c                    g r   r   r  s    r   r  Pointwise.get_reduction_type3  r!  r   c                h    U R                  5       n[        R                  " X" U5      U" U5      5      $ r   )rw  rW   storer   output_nameindexervarsloaders        r   store_outputPointwise.store_output6  s-     !!#yygdmVD\BBr   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        XR
                  X R                  S9$ FMove this to a given device. Requires that all reads are to constants.override_devicer   r   rN  rO  )rw  r   r   ConstantBufferr  r   rO  r   r   r  s      r   r  Pointwise.constant_to_device?  sB    !!#n.?HPf[[
 	
r   r   Nr  r
  r  r  r  r  !Callable[[Sequence[Expr]], Never]r  r  r   rX   r  )
r   r  r  r  rw  r  r  r  r  r  r   r   r   r  r  '  sF    C"C 3C 	C
 
C
r   r  c                  R    \ rS rSr% S\S'   SrS\S'   S
S jr        SS jrS	rg)ScatteriH  r  output_indexerNr  scatter_modec           	         U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  S9$ )r  r  )r   r   rN  rO  r  r  )	rw  r   r   r  r  r   rO  r  r  r  s      r   r  Scatter.constant_to_deviceM  s[    !!#n.?HP**;;..**
 	
r   c                    U R                  5       n[        R                  " UU" U R                  U5      5      U" U5      U R                  S9$ )N)mode)rw  rW   r  r  r  r  s        r   r  Scatter.store_outputZ  sI     !!#yyD''-.4L""	
 	
r   r   r  r  )	r   r  r  r  r  r  r  r  r  r   r   r   r  r  H  sB    44"&L-&

"
 3
 	

 

r   r  
logical_ormaximumminimummuladdbitwise_xor)anyr  minprodsumxor_sumz"Dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                   ^ ^^ T [         ;   a	  [         T    $ T S;   a        SUUU 4S jjnU$ T S:X  a        SS jnU$ [        ST  35      e)Nargmaxargminc                  > U u  p#Uu  pETS:X  a  [         R                  " X$5      nO[         R                  " X$5      n[         R                  " X$5      n[	        T5      (       a  [         R
                  " X"5      n[         R
                  " XD5      n	[         R                  " U[         R                  " X5      5      n[         R                  " U[         R                  " X5      5      nT(       a  [         R                  " X55      O[         R                  " X55      n
[         R                  " U[         R                  " Xz5      5      n[         R                  " XbU5      [         R                  " XcU5      4$ )Nr  )	rW   ltgteqr%   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              r   argmax_combine_fn3get_reduction_combine_fn.<locals>.argmax_combine_fn{  s     !G G)vvg/vvg/FF7,Ee$$&&2&&2~~dCFF7,DEucoog.OP ' w(VVG- 
 >>$(CDD		$1		$1 r   welford_combinec                \    U u  p#nUu  pVnXR-
  nXG-   n	Xy-  n
X(U
-  -   X6-   X-  U-  U
-  -   U	4$ r   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fn4get_reduction_combine_fn.<locals>.welford_combine_fn  s]     &'"F(%&"F(OE!,J -I**emh6BB r   zunknown reduction_type=)r  Tuple[object, object]r  r  r   Tuple[OpsValue, OpsValue])r  #Tuple[OpsValue, OpsValue, OpsValue]r  r  r   r  )r  r7  )r  r   r  r  r  s   ```  r   get_reduction_combine_fnr  s  s     --#N33	/	/	$	)>	&	 	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMr   c           	        [        U5       VVs/ s H5  u  p4[        R                  R                  R	                  USS9S:w  d  M3  UPM7     nnnU Vs/ s H.  n[        R                  R                  R	                  X   5      PM0     n nU Vs/ s H.  n[        R                  R                  R	                  X   5      PM0     nnX:H  $ s  snnf s  snf s  snf )zH
Returns true if the strides are equal, ignoring dimensions of size 1 .
r   fallbackr7   )r   rY   r   r   r   )strides1strides2r   r   dimnon_1_indicess         r   significant_strides_equalr    s      o%FA77%%cA%6!; 	
%  
 BOOA  **8;7HOANOA  **8;7HO
 POs   2CC5C5Cc                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S S
 jr\rS!U 4S jjrS"S jrS#S jr	          S$S jr
S%S jrS&S jrS'S jrS(S jr\ S)                   S*S jj5       r\          S+S j5       r\\R(                  S4                   S,S jj5       r\      S-S j5       r\      S-S j5       r\        S.S j5       r\              S/S j5       r\            S0S j5       r\                        S1S j5       r\                    S2S j5       r\                      S3S j5       rSrU =r$ )4	Reductioni  r  reduction_rangesr   r  r  	src_dtyperF   reduction_hintc                $    U R                  S5      $ )N)rO  r  r  rh  r  s    r   ri  Reduction.__str__  s    ||LMMr   c                r   > [         TU ]  5       [        5       R                  " S U R                   5       6 -  $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   rR  rS  s     r   r  5Reduction.get_unbacked_symbol_uses.<locals>.<genexpr>  s     F0E1#A&&0Er  )rb  r  r2   rV  r  rc  s    r   r  "Reduction.get_unbacked_symbol_uses  s5    w/1JL4F4FF0E0EF5
 
 	
r   c                    U R                   $ r   )r  r  s    r   r  Reduction.get_reduction_size  s    $$$r   c                    U R                   $ r   )r  r  s    r   r  Reduction.get_reduction_type      """r   c           	         [         R                  " U R                  U R                  U R                  U R                  X45      5      n[         R                  " X" U5      U5      $ r   )rW   	reductionr   r  r  rN  store_reduction)r   r  r  r  reduction_varsr   s         r   r,  Reduction.store_reduction  sP     JJNNMM$/	
 "";uEEr   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   rO  r  r  s    r   index_lengthReduction.index_length  s!    4;;#d&;&;"<<<r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nX4$ r   )r  rO  r  r6   RINDEXr   r   rindexs      r   r  Reduction.inner_fn_args  s6    DKK(T22DKK@r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      n[        U R                  X5      $ r   )r  rO  r  r6   r3  r=   rN  r4  s      r   rW  (Reduction.inner_fn_free_unbacked_symbols  s?    DKK(T22DKK@,T]]EJJr   c                   U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  U R                  [        R                  S9$ )r  r  r   r   rN  rO  r  r  r  r  )rw  r   r   r  r  r   rO  r  r  r  rF   DEFAULTr  s      r   r  Reduction.constant_to_device  sk    !!#n.?HP**;;!22..nn(00	
 		
r   Nc	                   SS jn	[         R                  R                  R                  U5      n
[         R                  R                  R                  [	        U5      5      nUS:H  =(       dV    [         R                  R                  U [        R                  5      (       + =(       a    US;  =(       a    [        R                  nU	" U
5      (       a  U	" U5      (       d  [        R                  S4$ [        R                  " U 5      nUR                  nSnU(       a]  [        R                   " [         R"                  R$                  U SS9n[        R                   " [         R"                  R$                  U SS9nO      SS	 jnUnUS:X  a  U" X5      nUS:X  a  [        R&                  U4$ Ub  [)        U[*        5      (       a{  [-        U5      u  nnUbj  Ubg  [         R                  R                  R                  [	        UU-   5      5      nU
U:X  a,  [.        R1                  S
UUUUU5        [        R&                  S4$ [        R&                  U4$ X::  d  XS-  S-  :  a  [        R                  S4$ [3        U UUUUUU[        R                  S9nSS jnU" U5      u  nnU(       a  U" U5      u  nn[5        U5      S:X  a  [        R                  S4$ [6        R8                  " UR;                  5       UR=                  5       5      u  u  nnnSnSnU H  n[         R                  R                  R?                  UU5      n [         R                  R                  RA                  U UURC                  5       5      n![E        S U! 5       5      n"U"(       a  US-  nM  US-  nM     UU:  a  [        R&                  U" X5      4$ [        RF                  U" X5      4$ )Nc                .    [        U [        [        45      $ r   )rj   ry   r   r   s    r   
_is_static(Reduction.num_splits.<locals>._is_static  s    a#w00r   scanr  r7       T)inner_reductionFc                    gr~  r   )reduction_numel_hint
numel_hints     r   inner_reduction_splits4Reduction.num_splits.<locals>.inner_reduction_splits0  s     r   zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr   r:  c           	     R  ^ [        S [        U R                  5       U R                  5       U R	                  5       S9U S9nUR                  5       nUR                  c   eUR                   V s/ s H=  n [        U [        5      (       d  M  [        U [        R                  5      (       a  M;  U PM?     nn / nSn[        UR                  S S9 H  m[        U4S jU 5       5      (       d  M  UR                  TR                  5        TR                   ["        R$                  R&                  ;   d  Md  ["        R$                  R&                  TR                      n[)        UR*                  SS 5      nUR-                  5         [)        UR*                  SS 5      U:w  d  M  SnM     XE4$ s  sn f )	Nr   r   r   r   layoutdataFc                    U R                   $ r   r	  r   s    r   <lambda>@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>|  s    affr   keyc              3  T   >#    U  H  oTR                   R                  ;   v   M     g 7fr   )r   free_symbols)r  r{  mds     r   r  AReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>}  s     F:aBHH111:   %(r   T)ComputedBufferr   r   r   r   r  
range_varsrj   r   ru   Numbersortedr  allappendr   r   rY   r   name_to_bufferr   rM  decide_layout)	r{  cbread_writesrZ  indiceschangedbuforiginal_striderV  s	           @r   get_read_indices.Reduction.num_splits.<locals>.get_read_indicesf  s]   %<<>++-
 B ,,.K ))555 %///Aa& /9!U\\/J /  
 GG[..4DEF:FFFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G F ##!s   -F$
F$+F$r   c              3  *   #    U  H	  oS :  v   M     g7fr7   Nr   r  r   s     r   r  'Reduction.num_splits.<locals>.<genexpr>  s     /w!Aw   )r   r   r   r  )rE  ry   rF  ry   r   ry   )r{  r  r   zTuple[Sequence[Expr], bool])$rY   r   r   symbolic_hintrU   has_featurer:   REDUCE_TO_SINGLE_ELEMENTr8   split_reductionsrF   r;  rE   rx  multi_processor_count	functoolsr   choicesreduction_split_factorINNERrj   re   r>   logdebugr  r   r9   index_vars_squeezer   r  simplify_with_rangesstride_hintskeysr]  OUTER)#r   	dst_dtyper  rN  rO  r  r  reduction_numel
input_noder?  rE  rF  should_splitpropsnum_smmin_elements_per_threadrG  outer_reduction_splitssplit
new_rangesnew_reduction_rangesextracted_numel_hintr{  rg  rc  rd  r   r-  ranges1	num_outer	num_innerr   jstridesouters#                                      r   
num_splitsReduction.num_splits  s   	1  !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /00Z
5K5K ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K*K3V40
0 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- ;aZ"_, ((!++-)(00	
	$B ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		A  55aAAgg&&33A~w||~VG/w//EQ	Q	  y  &&(>$)   !&&(>$)  r   c                R  ^ ^^^^^ T Vs/ s H,  n[         R                  R                  R                  U5      PM.     snm[	        X#5      mSUUU4S jjmUS;   aD  [        SST[        R                  T5      5      R                  5       m      SUU 4S jjmU4S j$ T mT$ s  snf )z1Convert inner_fn from a reduction to an pointwisec                   >^  [         R                  " TU U4S j[        R                  " T Vs/ s H  n[	        U5      PM     sn6  5       5      $ s  snf )Nc              3  6   >#    U  H  nT" TU5      v   M     g 7fr   r   )r  r5  r   value_fns     r   r  =Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s&      # UF++#s   )rs  reduce	itertoolsproductr   )r   r   
combine_fnr  r  s   ` r   r   *Reduction._unroll_reduction_fn.<locals>.fn  sN    ##"+"3"3,<=,<q%(,<=# 
 >s   Ar  r  Nc                   > U Vs/ s H  n[         R                  " U5      PM     nnT" X5      [        R                  " T" U5      [        R
                  5      4$ s  snf r   )ru   expandrW   
index_exprro   int64)r   r5  r   flatten_indexrN  s      r   r  0Reduction._unroll_reduction_fn.<locals>.value_fn  sO     4::6a%,,q/6:U+NN=#8%++F  ;s    Ac                   > T" U 5      S   $ r~  r   )r   r   s    r   rP  0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1r   )r   r  r   r   )r   r  r5  r  r   r  )	rY   r   r   evaluate_static_shaper  FixedLayoutr   r   r{  )	rN  r  r  r  r   r  r  r   r  s	   ``   @@@@r   _unroll_reduction_fnReduction._unroll_reduction_fn  s     @P
?O!AGG2215?O
 .nH
		 		 11' 112BC	
 ln )3E*  .-HIM
s   3B$c
                `  ^^^^^ [         R                  R                  R                  [	        T5      5      n
U
S:X  an  SU4S jjnU" S5      U" S5      U" S5      U" S5      S.mTTR                  5       ;   d
   T S35       eSUUU4S jjn[        R                  UUU[        U5      S9$ U
S:X  a-  TS;   a	  SU4S	 jjnO	SUU4S
 jjn[        R                  UTXS9$ [        U
[        5      (       a  [         R                  R                  R                  U
5      [        R                  :  aQ  [	        U5      S:w  d  [        UR                  5      (       a(  [        R                  UTU R!                  TTTU5      US9$ U R#                  UTUTUTTU
U	5	      u  pU[$        R&                  :X  a  UnUS:X  a7  U	c   e[)        U	5      u  nnUc   eUc   eU R+                  UTUTUTUUTU5
      $ US:  a  U R-                  UTUTUTTUU5	      $ [.        R                  [1        UTTUTTUUS95      $ )Nr   c                  > T[         R                  :X  a  [        U 5      $ TR                  (       a,  [        U [        R
                  5      (       d   e[        U 5      $ [        U [        R                  5      (       d   e[        U 5      $ r   )	ro   r  r  rj   typingSupportsFloatr  SupportsIntry   )valr~  s    r   py_cnst!Reduction.create.<locals>.py_cnst  sg    

*9$00%c6+?+?@@@@ :%%c6+=+=>>>>s8Or   r7   )r  r  r  r  z* not supported for zero-dimension tensors!c                8   > [         R                  " TT   T5      $ r   rW   r  )r   r~  r  rtypes_to_initss    r   const_fn"Reduction.create.<locals>.const_fn  s    ||ON$CYOOr   r  r  c                2   > [         R                  " ST5      $ r[  r  )r   r~  s    r   r   Reduction.create.<locals>.fn  s    <<955r   c                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   ru   r  r  )r   r   reduction_indexrN  r  s      r   r   r    s2    =M&N=Muww||=MO&N#E;; 'O   $4rI  r:  )r  r   r   zUnion[bool, float, int])r   ry   r   rX   )rY   r   r   simplifyrU   r|  r  rx  rk   rj   r   r   r8   unroll_reductions_thresholdrQ   r{   r  r  rF   r;  r>   !create_multilayer_existing_rangescreate_multilayerre   r  )ry  r   r~  r  rN  rO  r  r  r  r  r  r  r  r   hintr  r  r  r  s     ` ` ``          @r   rx  Reduction.create  s    ''**33MBR4STa$ qz"1:
qz	O /"6"6"88M !!KLM8P P ##!F|	 $   a!556 6
< < ##Y $  
 00  **?;001v&!+vfkk/B/B ##11.	  $   nn

 ]222!NB;)))/R0,J, )))'33388 $  QY(( 
 
 !!1-#-	
 	
r   c                b   U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R
                  $ U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R                  $ SSSSSSS.U    $ )	N)r  r  z-infr   )r  r  infr7   r   r   r   )r  r  r  r  welford_reducer  )r%   r  r$   ro   iinfor  r  r  r   s     r   default_accumulatorReduction.default_accumulatori  s     ..e$$V}$!%(({{5)---..e$$U|#!%(({{5)--- '(
  	r   c                :    U S:X  a  g[         R                  X5      $ )Nr  r   )r  r  r  s     r   default_valueReduction.default_value  s!     --,,^CCr   c                    U S:X  a  U$ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U$ )NrI     i      )rF   r}  
OUTER_TINY)r  rF  r  s      r   _multilayer_second_step_hint&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++r   c                   ^^^^^^	 [         R                  UT/5      m	[        R                  R                  R                  [        R                  " TU-  S5      5      (       + m      SUUUUUU	4S jjnU$ )Nr   c                D  >^^ Uu  nU Gt mnTU-  U-   mSUU	UU4S jjnT
(       at  [         R                  " [         R                  " T[        R                  5      [         R                  " T[        R                  5      5      n[         R
                  " XCT5      $ U" 5       $ )Nc                 $   > T" TT" T /5      5      $ r   r   )rc  r  	new_indexr   s   r   bodyCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<r   )r   rX   )rW   r  r  ro   int32masked)r   r  reduction_blockr  r  rc  r  
block_sizedefaultr  	need_maskr  r   s        @@r   
wrapper_fn5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = vvNN7EKK8NN?EKK@ zz$g66vr   )r   Sequence[Symbol]r  r  r   rX   )Viewdynamic_reshape_indexerrY   r   r   r]  ru   r^  )
ry  r  r  r  r  r  r  r  r  r   s
    ` ` `` @@r   _multilayer_wrap_loader!Reduction._multilayer_wrap_loader  sw     ../?/ARS((@@HH_u,a0
 
		#	6F		 	& r   c                   ^^^ [        S T 5       5      (       d   ST< 35       e[        R                  U[        U5      [        U5      -   5      m      SUUU4S jjnU$ )Nc              3  *   #    U  H	  oS :H  v   M     g7frj  r   r  r{  s     r   r  DReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>  s      
+qFOrm  z8Only enabled for numel_hint == 1, found original_ranges=c           	        > U S [        T5       nU [        T5      S  nT" UT" [        U5      [        U5      -   5      5      $ r   )r   rl   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       r   r  EReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF r   )r  r  r  r  r   rX   )r]  r  r  rl   )ry  r  r  original_reduction_rangesr  r  r  r   s    ``    @r   '_multilayer_wrap_loader_existing_ranges1Reduction._multilayer_wrap_loader_existing_ranges  s      
+
 
 
 	IFo5GH	I 
 ..%uZ'85AU;V'V
		.		!5		 		 		 r   c                  ^ U[         R                  [         R                  4;  a  UO[         R                  n[        R                  UUUUUUU	U5      nUR                  5         UR                  5       m      SU4S jjn[        R                  R                  R                  [        U5      5      nU R                  XU5      nXWS[        U5       :X  d   e[        R                  [	        UUUUU[        U5      S U	UUS95      $ )I
Break a large reduction up into multiple smaller reductions
recursively
c                   > T" / U QUQ5      $ r   r   )r   r  intermediate_loaders     r   intermediate_fn;Reduction.create_multilayer_helper.<locals>.intermediate_fn  s     ''A'A'ABBr   Nr:  )r   r  r  r  r   rX   )ro   float16bfloat16r  r  rx  rc  rw  rY   r   r   r   rU   r  r   re   )ry  r   r~  r  r  r  r  r  r  r  r  r  intermediate_dtypeintermediater  rF  r  s                   @r   create_multilayer_helper"Reduction.create_multilayer_helper  s$   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%//o0NO
99~
 -Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
r   c
                    [        U5      n
[        XS-
  -   U5      nU R                  Xr5      nU R                  XFXX5      nU R	                  UUUUUU/ UQUPU/UUU	5      $ )r  r7   )rU   r4   r  r  r  )ry  r   r~  r  rN  rO  r  r  r  r  r  r  r  r  s                 r   r  Reduction.create_multilayer  s    $ ((89o;UC
##N>00


 ++feL
 	
r   c                j    U R                  UUUUU5      nU R                  UUUUUU/ UQUQUU	SU
5      $ )r  rI  )r  r  )ry  r   r~  r  rN  r  r  r  r  r  r  r  s               r   r  +Reduction.create_multilayer_existing_rangesF  sc    $ @@% 

 ++%+o+
+ 
 	
r   r   r  r  r
  r  
r  r  r  r  r  r  r-  r  r   rX   r  r   zSequence[Sequence[Expr]]r  r  r   )r   r  r~  r  r  r  rN  Callable[..., OpsValue]rO  r  r  r  r  r   r  r   r  Optional[IRNode]r   Tuple[ReductionHint, _IntLike])
rN  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r  r  r  r   r  r  r   z(Callable[[Sequence[_IntLike]], OpsValue])r   r  r~  r  r  r  rN  rM  rO  r  r  r  r  r   r  rF   r  r  r   re   r  r   r   r  r   #Union[_NumLike, Sequence[_NumLike]])r  ra   rF  ry   r  rF   r   rF   )r  r  r  r  r  ra   r  ra   r  ra   r  r	  r   Callable[..., object])r  @Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue]r  r  r  r  r  Sequence[Integer]r  r  r   r  )r   r  r~  r  r  r  r  rM  r  r  r  r  r  
List[Expr]r  List[Integer]r  r   r  ra   r  rF   r   re   )r   r  r~  r  r  r  rN  rM  rO  r  r  r  r  r   r  ra   r  rF   r   re   )r   r  r~  r  r  r  rN  rM  r  r  r  r  r  r  r  r  r  r   r  rF   r   re   ) r   r  r  r  r  ri  r  r  r  r  r,  r0  r  rW  r  r  r  r  r  rF   r;  rx  r  r  r  r  r  r  r  r  r  r  r  s   @r   r  r    s   ((!!N H

%#F"F 3F 	F
 )F 
F=
K

  (,]]] ] *	]
 #] -] ] ] %] 
(] ]~ -N-,- - 	-
 
2- -^  )6(=(='+Q
Q
 Q
 	Q

 %Q
 Q
 )Q
 Q
 &Q
 %Q
 
Q
 Q
f $/	, 6 DD$/D	,D D %(:G	   !'! -! "	!
 ! ! 5! 
! !F P ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 =
 =
 &=
 
=
 =
~ $
$
 $
 	$

 %$
 $
 )$
 $
 $
 &$
 
$
 $
L $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 $
 &$
 
$
 $
r   r  c                    ^  \ rS rSr% S\S'                     S
U 4S jjr          SS jr\\R                  4               SS jj5       r
\      SS j5       r\                  SS j5       rS	rU =r$ )WelfordReductionin  ry   output_indexc	                   >^ [        T5      S:X  a  TS   n	O      SU4S jjn	[        T
U ]	  UUU	UUUUUS9  Xl        g )Nr7   r   c                4   >^ ^ [        U U4S jT 5       5      $ )Nc              3  4   >#    U  H  o" TT5      v   M     g 7fr   r   )r  r   r   reduction_idxs     r   r  <WelfordReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HiR]33i   )rl   )r   r  	inner_fnss   ``r   r  )WelfordReduction.__init__.<locals>.loader  s     HiHHHr   r:  )r   r  r  r  r   zTuple[OpsValue, ...])r   rb  __init__r  )r   r   r   r  rO  r  r  r  r  r  rd  s      `      r   r  WelfordReduction.__init__q  so     y>Qq\FI#I4BI%I
 	-)) 	 		
 )r   c           	         [         R                  " U R                  U R                  U R                  U R                  X45      5      nXPR                     n[         R                  " X" U5      U5      $ r   )rW   r+  r   r  r  rN  r  r,  )r   r  r  r  r-  rn   r   s          r   r,   WelfordReduction.store_reduction  s^     JJNNMM$/	
 (()"";uEEr   c                  ^^^^^ US;   d   e[         R                  R                  R                  [	        T5      5      nS
UUU4S jjn	US:X  a  U	" S5      n
U	" S5      nU	" S5      nXU4$ US:X  aD      SUUUU4S jjmUS:X  a  T" US   5      U	" S5      U	" S5      4$ [        U4S jU 5       5      $ [        R                  TTTUS   TTUUS9u  pU[        R                  :X  a  UnUS:  a  U R                  TTUTTUUU5      $ [        S	5       Vs/ s H(  n[        R                  [        TTUTTUUU5      5      PM*     nnU H  nUR                  5         M     U$ s  snf )N)r  r  c                V   >^  SUU 4S jjn[         R                  TTU[        T5      S9$ )Nc                2   > [         R                  " TT5      $ r   r  )r   r   r  s    r   rN  8WelfordReduction.create.<locals>.const.<locals>.inner_fn  s    || r   r  r   r  r   rX   r  rx  rk   )r  rN  r   r   rO  s   ` r   const&WelfordReduction.create.<locals>.const  s7      ##!F|	 $  r   r   r7   c                V   >^  SU U4S jjn[         R                  TTU[        T5      S9$ )Nc                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   r  )r   r   r  r  r  s      r   rN  7WelfordReduction.create.<locals>.copy.<locals>.inner_fn  s2    =M&N=Muww||=MO&N!#77 'Or  r  r"  r#  )r  rN  r   r   rO  r  s   ` r   copy%WelfordReduction.create.<locals>.copy  s7    8 8 !''!%<	 (  r   r  c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r  r   r)  s     r   r  *WelfordReduction.create.<locals>.<genexpr>  s     :	"T"XX	r  )r  r  r   )r  ry   r   re   )r  z4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r   re   )rY   r   r   r  rU   rl   r  r  rF   r;  r  r   re   rx  r  rc  )ry  r   r   r  rO  r  r  r  r  r$  meanm2weightr  r  
output_idxresultsr   r)  s    `` ``            @r   rx  WelfordReduction.create  s    !FFFF''**33MBR4ST	 	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
 ]222!N19(( 	 	0 $Ah
 '
  $""	 ' 	 
 AIIK #
s   /E"c                    g)Nr  r   r  s     r   r  WelfordReduction.default_value  s     r   c	                  ^ ^^^^^^ [        T5      m[        R                  R                  R	                  [
        R                  " TT-  S5      5      (       + n	U	(       aB  US:w  a<          S
U4S jjn
T R                  UTUS   [        U
SS9[        U
SS94UTSTUS9$ [        TTS-
  -   T5      m[        R                  UT[        UU UUU4S jU 5       5      / UQTPT/UU5      nU H  nUR                  5         M             SS jm[        R                  R                  R                  [        U5      5      nT R                  TX5      n[        R                  UT[        U4S	 jU 5       5      UT/SU5      $ )r  r   r  c                2   > [         R                  " UT5      $ r   r  )r   r  r   r   s      r   r  4WelfordReduction.create_multilayer.<locals>.constant9  s     ||E511r   r   r7   )r   r   r  rO  r  r  r  r  c           
   3  L   >#    U  H  nTR                  UTTTTS S9v   M     g7f)r   )r  N)r  )r  r  r  ry  r  r  r  s     r   r  5WelfordReduction.create_multilayer.<locals>.<genexpr>Q  s>      
 (F ++$# ,  (s   !$c                    U" / U QUQ5      $ r   r   )r   r  r  s      r   intermediate_loader_fnBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fnd  s    
 4E4O455r   c              3  T   >#    U  H  n[        TUR                  5       S 9v   M     g7f))r  N)r   rw  )r  r   r;  s     r   r  r9  r  s&      &A .q}}G&rX  )r   r  r  r  r   ry   r   rX   )r   r  r  r  r  r  r   rX   )rU   rY   r   r   r]  ru   r^  r  r   r4   r  rx  rl   rc  r   r  )ry  r   r   r  rO  r  r  r  r  r  r  intermediatesr   rF  r  r;  r  s   ` `  ` `      @@@r   r  "WelfordReduction.create_multilayer   s     ((89((@@HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 
 (
 
 feL#
& AIIK 	6!	6+	6 9	6 		6 WW%%//f0EF
99:
  && &  G
 	
r   )r  )r   r  r   r  r  z>Sequence[Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]]rO  r  r  r  r  r   r  rF   r  ry   r   r   r  )r   r  r   r  r  Sequence[Callable[..., Any]]rO  r  r  r  r  r   r  rF   r   Sequence[TensorBox]r  )r   r  r   r  r  r@  rO  r  r  r  r  r   r  ra   r  rF   r   rA  )r   r  r  r  r  r  r,  r  rF   r;  rx  r  r  r  r  r  r  s   @r   r  r  n  s   )) ) R	)
 ") ,) ) &) ) 
)BF"F 3F 	F
 )F 
F   )6(=(=uu u 0	u
 u (u u &u 
u un $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 Z
 Z
 &Z
 
Z
 Z
r   r  c                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S U 4S jjrS!U 4S jjr          S"S jrS#S jrS$S jr	S%S jr
S%S jrS&S jrS'S jrS(S jr\\R"                  4SS.                   S)S jjj5       r\                  S*S j5       rSrU =r$ )+Scani~  r  scan_rangesr   =Callable[[Tuple[Any, ...], Tuple[Any, ...]], Tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rF   r  ry   r  Tuple[torch.dtype, ...]dtypesTuple[Callable[..., Any], ...]r  c                   > [         TU ]  5       [        5       R                  " S U R                   5       6 -  [        5       R                  " S U R
                   5       6 -  $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   rR  rS  s     r   r  0Scan.get_unbacked_symbol_uses.<locals>.<genexpr>       "VEU#8#;#;EUr  c              3  8   #    U  H  n[        U5      v   M     g 7fr   rR  rS  s     r   r  rK         "OY#8#;#;Yr  )rb  r  r2   rV  rD  r   rc  s    r   r  Scan.get_unbacked_symbol_uses  sW    
 G,.l  "VTEUEU"VWXl  "OTYY"OPQ	
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   rO  rD  r   rb  r  rc  s    r   r  Scan.__post_init__  =    4;;#d&6&6"773tyy>IIIr   c                   U R                  X45      nU R                   Vs/ s H
  of" U5      PM     nn[        R                  " U R                  U R
                  U5      n[        R                  " X" U5      XR                     5      $ s  snf r   )r   r  rW   rA  rG  r  r  r  )	r   r  r  r  	scan_varsr   rN  rn   results	            r   r,  Scan.store_reduction  sn     ll4+04?H(3-?$++t?yygclF;L;L4MNN @s   Bc                    g)Ncustomr   r  s    r   r  Scan.get_reduction_type  s    r   c                    U R                   $ r   )rD  r  s    r   r  Scan.get_reduction_size  r  r   c                    U R                   $ r   r   r  s    r   r   Scan.get_size      yyr   c                    U R                   $ r   rg  r  s    r   rt  Scan.get_pointwise_size  rn  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   rO  rD  r  s    r   r0  Scan.index_length  !    4;;#d&6&6"777r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r  rO  rD  r6   r3  r   r   r   r5  r   s       r   r  Scan.inner_fn_args  C    DKK(T--t{{;ll5)vr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      n[        U R                  U5      $ r   )r  rO  rD  r6   r3  r   r=   rN  rf  s       r   rW  #Scan.inner_fn_free_unbacked_symbols  M    DKK(T--t{{;ll5),T]]C@@r   T)can_fallback_to_atenc                 ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [        U5      S:  aB  [         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      n[        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[        R                  UX,   X<   US9PM     sn$ U R!                  UUS   US   TTTUUS9u  p}["        nUS:  aW  [$        R&                  R(                  S L =(       a    [        U5      S:H  nU(       d  U(       a  S /[        U5      -  $ SnO[*        nSUUU4S jjn[        [        U5      5       Vs/ s H/  n[,        R                  U" SUX,   UX<   UUTTUUUUS.U	D65      PM1     nnU H  nUR/                  5         M     U$ s  snf s  snf )	Nr7   r  r   )r   r   rN  axispointwise_rangesrD  r  
scan_numelc                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   r   )r   
scan_indexrn  ro  rD  s     r   r   Scan.create.<locals>.reindex  S    z?c+&6666u:%5!6666>U5D\>J>tu>>r   )r   r   rG  rN  r  r   rO  rD  r  r   r  r  )r   r  rs  r  r   r  r   )rY   r   ro  r:   SCANr   TUPLE_REDUCTIONr   r  rU   r]  ru   Ler   r  rx  r  rC  ro   versionhip	SplitScanre   rc  )ry  r   rG  r  r   rn  r  r  rl  r   r   rp  r  r  	scan_typesupports_splitr   r1  rU  ro  rD  s        `             @@r   rx  Scan.create  s~    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''v;?177#6#6N22$
 $
 6CK''77##&&}['AB
6{c)n,,, ++EHHZ,CDD %*#f+$6 %7L   ! .&4	 !  %7  &)^^)q\-#!! &4 	&
" 	>"]]..$6K3v;!;KN!' 6CK//!"J%		? 	?. !&c&k 2%
$ !3#  ! .!&4'+ +)##1!- " !3% 	 
* FNN  {J
s   ;$I6Ic	                L   ^^ SUU4S jjn	[         R                  UUUU	UUSUS9$ )Nc                ,   > T" / U S T QUQU TS  Q5      $ r   r   )r   r  rn  rN  s     r   r  #Scan.num_splits.<locals>.wrapper_fn/  s*    Fc%4jF=F3tu:FGGr   rA  )r   r~  r  rN  rO  r  r  r  )r   r  r  r  r   rX   )r  r  )
ry  r   r   rN  rn  ro  rD  r  rp  r  s
      ``     r   r  Scan.num_splits"  sA    	H 	H ###(!& $ 	
 		
r   r   r  r  )
r  r  r  z%Callable[[Sequence[_IntLike]], Never]r  r  rT  r  r   rX   r  r
  r  r  r  r  )r   r  rG  rF  r  z+Tuple[Callable[[Sequence[Expr]], Any], ...]r   r  rn  ry   r  rE  r  rF   rl  r  r   r   r   Sequence[Optional[TensorBox]])r   r  r   r  rN  r  rn  ry   ro  r  rD  r  r  rE  rp  r   r   r  )r   r  r  r  r  r  r  r,  r  r  r   rt  r0  r  rW  r  rF   r;  rx  r  r  r  r  s   @r   rC  rC  ~  s   
MMSS!!##--
 
O"
O 7
O 	
O
 $
O 

O 8A  )6(=(=] &*]] (] ?	]
 ] ] R] &] #] ] 
'] ]~ 

 
 7	

 
 (
 #
 R
 
 
(
 
r   rC  c                      \ rS rSrSrg)r{  i?  r   N)r   r  r  r  r  r   r   r   r{  r{  ?  s    r   r{  c                  P  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   SU 4S jjrSU 4S jjr          S S jrS!S jrS"S jr	S"S jr
S"S jrS#S jrS$S jrS%S jr\\R"                  4                   S&S jj5       rSrU =r$ )'SortiD  r  sort_rangesr   z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rF   r  ry   r  rF  rG  rH  r  r  stable
descendingc                   > [         TU ]  5       [        5       R                  " S U R                   5       6 -  [        5       R                  " S U R
                   5       6 -  $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   rR  rS  s     r   r  0Sort.get_unbacked_symbol_uses.<locals>.<genexpr>X  rL  r  c              3  8   #    U  H  n[        U5      v   M     g 7fr   rR  rS  s     r   r  r  Y  rN  r  )rb  r  r2   rV  r  r   rc  s    r   r  Sort.get_unbacked_symbol_usesU  sU    G,.l  "VTEUEU"VWXl  "OTYY"OPQ	
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   rO  r  r   rb  r  rc  s    r   r  Sort.__post_init__\  rR  r   c                .   U R                  X45      nU R                   Vs/ s H
  of" U5      PM     nn[        R                  " U R                  XpR
                  U R                  5      n[        R                  " X" U5      XR                     5      $ s  snf r   )	r   r  rW   sortrG  r  r  r  r  )	r   r  r  r  r-  r   rN  rn   rU  s	            r   r,  Sort.store_reduction`  sr     ll4004?H(3-?$++v{{DOOLyygclF;L;L4MNN @s   Bc                    g)Nr  r   r  s    r   r  Sort.get_reduction_typel  s    r   c                    U R                   $ r   )r  r  s    r   r  Sort.get_reduction_sizeo  r  r   c                    U R                   $ r   r]  r  s    r   r   Sort.get_sizer  r_  r   c                    U R                   $ r   rg  r  s    r   rt  Sort.get_pointwise_sizeu  rn  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   rO  r  r  s    r   r0  Sort.index_lengthx  rd  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r  rO  r  r6   r3  r   rf  s       r   r  Sort.inner_fn_args{  rh  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      n[        U R                  U5      $ r   )r  rO  r  r6   r3  r   r=   rN  rf  s       r   rW  #Sort.inner_fn_free_unbacked_symbols  rk  r   c	                  ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      nSn[        R                  R                  =(       a%    U
R                  [        R                  " X5      5      nU(       d  S /[        U5      -  $ [        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[         R#                  UX.   X>   US9PM     sn$ SUUU4S jjn[        [        U5      5       Vs/ s H3  n[$        R#                  ['        SUX.   UX>   UUTTUUUUUS.U	D65      PM5     nnU H  nUR)                  5         M     U$ s  snf s  snf )Nr7   r  r  c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   rr  )r   
sort_indexrn  ro  r  s     r   r   Sort.create.<locals>.reindex  ru  r   )r   r   rG  rN  r  r   rO  r  r   r  r  r  r  )r   r  r  r  r   r  r   )rY   r   ro  r:   SORTr   r   r  rU   r8   tritonpersistent_reductionsr]  ru   rx  r   r  rx  re   r  rc  )ry  r   rG  r  r   rn  r  r  r  r   r   
sort_numel
max_rblockis_persistent_kernelr  r   r1  rU  ro  r  s        `            @@r   rx  Sort.create  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''77##&&}['AB
 
MM// S00*1QR 	 $6CK''6{c)n,,, ++EHHZ,CDD %*#f+$6 %7L   ! .&4	 !  %7 	? 	?0 !&c&k 2'
& !3%  ! .!&4'+ +##1!-!) $ !3' 	 
, FNN  Q
s   $G&:G+r   r  r  )
r  r  r  r  r  r  r-  r  r   rX   r  r  r  r  r  )r   r  rG  rF  r  z'Tuple[Callable[[List[Expr]], Any], ...]r   r  rn  ry   r  r  r  r  r  rF   r   r   r   r  )r   r  r  r  r  r  r  r,  r  r  r   rt  r0  r  rW  r  rF   r;  rx  r  r  r  s   @r   r  r  D  s0    
GG!!##--L
 
O"
O 2
O 	
O
 '
O 

O 8A  )6(=(=LL (L ;	L
 L L L L &L L 
'L Lr   r  c                :     [        U SS9  g! [         a     gf = f)NFfreezeT)as_storage_and_layoutr7  r   s    r   r   r     s&    a. s   
 
c                     [        U SS9u  pUR                  5       (       a  UR                  5         UR                  5       $ ! [         a     gf = fNFr  )r  should_pad_stridespad_stridesis_contiguousr7  )r   bufferrM  s      r    is_contiguous_storage_and_layoutr    sS    .q? $$&& ##%% s   A A 
AAc           	        [        U [        5      (       a  [        U R                  UUUUUS9$ [        U [        5      (       a  [        U R                  [
        5      (       a  U(       a  U(       aJ  U R                  R                  5         U R                  R                  5       R                  5       (       d   eOTUb  U R                  R                  X4S9  O7Ub  U R                  R                  XTS9  OU R                  R                  5         X R                  R                  5       4$ [        U [        5      (       a#  [        U R                  US9u  pgX`R                  4$ [        e)z
Try to simplify x into a StorageBox and a Layout.

allow_padding only affect how we apply stride_order. When allow_padding
is True, we have the freedom to add padding when applying the stride_order.
r  want_contiguousstride_orderr  r  r  r  )rj   re   r  rN  
StorageBoxBufferr  r   r  r  r  r`  ReinterpretViewrM  r7  )r   r  r  r  r  r  r  r   s           r   r  r    s/    !Y$FF+%''
 	
 !Z  Z%?%?$$&vv((*88::::)66  7  *77! 8  $$&&&##%%%!_%% *FF
	 xx
r   )r  c                ^     [        U SS9u  p#UR                  U5      $ ! [         a     gf = fr  )r  is_stride_orderedr7  )r   r  r  rM  s       r   "is_stride_order_storage_and_layoutr  "	  s8    .q?''55 s    
,,c                      \ rS rSr% S\S'   SS jrSS jrSS jrSS jr\	SS j5       r
S S	 jrS!S
 jrS"S jrS#S jrS$S jrS%S jrS&S jrS'S jrS rS rS&S jrS&S jrS(S jrS)S jrS rS*S jrSrg)+BaseViewi,	  rf   rN  c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  !BaseView.get_unbacked_symbol_uses0	      yy1133r   c                    [        SU  35      e)Nzmake_reindexer NYI on r!  r  s    r   make_reindexerBaseView.make_reindexer3	  s    !$:4&"ABBr   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r   innerr   s    r   r  &BaseView.make_indexer.<locals>.indexer:	      &&r   )r   r  r   r   )rN  r{  r  )r   r  r  r   s     @@r   r{  BaseView.make_indexer6	  s4    		&&(%%'	' 	' r   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r  s    r   r  $BaseView.make_loader.<locals>.loaderC	  r  r   r"  )rN  rw  r  )r   r  r  r   s     @@r   rw  BaseView.make_loader?	  s4    		%%'%%'	' 	' r   c                6    U R                   R                  5       $ r   )rN  r   r  s    r   r   BaseView.dtypeH	  s    yy""$$r   c                6    U R                   R                  5       $ r   rN  r   r  s    r   r   BaseView.get_layoutL	      yy##%%r   c                6    U R                   R                  5       $ r   rN  r   r  s    r   r   BaseView.get_deviceO	  r  r   c                    g r   r   r  s    r   r  BaseView.get_origin_nodeR	  r!  r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  BaseView.get_nameU	      yy!!##r   c                "    U R                  5       $ r   rT  r  s    r   rt  BaseView.get_pointwise_sizeX	      }}r   c                8    U R                   R                  U5      $ r   rN  r  r  s     r   r  BaseView.mark_reuse[	      yy##E**r   c                6    U R                   R                  5       $ r   rN  rr  r  s    r   rr  BaseView.has_exceeded_max_reads^	      yy//11r   c                6    U R                   R                  5       $ r   rN  rc  r  s    r   rc  BaseView.realizea	      yy  ""r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  BaseView.realize_hintd	      yy%%''r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  BaseView.get_storage_numelg	      yy**,,r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  BaseView.is_externj	      yy""$$r   c                6    U R                   R                  5       $ r   )rN  is_module_bufferr  s    r   r  BaseView.is_module_bufferm	      yy))++r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  BaseView.get_read_namesp	      yy''))r   c                    [         R                  " [        SS5         [        U R	                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )r   r   r   r?   rw  r   r  r  s    r   r  BaseView.get_readss	  sD    \\.*:DA&  " e	 BAAs   2A
A'c                z    U n[        U[        5      (       a#  UR                  n[        U[        5      (       a  M#  U$ r   )rj   r  rN  )r   r   s     r   r  BaseView.unwrap_viewz	  s1    H%%A H%%r   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R                  5       UU R                  5       S9$ r  )rw  r   r   r  r  r   r   r  s      r   r  BaseView.constant_to_device	  sN    !!#n.?HP.."==?	
 	
r   r   Nr  )r   z*Callable[[Sequence[Expr]], Sequence[Expr]]r  r  r  r  r  r  r  r  r  r  r  r  r  r  )r   r  r  r  r  r  r  r{  rw  r  r   r   r   r  r  rt  r  rr  rc  r  r  r  r  r  r  r  r  r  r   r   r   r  r  ,	  s    
L4C % %&&$+2#(-%,*	
r   r  c                  P    \ rS rSr% S\S'   \S 5       r\S 5       rS
S jr	S r
Srg	)rr   i	  r  r   c                t   [         R                  R                  n[        [	        [
        R                  U5      5      nU R                  5       nS/[        U5      [        U5      -
  -  [        U5      -   n[        U5      [        U5      :X  d   e[        [        U5      5       H  nX   S:X  a  X4   c   eX4   X'   M  X4   bN  [         R                  R                  R                  R                  [
        R                  " X4   S5      SS9(       a  Mo  UR                  X   X4   -
  SS9S:X  a  M   S5       e   U$ )	zReplace `-1` with correct sizesNrI  r7   Tsize_obliviousr   r  zKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})rY   r   r   rk   r+  ru   r  r   r   r   r   evaluate_exprr^  r   )r   new_sizer   old_sizer   s        r   _normalize_sizeExpandView._normalize_size	  s    77##ELL(34::<6S]S]:;d8nL8}H---s8}%A{b {...&k$(8(8(B(B(P(Pa( )Q )  &&x{X['@1&MQRRa`aR &" r   c           	        U R                  X5      n[        U5      (       Ga3  [        U5      u  p4[        U5      [        UR                  5      -
  nUS:  d   e[
        R                  R                  /U-  n[        UR                  UR                  5       H|  u  pxUR                  [        R                  R                  R                  R                  [
        R                   " US5      SS9(       d  UO[
        R                  R                  5        M~     [#        UR$                  UR&                  [)        U5      UUR*                  5      n	[-        X9S9$ [/        XS9$ )Nr   r7   Tr  rN  rM  )rN  r   )r  r   r  r   r   ru   r  r  r   r   r^  rY   r   r   r   r  r^  r  r   r   rk   offsetr  rr   )
ry  r   r  storage
old_layoutskip
new_strider   r   
new_layouts
             r   rx  ExpandView.create	  s   &&q3 ##"7":Gx=3z#77D199'',,$.J #J$5$5z G!!77++55CCq)$ D    !H %!!  X!!J #CCq00r   c                    U R                   $ r   r]  r  s    r   r   ExpandView.get_size	  r_  r   c                   ^^ U R                  5       nU R                  R                  5       m[        U5      [        T5      -
  mUU4S jnU$ )Nc                   > [        U TS  5      n [        U 5      [        T5      :X  d   e[        [        T5      5       H*  nTU   S:X  d  M  [        R                  R
                  X'   M,     U $ r~  )rk   r   r   ru   r  r  )r   r   actualr  s     r   r   *ExpandView.make_reindexer.<locals>.reindex	  s]    tu&Eu:V,,,3v;'!9>$ww||EH ( Lr   )r   rN  r   )r   targetr   r%  r  s      @@r   r  ExpandView.make_reindexer	  s>    ##%6{S[(	 r   r   Nr  )r   r  r  r  r  r  r  r  rx  r   r  r  r   r   r   rr   rr   	  s8    
 4 1 16r   rr   c                  P    \ rS rSr% S\S'   \S 5       r\S 5       rS
S jrS r	Sr
g	)PermuteViewi	  r  dimsc           
        U R                  U5      n[        U5      [        [        [        U5      5      5      :X  d   e[	        U5      (       a}  [        U5      u  p4[        UR                  UR                  U Vs/ s H  oTR                  U   PM     snU Vs/ s H  oTR                  U   PM     snUR                  5      n[        X6S9$ [        XS9$ s  snf s  snf )Nr  )rN  r+  )_map_neg_dimsr2   r   r   r   r  r  r   r   r   r   r  r  r*  )ry  r   r+  r  r  r   r  s          r   rx  PermuteView.create	  s      &$:eCI.>#???? ##"7":G$!!  -12T#T2/34t!""1%t4!!J #CC-- 34s   :CCc                X    U Vs/ s H  o"S:  a  UO[        U5      U-   PM     sn$ s  snf r[  rr  )ry  r+  r  s      r   r-  PermuteView._map_neg_dims	  s+    @DEaxSY_4EEEs   'c                   [        U R                  U R                  5      5      [        [        [	        U R                  5      5      5      :X  d   eU R
                  R                  5       nU R                   Vs/ s H  o!U   PM	     sn$ s  snf r   )r2   r-  r+  r   r   rN  r   )r   r   r   s      r   r   PermuteView.get_size	  sq    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   8B	c                N  ^ [        U R                  5       VVs0 s H  u  pX!_M	     snnm[        [        U R                  5      5       Vs/ s H  nTU   PM
     snm[	        T5      [	        [        [        U R                  5      5      5      :X  d   eU4S jnU$ s  snnf s  snf )Nc                8   > T Vs/ s H  oU   PM	     sn$ s  snf r   r   )r   r   invs     r   r   +PermuteView.make_reindexer.<locals>.reindex
  s    &)*c!Hc***s   )r   r+  r   r   r2   )r   r   r  r   r5  s       @r   r  PermuteView.make_reindexer	  s     )$)) 45 4qt 45$S^454!s1v45#*U3tyy>-B"CCCC	+  65s   BB"r   Nr  )r   r  r  r  r  r  rx  r-  r   r  r  r   r   r   r*  r*  	  s:    
. ." F F,r   r*  c                  J    \ rS rSr\SS.S j5       r\SS j5       rS	S jrSr	g)
SqueezeViewi
  N)r  c          	        [        U5      (       Ga!  [        U5      u  p4/ n/ nUb=  [        U[        5      (       d   S5       eSU::  a  U[	        UR
                  5      :  d   e[        [        UR
                  UR                  5      5       Hm  u  nu  pUc,  US:w  a$  UR                  U5        UR                  U	5        M5  M7  Xr:w  a$  UR                  U5        UR                  U	5        M`  US:X  a  Mh   S5       e   [        UR                  UR                  UUUR                  5      n
[        X:S9$ Uc;  [        R!                  XR#                  5        Vs/ s H  oS:w  d  M
  UPM     sn5      $ UR#                  5       U   S:X  d   e[        R!                  U[        UR#                  5       5       VVs/ s H  u  p{Xr:w  d  M  UPM     snn5      $ s  snf s  snnf )Nzexpected integer dim argumentr   r7   zexpected squeezed size to be 1r  )r   r  rj   ry   r   r   r   r   r   r^  r  r   r   r  r  r  rx  r   )ry  r   r  r  r  r  r  r   r   r   r  r   s               r   rx  SqueezeView.create	
  s    ##"7":GHJ!#s++L-LL+CxC#joo*>$>>>%.s:??JDUDU/V%W!>D;qy -"))&1 ! x -"))&1#qyJ*JJy &X %!!  !!J #CC;;;qjjl"El1f1l"EFF::<$)));;q1::<1H"U1HAH11H"UVV #F #Vs   	G
$G
6GGc                   ^^ U  Vs/ s H  oS:w  d  M
  UPM     nn[        U 5       VVs/ s H  u  p1US:w  d  M  UPM     snnm[        U 5      mSUU4S jjnX$4$ s  snf s  snnf )Nr7   c                   > [        U 5      [        T5      :X  d   U  ST 35       e[        R                  R                  /T-  n[	        TU 5       H	  u  p#X1U'   M     [        U5      $ )N )r   ru   r  r  r   rl   )r   r  r   r   lengthnot_ones       r   r   %SqueezeView.squeezer.<locals>.reindex5
  sb    u:W-C%'/CC-/Igu-!"# .##r   )r   zList[sympy.Expr]r   zTuple[sympy.Expr, ...])r   r   )r   r   r  r   r   r?  r@  s        @@r   squeezerSqueezeView.squeezer/
  sa    #.t!AvAt.!*4;AF1;T	$ 	$    /;s   	AAA A c                    [        S5      e)Nzuse SqueezeView.create())AssertionError)r   rN  s     r   r  SqueezeView.__init__>
  s    788r   r   )r   r  r  )
r   r  r  r  r  rx  r  rB  r  r  r   r   r   r9  r9  
  s3    " #W #WJ ! !9r   r9  c                  b    \ rS rSr% S\S'   S\S'   S rSS jrSS jr\r\	S	 5       r
SS
 jrSrg)GenericViewiB
  r  r   rM  r   c                    U R                   $ r   )r   r  s    r   r  GenericView.make_reindexerG
      ||r   c                   [        [        U R                  5      5       Vs/ s H  n[        [        R
                  U5      PM     nn[        U R                  U5      5      nSSR                  [        [        U5      5       SU 3$ s  snf )Nzlambda , r   )r   r   r   rT   r6   r  rk   r   r-  r+  r   )r   r  	index_old	index_news       r   reindex_strGenericView.reindex_strJ
  sv    CHTYYCX
CXa*4::q9CX 	 
 i01	3sI#6789+FF	
s   $Bc                z    U R                  U R                  SU R                   3SU R                  5        3/5      $ )Nsize=zreindex=)r1  rN  r   rP  r  s    r   ri  GenericView.__str__Q
  s=    YY%		{+x8H8H8J7K-LM
 	
r   c                $    U " U[        U5      US9$ )NrN  r   r   )rk   )ry  r   r  r   s       r   rx  GenericView.createX
  s    X@@r   c                    U R                   $ r   r]  r  s    r   r   GenericView.get_size\
  r_  r   r   Nr  r  )r   r  r  r  r  r  rP  ri  r  r  rx  r   r  r   r   r   rH  rH  B
  s>    
G

 HA Ar   rH  c                  d    \ rS rSr\S 5       r\S 5       r\S 5       r\S 5       r	\S 5       r
Srg)	r  i`
  c                   [         R                  " U 5      n [         R                  " U5      n[        R                  R                  R
                  R                  nU" [         R                  " U S5      5      (       a  X-   n U $ r[  )ru   r  rY   r   r   r   r  Lt)r   r   r  s      r   handle_negative_indexView.handle_negative_indexb
  s[    ll3||D!((22@@#q)***C
r   c                   ^	 [        U[        [        45      (       d   eU R                  UR	                  5       U5      u  m	n[
        R                  R                  R                  T	U5      (       a  U$ Sn[        [        T	5      5      S:  d  [        [        U5      5      S:  a  SnSU;   a  U	4S jnU " U[        U5      US9$ [        U5      (       d  U(       a  U(       a%  [        U5      (       d  [        R                  U5      n[        U5      u  pV[        UR                   UR"                  U[$        R'                  U5      UR(                  5      n[+        XWS9$ U R-                  T	U5      nU " U[        U5      US9$ )NFr   Tc                4   > [        S/[        T5      -  5      $ r[  )rl   r   )r   r  s    r   fake_reindex!View.create.<locals>.fake_reindex}
  s    aS3x=011r   rV  r  )rj   rl   rk   resolve_negative_sizer   rY   r   r   statically_known_list_equalsr   r,   r  ExternKernelrealize_input as_contiguous_storage_and_layoutr  r   r   r   r   r  r  r  )
ry  r   r  unbacked_symbols_in_sizesra  r  r  r  r   r  s
            @r   rx  View.createk
  sR   (UDM2222 66qzz|XN( 77888LLH$)!%h/014(23a7(,%=2 ADNLII-a004M(2RST2U2U !..q1"B1"EG$!!  11(;!!J #CC--hAX@@r   c                P   U Vs/ s H,  n[         R                  R                  R                  U5      PM.     nnU  Vs/ s H,  n[         R                  R                  R                  U5      PM.     n n[	        U5      n[        [        U5      5       HI  nX   S:X  d  M  [        R                  R                  X'   [        [        U 5      [        U5      5      X'     O   [         R                  R                  R                  [        U 5      [        U5      5        X4$ s  snf s  snf )NrI  )rY   r   r   r  rk   r   r   ru   r  Oner3   rU   guard_equals)r  r  r   r   s       r   rc  View.resolve_negative_size
  s    :BC(QAGG$$--a0(C:BC(QAGG$$--a0(C>s8}%A{b #ggkk&}X'>h@WX	 & 	
%%mH&=}X?VW!! DCs
   3D3D#c                     U R                  X5      nU$ ! [        [        4 a=    [        U5      /nU R                  X5      nU R                  XB5      n[	        XV5      n U$ f = fr   )_dynamic_reshape_indexerrE  
IndexErrorrU   r   )ry  r  r  r   flatr   r   s          r   r  View.dynamic_reshape_indexer
  sp    	:228FG  
+ 	:!(+,D33HCH33DCH%h9G	:s    A	A"!A"c                  ^^ [         R                  R                  R                  n[	        [        U5      5       Vs/ s H  n[        [        R                  U5      PM     snm[        [        TU5      5      n[        U 5      n/ mU(       GaK  U(       GaC  UR                  5       nUR                  5       u  pxUS:X  a=  TR                  [        R                  R                  5        UR                  Xx45        GOUS:X  a  UR                  U5        GOU" U5      U" U5      :X  a<  TR                  U5        [         R                  R                  R!                  X5        GOfU" U5      U" U5      :  a~  U" U5      U" U5      :  a1  UR                  5       u  pX-  U-   nX-  nU" U5      U" U5      :  a  M1  TR                  U5        [         R                  R                  R!                  X5        OU" U5      U" U5      :  a  [        R                  R"                  nUnTR                  [%        X{U5      5        X-  nU" U5      U" U5      :  aG  UR                  5       nTR                  [%        X{U5      5        X-  nXl-  nU" U5      U" U5      :  a  MG  [         R                  R                  R!                  X5        O[&        eU(       a
  U(       a  GMC  U(       al  UR                  5       n[         R                  R                  R!                  US5        TR                  [        R                  R                  5        U(       a  Ml  U(       aE  UR                  5       u  px[         R                  R                  R!                  US5        U(       a  ME  TR)                  5         [        T5      [        U 5      :X  d   eUU4S jnU$ s  snf )z7
Perform a reshape entirely by modifying indexing math
r7   c                   >^ [        U 5      [        T5      :X  d   [        U 5      [        T5      45       e[        [        TU 5      5      m[        U4S jT 5       5      $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   )rV   )r  r   replacementss     r   r  AView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>
  s     HiA|44is   )r   rm   r   rl   )r   rv  r  	view_exprs    @r   r   .View._dynamic_reshape_indexer.<locals>.reindex
  sM    u:T*CSZT,CC*D% 01LHiHHHr   )rY   r   r   r   r   r   rT   r6   VIEWrk   r   rw  r^  ru   r  r  rl  rk  r5   rE  reverse)r  r  r   r   	stack_new	stack_oldsize_oldvarsize_newvar2	size_new2divisormodulusr   r  rx  s                 @@r   ro  View._dynamic_reshape_indexer
  s   
 GG$$..	 CHHBV
BVQ*499a8BV
 T8,-	N		I }}H%MMOMC1}  .  #1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD/C/C'3H  )Ih,??   %  --hA8$y'::''++"  w!GH!+)Ih,??'mmoG$$_S7%KL%/G'1H	  )Ih,??
   --hA$$= II@  }}HGG))(A6UWW\\* i
 %MMOMCGG))(A6 i 	9~X...	I
 s
s   $Or   N)r   r  r  r  r  r]  r  rx  rc  r  ro  r  r   r   r   r  r  `
  si      'A 'AR " " 	 	 @ @r   r  c                     ^  \ rS rSr% SrS\S'   SU 4S jjrSS jr\rS r	SS jr
SS	 jr\S
 5       rSS jrS rSS jrSS jrSS jrS rSS jrSSS jjrS S jrSrU =r$ )!r  i
  z*Pretend our storage has a different layoutrJ  rM  c                   > [         TU ]  5         [        U R                  [        5      (       a0  [
        R                  U SU R                  R                  5       5        g g )NrN  )rb  r  rj   rN  r  r   r   r  rc  s    r   r  ReinterpretView.__post_init__
  sC    dii**tVTYY-B-B-DE +r   c                P    U R                  U R                  U R                  /5      $ r   )r1  rN  rM  r  s    r   ri  ReinterpretView.__str__
  s&    		
 	
r   c                6    U R                   R                  5       $ r   r  r  s    r   r  ReinterpretView.get_name	  r  r   c                .    U R                   R                  $ r   )rM  r   r  s    r   r   ReinterpretView.get_device  s    {{!!!r   c                    g r   r   r  s    r   r  ReinterpretView.get_origin_node  r!  r   c                .    U R                   R                  $ r   )rM  r   r  s    r   r   ReinterpretView.dtype  s    {{   r   c                @    [        U R                  R                  5      $ r   )rk   rM  r   r  s    r   r   ReinterpretView.get_size  s    DKK$$%%r   c                @    [        U R                  R                  5      $ r   )rk   rM  r   r  s    r   r~  ReinterpretView.get_stride  s    DKK&&''r   c                   ^  SU 4S jjnU$ )Nc                Z  > TR                   R                  5       n[        R                  " TR	                  5       U" U 5      5      nTR                   R
                  TR                  R
                  :w  a6  [        R                  " UTR
                  TR                  R
                  5      $ U$ r   )rM  r{  rW   loadr  r   rN  to_dtype_bitcast)r   r  
tmp_loaderr   s      r   r  +ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!r   r   r  r   rX   r   r   r  s   ` r   rw  ReinterpretView.make_loader  s    	" r   c                6    U R                   R                  5       $ r   )rM  r{  r  s    r   r{  ReinterpretView.make_indexer'      {{''))r   c                    U R                   $ r   rM  r  s    r   r   ReinterpretView.get_layout*  rn  r   c                    g r   r   r  s    r   r  ReinterpretView.freeze_layout-  r  r   c                    [        U R                  R                  5      [        U R                  R                  5      -  [        U R                  R                  5      -  $ r   )r,   rM  r   r   r  r  s    r   r  (ReinterpretView.get_unbacked_symbol_uses0  sG    !$++"2"23#DKK$6$678#DKK$6$678	
r   c           	     p   [         R                  R                  R                  U R                  U R
                  R                  U R
                  R                  U R
                  R                  Ub  UR                  O#[         R                  R                  R                  U R
                  R                  S9$ r  )rY   r   wrapper_codecodegen_reinterpret_viewrN  rM  r   r   r  	writeliner   rf  s     r   rh  !ReinterpretView.codegen_reference7  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
r   c                    gr~  r   r  s    r   r  ReinterpretView.num_readsD      r   r   r  r  r  r  r  r  r  r  rJ  r   r  r  )r   r  r  r  __doc__r  r  ri  r  r  r   r  r  r   r   r~  rw  r{  r   r  r  rh  r  r  r  r  s   @r   r  r  
  sr    4NF

 H$" ! !&(	*

 r   r  c                  f    \ rS rSr% SrS\S'   \S 5       rSS jr\r	\
S 5       rSS jrSS	 jrS
rg)	DtypeViewiH  z(Pretend our storage has a different typer  target_dtypec                    [        U5      (       aM  [        U5      u  p4[        UR                  UUR                  UR
                  UR                  5      n[        X5S9$ [        XS9$ )Nr  )rN  r  )	r   r  r  r   r   r   r  r  r  )ry  r   	new_dtyper  r  r  s         r   rx  DtypeView.createN  sa     ##"7":G$!!!!!!J #CCa88r   c                P    U R                  U R                  U R                  /5      $ r   )r1  rN  r  r  s    r   ri  DtypeView.__str__\  s     		4+<+<=>>r   c                    U R                   $ r   )r  r  s    r   r   DtypeView.dtypea  s       r   c                6    U R                   R                  5       $ r   rN  r   r  s    r   r   DtypeView.get_sizee  r  r   c                L   ^ ^ T R                   R                  5       mUU 4S jnU$ )Nc                |   > [         R                  " T" U 5      TR                  TR                  R                  5      $ r   )rW   r  r  rN  r   )r   r  r   s    r   r  %DtypeView.make_loader.<locals>.loaderk  s*    ''c
D4E4EtyyWWr   rN  rw  )r   r  r  s   ` @r   rw  DtypeView.make_loaderh  s"    		%%'	X r   r   Nr  r  r  )r   r  r  r  r  r  r  rx  ri  r  r  r   r   rw  r  r   r   r   r  r  H  sE    29 9? H! !$r   r  c                  8    \ rS rSr\S 5       r\SS j5       rSrg)	SliceViewiq  c                   ^ ^^^ [         R                  R                  mUR                  5       U   m[	        S X4T4 5       5      (       a  S mOU4S jmUU U4S jnU" USTS5      nU" XCTT5      nX44$ )zb
Normalize start and end such that both are in the range
[0, x.get_size()[dim]] and start <= end.
c              3  8   #    U  H  n[        U5      v   M     g 7fr   rR  r  r   s     r   r  0SliceView.normalize_start_end.<locals>.<genexpr>{  s     H1GA$Q''1Gr  c                X    [         R                  " [         R                  " X5      U5      $ r   )ru   MinMax)r   loweruppers      r   clamp,SliceView.normalize_start_end.<locals>.clamp}  s    yy1!4e<<r   c                F   > TR                  TR                  X5      U5      $ r   )evaluate_minevaluate_max)r   r  r  r   s      r   r  r    s!    ,,X-B-B1-LeTTr   c                D   > U c  U$ TR                  U T5      n T" XU5      $ r   )r]  )r  r  r  r  r  ry  dim_sizes       r   
clamp_wrap1SliceView.normalize_start_end.<locals>.clamp_wrap  s,    {++C:CU++r   r   )rY   r   r   r   r  )	ry  r   r  startendr  r  r  r   s	   `     @@@r   normalize_start_endSliceView.normalize_start_endr  sr     77##::<$H%h1GHHH=
U	, 5!Xq1Xx8zr   c           	       ^^^^ [         R                  " T5      m[        T[         R                  5      (       d  TS:  d   e TS:X  a  US:  a  TS:X  a  U$ [
        R                  R                  n[        UR                  5       5      mU(       a  U R                  UTTU5      u  mn[        UT-
  TS-
  -   T5      TT'   [        U5      (       av  [        U5      u  p[        U	R                  5      n
U
T   T-  U
T'   [        U	R                   U	R"                  TU
U	R$                  U	R                  T   T-  -   5      n['        XS9$ UUUU4S jn[)        UTUS9$ ! [         a     GNf = f)Nr   l    r7   r  c                   > [        U 5      [        T5      :X  d   SU  ST 35       e[        U 5      n U T   T-  T-   U T'   U $ )Nzwrong ndim r>  )r   rk   )r   r  r  r  steps    r   r   !SliceView.create.<locals>.reindex  sP    u:X.P+eWAhZ0PP.KEsd*U2E#JLr   rV  )ru   r  rj   r   	TypeErrorrY   r   r   rk   r   r  r4   r   r  r   r  r   r   r  r  r  )ry  r   r  r  r  r  r  r   r  r  r  r  r   r  s     `` `       @r   rx  SliceView.create  s`   ||D!$

++tax77	zcY.419 77##

%
 00CDJE3 uq!94@ ##"7":Gj//0J(o4JsO$!!  !!J$5$5c$:U$BBJ #CC	 	 ah@@G  		s   E 
E('E(r   N)r7   T)r   r  r  r  r  r  rx  r  r   r   r   r  r  q  s*     8 )A )Ar   r  c                  R    \ rS rSr% S\S'   S\S'   SS jrSS jrSS jrSS	 jrS
r	g)BaseConstanti  r  r   r  r   c                    gNr   r   r  s    r   r   BaseConstant.get_size  s    r   c                    U R                   $ r   rl  r  s    r   r   BaseConstant.get_device  rn  r   c                    g r   r   r  s    r   r  BaseConstant.get_origin_node  r!  r   c                    [        5       $ r   r1   r  s    r   r  BaseConstant.get_reads  rB  r   r   Nr  r  r  r  )
r   r  r  r  r  r   r   r  r  r  r   r   r   r  r    s"    r   r  c                  R    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrSS
 jrSrg)Constanti  r   r   r  r   r  r   c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rW   r  r   r   r   r   s    r   r  $Constant.make_loader.<locals>.loader  s    <<

DJJ77r   r  r   r  s   ` r   rw  Constant.make_loader  s    	8 r   c                    g r   r   r  s    r   rc  Constant.realize  r  r   c                @    [        U R                  U R                  US9$ )N)r   r   r   )r  r   r   rm  s     r   r  Constant.constant_to_device  s    djj

6JJr   r   Nr  r  r  )	r   r  r  r  r  rw  rc  r  r  r   r   r   r  r    s#    JKr   r  c                  H    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrS
rg)IndexingConstanti  r   r   r  r   r  r   c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rW   r  r   r   r  s    r   r  ,IndexingConstant.make_loader.<locals>.loader  s    >>$**djj99r   r  r   r  s   ` r   rw  IndexingConstant.make_loader  s    	: r   c                @    [        U R                  U R                  US9$ )N)r   r   r   )r  r   r   rm  s     r   r  #IndexingConstant.constant_to_device  s    djj

6RRr   r   Nr  r  )r   r  r  r  r  rw  r  r  r   r   r   r  r    s    JSr   r  c           	     b    [        S [        U [        R                  U5      U5       5       5      $ )Nc              3  H   #    U  H  u  pnUS :H  =(       d    X:H  v   M     g7frj  r   )r  leftrightr   s       r   r  2is_contiguous_strides_for_shape.<locals>.<genexpr>  s.      "
D 		"T]""
s    ")r]  r   r   r   )r   rU  s     r   is_contiguous_strides_for_shaper    s5      !$N55e<e"
  r   c                <    [         R                  U R                  -  $ r   )r8   padding_alignment_bytesitemsizer4  s    r   get_align_for_dtyper
    s    ))U^^;;r   c                  ,    \ rS rSrSrSS jrSS jrSrg)	r   i   ztAbstract base for Layout, MultiOutputLayout, NoneLayout.
Represents the memory layout of the output of an Operation.c                >    [        [        U 5      R                  5      er   rv  r  s    r   r   OutputSpec.get_device  ry  r   c                >    [        [        U 5      R                  5      er   rv  r  s    r   storage_sizeOutputSpec.storage_size  ry  r   r   Nr  r  )r   r  r  r  r  r   r  r  r   r   r   r   r      s    C77r   r   c                      \ rS rSrS\" S5      4           SS jjrSS jr\rSS jrSS jr	\
      SS j5       rSS	 jrSS
 jrS r\
S 5       rS rS rS rSS jrSS jrSS jrSrg)rJ  i  Nr   c                    Uc  [         R                  U5      nXl        X l        [	        U5      [	        U5      :X  d   SU SU 35       e[        S U 5       5      (       d   eX0l        X@l        XPl        g )NrS  	, stride=c              3  N   #    U  H  n[        U[        [        45      v   M     g 7fr   )rj   r   ry   rk  s     r   r  "Layout.__init__.<locals>.<genexpr>  s     <t!:a$--t   #%)	r   r   r   r   r   r]  r   r   r  )r   r   r   r   r   r  s         r   r  Layout.__init__  sq     >#66t<F
4yCK'H5ix)HH'<t<<<<< $	"("r   c                \   SnU R                   S:w  a  SU R                    3nU R                  R                  c  SOSU R                  R                   3n[        U 5      R                   SU R                  R                   U SU R
                   SU R                   SU R                   U S	3$ )
Nr#  r   z	, offset=:z('z', z, size=r  r   )r  r   r   r{   r   r   r   r   )r   r  device_index_strs      r   ri  Layout.__str__  s    ;;! .F!%!2!2!:2!DKKDUDUCV@WDz""#2dkk&6&6%78H7ITZZL YII;i}VHA?	
r   c                    U R                   $ r   rl  r  s    r   r   Layout.get_device,  rn  r   c                B    [        U R                  U R                  5      $ r   )r  r   r   r  s    r   r  Layout.is_contiguous/  s    .t{{DIIFFr   c                    [        U 5      nUS;  d	  U S   S:X  a  g[        U[        U 5      U 5       H  u  p4nUS:w  d  M  X4:w  d  M    g   g)N)r      r7   FT)r   r   r&   )rU  r  ndimr  r  r   s         r   is_channels_last_contiguous"Layout.is_channels_last_contiguous2  sX     5zvqQ!$3E:E"
D qyT]	"

 r   c                    [        U R                  [        [        R	                  [        [        U R                  5      5      5      5      U R                  5       H  u  pnUS:w  d  M  X:w  d  M    g   g)Nr7   FT)r   r   reversedr   r   rk   r   )r   r  r  r   s       r   is_transposedLayout.is_transposed@  sZ    !$KK^66tHTYY<O7PQRII"
D
 qyT]"
 r   c           	        [        U R                  5      [        U5      :X  d   e[        U R                  5       VVs/ s H5  u  p#[        R
                  R                  R                  USS9S:w  d  M3  UPM7     nnnU Vs/ s H  o R                  U   PM     nnU Vs/ s H  o!U   PM	     nnS nU" U5      nS/[        U5      -  n[        [        U5      5       H  nXR   XqU   '   M     [        [        U5      S-
  5       H_  nXr   XrS-      :  n[        U[        5      (       d2  [        R
                  R                  R                  Xr   XrS-      :  SS9nU(       d  M_    g   gs  snnf s  snf s  snf )	Nr   r  r7   c                d    [        U 5      nU  Vs/ s H  o!R                  U5      PM     sn$ s  snf r   )r\  r   )arr
sorted_arrelements      r   sorted_indices0Layout.is_stride_ordered.<locals>.sorted_indicesW  s,    J=@AS'$$W-SAAAs   -rI  Tr  F)r   r   r   r   rY   r   r   r   r   rj   r  
_shape_envr  )	r   r   r   r  r  r   r.  stride_orderedexprs	            r   r  Layout.is_stride_orderedJ  sl   4;;3u:---
 $DII.
.ww))#):a? . 	 
 +88-Q++a.-8#01=aq=1	B
 u% E
*s5z"A'-yN8$ # s5zA~&A!$~!e'<<DdD))ww))77"%1u(==d 8  t ' ;
 91s   2E'3E' E-E2c                    S/[        [        [        S[        U R                  5      S-
  5      5      5      -   n[        U5      /U-   nU R                  U5      $ Nr   r7   )rk   r&  r   r   r   r  r  s     r   is_channels_last_stride_ordered&Layout.is_channels_last_stride_orderedm  sN    d8E!S-=-A$BCDDUu$%%e,,r   c                   [        U5      n[        U 5      S:X  a  U $ [        R                  (       d  [        R                  X5      (       a  U $ [        R                  " 5       n[        US5      (       a#  UR                  R                  SS5      (       a  U $ [        S [        R                  " X5       5       5      (       d  U $ [        U 5      n[        U5      n[!        [        U 5      5       Vs/ s H  nSPM     nnSXS   '   Sn	[#        USS SS9 HE  u  pXjS-
     nX   X   -  nU[        R$                  :  a  X-  S:w  a  ['        X5      U-  nS	n	XU'   MG     U	(       d  U $ [(        =R*                  S-  sl        U$ s  snf )
zv
The padding does not change stride order but makes sure all strides larger
than the threshold are multiple of align.
r   metadislike_paddingFc              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7fr   )rj   ry   ru   r   rk  s     r   r  &Layout._pad_strides.<locals>.<genexpr>  s*      
6 q3.//6s   -/r7   N)r  T)r
  r   r8   pad_channels_lastrJ  r#  rY   get_current_noder&  r9  getr]  r  chainr   r   r   r   padding_stride_thresholdrJ   r"   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noder  r   r   new_stridespaddedrankr   prev_idxr   s                 r   _pad_stridesLayout._pad_stridess  s    $E*z?a''F,N,N-
 -
 ,,.?F++0D0D0H0Hu1
 1
   
__Z6
 
 
 '
3,\:
"'J"89"8Qq"89 &'qM"":ab>;ID!(+H *T^;F777FNa<O /%7% <  ))Q.)- :s   0Fc                    [        U [        5      (       d   eU R                  c   eU R                  U R                  U R                  U R
                  5      U l        g r   )rj   r   r   rJ  r   r   r  s    r   r  Layout.pad_strides  sG    $////{{&&&''TYY

Kr   c                P    [         R                  =(       a    [        U [        5      $ r   )r8   comprehensive_paddingrj   r   r  s    r   r  Layout.should_pad_strides  s    ++P
40PPr   c                    [        U [        5      (       a  U $ U R                  5       (       a  U R                  5         [        U R                  U R
                  U R                  U R                  U R                  5      $ r   )	rj   r  r  r  r   r   r   r   r  r  s    r   as_fixedLayout.as_fixed  s_    dK((K""$$KKJJIIKKKK
 	
r   c                    [         R                  (       d   S[        U 5      R                   S35       eU R	                  5       R                  5       $ )Nzconvert z to FixedLayout first)r   r  r{   r   rR  r{  r  s    r   r{  Layout.make_indexer  sG    ))	Ad4j))**?@	A)}}++--r   c                4   U R                   UR                   :H  =(       ay    U R                  UR                  :H  =(       aY    U R                  UR                  :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ r   r   r   r   r   r  )r   others     r   __eq__Layout.__eq__  so    KK5<<' ,

ekk),		UZZ', u||+, u||+	
r   c                X    [        U R                  U R                  U R                  5      $ r   )r#   r   r   r  r  s    r   r  Layout.storage_size  s    .tyy$++t{{SSr   )r   r   r  r   r   )r   r  r   r  r   r  r   zOptional[List[Expr]]r  r   r   r   r  r  r  )rU  r  r  r  r   r  r  r   z
sympy.Expr)r   r  r  r  r   r  ri  r  r   r  r  r#  r'  r  r6  rJ  r  r  rR  r{  rY  r  r  r   r   r   rJ  rJ    s     (,qz## # 	#
 %# # 
#$	
 HG !,>	 !F- 8 8tL
Q
.
Tr   rJ  c                  "    \ rS rSrSrSS jrSrg)r  i  z A Tensor layout we cannot changec                   ^  U 4S jnU$ )z1A closure containing math to read a given elementc                $  > [        U 5      [        TR                  5      :X  d   e[        U 5      [        TR                  5      :X  d   eTR                  n[	        U TR                  TR                  5       H  u  p#nUS:w  d  M  XU-  -   nM     U$ r~  )r   r   r   r  r   )r   rU  r   r   szr   s        r   r  )FixedLayout.make_indexer.<locals>.indexer  s{    u:T[[!1111u:TYY///[[F#&udkk499#ER7#Fl2F $F Mr   r   r   r  s   ` r   r{  FixedLayout.make_indexer  s    	 r   r   Nr  )r   r  r  r  r  r{  r  r   r   r   r  r    s
    *r   r  c                     ^  \ rS rSrSrSr\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       rSS	 jrSS
 jrS rS rSSU 4S jjjrSrU =r$ )r   i  z(A Tensor layout we are allowed to changeFc                    [        U 5      S:X  a  / $ [        R                  R                  /n[	        U SS  5       H  nUR                  X!S   -  5        M     [        [	        U5      5      $ )Nr   r7   rI  )r   ru   r  rk  r&  r^  rk   )sizesreversed_stridesr   s      r   r   !FlexibleLayout.contiguous_strides  s^    u:?I!GGKK=U12Y'D##DB+?$?@ (H-.//r   c                    [        [        [        U 5      5      5      [        U5      :X  d   X45       e[        R                  R
                  nS/[        U5      -  nU H  nX#U'   X U   -  nM     U$ )z
Create a stride based on the order the dimensions should be filled in.

In this format, channels last would be:
    [1, 3, 2, 0]
N)r2   r   r   ru   r  rk  )rg  r   next_strider  r   s        r   fill_orderedFlexibleLayout.fill_ordered  sm     %E
+,
50AAQE>QAggkk&3u:%A$AJ%a0K  r   c                    [        [        [        U 5      5      5      [        U5      :X  d   e[        U5      n[        R                  X5      $ )zz
Create a stride based on the sorted order of a permuted range.

In this format, channels last would be:
    [3, 0, 2, 1]
)r2   r   r   r   r   rl  )rg  r   r   s      r   r1  FlexibleLayout.stride_ordered	  s@     %E
+,
50AAAA,U3
**5==r   c                D   U[         R                  :X  a  [        R                  U [        5      $ U[         R
                  :X  a  [        R                  U [        5      $ U[         R                  :X  a  [        R                  U 5      $ [        R                  SU5        [        e)a9  
Create a stride based on a memory format.

Memory format is translasted into a stride order,
so channels_last is the same as:
    FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

This interface does not support memory_format `torch.preserve_format`
which should be used to deduce a format from another source
z>stride_ordered_for_memory_format, unsuppored memory_format: %s)ro   channels_lastr   r1  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr   rw  rx  r7  )rg  memory_formats     r    stride_ordered_for_memory_format/FlexibleLayout.stride_ordered_for_memory_format  s     E///!008IJJe444!008JKKe555!44U;;IIP &%r   c                (   [        U 5      [        U5      :X  d   eU Vs/ s H,  n[        R                  R                  R	                  U5      PM.     nn[        [        [        U5      5      UR                  S9n[        R                  X5      $ s  snf )z
Create a stride that has the same stride order as given stride

For example, if given stride is [1000, 1, 100, 10],
the fill order should be [1, 3, 2, 0]
rR  )
r   rY   r   r   r   r\  r   __getitem__r   rl  )rg  r   r   r   s       r   same_orderedFlexibleLayout.same_ordered.  su     5zS[(((9?@A!''"",,Q/@E#f+.F4F4FG
**5== As   3Bc                ,   U R                  U R                  U5      nU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[        U R                  U R                  U R                  UU R                  5      $ r   )r1  r   r  rJ  r   r  r   r  )r   r   r  r  s       r   as_stride_orderFlexibleLayout.as_stride_order;  so    ((E:
""$$**:yy$**MJKKJJIIKK
 	
r   c                    UnU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[	        U R
                  U R                  U R                  UU R                  5      $ r   )r  rJ  r   r   r  r   r  )r   r  r  r  s       r   as_exact_stridesFlexibleLayout.as_exact_stridesH  s^    "
""$$**:yy$**MJKKJJIIKK
 	
r   c                   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  5      $ r   )rl  r   r  rJ  r   r  r   r  )r   r   r  s      r   as_fill_orderFlexibleLayout.as_fill_orderU  sm    &&tyy%8
""$$**:yy$**MJKKJJIIKK
 	
r   c                   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  5      $ r   )r{  r   r  rJ  r   r  r   r  )r   r   r  s      r   as_same_orderFlexibleLayout.as_same_ordera  sm    &&tyy&9
""$$**:yy$**MJKKJJIIKK
 	
r   c                   > U(       a  [         R                  X45      nO[         R                  U5      n[        TU ]  XX55        g r   )r   rl  r   rb  r  )r   r   r   r   r  r  rd  s         r   r  FlexibleLayout.__init__m  s5    $11$EG$77=G6r   r   r  r   r  )r   r  r  r  r  r  r  r   rl  r1  rw  r{  r~  r  r  r  r  r  r  r  s   @r   r   r     s    2N 0 0    	> 	> & &0 
> 
>





7 7r   r   c                  @   ^  \ rS rSrSrSU 4S jjrSS jrS rSrU =r	$ )	NonOwningLayoutiu  z,Is a view into the storage of another tensorc                   > UR                  5       n[        TU ]	  UR                  UR                  UR
                  UR                  5        Xl        g r   )r   rb  r  r   r   r   r   view)r   r  rM  rd  s      r   r  NonOwningLayout.__init__x  s?    "MMLLKKMM		
 	r   c                >    U R                  5       R                  5       $ r   )rR  r{  r  s    r   r{  NonOwningLayout.make_indexer  s    }}++--r   c                    U R                   R                  5       R                  nUS:X  a  gSSKJn  [
        R                  R                  R                  X5      $ )Nr   Tr7   )	ALIGNMENT)	r  r   r  utilsr  rY   r   r   statically_known_multiple_of)r   r  r  s      r   maybe_guard_aligned#NonOwningLayout.maybe_guard_aligned  sB    %%'..Q;$ww<<VOOr   )r  )r  zUnion[BaseView, TensorBox]r   r   r  )
r   r  r  r  r  r  r{  r  r  r  r  s   @r   r  r  u  s    6.P Pr   r  c                      \ rS rSrSrSrg)CommBufferTypei  symm_memr   N)r   r  r  r  SYMM_MEMr  r   r   r   r  r    s    Hr   r  c                  R   ^  \ rS rSr% SrS\S'   S\S'         S	U 4S jjrSrU =r$ )
CommBufferLayouti  a\  
A layout that signifies the buffer is a comm buffer.
In terms of striding, the layout is identical to `FixedLayout`.

Buffers with this layout do not participate in in-place reuse - it can be
neither the source nor the target for in-place reuse.

For detailed motivation and usage of this layout, see
NOTE [lowering-time collective optimization].
r  comm_buffer_typer   
group_namec                  > [        U[        5      (       d  [        SU S35      eUR                  5       n[        TU ]  UR                  UR                  UR                  UR                  UR                  S9  X l        X0l        g )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).rW  )rj   r   rE  rR  rb  r  r   r   r   r   r  r  r  )r   rM  r  r  fixedrd  s        r   r  CommBufferLayout.__init__  s     &.11 ++1("6 
 !<<++<<<< 	 	
 !1$r   )r  r  )rM  r   r  r  r  r   )	r   r  r  r  r  r  r  r  r  r  s   @r   r  r    s;    	 %$O%% )% 	% %r   r  c                      \ rS rSr% S\S'   \R                  " S S9rS\S'   \R                  " S S9rS\S	'   SS
 jr	S r
SS jrSrg)
NoneLayouti  r  r   c                     S/$ r[  r   r   r   r   rP  NoneLayout.<lambda>  s    r   default_factoryr  r   c                     S/$ r[  r   r   r   r   rP  r    s    1#r   r   c                    gr[  r   r  s    r   r  NoneLayout.storage_size  r  r   c                    U $ r   r   r  s    r   rR  NoneLayout.as_fixed      r   c                    U R                   $ r   rl  r  s    r   r   NoneLayout.get_device  rn  r   r   Nr  r  )r   r  r  r  r  r  r  r   r   r  rR  r   r  r   r   r   r  r    sC     #"!''DD)D#))+FFIFr   r  c                     ^  \ rS rSrSU 4S jjr\SS j5       r\R                  SS j5       rSS jrSS jr	S r
\SS j5       rS	 rSS
 jrSrU =r$ )MutationLayoutSHOULDREMOVEi  c                  > [         TU ]  UR                  5       UR                  5       UR	                  5       S 5        Xl        U R                  5       R                  5       n[        R                  R                  U5        g r   )rb  r  rn  r   r   r'  
get_bufferr  rY   r   mark_buffer_mutated)r   r'  r   rd  s      r   r  #MutationLayoutSHOULDREMOVE.__init__  sc    &&(OO		
  ))+	##D)r   c                6    U R                  5       R                  $ r   )real_layoutr   r  s    r   r   !MutationLayoutSHOULDREMOVE.stride  s    !(((r   c                    g r   r   )r   r   s     r   r   r    s    r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  'MutationLayoutSHOULDREMOVE.storage_size  s    !..00r   c                p   ^ U4S jmT" U R                   5      n[        U[        5      (       d   S5       eU$ )Nc                   > [        U [        5      (       a  T" U R                  5      $ [        U [        5      (       a  T" U R	                  5       5      $ [        U [
        5      (       a  T" U R                  5      $ U $ r   )rj   r  r'  r  r  
MutableBoxrN  )r'  unwrap_viewss    r   r  ;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  sb    &"<==#FMM22&(++#F$6$6$899&*--#FKK00Mr   z1MutationLayoutSHOULDREMOVE must refer to a buffer)r'  rj   r  )r   rU  r  s     @r   r  %MutationLayoutSHOULDREMOVE.get_buffer  sC    	 dkk*F
 
 	?>	? 
 r   c                6    U R                  5       R                  $ r   )r  rM  r  s    r   r  &MutationLayoutSHOULDREMOVE.real_layout       '''r   c                   UR                  5         [        R                  R                  UR	                  5       5        [        U[        5      (       a  UR                  nUR                  5         U(       d  [        R                  UR                  5       UR                  5       UR                  5       [        UR                  5       UR                  5       5       VVs/ s H.  u  pE[        R                  R                   R#                  XE5      PM0     snnS9R                  nUR                  5         [        UR                  R$                  [&        5      (       d   e[)        U5      UR                  l        UR                  $ s  snnf )Nr  )rc  rY   r   r  r  rj   re   rN  r  r  rx  r   r   rw  r   r   r   rl  rM  r   r  )ry  srcdstunsafe_aliasr  r  s         r   realize_into'MutationLayoutSHOULDREMOVE.realize_into  s    	
##CLLN3c9%%((C 	""~~'mmo* !$CLLNCLLN C C GG$$11!7 C	 #  d  	#((//>::::4S9xxs   5Fc                    U $ r   r   r  s    r   rR  #MutationLayoutSHOULDREMOVE.as_fixed  r  r   c                6    U R                   R                  5       $ r   )r'  r{  r  s    r   r{  'MutationLayoutSHOULDREMOVE.make_indexer!  r  r   )r'  )r'  rf   r   r   r   r  )r   r   r   r   r]  )r   r  r  r  )r   r  r  r  r  r  r   setterr  r  r  r  r  rR  r{  r  r  r  s   @r   r  r    sg    	* ) ) ]] 1 (    D* *r   r  c                  V  ^  \ rS rSr% S\S'   S\S'   S#U 4S jjrS$S jrS%S jrS&S	 jrS'S
 jr	\
S(S j5       rS)S jrS*S jrS+S jrS,S jrS-S jrS rS rS.S#S jjrS#S jrS#S jrS.S#S jjrS rS/S jrS0S1S jjrS rS2S jrS2S jrS3S jrS4S jrS4S jrS5S  jr S6S! jr!S"r"U =r#$ )7r  i%  r  r   r   rM  c                F   > [         TU ]  5         U R                  SS 5        g r%  )rb  r  r   rc  s    r   r  Buffer.__post_init__/  s    t4r   c                >    U R                  5       R                  5       $ r   )r   r{  r  s    r   r{  Buffer.make_indexer3  s     --//r   c                J    U R                   (       d   U 5       eU R                   $ r   r	  r  s    r   r  Buffer.get_name6  s    yy$yyyr   c                >    U R                  5       R                  5       $ r   )rD  r   r  s    r   r   Buffer.get_device:  s    ##%0022r   c                    g r   r   r  s    r   r  Buffer.get_defining_op=  r!  r   c                6    U R                  5       R                  $ r   )r   r   r  s    r   r   Buffer.dtype@  s     &&&r   c                :    / U R                  5       R                  Q$ r   )r   r   r  s    r   r   Buffer.get_sizeD  s    ("''((r   c                :    / U R                  5       R                  Q$ r   )r   r   r  s    r   r~  Buffer.get_strideG  s    *"))**r   c                6    U R                  5       R                  $ r   )r   r  r  s    r   
get_offsetBuffer.get_offsetJ  r  r   c                    [        U R                  [        5      (       a  U R                  $ [        [	        U R                  5      R
                  5      er   )rj   rM  rJ  r7  r{   r   r  s    r   r   Buffer.get_layoutM  s7    dkk6**;;!$t{{"3"<"<==r   c                    U R                   $ r   r  r  s    r   rD  Buffer.get_output_specR  rn  r   c                "    U R                  5       $ r   )rX  r  s    r   r  Buffer.get_storage_numelU  s    ~~r   c                    [        U R                  [        5      (       a@  [        U R                  [        5      (       d   U R                  R	                  5       U l        g g g r   )rj   rM  rJ  r  rR  r  s    r   r  Buffer.freeze_layoutX  sF    dkk6**:KK4
 4
 ++..0DK4
*r   c                    [        U R                  [        5      (       d   eU R                  R                  XS9U l        g Nr  )rj   rM  r   r~  r  s      r   r  &Buffer.freeze_layout_with_stride_order^  s2    $++~6666kk11%1Ur   c                    [        U R                  [        5      (       d   eU R                  R                  U5      U l        g r   )rj   rM  r   r  r  s     r   r  $Buffer.freeze_layout_with_fill_orderb  s/    $++~6666kk//6r   c                    [        U R                  [        5      (       d   eU R                  R                  U5      U l        g r   )rj   rM  r   r  r  s     r   r  $Buffer.freeze_layout_with_same_orderf  s/    $++~6666kk//7r   c                    [        U R                  [        5      (       d   eU R                  R                  XS9U l        g r  )rj   rM  r   r  r  s      r   r  'Buffer.freeze_layout_with_exact_stridesj  s7    $++~6666kk22 3 
r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ r[  r\  r  s    r   r_  Buffer.is_zero_elementsp  ra  r   c                v   ^  T R                  5       (       a  [        [        T R                  5       S9$ U 4S jnU$ )Nr4  c                r   > TR                  5       n[        R                  " TR                  U" U 5      5      $ r   )r{  rW   r  r   r   r  r   s     r   r  "Buffer.make_loader.<locals>.loaderx  s*    '')G88DIIwu~66r   )r_  r   r  r   r  s   ` r   rw  Buffer.make_loaders  s2      ""=0@AA	7 r   c                "    U R                  5       $ r   r  rf  s     r   rh  Buffer.codegen_reference~  r  r   c                    g r   r   r  s    r   r`  Buffer.decide_layout  r  r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )rj   rM  r  r  r  r  s    r   r  #Buffer.get_inputs_that_alias_output  s2    dkk?33KK$$--/00r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )rj   rM  r  r'  r  r  s    r   r  Buffer.get_mutation_names  s3    dkk#=>>KK&&//122r   c                6    [        U R                  5       /5      $ r   )r2   r  r  s    r   r  Buffer.get_read_names  s    4==?+,,r   c                    [        5       $ r   r1   r  s    r   r  Buffer.get_unbacked_symbol_uses  rB  r   c                    [        5       $ r   r1   r  s    r   r@  Buffer.get_unbacked_symbol_defs  rB  r   c                    g r   r   r  s    r   rc  Buffer.realize  r  r   c                    grq  r   r  s    r   should_allocateBuffer.should_allocate  s    r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  rJ  r  r  )$r   r  r  r  r  r  r{  r  r   r  r  r   r   r~  r  r   rD  r  r  r  r  r  r  r_  rw  rh  r`  r  r  r  r  r@  rc  r  r  r  r  s   @r   r  r  %  s     
503 ' ')+(>
 1V78
W	

- r   r  c                  J    \ rS rSrSS jrSS jr\R                  rS	S jrSr	g)
OperationBufferi  c                    U /$ r   r   r  s    r   r=  OperationBuffer.get_outputs  s	    vr   c                    U $ r   r   r  s    r   r  OperationBuffer.get_defining_op  r  r   c                X    [         R                  U 5        [        R                  U 5        g r   )r  r  r  r  s    r   r  OperationBuffer.__post_init__  s    T"%r   r   NrH  r   r  r  )
r   r  r  r  r=  r  r  r  r  r  r   r   r   r  r    s     #55&r   r  c                      \ rS rSrSS jrSrg)InputBufferi  c                    gr~  r   r  s    r   r  InputBuffer.num_reads  r  r   r   Nr  )r   r  r  r  r  r  r   r   r   r  r    s    r   r  c                      \ rS rSrSrSrg)DonatedBufferi  aA  
Represents a donated buffer which is a saved tensor that is not alias to any
fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
reuse the input tensor memory during backward since it might be used in another
function. However, donated buffer can be inplace reused during backward
to save memory.
r   N)r   r  r  r  r  r  r   r   r   r   r     s    r   r   c                  8    \ rS rSr% SrS\S'   SS jrS	S jrSrg)
r  i  Nr  r  c                   ^  SU 4S jjnU$ )Nc                   > TR                  5       R                  5       n[        R                  " [        R
                  R                  TR                  5       TR                  5      U" U 5      5      $ r   )	r   r{  rW   r  rY   r   constant_namer  r  r  s     r   r  *ConstantBuffer.make_loader.<locals>.loader  sP    oo'446G88%%dmmot7K7KL r   r  r   r  s   ` r   rw  ConstantBuffer.make_loader  s    	 r   c                    [        [        R                  R                  U R	                  5       U5      U R
                  S9$ N)r   rM  )r  rY   r   r$  r  rM  rm  s     r   r  !ConstantBuffer.constant_to_device  s/    &&t}}?
 	
r   r   r  r  )	r   r  r  r  r  r  rw  r  r  r   r   r   r  r    s    .2O+2
r   r  c                  @    \ rS rSrSS jrS	S
S jjrSS jrSS jrSrg)NoneAsConstantBufferi  c                    [        5       $ r   r1   r  s    r   r  -NoneAsConstantBuffer.get_unbacked_symbol_uses  rB  r   Nc                J    [         R                  R                  R                  $ r   )rY   r   r  none_strrf  s     r   rh  &NoneAsConstantBuffer.codegen_reference  s    ww##,,,r   c                    [        S S9$ Nrl  )r  r  s    r   rD  $NoneAsConstantBuffer.get_output_spec  s    &&r   c                    grq  r   r  s    r   rK  &NoneAsConstantBuffer.has_tensor_output  rt  r   r   rJ  r   r  r  r  )	r   r  r  r  r  rh  rD  rK  r  r   r   r   r+  r+    s    -'r   r+  c                  B    \ rS rSr% S\S'   S	S jrS
SS jjrSS jrSrg)ShapeAsConstantBufferi  r   r2  c                ,    [        U R                  5      $ r   )r,   r2  r  s    r   r  .ShapeAsConstantBuffer.get_unbacked_symbol_uses  s    $TYY//r   Nc                h    [         R                  R                  R                  U R                  5      $ r   )rY   r   r  codegen_sizevarr2  rf  s     r   rh  'ShapeAsConstantBuffer.codegen_reference  s!    ww##33DII>>r   c                    grq  r   r  s    r   rK  'ShapeAsConstantBuffer.has_tensor_output  rt  r   r   rJ  r   r  r  )	r   r  r  r  r  r  rh  rK  r  r   r   r   r7  r7    s    
J0?r   r7  c                    ^  \ rS rSr% S\S'   SS jrSS jrSS jrSS jrSS jr	SS	 jr
SU 4S
 jjrSS jrSS jrS S jr\  S!S j5       r  S"     S#S jjr\ S$S j5       rS%S jrSS jrS&S jrS&S jrS'S jrSrU =r$ )(rY  i  rL  rN  c                    U R                   b  U R                   $ [        U R                  S5      (       a  U R                  R                   $ g)z}
Returns self.name if it exists, otherwise returns the name of the data node if that exists.
If neither exist, returns None.
Nr   )r   r&  rN  r  s    r   get_computed_buffer_name'ComputedBuffer.get_computed_buffer_name  s:    
 99 99499f%%99>>!r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  ComputedBuffer.num_reads  r   r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  ComputedBuffer.get_reads  r   r   c                6    U R                   R                  5       $ r   r  r  s    r   r  ComputedBuffer.get_read_names  r  r   c                   [         R                  " [        SS5         U R                  R	                  5       (       aT  [        U R                  5       U R                  R                  5       U R                  R                  5       5      sS S S 5        $ [        U R                  5       U R                  R                  5       5      sS S S 5        $ ! , (       d  f       g = fr  )
r   r   r   rN  r  r?   get_store_functionrt  r  r   r  s    r   r  ComputedBuffer.get_read_writes  s    \\.*:DAyy++--*++-II002II002 BA +++-II&&( BAAs   A*C1C
Cc                    [        U R                  5       5      [        U R                  5       5      -  [        U R                  5       5      -  U R                  R                  5       -  $ r   )r,   r   r~  r  rN  r  r  s    r   r  'ComputedBuffer.get_unbacked_symbol_uses  sV    & "$--/2#DOO$567#DOO$567 ii0023	
r   c                   > U R                  5       (       dV  U R                  [        R                  R                  ;  a.  U R                  5       S:X  a  U R                  R                  5       $ [        TU ]  5       $ r[  )	r  r   rY   r   mutated_buffersr  rN  rw  rb  rc  s    r   rw  ComputedBuffer.make_loader.  sZ    ''))		!8!88 A% 99((**w"$$r   c                   U R                  5       R                  5       R                  5       n[        U R                  [
        [        [        45      (       a+  [        U R                  R                  U R                  U5      $ [        U R                  [        5      (       d   e[        U R                  R                  U R                  U5      $ r   )r   rR  r{  rj   rN  r  rC  r  r   r,  r   r  r  rc  s     r   rL  !ComputedBuffer.get_store_function8  s    //#,,.;;=dii)T4!89949944diiIIdii333349911499gFFr   c                   [        U R                  [        5      (       Ga  [        R                  " U R
                  R                  5       U R
                  R                  5       5      u  u  pnU R                  5       R                  n[        S U 5       5      (       d   eU VVs/ s Hk  n[        U[        R                  5      (       d  M$  [        UR                  U Vs0 s H%  ofS:w  d  M
  U[        R                  R                   _M'     sn5      PMm     nnnU(       a  [        U R
                  ["        [$        45      (       a  U R
                  R'                  X5      nOUnU Vs/ s H,  n[(        R*                  R,                  R/                  X5      PM.     n	nSSKJn
  U
" XR5                  5       5      $ gs  snf s  snnf s  snf )aD  
If our layout is still flexible, try to determine the stride order based on stride orders of reads.

TODO(jansel): A better algorithm here would look at downstream consumers of this
              value and try to do global graph-level layout optimization.
              This is also something just begging to be autotuned.
c              3  v   #    U  H/  n[        U[        R                  [        R                  45      v   M1     g 7fr   )rj   r9   StarDep	MemoryDepr  s     r   r  0ComputedBuffer.get_fill_order.<locals>.<genexpr>O  s2      A 1|33\5K5KLMMs   79r   r7   pick_loop_orderN)rj   rM  r   r9   ry  rN  rt  r  r  r  r]  rX  rV   r   ru   r  r  rC  r  r   rY   r   r   r{  	schedulerr[  r   )r   
index_varsr-  r   r  r{  vrc  r2  stride_lengthsr[  s              r   r   ComputedBuffer.get_fill_order@  sx    dkk>22.:.M.M		,,.		0L0L0N/+(Z! ((*00E       Aa!7!78 Y
177n$WnUVPV_Q_n$WX   dii$66"ii//
KG(GMR"MRTAGG$$11$@U  " 7&~}}GG# %X"s*   %#F?F? 	F:-F:F?(3G:F?c                    [        U R                  [        5      (       a:  U R                  5       nU(       a  U R	                  U5        g U R                  5         g g r   )rj   rM  r   r   r  r  r  s     r   r`  ComputedBuffer.decide_layoutg  sC    dkk>22'')E2259""$ 3r   c                   [         R                  " U R                  R                  5       U R                  R	                  5       SS9u  p[
        R                  " [        SU R                  5       5         [        U R                  5       U R                  5       (       a  UOUS S U/UQ76 nS S S 5        / n/ n/ n/ nUR                  5        Hf  u  pXS   ;   a-  U(       a   eUR                  U5        UR                  U	5        M:  XS   ;   d   eUR                  U5        UR                  U	5        Mh     Xg4WXE44$ ! , (       d  f       N= f)Nqrc   r  r7   r   )r9   ry  rN  rt  r  r   r   r  r   rA   rL  r  itemsr^  )
r   r   
var_rangesr  r]  reduce_vars
index_sizereduce_sizer^  r   s
             r   get_default_sizes_body%ComputedBuffer.get_default_sizes_bodyo  s6    (::II((*DII,H,H,JSV
 \\.*;T__=NO'')0022Ra 	D P 
!#
$$&DAG|&&!!!$!!!$G|#|""1%""1% ' ($0III) POs   37D;;
E	c                  ^ ^^ T R                  5       u  u  p4nu  pgU(       a  U" X44XVU45      u  u  p4nu  pg/ UR                  R                  5       QmUb  [        U[        5      (       a  [        U5      S:X  d   eUu  p[        U[        5      (       d   e[        U	[        5      (       d   e[        S U	 5       5      (       d   eUR                  n
X:X  d	   U
U45       eU	 Vs/ s H  oT;  d  M
  UPM     n	nTU	-  m/ UR                  5       Qm[        R                  R                  T [        R                  5      (       d  TR!                  UR#                  5       5        UUU 4S jnXg-   n[%        ['        T 5      5      (       + =(       d    [(        R*                  (       + nU" UUUU5      u  nnnU" X}XN5      u  nnn[,        R.                  " UUSS9u  u  nnn[1        UU" U5      U" U5      /UUU5      nUU4U4$ s  snf )a  
This is a main place where we do loop transformations in a
backend-agnostic way.

Here we:
    1) Remove any 1 dimensions
    2) Fuse contiguous dimensions together
    3) Reorder dimensions based on stride orders

Optional argument extra_indexing_constraints can be used to append additional
indexing expressions to existing ones derived from buffer's body. This can be useful
to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
the scheduler node compatible with other nodes.
Optional argument recompute_sizes_body_func can be used to recompute sizes and body
on the default body. This can be useful to append additional loop transformations.
r   c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )rj   r   )r  fs     r   r  6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>  s     H4Gqz!T**4Gs   c           	        > TR                  XUT
5      u  p$nU" U 5      n U(       aD  [        R                  R                  R	                  U U[        T	X5      5      u  p&n[        XV5      nOUnX(U4$ r   )_apply_loop_reorderingrY   r   r   _simplify_loopsr;   r   )x_varssupport_varsrg  simplify_loopsreindex0r   r   pruner   index_formulasmemory_addrsr   s            r   simplify_and_reorderAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  sy    (,(C(Ce\)%EX f%F)*)9)9)I)I,^VK*&
 *(="8++r   prc   )rj  indexing_exprsrn   rj   rl   r   rm   rk   r]  rf  get_write_exprsrY   r   ro  r:   PREFER_STORE_LOOP_ORDERextendget_read_exprsrQ   r   r8   loop_ordering_after_fusionr9   index_vars_no_squeezerA   )r   extra_indexing_constraintsrecompute_sizes_body_funcrh  ri  r  r]  rg  extra_indexing_rangesextra_indexing_exprexpected_var_rangesrT  rz  rt  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsrf  rx  ry  s   `                     @@r   rz  #ComputedBuffer.simplify_and_reorder  sV   4 '')		
%Z%Z %
 *)4k1J	))
 94..5578%15u==23q89 :T6!3T::::148888H4GHHHHH"&//&? #%B ? /#.a>2I.   # 11N0--/0ww""4)O)OPP 3 3 56	,$ "/t,--VV5V5V1V 	 (<	(
$\1 ,@{,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11w#s   %	G?2G?c           
     n   SSK Jn  Uc  / n U Vs/ s H-  n[        R                  R                  R                  X`U5      PM/     nn[        U5      [        U5      :X  a  [        US   5      [        U 5      :X  d   e[        [        U" XrU5      5      5      nU V	s/ s H  oU	   PM	     nn	U[#        U5      [%        U5      4$ s  snf ! [         a^    [        R                  (       a)  [        R                  S[        [        X5      5      U5        [        [!        [        U5      5      5      n Nf = fs  sn	f )zE
Shuffle the order of loops around to hopefully improve performance.
r7   rZ  r   z%Did not simplify complex index:
%s
%s)r\  r[  rY   r   r   r{  r   rk   r&  	Exceptionr8   rx  rw  warningrm   r   r   r   r   )
r]  rt  rg  ry  priority_idxr[  r2  r  r   r   s
             r   rq  %ComputedBuffer._apply_loop_reordering  s#    	/L	, )(D   --dM(   w<3|#44WQZCM :   /',"OPQE $))5aq5)l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 4CAC D2C A%D/.D/c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  !ComputedBuffer.get_reduction_size'      yy++--r   c                6    U R                   R                  5       $ r   rN  r  r  s    r   r  !ComputedBuffer.get_reduction_type*  r  r   c                6    U R                   R                  5       $ r   )rN  r_  r  s    r   r  ComputedBuffer.is_no_op-  r  r   c                    gNTr   r  s    r   r  ComputedBuffer.should_allocate0  r!  r   c                8    U R                   R                  U5      $ )r  rN  r  rm  s     r   r  !ComputedBuffer.constant_to_device3  s    yy++F33r   r   r  r  r  r  r  rJ  r  )r   r  )r   zOptional[List[int]]r  )r   zeTuple[Tuple[List[sympy.Expr], List[sympy.Expr]], LoopBody, Tuple[List[sympy.Expr], List[sympy.Expr]]]NN)r  *Optional[Tuple[Dict[Any, Any], List[Any]]]r  Optional[Callable[..., Any]]r   z:Tuple[Tuple[List[sympy.Expr], List[sympy.Expr]], LoopBody]r   r
  r  r  )r   r  r  r  r  rA  r  r  r  r  r  rw  rL  r   r`  rI   rj  rz  r  rq  r  r  r  r  r  r  r  r  s   @r   rY  rY    s    
K	%%*
2%G%N% J
J JD RVBFq2$Nq2 $@q2 
D	q2f  !B !BF..,4 4r   rY  c                     ^  \ rS rSrSr        SU 4S jjrSS jrS rSS jrSS jr	SS jr
  S   SS	 jjrS
rU =r$ )TemplateBufferi8  zh
Represents a Triton (in the future other type) of template operator
that we can fuse an epilogue onto.
c                   > [         TU ]  S US9  [        R                  U5      U l        X0l        [        R                  R                  U 5      U l	        [        R                  R                  U 5        g r(  )rb  r  InputsKernelunwrap_storageinputsmake_kernel_renderrY   r   register_bufferr   register_operation)r   rM  r  r  rd  s       r   r  TemplateBuffer.__init__>  sW     	d62"11&9"4GG++D1		""4(r   c                     U R                  SS9$ )NT	normalize)r?   r  s    r   r  TemplateBuffer.get_read_writesJ  s    ''$'77r   c                   ^^ U R                  5       mU R                  5       R                  5       mUU4S jn[        R                  " X R                  5       SUS9n[        S U R                   5       5      Ul        U$ )Nc                b   > [        U5      S:X  d   e[        R                  " TT" U 5      S5      $ )Nr   fake)r   rW   r  )r   r5  r  r   s     r   dummy1TemplateBuffer.extract_read_writes.<locals>.dummyQ  s,    v;!###99T75>6::r   r   r  c              3  j   #    U  H)  n[         R                  " UR                  5       5      v   M+     g 7fr   )r9   rW  r  r  s     r   r  5TemplateBuffer.extract_read_writes.<locals>.<genexpr>X  s$     XKq 4 4QZZ\ B BKs   13)	r  r   r{  r9   r?   r   r2   r  r  )r   r  r  depsr  r   s       @@r   r?   "TemplateBuffer.extract_read_writesM  sd    }}//#002	; //==?B)
  XDKKXX
r   c                6    [         R                  R                  $ r   )ru   r  rk  r  s    r   r  !TemplateBuffer.get_reduction_size[  s    ww{{r   c                    g r   r   r  s    r   r  !TemplateBuffer.get_reduction_type^  r!  r   c                    gr  r   r  s    r   r  TemplateBuffer.should_allocatea  r!  r   c                *    U R                  5       S4S 4$ r  rT  )r   r  r  s      r   rz  #TemplateBuffer.simplify_and_reorderd  s$      
 	
r   )r  r  r   )rM  rJ  r  List[IRNode]r  rM  r   r   r  r
  r  r  r  )r  r  r  r  )r   r  r  r  r  r  r  r?   r  r  r  rz  r  r  r  s   @r   r  r  8  ss    

)
) 
) /	
)
 

)8
 RVBF
$N
 $@
 
r   r  c                  L   ^  \ rS rSr S   SU 4S jjjrSS jrS	S jrSrU =r$ )
TritonTemplateBufferir  c                  > [         T	U ]  XU5        X@l        U /U l        Ub  [        R
                  R                  R                  [        R
                  R                  R                  4n[        R                  R                  R                  nXe;   d   SU SU 35       eU R                  S   R                  5       nU =R                  U Vs/ s H  n[        [!        US9X5      PM     sn-  sl        ggs  snf )a  
NOTE:[TritonTemplates with multiple outputs]
We want the ability for TritonTemplates to output multiple tensors. Triton
kernels have no notion of outputs and this is done by creating tensors that
are then mutated by the kernel. Currenlty our STORE_OUTPUT codegen doesn't
support creating multinode outputs for triton templates.
We work around this by creating an extra input buffer during the lowering
and we mark them as mutated inputs.
Nz$Mutated inputs are only allowed for z	 but got r   rl  )rb  r  mutated_inputsoutputsro   rW   higher_orderflex_attentionflex_attention_backwardrY   r   current_noder'  r  r   MutationOutputr  )
r   rM  r  r  r  allowed_setr  r   re  rd  s
            r   r  TritonTemplateBuffer.__init__s  s      	);<,&*V% 		&&55		&&>>K 77//66L+[5k])L>Z[+[[^..0FLL))C z8#D) L &s   C7c                    U R                   $ r   )r  r  s    r   r=   TritonTemplateBuffer.get_outputs  rK  r   c                &    SU R                    S3nU$ )NzTritonTemplateBuffer(layout=r   r  )r   r   s     r   ri  TritonTemplateBuffer.__str__  s    ,T[[M;
r   )r  r  r   )r  zOptional[Iterable[IRNode]]r   r   rH  r  )	r   r  r  r  r  r=  ri  r  r  r  s   @r   r  r  r  s6     6:!
 3! 
! !F r   r  c                     ^  \ rS rSrSr          SU 4S jjrSS jrSS jrS rSS jr	SS jr
SS	 jrSS
 jrSrU =r$ )ChoiceCalleri  a  
Represents a possible choice used in autotune_process.py.
During autotuning, self.benchmark() is first called to get benchmark result,
and if this choice is selected, self.output_node() is called to get the output_node.

Children classes: TritonTemplateCaller, CUDATemplateCaller.
c                R   > [         TU ]  5         Xl        X0l        X l        X@l        g r   )rb  r  r   rM  input_nodesdescription)r   r   r  rM  r  rd  s        r   r  ChoiceCaller.__init__  s(     		& 'r   c               T    U R                  5       n[        R                  " X2SU05      $ )Nr   )to_callablerD   	benchmark)r   r   r   algos       r   r  ChoiceCaller.benchmark  s'    !$$T%>>r   c                    [         er   r!  r  s    r   	call_nameChoiceCaller.call_name  r#  r   c                    [         er   r!  r  s    r   r  ChoiceCaller.to_callable  r#  r   c                    [         er   r!  r  s    r   hash_keyChoiceCaller.hash_key  r#  r   c                    [         er   r!  r  s    r   output_nodeChoiceCaller.output_node  r#  r   c                    0 $ )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictChoiceCaller.info_dict  s    	r   c                    g)Nunsupported_choicer   r  s    r   autoheuristic_idChoiceCaller.autoheuristic_id  s    #r   )r  r  rM  r   )
r   r   r  rI  rM  rJ  r  r   r   r   )r   r  r  )r   re   )r   z<Dict[str, Union[PrimitiveInfoType, List[PrimitiveInfoType]]])r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s   @r   r  r    se    '' "' 	'
 ' 
'?""""$ $r   r  c                      \ rS rSrSS jrSrg)TritonTemplateCallerBasei  c                    [         er   r!  r  s    r   get_make_kernel_render/TritonTemplateCallerBase.get_make_kernel_render  r#  r   r   N)r   r   )r   r  r  r  r  r  r   r   r   r  r    s    "r   r  c                     ^  \ rS rSrSr          S
U 4S jjr\SS j5       r\SS j5       r\	R                  SS j5       rSS jrSS jrS	rU =r$ )MultiTemplateBufferi  a3  
Represents a Buffer with multiple backing implementation choices.

Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
Otherwise, the fastest base choice will be chosen.
c                v   > [         TU ]  XS S9  X0l        S U l        X l        [        S U 5       5      U l        g )N)rM  r  r  c              3     #    U  H]  n[        U[        5      =(       dA    [        U[        R                  R                  R
                  5      =(       a    UR                  v   M_     g 7fr   )rj   r  ro   rp   select_algorithmExternKernelCallerhas_out_variant)r  choices     r   r  /MultiTemplateBuffer.__init__.<locals>.<genexpr>  sU      %
 - v78 65??#C#C#V#VW +**
 -s   A%A')rb  r  _choice_timings_fn_choice_timingsoriginal_inputsr]  _output_plannable)r   rM  r  choice_timingsunfiltered_choicesrd  s        r   r  MultiTemplateBuffer.__init__  sG     	$O"0DH%!$ %
 -%
 "
r   c                    U R                   $ )zN
Are all possible choices TritonTemplates or Extern Kernels with out variants
)r  r  s    r   output_plannable$MultiTemplateBuffer.output_plannable  s    
 %%%r   c                ^    U R                   c  U R                  5       U l         U R                   $ r   )r  r  r  s    r   r  "MultiTemplateBuffer.choice_timings  s+    '#'#:#:#<D ###r   c              #    #    [        U[        R                  R                  R                  5      (       d   eU R
                  UR
                  :X  d   eU R                  nUR                  5       U l         S v   X l        g ! X l        f = f7fr   )rj   ro   rp   r  TritonTemplateCallerrM  r  r  )r   callerrenders      r   swap_as_triton_caller)MultiTemplateBuffer.swap_as_triton_caller   sn     &%//"B"B"W"WXXXX{{fmm+++(("("?"?"A	-&,#f#s   A3B6B :BB		Bc                @   [        U[        R                  R                  R                  5      (       d   eU R                  5       UR                  R                  :X  d   eU R                  5       UR                  R                  :X  d   eUR                  5       U l        g r   )rj   ro   rp   r  r  r   rM  r   r~  r   r  r  )r   r  s     r   finalize_as_triton_caller-MultiTemplateBuffer.finalize_as_triton_caller  ss    &%//"B"B"W"WXXXX}}&--"4"4444 FMM$8$8888"("?"?"Ar   c                r    [        U R                  U R                  R                  S9nXR                  U   4$ )NrR  )r  r  r?  )r   
min_choices     r   get_min_choice"MultiTemplateBuffer.get_min_choice  s4    ,,$2E2E2I2IJ
//
;<<r   )r  r  r  r  r  )
rM  rJ  r  r  r  z'Callable[[], Dict[ChoiceCaller, float]]r  zList[ChoiceCaller]r   r   r  )r   zDict[ChoiceCaller, float])r  r  )r  r  r   r   )r   zTuple[ChoiceCaller, float])r   r  r  r  r  r  r  r
  r  r  r  r  r  r  r  r  r  s   @r   r  r    s    

 
 @	

 /
 

( & & $ $
 	- 	-B= =r   r  c                  >   ^  \ rS rSr      SU 4S jjrS rSrU =r$ )CUDATemplateBufferi  c                >   > [         TU ]  XU5        X@l        XPl        g r   )rb  r  workspace_sizetemplate)r   rM  r  r  r  r  rd  s         r   r  CUDATemplateBuffer.__init__  s      	);<, r   c                8    U R                   b  U R                   $ S$ r[  )r  r  s    r   rF  %CUDATemplateBuffer.get_workspace_size%  s    &*&9&9&Et""L1Lr   )r  r  )r  ry   r  r[   r   r   )r   r  r  r  r  rF  r  r  r  s   @r   r  r    s2    !
 ! ! 
!M Mr   r  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )CppTemplateBufferi)  c                >   > [         TU ]  XU5        X@l        XPl        g r   )rb  r  r  r   )r   rM  r  r  r  r   rd  s         r   r  CppTemplateBuffer.__init__*  s    );< r   )r   r  r  r   r  r  r  r  r  r  r  s   @r   r$  r$  )  s     r   r$  c                  l    \ rS rSr% S\S'   SS jrSS jr\SS j5       r\	S 5       r
SS jrSS	 jrS
rg)r  i0  rI  r  c                  ^ [        5       n[        R                  mU R                   Hq  n[	        U[
        5      (       a  UR                  U4S jU 5       5        M5  [	        U[        5      (       a  ML  UR                  T" UR                  5       5      5        Ms     [        U4S jU R                  5        5       5      n[        R                  " UU[        5       S9$ )Nc              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   r  )r  r   rW  s     r   r  /InputsKernel.get_read_writes.<locals>.<genexpr>9  s     BEqWQZZ\22E   #&c              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   r  )r  re  rW  s     r   r  r+  @  s#      :
/AGCLLN##/Ar,  )r  writesindex_exprs)r2   r9   rW  r  rj   rk   updater7  r  r  r=  
ReadWrites)r   r  inputr.  rW  s       @r   r  InputsKernel.get_read_writes4  s    .8l&&[[E%&&BEBBE#899		'%.."234 ! 0: :
/3/?/?/A:
 0
 &&"
 	
r   c                6    U R                  5       R                  $ r   r  r  s    r   r  InputsKernel.get_readsJ  r  r   c                   [        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  n[        U[        5      (       a*  [        U[
        5      (       d  [        R                  U5      n[        U[        5      (       a  U R                  U5      $ [        U[        5      (       a  U$ [        U[        [
        45      (       d   U5       eU$ r   )rj   re   rN  r  r  r  re  rf  unwrap_storage_for_inputTorchBindObjectr  ry  r   s     r   r7  %InputsKernel.unwrap_storage_for_inputM  s    a##Aa$$Aa"":a+I+I**1-Aa##
 //22a))H!fo677::7r   c                    / nU  Hd  n[        U[        5      (       a&  U Vs/ s H  n[        R                  U5      PM     nnO[        R                  U5      nUR	                  U5        Mf     U$ s  snf r   )rj   rk   r  r7  r^  )r  
inputs_newr   r   s       r   r  InputsKernel.unwrap_storage`  sk    
A!T""GHIq!\::1=qI 99!<a   	 Js   A/c                    gr  r   r  s    r   r  InputsKernel.is_externk  r!  r   c                    gr~  r   r  s    r   r  InputsKernel.num_readsn  r  r   r   Nr  r  )r   rf   r   rf   r  r  )r   r  r  r  r  r  r  r  r7  r  r  r  r  r  r   r   r   r  r  0  sD    
,,  $  r   r  c                  (    \ rS rSrSS jrSS jrSrg)	NopKernelir  c                    gr  r   r  s    r   r  NopKernel.is_no_ops  r!  r   c                    [        5       $ r   r1   r  s    r   r  NopKernel.get_readsv  rB  r   r   Nr  r  )r   r  r  r  r  r  r  r   r   r   rC  rC  r  s    r   rC  c                  V    \ rS rSrSr\S 5       r\S	S j5       r\S 5       rS
S jr	Sr
g)ConcatKerneliz  zb
There isn't actually a real kernel for concat, we just change the
storage for the upstream data.
c                	   US   R                  5       nUS   R                  5       n[        US   R                  5       5      nS/nXR   /nSUs=::  a  [	        U5      :  d   e   e[        S[	        U5      5       H  nX   R                  5       n	UR                  XR   5        [	        U	5      [	        U5      :X  d   eX   R                  5       U:X  d   eX   R                  5       U:X  d   e[        [	        U5      5       HE  n
X:X  a  XZ   X   -   XZ'   M  [        R                  R                  R                  XZ   X   5      XZ'   MG     UR                  XR   5        M     [        R                  U5      n[        [	        U5      5       H|  nX   n[        U5      (       d  M  UR                  5       n[        U[         5      (       d  M@  ["        R%                  UR&                  UR(                  5      (       d  Mq  [+        U5      n  O   [-        S U 5       5      n[        R                  R.                  R0                  S   n[        U[        5      (       d   eUSL a"  [-        S U 5       5      (       a  [+        U5      n[3        S [!        UUUUS9/ S9n[5        U5      n/ n[        [	        U5      5       GH  nU R7                  X   [8        R;                  UX&U   Xx   SS95      nUR<                  R                  U5        [        X   R>                  [@        5      (       a  X   R>                  RC                  5       nOX   R>                  nURE                  5       (       d  M  [G        X   R                  5       RH                  5      (       d  M  [K        U5      (       a  M  UR                  URM                  5       5        GM     [	        U5      S:  aR  [        R                  RO                  U[P        RR                  5      (       a  [        R                  RU                  U5        [        R                  RW                  U5      Ul,        U R[                  UR<                  5      Ul        [        R                  R]                  U5        U$ )	Nr   r7   c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r   r  s     r   r  &ConcatKernel.create.<locals>.<genexpr>  s     -WPV1.CA.F.FPVr  Fc              3    #    U  Hv  nS UR                   ;   =(       a[    UR                   S    R                  [        R                  S9=(       d*    UR                   S    R                  [        R                  S9v   Mx     g7f)r  rv  N)r9  r  ro   rq  rs  )r  args     r   r  rL    sr      <
 $ SXX --E<O<O-P W88E?00u?U?U0V
 $s   A>B )r   r   r   r   r   rM  r  )r  )/r   r   rk   r   r   r   r^  rY   r   r   rl  r   r   r   r   rj   r  rJ  r#  r   r   r&   r  r  r   rI  r  r  r  rx  r  rN  r  r  is_input_bufferrQ   r{   rP   r  ro  r:   FOREACHregister_operation_listr  r   r  r  )ry  r  r  r   r   r  offsets_startoffsets_endr   
input_sizer  output_strider   rM  any_input_is_storage_and_layoutfx_node_argsconcat_kernelkernelop_namesinput_bufferinput_unwrappeds                        r   rx  ConcatKernel.create  s   %%'q	##%q	**,-}oC'#h-'''''q#f+&A++-J  /z?c(m3339&&(E1119'')V3333x=)8"*+
"=HK"#''"2"2"?"? Z]#HK	 * x}- ' '99(Cs6{#A	A$Q''K 88fmmTT$B8$LM $ +.-WPV-W*W'ww++003,----*e3 <
 $<
 9
 9
 ;8DM$$	 	
 M*s6{#A++	  Cq!1;> ! L   ''5&)..(33"().."<"<">"()..  //1169//16677"<00 ? ? AB' $* x=1!4!4V^=S=S!T!TGG++H5WW44]C"11-2F2FG	""=1r   Nc                   [        U[        5      (       a  U R                  UR                  U5      $ [        UR                  [        5      (       a  [        UR                  R
                  [        5      (       a  UR                  R                  (       d  gUc  g[        UR                  5       5      [        UR                  5       5      :X  d  g[        S [        UR                  5       UR                  5       5       5       5      $ [        UR                  R
                  [        5      =(       a    [        UR                  [        5      (       + $ )NFTc              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   )rY   r   r   statically_known_equals)r  s1s2s      r   r  =ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s1      EFB   88@@Es   8:)rj   re   can_realize_into_without_copyrN  r  rM  r  r
  r   r~  r]  r   r   ExternKernelAlloc)ry  r  r  s      r   rf  *ConcatKernel.can_realize_into_without_copy  s    c9%%44SXXsCCchh 344sxx<<xx00 { s~~'(C0@,AA !#.."2CNN4DE  
 #((//>: 
:HH'D
 @
 	
r   c                   [        U[        5      (       d&  [        U5      (       a  [        U5      u  p4[        X4S9n[        U[        5      (       d   U5       e[        U[        5      (       a  U R                  UR                  U5      $ [        U[        5      (       ai  UR                  5         [        UR                  S5      (       d   eU R                  X5      (       a&  [        U5      UR                  l        UR                  $ [        R                  UR                  5       UR!                  5       UR#                  5       [%        UR'                  5       UR'                  5       5       VVs/ s H.  u  pV[(        R*                  R,                  R/                  XV5      PM0     snnS9nU R                  Xr5      $ s  snnf )Nr  rM  r  )rj   r  r   r  re   r  rN  r  rc  r&  rf  r  rM  r  rx  r   r   rw  r   r   rY   r   r   rl  )ry  r  r  r  rM  r  r  pws           r   r  ConcatKernel.realize_into  sV   
 #//$S))"7"<%7B#//44/c9%%##CHHc22c:&&KKM388X....00::"1#"6xx>>#--/__&  ??DA   --a3?	  
 ((s   .5F:c                    gr  r   r  s    r   r  ConcatKernel.should_allocate  r!  r   r   r   r  )r   r  r  r  r  r  rx  rf  r  r  r  r   r   r   rI  rI  z  sL    
 X Xt 
 
< ) )@r   rI  c                    ^  \ rS rSr% SrS\S'   \R                  " \S9r	S\S'   Sr
S	\S
'   SrS\S'   SrS\S'   \R                  " \S9rS\S'   SrS\S'   SrS\S'   SrS\S'   \R                  " \S9rS\S'   \R                  " \S9rS\S'          S; S<U 4S jjjrS=S jrS>S jrS rS rS<S jrS  rS?S@S! jjrSAS" jrS# r\S$ 5       r\   SBS% j5       r!\ S& 5       r"\ S' 5       r#\ S( 5       r$\    SC   SDS) jj5       r%\ SES* j5       r&\ SES+ j5       r'\ S, 5       r(\ S- 5       r)\ S. 5       r*S<S/ jr+S0 r,S?SFS1 jjr-S2 r.S3 r/SES4 jr0S<S5 jr1S6 r2S7 r3S>S8 jr4SGS9 jr5\5r6S:r7U =r8$ )Hre  i  r   zTuple[Any, ...]constant_argsr  zDict[str, Any]r   NzOptional[ReinterpretView]output_viewr  python_kernel_namecpp_kernel_namezIterable[str]ordered_kwargs_for_cpp_kernelzFOptional[Union[torch._ops.OpOverload, torch._ops.HigherOrderOperator]]op_overloadzOptional[List[Dict[str, Any]]]arg_propertiesz#Optional[Dict[str, Dict[str, Any]]]kwarg_propertiesz"Dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszList[MutationOutput]mutation_outputsc                2  > [         TU ]  UUUS9  X@l        U(       a  UO0 U l        X`l        Xl        U R                  U5        U R                  U5        Xl        U R                  5         0 U l
        / U l        [        R                  R                  U l        g NrP  )rb  r  ro  r   rp  rt  set_cpp_kernel_nameset_python_kernel_namers  collect_arg_kwarg_propertiesrw  rx  rY   r   r  fx_node)r   r   rM  r  ro  r   rp  rq  rr  rs  rt  rd  s              r   r  ExternKernel.__init__5  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww++r   c                     U /U R                   Q$ r   rx  r  s    r   r=  ExternKernel.get_outputsS  s    -t,,--r   c                    [        5       $ r   r1   r  s    r   r@  %ExternKernel.get_unbacked_symbol_defsV  rB  r   c                   [        U R                  [        R                  R                  5      (       af  U R                  R
                  R                   Vs/ s H:  nUR                  (       a  M  UR                  UR                  UR                  S.PM<     snO.[        [        U R                  5      5       Vs/ s H  n0 PM     snU l        [        U R                  [        R                  R                  5      (       aS  U R                  R
                  R                   Vs0 s H'  nUR                  UR                  UR                  S._M)     snO0 U l        [        U R                  [        R                  R                  5      (       a  U R                   (       dR  U R                  R
                  R                   Vs/ s H!  oR                  (       d  M  UR                  PM#     snU l        U R                  R
                  R                   Vs/ s H  oR                  (       d  M  UPM     snU l        g g s  snf s  snf s  snf s  snf s  snf )N)r   r{   r  )r{   r  )rj   rt  ro   _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   r  ru  allarg_propertiesrs  schema_kwargs)r   r   r   s      r   r}  )ExternKernel.collect_arg_kwarg_propertiesY  s    $**EJJ,A,ABB ))11;; <A||FFKK%&__
 < $C$4565"56 	$ $**EJJ,A,ABB ))11;;;A qOO;
  	 d&&

(=(=>>55$($4$4$<$<$F$F6$Fq,,FAFF$F62  ++33=="=a="D ?- 76"s0   H;/(H;:I #.II
I
I+Ic                    [        U R                  [        5      (       a!  U R                  5         U R	                  5         g g r   )rj   rM  r   apply_constraintr  r  s    r   r`  ExternKernel.decide_layout|  s0    dkk>22!!#  3r   c                P    [        X5      u  p#U(       a  UR                  U5        g g r   )rN   r  )r   wrapper
origin_strdetailed_origin_strs       r   codegen_commentExternKernel.codegen_comment  s%    *=d*L'
j) r   c                    [         er   r!  r   r  s     r   codegenExternKernel.codegen  r#  r   c                   Xl         [        R                  R                  (       a3  [	        U R
                  [        R                  R                  5      (       d  g U R
                  nU R                   c  UR                  S:X  aV  UR                  S:X  a  UR                  R                  S5      S   OUR                  R                  SS5      nSU S3U l         g UR                  R                  U l         g g )Natenr  .r   r   z
at::_ops::z::call)rr  rY   r   cpp_wrapperrj   rt  ro   r  r  	namespace_overloadnamer   r  replacer  r   )r   rr  r[  opnames       r   r{   ExternKernel.set_cpp_kernel_name  s    .ww""*ejj33+
 +
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (r   c                   Xl         Ub  g U R                  nUc  g [        U[        R                  R
                  5      (       a  SUR                   3U l         g UR                  R                  SS5       SUR                   3U l         g )Nztorch.ops.higher_order.z._ops.z.ops.r  )	rq  rt  rj   ro   r  HigherOrderOperatorr   r  r  )r   rq  r[  s      r   r|  #ExternKernel.set_python_kernel_name  s    "4)!!>

 > >??(??P&QD# $$,,Xw?@&//ARS #r   c                    [         R                  R                  (       a3  [         R                  R                  R	                  U R
                  5      $ U R                  $ r   )rY   r   r  r  get_c_shim_func_namerr  rq  r  s    r   get_kernel_nameExternKernel.get_kernel_name  sF     ww"" GG  55d6J6JK	
 ((	
r   c           	         [         R                  U R                  5       U R                  5       U R	                  5       U R                  5       U R                  5       U R                  5       S9nUR                  5         U$ )N)r   r   rN  rO  r   r   )	r  rx  r   r   rw  r   r  r  rc  )r   rj  s     r   
copy_inputExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	r   c                  ^^ X#S.n[         R                  " U5      u  nm/ m/ n/ nU H  nTR                  [        U[        5      5        TS   (       a  UR                  U5        M?  [        U[
        R                  5      (       a2  [        R                  R                  R                  R                  US S9nUR                  U5        M     UU4S jn	U V
s/ s H  oR                  U
5      PM     nn
U H  n
[        U
5      (       d  M  [        U
SS9  M!     / nU GH  n
[        U
[        5      (       dh  U
R!                  5       [        R                  R"                  ;   a<  UR                  [        R                  R"                  U
R!                  5          5        M  [        U
[        5      (       dh  U
R!                  5       [        R                  R$                  ;   a<  UR                  [        R                  R$                  U
R!                  5          5        M  UR                  ['        U
SS95        GM     U	" X5      u  pU" U0 UD6nS n[        R(                  R                  =n(       aO  [+        U[        R,                  U5        [/        UU[        R,                  R0                  R3                  S5      5      n[        U[4        [6        45      (       d  U/OUnU H  n[        U[8        R:                  5      (       d  M$  UR<                  (       d  M7  S	n[        R                  R,                  R0                  R3                  S
S 5      =n(       a  U SU 3nU[        R                  l        M     UUUU	U4$ s  sn
f )N)r   r   rI  )r  c                6  > / n[        U 5      n[        U5      nT H@  nU(       a  UR                  [        U5      5        M&  UR                  [        U5      5        MB     [        R                  " UT5      nUR                  S/ 5      UR                  S0 5      4$ )Nr   r   )iterr^  nextpytreetree_unflattenr?  )	new_tensor_argsnew_non_tensor_argsrU  
it_tensorsit_non_tensors	is_tensorr{  	args_specis_arg_tensors	          r   unflatten_args3ExternKernel.process_kernel.<locals>.unflatten_args  s    Fo.J!"56N*	MM$z"23MM$~"67	 +
 %%fi8A55$aeeHb&999r   Tr  )r   r  zEsparsity not handled. Please file issue for sparse inference weights.stack_tracez Found from : 
 ) r  tree_flattenr^  rj   rf   ru   r   rY   r   r   r   create_symintnoderf  r   r  r  r  	constantstorchbind_constantsr   	fake_moder-   r  r*   r9  r?  rk   rl   ro   Tensor	is_sparsedisable_cudagraphs_reason)ry  r[  r   r   binded_args	args_flattensor_argsnon_tensor_argsrO  r  r   example_argsnew_args
new_kwargsexample_outputrw  r   example_out_lir   msgr  r  r  s                        @@r   process_kernelExternKernel.process_kernel  s     $6%22;?	9%'C  C!89R ""3'c5::..''**44FFsQUFVC&&s+ 
	: 6AA[((+[A A$Q''%a5  JL
 A a**qzz|qww?P?P/P##AGG$5$5ajjl$CDq(++JJLAGG$?$??##AGG$?$?

$MN##$5aT$JK   .lL8Z8JN---9-Iq~~~F 9>1>>+>+>+B+B5+I! ntUm<<  	
  A!U\\**q{{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471   
 	
g Bs   M:c                   [        U[        5      (       d   e[        U[        5      (       a  U$ UR                  5       n[        R
                  R                  UR                  5       5      nUc   eUR                  5       nUb  SUR                  ;   a  [        UR                  [        5      (       a  UR                  S   R                  [        R                  S9(       d/  UR                  S   R                  [        R                  S9(       a)  UR!                  [#        UR%                  5       5      5        OUR'                  5         [(        R*                  " UR%                  5       SS9u  pVUS   nUR-                  5       " U5      n[        R
                  R.                  R1                  X5      n[        R
                  R.                  R3                  X5      n	[        R
                  R.                  R5                  X5      n
[7        Xy5      U
-   nX:w  a  [8        R;                  SU	U
U5        [<        e[        UR>                  [A        URC                  5       URE                  5       UR%                  5       U	U
S9S9$ )	z
In order to pass this to an extern kernel we need a
ReinterpretView not a View.  This allows us to avoid some
unneeded copies.
r  rN  r{  rc   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%srW  r  )#rj   r  r  r  rY   r   r  r  r  r9  rM  r   r  ro   rq  rs  r  r&   r   r  r9   ry  r{  r   rz  stride_vars
offset_varrR   rw  rx  r7  rN  r  rn  r   )ry  r   x_unwrap_viewre  x_unwrap_view_fx_node
index_argsrf  rZ  r   r  r  expecteds               r   convert_to_reinterpret_view(ExternKernel.convert_to_reinterpret_view%  s    !X&&&&a))H gg  !7!7!9: # 3 3 5 "-.333=//@@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

  ]
 ,  55eH''""..uA!!,,U?Z1F:IIR	 &%,,.kkmZZ\	
 		
r   c                   Uc
  [        5       $ [        U[        R                  [        R                  R
                  R                  [        45      (       a	  [        US9$ [        U[        5      (       aY  [        R                  R                  [        R                  " UR                  UR!                  5       UR#                  5       S95      $ [        U[$        5      (       a  U$ [        U[&        5      (       a  U R)                  UR*                  5      $ [        U[,        5      (       a1  [-        U R)                  UR*                  5      UR/                  5       S9$ [        U[0        5      (       a@  UR3                  5         [5        UR7                  5       5      (       a   U R9                  U5      $ [        U[<        5      (       a  UR3                  5         U$ [        U[>        5      (       a  U$ U RA                  U5      $ ! [:         a     N[f = f)N)r2  )r   r   r  )!r+  rj   ru   r   rv   rw   rx   ry   r7  r  rY   r   add_tensor_constantro   tensorr   r   r   r  re   rf  rN  r  r   r  rc  r   r  r  r7  r  r8  r  r9  s     r   rf  ExternKernel.realize_inputi  s   9'))a%**ekk&9&9&A&A3GHH(a00a""77..QWWAKKM!,,.Q  a((Ha##$$QVV,,a))"&&qvv.q||~  a""IIK$Q]]_55::1== a$$IIKHa))H~~a   + s   G> >
H
Hc                    [        U5      (       a@  [        UR                  5       5      S:X  a  U$ UR                  5        H  nUS:X  d  M  Us  $    U R                  U5      $ r5  )r   r   r~  r  )ry  r   r   s      r   require_stride1ExternKernel.require_stride1  sR     ##1<<>"a',,.Q;H ) ~~a  r   c                &	   Uc  Uc   eUR                  5       S;   a  U$ [        U5      (       Ga  [        UR                  5       [        5      (       a?  UR                  5       R
                  n[        UR                  5       [        5      (       a  M?  [        UR                  5       [        5      (       a  U(       ai  [        USS[        X5      (       aJ  [        [        R                  R                  R                  UR                  5       R                  5      5      OUUS9  U$ [        USSS UUS9  U$ [        UR                  5       [        5      (       ak  U(       a$  UR                  5       R!                  U5      (       d>  U(       a9  [#        X1R                  5       R                  UR%                  5       5      (       a  U$ [        UR                  5       [&        5      (       a  [        UR                  5       R)                  5       [        5      (       a  [+        S5      e[        UR                  5       R)                  5       [        5      (       a  U(       a2  UR                  5       R)                  5       R!                  U5      (       dM  U(       aH  [#        UUR                  5       R)                  5       R                  UR%                  5       5      (       a  U$ [        U[,        5      (       ak  U(       a$  UR                  5       R!                  U5      (       d>  U(       a9  [#        X1R                  5       R                  UR%                  5       5      (       a  U$ [        U[.        5      (       a  [        UR0                  [2        5      (       a  [        UR0                  [4        5      (       d  [        UR7                  5       5      (       a}  [        UR7                  5       R0                  [8        5      (       dP   U R;                  UR0                  5      Ul        U(       a  U R=                  XUS9$ U(       a  U R?                  XUS9$  U RC                  U5      n[        USSUUUS9  U(       a  [        X5      (       d   eU$ ! [@         a     NFf = f)N)r   r7   TF)r  r  r  r  r  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutr  )"rX  r   rj   r   r  r  r   r  r  r   rY   r   r   
size_hintsr   r  r  r  r   r  r  rE  r  re   rN  r  r  r  rg  r  require_stride_orderrequire_exact_stridesr7  r  )ry  r   r   r  r  s        r   require_stridesExternKernel.require_strides  s     M$===;;=F"H !##Q\\^_==LLN'' Q\\^_==!,,..99 *#(-
  B!KK - ! 0 0 ; ;ALLN<Q<Q R "'&3 H *#(-%)&3&3 HALLNK881<<>;;EBB!1%||~'<'<ajjl 
 ALLN,FGGalln88:NKK(b    : : <kJJq||~99;MMeTT%5)LLN668??JJL  H a%%q||~77>>-!<<>#8#8!**, 
 Hq)$$1668,,qvv77%ammo66q}}335FGG88@33 4   #44 5   # NN1!''	
 5a???? ' s   :6R 1R 
RRc                "    U R                  XUS9$ )N)r  r  r  )ry  r   r  r  s       r   r  "ExternKernel.require_exact_strides  s    ""- # 
 	
r   c                "    U R                  XUS9$ )N)r   r  r  )ry  r   r   r  s       r   r  !ExternKernel.require_stride_order  s    ""1"OOr   c                .    U R                  U[        5      $ r   )r  rr  r9  s     r   require_channels_last"ExternKernel.require_channels_last  s    ''+<==r   c                .    U R                  U[        5      $ r   )r  rt  r9  s     r   require_channels_last_3d%ExternKernel.require_channels_last_3d   s    ''+=>>r   c                    U R                  U[        [        [        [	        UR                  5       5      5      5      5      5      $ r   )r  rk   r&  r   r   r   r9  s     r   require_contiguousExternKernel.require_contiguous$  s/    ''4s1::<?P9Q0R+STTr   c                    g r   r   r  s    r   r  ExternKernel.apply_constraint(  r  r   c                   [        U[        [        45      (       d   e[        U[        5      (       a  [        U5      nU R                  (       d   S5       e[	        U5      n[	        U R                  5      nX4:  aq  [
        R                  SU R                  XC-
  5        [        X45       H?  nU R                  U   S   nUR                  Xb;   a  X&   OU R                  U   S   5        MA     U$ )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )
rj   rk   rl   ru  r   rw  rx  rt  r   r^  )r   r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_args#ExternKernel.fill_non_provided_args+  s     $u....dE"":D""U$UU"T,,-
 II^  #	 6...q1&9) $,,Q/@ / r   c                r   [         R                  R                  (       Ga[  / nS nU(       ae  U R                  (       aT  [	        U R
                  5      [	        U5      :X  d   S5       eU R                   Vs0 s H  oDR                  S5      U_M     nn[        U R
                  5       H  u  pVUb.  UR                  X   5      nU(       a  UR                  S5      OS nOb[	        U R                  5      U-   n	U R                  (       a7  U	[	        U R                  5      :  a  U R                  U	   R                  S5      OS nUR                  [         R                  R                  R                  Xh5      5        M     U$ [        [         R                  R                  R                  U R
                  5      $ s  snf )NzDnames passed to codegen_const_args does not match self.constant_argsr   r{   )rY   r   r  ru  r   ro  r?  r   r  r^  r  val_to_arg_strr+  )
r   r^  rU  name_to_arg_propertiesrO  r   r   proptype_r   s
             r   codegen_const_argsExternKernel.codegen_const_argsM  ss   77F
 &*",,4--.#3  ZYZ  594G4G*4GSGGFOS(4G ' * "$"4"45)5155eh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!KL 6 Mqww++::D<N<NOO%*s   4F4c                    [         R                  R                  (       aD  U R                  b7  U R	                  / U R
                  QU R                  QU R                  5      nSnOU R
                  nSn/ n[        U5       H  u  pE[         R                  R                  (       a  U R                  (       a  U[        U R                  5      :  d   S5       eU R                  U   R                  S5      nUR                  [         R                  R                  R                  XV5      5        M  UR                  [         R                  R                  R                  U5      5        M     U(       a  UR                  U R!                  5       5        U$ )NFTz-Invalid access to ExternKernel.arg_propertiesr{   )rY   r   r  rt  r  r  ro  r   r   ru  r   r?  r^  r  r 	  r  r	  )r   r  need_codegen_constant_argsr   r   r   r	  s          r   codegen_argsExternKernel.codegen_argsm  s3   774#3#3#?003$++3 2 23T[[F */&[[F)-&f%DAww""**q3''4 0 CBC  ++A.226:AGG00??IJAGG00??BC & &KK//12r   c                (   XR                   ;   a  U R                   R                  U5      $ U R                  (       aJ  U R                  R                  U5      (       a*  U R                  R                  U5      R                  S5      $ [        U S35      e)Nr  z not in self.allarg_properties)r   r?  r  rE  )r   r  s     r   get_kwargs_valueExternKernel.get_kwargs_value  ss    {{";;??8,,!!d&<&<&@&@&J&J))--h7;;OLL H:-K!LMMr   c           	     0   [         R                  R                  (       Ga  U R                  b  [	        U R
                  5      S:X  a  / $ / nU R                   H  nU(       a  US:X  a  M  U R                  U5      n[        U[        R                  5      (       a  UR                  U5        MU  U R                  (       a9  X0R                  ;   a*  U R                  R                  U5      R                  S5      OS nUR                  [         R                  R                  R                  XE5      5        M     U$ U R                   R#                  5        VVs/ s H3  u  pdU S[         R                  R                  R                  U5       3PM5     nnnU$ s  snnf )Nr   r   r{   r[  )rY   r   r  rt  r   r  rs  r	  rj   ru   r   r^  r  r?  r  r 	  r   re  )r   skip_outr   r  r^  r	  ks          r   codegen_kwargsExternKernel.codegen_kwargs  sK   77+D4F4F0G10L	F >>E 1))(3a,,MM!$  11hBXBX6X ..228<@@H! 
 MM!''"6"6"E"Ea"OP ?(  !KK--//DA #Qqww++::1=>?/   	s   :Fc           	        [         R                  (       a  [        R                  R                  (       d  [        U R                  5       5      S:X  a  g [        R                  R                  R                  U R                  5       5      n[        R                  R                  R                  U R                  5       5      nUR                  SU R                  5        SU SU S35        g g g )Nr   zassert_size_stride(rM  r   )r8   size_assertsrY   r   r  rU   r   r  codegen_shape_tupler~  r  r  )r   r  r   r   s       r   codegen_size_asserts!ExternKernel.codegen_size_asserts  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF%dmmo%6bbJ (;r   c                N    U R                  5       nU R                  5       nU/ /U4$ )z4
get output sizes and strides, for template_codegen
)r   r~  )r   _size_strides      r   get_group_strideExternKernel.get_group_stride  s*     //#r{G##r   c                   [         R                  R                  nU R                  5       nU R	                  5       nU Vs/ s H  oAR                  U5      PM     nn[        [        U5      5       Vs/ s H  n[        SU 35      PM     nn[        [        [        U5      5      UR                  SS9n[        U5       VV	s0 s H  u  pX_M	     n
nn	[        [        U
5      5       Vs/ s H  oZU   PM	     nnU Vs/ s H  oVU   PM	     nnU R                  5       nU" U5      n[         R                  R                  R                  XbU/5      u  pn[        S5      u  nn[        [!        Xo" U Vs/ s H  nU" U5      PM     sn5      5      5      n[#        [$        R&                  " U5      U5      nU[)        U5      4$ s  snf s  snf s  sn	nf s  snf s  snf s  snf )z3
Manually get canonicalization of the output index
dT)rS  r{  c)rY   r   r   r   r~  r   r   r   rS   r\  rz  r   r{  rr  r@   rm   r   rV   ru   r  rl   )r   r   rg  r  r   r   r]  index_orderr   r   r   r   r  r   	new_sizesr   rw  r   add_varreplacements                       r   canonicalizeExternKernel.canonicalize  s   
 77##//#29:'Q%%a(':;@U;LM;La(1QC1;L
MU3w<0g6I6ISWX+4[+AB+Axs#(+AB$)#f+$67$6q$67-23UmU
3##%
#$%GG$4$4$D$Dw%
!	E !%
73z7	3R	1GAJ	3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F=1G G'G;G/Gc                    [        5       nU R                   H  nU[        U5      -  nM     U R                  R	                  5        H  nU[        U5      -  nM     U$ r   )r2   ro  maybe_free_unbacked_symbolsr   rn   )r   r{  rO  s      r   r  %ExternKernel.get_unbacked_symbol_uses  sW     '1l%%C,S11A &;;%%'C,S11A (r   c           
     ,   [        U SS 5      nSU< 3/nU[        R                  " U 5       Vs/ s H'  nUR                   S[        XR                  5       3PM)     sn-  nUR	                  SU R
                  < 35        U R                  U5      $ s  snf )Nrq  zpython_kernel_name=r[  r\  )r   r  fieldsr   r^  r   r1  )r   kernel_namer.  r  s       r   ri  ExternKernel.__str__  s    d$8$?!+1
 	$++D1
1 zzl!GD**5671
 	
 	|D$4$4#789u%%
s   .B)r  ru  ro  rr  r~  r   rx  rt  rs  rp  rq  r  rw  r   NNNNr   Nr  rH  rJ  r   rr  r  r   r   )rq  r  r   r   )r   ziTuple[Any, List[Any], List[Any], Callable[[Any, Any], Any], Optional[Dict[sympy.Symbol, pytree.KeyPath]]])NNF)r   zOptional[Sequence[int]]r  r  r  )r^  r   r  )9r   r  r  r  ro  r  r  r  rm   r   rp  rq  rr  rk   rs  rt  ru  rv  rw  rx  r  r=  r@  r}  r`  r  r  r{  r|  r  r  r  r  r  r  rf  r  r  r  r  r  r  r  r  r  r	  r	  r	  r	  r	  r	  r#	  r  ri  r  r  r  r  s   @r   re  re    sr   %'M?'(..tDFND-1K*1(,,%)O]) 4?3D3D4!= 
 	    6:N29<@9@<G<M<M=9  .9->->t-T*T &(, 
, ,<.!F!
*
";0
 
 
 ^

^
 ^
@ A
 A
F ! !@ ! !  *.6:{ '{ 4	{ {z 
 

 P P > > ? ? U U DP@4N:	$'>
& Hr   re  c                  T   ^  \ rS rSrSS jr       S SU 4S jjjrSS jrSrU =r$ )	ExternKernelOuti  c                   U R                  U5        / U R                  5       QU R                  SS9QnU R                  5       n[        R
                  R                  (       a  U R                  S:X  a  SnOU R                  5       nUR                  UU R                  5       U R                  (       a  U R                  R                  5       OS U5        g )NT)r	  ztorch::inductor::_mm_plus_mmaoti_torch__mm_plus_mm_out)r  r	  r	  r  rY   r   r  rr  generate_extern_kernel_outrh  rp  )r   r  r   r*	  s       r   r  ExternKernelOut.codegen  s    W%J""$Jt':':D':'IJ**,GG$$(FF 7K..0K**""$484D4DD..0$		
r   c
                   > [         T
U ]  S UU R                  U5      UU=(       d    0 S UUUU	5
        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r   )rb  r  r  rY   r   r  r   r  )r   rM  r  ro  r   rp  rq  rr  rs  rt  rd  s             r   r  ExternKernelOut.__init__  si     	'Lb)	
 GG++D1		""4(r   c                    gr  r   r  s    r   r  ExternKernelOut.should_allocate(  r!  r   r	  r  r,	  r  )	r   r  r  r  r  r  r  r  r  r  s   @r   r/	  r/	    s=    
. &() 
) )6 r   r/	  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )RandomSeedsi,  c           	       > [         R                  " [         R                  5      n[        TU ]  [        U[         R                  U/S9/ UR                  UR                  U//SS[        R                  R                  S9  g )NrK  zaten.randint.low_outzat::_ops::randint_low_out::call)rM  r  ro  rq  rr  rt  )ro   r  r  rb  r  r  r  r  r  randintlow_out)r   countr   limitsrd  s       r   r  RandomSeeds.__init__-  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
r   r   )r=	  ry   r   r  r   r   r'  r  s   @r   r9	  r9	  ,  s    
 
r   r9	  c                  X   ^  \ rS rSrSS jr      S SU 4S jjjrS	S jrS rSrU =r	$ )
rg  i@  c                   U R                  U5        / U R                  5       QU R                  5       Qn[        R                  R
                  R                  X5        [        U R                  [        5      (       a  U R                  U5        g g r   )r  r	  r	  rY   r   r  generate_extern_kernel_allocrj   rM  rJ  r	  r   r  r   s      r   r  ExternKernelAlloc.codegenA  sm    W%=""$=t':':'<=	99$Edkk6**%%g. +r   c	                   > [         T	U ]  S UU R                  U5      UU=(       d    0 S UUUU5
        / U l        [        R
                  R                  U 5      U l        [        R
                  R                  U 5        g r   )	rb  r  r  r  rY   r   r  r   r  )
r   rM  r  ro  r   rq  rr  rs  rt  rd  s
            r   r  ExternKernelAlloc.__init__H  sp     	'Lb)	
 ')GG++D1		""4(r   c                    grq  r   r  s    r   r  !ExternKernelAlloc.should_allocatef  rt  r   c                    [         er   r!  r  s    r   r  "ExternKernelAlloc.apply_constrainti  r#  r   )r   r  r  )r   NNNr   Nr  )
r   r  r  r  r  r  r  r  r  r  r  s   @r   rg  rg  @  s?    / &() 
) )<" "r   rg  c                  J   ^  \ rS rSrSrSU 4S jjrS	S jrS rS
S jrSr	U =r
$ )r  im  zH
An output buffer that represents the mutation of a pre-existing buffer
c                   > [         TU ]  S US9  UR                  5       n[        R                  R                  U5        U/U l        X0l        [        R                  R                  U 5      U l	        g r(  )
rb  r  r  rY   r   r  mutation_namesmutating_noder  r   )r   rM  mutated_noderN	  mutated_node_namerd  s        r   r  MutationOutput.__init__r  s^    d62(113	##$5601(5GG++D1	r   c                    U R                   $ r   )rN	  r  s    r   r  MutationOutput.get_defining_opz  s    !!!r   c                    U R                   $ r   )rM	  r  s    r   r  !MutationOutput.get_mutation_names}  r)  r   c                    grq  r   r  s    r   r  MutationOutput.should_allocate  rt  r   )rN	  rM	  r   )rN	  r  r   r   r  r  )r   r  r  r  r  r  r  r  r  r  r  r  s   @r   r  r  m  s!    2"# r   r  c                     ^  \ rS rSr% Sr0 rS\S'   \ S	       S
S jj5       r S	         SU 4S jjjr	SS jr
SrU =r$ )TMADescriptori  a  
An IR node representing a host-side TMA descriptor in the Triton API
(the ones obtained via create_{1d,2d}_tma_descriptor calls). Mostly
useful for user-defined Triton kernels relying on host-side TMA; but
can, in principle, be used for Inductor's Triton templates, too.
zDict[Any, TMADescriptor]_CACHEc                    [        U5      X#U4nXPR                  ;  a  [        XX45      U R                  U'   U R                  U   $ r   )idrZ	  rY	  )ry  r  r+  
block_dimselement_sizerS  s         r   rx  TMADescriptor.create  s@     &z4\:jj +F*SCJJsOzz#r   c           
     H  > [        U5      S;   d   e[        U5      [        U5      :X  d   eUc  UR                  5       R                  nXl        X l        X0l        X@l        [        U R                  5      U l        U/n/ U R                  QU R
                  QU R                  Pn[        TU ]%  S [        [        UUR                  5       S95      U[        U5      S 5        [        R                  R!                  U 5      U l        [        R                  R%                  U 5        g )N)r7   r   r  )r   r   r	  r  r+  r]	  r^	  rH  rb  r  r  r  r   rl   rY   r   r  r   r  )r   r  r+  r]	  r^	  r  ro  rd  s          r   r  TMADescriptor.__init__  s    4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	 !,,. - 	
  GG++D1		""4(r   c                &    UR                  U 5        g r   )generate_tma_descriptorr  s     r   r  TMADescriptor.codegen      ''-r   )r]	  r+  r^	  r   rH  r  r   )r  rf   r+  List[Union[int, torch.SymInt]]r]	  rf	  r^	  r  )
r  rf   r+  rf	  r]	  rf	  r^	  r  r   r   r  )r   r  r  r  r  rZ	  r  r  rx  r  r  r  r  r  s   @r   rY	  rY	    s     (*F$) '+

 -
 3	

 $
 
" '++)+) -+) 3	+)
 $+) 
+) +)Z. .r   rY	  c                  j   ^  \ rS rSrS rS
S jrSU 4S jjrSS jrS
U 4S jjrSS jr	SS jr
S	rU =r$ )UserDefinedTritonKerneli  c                z  ^ SSK Jn  SSKJn  UR	                  U R
                  5      m/ n/ n/ n[        TU5      (       a  [        TS5      (       a&  UR                  U4S jTR                   5       5        O.[        TS5      (       d   eUR                  TR                  5        [        TS5      (       a<  TR                   H+  nUR                  TR                  R                  U   5        M-     O.[        TS5      (       d   eUR                  TR                  5        TR                   nTR                  mTX4U4$ )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  V   >#    U  H  nTR                   R                  U   v   M      g 7fr   )r   	arg_names)r  r   r[  s     r   r  BUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>  s$      *4FqFII''*4Fs   &)restore_value	reset_idxreset_to_zero)triton.runtime.autotunerrj	  *torch._higher_order_ops.triton_kernel_wraprk	  
get_kernel
kernel_idxrj   r&  r  rl	  rp	  rq	  r^  r   rn	  rr	  configs)r   rj	  rk	  rw	  restore_value_argsreset_to_zero_argsr   r[  s          @r   get_kernel_and_metadata/UserDefinedTritonKernel.get_kernel_and_metadata  s   6P"--doo>(*(*fi(( v}--")) *4:4F4F*  v7777"))&*>*>?v{++))A&--fii.A.A!.DE * v7777"))&*>*>?nnGYYFw4FFFr   c                   U R                  5       u  nnnnUR                  X#U R                  XE5      u  pgU R                   Vs/ s H  oR	                  U5      PM     n	n/ n
[        U R                  5       HA  u  pUR                  R                  U5      UR                  ;   d  M0  U
R                  U5        MC      [        U
5      n[        5       n[        U	5       VVs/ s H  u  pUc  Uc  X;   a  X4OX4PM     n	nnU	 VVs/ s H  u  nnUU:X  d  M  UPM     nnnU	 VVs/ s H  u  nnUU:w  d  M  UPM     n	nnU(       a  [        US   S   R                  5      n/ n
/ nSn[        U R                  5       Hm  u  pUU;   a  US-  nM  UR                  R                  U5      nUUR                  ;   a  U
R                  UU-
  5        UU;   d  MY  UR                  UU-
  5        Mo     UUS   S   l        U R                  U5        UR                  UU	U R                  UUU
5        g s  snf s  snnf s  snnf s  snnf )Nrw	  r   r7   )rz	  !define_user_defined_triton_kernelr   rs  r	  r   rn	  r   
constexprsr^  setr   
equal_to_1r  #generate_user_defined_triton_kernelgrid)r   r  r[  rw	  rx	  ry	  new_nametriton_metar	  raw_argsconstexpr_indicesr   kwargconstexpr_indices_setREMOVEDrO  r  removed_none_argseq1_indices_setr	  index_shift	arg_indexs                         r   r  UserDefinedTritonKernel.codegen  sl    ((*	
 !( I IT[[*<!
 /3.P.P
.P!!!$.P 	 
 #D$F$FGJC%%e,0A0AA!((- H	 !$$5 6( &h/
 0 O9U 
^$ 0 	 
 2:LXS#SG^SL(0CHCC7NCC !+i"8";"F"FGO "JK'(J(JK
 ++1$K",,2259	 1 11%,,S;->?/%%cK&78 L  4>K	"1%0 	W%33II	
y
,
 MCs$   H*H/H5H5#H;4H;c                L   > [         TU ]  5       [        U R                  5      -  $ r   )rb  r  r,   r	  rc  s    r   r  0UserDefinedTritonKernel.get_unbacked_symbol_uses?  s"     w/14I$))4TTTr   c                    [        5       $ r   r1   r  s    r   r@  0UserDefinedTritonKernel.get_unbacked_symbol_defsD  rB  r   c          	       > / n0 n/ nUR                  5        H  u  p[        U	[        5      (       aY  [        R	                  U R                  U	5      5      n
X;   a  [        R                  " U
/X8   Q76 n
UR                  U
5        XU'   Ms  UR                  U	5        XU'   M     [        U5      S:w  d   eUS   R                  5       U l        [        TU ]5  S [        U R                  S9U[        U5      U5        Xl        X l        U R%                  5       u  p  nUR&                   Vs/ s H  oU;   d  M
  UPM     snU l        SSKJn  [        U5      S:  a  US   R.                  O0 nU" U0 UEUE5       Vs/ s H  nUU   PM
     snU l        U R0                   Vs/ s H!  n[3        [        U R                  S9UU 5      PM#     snU l        [6        R8                  R;                  U 5        g s  snf s  snf s  snf )Nr   rl  )identify_mutated_tensors)re  rj   re   r  r7  rf  rY	  rx  r^  r   r   r   rb  r  r  rl   rv	  r	  rz	  rn	  rs  rt	  r	  r   mutable_argsr  rx  rY   r   r  )r   rv	  r	  tma_descriptor_metadatakernel_argsr  r   ro  r	  r^  r   r[  rw	  r   rO  r	  autotuned_kwargsrS  re  rd  s                      r   r   UserDefinedTritonKernel.__init__G  s   %%'DA!Y'' 99$:L:LQ:OP/%,,QL1H1KLAa q	$$Q'q	 ( 6{aQi**,dkk*- 	
 %	 $ < < >A "++.
+Ck/AC+.
* 	X03Gq0@71:,,b 0;;;*:;
 
 ((!
( :T[[93E(!
 	
""4(%.

!
s   (	G-5G-8G2(G7c                ,    [        U R                  5      $ r   )rk   rx  r  s    r   r=  #UserDefinedTritonKernel.get_outputsz  s    D))**r   c                    U R                   $ r   rl  r  s    r   r   "UserDefinedTritonKernel.get_device}  rn  r   )r   r	  rv	  r	  rx  rs  r  rJ  rH  r  )r   r  r  r  rz	  r  r  r@  r  r=  r   r  r  r  s   @r   rh	  rh	    s0    G>O
bU
1)f+ r   rh	  c                  T   ^  \ rS rSrSrS	S jrS
S jrS rSS jrS	U 4S jjr	Sr
U =r$ )InplaceBernoulliFallbacki  =
This needs to be a custom class to handle mutation properly
c                   S U R                    5       u  n[        R                  R                  (       a\  UR	                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g UR	                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   rh  r  r   s     r   r  3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     ;{!##%%{   r*  rM  z, NULL)r   )r  rY   r   r  r  r  r-  r+  reprro  ending)r   r  r   s      r   r   InplaceBernoulliFallback.codegen  s    ;t{{;77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klr   c                    grq  r   r  s    r   r  (InplaceBernoulliFallback.should_allocate  rt  r   c                >    U R                   S   R                  5       /$ r[  r  r  r  s    r   r  +InplaceBernoulliFallback.get_mutation_names      A'')**r   c                    [        5       $ r   r1   r  s    r   r@  1InplaceBernoulliFallback.get_unbacked_symbol_defs  rB  r   c                R  > [         TU ]  S [        UR                  5       S9U R	                  U/5      UUS9  [
        R                  R                  UR                  5       5        [
        R                  R                  U 5      U l
        [
        R                  R                  U 5        g )Nrl  rt  )rb  r  r  r   r  rY   r   r  r  r  r   r  )r   rt  r   ro  rd  s       r   r  !InplaceBernoulliFallback.__init__  s~    alln-$# 	 	
 	
##AJJL1GG++D1		""4(r   r	  r  r  rJ  r   r  r  r  r  r  r  r  r@  r  r  r  r  s   @r   r	  r	    s&    +
) 
)r   r	  c                  p   ^  \ rS rSrSrS
S jrSS jrS rSS jr  S
U 4S jjr	\
SSS jj5       rS	rU =r$ )InplaceCopyFallbacki  r	  c                N    U R                  5       u  p#nUR                  X2U5        g r   )r	  codegen_device_copy)r   r  r  r  non_blockings        r   r  InplaceCopyFallback.codegen  s%    #'#4#4#6 <##Cl;r   c                    grq  r   r  s    r   r  #InplaceCopyFallback.should_allocate  rt  r   c                >    U R                   S   R                  5       /$ r[  r	  r  s    r   r  &InplaceCopyFallback.get_mutation_names  r	  r   c                    [        5       $ r   r1   r  s    r   r@  ,InplaceCopyFallback.get_unbacked_symbol_defs  rB  r   c           	       > [         TU ]  S UUUSSS9  [        R                  R	                  US   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nz
aten.copy_aoti_torch_copy_)rq  rr  r   )	rb  r  rY   r   r  r  r  r   r  )r   rM  r  ro  rd  s       r   r  InplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(r   c                    X4 Vs/ s H  o@R                  U5      PM     nnU4n[        [        UR                  5       S9UU5      nU$ s  snf r2  )rf  r	  r  r   )ry  r  r  r	  r   r  ro  rU  s           r   rx  InplaceCopyFallback.create  sS    14
;
1##A&
;%$cnn./

  <s   A	r	  r  r  rJ  r  )r	  r  )r   r  r  r  r  r  r  r  r@  r  r  rx  r  r  r  s   @r   r	  r	    s?    <+)
 
)$  r   r	  c                  F    \ rS rSrSrS
S jrSS jrS rSS jrSS jr	Sr
g	)MutatingFirstArgExternKerneli  r	  c                    / S U R                    5       Q[        [        U R                  5      QnUR	                  U R                  5        SSR                  U5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   r	  r	  s     r   r  7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s     9[!!##[r	  r*  rM  r   )r  r+  r	  ro  r  r  r-  r	  )r   r  argrefss      r   r  $MutatingFirstArgExternKernel.codegen  sl    
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
r   c                    grq  r   r  s    r   r  ,MutatingFirstArgExternKernel.should_allocate  rt  r   c                >    U R                   S   R                  5       /$ r[  r	  r  s    r   r  /MutatingFirstArgExternKernel.get_mutation_names  r	  r   c                    [        5       $ r   r1   r  s    r   r@  5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  rB  r   c                    gr  r   r  s    r   has_side_effects-MutatingFirstArgExternKernel.has_side_effects  r!  r   r   Nr  r  rJ  )r   r  r  r  r  r  r  r  r@  r	  r  r   r   r   r	  r	    s     
+r   r	  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )ResizeStorageBytesi  c                (  > [        U[        5      (       d   S5       e[        TU ]  S [	        UR                  5       S9U R                  U/5      U4S9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        SU l        SU l        [        R                  R                   R#                  UR$                  R                  5       5        g )NzTODO: dynamic shapesrl  )ro  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)rj   ry   rb  r  r  r   r  rY   r   r  r  r  r   r  rq  rr  never_reuse_buffersr  rN  )r   variabler  rd  s      r   r  ResizeStorageBytes.__init__  s    (C((@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG	##''(>(>(@Ar   )rr  r   rq  r  r'  r  s   @r   r	  r	    s    B Br   r	  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )SetSourceTensorKerneli  c                  > UR                  5         [        TU ]	  UR                  5       X/S[        R
                  R                  R                  R                  S9  [        R                  R                  R                  UR                  R                  5       5        [        R                  R                  R                  UR                  5       5        [        R                  R                  R                  U R                  5       5        UR                  5       n[!        [#        US9X5      [!        [#        US9X 5      /U l        g )Nz!torch.ops.aten.set_.source_Tensor)rq  rt  rl  )r  rb  r  r   ro   rW   r  set_source_TensorrY   r   r	  r  rN  r  r   r  r  rx  )r   self_tensorstorage_tensorr   rd  s       r   r  SetSourceTensorKernel.__init__  s    $$&%%')B		++99	 	 	
 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4kH:V4nK!
r   c                v    U R                   S   R                  5       U R                   S   R                  5       /$ r5  r	  r  s    r   r  2SetSourceTensorKernel.get_inputs_that_alias_output  s/    A'')4;;q>+B+B+DEEr   r  r  r  )r   r  r  r  r  r  r  r  r  s   @r   r	  r	    s    
"F Fr   r	  c                  l   ^  \ rS rSrSrSS jrSS jrS rSS jrSSS	.       SU 4S
 jjjr	Sr
U =r$ )ScatterFallbacki  z
This needs to be a custom class to handle mutation properly.
This class handles both aten.scatter_ and aten.scatter_reduce_.
It also handle the case `src` being a scalar properly.
c           
        U R                   S   n[        R                  R                  (       a  SSS.nX#;   a  X2   nU R                  (       a  S U R
                   5       u  pEnO$S U R
                   5       u  pEU R                  S   nUR                  UX@R                  S   XV/U R                  U R                  U R                  UU R                  5       5        g )	Nr  r  r  )r  multiplyc              3  @   #    U  H  oR                  5       v   M     g 7fr   r	  r	  s     r   r  *ScatterFallback.codegen.<locals>.<genexpr>-  s     Jk2244kr	  c              3  @   #    U  H  oR                  5       v   M     g 7fr   r	  r	  s     r   r  r	  /  s     EA--//r	  r7   r   )r   rY   r   r  src_is_tensorr  ro  generate_scatter_fallbackrr  rq  r	  )r   r  r  get_operator_enumr   r   r  s          r   r  ScatterFallback.codegen$  s    X&77(-6 B**2JdkkJOQsEEJQ$$Q'C))""1%u2  ##!	
r   c                    grq  r   r  s    r   r  ScatterFallback.should_allocate;  rt  r   c                >    U R                   S   R                  5       /$ r[  r	  r  s    r   r  "ScatterFallback.get_mutation_names>  r	  r   c                    [        5       $ r   r1   r  s    r   r@  (ScatterFallback.get_unbacked_symbol_defsA  rB  r   NTr  include_selfc               d  > [        U[        5      U l        U R                  (       a&  X$U4 Vs/ s H  oR                  U5      PM     n	nU4n
O$X$4 Vs/ s H  oR                  U5      PM     n	nX54n
[        TU ]  S [        UR                  5       S9U R                  U	5      U
XgS.[        U5      SS/US9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R!                  U 5        g s  snf s  snf )Nrl  r	  r  r	  )rq  rs  rt  )rj   re   r	  rf  rb  r  r  r   r  r   rY   r   r  r  r  r   r  )r   rt  r   r  r   r  r  r	  r   tensorsro  rd  s              r   r  ScatterFallback.__init__D  s    (Y7 78oFo))!,oGF FM78jAj))!,jGA JMalln-(<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D(D-)r   r	  r  r  rJ  )r  ry   r  r  r	  r  r   r   r	  r  s   @r   r	  r	    sV    
.+ !%!!) 	!) !) !) 
!) !)r   r	  c                  T   ^  \ rS rSrSrS	S jrS
S jrS rSS jrS	U 4S jjr	Sr
U =r$ )IndexPutFallbackih  zI
This needs to be a custom class to handle mutation and indices properly
c                   S U R                    5       tp#n/ n[        U5      n[        U R                  5       Hd  u  pxU R                  U   b  UR	                  [        U5      5        M1  UR	                  [        R                  R                  R                  5        Mf     UR                  " U R                  5       X%U/U R                  5       Q76   g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   r	  r	  s     r   r  +IndexPutFallback.codegen.<locals>.<genexpr>n  s     &Rk':':'<'<kr	  )r  r  r   rc  r^  r  rY   r   r  r/  generate_index_put_fallbackr  r	  )	r   r  r   rn   valid_indicesrc  iter_valid_indicesr   r   s	            r   r  IndexPutFallback.codegenm  s    &Rdkk&R#]!-0dll+DA||A*t$678qww33<<=	 , 	++  "A	
9=9P9P9R	
r   c                    grq  r   r  s    r   r   IndexPutFallback.should_allocate{  rt  r   c                >    U R                   S   R                  5       /$ r[  r	  r  s    r   r  #IndexPutFallback.get_mutation_names~  r	  r   c                    [        5       $ r   r1   r  s    r   r@  )IndexPutFallback.get_unbacked_symbol_defs  rB  r   c           
       > X0l         U Vs/ s H	  ofc  M  UPM     nnX$/UQ Vs/ s H  o R                  U5      PM     nnSn	[        T
U ]  S [	        WR                  5       S9U R                  U5      U4SU	US9  [        R                  R                  U R                  S   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g s  snf s  snf )Naoti_torch_index_put_outrl  zaten.index_put_)rq  rr  rt  r   )rc  rf  rb  r  r  r   r  rY   r   r  r  r  r  r   r  )r   rt  r   rc  rn   
accumulater   r
  r	  rr  rd  s             r   r  IndexPutFallback.__init__  s    $+=GqG=342M}2MN2MQ%%a(2MN4alln-(M0+# 	 	
 	
##DKKN$;$;$=>GG++D1		""4( >Ns   C9C9C>rc  r   r  r  rJ  r	  r  s   @r   r	  r	  h  s&    
+) )r   r	  c                  .    \ rS rSr\S 5       rSS jrSrg)
DeviceCopyi  c                   UR                  5       (       dU  [        S UR                  5        5       5      (       a0  [        R                  R
                  (       d  UR                  U5      $ [        R                  R                  U5        [        R                  R                  UR                  5       5        [        S5        U4n[        [        UUR                  5       UR                  5       S9U R!                  U5      /U5      $ )Nc              3  Z   #    U  H!  o[         R                  R                  ;   v   M#     g 7fr   )rY   r   r  r  s     r   r  $DeviceCopy.create.<locals>.<genexpr>  s     G4Fq***4Fs   )+zDeviceCopy in input programrK  )r  r]  r  r8   aot_inductoruse_runtime_constant_foldingr  rY   r   add_device_infor   rM   r
  r   r   r   rf  )ry  r   r   r	  ro  s        r   rx  DeviceCopy.create  s     GA4D4D4FGGG''DD''//	'	/78%kkmZZ\
 q!"
 	
r   c                   U R                  5       n[        U5      S:X  d   eU R                  (       a2  UR                  US   U R                  R	                  5       US   5        g UR                  US   U R	                  5       US   5        g )Nr   r   r7   )r	  r   rp  r	  rh  rC	  s      r   r  DeviceCopy.codegen  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Sr   r   Nr  )r   r  r  r  r  rx  r  r  r   r   r   r
  r
    s    
 
.Tr   r
  c                  X   ^  \ rS rSrSrS	S jrS
S jrSU 4S jjrSS jrSS jr	Sr
U =r$ )rs   i  z3
The result of a call to aten._local_scalar_dense.
c                    [        5       $ r   r1   r  s    r   r  DynamicScalar.get_reads  rB  r   c                    grq  r   r  s    r   r  DynamicScalar.should_allocate  rt  r   c                   > UR                  5         [        TU ]	  S [        [        R
                  " S5      S9U R                  U/5      5        Xl        X l        g Nr   rl  )	rc  rb  r  r  ro   r   r  symkeypath)r   r#
  r$
  rN  rd  s       r   r  DynamicScalar.__init__  sG    *ELL$78$:M:Mtf:U	
 r   c                .    [        U R                  /5      $ r   )r2   r#
  r  s    r   r@  &DynamicScalar.get_unbacked_symbol_defs  s    488*%%r   c                &    UR                  U 5        g r   )codegen_dynamic_scalarr  s     r   r  DynamicScalar.codegen  s    &&t,r   )r$
  r#
  r  r  r  rJ  )r   r  r  r  r  r  r  r  r@  r  r  r  r  s   @r   rs   rs     s&    &- -r   rs   c                  ^   ^  \ rS rSrSrS
S jrSS jrSU 4S jjrSS jrS r	SS jr
S	rU =r$ )rt   i  z-
The result of a call to aten._assert_scalar
c                    [        5       $ r   r1   r  s    r   r  AssertScalar.get_reads  rB  r   c                    grq  r   r  s    r   r  AssertScalar.should_allocate  rt  r   c                v   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        g r"
  )rb  r  r  ro   r   scalarr  )r   r1
  r  rd  s      r   r  AssertScalar.__init__  s3    ell512	
 r   c                    gr  r   r  s    r   r	  AssertScalar.has_side_effects  r!  r   c                ,    [        U R                  5      $ r   )r,   r1
  r  s    r   r  %AssertScalar.get_unbacked_symbol_uses  s    $T[[11r   c                   [         R                  R                  (       aU  [         R                  R                  R	                  U R
                  SS9nUR                  SU SU R                   S35        g [         R                  R                  R                  U R
                  SS9nUR                  SU S35        UR                  S[        U R                  5       S	35        UR                  U R                  5        S
35        g )NF)r  zif (!(z)) { throw std::runtime_error("z"); }zif not r  z    raise RuntimeError(r   z = None)rY   r   r  r  codegen_cpp_sizevarr1
  r  r  codegen_python_sizevarr	  r  )r   r  sizevars      r   r  AssertScalar.codegen  s     77gg**>>e ? G 	!A$((6R gg**AAe B G y23 7TXX7GqIJ  19:r   )r  r1
  r  r  r  )r   r  r  r  r  r  r  r  r	  r  r  r  r  r  s   @r   rt   rt     s+    	2; ;r   rt   c                  *    \ rS rSr% S\S'   S\S'   Srg)ExternKernelNodei
  r   r   zexport_schema.Noder}   r   N)r   r  r  r  r  r  r   r   r   r=
  r=
  
  s    
I
r   r=
  c                     ^  \ rS rSr SSS. SU 4S jjjjrSS jrSS jrS r\S 5       r	S	 r
S
 rS rS rSS jr\SS j5       r\S 5       rU 4S jrSrU =r$ )FallbackKerneli  Nrw  c               h  >^  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R
                  R                  [         R
                  R                  [         R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  [         R                  R                  0	nX(;   a"  [        U5      S:X  a  [        U5      S:X  a  X   n[        TT ]9  U[        U5      [        U5      US9  ST l        UT l        [%        U[&        R(                  R*                  [&        R(                  R,                  45      (       d   SU S[/        U5       S35       eUT l        UT l        Uc  0 OUT l        [6        R8                  R;                  T R<                  5        / T l        / T l         [%        T R0                  [&        R(                  R,                  5      (       a  g ST R0                  RC                  5       ;   a  g T R0                  RD                  n	[&        RF                  RH                  RK                  T R0                  5      (       a-  T R@                  RM                  US   RO                  5       5        g U	RP                  (       a  [S        U5      (       d  [U        S	U 35      eU	RV                  n
T R3                  T RX                  T RZ                  5      u  pSU 4S
 jjn[&        RF                  RH                  R]                  XU5       H  u  pU" X5        M     g )Nr7   r	  Fz#Fails to create FallbackKernel for r   z not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                ~  >^  [        T R                  [        R                  5      (       a  [        U[        [
        45      (       d   e[        T R                  [        R                  5      =(       a2    [        T R                  R                  5       [        R                  5      n[        T R                  [        R                  5      =(       a2    [        T R                  R                  5       [        R                  5      nU(       d)  [        T R                  [        R                  5      (       a  [        U[
        [        45      (       a   eUc  g T R                  c  g SU U4S jjnU(       a  U H  nU" U5        M     g [        T R                  [        R                  5      (       d	  U(       d   eU" U5        g )Nc                  > TR                   R                  U R                  5       5        TR                  R                  (       a<  TR
                  R                  [        [        U R                  5       S9U T5      5        g g r2  )	alias_namesr^  r  
alias_infois_writerx  r  r  r   )r   infor   s    r   	add_aliasPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias  sZ      ''

5??++))00&z'H!TR ,r   r  )
rj   r{   ro   ListTyperk   rl   OptionalTypegetElementType
TensorTyperF
  )rH
  rO  is_optional_tensoris_list_tensorrI
  
tensor_argr   s   `     r   handle_aliasing_and_mutation=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutations  s<   $))U^^44!#e}5555!+		5--" "KTYY5579I9IJ  (		5>>B z		((*E,<,<HN "Z		5;K;K%L%L &cE4=9999{&  "%Jj) #& "$))U-=-=>>BTTT#r   r  )/r  r  r  Scalardivdividefloor_divider  r	  subsubtracttrue_divider   rb  r  rl   use_runtime_dispatchrw  rj   ro   r  r  r  r{   rt  r  r   rY   r   warn_fallbackrq  rE
  rM	  r   r  _libraryr  mutates_and_returns_first_argr^  r  
is_mutabler!   r7  r  r  ro  
zip_schema)r   rM  r[  r  nontensor_argsr  r   rw  BINARY_OP_MAPPINGschemaschema_argsr   rR
  rH
  rO  rd  s   `              r   r  FallbackKernel.__init__  s    HHOOTXX__HHOOTXX__KK 2 2t0077HHOOTXX__MM  $--"6"6HHOOTXX__MM  $--"6"6##T%5%5%<%<

 'K A%N#q(&.F+.!	 	 	
 %*!!2

%%

..
 
 	X 14<.W	X 
 ","Nb	d556 '))+d&&

(F(FGG !1!1!6!6!88
 !!)) >>==d>N>NOO&&{1~'>'>'@A%;F%C%C%9&B  &&**4;;8J8JK 	D --88vNID(3 Or   c                j  ^ ^^ [        T S5      (       d  g [        [        R                  R                  R
                  T R                  5      nU(       d  g UR                  5        HJ  u  nmU4S jmUUU 4S jnUR                  UR                  U5       SU" 5        UR                   35        ML     g )Nrw  c                  > US:X  a  U $ [        U5      S:  ai  [        US   [        5      (       aQ  [        US   [        R                  5      (       a/  T" U  SUS   R
                   SUS   R                   S3USS  5      $ [        US   [        5      (       a  T" U  SUS   R
                   S3USS  5      $ [        US   [        R                  5      (       a^  [        R                  R                  (       a   T" S	US   R                   S
U  S3USS  5      $ T" U  SUS   R                   S3USS  5      $ [        US   [        5      (       a  T" U  SUS   R                   S3USS  5      $ [        SU 35      e)Nr   r   r   r7   r  r*  r   z()z	std::get<z>([]z.__floordiv__(zunrecognized keypath )r   rj   r)   r  SequenceKeyr   r   rY   r   r  r+   r  rE  )r2  r$
  gos     r   rk
  7FallbackKernel.codegen_unbacked_symbol_defs.<locals>.go  s   b=K LA%"71:}=="71:v/A/ABB&'!*//!2!GAJNN3C1Ewqr{   
M::a
'8;WQR[II
F,>,>?? 77.. Ywqz~~&6ba@'!"+N  4&'!*..)9 ;WQR[I
  
K88 nWQZ5G5G4HJGTUTVKXX(+@	)JKKr   c                   > [         R                  R                  (       a  [        TR                  5      S:X  a$  T " TR                  S   R                  5       T5      $ [        TS   [        R                  5      (       d   eT " TR                  TS   R                     R                  5       TSS  5      $ T " TR                  5       T5      $ )Nr7   r   )
rY   r   r  r   r  r  rj   r  rj
  r   )rk
  r$
  r   s   r   go_outer=FallbackKernel.codegen_unbacked_symbol_defs.<locals>.go_outer  s    77&&
 4<<(A-!$,,q/":":"<gFF)'!*f6H6HIIII!$,,wqz~~">"G"G"I7STSU;WWdmmow77r   z = )r&  r.   rY   r   r   r   rw  re  r  codegen_unbacked_symbol_declr	  )r   r  rw  r   rn
  rk
  r$
  s   `    @@r   codegen_unbacked_symbol_defs+FallbackKernel.codegen_unbacked_symbol_defs  s    t0115GG&&(>(>
 !+113JAwL68 77:;3xzl7>>JZ[W 4r   c                    [        U SS 5      =n(       aC  [        [        R                  R                  R
                  U5      nUc   eUR                  5       $ [        5       $ )Nrw  )r   r.   rY   r   r   r   r|  r2   )r   rw  resolveds      r   r@  'FallbackKernel.get_unbacked_symbol_defs  sZ     '.A4 HHH0  **,=H '''==?"<r   c                F   [         R                   " S S5      5       nU R                   Vs/ s H  o!" UR                  5       5      PM     nnU R	                  X0R
                  5      u  pE[        R                  R                  (       a  [        U R                  [        R                  R                  5      (       a  U R                  XE5      n[        U R                  R                   R"                  U5       VVs/ s H8  u  pb[        R                  R$                  R'                  X&R(                  5      PM:     nnnO9U Vs/ s H,  n[        R                  R$                  R'                  U5      PM.     nnU R*                  R-                  U5        U$ s  snf s  snnf s  snf )Nc                  *    \ rS rSr% S\S'   SS jrSrg))FallbackKernel.codegen_args.<locals>.Shimi  r   refc                    U R                   $ r   )ry
  r  s    r   r  2FallbackKernel.codegen_args.<locals>.Shim.__repr__  s    xxr   r   Nr  )r   r  r  r  r  r  r  r   r   r   Shimrx
    s    H r   r|
  )r  	dataclassr  rh  r  ro  rY   r   r  rj   rt  ro   r  r  r  r   r  r  r  r 	  r  r   r0  )r   r|
  r   r  r   r   params          r   r	  FallbackKernel.codegen_args  sH   				  	  
	  =AKKHKqtA//12KH**;8J8JK77:d.>.>

@U@U#V#V..t<D !$D$4$4$<$<$F$F M MHE $$33AG M  D
 EIIDqAGG((77:DDI 	6" I
 Js   F:?F3Fc                    U (       a;  U  Vs/ s H)  o"R                  5       (       d  M  UR                  5       PM+     nnUS   $ [        U[        R                  5      (       a  UR                  $ [        U[
        [        45      (       al  [        S U 5       5      nU Vs/ s H  oU(       d  M  UPM     nn[        U5      S:X  a  US   $ U H!  n[        UR                  5      (       d  M  Us  $    US   $ g s  snf s  snf )Nr   c              3  N   #    U  H  n[         R                  S U5      v   M     g 7fr   )r?
  find_devicer  s     r   r  -FallbackKernel.find_device.<locals>.<genexpr>  s#      $=K**433^r  r7   )r   rj   ro   r  r   rk   rl   r2   r   rQ   r{   )r  r  rO  devices
device_setr   s         r   r
  FallbackKernel.find_device  s    3>S;C..BR's~~';GS1:nell33!(((ntUm44# $=K$ J -7AJ&&vJGA7|q qz!!&++&&!M " 1:! T Bs   C6C6
C;-C;c                    [        U R                  [        R                  R                  5      (       a  g[        U R                  5      R                  5       $ rq  )rj   rt  ro   r  r  r(   r_
  r  s    r   r	  FallbackKernel.has_side_effects  s<    d&&

(F(FGGt//0;;==r   c                    U R                   $ r   )rE
  r  s    r   r  +FallbackKernel.get_inputs_that_alias_output  r  r   c                P    [        U R                  5      S::  d   eU R                  $ r~  )r   rM	  r  s    r   r  !FallbackKernel.get_mutation_names  s'    4&&'1,,,"""r   c           
     N   [         R                  SU R                  5       U R                  5        [	        U [
        5      (       d   eU R                  U R                  U R                  5      u  pU R                  X5      nU R                   Vs/ s H  o2R                  US 5      PM     nn[        R                  R                  (       d  / UQUQ$ [        S S 5      nUR!                  U R                  X5      nS nU R                  nUR"                  R$                  n	['        U	5      S:X  aB  U R(                  (       a  U R(                  OU R*                  n
U	S   R,                  nU" X5      /nO:[/        XR(                  5       VVs/ s H  u  pU" UR,                  U5      PM     nnn[1        U R                  5       [2        R4                  " U R                  R7                  5       UU0 S9S9n[        R                  R8                  R;                  U5        / UQUQ$ s  snf s  snnf )Nz4Extern kernel node added for node %s with target %s.c           	        [        U [        R                  5      (       ap  Un[        U[        [        45      (       a  [        U5      S:X  d   eUS   n[        R                  R                  [        R                  " UR                  5       S9S9$ [        U [        R                  5      (       a{  [        U R                  5       [        R                  5      (       aN  [        R                  R                  U Vs/ s H%  n[        R                  " UR                  5       S9PM'     snS9$ [        S[        U 5       35      es  snf )Nr7   r   r	  )	as_tensor)
as_tensorszUnsupported return type )rj   ro   rN
  rk   rl   r   export_schemaArgumentrx  TensorArgumentr  rK
  rM
  RuntimeErrorr{   )return_typeoutputr   s      r   handle_single_outputFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output/  s   +u'7'788ftUm44v;!+++ )C$--44+::O 5   K88Z**,e.>.>> > %--44 $* #)C &44#,,.I#)  5   #%=d;>O=P#QRR s   9,Er7   r   )r'  r  r  metadata)r   r}   )rw  rx  r  rt  rj   r?
  r  r  ro  r  rs  r?  rY   r   aot_moder    serialize_inputsr  returnsr   r  rx  r  r   r=
  r
  rZ   r   extern_kernel_nodesr^  )r   r   r   rS  ordered_kwargs
serializernamed_argumentsr
  r'  r
  r  r
  output_argumentsreturn_schemar
  r}   s                   r   export_extern_kernel_node(FallbackKernel.export_extern_kernel_node  s   		BMMO	
 $////**4;;8J8JK**48-1-O-O
-OcJJsD!-O 	 
 ww+T+N++*46
$55d6F6FU	S. !!..((w<1 '+lldll8M8MG!!*..K 4[ JK .1,,-G -G)M %]%<%<fE-G   
  ##'',,.&(	
 	
##**40''''}
Z s   HH!c           	     v   U R                   nUR                  S:X  a}  [        U[        R                  R
                  5      (       d   e[        R                  R                  (       a2  SSK	J
n  [        U5      U;  a  [        R                  SU5        SU l        ObUR                  S:X  a,  [        U[        R                  R
                  5      (       d   eO&[        R                  R                  (       a  SU l        U R                  (       a  U R                  U5        S nS nU R!                  5       nUR#                  U R%                  5       U R&                  U R(                  UU R                   UU R*                  (       a  U R*                  OU R,                  5        OU R                  U5        / U R/                  5       QU R1                  5       Qn[        R                  R2                  R5                  X5        [        U R6                  [8        5      (       a  U R;                  U5        U R=                  U5        g )Nr  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantized)rt  r  rj   ro   r  r  rY   r   r  torchgen.aoti.fallback_opsr
  r   rw  r  r[
  r  r
  ,generate_fallback_kernel_with_runtime_lookupr  rq  rr  r  rx  r	  r	  r  generate_fallback_kernelrM  rJ  r	  rq
  )r   r  r[  r
  exported_argsr   s         r   r  FallbackKernel.codegend  s   !!v%fejj&;&;<<<<ww""Lv;&;; KKa 15D--fejj&;&;<<<< ww"",0)$$  ) MD ::<M@@''$$   $$2G2G	   )AT&&(A4+>+>+@ADGG  99$E$++v..))'2))'2r   c           	         [        U R                  U R                  [        U R	                  5       5      [        U R                  5       5      5      $ r   )r  r   r   rK   r   r   )r
  s    r   tensor_to_layoutFallbackKernel.tensor_to_layout  s9    MMLL%fkkm4%fmmo6	
 	
r   c           	       ^ ^^ [         R                  4nX;  a  [        R                  R                  O	[        5       nU   T R                  " U/UQ70 UD6u  nnnn	n
S S S 5        T R                  WW5      nUc  T " [        US9UUWW	W
S9mO U(       d   S5       eT " [        US9UUWW	W
S9mU UU4S jmT" U/ 5      n[        U[        [        [        45      (       a	  UTl        U$ U/Tl        U$ ! , (       d  f       N= f)Nrl  r@
  z"Not sure where to find device infoc                  >^ ^ [        T [        [        45      (       a/  [        T 5      " UUU 4S j[	        [        T 5      5       5       5      $ [        T [        5      (       a<  T R                  5        VVs0 s H  u  p#UT" UT[        T 5      U4/-   5      _M      snn$ [        T [        R                  5      (       a  [        TR                  T 5      TT5      $ [        T [        5      (       a  T $ [        T [        R                  5      (       a  T R                  R                  $ T b   S[        T 5       S35       eg s  snnf )Nc              3  Z   >#    U  H   nT" TU   T[        T5      U4/-   5      v   M"     g 7fr   )r{   )r  r   generate_outputrc  r
  s     r   r  AFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>  s7      $/ $F1Iw4<:K9L/LMM/s   (+zFallbackKernel output type z is not supported)rj   rk   rl   r{   r   r   rm   re  ro   r  MultiOutputr
  ry   SymIntr}   r2  )r
  rc  rS  r  ry  r
  packeds   ``  r   r
  .FallbackKernel.create.<locals>.generate_output  s)   &4-00F| $"3v;/$   FD)) %+LLN$2 g$v,9L8M.MNN$2  FELL11"((0 
 FC((FELL11{{''' NQ0f>OPQ"%s   6%D>)r  *_fused_moving_avg_obs_fq_helper_functionalrY   r   r  r   r  r
  r  MultiOutputLayoutrj   rk   rl   rm   r  )ry  r[  r   r   fake_incorrect_kernelscontextr  r  r  r  rw  r   r  r
  r
  s   `            @@r   rx  FallbackKernel.create  s   "&"Q"Q!S!'!EAGG;= 	  ""6;D;F;!  n=!&)"3F ???6!0"3F	6 "."5geT233$FN  &YFN Ws    C11
C?c                    > [         TU ]  5       $ r   )rb  r  rc  s    r   r  FallbackKernel.apply_constraint  s    w'))r   )rE
  r   rM	  rt  rw  r  r[
  r   r  rJ  )r
  torch.Tensor)r   r  r  r  r  rq
  r@  r	  r  r
  r	  r  r  r
  r  r
  r  rx  r  r  r  r  s   @r   r?
  r?
    s     E4 E4 
E4 E4N8t .  (>
 #H(T03d 
 
 D DL* *r   r?
  c                  N   ^  \ rS rSrSrS	S jrS
S jrSS. SU 4S jjjrSrU =r	$ )ComplexViewi  z9View a complex number as two dtyped numbers or vice versac                    grq  r   r  s    r   r  ComplexView.should_allocate  rt  r   c                >    U R                   S   R                  5       /$ r[  r	  r  s    r   r  (ComplexView.get_inputs_that_alias_output  s    A'')**r   Nr@
  c          	     *   > [         TU ]  UUUUUUS9  g )Nr@
  )rb  r  )r   rM  r[  r  ra
  r  rw  rd  s          r   r  ComplexView.__init__  s)     	/ 	 	
r   r   r  r  r  )
r   r  r  r  r  r  r  r  r  r  r  s   @r   r
  r
    s)    C+ 
 

 
r   r
  c                  *    \ rS rSr% S\S'   SS jrSrg)r
  i	  r  r   c                    U R                   $ r   rl  r  s    r   r   MultiOutputLayout.get_device  rn  r   r   Nr  )r   r  r  r  r  r   r  r   r   r   r
  r
  	  s    r   r
  c                  Z   ^  \ rS rSrS rS	S jrS
U 4S jjrSS jrSS jrSS jr	Sr
U =r$ )r
  i  c                   [        U5      S:  a  US   u  p4[        U[        5      (       a  U R                  U SU S3USS  5      $ [        U[        5      (       aU  [
        R                  R                  R                  XR                  5       [        U5      5      nU R                  XRSS  5      $ [        U[        5      (       a  U R                  U SU S3USS  5      $ [        SU5      eU$ )Nr   rh
  ri
  r7   z['z']znon supported index type: )r   
issubclassrk   codegen_list_tuple_accessrl   rY   r   r  codegen_tuple_accessr  r   rm   rE  )r   basenamerc  ityper   tuple_accesss         r   r
  %MultiOutput.codegen_list_tuple_access  s    w<!qzHE%&&55
!A3a6H'RSRT+VVE5)) ww33HHmmos1v  55lABKPPE4((55
"QCr6JGTUTVKXX$%A5IIOr   c                    UR                  U R                  5       U R                  U R                  S   R                  5       U R                  5      5        g r[  )codegen_multi_outputr  r
  r  rc  r  s     r   r  MultiOutput.codegen'  s>    $$MMO**4;;q>+B+B+DdllS	
r   c                   > [         TU ]  S X/S5        [        R                  R	                  U 5      U l        [        R                  R                  U 5        X0l        g r  )rb  r  rY   r   r  r   r  rc  )r   rM  r2  rc  rd  s       r   r  MultiOutput.__init__-  sC    vw3GG++D1		""4(r   c                <    U R                   S   R                  5       $ r[  )r  r  r  s    r   r  $MultiOutput.get_unbacked_symbol_uses3  s    {{1~6688r   c                    grq  r   r  s    r   r  MultiOutput.should_allocate6  rt  r   c                    U R                    Vs/ s HI  n[        U[        5      (       d  M  [        UR	                  5       5      S:  d  M9  UR                  5       PMK     sn$ s  snf r[  )r  rj   r?
  r   r  r  )r   inps     r   r  (MultiOutput.get_inputs_that_alias_output9  s\     {{
"#~.  C4467!; CLLN"
 	
 
s   A"A"A"r
  r  )rM  r   rc  zList[Tuple[Any, ...]]r   r   rJ  r  r  )r   r  r  r  r
  r  r  r  r  r  r  r  r  s   @r   r
  r
    s&    $
9
 
r   r
  c                     \ rS rSr% SrS\S'   S-S jrS.S jrS/S jrS0S jr	S1S	 jr
S2S
 jrS3S4S jjrS5S jrS6S jrS7S jrS6S jr S8     S9S jjrS:S jrS;S jr S8     S<S jjrS=S jrS>S jrS?S jrS@S jrSAS jrSBS jrS-S jrS-S jrSCS jrSDS jrS2S jrSDS  jr SAS! jr!SES" jr"SFS# jr#SGS$ jr$S3SHS% jjr%\&SIS& j5       r'SJS' jr(SIS( jr)SKS) jr*\&S* 5       r+S2S+ jr,\,r-S,r.g)Lr  iD  z;
TensorBox / StorageBox allow in-place mutation of Tensors
rf   rN  c                6    U R                   R                  5       $ r   r  r  s    r   rr  !MutableBox.has_exceeded_max_readsL  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r   MutableBox.get_deviceO  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rw  MutableBox.make_loaderR      yy$$&&r   c                6    U R                   R                  5       $ r   )rN  r{  r  s    r   r{  MutableBox.make_indexerU  r  r   c                6    U R                   R                  5       $ r   )rN  r~  r  s    r   r~  MutableBox.get_strideX  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_name[  r  r   Nc                8    U R                   R                  U5      $ r   )rN  r  r  s     r   r  MutableBox.has_large_inner_fn^  s    yy++I66r   c                8    U R                   R                  U5      $ r   r  r  s     r   r  MutableBox.mark_reusea  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.realize_hintd  r  r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.unwrap_viewg  r
  r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.freeze_layoutj  s    yy&&((r   c                8    U R                   R                  X5      $ r   )rN  r  r  s      r   r  *MutableBox.freeze_layout_with_stride_orderm  s     yy88NNr   c                8    U R                   R                  U5      $ r   )rN  r  r  s     r   r  (MutableBox.freeze_layout_with_fill_orderr  s    yy66u==r   c                8    U R                   R                  U5      $ r   )rN  r  r  s     r   r  (MutableBox.freeze_layout_with_same_orderu  s    yy66v>>r   c                8    U R                   R                  X5      $ r   )rN  r  r  s      r   r  +MutableBox.freeze_layout_with_exact_stridesx  s     yy99-WWr   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.get_read_writes}      yy((**r   c                6    U R                   R                  5       $ r   rG  r  s    r   r  MutableBox.get_reads  r   r   c                6    U R                   R                  5       $ r   rD  r  s    r   r  MutableBox.num_reads  r   r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_storage_numel  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_reduction_type  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_reduction_size  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.is_extern  r   r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.is_no_op  r  r   c                8    U R                   R                  U5      $ r   r  rm  s     r   r  MutableBox.constant_to_device  s    yy++F33r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.get_mutation_names  r  r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.get_operation_name  r  r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  'MutableBox.get_inputs_that_alias_output  s    yy5577r   c                6    U R                   R                  5       $ r   r  r  s    r   rc  MutableBox.realize  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  #MutableBox.get_unbacked_symbol_uses  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_read_names  r  r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  MutableBox.get_defining_op  r  r   c                8    U R                   R                  U5      $ r   )rN  rh  rf  s     r   rh  MutableBox.codegen_reference  s    yy**622r   c                6    U R                   R                  5       $ r   rN  rD  r  s    r   rM  MutableBox.layout  s     yy((**r   c                6    U R                   R                  5       $ r   r  r  s    r   r   MutableBox.get_layout  r  r   c                6    U R                   R                  5       $ r   r%  r  s    r   rD  MutableBox.get_output_spec  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r   MutableBox.get_size  r  r   c                .    U R                   R                  $ r   )rN  r   r  s    r   r   MutableBox.dtype  s    yyr   c                ~   [        U R                  [        5      (       aQ  [        U 5      R                   S[        U R                  5      R                   S3nSnU R                  R                  nO&[        U 5      R                   S3nU R                  nSnU[        [        U5      5      U/nSR                  U5      $ )Nr*  z))r   
)rj   rN  r  r{   r   r,  r   r-  )r   line0endlr  r.  s        r   ri  MutableBox.__str__  s    dii,,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyr   r   r  r  r  r  r  r  r   r  r  r  r  r  r  r   r  r  r  r  r  r  r  r
  r  r  rJ  r  r  r  r  r  r  )/r   r  r  r  r  r  rr  r   rw  r{  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rc  r  r  r  rh  r  rM  r   rD  r   r   ri  r  r  r   r   r   r  r  D  s5    L2&'(&$7+(') 7<OO/3O	O
>? DIX+X<@X	X
+%%-..%$4..8#4*+3 + +&+$   " Hr   r  c                  $    \ rS rSr\S 5       rSrg)re   i  c                *    [        [        U 5      5      $ r   )re   r  rN  s    r   rx  TensorBox.create  s    D)**r   r   N)r   r  r  r  r  rx  r  r   r   r   re   re     s    + +r   c                  T    \ rS rSrS rS rSS jrSS jrSS jrS r	SS jr
S	 rS
rg)r  i  c                    [        U R                  [        [        45      (       a5  U R                  R	                  5       [
        R                  R                  ;   $ grq  )rj   rN  r  r  r  rY   r   graph_inputsr  s    r   rQ  StorageBox.is_input_buffer  s=    dii+!?@@99%%'177+?+???r   c                    [        U R                  [        5      =(       a5    U R                  R                  5       [        R
                  R                  ;   $ r   )rj   rN  r  r  rY   r   r  r  s    r   r  StorageBox.is_module_buffer  s9    tyy>3 :		""$(9(99	
r   c           	        [        U R                  [        [        [        [
        [        45      (       a  U R                  R                  5       $ [        U R                  [        [        [        [        45      (       d   [        U R                  5      5       eU R                  R                  5       nU R                  R                  5       n[        S [        U R                  R!                  5       U R                  R#                  5       U R                  R%                  5       S9U R                  S9U l        [&        R(                  R+                  U R                  5      U R                  l        [&        R(                  R/                  U R                  5        U R0                  U R                  l        XR                  l        X R                  l        U R                  R,                  $ )NrK  rL  )rj   rN  rY  r  r  r  r  r  r  r  rC  r  r{   r  r  r   r   r   r   rY   r   r  r   r  r   r   r   )r   r   r   s      r   rc  StorageBox.realize  sS   II	
 	
 99%%''$))iD$%GHH 	
$IIK
 	
H ii//1II++-	"!yy++-ii))+YY'')
 
	 00;			""499- LL		 +		'		yy~~r   c                    [        U R                  [        [        45      (       a:  U R                  R	                  5       R
                  S:  a  U R                  5         ggg)z<
Called on buffers we expect to be forced to realize later.
r7   N)rj   rN  r  r  r  nontrivial_read_countrc  r  s    r   r  StorageBox.realize_hint	  sI    
 tyy9i"899		**,BBQFLLN G :r   c                    [        U R                  [        5      =(       a8    U R                  5       [        R
                  :  =(       d    U R                  5       $ r   )rj   rN  r  r  r8   realize_acc_reads_thresholdr  r  s    r   rr  !StorageBox.has_exceeded_max_reads  s@    $))Y/ 
NNvAAA )&&(	
r   c                r  ^ US:  a  [        U R                  [        [        45      (       a  [	        U R                  5      (       a9  U R                  R                  5       mSS/n[        U4S jU 5       5      (       a  gU R                  5       [        R                  :  =(       d    U R                  5       $ g)zR
A heuristic to decide if we should realize a tensor
that is used multiple times.
r7   expsigmoidc              3  @   >#    U  H  oTR                   ;   v   M     g 7fr   )used_ops)r  r   opcounts     r   r  5StorageBox.should_realize_on_reuse.<locals>.<genexpr>#  s     @iG,,,is   TF)rj   rN  r  r  r   r  r  r  r8   realize_reads_thresholdr  )r   r  	heavy_opsrK  s      @r   should_realize_on_reuse"StorageBox.should_realize_on_reuse  s    
 19DII	9/EFFdii  ))446"I.	@i@@@ 6#A#AA -**, r   c                R    U R                  U5      (       a  U R                  5         g g r   )rO  rc  r  s     r   r  StorageBox.mark_reuse+  s!    ''..LLN /r   c                6    U R                   R                  5       $ r   rD  r  s    r   r  StorageBox.num_reads/  r   r   r6  Nr  r  r  r  )r   r  r  r  rQ  r  rc  r  rr  rO  r  r  r  r   r   r   r  r    s+    

B
$%r   r  c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
Subgraphi3  r   r   ztorch.fx.GraphModulegraph_moduleNzOptional[GraphLowering]r   r   )r   r  r  r  r  r   r  r   r   r   rV  rV  3  s    
I&&%)E")r   rV  c                    U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     n n[        [	        S U  5       5      5      [        U 5      :  $ s  snf )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   )r\	  )r  r  s     r   r  '_has_aliased_buffers.<locals>.<genexpr>@  s     ;7"V**7r  )rj   r  r  r   r2   )buffersr  s     r   _has_aliased_buffersr\  :  sd     F !+6? C CO  
 z;7;;<s7|KKs   1Ac                     ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'           SU 4S	 jjr\	SS
 j5       r
SS jrSrU =r$ )InvokeSubgraphiC  NOptional[Subgraph]subgraphOptional[List[TensorBox]]operandsOptional[List[MultiOutput]]r  c                   > [         TU ]  S UUS9  Xl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g rz  )rb  r  r`  rY   r   r  r   r  )r   r`  rb  rM  rd  s       r   r  InvokeSubgraph.__init__I  sO     	 	 	

 !GG++D1		""4(r   c                   [         R                  R                  R                  S   nU Vs/ s H  oDR                  S   PM     nnU Vs/ s H  o@R                  U5      PM     nnS n/ n[        U5       He  u  p[        U	[        5      (       a  UR                  U	5        M-  U" XX   R                  5       5      n
UR                  U R                  X5      5        Mg     UnUR                  cz  [         R                  R                  UR                  UUR                  S9Ul        [         R                  " UR                  5         UR                  R                   " U6   S S S 5        UR                  R"                  nS nU H*  n	[        U	[        5      (       a  M  U	R%                  5       n  O   Uc   e['        UU[)        US9S9n[        U5       VVs/ s Ht  u  p[+        [-        UR%                  5       UR/                  5       UR1                  5       UR3                  5       UR5                  5       R6                  S9U[8        U4/5      PMv     nnnXl        U$ s  snf s  snf ! , (       d  f       GN= fs  snnf )NrI  r  c                    U  Vs/ s H:  n[        U[        R                  5      (       a  UR                  R                  OUPM<     sn$ s  snf r   )rj   ro   r
  r}   r2  )r   r   s     r   handle_sym_expr.InvokeSubgraph.create.<locals>.handle_sym_expr`  s7    OUVv!:a#>#>AFFKKAEvVVVs   AA	gmexample_inputssubgraph_namerl  )r`  rb  rM  rW  )rY   r   r  r   r9  rf  r   rj   r7  r^  r   r  make_subgraphrW  r   set_graph_handlerrungraph_outputsr   r^  r
  r
  r  r   r   r~  r   r  rk   r  )ry  r`  rb  fx_operandsr   fake_operandsrh  new_operandsr   operandexample_strider  r   invoke_subgraphr   r
  s                   r   rx  InvokeSubgraph.createU  s>    gg**//30;<1<
 3;;(Q%%a((;	W %h/LC'#899##G,!01C1J1J1L!M##C$=$=g$VW 0  >>!WW22((,&mm 3 HN
 $$X^^4""M2 5 .... Gg'<== ++-   !!!($F3
$ 'w/
 0	 !,,. **,*!,,.!,,.55  
 0 	 
 #* =
 <, 54(
s   I
I I%A;I7%
I4c                &    UR                  U 5        g r   )codegen_invoke_subgraphr  s     r   r  InvokeSubgraph.codegen  re	  r   )r   r`  )r`  rV  rb  List[TensorBox]rM  r
  r   r   )r`  rV  r  )r   r  r  r  r`  r  rb  r  r  r  rx  r  r  r  r  s   @r   r^  r^  C  sj    #'H '*.H'.+/G(/
) 
),;
)EV
)	
) B BH. .r   r^  c                     ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SrS\S	'   Sr	S
\S'               SU 4S jjr
\        SS j5       rSS jrSrU =r$ )Conditionali  Nr  	predicatera  rb  r_  true_subgraphfalse_subgraphrc  r  c                L  > Xl         X l        X0l        X@l        / n[	        U[
        5      (       d  UR                  U5        UR                  U5        [        TU ]%  S UUS9  [        R                  R                  U 5      U l        [        R                  R                  U 5        g rz  )r  rb  r  r  rj   r7  r^  r  rb  r  rY   r   r  r   r  )r   r  rb  r  r  rM  r  rd  s          r   r  Conditional.__init__  s     # *,)%:;;MM)$h 	 	
 GG++D1		""4(r   c                ^   U R                  U5      nU Vs/ s H  oPR                  U5      PM     nn[        R                  R                  R                  S   nU Vs/ s H  oUR
                  S   PM     nnX#4 H  nUR                  b  M  [        R                  R                  UR                  UUR                  S9Ul        [        R                  " UR                  5         UR                  R                  " U6   S S S 5        M     UR                  R                  n	UR                  R                  n
SU	4SU
44 H&  u  p[        U	5      (       d  M  [        SU SU 35      e   [        U	5      [        U
5      :X  d   X45       e[        [!        X5      5       H  u  nu  pUR#                  5       UR#                  5       :X  d	   XU45       eUR%                  5       UR%                  5       :X  d	   XU45       eUR'                  5       UR'                  5       :X  d	   XU45       eUR)                  5       UR)                  5       :X  d	   XU45       eUR+                  5       R,                  UR+                  5       R,                  :X  a  M   XU45       e   [/        U[0        5      (       d  UR'                  5       nO)[        U5      S:  d   S	5       eUS   R'                  5       n[3        UUUU[5        US
9S9n[        U	5       VVs/ s Hu  u  nn[7        [9        UR'                  5       UR)                  5       UR#                  5       UR%                  5       UR+                  5       R,                  S9U[:        U4/5      PMw     nnnUUl        U$ s  snf s  snf ! , (       d  f       GM,  = fs  snnf )NrI  r  rj  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: r   zQWhen predicate is not a Tensor, there must be at least one operand in torch.cond.rl  )r  rb  r  r  rM  rW  )rf  rY   r   r  r   r9  rn  rW  r   ro  rp  rq  r\  rE  r   r   r   r   r~  r   r   r   r  rj   r7  r~  r
  r
  r  rk   r  )ry  r  r  r  rb  r   rr  rs  r`  true_outputsfalse_outputsr   r  r   tofor   conditionalr
  s                      r   rx  Conditional.create  s]    %%i0	2:;(Q%%a((;gg**//30;<1< +H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&6 98 , }}22 44(,7*m9TUMD#L11$**./TU\T]_  V < C$66U8UU6$S%EFKAx;;=BKKM1>A2;>1==?bmmo5Br{B5==?bmmo5Br{B5<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC G )%:;;))+F H!cbc!a[++-F!!#$F3
, '|4
 5	6 !,,. **,*!,,.!,,.55 
 5 	 
" &O < = 98P
s   NN$NA<N)
N&	c                &    UR                  U 5        g r   )codegen_conditionalr  s     r   r  Conditional.codegen  s    ##D)r   )r  r   rb  r  r  )r  rf   rb  r|  r  rV  r  rV  rM  r
  r   r   )r  re   r  rV  r  rV  rb  r|  r  )r   r  r  r  r  r  rb  r  r  r  r  r  rx  r  r  r  r  s   @r   r~  r~    s    "&I&*.H'.(,M%,)-N&-+/G(/)) ")  	)
 !) ") 
)6 OO O 	O
 "O Ob* *r   r~  c                     ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SrS\S'   Sr	S	\S
'               SU 4S jjr
\        SS j5       rSS jrSrU =r$ )	WhileLoopi  Nra  carried_inputsadditional_inputsr_  cond_subgraphbody_subgraphrc  r  c                   > Xl         X l        X0l        X@l        [        TU ]  S UX-   S9  [        R                  R                  U 5      U l	        [        R                  R                  U 5        g rz  )r  r  r  r  rb  r  rY   r   r  r   r  )r   r  r  r  r  rM  rd  s         r   r  WhileLoop.__init__  se     -!2**!5 	 	
 GG++D1		""4(r   c                B	   U Vs/ s H  oPR                  U5      PM     nnU Vs/ s H  oPR                  U5      PM     nnX4-   n[        R                  R                  R                  S   [        R                  R                  R                  S   -   nU Vs/ s H  oUR
                  S   PM     nnX4 H  n	U	R                  b  M  [        R                  R                  U	R                  UU	R                  S9U	l        [        R                  " U	R                  5         U	R                  R                  " U6   S S S 5        M     UR                  R                  n
UR                  R                  n[        U5      (       a  [        SU 35      e[        U
5      S:X  d   U
5       eU
S   R                  5       [         R"                  :X  d   U
5       e[        U
S   R%                  5       5      S:X  d   U
5       e[        U5      S:  d   S5       eUS   R'                  5       n[        U5      [        U5      :X  d   X;45       e[)        [+        X;5      5       H  u  nu  p      SS	 jnU" UR%                  5       UR%                  5       5        U" UR-                  5       UR-                  5       5        UR'                  5       UR'                  5       s=:X  a  U:X  d  O   XX45       eUR                  5       UR                  5       :X  d	   XU45       eUR/                  5       R0                  UR/                  5       R0                  :X  a  M   XU45       e   [3        UUUU[5        US
9S9n[)        U5       VVs/ s Hu  u  nn[7        [9        UR'                  5       UR                  5       UR%                  5       UR-                  5       UR/                  5       R0                  S9U[:        U4/5      PMw     nnn[+        UU5       Hk  u  nnUR=                  5       [        R                  R>                  ;   d  M4  [        R                  R@                  RC                  UR=                  5       5        Mm     UUl"        U$ s  snf s  snf s  snf ! , (       d  f       GM  = fs  snnf )NrI  r  rj  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r7   r   z9torch.while_loop is assumed to have at least one operand.c                ~    [        X5       H.  u  p#[        R                  R                  R	                  X#5        M0     g r   )r   rY   r   r   rl  )	lhs_exprs	rhs_exprslhsrhss       r   _guard_list_equals,WhileLoop.create.<locals>._guard_list_equalsg  s-     !$I 9HCGG$$11#; !:r   rl  )r  r  r  r  rM  rW  )r  List[Union[int, sympy.expr]]r  r  r   r   )#rf  rY   r   r  r   r9  rn  rW  r   ro  rp  rq  r\  rE  r   r   ro   r  r   r   r   r   r~  r   r  r  r
  r
  r  rk   r  r:  r	  r  r  )ry  cond_fnbody_fnr  r  r   
all_inputsfx_all_inputsfake_all_inputsr`  cond_outputsbody_outputsr   r   opbor  
while_loopr
  r  r
  r   s                         r   rx  WhileLoop.create5  s    9GG1++A.G;LM;La..q1;LM#7
,,11"58L8L8Q8QRT8UU2?@-Q66%=-@ *H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&8 98 + }}22}}22-- XXdWeg  < A%3|3%A((*ejj8F,F8<?++-.!3A\A3 
Oa	GF	G A))+ >"c,&77W.9WW7$S%FGKAx<7<7< < r{{}bkkm<r}}@ ==?bmmo??T!ATT?<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC H" )/!!$F3

* '|4
 5	6 !,,. **,*!,,.!,,.55 
 5 	 
 NG4HC||~!5!55 ++//? 5 %
w HM A 98f
s#   Q9Q>RR6A<R
R	c                &    UR                  U 5        g r   )codegen_while_loopr  s     r   r  WhileLoop.codegen  s    ""4(r   )r  r  r  r  r   )r  r|  r  r|  r  rV  r  rV  rM  r
  r   r   )r  rV  r  rV  r  r|  r  r|  r  )r   r  r  r  r  r  r  r  r  r  r  r  rx  r  r  r  r  s   @r   r  r    s    04N-43707(,M%,(,M%,+/G(/)') +)  	)
  ) ") 
), bb b (	b
 +b bH) )r   r  c                  V   ^  \ rS rSr SSS. S	U 4S jjjjrS
U 4S jjrSS jrSrU =r$ )rz   i  Nr@
  c          
       > [         T
U ]  UUUUUS US9  SSKJn  U" U/ UQUQ7U5      n	U	c   eXl        [
        R                  R                  R                  U	S 5      U l	        U [
        R                  R                  U	'   g )N)r   rw  r   )get_effect_key)
rb  r  torch._higher_order_ops.effectsr  effect_typerY   r   effectful_opsr?  prev_effect_buffer)r   rM  r[  r  ra
  r  r   rw  r  r  rd  s             r   r  EffectfulKernel.__init__  s     	/ 	 	
 	C$V-L~-L-LfU&&&&"#''"7"7";";K"N-1k*r   c                   > [         TU ]  5       nU R                  bG  UR                  R	                  [
        R                  " U R                  R                  5       5      5        U$ r   )rb  r  r  r  r  r9   rW  r  )r   rb  rd  s     r   r  EffectfulKernel.get_read_writes  sU    g-/"".!!$$T%<%<%E%E%GH r   c                    gr  r   r  s    r   r	   EffectfulKernel.has_side_effects  r!  r   )r  r  r   r  r  r  )	r   r  r  r  r  r  r	  r  r  r  s   @r   rz   rz     s5     2 2 
2 2: r   rz   c                  >    \ rS rSr% S\S'   S\S'   S rS
SS jjrS	rg)r8  i  r   r   ztorch._C.ScriptObjectr   c                    U R                   $ r   r	  r  s    r   r  TorchBindObject.get_name  r_  r   Nc                    U R                   $ r   r	  rf  s     r   rh  !TorchBindObject.codegen_reference  r_  r   r   r   r  )r   r  r  r  r  r  rh  r  r   r   r   r8  r8    s    
I   r   r8  c                  j    \ rS rSrS	S jrS	S jrS
SS jjr\    SS j5       r\  SS j5       r	Sr
g)_CollectiveKerneli  c                    grq  r   r  s    r   r  !_CollectiveKernel.should_allocate  rt  r   c                    gr  r   r  s    r   r	  "_CollectiveKernel.has_side_effects  r!  r   Nc                Z   [        U R                  5      [        R                  R                  L d   S5       eU R                  nUR
                  R                  U l        UR
                  R                   Vs/ s H!  o3R                  (       d  M  UR                  PM#     snU l
        g s  snf )Nz,Setting cpp kernel needs a valid op_overload)r{   rt  ro   r  r  r  r   rr  r  r  rs  )r   rr  r[  r   s       r   r{  %_CollectiveKernel.set_cpp_kernel_name  s    !!"ejj&;&;;	:9	:;!!%~~22 #NN44.
4qFAFF4.
* .
s   7B(B(c                <   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H  n
U
R	                  5         M     US   R                  5       nU " [        US9UUWW5      n[        R                  " U5      nUR                  R                  U Vs/ s H  n[        [        US9X5      PM     sn5        UR                  R                  U Vs/ s H  oR                  5       PM     sn5        SU;   a]  UR                  R                  [        [        US9US   U5      5        UR                  R                  US   R                  5       5        g g ! , (       d  f       GNQ= fs  snf s  snf )Nr>  r   rl  r   )rY   r   r  r  rc  r   r  r  tree_leavesrx  r  r  rE
  r  r^  )ry  r[  r  r   r   r  r  r  r  rw  rQ
  r   r
  inpsre  r
  s                   r   create_inplace _CollectiveKernel.create_inplace  s    WW ""6CDCFC!  %E2C1D&EE$%J  & Q**,f%
 !!&)&&OSTt^Jf5sCtT	

 	!!T"BTc<<>T"BCF?##**z8&-P %%fUm&<&<&>? 9 . U #Cs   FF>F
Fc           
     f   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H  n
U
R	                  5         M     [        W[        5      (       av  U R                  Xe5      nU " [        US9UUWW5      n[        U5       VVs/ s H(  u  p[        U R                  U5      U[        U4/5      PM*     snnUl        UR                  $ U " U R                  U5      UUWW5      nU/Ul        U$ ! , (       d  f       N= fs  snnf )NrM  rl  )rY   r   r  r  rc  rj   rk   r
  r
  r   r
  r
  r  )ry  r[  r  r   r   r  r  r  r  rw  rQ
  r   r
  r   r  s                  r   create_out_of_place%_CollectiveKernel.create_out_of_place-  sK    WW ""6CDCFC!  %F3D2E&FF$%J  & nd++__[AF!0F "+>!: ";IA ((0AYK
 ";FN >>!$$^4F %XFNMO *s   D3/D-
D*)rr  rs  r  r   r-	  )r  !Union[TensorBox, List[TensorBox]]r   r   )r  r  )r   r  r  r  r  r	  r{  r  r  r  r  r   r   r   r  r    sV    
	
" $@>$@	$@ $@x *>* *r   r  c                  F   ^  \ rS rSrS r\SS j5       rSU 4S jjrSrU =r	$ )_WaitKerneli[  c                &   U R                   S   n[        U[        5      (       a  UR                   S   /$ [        U[        5      (       aG  UR                   S   n[        U[        5      (       a!  UR                  S   u  p4UR                   U   /$ / $ / $ r[  )r  rj   r  r
  rc  )r   r
  collr   r   s        r   get_volatile_reads_WaitKernel.get_volatile_reads\  s    kk!nc,--JJqM?"[)) ::a=D$ 122QC())I Ir   c                v   [         R                  R                     U R                  X5      u  nnnnnS S S 5        W(       a   U SU 35       eU " [	        UR                  5       S9UWWW5      nUR                  R                  [        [	        UR                  5       S9X(5      5        g ! , (       d  f       N}= f)Nr>  rl  )	rY   r   r  r  r  r   rx  r^  r  )	ry  r[  r
  r  r  r  r  rw  r
  s	            r   create_wait_WaitKernel.create_waitq  s    WW ""6/!  %E2C1D&EE$cnn./
 	&&:S^^-=>L	
! s   B**
B8c                   > [         TU ]  5       nU R                  5       nU H@  nUR                  R	                  [
        R                  " UR                  5       5      5        MB     U$ r   )rb  r  r  r  r  r9   rW  r  )r   rb  volatile_readsvrrd  s       r   r  _WaitKernel.get_read_writes  sS    g-/002 B!!,"6"6r{{}"EF !r   r   )r
  re   r   r   r  )
r   r  r  r  r  r  r  r  r  r  r  s   @r   r  r  [  s&    * 
 
* r   r  c                2   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a#  [        5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )
rj   r0   r   r,   rl   rk   r2   r&	  ro   r  )r   r{  r   s      r   r&	  r&	    sz    !h%&&$Q''	At}	%	%&0lA,Q//A 	Au||	$	$$Q''|r   )r   r   r   r   )r   r   r   r  )r   Sequence[int]r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   z&Callable[[Sequence[_T]], Sequence[_V]]r   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   r  )r   Sequence[Union[int, Integer]]r   r  r  )r   zLiteral[None]r   r  r   r   )r   rf   r   r  r   r
  )r   r  r   r  r   zOptional[torch.Tensor])r   zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])r   z2Union[IRNode, OutputSpec, torch.device, None, str]r   r  )r   z&Union[IRNode, torch.device, None, str]r   r  )r   zUnion[Expr, Sequence[Expr]]r   r  r   rX   )r  r   r   r  r  r  r   r
  )r  r  r  r  r   r  r   r  )r   rf   r   r  )TFNFN)r   rf   r  r  r  r  r  'Optional[Sequence[Union[int, Integer]]]r  r  r  r  r   zTuple[StorageBox, Layout])r   rf   r  r  r   r  )r   r  rU  r  r   r  )r   r  r   ry   )r[  zSequence[IRNode]r   r  )r   r   r   r	  (  
__future__r   r  r  rs  r  loggingtextwrapr   r  r   enumr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   unittest.mockr   ru   r   r   r   torch._export.serde.schema_exportserderc
  r
  torch._loggingro   torch.fxtorch.utils._pytreer  _pytreer  torch._dynamo.utilsr   torch._export.serde.serializer    *torch._higher_order_ops.auto_functionalizer!   torch._inductorr"   torch._prims_commonr#   r$   r%   r&   r'   torch._subclasses.fake_tensorr(   %torch.fx.experimental.symbolic_shapesr)   r*   r+   r,   r-   r.   r/   r0   torch.utils._ordered_setr2   torch.utils._sympy.functionsr3   r4   r5   torch.utils._sympy.symbolr6   r#  r8   r9   codegen.commonr:   r;   r<   r=   r>   r?   r@   	loop_bodyrA   ops_handlerrB   rC   runtime.benchmarkingrD   runtime.hintsrE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   virtualizedrW   rX   rY   torch.fx.noderZ   codegen.cuda.cuda_templater[   r   r\   r]   r   r  r^   r_   r`   ry   ra   r  rb   	getLoggerr   rw  r,  r  r   rg   r   r   r   r   r   rr  rt  r   r   r   r   r   r   r   r   rf   r  rL  r  r  r  r  r  r  r  r  rC  r{  r  r   r  r  rg  r  r  rr   r*  r9  rH  r  r  r  r  r  r  r  r  r
  r   rJ  r  r   r  r  r  r  r  r  r  r  r   r  r+  r7  rY  r  r  r  PrimitiveInfoTyper  r  r  r  r$  r  rC  rI  re  r/	  r9	  rg  r  rY	  rh	  r	  r	  r	  r	  r	  r	  r	  r
  rs   rt   r=
  r?
  r
  r
  r
  r}
  r  re   r  rV  r\  r^  r~  r  rz   r8  r  r  r&	  r   r   r   <module>r     sX   "         "      & = <   ' ' 2 2   $ $ ( ? M #  :	 	 	 0 L L * " D    4 - :    $ * ) "8$% %L)$ T]T]T]CI&) &C,-) -!			8??4	8yy~~'T  k	sDk!12K8STU	i 	$>44 , ! $  TX	1>P	 TX
	1
>P

 
 
 
 

 .2&*8!%	>9	>	>&'q qh UA A AH }
F }
 }
@& 
 
 
@ 
i 
 
B |$y!y!u=)< 8  JN<N<N +<NBF<N<N~   ,> FX 	   h

 h

 h

VM
y M
` |
5 |
 |
@ 	 	 	 O5 O Od	 !<@=A/// / :	/
 / ;/ /d $-#4#44$  
:	 \
v \
 \
~ K K K\ (( ( (V 79( 79 79t (  : P; P Pf Ph P Pf % % %PHA HAV 6  " K| K K$ S| S S'9	<7 7 HTZ HT HTV& $I7V I7XPf P2T $%{ $%N   .Q* Q*h UvV v vr U&fi & & & 
K 
[ 
& 6   
F 
 
 UE4_ E4 E4P
7
_ 7
t)> )X #udCeCeT<Q6R1SST -$ -$`"| "
>=. >=BM M$  U>? > >B b9 bJ UU< U Up U0l 0 0f
/ 
(*" *"ZV .G.L G.Tpl pf&)| &)T-, -`< 6B5 B"F- F,H)l H)V,)| ,)^!T !TH-L -42;< 2;j U  
W*& W*t U
. 
 
< 
  .
, .
f N N Nb+
 +T% T%n U*v * *L UW.\ W. W.t Uu*, u* u*p UC) C) C)L)n )X f  B BJ2# 2pr   