
    Bj                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZmZ d dlZd dlmZ d dlZd dlZd dlZd dlmZmZ d dlmZ d d	lmZmZ d d
lm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dlm'Z'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC d dlDmEZE d dlFmGZG ddlHmIZImJZJmKZK ddlLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZT ddlUmVZVmWZWmXZXmYZY ddlZm[Z[ ddlJm\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZgmhZh ddlimjZjmkZkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZtmuZumvZv dd lwmxZx dd!lymzZz dd"l{m|Z| dd#l}m~Z~mZmZmZmZmZmZmZmZmZmZmZmZ dd$lmZmZ er;d d%lmZmZmZmZ d d&lmZ d d'lmZ d d(lmZ d d)lmZ dd*lmZ dd+lmZ dd,lmZ eeOz  Zd d-lmZ  ej        e          Zej                            ed.          Zej        j        Z ej                    Z eIj                    rd d/lmZ d d0lmZ ndUd6ZdUd7ZdVd;ZdWd>ZdXdDZdYdHZdZdKZd[dMZd\dPZ G dQ dRej        j                  Z G dS dTe          ZdS )]    )annotationsN)defaultdict)contextmanager)AnyNoReturnTYPE_CHECKING)Expr)deviceTensor)get_decompositions)defakedynamo_timed)FakeScriptObject)is_opaque_reference_typeis_opaque_typeis_opaque_value_type)get_layout_constraint_tag)
LazyStringtrace_structured)compute_required_storage_lengthmake_channels_last_strides_for)
FakeTensor)full_aoti_runtime_assert)BackwardState)magic_methodsmethod_to_operator)_get_placeholder_exprfree_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSympyBooleanSymTypes)Node)_is_view_op)no_dispatch)
OrderedSet)int_oo   )configirmetrics)BackendFeatureDeviceOpOverridesFileBackedGraphModuleget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registrationWorkspaceArg)CppWrapperCodegenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)count_flops_fx)assign_origin_nodeConstantDonatedBufferFixedLayoutget_device_typeGraphPartitionSignatureInputBuffer	Pointwise	ReductionShapeAsConstantBuffer
StorageBox	TensorBoxTorchBindObject)constrain_to_fake_tensorsconstrain_to_fx_stridesFALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsrequire_contiguoustag_to_layout_constraintunsupported_output_tensoruser_lowerings)autotune_cache)AutotuneCacheBundler)SizeVarAllocator)convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_donated_idxsget_sympy_Expr_dtypeGraphPartitionMapis_same_tensor#maybe_get_suppress_shape_guards_ctxnormalize_nameshould_assume_input_alignedshould_fallback_by_defaultSUPPORTED_MKLDNN_DEVICESValueWithLineMap)NullHandlerV)CallableIterableIteratorSequence)
ModuleType)_EffectType)GraphModule)Graph)PythonWrapperCodegen)Dep)BaseSchedulerNode)output_code_log
perf_hints) save_triton_kernel_perf_artifact)log_module_codeargsr   kwargsreturnNonec                     d S N rv   rw   s     X/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/torch/_inductor/graph.pyru   ru              c                     d S r{   r|   r}   s     r~   rt   rt      r   r   constant_buffer
sympy.Exprtorch.dtype | Nonec                   t          | t          j        t          j        t          j        j        j        f          s
J d            t          | t          j        j        j                  rt          j        S t          | t          j                  rt          |           S | j
        rt          j        S | j        rt          j        S d S )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr	   corenumbersIntegertorchint64r\   
is_integeris_floatfloat32)r   s    r~   may_get_constant_buffer_dtyper      s    %,
EJ4F4NO    	r  
 /5:#5#=>> {/5:.. 5#O444! {		! }tr   opboolc                F    t          d t          D                       }| |v S )Nc              3  4   K   | ]}t          |          V  d S r{   )r   ).0ms     r~   	<genexpr>z"is_magic_method.<locals>.<genexpr>   s+      HHQ-a00HHHHHHr   )r(   r   )r   	magic_opss     r~   is_magic_methodr      s(    HH-HHHHHI?r   objrm   targetstr,Tensor | torch._C.ScriptObject | GraphModulec           	         |                     d          }| }t          |          D ]R\  }}t          ||          s-t          dd                    |d |                              t          ||          }S|S )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)r   r   target_atomsattr_itriatoms         r~   getattr_recursiver      s     <<$$LH\** + +4x&& 	Rchh|BQB?O6P6PRR   8T**Or   grn   dict[Node, tuple[int, ...]]c                J   i }|                      d          d         }d|j        vr|S t          |j        d         t          j        j                  s|j        d         }n|j        }t          |          D ]*\  }}||j        d         v r|j        d         |         ||<   +|S )Noutputr   r   user_visible_output_idxsoriginal_output_strides)
find_nodesmetar   rv   r   fxr%   r   )r   retoutput_nodeoutput_node_argsidxnodes         r~   get_user_visible_output_stridesr      s    ')C,,(,++A.K!)999
k&q)58=99 ,&+A.&+/00 I I	T+"#=>>>#()BCCHCIJr   user_visible_outputsdict[Node, object]c                   i | }g |                                 }t          g |          }|r|                                }t          |j                  r|j        r{t          |j        d         t          j        j	                  rQ|j        d         }||vr@|
                    |d           |                    |           |                    |           ||S )zc
    Extend user_visible_output_strides to include view ops that lead to user-visible outputs.
    r   N)keysr(   popr&   r   rv   r   r   r   r%   
setdefaultaddappend)r   resultqueuevisitedcurrentbases         r~   "extend_user_visible_output_stridesr      s     ":$8!9FfkkmmE5""G
 #))++''		#		# 7<?EHM::		#
 <?D7""!!$---D!!!T"""  # Mr   user_visible_output_stridesc                   t           j        sdS t          |          }t          t          j        t          j        t          j        g          }t          t          j        t          j	        t          j
        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        g          }d
d}t)          | j                  D ]	}t-          |j        t0          j        j        j                  rd|j        d<   7t-          |j        t0          j        j                  r7t?          |j                  t0          j         j!        j"        k    rd|j        d<    ||          }|s||v r
d|j        d<   |j        #                    dd	          r&|j$        D ]} ||          }	|	s|	|vr
d|j        d<   t           j%        s||v r
d|j        d<   dS )a  
    Nodes like convolution/convolution_backward want its input to be dense.
    If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

    The pass finds nodes that dislike padding. These are nodes that can be reached
    from a convolution/convolution_backward in the backward direction without
    going thru a reduction.
    Nr   torch.fx.Noderx   "torch._ops.OpOverloadPacket | Nonec                    | j         dk    rEt          | j        t          j        j                  r!t          | j        d          r| j        j        nd S )Ncall_function_overloadpacket)r   r   r   r   _ops
OpOverloadr   r   )r   s    r~   _get_overload_packetz8mark_nodes_dislike_padding.<locals>._get_overload_packet   sX    
 w/))4;
(=>> * %677 * K''
 	
r   Tdislike_paddingF)r   r   rx   r   )&r+   comprehensive_paddingr   r(   atenconvolutionconvolution_backward
_scaled_mmvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducereversednodesr   r   r   _higher_order_opstriton_kernel_wrapTritonKernelWrapperMutationr   r   r   r   _CTagneeds_exact_stridesgetall_input_nodespad_outputs)
r   r   extended_user_visible_nodesops_dislike_paddingops_like_paddingr   curr   priorprior_ops
             r~   mark_nodes_dislike_paddingr      s    ' "D## # %%O	
  "MHIIHIIHHKK	
 "

 

 

 

     /  /J#6R
 
 	 +/CH&' sz5:#899	)#*55x|/0 0 +/CH&'!!#&& 	$$$*.CH&'8<<)511 	9, 9 9//66 #33348EJ01! 	/c-H&H&H*.CH&'A /  /r   r   r%   c                (   t          t          j        dd           t          t          j        j        dd           t	          | j        t          j                  rt          | j        j	                  dk    rt          | j        j	        d         d          rz| j        j	        d         j        D ]b}|j        d         t          j        j        j        j        t          j        j        j        j        t          j        j        j        j        fv r dS cdS )Nmkldnn_convolution_pointwiser   targetsTF)r   r   opsr   r   r   	functoolspartiallenrv   r   r   fnsr   defaultbinary_convolution_pointwise_)r   r   s     r~   is_mkldnn_convr  O  s     		8T**6EI$&>EEQt{I$566 R !!A%%DK$Q'33 & k&q)1 	 	Fz!}	 7?	 7>	 8?!  
 tt 5r   c                      e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd fd&Zdd'Zdd+Zdd-Zdd1Zdd4Z	dd9Z
ddd?ZddAZej        ddC            ZddEZeddG            ZddHZddLZddNZddOZddPZeddR            ZddUZddXZddZZdd\Zdd^Zdֈ fdaZdddZ ddeddiZ!ddlZ"ddoZ#ddpZ$ddqZ%ddtZ&dddvZ'ddyZ(d fdZ)e*j+        d fd            Z,edd            Z-ddZ.ddZ/ddZ0e*j+        d fd            Z1ddZ2edd            Z3edd            Z4ddZ5edd            Z6ddZ7d fdZ8ddZ9ddZ:	 	 	 	 dddZ;ddZ<ddZ=ddZ>ddZ?ddZ@ddZAdZBded<   ddZCddZDddZEddZFddZGddZH xZIS )GraphLoweringlist[ir.IRNode]graph_outputsNFgmtorch.fx.GraphModuleexample_inputsSequence[object] | None	shape_envShapeEnv | Nonegraph_id
int | Nonecpp_wrapperr   aot_mode
layout_optbool | Noneextern_node_serializer1Callable[[list[ir.ExternKernelNode]], Any] | Noneis_inferenceis_backwardis_const_graphconst_output_indexdict[str, int] | Noneconst_wrapper_code
str | Noneconst_kernel_codeconst_moduleGraphLowering | Nonenameinputs_to_checkSequence[int] | None
fx_wrapperget_decomp_fn3Callable[..., dict[Any, Callable[..., Any]]] | Nonerx   ry   c                *   t                                          |           || _        || _        ||n|                     ||	          | _        d| _        |	| _        |
| _        || _	        || _
        || _        || _        || _        d| _        d| _        |t!                      }d| _        nd| _        || _        |j                                        | _        t-          t.          j                             | _        t5          |          | _        g | _        i | _        i | _        d | _        t-                      | _         |r|j!        nt-                      | _!        |r|j"        nt-                      | _"        d| _#        tI          t,                    | _%        tI          t,                    | _&        i | _'        g | _(        g | _)        |r|ni | _*        |r!t-          |+                                          nt-                      | _,        |r|j-        ni | _-        |r|j.        ni | _.        |j/        0                    dt-                                | _1        |r|j2        ni | _2        i | _3        i | _4        i | _5        i | _6        t-                      | _7        t-                      | _8        t-                      | _9        t-                      | _:        i | _;        t-                      | _<        t-                      | _=        d | _>        d | _?        ddl@mA} t          jC                    r|r|n|| _D        d | _E        i | _F        t-                      | _G        g | _H        i | _I        tI          t                    | _K        i | _L        t          jM                    | _N        || _O        || _P        || _Q        || _R        i | _S        || _T        || _U        t          t                    | _X        d | _Y        d | _Z        d | _[        d | _\        d | _]        | j        r| ^                                nt-                      | __        t-          dg          | _`        t          |jb                  | _c        t          |jb        | jc                   d	| _e        d	| _f        g | _g        d | _h        i | _i        |j                                | _k        | jk        .                                D ]\  }}|| j.        |<   | jk        2                                D ]\  }}|| j2        |<   | jl        j/        0                    d
i           | _m        ||jn        ni | _n        t                        t          jq        d           t                    | _r        i | _s        t-                      | _t        t-                      | _u        i | _v        t-                      | _w        t-                      | _x        t-                      | _y        t          j{                    | _|        d| _}        t                      | _        i | _        d S )N)r  r   FTcpumutated_named_buffers)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)super__init__r%  r  decide_layout_optr  num_channels_last_convr  r  r  r  r  r  r"  _defers_input_alignmentextra_tracebackr"   reuse_shape_env
_shape_envdeferred_runtime_assertscopyras_by_symbolr(   r   r   bound_unbacked_symbolsrW   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalpartition_mapszero_dim_cpu_tensor_listdevice_typesdevice_idxsdevice_typer   additional_buffer_depsadditional_star_depsbuffer_to_padded_sizebuffers
operationsr  r   folded_constants	constantsnamed_buffersr   r   r)  named_parameterstorchbind_constantsopaque_value_type_classesseen_subgraphsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_bufferssdpa_constraint_cachenever_reuse_buffersinplaced_to_remove
device_opswrapper_code&torch._inductor.extern_node_serializerr*  r+   	is_fbcoder  current_nodelistsmutated_inputsmutated_input_idxsname_to_bufferlistname_to_users
name_to_optimecreation_timer!  r  r$  record_multi_kernel_choicemulti_kernel_to_choicer  r  next_post_grad_graph_counterpost_grad_graph_id	schedulerautotuning_inputsautotuning_mappingautotuning_gridscurrent_devicefind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   graphr   r   	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler,  allocated_constant_namer4   r   	lru_cacher1   effectful_opsunaligned_buffersno_fuse_buffer_namesbuffer_layout_constraintslow_precision_codegen_opsinvoke_quant_opsall_codegen_kernel_names	itertoolscountworkspace_idplaceholder_idxr[   bw_donated_idxsdep_size_hint_cache)selfr	  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r"  r$  r%  r*  kv	__class__s                          r~   r/  zGraphLowering.__init__g  s   . 	*, % J'''FF 	
 '(#(&,"4!2(.',$$ 

I#(D  #'D # .3355 	 '1&>&@&@#(33,.QS=?">B9C%)5GL%%:<< 	 )5FL$$*,, 	 !BMC
 C
# ALJ@W@W! <>"(*.0"4<" 	
 "J)..00111 	 '3:L"" 	 +7>L&&B 	 @Bw{{#Z\\@
 @
" .:AL))r 	
  	  ;=&68.03=<<0:8B$0:=?"4>LL 3=<<-126VVVVVV !!-&<-"", 	# ,0+-
/9||-/46@KD@Q@Q35!Y[[	&$
 +6'68#  "&'?"@"@>B =ADH7; 48 7;oWD00222:<< 	' !+,G+H I I+J28+T+T("28T-MNNN !  	 6:& GI -/[[]]L..00 	& 	&DAq$%Dq!!L1133 	) 	)DAq'(D!!$$040@0D0D.1
 1
- 5A4LL00RT 	$ 	"###$=I$7$=$=>R$S$S!;= 3=,,5?\\! EG&:D,,&1; :D% &O--  "/11 AC   r   c                8    | j                                          d S r{   )r5  freeze_runtime_assertsr  s    r~   r  z$GraphLowering.freeze_runtime_asserts=  s    ..00000r   extorch.Tensor1tuple[Sequence[int | Expr], Sequence[int | Expr]]c                `   | j         rBt          |                                          t          |                                          fS ddlm}  |dt          | j        j                             }| j        	                    ||          \  }}}d |D             }d |D             }||fS )z
        Support dynamic shapes and dynamic strides by assigning variables
        to each dimension.  We duck-shape tensors, so if two tensors
        have the same size they get assigned the same symbolic variable.
        r   )ConstantSource__inductor_unknown_tensor_c                ^    g | ]*}t          |t          j                  r|j        j        n|+S r|   r   r   SymIntr   exprr   r   s     r~   
<listcomp>z8GraphLowering.symbolic_sizes_strides.<locals>.<listcomp>a  s1    RRRAu|!<!<C!&++!RRRr   c                ^    g | ]*}t          |t          j                  r|j        j        n|+S r|   r  r  s     r~   r  z8GraphLowering.symbolic_sizes_strides.<locals>.<listcomp>b  s1    VVV!:a#>#>EAFKKAVVVr   )
r4  rX   sizestridetorch._dynamo.sourcer  r   r5  backed_var_to_val,create_symbolic_sizes_strides_storage_offset)	r  r  r  sourcer  r  _r_sizer_strides	            r~   symbolic_sizes_stridesz$GraphLowering.symbolic_sizes_strides@  s      	,RWWYY779R		: :   <;;;;; $^US1R-S-SUU F LL 	 SRTRRRVVvVVVxr   )tuple[list[sympy.Expr], list[sympy.Expr]]c                    d |                                 D             }d |                                D             }||fS )z+
        Primarily used to weights
        c                6    g | ]}t          j        |          S r|   r   r   r  s     r~   r  z6GraphLowering.static_sizes_strides.<locals>.<listcomp>k  s"    444Qa  444r   c                6    g | ]}t          j        |          S r|   r  r  s     r~   r  z6GraphLowering.static_sizes_strides.<locals>.<listcomp>l  s"    888q%-""888r   )r  r  )r  r  r  r  s       r~   static_sizes_stridesz"GraphLowering.static_sizes_stridese  sD     54"''))44488BIIKK888V|r   r   Lir.TensorBox | ir.StorageBox | ir.Buffer | WorkspaceArg | ir.TorchBindObjectSequence[Expr]c                "   t          |t          j                  r|j        }t          |t          j                  r|j        }t          |t          j                  r |j        | j        v r| j        |j                 S |                                S r{   )	r   r,   rF   datarE   ComputedBufferr!  rE  get_size)r  r   s     r~   get_allocation_sizez!GraphLowering.get_allocation_sizeo  s     dBL)) 	9DdBM** 	9DtR.//	#	T777 -di88==??"r   -ir.Buffer | WorkspaceArg | ir.TorchBindObjectr	   c                    |                                 }|                     |          }|j        }|j        }t	          |||          S r{   )
get_layoutr  r  offsetr   )r  r   layoutr  r  r  s         r~   get_allocation_storage_sizez)GraphLowering.get_allocation_storage_size  sF     ""''--.tVVDDDr   r
   )torch._inductor.ir.IRNode | device | Nonefeaturer.   c                    t          |t                    s
J |            ||                     t          |                    v S r{   )r   r.   r1   r?   )r  r
   r  s      r~   has_featurezGraphLowering.has_feature  sC    
 '>22;;G;;2$33OF4K4KLLLLr   Tdeprp   count_bytesintc                   ||f| j         vrd}| j                            |j                  }t	          |t
          j                  rd| j         ||f<   dS 	 |                                r,| j        	                    |
                                          r+|r|                                }n|                                }n# t          $ r Y nw xY w|| j         ||f<   | j         ||f         S )zc
        Get the size hint for a dependency with caching to avoid expensive recomputation.
        r   )r  r<  r   r!  r   r,   NonTensorObjhas_unbacked_symbolsr:  all_unbacked_explicitly_hinted	get_numelnumbytes_hint
numel_hintKeyError)r  r  r  resinps        r~   get_dep_size_hintzGraphLowering.get_dep_size_hint  s    T%===C #''11C#r// ?@(#{);<q0022/}CCCMMOOTT/ # /!//11!nn..    	
 <?D$c;%78'k(:;;s   A+C 
CCtorch.devicec                6    | j         x}r|S t          d          )NzNo current device)rn  r   r  r
   s     r~   get_current_device_or_throwz)GraphLowering.get_current_device_or_throw  s&    ((6 	4M2333r   Iterator[None]c              #  V   K   | j         }|| _         	 d V  || _         d S # || _         w xY wr{   )rn  )r  r
   r   s      r~   set_current_devicez GraphLowering.set_current_device  sF      #$	(EEE"'D%D''''s    	(r   c                *    | j         rdS | j        rdS dS )N	inferencebackwardforward)r  r  r  s    r~   get_training_phasez GraphLowering.get_training_phase  s'     	; 	:yr   rm   c                  t           j        sdS t           j        rdS d | j        j        D             }| j        j        D ]&}t          |          r|                    |           't          |          }|dk    rdS t          j	        j
        j        r>t          j	        j
                                        rt          d |D                       rdS t          t          | j        j                            d|z  k    rt                              d           dS t#          d |D                       rt                              d	           dS d d}d!d}d!d}|rt%          t&                    }|D ]P}	t)          |	          }
|
 ||	          rd}n ||	          rd}n ||	          rd}nd}||xx         |
z  cc<   Qt                              d           d}d}d}d}t+          |                                          }|d         |z  |d         |z  z   |d         |z  z   |d         |z  z   }||k    }|st                              d||           |S t#          t/          ||                    rt                              d           dS t#          t/          ||                    rt                              d           dS t          t/          ||                    rt                              d           dS dS )"zl
        Decide if we should enable layout optimization for this graph based on
        heuristics.
        FTc                V    g | ]&}|j         t          j        j        j        j        u $|'S r|   )r   r   r   r   r   r  )r   ns     r~   r  z3GraphLowering.decide_layout_opt.<locals>.<listcomp>  s5     
 
 
UY^5O5W)W)WA)W)W)Wr   r   c              3  r   K   | ]2}d D ]-}|j         |         j        d         j        j        t          v V  .3dS )r   r*   valN)rv   r   r
   typerc   r   r  r   s      r~   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sg        !   s '.37OO      r   i,  z*Skipped layout opt because only a few convc              3  j   K   | ].}d D ])}t          |j        |         j        d                   V  */dS r  )r   rv   r   r  s      r~   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sc       
 

 
  QVC[-e455
 
 
 
 
 
 
r   zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670r  r   rx   r   c                    | j         d         j        d         }t          |t          j                  sJ | j         d         dk    o|                    d          dk    S )Nr*   r  r-  )rv   r   r   r   r   r  )r  meta_vals     r~   
is_groupedz3GraphLowering.decide_layout_opt.<locals>.is_grouped  sR    vay~e,Hh555556":>:hmmA&6&6&::r   r   c                   | j         d         j        d                             d          dz  | j         d         j        d                             d          k    o.| j         d         j        d                             d          dk    S )Nr*   r  r      rv   r   r  r  s    r~   is_in_out_channelz:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel  ss    q	u%**1--1QVAY^E5J5O5OPQ5R5RR 6F1IN5)..q11A5r   c                    | j         d         j        d                             d          dk    o.| j         d         j        d                             d          dk    S )Nr*   r  r   @   r  r  s    r~   is_small_channelz9GraphLowering.decide_layout_opt.<locals>.is_small_channel	  sR    q	u%**1--3 8F1IN5)..q11R7r   Ngroupedsmallin_outr  zConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)r  r   rx   r   )r  r   rx   r   )r+   layout_optimizationforce_layout_optimizationrr  r   r  r   r   r   backendsr   enabledis_availableallr`  logdebugr   r   floatr:   r   valuesmap)r	  r  
conv_nodesr  nconvr  r  r  flop_countsr   counted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                      r~   r0  zGraphLowering.decide_layout_opt  s    ) 	5+ 	4
 
x~
 
 

  	% 	%Aa   %!!!$$$JA::5 N!)		%2244		   #    		 4
 tBHN##$$e33IIBCCC5 
 

 
 
 
 
 	
 IIw   5	; 	; 	; 	;
	 	 	 		 	 	 	  +	!,7,>,>K" 8 8 .t 4 4 (:d## * )II%%d++ * 'II&&t,, * (II )II&&&-7&&&&		6777
 "'!& %$k002233K I&);;g&)99:h'*;;< i(+==>  +k9M  		~"  
 ! & s:z**++ 	IIX   5 s$j1122 	IIT   5 s#Z0011 	IIVWWW5tr   c                ,    | j         | j          d| S |S )z2Prepend the given name with the graph name if any.Nr  r!  r  r!  s     r~   qualify_namezGraphLowering.qualify_namei  s%    9 i(($(((r   list[torch.Tensor]subgraph_nameSubgraphLoweringc                    t          | ||| j        | j        | j        | j        | j        | j        |                     |          
  
        S )a  
        Make a subgraph of the current graph with all inherited parts, except
        the graph module (`gm`) and `example_inputs`.  The subgraphs are lowered
        separately and lifted into a separate function in the parent output
        wrapper code.  The subgraph name is qualified by the parent graph's
        name. Note that the lifting of subgraph is supported for python wrapper
        only. For cpp wrapper, we inline the subgraphs in the parent wrapper.
        )
parentr	  r  r  r  r  r  r  r  r!  )r  r5  r  r  r  r  r  r
  )r  r	  r  r  s       r~   make_subgraphzGraphLowering.make_subgrapho  sW      )o(]#'#>*(""=11
 
 
 	
r   OrderedSet[Node]c                l   d}t           j        j        j        j        g}t          t                               }t          | j        j	        j
                  D ]}|j        t           j        j        j        j        u r|                    |           ||}>|j        |v rHt          |          r|                    |           m|j        D ]}||v r|                    |            n| j        j	        j
        D ]9}|||k    r n.||v r)|j        D ]!}|j        |v r|                    |           ":|S )aC  
        The rule to decide if an node prefer channels last is simple.
        1. if it's input/output of a convolution
        2. if one of its user prefers channels last

        We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
        Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
        channels last.

        Consider the scenario: conv -> batch-norm -> relu -> conv
        Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
        1. the output of batch-norm should be channels last initially since its input is a conv's output.
           Forcing the batch-norm's output to be contiguous results in the first copy
        2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
           We need convert it to channels last layout which results in the second copy.
        With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
        can be saved.
        N)r   r   r   bmmr  r(   r%   r   rz  rr  r   r   r   r   r  users)r  	last_convnodes_cannot_propagate
output_setr  users         r~   ro  z-GraphLowering.find_nodes_prefer_channels_last  sd   & 	"')."4"<!=%''
$++122 	 	Ax59>5===q!!!$ !Ix111a   q!!!  :%%NN1%%%E &$ "( 	) 	)A$iJG ) )D{&<<< NN4((((r   c                    || j         vr7| j                             |           t                              d|           d S d S )NzUsing FallbackKernel: %s)rq  r   perf_hint_loginfor	  s     r~   warn_fallbackzGraphLowering.warn_fallback  sL    t,,,!%%d+++94@@@@@ -,r   c                    | j                             |j                   |j        | j                            |j                   t
          j        j        r$|| j        vrt
          j        j        | j        |<   d S d S d S r{   )	r@  r   r  indexrA  rf   rr  r[  rw  r  s     r~   add_device_infozGraphLowering.add_device_info  s    fk***<#  ...7 	DF$2J$J$J/0w/CD$V,,,	D 	D$J$Jr   ,torch._subclasses.fake_tensor.FakeTensorModec                    t           j        S r{   )rf   	fake_moder  s    r~   r"  zGraphLowering.fake_mode  s
    {r   buffer_name4ir.TensorBox | ir.Buffer | ir.TorchBindObject | Nonec           	     :   || j         v r| j         |         S || j        v r| j        |         S || j        v ret          j        j        |         }t          j        |t          j        |j        |j	        gt          j        
                    |          R            S d S Nr!  r  )r_  r<  rI  rf   rr  r,   ConstantBufferr>   r
   dtyper  )r  r#  r  s      r~   try_get_bufferzGraphLowering.try_get_buffer  s     $---&{33$+++$[11$.((7$[1D$ ~K./g.J.J4.P.P      tr   symbolr   c                     t          d          )Nz'Should not be called for the main graph)r   )r  r+  s     r~   add_symbol_graph_inputz$GraphLowering.add_symbol_graph_input  s    DEEEr   -ir.TensorBox | ir.Buffer | ir.TorchBindObjectc                X    |                      |          }||S t          d|           )Nz$Failed to find buffer matching name )r*  r   r  r#  bufs      r~   
get_bufferzGraphLowering.get_buffer  s8     !!+..?JO+OOPPPr   torch.dtypec                   || j         v r| j         |         j        S t          | j        d          rp|| j        j        v rb| j        j        |         }|| j        v r| j        |                                         S || j        v r| j        |                                         S || j        v r| j        |                                         S || j        v r| j        |                                         S t          j	        d|          }|r(|                     |
                    d                    S t          d|           )Nmutation_real_namez1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r*   could not find )rI  r)  r   rj  r5  r_  	get_dtyper<  rematchgroupr  )r  r#  mutated_bufr   s       r~   r7  zGraphLowering.get_dtype  sC   $.((>+.44 DN$899	Bt~@@@.;KHKd111*;7AACCCd///(5??AAA$---&{3==???$+++$[1;;===HI;WW 	.>>!''!**---666777r   
int | Exprc                F   || j         v r| j         |                                         S || j        v r7| j        |         }|                                sdS |                                S || j        v r| j        |                                         S t          d|           )Nr*   r6  )rI  numelr_  has_tensor_outputr  r<  r  r0  s      r~   r  zGraphLowering.get_numel  s    $.((>+.44666$---%k2C((** q==??"$+++$[1;;===666777r   rv   r   c                    t          d          5   t                      j        | cd d d            S # 1 swxY w Y   d S )NzGraphLowering.run)r   r.  run)r  rv   r  s     r~   rA  zGraphLowering.run  s    -.. 	& 	&577;%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   488r   ir.Operationc                   |j         J d|             t          |t          j                  sJ |                     dt          | j                             }| j                            |           || j        |<   ||_         |S )NzOperation registered twice: r   )	operation_namer   r,   	Operationr
  r   rG  r   rb  )r  r   r!  s      r~   register_operationz GraphLowering.register_operation  s     ((*M*M*M((("bl+++++  !<c$/&:&:!<!<==r""" " r   set_namebuffer	ir.BufferrH  c                  |                      dt          | j                             }| j                            |           || j        |<   |                                }|[t          |t          j                  r,|	                                r|t          j        d          k    s|                     |           |r||_        |S )Nr1  r(  )r
  r   rF  r   r_  
get_devicer   r,   r  is_zero_elementsr   r
   r  r!  )r  rI  rH  r!  r
   s        r~   register_bufferzGraphLowering.register_buffer&  s      !:s4<'8'8!:!:;;F###$*D!""$$ 62#455  ++--  el51111   ((( 	FKr   operation_names	list[str]c                p    |                      dd                    |          z             }|| j        |<   |S )Nlist_r  )r
  r   r\  )r  rO  r!  s      r~   register_operation_listz%GraphLowering.register_operation_list:  s7      388O+D+D!DEE*
4r   node_outputIterable[ir.IRNode] | ir.IRNodec                .     d fd |           d S )NvaluerU  rx   ry   c                   t          | t          t          f          r| D ]} |           t          | t          j                  r7|                                 D ]$}j        |                             |            #d S d S r{   )r   r`  tupler,   rF   get_read_namesra  r   )rW  x	read_nameregisterr  s      r~   r]  z1GraphLowering.register_users_of.<locals>.register@  s    %$//      AHQKKKK%.. @!&!5!5!7!7 @ @I&y188????@ @@ @r   )rW  rU  rx   ry   r|   )r  rT  r]  s   ` @r~   register_users_ofzGraphLowering.register_users_of?  sF    	@ 	@ 	@ 	@ 	@ 	@ 	@ 	r   c                    t          |t                    sJ | j                            |           || j        vrdS | j        |         D ]}|                                 dS )z
        When a buffer is mutated we need to make sure all the reads to
        the old version are realized before the mutation happens.
        N)r   r   rS  r   ra  realize)r  r!  r  s      r~   mark_buffer_mutatedz!GraphLowering.mark_buffer_mutatedJ  st    
 $$$$$$  &&&t)))F&t, 	 	DLLNNNN	 	r   c                    || j         v r	|| j        v sJ d|z               t          | j         |                   }|| j        j        v r| j        j        |         n| j        |         S )z
        In AOTI, module buffers may have been mutated during the tracing and compilation.
        Thus we need to read from previously stored original buffers, to make sure the
        generated model.so uses correct initial values.
        z$Can not find the original value for )r{  rI  rZ   rz  r   )r  r!  	orig_names      r~   get_original_value_of_constantz,GraphLowering.get_original_value_of_constantX  s     t3338N8N8N2T9 9O8NN 5T5QRV5WXX	 DK,,, KY''%	
r   r  r   c                   t           j        j        s3| j                                        D ]\  }}t          ||          r|c S |dt          | j                   }|}|d                                         rd| }|                     |          }t          |          }|}d}|| j        v r| d| }|dz  }|| j        v || j        |<   |j
        d|j        dt          |                                          dt          |                                          dt          |          d	| j        |<   || j        |<   |S )Nconstantr   	constant_r  r*    r[  )r+   aot_inductoruse_runtime_constant_foldingrI  itemsr^   r   isdigitr
  r`   r
   r)  rY  r  r  hashrO  r{  )r  r!  r  constant_namerW  rc  prefixcnts           r~   allocate_non_dup_const_namez)GraphLowering.allocate_non_dup_const_nameh  s   "? 	)(,(<(<(>(> ) )$u!$.. )(((() <3c$.1133D	7?? 	&%t%%D  &&  %%dn$$$$s$$D1HC dn$$  $t{  tz  TYY[[!! &+DKKMM&:&: Dzz  	D!
 .7$T*r   rF   c                    |                      ||          }t          j        t          j        |t          |j        |j        g|                     |          R                      S r&  )	rq  rF   creater,   r(  r>   r
   r)  r  )r  r  r!  new_names       r~   add_tensor_constantz!GraphLowering.add_tensor_constant  su    33D$??"K.2.G.G.M.M    
 
 	
r   device_overridetorch.device | Nonec                D   | j                  j        |k    s|S t          j        j                                        5  |                      d|j         |j        pd | j                  	                    |                    }|| j         v sJ | d            t          fd| j        D                       r| j         |         | j        |<   t          fd| j        D                       r| j         |         | j        |<   |cddd           S # 1 swxY w Y   dS )z
        We AOT copy constants to the devices they are needed on.
        If device_override doesn't match the constant's device, then
        copy it and return a different name.
        Nr  r   z' should be in V.graph.constants alreadyc              3  >   K   | ]}t          |          k    V  d S r{   r`   )r   r#  r!  s     r~   r   z.GraphLowering.constant_name.<locals>.<genexpr>  sD         {333     r   c              3  >   K   | ]}t          |          k    V  d S r{   rz  )r   
param_namer!  s     r~   r   z.GraphLowering.constant_name.<locals>.<genexpr>  sD         z222     r   )rI  r
   r   utils_python_dispatch_disable_current_modesrq  r  r  tor   rJ  rK  )r  r!  rv  non_dup_const_names    `  r~   rn  zGraphLowering.constant_name  s    >$&/99_=TK[)@@BB 	& 	& "&!A!ALL/.L0E0JLLt$''88" "
 &777%NNN 877     #'#5      :>&:"#56     "&"7      =AN&=%&89 &=	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   C	DDDr   tuple[object]rw   dict[str, object]Expr | TensorBox | Nonec                	   | xj         dz  c_         t                                          |||          }|                     |          }t	          |t
                    rXt          j        j        st          |j
                  }n|j
        j        }|| j        |<   | j                            |           |S t	          |t          t           t"          f          r:t%          j        |          }|| j        |<   | j                            |           |S t	          |t(                    r7t+          ||          }|| j        |<   | j                            |           |S || j                            |           d S t	          |t,                    r| j                            |           d S t	          |t.          j                  rt3          t          j        j        j                  dk    rat9          t;          t          j        j        j                            j        t.          j        j         j!        t.          j"        j#        j$        fv sJ tK          j&        ||j'                  }|| j        |<   | j                            |           |S tQ          tS          |                    r<tK          j*        ||          }|| j        |<   | j                            |           |S t	          |t.          j+                  s
J |            |j,        s| -                    |          \  }	}
n| .                    |          \  }	}
| j        rS| j/        rL| j         | j/        v r>ta          j1        te          |tg          |j'        |j4        |	|
                              }n=ta          j1        tk          |tg          |j'        |j4        |	|
                              }|| j        |<   | j                            |           |j6        j6        | j7        |<   | j        j        r| 8                    |j'                   ts                      5  tu          |          s| j;        <                    |           d d d            n# 1 swxY w Y   |S )Nr*   r!  rW  )r!  r
   r'  )=r  r.  placeholderr
  r   r$   rf   rr  r  r   r   r  r<  r;  r   r  r   r  r   sympifyr   rG   r   r   	Generatorr   r[  r  rg  iterr   _prims	rng_primsgraphsafe_run_with_rng_stater   higher_orderinvoke_subgraphr,   GeneratorStater
   r   r  OpaqueObjectStater   _has_symbolic_sizes_stridesr  r  r  rF   rs  r=   r>   r)  rA   r  r=  r  r_   ra   r~  r   )r  r   rv   rw   exampler  r   gen
opaque_objsizesstridestensorr  s               r~   r  zGraphLowering.placeholder  s    	!''%%fdF;;""6**gx(( 	 7& ),W\::|((,Df%"))&111K#tU!344 	=))D(,Df%"))&111K!122 	!vW===C(+Df%"))&111J_"))&1114g}-- 	 "))&111411 	qw+122a77DQW)/00= =&C	&6= = =  #GGGC(+Df%"))&111J%d7mm44 	-6IIIJ(2Df%"))&111'5<0099'990
 2 	B!66w??NE77!88AANE7 	$	 $(<<<%&w~w}eWUU   FF %&w~w}eWUU   F %+&!%%f----3[-="6*" 	1  000 122 	3 	3.w77 3&**6222	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 s   
*R  RRrg   dict[str, Any]c                8	   t           j        u rKt          |d         t          t          t
          f          r#t                                          ||          S t          t          j	        j
                  st          d          r |i |S t          vrt          t          j	        j                  sJ  d                                                                d          d         }|t           v rt#          d| j        d           n
t&          j        rt+          g          rt,          nt.          }t0                              d|                    ||                     t7          d	          }|3t          j        j                                      r| j        rt@          }n t7          d	          }tC          |          }t#          || j        
           n2t+          g          rt-          ||          t/          ||          	 t0          "                    dt                              | j#        }	tI                    }
|
r||}}|
tJ          u rd|	j&        v ry|	j&        d         \  }}t          t          j	        j                  sJ dfd} |||          \  }} |||          \  }} |||          \  }}tK          ||||          \  }}n |
|	g|R i |\  }}d|	j&        v r tO          d          |i |}nd }tP          v rtR          j*        vrvtR          j*        +                               	 tQ                   |i |}tR          j*        ,                               n$# tR          j*        ,                               w xY w|3t          v rt                   |i |}n tO          d          |i |}|
r| -                    |	||||           |S # t\          $ r}d }t          | d          rH| j#        At          | j#        d          r,| j#        j&         | j#        j&        /                    dd           }ta          ||||          1                    |j2                  d d }~ww xY w)Nr   _inductor_lowering_functionz is not an OpOverloadr   FT)warnr%  override_decompz"Creating implicit fallback for:
%s)with_default)layout_constraintr%  z  via %seager_input_valsrv   r   rw   rx   tuple[Any, Any]c                x    t           j        j                            | |          }|J |d         |d         fS )Nr   r*   )r   r   operator_schemasnormalize_function)rv   rw   r   r   s      r~   	normalizez.GraphLowering.call_function.<locals>.normalize  sE    %*X%>%Q%Q &f& &F $*#5#5#5#)!9fQi#77r   should_fallbackadd_to_fallback_setr[  r   stack_tracer  )rv   r   rw   r   rx   r  )3operatorgetitemr   r`  rY  dictr.  r   r   r   OpOverloadPacketr   rM   r   r!  r   rJ   rN   r%  r+   implicit_fallbacksr   r8   r9   r  r  operator_strr   _libraryr}  
is_builtinr  rQ   rR   r  r[  rO   rH   r   rK   rT   rf   active_user_lowering_opsr   discardpropagate_mutation	Exceptionr   r7   with_traceback__traceback__)r  r   rv   rw   	base_nameerrortagdecided_constraintdefault_tagr  layout_constraintsold_args
old_kwargs	fake_argsfake_kwargsr  outer  r  s    `                 r~   r   zGraphLowering.call_function&  s   X%%%*T!WtUD>Q*R*R%77((v>>> &%*"=>> 	+71D
 D
 	+ 64*6***""fej&;<<  000 < ++C003I///"&"4$(	     * 1I *6(336--5 
 9&&vtV<<  
 ,E, , , K,77??  (   + '& 1JT1 1 1K *B+)N)N&&8"&"4     $VH-- I 0fEEE264HHHS	8IIj)F"3444!A!9&!A!A! J'+V*%)BBB
 *QV33128J1K.	;  *&%*2GHHHHH8 8 8 8 8 8 2;9k1R1R.	;'0yv'>'>f/8y:/N/N,*'@ &)[( (f $6#5a#I$#I#I#I&#I#ILD& AF**I&v5III#   n,,a&@@@ .226:::C,V4dEfEE2::6BBBB2::6BBBB ;**'/@@@ R.v5QQQ!%+  " O ''8ZvNNNJ 	8 	8 	8Kn--N%1D-v66 2%*6"/488MM#64[  nQ_--48	8s3   $D6P N - P !N..AP 
RBRRtc                T    t          | j                  dk    o| j        d         dk    S )zM
        True if this is a small constant attr that will be inlined.
        r*   r      )r   shape)r  s    r~   can_inline_constantz!GraphLowering.can_inline_constant  s&    
 17||q 4QWQZ1_4r   	tuple[()]LConstant | TensorBox | ShapeAsConstantBuffer | ir.Subgraph | TorchBindObjectc                   t          | j        |          }t          |t          j        j                  r8|| j        v r| j        |         S t          j        ||          }|| j        |<   |S t          |t          j	        j
                  r%|| j        |<   d| j        |<   t          ||          S t          |t                    r%|| j        |<   d| j        |<   t          ||          S t          t!          |                    r%|| j        |<   d| j        |<   t          ||          S t          |t          j                  sJ t$          j        j        s$t$          j        st-          |          s	|| j        v r|                     ||          S t3                      5  |j        dk    r:t7          |                                |j        |j                  cd d d            S |                     |          rWt@          !                    d|           ddl"m#}  ||$                                |j        |j        	          cd d d            S 	 d d d            n# 1 swxY w Y   |                     ||          S )
N)r!  graph_moduler+  r  r|   )rW  r)  r
   zInlining constant: %s r*   )r  )r)  r
   )%r   rz  r   r   r   rm   rN  r,   Subgraphr   ScriptObjectrL  rO  rG   r   r   r  r   r+   ri  rj  always_keep_tensor_constantsrS   r)  ru  r'   r  r<   itemr)  r
   r  r  r  loweringr  tolist)r  r   rv   rw   rW  r  r  s          r~   get_attrzGraphLowering.get_attr  s    "$+v66eUX122 	,,,*622+6>>>C*-D'JeUX233 	=/4D$V,*,D'"e<<<</00 	=/4D$V,*,D'"e<<<<DKK(( 	=/4D$V,*,D'"e<<<<%.....<	;2	; )//	; 333++E6:::]] 
	V 
	V{b  **,,ek%,  
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V
 ''.. V		2F;;;,,,,,,vellnnEKUUU
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V
V
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V ''v666s   !9I 'AI  I$'I$r   c                    t           r{   AssertionErrorr  r   rv   rw   s       r~   call_modulezGraphLowering.call_module      r   c                    t           r{   r  r  s       r~   call_methodzGraphLowering.call_method  r  r   torch.fx.node.Target"tuple[torch.fx.node.Argument, ...]c                *   t                                          |||          }t          |t          t          f          s|f}t          |t          t          f          sJ t          |                      d |D             }t          d |D                       s
J |            t          j        j	        j
        d         }t          |t          t          f          s|f}d |D             }g }t          |          t          |          k    sJ t          ||          D ]\  }}t          |t          j        t          j        f          s|                    |           Bt          |                                t          j                  r3|                    t          j                            |                     t*          j        j                            |          sJ d |j        d                                         D             }	|                    t          j        ||	                     || _        | j                                        D ]s\  }
}t          |t<          t>          j         t*          j        j        j!        t*          j        j        j"        f          rRt          |t                    sJ dt          |                       |#                                 t          |t                    sJ |j$        }t          |t          j%                  sJ |}|j$        }t          |tL                    r|'                                |
k    rnt          j(        )                    || j*        |
                    	 | j        +                    |          }| j*        |
         | j        |<   b# tX          $ r Y ow xY wu| -                                 t\          /                    d| j0        | j1        | j1        nd	           d S )
Nc                t    g | ]5}t          t          |                    rt          j        |           n|6S )rW  )r   r  r,   OpaqueValueTypeConstantr   r[  s     r~   r  z(GraphLowering.output.<locals>.<listcomp>  sP     
 
 
 4HQ3P3PWB&Q////VW
 
 
r   c              3  P  K   | ]}t          |t          t          j        t	          d           t          j        t          j        t          j        j	        j
        t          t          j        t          j        t          t          j        t          j        t          j        f          V  d S r{   )r   rF   r,   r<   r  r(  r   r	   logicboolalgBooleanr  EffectfulKernelrD   rG   OpaqueMultiOutputr  r  r  s     r~   r   z'GraphLowering.output.<locals>.<genexpr>  s       
 
& % KJJ%JK'/&,#(.( 
 
 
 
 
 
r   r   c                L    g | ]!}t           j                            |          "S r|   )r,   ExternKernelrealize_inputr  s     r~   r  z(GraphLowering.output.<locals>.<listcomp>4  s(    CCCq"///22CCCr   c                ^    g | ]*}t          |t          j                  r|j        j        n|+S r|   r  r   ss     r~   r  z(GraphLowering.output.<locals>.<listcomp>E  sC            $.a#>#>EAFKKA     r   r  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr-  )2r.  r   r   rY  r`  r  r  rf   rr  r[  rv   r   zipr,   rF   BaseViewr   get_output_specCommBufferLayoutr  
copy_inputr   	_inductoris_storage_and_layoutr   r  try_match_insignificant_stridesr  r<  rk  rG   r   Basicr  r  r`  r  rE   rA   get_nameMutationLayoutSHOULDREMOVErealize_intor=  r  
ValueErrorfinalizer  r  r1  r  )r  r   rv   rw   r   fx_node_argsresult_correct_stridesrfx_nodemeta_stridesr!  rW  value_storage_boxindr  s                 r~   r   zGraphLowering.output
  s0    f55&5$-00 	YF&5$-00>>$v,,>>0
 

 
 
  
 
& '
 
 
 
 
 	 	( )	 	 
, w+03,66 	+(?LCCFCCC!#<  CKK////fl33 	 	JAwa",!<== &--a0000A--//1DEE  '--bo.H.H.K.KLLLL )??BBBBB    %\%07799     
 '--6q,GG    4,2244 	 	KD%#KO&5O&8	  	 eY//  G$u++GG / MMOOOeY/////JEeR]33333 %JEe[11 
U^^5E5E5M5M-::45d;  ,223DEEC.2.H.ND&s++!   D 6N 			U'!]6DMMB	
 	
 	
 	
 	
s   /N==
O
Oc                B    | j         D ]}|                                 d S r{   )rF  decide_layout)r  r1  s     r~   r  zGraphLowering.finalizew  s2    < 	  	 C	  	 r   r   c              #  V   K   | j         }	 || _         d V  || _         d S # || _         w xY wr{   )r[  )r  r   olds      r~   set_current_nodezGraphLowering.set_current_node{  sF      	$ $DEEE #DD####s    	(c              #  H   K   | j         }	 d V  || _         d S # || _         w xY wr{   rX  )r  r  s     r~   set_current_wrapper_codez&GraphLowering.set_current_wrapper_code  s>      	$EEE #DD####s    	!r  r  
tuple[Any]r  new_args
new_kwargsc                    t          |          t          |          k    sJ t          |          t          |          k    sJ |j        t          j        j        j        u r|j        d         }t          |t                    sJ t          j	        j
                            |d         |d         d |                                D             |d                   }|D ]U}|d         |         }	|d         |         }
|	|
u r#                     t          j        j        j        j        |	|
fi            VdS t          |j        t          j        j                  sJ d fd}|j        j        }t)          t+          ||                    D ]"\  }\  }	}
|j        |         } |||	|
           #d |j        D             }|D ]'}||         }	||         }
||         } |||	|
           (dS )ax  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

        Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
        and then called fx_node(*new_args, **new_kwargs).

        If fx_node mutates any of new_args/new_kwargs, and they are different from
        old_args/old_kwargs, then we need to update the original tensor.
        rw   
kernel_idxconstant_args_idxc                r    i | ]4\  }}|t          |t          j        j                  r|j        d          n|5S r  r   r   r   r%   r   r   r  r  s      r~   
<dictcomp>z4GraphLowering.propagate_mutation.<locals>.<dictcomp>  K       1 
1ehm(D(DKqve}}!  r   tma_descriptor_metadataN
schema_argtorch._C.Argumentold_arg	ir.IRNodenew_argrx   ry   c                   ||u rd S | j         y| j         j        rot          |t          j                  r|f}|f}t          ||          D ]@\  }}||u r
                    t          j        j	        j
        j        ||fi            =d S d S d S r{   )
alias_infois_writer   r,   IRNoder  r   r   r   r   copy_r  )r  r  r  old_arg_itemnew_arg_itemr  s        r~   maybe_propagatez9GraphLowering.propagate_mutation.<locals>.maybe_propagate  s     '!!$0Z5J5S0 gry11 )&jG&jG25gw2G2G  .L,#|33 &&	,4|\6RTV    1000 r   c                    i | ]
}|j         |S r|   r  )r   args     r~   r  z4GraphLowering.propagate_mutation.<locals>.<dictcomp>  s    CCC33CCCr   )r  r  r  r  r  r  rx   ry   )r   r   r   r   r  triton_kernel_wrapper_mutationrw   r   r  r   r   get_mutated_tensorsrk  r   r   r#  r  r   r   _schemar   r  	arguments)r  r  r  r  r  r  rw   mutatedr!  r  r  r&  schemar   r  schema_kwargskeys   `                r~   r  z GraphLowering.propagate_mutation  s8     8}}H----:#j//1111>UY3RRR^H-Ffd+++++-@TT<(./  &   45 G   Y Y$X.t4$X.t4g%%""59>#7#?'7ASUWXXXXF'.%**?@@@@@	 	 	 	 	 	( ''0Xx1H1H'I'I 	: 	:#C#'7)#.JOJ9999CC&2BCCC 	: 	:C oG oG&s+JOJ9999		: 	:r   r  c                ^    | j                             di                               d          S )z:Get the user-annotated stream index from FX node metadata.customstream)r   r   r  s    r~   _get_node_streamzGraphLowering._get_node_stream  s(     vzz(B''++H555r   c                   |                      |          }|j        D ]f}|                      |          }||k    r| j                            |          }t	          |t
          j                  r|                                 gdS )aV  Realize IR inputs that are on a different stream.

        Without this, pointwise ops across stream boundaries would be inlined
        into each other during lowering, making it impossible for the scheduler
        to split them into separate kernels.

        None means the default stream, so it is compared like any other value.
        N)r4  r   envr   r   r,   rF   r`  )r  r  node_stream
input_nodeinput_streamir_values         r~   $_realize_inputs_at_stream_boundariesz2GraphLowering._realize_inputs_at_stream_boundaries  s     ++A..+ 	# 	#J00<<L{**x||J//H(BL11 #  """	# 	#r   objectc                   #$%&' d/fd}d0%& fd
}ddl m} t           j                  $t           j                  't          g          }j        dk    }|r@                               \  }}|t          ||          z  } 	                               t          j                            |          5  t          j                                                                     5                                 5  t!          j                  5  j        dk    rj        rt%          j        t&          j        j                  r{t&          j        j                            j                  rRt3                    s|                    ddfd          r) |d            t7          j        d          |i |}	n7j        dk    rmt%          j        t&          j        j        t&          j        j        f          r8t;                    r) |d            t7          j        d          |i |}	nj        dk    rj        t&          j        j        j         u rtB          j"        dk    r |d           tB          j"        dk    r|}
|}j#        $                    d          x}r&|d         }|d         }tK          ||||          \  }}ntM          g|R i |\  }} '                    j        ||          }	 (                    |
|||           ntS          dtB          j"                   tU          j                  r |d           t%          j#        d         t&          j+        t&          j,        t&          j-        f          rj#        d         j.        j/        }	nNta                      1                              }	n, |d           ta                      1                              }	t&          j        j2        j3        j4        t&          j        j2        j5        j4        t&          j        j2        j6        j4        t&          j        j2        j7        j4        t&          j        j2        j8        j4        g#ts          d j:        D                       } j;        v &ts          #fdj:        D                       %j#        $                    dd          rt%          |	tx                    r|	=                                 j#        d         >                                }t'          j?        j        j@        | }|	A                                |k    r6|s4t          jB        |          }t          jC        D                    |	|          }	|rHt%          |	tx                    r3t%          |	jE        t          jF                  r|	=                                 |s%r4t%          j#        $                    d          t&          jG                  r&r j;        $                              }nj#        d         >                                }|t          |          dk    rtB          jH        p& o% }t&          jI        J                    j#        d                   }t          t          |                    dk    }|sp|rnt          |	L                                          d!k    rI jM        v r@&s>%s<t          jN        O                    |	L                                t&          jP                  }|st          |          rωj#        d         Q                                st%          |	jE        t          jF                  r5t          jC        D                    |	t          jB        |          |"          }	n\t          |	L                                          dk    rt          |          dk    rg }t          jC        R                    |	||"          }	t          t          j:                            }|dk    rt%          |	tx                    rj:        D ]}|j        t          v r|	T                                 t&          j        j2        jU        j4        t&          j        j2        jV        j4        t&          j        j2        jW        j4        g}g } jX        s.|Y                    t&          j        j2        jZ        j4                   t&          j[        j\        r|t&          j        j]        j^        j4        t&          j        j]        j^        j_        t&          j        j2        j`        j4        t&          j        ja        jb        j4        t&          j        ja        jb        jc        t&          j        ja        jb        j_        t&          j        ja        jb        jd        gz  }|t&          j        j]        je        j4        t&          j        j]        je        j_        t&          j        j]        jf        j_        t&          j        j]        jg        j4        t&          j        ja        jh        j4        t&          j        ja        ji        j_        gz  }t&          j[        jj        r|t&          j        jk        jl        j4        gz  }|j        |v rQt          jC        D                    |	t          jB        j#        d         >                                          d#"          }	|j        |v r^|jm        d         u rOt          jC        D                    |	t          jB        t          j#        d         jo                                      }	|j        d$k    r~t%          |	jE        jE        t          t          f          rXtB          jr        r8|	jE        s                                dk    r|	jE        t                                s|	=                                 |	jE        }t%          |t                    sht%          |t          jF        t          jv        f          rB|jE        }t%          |t                    s&t%          |t          jF        t          jv        f          Bt%          |t                    r3|w                    t          j:                            r ||	          }	|	x                    t          j:                             t%          |	tx                    r4|	y                                r  ||	          }	|	T                                 t%          |	tx                    ret%          |	jE        t                    rK|	jE        jE        }t%          |t                    r*|t                    d%&          r|	=                                 d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          |	            {                    |	           t          t          j}                             } j        $d          D ]}||~                                z  } j        'd          D ]}||~                                z  }t           j        j        j        }j        d'k    rDt%          |	t          j}                  r*|                    |	          r|                    |	           d1$' fd(} t           j        j        rj        d'k    r|	S t          t           j        j        j        j#        $                    d)i                     }!|!J t          d* |!D                       }"||"k    s0J d+| d,|" d-                                 d. |                                               |           |	S )2z4Lower and execute a single FX node into Inductor IR.msgr   rx   ry   c                d    t                               dt          j                  |            d S )Nzlowering %s %s)r  r  r   format_node)r>  r  s    r~   r  z%GraphLowering.run_node.<locals>.debug  s*    II&
1=(A(A3GGGGGr   r   r  r  r   c           	        t           j                            |j        d                   }|j        d                                         }t          t          |                    dk    }|s|rt          |                                           dk    r\|j        v rSsQsOt          j
                            | t          j        t          |j        d         j                                      } | S )Nr  r      )r   _prims_common%is_non_overlapping_and_dense_or_falser   r  r   r   r  rp  r,   r  require_stride_orderget_stride_orderr   r  )r   r  denser  unbacked_symbols_in_stridesis_input_for_as_stridedis_user_visibler  s        r~   &maybe_apply_channels_last_stride_orderzFGraphLowering.run_node.<locals>.maybe_apply_channels_last_stride_order  s     'MMu E fUm**,,G*-.CG.L.L*M*MPQ*Q'/ ))**a//888' 9/ 9 =='6qve}7JKK   Mr   r   )CompilerBisectorr   inductorrM   c                 "    t                     S r{   )reprr  s   r~   <lambda>z(GraphLowering.run_node.<locals>.<lambda>*  s    a r   rK   Fr  flexible_layout-user_defined_triton_kernel_layout_constraintsneeds_fixed_stride_orderr  r*   z1Unknown triton_kernel_default_layout_constraint: r   r  r+  c              3  ,   K   | ]}|j         d k    V  dS )r   Nr   )r   r  s     r~   r   z)GraphLowering.run_node.<locals>.<genexpr>~  s)      DDDDGx/DDDDDDr   c              3  *   K   | ]}|j         v V  d S r{   )r   )r   r  as_strided_opss     r~   r   z)GraphLowering.run_node.<locals>.<genexpr>  s;       * *26~-* * * * * *r   inductor_realize_to_stridesNrB  )allow_paddingTr   d   )	thresholdr  c                     d j         d          D             } |                     d j        d          D                        d                    |           S )Nc                D    g | ]}d |                                  d| dS )unbacked_symbol_defs= in:

get_unbacked_symbol_defs)r   r1  s     r~   r  zCGraphLowering.run_node.<locals>.format_new_defs.<locals>.<listcomp>K  sH        V(D(D(F(FUUcUUU  r   c              3  L   K   | ]}d |                                  d| dV   dS )r]  r^  r_  Nr`  )r   r   s     r~   r   zBGraphLowering.run_node.<locals>.format_new_defs.<locals>.<genexpr>O  sV         T(C(C(E(ESSRSSS     r   z***
)rF  extendrG  r   )r   buffer_watermarkoperation_watermarkr  s    r~   format_new_defsz/GraphLowering.run_node.<locals>.format_new_defsJ  s     <(8(9(9:  A HH  /*=*>*>?      <<??"r   unbacked_bindingsc              3  j   K   | ].}t           j        j        j                            ||          V  /d S r{   )rf   r"  r  unbacked_renamingsr   r  s     r~   r   z)GraphLowering.run_node.<locals>.<genexpr>  sN       /
 /
 K!488A>>/
 /
 /
 /
 /
 /
r   zfailed  >= z (inductor >= fx)
fx node is: z
new operations are:

)r>  r   rx   ry   )r   r  r  r   rx   r  rx   r   )!torch._inductor.compiler_bisectorrL  r   rF  rG  r(   r   fetch_args_kwargs_from_envrY   r;  r,   r"  current_originscurrent_stream_idxr4  r	  rf   r   r   r   r   r   r  r}  r  rL   disable_subsystemrK   HigherOrderOperatorrb   r   r  r)  r+   'triton_kernel_default_layout_constraintr   r   rH   rI   r   r  r   r   r  SymFloatSymBoolr   r  r.  run_noder   
as_stridedr  as_strided_as_strided_scatterresize	resize_asr   r  r   rF   r`  r  r  any_is_symbolicmaybe_get_striderF  r  rE  r  r  r   r   rC  rD  r   r  rp  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesrP   realize_hintr   mm_int_mmr  r   r   r   _has_mkldnnr   _linear_pointwiser  mkldnn_rnn_layeronednnqlinear_pointwiser  binary_tensorr   r   _convolution_transpose_pointwiseqconv_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrv   r   r  rB   rC   delay_realize_cheap_outputs	num_readshas_large_inner_fnrE   
MutableBoxshould_realize_on_reuse
mark_reusehas_exceeded_max_readsr;   r^  r   r   ra  rr  r:  r  is_unbacked_symintr   r  r    r@  create_deferred_runtime_asserts))r  r  r  rK  rL  originsis_call_functionrv   rw   r   r  r  r  inp_args
inp_kwargs	is_outputr  sym_stridesstride_orderrX  rG  rH  	num_usersr  need_fixed_layoutneed_fixed_channels_last_layout_datacurrnew_unbacked_defsr1  r   r  rf  rg  renamed_unbacked_bindingsrV  rd  rI  rJ  re  r  s)   ``                                 @@@@@r~   ru  zGraphLowering.run_node  s   	H 	H 	H 	H 	H 	H	 	 	 	 	 	 	 	0 	GFFFFFt|,,!$/22 $.qc??4?2 	9::1==LD&~dF333G55a888I%%g..Y	) Y	)I(()>)>q)A)ABBY	) Y	) !!!$$Y	) Y	) q!!	Y	) Y	) ''H ( qx)>?? ( N(33AH==	 ( :!<< ( (99"K  ( ()))N)!(NNN 
 ''Huz4ej6TU  ( /q11	 ( ()))N)!(NNN 
 ''H	 6 UUUBFWWWEFFFB12 2  $H!'J+,6::6H+I+II' S#3A#6%5a%8
'@ "$&( (ff (?q'R4'R'R'R6'R'Rf!//$GGF++AxT6RRRR&|FLz||   !** - '(((F5MEL%.%-#P  1 VE]/4FF"WW--a00FFb			))!,, 	)1	*2	19	%-	(0N DDAGDDDDDI4#CCO&) * * * *:;'* * * ' '# vzz7?? XJ	E E X    &-..00#o3CWM**,,777#%#6w#?#?L_AA&,WWF!vy11! v{BK88!     14 1*

5!!5<; ; 1 # 5">BB1EEGGfUm2244G&3w<<!+;+;*A/.A%655 " "/UUu E 1'::;;a? 0 8
!
   1 122a77!@@@ / A 7 A #%"3"T"T"OO--u/B# # 7 3w<<  6%=1133 z"KK8 8  &(_%I%I & " 3G < <.; &J & &FF  #6??#4#455::s7||a?O?O*,%'_%J%J &} &K & &F Jqw//00I1}}FI!>!>}G I- I-D{&;;;++--- "IN?G!IN-5!IN2:-)
 ;=7# Y-44UY^5O5WXXX 8/ Y- %	 0 B J %	 0 B I %	 ? G %	 0 B J %	 0 B I %	 0 B I %	 0 B P2 - < %	 0 G O %	 0 G N %	 0 H O %	 0 Q Y %	 0 @ H %	 0 B I@ ;  %x/ Y 1eim6O6W5X X 1;*;;;%'_%I%I & " 3AF5M4H4H4J4J K K.2 &J & &F !K+JJJ !TYq\ 1 1%'_%I%I & " 3$B16%=CV$W$W!" !"& &F w(**%fk&6I8NOO - !' B) %+K$9$9$;$;q$@$@(.(F(F(H(H %A !)"NN,,,$UJ77 'JBK7= = ' "JE %UJ77 'JBK7= = '
 eZ00 OU5R5RLL6 6 O DCFANNF !!#ag,,/// &),, &1N1N1P1P & @?JJ##%%% &),, )FK1T1T ){'dI.. )...== )(((sY	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	) Y	)v 	61%%%v&&&&u|466< 0 1 12 	@ 	@C!=!=!?!??/"5"6"67 	? 	?B!<!<!>!>>G$.	
 DM!!65<00 ",,V44 " !!&)))		# 		# 		# 		# 		# 		# 		# 		# 7 	14=#8#8M. 6G&

3F(K(K
 
 !,,, %/ /
 /
&/
 /
 /
 %
 %
!
 !$====:' : :-F : :==??: :&5o&7&7: : >==
 	,,Q0ABBBs   3x4w+
wrv=	1w=wwwww+ww+ww+x+w/	/x2w/	3xx	xr  OrderedSet[sympy.Symbol]c                    t           j        rd S d fd}t                      rl|j        t          j        j        j        j        u rJ j	        rC 
                    |          \  }}|d         d	k    r ||d         |d          d
           d S d S  xj        |z  c_        t          j        j        j        }|D ]5} j                            |g           }|j        |         }	|                                                    |	          sbdd}
 |
|	j                  r |||	j        k    | d|	j                     |
|	j                  r |||	j        k    | d|	j                    |D ]~}t/          |j                  }| j        z
  }|rEt3          |t4                    } j                            |g                               |           g ||j        |j                    7d S )Nr  r#   r>  r   rx   ry   c                    t          j        | |          }                    |d                               |           d S )NTrG  )r,   AssertScalarrN  rF  )r  r>  	assert_opr  s      r~   make_assertzBGraphLowering.create_deferred_runtime_asserts.<locals>.make_assert  sG    c22I  T :::##I.....r   r   Tz to be Truer  r	   r   c                n    | t           t            fv rdS 	 t          |            dS # t          $ r Y dS w xY w)NFT)r)   r  	TypeError)r  s    r~   is_convertiblezEGraphLowering.create_deferred_runtime_asserts.<locals>.is_convertible  sP    & 111#(5)FFF#'4( ) ) )#(55)s   & 
44rj  z <= )r0  )r  r#   r>  r   rx   ry   )r  r	   rx   r   )r+   do_not_emit_runtime_assertionsr   r   r   r   r   _assert_scalarr  r  rm  r9  rf   rr  r:  r  r8  r   var_to_range _default_unspecified_value_rangeissubsetlowerupperr   r  r   r   r   r   )r  r  r  r  	node_argsr  r  i0rasvrr  rafvsmissingi1s   `              r~   r  z-GraphLowering.create_deferred_runtime_asserts  se    0 	F@	/ 	/ 	/ 	/ 	/ 	/ %&&,	;EIN9AAA B  ::1==LIq|t##IaLYq\*F*F*FGGGGG $# ''+<<''(2I ( ; ;(,,R44+B/ AACCLLRPP K) ) ) ) &~bh// K#B"(Nr4I4Irx4I4IJJJ%~bh// K#B"(Nr4I4Irx4I4IJJJ ; ;B/88C!D$??G ; c222*55b"==DDRHHHH#BG\::::;); ;r   c                    t           j        rt          d          t          j        dvrt          dt          j                   d S )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform )r+   disable_cpp_codegenr6   sysplatformr  s    r~   !validate_can_generate_cpp_wrapperz/GraphLowering.validate_can_generate_cpp_wrapper  sL    % 	D()BCCC<;;;()O)O)OPPP <;r   is_subgraphparent_wrapper_codePythonWrapperCodegen | Nonepartition_signaturesGraphPartitionSignature | Nonec                   | j                                         }|                    d           |                    d           t          |          dk    s0J d                    d                    |                                t          |          dk    }|rdn|                                | _        | j        r| 	                                 t          | j                  | _        t          | j        | j        | j                  }|J d| j         d            |                    ||||          | _        | j        r| j        j        j        | j        _        d S d S )	Nr(  r   r*   zDoes not support mixing {}+r   zDevice z not supported)r@  r7  r  r   formatr   r   rB  r  r  r2   rW  r3   r$  rs  rX  r  _names_iter)r  r  r  r  r  r@  only_cpuwrapper_code_gen_clss           r~   init_wrapper_codezGraphLowering.init_wrapper_code  s    (--//U###V$$$<  A%%%'C'J'JHH\""(
 (
%%% |$$)$,D55,2B2B2D2D 	5224441$2BCC=d. 
  
 $//6d&666 0// 177 	
 
  	W,0,=,J,VD)))	W 	Wr    list[int | float | torch.Tensor]c                   dd l }|                    | j                  }|                    |          }g }|j        j        D ]?}|j        dk    r2|j        t          j        j	        j
        u r|                    |           @g }i }i }g }	i }
|D ]}|j        d         D ]R}|D ]M}||v rt          |t          j        j                  r't!          |          ||<   |                    |           NS|j        d         }t          j        j                            |j        d         |j        d         d |                                D             |j        d                   }i }|j                            |          5  |                                D ]\  }}||v rO|j                            t          j        |f	          }t!          |	          ||<   |	                    |           X||
v r|
|         ||<   ht!          |	          |
|<   |	                    |           |
|         ||<   	 d d d            n# 1 swxY w Y   |||j        <   |	|z   }|j        j        D ]$}|j        d
k    rt3          |          f|_         n%|                                 t          j                            |          }|                    |          }t!          |          dk    r|t!          |	          d          }i | _        |D ]}d}g }|j        d         D ]}g }|D ]Z}t          |t          j        j                  s|                    |           7d}|                    |||                             [|                    t3          |                     |r|| j        |j        <   |d t!          |	                   | _        || _         d S )Nr   r   gridrw   r  r  c                r    i | ]4\  }}|t          |t          j        j                  r|j        d          n|5S r  r  r  s      r~   r  z9GraphLowering.extract_autotune_inputs.<locals>.<dictcomp>6	  r  r   r  )rv   r   FT)!r7  deepcopyry  rr  r   r   r   r   r   r  r)  r   rw   r   r   r%   r   r   r   r*  rk  inserting_beforer   cloner!  rY  rv   	recompileInterpreterrA  rm  rk  rl  )r  r  r7  	cloned_gmtriton_nodesr   grid_inputsvisited_gridstriton_inputskwargs_inputsvisited_kwargsr  r  rw   r-  r  r  r  new_nodenew_outputsrunnerreturned_outputsgrid_outputsdynamic_grid	new_gridsnew_grids                             r~   extract_autotune_inputsz%GraphLowering.extract_autotune_inputs	  s    	MM$,//	~66O) 	* 	*D?**K59#9#XXX##D))) ,.24(*-/)+  &	2 &	2DF+ 0 0 0 0Cm++ !#ux}55 0-0-=-=c*#**3///0 [*F-@TTL)/0  &   56 G *,J11$77 6 6"LLNN 6 6DAqG||#,?#@#@TUSW#@#X#X(+M(:(:
1%,,X666 N**(6q(9
1 (+M(:(:N1%!((+++$21$5JqMM66 6 6 6 6 6 6 6 6 6 6 6 6 6 6 (2M$)$$#k1O) 	 	Dw(""";//1	 # 	%%i00!::n55{a+C,>,>,@,@AL$&D!$ A A$.0	 K/ 	6 	6D!H# J J)#ux}== %$OOC000$'+ ]35G(HIIII$$U8__5555 A7@D)$)4!12FC4F4F2F!G"/s   >B0H;;H?	H?	)tuple[ValueWithLineMap, ValueWithLineMap]c                    t           fddD                       rd
 fd}t          j        j        rt          j        j        rZd} j        D ] }t          |t          j                  rd} n!|r. |            } 	                    |           t                                                       S  j        s                                 S d _                                         j        } |            }t           j        j                                        5   ||           ddd           n# 1 swxY w Y   ~d _         j                                          j                                          j                                         t0          j        j        j                                         t0          j        j        j                                         t;          j                     t          j        d	di          5                                   cddd           S # 1 swxY w Y   dS                                  S )zQ
        For GPU, Triton kernels are autotuned and stored as cubin files
        c              3  *   K   | ]}|j         v V  d S r{   )r@  )r   r
   r  s     r~   r   z9GraphLowering.codegen_with_cpp_wrapper.<locals>.<genexpr>u	  s+      IIvv**IIIIIIr   )cudaxpurx   r  c                    ddt           j        j                                        } | |t	          t
          j        t                    s]| j        r| j        	                                 d | j
        D             }fdt          j        |t
          j                  D             n>fdt	          t
          j        t                    rj        nt
          j        D             j        r_d	d
lm} fdt#          j                  D             }|D ]5}|         }t	          |t           j                  sJ  ||          |<   ~6S )Nr[  ,torch.SymInt | torch.SymFloat | torch.Tensorrx   int | float | torch.Tensorc                <   | d S t          | t          j        t          j        f          r| j        j        S t          | t                    rt          |           S t          | t          j                  s'J dt          t          |                     z               | S )Nz&Unknown type when creating real inputs)r   r   r  rs  r   hintr   r   r   r   r  )r[  s    r~   materializezXGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.materializex	  s     y#t#Aen'EFF 	! v{*#Az22 !%ayy()!U\::  Ds4PQ77||S :  !r   c                    g | ]}||S r{   r|   )r   params     r~   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>	  s(     # # #! , ,,,r   c                &    g | ]} |          S r|   r|   r   r[  r  s     r~   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>	  s1     # # # $A# # #r   c                &    g | ]} |          S r|   r|   r  s     r~   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>	  s1     # # # $A# # #r   r*   )clone_preserve_stridesc                l    g | ]0\  }}|j         v t          |         t          j                  .|1S r|   )r]  r   r   r   )r   r   r!  real_inputsr  s      r~   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>	  sN     * * *%C4#666&{3'7FF 7 666r   )r[  r  rx   r  )r   _guardsTracingContexttry_getr   rf   r  re   output_stridesclearparams_flatr  chainr  r]  
compile_fxr  r   r<  r   )	tracing_contextr  r  r^  r   mutated_inpr  r  r  s	         @@r~   extract_real_inputszCGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputsw	  s   ! ! ! !" #(-">"F"F"H"H".zM;8 8. '5 ?'6<<>>># #%4%@# # #K
# # # #!*am!L!L# # #KK# # # #  *!-EE/D//!"# # #K & (BBBBBB* * * * *)243D)E)E* * *&  2 ( ( '2#&6)+u|DDDDD+A+A++N+NC('K""r   FTNztriton.autotune_at_compile_time)rx   r  ) r   r+   tritonautotune_at_compile_timeautotune_with_sample_inputsrG  r   r,   UserDefinedTritonKernelr  rt   codegenr  r  compile_to_modulecallr   r}  r~  r  rQ  r  rP  rV  rf   rr  r:  precomputed_replacementsinv_precomputed_replacementsr-   resetpatch)r  r
  user_defined_kernelsr   r  compileds   `     r~   codegen_with_cpp_wrapperz&GraphLowering.codegen_with_cpp_wrappero	  s    IIIIIIIII q	"D# D# D# D# D# D#L }5 &* =< 	?+0("o " "%b"*DEE "370!E" , ?&9&9&;&;44[AAA8>>>||~~%} *  <<>>) $) 113381133[1HHJJ * *H[)))* * * * * * * * * * * * * * * $( $**,,,'--///'--/// 9??AAA =CCEEE\#De"LMM * *<<>>* * * * * * * * * * * * * * * * * * <<>>!s$   D44D8;D8H,,H03H0c                    ddl m} t          j        dd          5   || j                  | _         ddd           dS # 1 swxY w Y   dS )z
        (Re)initializes the scheduler member.  When initializing the scheduler, no CUBIN
        files should be generated (to avoid biasing any benchmarks and pessimizing
        fusion decisions).
        r*   )	Schedulerztriton.store_cubinFN)rj  r  r+   r  rG  )r  r  s     r~   _update_schedulerzGraphLowering._update_scheduler	  s     	)(((((\.66 	8 	8&Yt77DN	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8s   ?AAc                4   t          dd          5  |                                  |                                  t          j                            | j        | j        j                   | j	        
                    |            | j                                         t                              dt          j        j                   | j	                            | j                  }| j	                                         |cd d d            S # 1 swxY w Y   d S )NzGraphLowering.codegenTlog_pt2_compile_eventzFFinished codegen for all nodes. The list of kernel names available: %s)r   r  r  rf   r  draw_orig_fx_graphry  rj  r   rX  push_codegened_graphr  r  rr  r  generater  pop_codegened_graph)r  r   s     r~   r  zGraphLowering.codegen	  s<   1NNN 	 	""$$$""$$$G&&t|T^5IJJJ224888N""$$$IIX0  
 &//0ABBF11333!	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   C.DDDparent_graphc                D   t          dd          5  |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |                                  | j        	                                 ddd           dS # 1 swxY w Y   dS )a  
        This is a more compact version of the `codegen()` above
        where we codegen this graph as a subgraph of some parent
        graph. The parent graph is passed as an argument: the
        intention is to inline codegening of the subgraph in
        the parent graph's wrapper code (including the generated
        kernels). The wrapper code is not finalized (via `.generate()`
        call), as this will be done in the parent graph's `codegen()`.
        zGraphLowering.codegen_subgraphTr  N)
r   rX  rW  r  r@  rA  rB  r  rj  r  )r  r#  s     r~   codegen_subgraphzGraphLowering.codegen_subgraph
  s     :RVWWW 		% 		% , 9D*5DO+7D , 9D+7D+7D""$$$N""$$$		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		%s   A6BBBVtuple[int, list[tuple[BaseSchedulerNode, int]], list[tuple[BaseSchedulerNode, float]]]c                    d}g }g }| j         j        D ]^}|                                }||z  }|                    ||dz  f           |                    ||                                f           _|||fS )Nr   rB  )rj  r   get_read_write_buffers_sizesr   get_estimated_runtime)r  total_bytesnode_countsnode_runtimesr   	num_bytess         r~   r  zGraphLowering.count_bytes
  s    
 N( 	G 	GD99;;I9$Ki1n5666  $(B(B(D(D!EFFFFK66r   zCallable[[str], None] | Nonesave_output_codeCompiledModulec                    t          dddd          5  |                                 cd d d            S # 1 swxY w Y   d S )NzGraphLowering.compile_to_modulecode_genT,inductor_code_gen_cumulative_compile_time_us)
phase_namer  dynamo_compile_column_us)r   _compile_to_moduler  s    r~   r  zGraphLowering.compile_to_module.
  s    -!"&%S	
 
 
 	- 	- **,,	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-s   599c                @   | j         r|                                 n|                                 \  }}t          |t                    r|                     |          }n7t          |t                    r|}nt          dt          |                     |j	        J t          |j	                   t                              d|j	                   t          j        d|j	                   t          j        r#t#          d|j	         t$          j                   t          |t                    rnt(          j                            |j	                   t(          j                            t.          j                            |j	                  d         dz              |S )Nz Unrecognized wrapper code type: Output code written to: %szCompiled module path: )filer   .debug)r  r  r  r   rd   _compile_to_module_linesr0   NotImplementedErrorr  __file__ru   r  r  rr   r  r+   benchmark_kernelprintr  stderrrf   output_coder7  ospathsplitext)r  rX  r  mods       r~   r5  z GraphLowering._compile_to_module7
  st    04/?SD))+++T\\^^ 	a l$455 	//==CC&;<< 	CC%G43E3EGG   |'''%%%		.===93<HHH" 	L93<99
KKKKl$9:: 	GG---GLL))#,77:XEFFF
r   rX  rd   c                  
 ddl m} t          j        j        rj| j        j                                        }|                    dd          }d|z   | j        j	                                        z   dz   }|j
        z   _
        t          j        t                              j
                   t          j        dj
                   t          j                    }t#          j        |j
                   	 d	 j        D             }|                    j
                  \  }
t          j        d

           t*          j                            
           t*          j                            t0          j                            
          d         dz              t7          d
fdfd           n## t8          $ r t7          dfd            w xY wt;          dd          5  |                    |
|i | j        | j         | j!                  }d d d            n# 1 swxY w Y   || _"        
| _#        || _$        t          j%        r8t          j&        r,|'                                }	|(                    |	dd           |S )Nr*   )PyCodeCachez"""z\"\"\"z&r"""
Compile-time auto-tuning block: 
z"""
zOutput code: 
%s)codec                &    g | ]\  }}||j         fS r|   r  )r   line_nor   s      r~   r  z:GraphLowering._compile_to_module_lines.<locals>.<listcomp>p
  s4       !GT $*+  r   r7  r   r9  inductor_output_codec                 H     t           j                                       dS )N)filename	file_path)rA  rB  abspath)rB  s   r~   rP  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  s#     $!#!6!6  r   c                      j         S r{   r  r  s   r~   rP  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  
    <#5 r   )
payload_fnc                      j         S r{   r  r  s   r~   rP  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>}
  rP  r   zPyCodeCache.load_by_key_pathTr  )linemapattrs)timesrepeat))	codecacherF  r+   r  r  rX  kernel_autotune_defsgetvaluereplacekernel_autotune_callsrW  r  r.  rr   r  rU   inductor_meta_from_configrV   begin_compileline_mapwriterf   r@  r7  rA  rB  rC  r   r  r   load_by_key_pathrI  rL  rM  rs  rt  ru  benchmark_harnessprofile_bandwidth_outputget_argsbenchmark_compiled_module)r  rX  rF  rX  tuning_codeinductor_metarS  r0  rD  rv   rB  s    `        @r~   r:  z&GraphLowering._compile_to_module_linesV
  s4    	+*****=1 	B#'#4#I#R#R#T#T #7#?#?{#S#S 7&' #9BBDDE 	  "-|/A!AL)5**<+=>>>1<3EFFF&@BB*=|?QRRRR	 %1%:  G $)),*<==IC!">EEEG%%%GLL))$//2X=>>> &    6555      	 	 	&5555   
 	  8PTUUU 
	 
	..n. 4	 / 	 	C
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 $# 	C(G 	C<<>>D))$a)BBB
s   8B'F8 8 G,/H''H+.H+c                   g }t          j        d          }t          j        d          }|D ]}t          |t          j                  r-|                    | j         dt          |                      It          |t          j                  r-|                    | j         dt          |                      |                    |	                                           |S )Nr   _none_shape)
r  r  r   r,   NoneAsConstantBufferr   r!  rg  rD   r  )r  r  namesshape_counternone_counterr   s         r~   _get_output_nameszGraphLowering._get_output_names
  s    !** q))! 	. 	.D$ 788 .	DD\0B0BDDEEEED"":;; .	FFm1D1DFFGGGGT]]__----r   c                6    |                      | j                  S r{   )rn  r  r  s    r~   get_output_nameszGraphLowering.get_output_names
  s    %%d&8999r   c                   || j         v op| j         |                                         dk    oMt          | j         |                                                   dk    ot	          | j         |                   dk    p|| j        v S )Nr*   r   r(  )r<  r  r   r  r?   r?  r	  s     r~   is_unspec_argzGraphLowering.is_unspec_arg
  s     D%% B!$'1133q8BD%d+4466771<B   1$ 788EA	3
 T22	3r   )NNNFFNNFFFNNNNNNFN)(r	  r
  r  r  r  r  r  r  r  r   r  r   r  r  r  r  r  r   r  r   r  r   r  r  r  r  r  r  r  r   r!  r  r"  r#  r$  r   r%  r&  rx   ry   )rx   ry   )r  r  rx   r  )r  r  rx   r  )r   r  rx   r  )r   r  rx   r	   )r
   r  r  r.   rx   r   )T)r  rp   r  r   rx   r  )rx   r  )r
   r  rx   r  rk  )r	  rm   r  r   rx   r   )r!  r   rx   r   )r	  r
  r  r  r  r   rx   r  )rx   r  )r!  r   rx   ry   )r
   r  rx   ry   )rx   r   )r#  r   rx   r$  )r+  r   rx   ry   )r#  r   rx   r.  )r#  r   rx   r3  )r#  r   rx   r<  )rv   r   rx   r   )r   rB  rx   r   )rI  rJ  rH  r   rx   r   )rO  rP  rx   r   )rT  rU  rx   ry   )r!  r   rx   r  )r!  r  r  r   rx   r   r{   )r  r   r!  r  rx   rF   )r!  r   rv  rw  rx   r   )r   r   rv   r  rw   r  rx   r  )r   rg   rv   r   rw   r  rx   r   )r  r  rx   r   )r   r   rv   r  rw   r  rx   r  )r   r   rv   r   rw   r   rx   r   )r   r  rv   r  rw   r  rx   ry   )r   r   )rx   r  )r  r   r  r  r  r  r  r  r  r  rx   ry   )r  r   rx   r  )r  r   rx   ry   )r  r   rx   r<  )r  r   r  r  rx   ry   FNNN
r  r   r  r  r  r  r  r  rx   ry   )r  r  rx   ry   )rx   r  )r#  r  rx   ry   )rx   r&  )rx   r/  )rX  rd   rx   r/  )r  r  rx   rP  )rx   rP  )r!  r   rx   r   )J__name__
__module____qualname____annotations__r/  r  r  r  r  r  r  r  r  
contextlibr   r  r  staticmethodr0  r
  r  ro  r  r  propertyr"  r*  r-  r2  r7  r  rA  rF  rN  rS  r^  ra  rd  rq  ru  rn  r  typing_extensionsoverrider   r  r  r  r  r   r  r	  r  r  r4  r;  ru  r  r  r  r  r  r  r  r%  r  r.  r  r5  r:  rn  rp  rr  __classcell__r  s   @r~   r  r  d  s        """"
 37%)#!"&"!$48)-(,-104 MQ+TC TC TC TC TC TC TCl1 1 1 1#  #  #  # J   # # # #*E E E EM M M M< < < < <:4 4 4 4 ( ( ( (    ^ ^ ^ \^@   
 
 
 
6> > > >@A A A A
D D D D    X   $F F F FQ Q Q Q8 8 8 8,
8 
8 
8 
8& & & & & &    FK      (   
	 	 	 	   
 
 
 
    :	
 	
 	
 	
 	
&& && && &&Rk k k k k kZ ]8 ]8 ]8 ]8 ]8  ]8~ 5 5 5 \547 47 47 47l       j
 j
 j
 j
 j
  j
X        $ $ $ ^$ $ $ $ ^$I: I: I: I:V 6 6 6 \6# # # #$] ] ] ] ] ]~U; U; U; U;nQ Q Q Q "$(;??C#W #W #W #W #WJ]0 ]0 ]0 ]0~w" w" w" w"r	8 	8 	8 	8   &% % % %*7 7 7 7" 6:9999- - - -   >G G G GR   : : : :3 3 3 3 3 3 3 3r   r  c                  :     e Zd ZdZd fd	Z	 	 	 	 dd fdZ xZS )r  z
    Mostly a helper class for the subgraph lowering. The main goal is to call
    init_wrapper_code with the subgraph related arguments.
    r  r  rv   r   rw   rx   ry   c                H    || _          t                      j        |i | d S r{   )r  r.  r/  )r  r  rv   rw   r  s       r~   r/  zSubgraphLowering.__init__
  s,    $)&)))))r   FNr  r   r  r  r  r  r  r  c                n    t                                          d| j        | j        j                   d S )NT)r  r  r  )r.  r  r!  r  rX  )r  r  r  r  r  r  s        r~   r  z"SubgraphLowering.init_wrapper_code
  s@     	!!) $ 8 	" 	
 	
 	
 	
 	
r   )r  r  rv   r   rw   r   rx   ry   rs  rt  )ru  rv  rw  __doc__r/  r  r~  r  s   @r~   r  r  
  sx         
* * * * * * "$(;??C
 
 
 
 
 
 
 
 
 
 
r   r  )rv   r   rw   r   rx   ry   )r   r   rx   r   )r   r   rx   r   )r   rm   r   r   rx   r   )r   rn   rx   r   )r   r   rx   r   )r   rn   r   r   rx   ry   )r   r%   rx   r   )
__future__r   ry  r   r  loggingr  rA  r8  r  rc  r|  collectionsr   r   typingr   r   r   r   r	   r   torch._loggingtorch.fxr
   r   torch._decompr   torch._dynamo.utilsr   r   "torch._library.fake_class_registryr   torch._library.opaque_objectr   r   r   torch._library.utilsr   r   r   torch._prims_commonr   r   torch._subclasses.fake_tensorr   torch._utils_internalr   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r    r!   r"   r#   r$   torch.fx.noder%   torch.fx.passes.reinplacer&   torch.utils._mode_utilsr'   torch.utils._ordered_setr(   torch.utils._sympy.numbersr)   r+  r+   r,   r-   codegen.commonr.   r/   r0   r1   r2   r3   r4   r5   excr6   r7   r8   r9   fx_utilsr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   r  rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   runtimerU   runtime.autotune_cacherV   r:  rW   r}  rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   virtualizedre   rf   collections.abcrg   rh   ri   rj   typesrk   torch._higher_order_ops.effectsrl   rm   torch.fx.graphrn   codegen.wrapperro   dependenciesrp   rj  rq   r/  torch._inductor.codecacherr   	getLoggerru  r  _logginggetArtifactLoggerr  r   r   r  rh  rZ  )torch._inductor.fb.triton_kernel_metadatart   torch._inductor.fb.utilsru   r   r   r   r   r   r   r  r   r  r  r  r|   r   r~   <module>r     sC   " " " " " "               				 				 



      # # # # # # % % % % % % / / / / / / / / / /                              , , , , , , 4 4 4 4 4 4 4 4 ? ? ? ? ? ?         
 ; : : : : : 7 7 7 7 7 7 7 7        5 4 4 4 4 4 : : : : : : ? ? ? ? ? ? L L L L L L L L	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	       1 1 1 1 1 1 / / / / / / / / / / / / - - - - - - ! ! ! ! ! ! ! ! ! !	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	            % $ $ $ $ $                                                           $ # # # # # 8 8 8 8 8 8 & & & & & &                              ( ' ' ' ' ' ' '  8FFFFFFFFFFFF      ;;;;;;$$$$$$$$$$$$555555!!!!!!,,,,,,"77N 5 5 5 5 5 5 g!!00<HHy~*9?,, 6       9888888         (   
      $   0V/ V/ V/ V/r   *S%3 S%3 S%3 S%3 S%3EH( S%3 S%3 S%3lJ
 
 
 
 
} 
 
 
 
 
r   