
    IЦi_             	       |   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	J
r
  S SK Jr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJrJrJrJr  S SKrS SKJr  S SKrS SKrS SK rS SKJ!r!J"r"  S SK#J$r$  S S	K%J&r&J'r'  S S
KJ(r(J)r)  S SK*J+r+  S SK,J-r-  S SK J.r.  S SK/J0r0  S SK1J2r2J3r3  S SK4J5r5J6r6J7r7J8r8J9r9J:r:J;r;  S SK<J=r=  S SK>J?r?  S SK@JArA  S SKBJCrC  S SKDJErE  SSKFJGrGJHrHJIrI  SSKJJKrKJLrLJMrMJNrNJOrOJPrP  SSKQJRrR  SSKSJTrTJUrUJVrVJWrW  SSKHJXrXJYrYJZrZJ[r[J\r\J]r]J^r^J_r_J`r`Jara  SSKbJcrcJdrdJereJfrfJgrgJhrhJiriJjrjJkrkJlrl  SSKmJnrn  SSKoJprp  SSKqJrrr  SS KsJtrt  SS!KuJvrvJwrwJxrxJyryJzrzJ{r{J|r|J}r}J~r~  SS"KJrJr  \(       a  S S#KJr  S S$KJr  \GR                  " \5      r\GR                  GR                  \S%5      r\GR                  GR                  r\GR                  " 5       r\GGR                   " 5       (       a  S S&KJr  OS'\S(\S)S4S* jrS+\GR&                  S,\S)\4S- jrS.\R:                  S)\\GR&                     4S/ jrS0\S)\4S1 jrS2\.S3\S)\\"\GR2                  GR4                  \.4   4S4 jrS5\=S)\\?\\S64   4   4S7 jrS5\=S8\\?\\S64   4   S)S4S9 jr " S: S;\GR>                  GR@                  5      r " S< S=\5      rg)>    N)defaultdict)contextmanager)
ModuleType)AnyCallableDefaultDictDictIterableIteratorListNoReturnOptionalSequenceTupleTYPE_CHECKINGUnion)Expr)deviceTensor)get_decompositions)defakedynamo_timed)
LazyStringtrace_structured)make_channels_last_strides_for)
FakeTensor)GraphModule)BackwardStatemagic_methodsmethod_to_operator)free_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSympyBooleanSymTypes)Graph)Node)no_dispatch)
OrderedSet)int_oo   )configirmetrics)BackendFeatureDeviceOpOverridesget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registration)PythonWrapperCodegen)CppWrapperCodegenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)
ConstantDonatedBufferFixedLayoutget_device_typeInputBuffer	Pointwise	Reduction
StorageBox	TensorBoxTorchBindObject)
constrain_to_fx_stridesFALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsrequire_contiguousunsupported_output_tensor)autotune_cache)AutotuneCacheBundler)BaseSchedulerNode)SizeVarAllocator)	convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_donated_idxsget_sympy_Expr_dtypeis_same_tensor#maybe_get_suppress_shape_guards_ctxnormalize_nameshould_assume_input_aligned)NullHandlerV)_EffectType)output_code_log
perf_hints)log_module_codeargskwargsreturnc                      g N )rd   re   s     T/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torch/_inductor/graph.pyrc   rc      s        dtypedevice_typec                    [         R                  [         R                  [         R                  [         R                  [         R
                  [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  1nUS:X  a|  UR                  [         R                  5        UR                  [         R                   5        UR                  [         R"                  5        UR                  [         R$                  5        X;   $ )Ncuda)torchfloat32float64int64int32int16int8uint8boolbfloat16	complex32	complex64
complex128float16addfloat8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuz)rl   rm   supported_dtypes      rj   supported_dtype_of_cpp_wrapperr      s    



O fE//0E--.E112E112##rk   constant_bufferc                    [        U [        R                  [        R                  [        R                  R
                  R                  45      (       d   S5       e[        U [        R                  R
                  R                  5      (       a  [        R                  $ [        U [        R                  5      (       a  [        U 5      $ U R                  (       a  [        R                  $ U R                  (       a  [        R                  $ g )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr   corenumbersIntegerrp   rs   rY   
is_integeris_floatrq   )r   s    rj   may_get_constant_buffer_dtyper      s    %,,

EJJ4F4F4N4NO  qpq  /5::#5#5#=#=>>{{/5::..#O44!!{{		!	!}}rk   opc                 R    [          Vs1 s H  n[        U5      iM     nnX;   $ s  snf rh   r   )r   m	magic_opss      rj   is_magic_methodr      s)    0=>1#A&I>? ?s   $objtargetc           	          UR                  S5      nU n[        U5       H@  u  pE[        X55      (       d   [        SSR	                  US U 5       35      e[        X55      nMB     U$ )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)r   r   target_atomsattr_itriatoms         rj   getattr_recursiver      sn     <<$LH\*x&&5chh|BQ?O6P5QR  8* + Ork   g.c                     0 nU R                  SS9S   nSUR                  ;  a  U$ [        UR                  S   5       H-  u  p4X2R                  S   ;   d  M  UR                  S   U   X'   M/     U$ )Noutputr   r   user_visible_output_idxsoriginal_output_strides)
find_nodesmetar   rd   )r   retoutput_nodeidxnodes        rj   get_user_visible_output_stridesr      s|    ')C,,(,+A.K!)9)99
{//23	""#=>>#(()BCCHCI 4 Jrk   user_visible_output_stridesc                 0   [         R                  (       d  g[        R                  [        R                  [        R
                  1n[        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                   [        R"                  1nS[$        R&                  R(                  S[*        [$        R,                  R.                     4S jnU R1                  SS9S   n[3        U R4                  5       H  nU" U5      nU(       d  M  Xr;   a  SUR6                  S	'   UR6                  R9                  S	S
5      (       a:  UR:                   H*  nU" U5      n	U	(       d  M  X;  d  M  SUR6                  S	'   M,     [         R<                  (       a  M  Xa;   d  M  SUR6                  S	'   M     g)ar  
Nodes like convolution/convolution_backward want its input to be dense.
If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

The pass finds nodes that dislike padding. These are nodes that can be reached
from a convolution/convolution_backward in the backward direction without
going thru a reduction.
Nr   rf   c                     U R                   S:X  ad  [        U R                  [        R                  R
                  5      (       a1  [        U R                  S5      (       a  U R                  R                  $ S $ )Ncall_function_overloadpacket)r   r   r   rp   _ops
OpOverloadr   r   )r   s    rj   _get_overload_packet8mark_nodes_dislike_padding.<locals>._get_overload_packet   s[    
 ww/)4;;

(=(=>>%677	 KK''	
 	
rk   r   r   r   Tdislike_paddingF)r/   comprehensive_paddingatenconvolutionconvolution_backward
_scaled_mmvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducerp   fxr*   r   r   OpOverloadPacketr   reversednodesr   getall_input_nodespad_outputs)
r   r   ops_dislike_paddingops_like_paddingr   r   curr   priorprior_ops
             rj   mark_nodes_dislike_paddingr      sy    ''!! 									

hhmm

	%**--	.

 ,,(,+A.K !#&$*.CHH&'88<<)511,,/6348EJJ01 - !!!c&H*.CHH&'# !rk   c            #       
  ^  \ rS rSr% \\R                     \S'   S\R                  S\
\\\\4      \\\\4      4   4S jrS\R                  S\
\\R                     \\R                     4   4S jr               SlS	\R$                  R&                  S
\\\      S\\   S\\   S\S\S\\   S\\\\R2                     /\4      S\S\S\S\\\\4      S\\   S\S    S\\   S\\\      SS4"U 4S jjjrS\\R<                  R
                  R                  \S4   S\ S\4S jr!S\R>                  4S jr"\#RH                  S\R>                  S\%S   4S j5       r&S\4S jr'\(S	\S\S\4S  j5       r)S\S\4S! jr*S	\R$                  R&                  S
\\R                     S"\SS#4S$ jr+S\,\-   4S% jr.S\SS4S& jr/S\R>                  SS4S' jr0\1S\Rd                  Rf                  Rh                  4S( j5       r5S)\S\\\Rl                  \Rn                  4      4S* jr8S+\R                  SS4S, jr9S)\S\\Rl                  \Rn                  4   4S- jr:S)\S\Rv                  4S. jr<S)\S\\\4   4S/ jr=S0\S\4U 4S1 jjr>S2\R~                  S\4S3 jr@SS4.S5\Rn                  S6\S\4S7 jjrAS8\\   S\4S9 jrBS:\\C\R                     \R                  4   SS4S; jrDS\SS4S< jrES\S\R                  4S= jrFS\\   S>\\	   S\4S? jrG SmS>\	S\\   S\64S@ jjrHS\SA\\R>                     S\4SB jrISC\S0\
\   SD\\\4   S\\\6S4   4U 4SE jjrJSC\S0\SD\\\4   S\4U 4SF jjrK\(SG\R                  S\4SH j5       rLSC\S0\
SI   SD\\\4   S\\M\6\R                  \O4   4SJ jrPSC\S0\SD\S\Q4SK jrRSC\S0\SD\S\Q4SL jrSSC\S0\
\   SD\\\4   SS4U 4SM jjrTSnSN jrU\$SO\R$                  RZ                  4SP j5       rVSQ\\Rl                  \R                  4   SR\
\\\R                  4   SS4   S\\Rl                  \R                  4   4ST jrYSU\R$                  RZ                  SV\
\   SW\\\4   SX\
\   SY\\\4   SS4SZ jrZS[\R$                  RZ                  S\4U 4S\ jjr[SnS] jr\   SoS^\S"\\   S_\\]   SS4S` jjr^S\
\\\
\\-4      4   4Sa jr_S\
\\\
\\-4      4   4Sb jr`SpSc jraS\
\\\
\b\4      \\
\b\c4      4   4Sd jrd\(Se\SS4Sf j5       reS\f4Sg jrgS\f4Sh jrhS\\   4Si jriS\S\4Sj jrjSkrkU =rl$ )qGraphLoweringi  graph_outputsexrf   c                 Z   U R                   (       a2  [        UR                  5       5      [        UR                  5       5      4$ SSKJn  U" S[        U R                  R                  5       35      nU R                  R                  UU5      u  nnnU Vs/ s H:  n[        U[        R                  5      (       a  UR                  R                  OUPM<     nnU Vs/ s H:  n[        U[        R                  5      (       a  UR                  R                  OUPM<     n	nX4$ s  snf s  snf )z
Support dynamic shapes and dynamic strides by assigning variables
to each dimension.  We duck-shape tensors, so if two tensors
have the same size they get assigned the same symbolic variable.
r   )ConstantSource__inductor_unknown_tensor_)reuse_shape_envrU   sizestridetorch._dynamo.sourcer   len
_shape_env
var_to_val,create_symbolic_sizes_strides_storage_offsetr   rp   SymIntr   expr)
selfr   r   sourcer   r   _r   r_sizer_strides
             rj   symbolic_sizes_strides$GraphLowering.symbolic_sizes_strides  s    ,RWWY79R		:   < $,S1K1K-L,MNF LL	 NRRTAu||!<!<!&&++!CTROUVv!:a#>#>AFFKKAEvV SVs   AD#AD(c                     UR                  5        Vs/ s H  n[        R                  " U5      PM     nnUR                  5        Vs/ s H  n[        R                  " U5      PM     nnX44$ s  snf s  snf )z
Primarily used to weights
)r   r   r   r   )r   r   r   r   r   s        rj   static_sizes_strides"GraphLowering.static_sizes_stridesA  sZ     +-'')4)Qa )4,.IIK8Kq%--"K8| 58s    A, A1NFgmexample_inputs	shape_envgraph_idcpp_wrapperaot_mode
layout_optextern_node_serializeris_inferenceis_backwardis_const_graphconst_output_index
const_codeconst_modulenameinputs_to_checkc                 	  > [         TU ]  U5        X l        Ub  UOU R                  XS9U l        SU l        Xl        Xl        Xl        Xl	        Xl
        UU l        SU l        Uc  [        5       nSU l        OX0l        SU l        X0l        UR!                  5         UR"                  R%                  5       U l        [)        5       U l        [-        U5      U l        / U l        0 U l        0 U l        [)        5       U l        U(       a  UR8                  O	[)        5       U l        U(       a  UR:                  O	[)        5       U l        SU l        / U l        / U l         U(       a  UO0 U l!        U(       a  [)        URE                  5       5      O	[)        5       U l#        U(       a  URH                  O0 U l$        0 U l%        0 U l&        0 U l'        [)        5       U l(        [)        5       U l)        [)        5       U l*        [)        5       U l+        [)        5       U l,        [)        5       U l-        S U l.        S U l/        / U l0        SSK1J2n  [f        Rh                  " 5       (       a	  U(       a  UOUU l5        S U l6        0 U l7        [)        5       U l8        / U l9        0 U l:        [w        [x        5      U l=        0 U l>        [~        R~                  " 5       U l@        XlA        XPlB        XPlC        0 U lD        X`lE        X@lF        [        [        5      U lI        S U lJ        S U lK        U R                  (       a  U R                  5       O	[)        5       U lM        S1U lN        [        UR                  5      U lQ        [        UR                  U R                  5        SU lS        SU lT        / U lU        S U lV        0 U lW        UR                  5       U lY        U R                  R                  R                  S	0 5      U l]        Ub  UR                  O0 U l^        [        5         [        R                  " S 5      " [        5      U lb        0 U lc        [)        5       U ld        [)        5       U le        [)        5       U lf        [        R                  " 5       U li        S
U lj        [        5       U ll        g )N)r   r   FTcpu)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)msuper__init__r   decide_layout_optr   num_channels_last_convr   r   r   r   r  r  extra_tracebackr&   r   r   freeze_runtime_assertsdeferred_runtime_assertscopyras_by_symbolr,   bound_unbacked_symbolsrT   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalzero_dim_cpu_tensor_listdevice_typesdevice_idxsrm   buffers
operationsr   keysfolded_constants	constantstorchbind_constantsseen_subgraphsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_buffersnever_reuse_buffersinplaced_to_remove
device_opswrapper_codeextern_kernel_nodes&torch._inductor.extern_node_serializerr  r/   	is_fbcoder   current_nodelistsmutated_inputsmutated_input_idxsname_to_bufferr   listname_to_users
name_to_optimecreation_timer  r   record_multi_kernel_choicemulti_kernel_to_choicer   r   next_post_grad_graph_counterpost_grad_graph_id	schedulercurrent_devicefind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   graphr   r   	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler   r   r  allocated_constant_namer7   	functools	lru_cacher4   effectful_opsaligned_inputsno_fuse_buffer_namesall_codegen_kernel_names	itertoolscountworkspace_idplaceholder_idxrX   bw_donated_idxs)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  	__class__s                     rj   r  GraphLowering.__init__K  s   * 	, % '''F 	
 '(#(&,$(.$ 
I#(D 'O#'D # 	((* ..335 	 AK#(3,.24=?"9C%)5L%%:< 	 )5L$$*, 	 !(*.0"4" 	
 " )..01 	 '3L"" 	 FH 68.03=<0:8B$0:4>L 3=<-126>@ V !!&< #, 	# ,0+-
/9|-/46@KD@Q35!YY[	&
 +6'68#  "&'?"@>B 7; 7;ooD002:< 	' "= =+J288+T("288T-M-MN !  	 9=& GI -/[[]040@0@0D0D.1
- 5A4LL00RT 	$ 	"#$-$7$7$=>R$S!;=/9|5?\! :D% &OO-  "/1rk   r   featurec                 r    [        U[        5      (       d   U5       eX R                  [        U5      5      ;   $ rh   )r   r2   r4   r@   )r   r   rY  s      rj   has_featureGraphLowering.has_feature  s5    
 '>22;G;233OF4KLLLrk   c                 B    U R                   =n(       a  U$ [        S5      e)NzNo current device)r>  r   r   r   s     rj   get_current_device_or_throw)GraphLowering.get_current_device_or_throw  s$    (((6(M233rk   c              #   \   #    U R                   nXl          S v   X l         g ! X l         f = f7frh   )r>  )r   r   r   s      rj   set_current_device GraphLowering.set_current_device  s*     ##$	("'%s   ,! ,),c                 L    U R                   (       a  gU R                  (       a  gg)N	inferencebackwardforward)r   r   r   s    rj   get_training_phase GraphLowering.get_training_phase
  s    rk   c                   [         R                  (       d  g[         R                  (       a  gU R                  R                   Vs/ s HB  o"R
                  [        R                  R                  R                  R                  :X  d  M@  UPMD     nn[        U5      nUS:X  a  g[        R                  R                  R                  (       aE  [        R                  R                  R                  5       (       a  [!        S U 5       5      (       a  g[        [#        U R                  R                  5      5      SU-  :  a  [$        R'                  S5        g[)        S U 5       5      (       a  [$        R'                  S5        gS	[*        S
[,        4S jnS	[        R.                  R0                  S
[,        4S jnS	[        R.                  R0                  S
[,        4S jnU(       GaZ  SSKJn  [7        [8        5      n	U H  n
[        R:                  R<                  R?                  U
5      u  pnU(       a  U" SS9 n[@        RB                     U
R
                  " U0 UD6  SSS5        SSS5        WRE                  5       nU" U
5      (       a  SnO"U" U
5      (       a  SnOU" U
5      (       a  SnOSnU	U==   U-  ss'   M  [$        R'                  S5        M     SnSnSnSn[G        U	RI                  5       5      nU	S   U-  U	S   U-  -   U	S   U-  -   U	S   U-  -   nUU:*  nU(       d  [$        R'                  SUU5        U$ [)        [K        XS5      5      (       a  [$        R'                  S5        g[)        [K        Xc5      5      (       a  [$        R'                  S5        g[!        [K        Xs5      5      (       a  [$        R'                  S5        ggs  snf ! , (       d  f       GN}= f! , (       d  f       GN= f)zT
Decide if we should enable layout optimization for this graph based on
heuristics.
FTr   c              3      #    U  HL  nS   HB  nUR                   U   R                  S   R                  [        R                  " S5      :H  v   MD     MN     g7f)r   r.   valr  N)rd   r   r   rp   .0nr   s      rj   	<genexpr>2GraphLowering.decide_layout_opt.<locals>.<genexpr>)  sL      #A!C s  '..%,,u2EE! F#s   AAi,  z*Skipped layout opt because only a few convc              3      #    U  H4  nS   H*  n[        UR                  U   R                  S   5      v   M,     M6     g7f)rm  rn  N)r#   rd   r   ro  s      rj   rr  rs  8  s?      
 QVVC[--e455 6s   <>zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670rq  rf   c                     U R                   S   R                  S   n[        U[        R                  5      (       d   eU R                   S   S:  =(       a    UR                  S5      S:  $ )Nr.   rn  r	  )rd   r   r   rp   r   r   )rq  meta_vals     rj   
is_grouped3GraphLowering.decide_layout_opt.<locals>.is_groupedB  sT    vvay~~e,Hh555566":>:hmmA&6&::rk   c                     U R                   S   R                  S   R                  S5      S-  U R                   S   R                  S   R                  S5      :*  =(       a.    U R                   S   R                  S   R                  S5      S:  $ )Nr.   rn  r      rd   r   r   rq  s    rj   is_in_out_channel:GraphLowering.decide_layout_opt.<locals>.is_in_out_channelG  sv    q	u%**1-1QVVAY^^E5J5O5OPQ5RR 6FF1INN5)..q1A5rk   c                     U R                   S   R                  S   R                  S5      S:*  =(       a.    U R                   S   R                  S   R                  S5      S:*  $ )Nr.   rn  r   @   r{  r|  s    rj   is_small_channel9GraphLowering.decide_layout_opt.<locals>.is_small_channelM  sT    q	u%**1-3 8FF1INN5)..q1R7rk   )FlopCounterMode)displayNgroupedsmallin_outdefaultzConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)&r/   layout_optimizationforce_layout_optimizationrB  r   r   rp   opsr   r   r  r   backendsmkldnnenabledis_availableallr3  logdebugr   r   rx   r   r*   torch.utils.flop_counterr  r   float	_inductorfx_utilsget_fake_args_kwargsr_   	fake_modeget_total_flopsr   valuesmap)r   r   rq  
conv_nodesnconvrw  r}  r  r  flop_countsr   successrd   re   flop_counter_modecounted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                           rj   r  GraphLowering.decide_layout_opt  sp    ))++ xx~~
%!UYY^^5O5O5W5W)WA~ 	 
 JA: NN!!))%%2244 #   
 tBHHNN#$e3IIBC 

 
 

 IIw 	;# 	;$ 	;
	 	4 		 	$ 	 @,7,>K"(-(@(@(U(U)%v (7;L[[ KK88 ) 8 %6$E$E$GM!$''$-	)$//$+	*400$,	$-		*m;*II:;- #6 "'!& %$k0023K I&);;g&)99:h'*;;< i(+==>  +k9M 		~"
 ! & s:*++IIX  s$122IIT  s#011IIVWm
D )[ 87s0   ?OO8O$	OO$
O!O$$
O3	c                 B    U R                   b  U R                    SU 3$ U$ )z2Prepend the given name with the graph name if any.r   )r  r   r  s     rj   qualify_nameGraphLowering.qualify_name  s&    99 ii[$((rk   subgraph_nameSubgraphLoweringc                     [        U UUU R                  U R                  U R                  U R                  U R
                  U R                  U R                  U5      S9
$ )a  
Make a subgraph of the current graph with all inherited parts, except
the graph module (`gm`) and `example_inputs`.  The subgraphs are lowered
separately and lifted into a separate function in the parent output
wrapper code.  The subgraph name is qualified by the parent graph's
name. Note that the lifting of subgraph is supported for python wrapper
only. For cpp wrapper, we inline the subgraphs in the parent wrapper.
)
parentr   r   r   r   r   r   r   r   r  )r  r   r   r   r   r   r   r  )r   r   r   r  s       rj   make_subgraphGraphLowering.make_subgraph  sZ      )oo((]]#'#>#>**((""=1
 	
rk   c                    [        5       n[        U R                  R                  R                  5       H~  nUR
                  [        R                  R                  R                  R                  :X  a  UR                  U5        MR  UR                   H  nX1;   d  M
  UR                  U5          M|     M     U R                  R                  R                   H%  nX!;   d  M
  UR                  UR                  5        M'     U$ )a  
The rule to decide if an node prefer channels last is simple.
1. if it's input/output of a convolution
2. if one of its user prefers channels last

We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
channels last.

Consider the scenario: conv -> batch-norm -> relu -> conv
Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
1. the output of batch-norm should be channels last initially since its input is a conv's output.
   Forcing the batch-norm's output to be contiguous results in the first copy
2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
   We need convert it to channels last layout which results in the second copy.
With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
can be saved.
)r,   r   rJ  rB  r   r   rp   r  r   r   r  r~   usersupdate)r   
output_setrq  users       rj   r?  -GraphLowering.find_nodes_prefer_channels_last  s    & (2|
$++++112Axx599>>55===q!%NN1%   30 ""((A!!!''* ) rk   c                     XR                   ;  a2  U R                   R                  U5        [        R                  SU5        g g )NzUsing FallbackKernel: %s)rA  r~   perf_hint_loginfor  s     rj   warn_fallbackGraphLowering.warn_fallback  s8    ,,,!!%%d+94@ -rk   c                 `   U R                   R                  UR                  5        UR                  b%  U R                  R                  UR                  5        [
        R                  R                  (       a8  XR                  ;  a(  [
        R                  R                  U R                  U'   g g g rh   )	r  r~   typeindexr  r_   rB  r.  rG  r^  s     rj   add_device_infoGraphLowering.add_device_info  sw    fkk*<<#  .77F2J2J$J/0ww/C/CD$$V, %Krk   c                 "    [         R                  $ rh   )r_   r  rh  s    rj   r  GraphLowering.fake_mode  s    {{rk   buffer_namec           	         XR                   ;   a  U R                   U   $ XR                  ;   a  U R                  U   $ XR                  ;   ay  [        R                  R                  U   n[
        R                  " U[
        R                  " UR                  UR                  /[        R                  R                  U5      Q76 S9$ g Nr  layout)r2  r  r  r_   rB  r0   ConstantBufferr?   r   rl   r   )r   r  datas      rj   try_get_bufferGraphLowering.try_get_buffer  s     ---&&{33+++$$[11..(77$$[1D$$ ~~KK./gg.J.J4.P  rk   symbolc                     [        S5      e)Nz'Should not be called for the main graph)r   )r   r  s     rj   add_symbol_graph_input$GraphLowering.add_symbol_graph_input*  s    DEErk   c                 J    U R                  U5      nUb  U$ [        SU 35      e)Nz$Failed to find buffer matching name )r  r   r   r  bufs      rj   
get_bufferGraphLowering.get_buffer-  s/    !!+.?JA+OPPrk   c                    XR                   ;   a  U R                   U   R                  $ [        U R                  S5      (       a  XR                  R                  ;   aq  U R                  R                  U   nX R
                  ;   a  U R
                  U   R                  5       $ X R                  ;   a  U R                  U   R                  5       $ XR
                  ;   a  U R
                  U   R                  5       $ XR                  ;   a  U R                  U   R                  5       $ [        R                  " SU5      nU(       a   U R                  UR                  S5      5      $ [        SU 35      e)Nmutation_real_namez1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r.   could not find )r  rl   r   r=  r  r2  	get_dtyper  rematchgroupKeyError)r   r  mutated_bufr   s       rj   r  GraphLowering.get_dtype3  s.   ..(>>+.444 DNN$899~~@@@..;;KHK111**;7AACC///((5??AA---&&{3==??+++$$[1;;==HHI;W>>!''!*--677rk   c                 V   XR                   ;   a  U R                   U   R                  5       $ XR                  ;   a5  U R                  U   nUR                  5       (       d  gUR	                  5       $ XR
                  ;   a  U R
                  U   R	                  5       $ [        SU 35      e)Nr.   r  )r  numelr2  has_tensor_output	get_numelr  r  r  s      rj   r  GraphLowering.get_numelI  s    ..(>>+.4466---%%k2C((**==?"+++$$[1;;==677rk   rd   c                 j   > [        S5         [        TU ]  " U6 sS S S 5        $ ! , (       d  f       g = f)NzGraphLowering.run)r   r
  run)r   rd   rW  s     rj   r  GraphLowering.runU  s$    -.7;% /..s   $
2r   c                 "   UR                   b
   SU 35       e[        U[        R                  5      (       d   eU R	                  S[        U R                  5       35      nU R                  R                  U5        XR                  U'   X!l         U$ )NzOperation registered twice: r   )	operation_namer   r0   	Operationr  r   r  appendr5  )r   r   r  s      rj   register_operation GraphLowering.register_operationY  s      (M,H*MM("bll++++  2c$//&:%;!<=r" " rk   set_namebufferr  c                   U R                  S[        U R                  5       35      nU R                  R                  U5        XR                  U'   UR                  5       nUb_  [        U[        R                  5      (       a/  UR                  5       (       a  U[        R                  " S5      :X  d  U R                  U5        U(       a  X1l        U$ )Nr  r  )r  r   r  r  r2  
get_devicer   r0   ComputedBufferis_zero_elementsrp   r   r  r  )r   r  r  r  r   s        rj   register_bufferGraphLowering.register_bufferb  s      3s4<<'8&9!:;F#$*D!""$ 62#4#455++--ell511   (Krk   operation_namesc                 h    U R                  SSR                  U5      -   5      nXR                  U'   U$ )Nlist_r   )r  r   r/  )r   r  r  s      rj   register_operation_list%GraphLowering.register_operation_listv  s1      388O+D!DE*

4rk   node_outputc                    ^ ^ S[         [        [        R                     [        R                  4   SS 4UU 4S jjmT" U5        g )Nvaluerf   c                   > [        U [        [        45      (       a  U  H  nT" U5        M     [        U [        R                  5      (       a6  U R                  5        H!  nTR                  U   R                  U 5        M#     g g rh   )r   r3  tupler0   rE   get_read_namesr4  r  )r  x	read_nameregisterr   s      rj   r  1GraphLowering.register_users_of.<locals>.register~  sg    %$//AQK %..!&!5!5!7I&&y188? "8 /rk   )r   r
   r0   IRNode)r   r  r  s   ` @rj   register_users_ofGraphLowering.register_users_of{  s@    	@E(299"5ryy"@A 	@d 	@ 	@ 	rk   c                     [        U[        5      (       d   eU R                  R                  U5        XR                  ;  a  gU R                  U    H  nUR                  5         M     g)zz
When a buffer is mutated we need to make sure all the reads to
the old version are realized before the mutation happens.
N)r   strr&  r~   r4  realize)r   r  r  s      rj   mark_buffer_mutated!GraphLowering.mark_buffer_mutated  sX    
 $$$$$  &)))&&t,DLLN -rk   c                    XR                   ;   a  XR                  ;   d
   SU-   5       e[        U R                   U   5      nX R                  R                  ;   a  U R                  R                  U   $ U R                  U   $ )z
In AOTI, module buffers may have been mutated during the tracing and compilation.
Thus we need to read from previously stored original buffers, to make sure the
generated model.so uses correct initial values.
z$Can not find the original value for )rK  r  rW   rJ  r   )r   r  	orig_names      rj   get_original_value_of_constant,GraphLowering.get_original_value_of_constant  s     3338N 	
2T9	
N 5T5Q5QRV5WX	 KK,,, KKY'	
 %	
rk   r  c                    [         R                  R                  (       d7  U R                  R	                  5        H  u  p4[        X$5      (       d  M  Us  $    Uc  S[        U R                  5       3nUnUS   R                  5       (       a  SU 3nU R                  U5      n[        U5      nUnSnXR                  ;   a  U SU 3nUS-  nXR                  ;   a  M  X R                  U'   UR                  < SUR                  < S[        UR                  5       5      < S[        UR                  5       5      < S[        U5      S 3	U R                   U'   XPR"                  U'   U$ )Nconstantr   	constant_r   r.    r  )r/   aot_inductoruse_runtime_constant_foldingr  itemsrZ   r   isdigitr  r\   r   rl   r  r   r   hashr"  rK  )r   r  r  constant_namer  r  prefixcnts           rj   allocate_non_dup_const_name)GraphLowering.allocate_non_dup_const_name  sO    ""??(,(<(<(>$!$..(( )? <c$..123D	7??tf%D  &  %nn$XQse$D1HC nn$  $t{{oQtzznATYY[!$AeDKKM&:%=QDz!n 	D!
 .7$$T*rk   c                     U R                  X!5      n[        R                  " [        R                  " U[        UR                  UR                  /U R                  U5      Q76 S95      $ r  )	r!  rE   creater0   r  r?   r   rl   r   )r   r  r  new_names       rj   add_tensor_constant!GraphLowering.add_tensor_constant  s`     33D?"KK.2.G.G.M
 	
rk   device_overridec                 p   U R                   U   R                  U:X  d  Uc  U$ [        R                  R                  R                  5          U R                  U SUR                   UR                  =(       d    S 3U R                   U   R                  U5      5      sSSS5        $ ! , (       d  f       g= f)z
We AOT copy constants to the devices they are needed on.
If device_override doesn't match the constant's device, then
copy it and return a different name.
Nr   r   )
r  r   rp   utils_python_dispatch_disable_current_modesr!  r  r  to)r   r  r(  s      rj   r  GraphLowering.constant_name  s     >>$&&/9_=TK[[))@@B 33&/../0E0E0J/KLt$''8 CBBs   AB''
B5r   re   c                 P  > U =R                   S-  sl         [        T	U ]	  XU5      nU R                  U5      n[	        U[
        5      (       aA  UR                  R                  nXPR                  U'   U R                  R                  U5        U$ [	        U[        [        [        45      (       aA  [        R                  " U5      nXPR                  U'   U R                  R                  U5        U$ Uc  U R                  R                  U5        g [	        U[         5      (       a  U R                  R                  U5        g [	        U["        R$                  5      (       d   U5       eUR&                  (       d  U R)                  U5      u  pgOU R+                  U5      u  pgU R,                  (       ai  U R.                  (       aX  U R                   U R.                  ;   a>  [0        R2                  " [5        U[7        UR8                  UR:                  Xg5      S95      nO=[0        R2                  " [=        U[7        UR8                  UR:                  Xg5      S95      nXR                  U'   U R                  R                  U5        UR>                  R>                  U R@                  U'   U RB                  RD                  (       a  U RG                  UR8                  5        [I        5          [K        U5      (       a  U RL                  RO                  U5        S S S 5        U$ ! , (       d  f       U$ = f)Nr.   r  )(rU  r
  placeholderr  r   r(   r   r   r  r  r  intrx   r  r   sympifyr   rp   r   _has_symbolic_sizes_stridesr   r   r   rV  rE   r$  r>   r?   r   rl   rA   r  r  r.  r  r  r[   r]   rO  r~   )
r   r   rd   re   exampler   sizesstridestensorrW  s
            rj   r0  GraphLowering.placeholder  s|    	!'%fF;""6*gx((<<$$D(,f%""))&1K#tU!344==)D(,f%""))&1K_""))&1g}-- ""))&1'5<<009'90
 22!66w?NE7!88ANE $$$$(<(<<%%&w~~w}}eUF %%&w~~w}}eUF %+&!%%f--3[[-=-=""6*""  0 12*733##''/ 3  32 s    ,L
L%c                 \  > U[         R                  L a3  [        US   [        [        [
        45      (       a  [        TU ]  XU5      $ [        U[        R                  R                  5      (       d  [        US5      (       a  U" U0 UD6$ U[        ;  Gai  [        U[        R                  R                  5      (       d
   U S35       eUR                  5       R                  S5      S   nU[         ;   a  [#        USSS9  O[$        R&                  (       a  [)        U/5      (       a  [*        O[,        n[.        R1                  SUR3                  XU5      5        [4        n[        R6                  R8                  R:                  UR<                  ;   a  [>        nO4[        R6                  R8                  R@                  UR<                  ;   a  S n[#        XS	9  O)[)        U/5      (       a  [+        XU5      e[-        XU5      e [.        RC                  S
[        U   5        U RD                  n[G        U5      nU(       a  X#pU" U/UQ70 UD6u  p#[        U   " U0 UD6nU(       a  U RI                  UW	W
X#5        U$ ! [J         a+  n[M        XX#5      RO                  URP                  5      S eS nAff = f)Nr   _inductor_lowering_functionz is not an OpOverloadr   FT)warnoverride_decompz"Creating implicit fallback for:
%s)layout_constraintz  via %s))operatorgetitemr   r3  r  dictr
  r   rp   r   r   r   rK   r   r  r   rH   rL   r/   implicit_fallbacksr   r;   r<   r  r  operator_strrO   _CTagneeds_fixed_stride_ordertagsrG   flexible_layoutr  r.  rM   propagate_mutation	Exceptionr:   with_traceback__traceback__)r   r   rd   re   	base_nameerrordecided_constraintrq  layout_constraintsold_args
old_kwargsouterW  s                rj   r   GraphLowering.call_function0  s^   X%%%*T!WtUD>Q*R*R7(v>> &%**"="=>>71D
 D
 4*6**"

--  0./0  ++C03I//f5$G** *6(33 .5 
 9&&vV<
 &8" 88<<88FKKG)@&XX\\11V[[@)-& fK#VH-- 0fEE26HH	IIj)F"34!!A!9&!A!'+*1!EdEfEF#T4V4C! ''8ZNJ 	#At<KK	s   ?A6I6 6
J+ &J&&J+tc                 d    [        U R                  5      S:H  =(       a    U R                  S   S:*  $ )z=
True if this is a small constant attr that will be inlined.
r.   r      )r   shape)rU  s    rj   can_inline_constant!GraphLowering.can_inline_constant{  s(    
 177|q 4QWWQZ1_4rk   ri   c                 b   [        U R                  U5      n[        U[        R                  R
                  5      (       aB  XR                  ;   a  U R                  U   $ [        R                  " XS9nXPR                  U'   U$ [        U[        R                  R                  5      (       a&  X@R                  U'   SU R                  U'   [        XS9$ [        U[        R                  5      (       d   e[        R                   R"                  (       d%  [        R$                  (       d  ['        U5      (       a  U R)                  XA5      $ [+        5          UR,                  S:X  a6  [/        UR1                  5       UR2                  UR4                  S9sS S S 5        $ U R7                  U5      (       aX  [8        R;                  S[=        U5      5        SSKJ n  U" URC                  5       UR2                  UR4                  S	9sS S S 5        $  S S S 5        U R)                  XA5      $ ! , (       d  f       N= f)
N)r  graph_moduler  )r  r  ri   )r  rl   r   zInlining constant: %s r.   )r7  )rl   r   )"r   rJ  r   rp   r   r   r!  r0   SubgraphrC  ScriptObjectr   r"  rF   r   r/   r  r  always_keep_tensor_constantsrP   r&  r+   rX  r=   itemrl   r   rY  r  r  r  loweringr7  tolist)r   r   rd   re   r  rR  r7  s          rj   get_attrGraphLowering.get_attr  s    "$++v6eUXX1122,,,**622++6>C*-'JeUXX2233/4$$V,*,D'"<<%....<<22(//++E::]{{b **,ekk%,, ]
 ''..		2CK@,ellnEKKU ]
 /  ''66 ]s   =H A$H  
H.c                     [         erh   AssertionErrorr   r   rd   re   s       rj   call_moduleGraphLowering.call_module      rk   c                     [         erh   rf  rh  s       rj   call_methodGraphLowering.call_method  rk  rk   c                   > [         TU ]  XU5      n[        U[        [        45      (       d  U4n[        U[        [        45      (       d   [        U5      5       e[        S U 5       5      (       d   U5       e[        R                  R                  R                  S   n[        U[        [        45      (       d  U4nU Vs/ s H"  n[        R                  R                  U5      PM$     nn/ n[        U5      [        U5      :X  d   e[        XE5       H  u  p[        U[        R                   [        R"                  45      (       d  UR%                  U5        MG  [        UR'                  5       [        R(                  5      (       a0  UR%                  [        R                  R+                  U5      5        M  UR%                  U R-                  XR.                  S   R1                  5       5      5        M     Xpl        U R4                  R7                  5        GHF  u  p[        U[         [8        R:                  45      (       d   S[        U5       35       e[        U[         5      (       d  MU  UR=                  5         [        U[         5      (       d   eUR>                  n[        U[        R@                  5      (       d   eUnUR>                  n[        U[B        5      (       a  URE                  5       U
:w  d  M  [        RF                  RI                  XRJ                  U
   5         U R2                  RM                  U5      nU RJ                  U
   U R2                  U'   GMI     U RQ                  5         [R        RU                  SU RV                  U RX                  b  U RX                  5        g S5        g s  snf ! [N         a     GM  f = f)Nc              3      #    U  H  n[        U[        [        R                  [	        S 5      [        R
                  [        R                  [        R                  R                  R                  [        [        R                  45      v   M     g 7frh   )r   rE   r0   r=   r  r  r   r   logicboolalgBooleanr1  EffectfulKernel)rp  r  s     rj   rr  'GraphLowering.output.<locals>.<genexpr>  sm      
  KKJ%%JJKK''//&&	  s   BBr   rn  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr	  )-r
  r   r   r  r3  r  r  r_   rB  r.  rd   r0   ExternKernelrealize_inputr   ziprE   BaseViewr  get_output_specCommBufferLayout
copy_inputtry_match_insignificant_stridesr   r   r   r  r  r   r   r  r  rD   rA   get_nameMutationLayoutSHOULDREMOVErealize_intor  r  
ValueErrorfinalizer  r  r  r   )r   r   rd   re   resultfx_node_argsr  result_correct_stridesrfx_noder  r  value_storage_boxindrW  s                 rj   r   GraphLowering.output  s    f5&5$-00YF&5$-00>$v,>0 
 
 
 
 	 	 
" ww++003,66(?L<BCFq"////2FC!#< CK///f3JAa",,!<==&--a0A--/1D1DEE '--boo.H.H.KL '--88<<.557 4  4,,224KD	5::.  G8eFG  eY//MMOeY////JJEeR]]3333 %JJEe[11U^^5E5M--::55d;,,223DEC.2.H.H.ND&&s+) 50 			U''!]]6DMM	
 =?	
_ DV " s   )N2&7N77
OOc                 J    U R                    H  nUR                  5         M     g rh   )r  decide_layout)r   r  s     rj   r  GraphLowering.finalize  s    <<C  rk   r   c              #   \   #    U R                   n Xl         S v   X l         g ! X l         f = f7frh   )r.  )r   r   olds      rj   set_current_nodeGraphLowering.set_current_node  s*     	$ $ #s   ,
! ,),r7  meta_strides_inp.c           	        ^  [         R                  R                  R                  U5      (       d   eU Vs/ s H:  n[	        U[         R
                  5      (       a  UR                  R                  OUPM<     nn[        U 4S j[        XAR                  5       5       5       5      (       a  U$ S[        [        [        [        4      S[        [        [        [        4      S[        [        [        [        4      S[        4U 4S jjnU" UR!                  5       XAR                  5       5      (       d  U$ [         R                  R                  R#                  U5      u  pg/ UR$                  Qn['        UR!                  5       5       H.  u  pT R(                  R+                  US5      (       d  M(  XI   X'   M0     [         R                  R                  R-                  UR.                  UR0                  UR2                  UUR4                  5      n
[        R6                  " [         R                  R                  R9                  XjS95      $ s  snf )	z
Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
dimensions - size 0 or 1 - will be updated.

If there are real stride differences (NHWC vs NCHW) then the input will be returned.
c              3   ^   >#    U  H"  u  pTR                   R                  X5      v   M$     g 7frh   )r  statically_known_equals)rp  s1s2r   s      rj   rr  @GraphLowering.try_match_insignificant_strides.<locals>.<genexpr>$  s,      
@ MM11"99@s   *-rX  meta_stridestensor_stridesrf   c                    > [        XU5       HK  u  p4nTR                  R                  US5      (       a  M)  TR                  R                  XE5      (       a  MK    g   g)Nr.   FT)rx  r  statically_known_leqr  )rX  r  r  dimr  r  r   s         rj   significant_strides_equalPGraphLowering.try_match_insignificant_strides.<locals>.significant_strides_equal*  sR    
  #5G==55c1==}}<<RDD   H rk   r.   )r  r  )rp   r  r0   is_storage_and_layoutr   r   r   r   r  rx  
get_strider   r   r   r1  rx   get_sizeas_storage_and_layoutr   r   r  r  r?   r   rl   r   offsetrE   ReinterpretView)r   r7  r  sr  r  storage
old_layout
new_strider   
new_layouts   `          rj   r}  -GraphLowering.try_match_insignificant_strides  s    !!77???? HX
GW!:a66AFFKKA=GW 	 
  
l,=,=,?@
 
 
 M	E$),-	"5s#34	 %U49%56	 		 )OO|->->-@
 
 M#oo00FFvN)z(()
foo/0DA}}11!Q77 ,
 1 __''33OO

 ||OO..G.O
 	
U
s   AIr  rP  rQ  new_args
new_kwargsc                 H  ^  [        U5      [        U5      :X  d   e[        U5      [        U5      :X  d   eUR                  [        R                  R                  R
                  L Ga  UR                  S   n[        U[        5      (       d   e[        R                  R                  R                  US   US   UR                  5        VVs0 s H@  u  pxU[        U[        R                  R                  5      (       a  UR                  S   OU_MB     snn5      n	U	 HY  n
US   U
   nUS   U
   nXL a  M  T R!                  [        R                  R"                  R$                  R&                  X40 5        M[     g[        UR                  [        R(                  R*                  5      (       d   eS[        R,                  R.                  S[0        R2                  S[0        R2                  S	S4U 4S
 jjnUR                  R4                  n[7        [9        X$5      5       H   u  nu  pUR:                  U   nU" UX5        M"     UR:                   Vs0 s H  nUR<                  U_M     nnUR?                  5        H  nUU   nUU   nUU   nU" UX5        M     gs  snnf s  snf )aP  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
and then called fx_node(*new_args, **new_kwargs).

If fx_node mutates any of new_args/new_kwargs, and they are different from
old_args/old_kwargs, then we need to update the original tensor.
re   
kernel_idxconstant_args_idxrn  N
schema_argold_argnew_argrf   c                 b  > XL a  g U R                   b  U R                   R                  (       a  [        U[        R                  5      (       a  U4nU4n[        X5       HK  u  p4X4L a  M  TR                  [        R                  R                  R                  R                  X440 5        MM     g g g rh   )
alias_infois_writer   r0   r	  rx  r   rp   r  r   copy_r  )r  r  r  old_arg_itemnew_arg_itemr   s        rj   maybe_propagate9GraphLowering.propagate_mutation.<locals>.maybe_propagatev  s     !$$0Z5J5J5S5S gryy11&jG&jG25g2G.L#3 &&		,,44|6RTV 3H 6T0rk   ) r   r   rp   r  higher_ordertriton_kernel_wrapper_mutationre   r   r@  _higher_order_opstriton_kernel_wrapget_mutated_tensorsr  r   r*   r   r   r   r  r  r   r   rC  Argumentr0   r	  _schemar   rx  	argumentsr  r  )r   r  rP  rQ  r  r  re   kvmutatedr  r  r  r  schemar   r  argschema_kwargskeys   `                   rj   rH   GraphLowering.propagate_mutationN  sO     8}H---:#j/111>>UYY33RRR^^H-Ffd++++--@@TT<(./ !' . 
1ehhmm(D(Dqvve}!K .G  $X.t4$X.t4&""599>>#7#7#?#?'ASUWX   '..%***?*?@@@@	))	46II	HJ				( '''0X1H'I#C#'))#.JJ9 (J 392B2BC2B332BC??$C oG oG&s+JJ9	 %UP Ds   AJJrq  c                 &  >^ ^^*^+^, S[         SS 4U4S jjnSSKJn  [        T R                  5      m+[        T R
                  5      m,T1nTR                  S:H  nU(       a!  T R                  T5      u  pgU[        Xg5      -  n[        R                  R                  U5         T R                  T5         [        R                  " T5         TR                  S:X  am  TR                  [        R                   LaP  [#        T5      (       d  UR%                  SSU4S	 j5      (       a$  U" S
5        ['        TR                  SS9" W0 WD6nGOTR                  S:X  a  TR                  [(        R*                  R,                  R.                  L a  [0        R2                  S:w  a}  U" S5        [0        R2                  S:X  aE  Wn	Wn
[5        T/UQ70 UD6u  pgT R7                  TR                  Xg5      nT R9                  TXXg5        O[;        S[0        R2                   35      e[=        TR                  5      (       a  U" S5        [?        TR@                  S   [(        RB                  [(        RD                  [(        RF                  45      (       a$  TR@                  S   RH                  RJ                  nO'[L        T-T ]  T5      nOU" S5        [L        T-T ]  T5      n[(        R*                  RP                  RR                  RT                  [(        R*                  RP                  RV                  RT                  [(        R*                  RP                  RX                  RT                  [(        R*                  RP                  RZ                  RT                  [(        R*                  RP                  R\                  RT                  /m*[_        S TR`                   5       5      nTT Rb                  ;   n[_        U*4S jTR`                   5       5      nTR@                  Re                  SS5      (       a  [?        U[f        5      (       a  URi                  5         TR@                  S   Rk                  5       n[(        Rl                  Rn                  Rp                  " U6 nURs                  5       U:w  a=  U(       d6  [        Rt                  " U5      n[        Rv                  Ry                  UU5      nU(       aN  [?        U[f        5      (       a9  [?        URz                  [        R|                  5      (       a  URi                  5         U(       d  U(       Gap  [?        TR@                  S   [(        R~                  5      (       GaC  U(       a  T Rb                  Re                  T5      nOTR@                  S   Rk                  5       nUGb  [        U5      S:  Ga  [0        R                  =(       d    U(       + =(       a    U(       + n[(        R                  R                  TR@                  S   5      n[        [        U5      5      S:  nU(       d~  U(       aw  [        UR                  5       5      S:X  aZ  TT R                  ;   aJ  U(       dC  U(       d<  [        R                  R                  UR                  5       [(        R                  5      nU(       d  [        U5      (       a  TR@                  S   R                  5       (       d)  [?        URz                  [        R|                  5      (       a4  [        Rv                  Ry                  U[        Rt                  " U5      US9nOeU Vs/ s H:  n[?        U[(        RB                  5      (       a  URH                  RJ                  OUPM<     nn[        Rv                  R                  XUS9n[        [        TR`                  5      5      nUS:  Ga:  [?        U[f        5      (       Ga$  TR`                   GH  nUR                  [        ;   Ga  UR                  5         [(        R*                  RP                  R                  RT                  [(        R*                  RP                  R                  RT                  [(        R*                  RP                  R                  RT                  /n/ nT R                  (       d=  UR                  [(        R*                  RP                  R                  RT                  5        [(        R                  R                  (       Ga  U[(        R*                  R                  R                  RT                  [(        R*                  R                  R                  R                  [(        R*                  RP                  R                  RT                  [(        R*                  R                  R                  RT                  [(        R*                  R                  R                  R                  [(        R*                  R                  R                  R                  [(        R*                  R                  R                  R                  /-  nU[(        R*                  R                  R                  RT                  [(        R*                  R                  R                  R                  [(        R*                  R                  R                  R                  [(        R*                  R                  R                  RT                  [(        R*                  R                  R                  RT                  [(        R*                  R                  R                  R                  /-  n[(        R                  R                  (       a2  U[(        R*                  R                  R                  RT                  /-  nUR                  U;   aN  [        Rv                  Ry                  U[        Rt                  " TR@                  S   Rk                  5       5      SS9nUR                  U;   af  TUR                  S   L aT  [        Rv                  Ry                  U[        Rt                  " [        TR@                  S   R                  5      5      5      nUR                  S:X  d  GM  [?        URz                  Rz                  [        [        45      (       d  GM  URi                  5         GM     UR                  [        TR`                  5      5        [?        U[f        5      (       a%  UR                  5       (       a  UR                  5         [?        U[f        5      (       an  [?        URz                  [        5      (       aO  URz                  Rz                  n[?        U[        5      (       a$  UR                  SS9(       a  URi                  5         S S S 5        S S S 5        S S S 5        [?        W[f        5      (       GaK  [?        URz                  [        R                  5      (       Ga!  [?        URz                  Rz                  [        R                  5      (       a(  URz                  Rz                  R                  ST5        GO[?        URz                  Rz                  [        R                  5      (       Ga  URz                  Rz                  R                  ST5        [?        URz                  Rz                  [        R                  5      (       an  [?        URz                  Rz                  Rz                  [        R                  5      (       a1  URz                  Rz                  Rz                  R                  ST5        O[?        URz                  Rz                  [        R                  5      (       a  URz                  Rz                  R                  (       ds  [?        URz                  Rz                  R                  S   [        R                  5      (       a3  URz                  Rz                  R                  S   R                  ST5        T R                  U5        [        5       nT R                  T+S   H  nUUR                  5       -  nM     T R
                  T,S   H  nUUR                  5       -  nM     S[         4U+U,U 4S jjnTR                  S :w  Gag  [        R                  R                  R                  nS![        S[         SS 4U 4S" jjnU GHt  n T R                  R                  U / 5      n!UR                  U    n"UR                  5       R                  U"5      (       d  S#G[         SG[        4S$ jn#U#" U"GR                  5      (       a'  U" U U"GR                  :  U  S%U"GR                   35        U#" U"GR                  5      (       a'  U" U U"GR                  :*  U  S&U"GR                   35        U! H  n$[        U$RJ                  5      n%U%T GR                  -
  n&U&(       a=  G[        U&[         S'9n'T R                  GR                  U'/ 5      R                  U$5        Ml  U" U$RJ                  U$RJ                   5        M     GMw     T =GR                  U-  sl        G[        [        R                  R                  R                  TR@                  Re                  S(0 5      5      n(U(c   e[        S) U(GR                  5        5       5      n)UU):  d'   S*U S%U) S+TGR                  5        S,U" 5        35       eU$ s  snf ! , (       d  f       GN|= f! , (       d  f       GN= f! , (       d  f       GN= f)-Nmsgrf   c                 Z   > [         R                  S[        TR                  5      U 5        g )Nzlowering %s %s)r  r  r   format_node)r  rq  s    rj   r  %GraphLowering.run_node.<locals>.debug  s    II&
1==(A3Grk   r   )CompilerBisectorr   inductorrK   c                     > [        T 5      $ rh   )reprr|  s   rj   <lambda>(GraphLowering.run_node.<locals>.<lambda>  s	    ark   rI   F)add_to_fallback_setrG  -user_defined_triton_kernel_layout_constraintsrE  z1Unknown triton_kernel_default_layout_constraint: r   rn  r  c              3   >   #    U  H  oR                   S :H  v   M     g7f)r   Nr   )rp  r  s     rj   rr  )GraphLowering.run_node.<locals>.<genexpr>  s     DGDGGx/Gs   c              3   @   >#    U  H  oR                   T;   v   M     g 7frh   )r   )rp  r  as_strided_opss     rj   rr  r    s      *:A$~-'   inductor_realize_to_strides   )allow_paddingr.   Tr   d   )	thresholdorigin_nodec                     > TR                   TS   V s/ s H  n SU R                  5        SU  S3PM     nn UR                  S TR                  TS   5       5        SR	                  U5      $ s  sn f )Nunbacked_symbol_defs= in:

c              3   P   #    U  H  nS UR                  5        SU S3v   M     g7f)r  r  r  N)get_unbacked_symbol_defs)rp  r   s     rj   rr  BGraphLowering.run_node.<locals>.format_new_defs.<locals>.<genexpr>  s1      ?B ((C(C(E'FfRDPRS?s   $&z***
)r  r  extendr  r   )r  r  buffer_watermarkoperation_watermarkr   s     rj   format_new_defs/GraphLowering.run_node.<locals>.format_new_defs  s      <<(8(9::C ((D(D(F'GvcURTU:   HH //*=*>?  <<?"s   !A,r0  r   c                 t   > [         R                  " X5      nTR                  USS9  TR                  U5        g )NTr  )r0   AssertScalarr  r  )r   r  	assert_opr   s      rj   make_assert+GraphLowering.run_node.<locals>.make_assert  s2    OOD6	$$Y$>''	2rk   r  c                 `    U [         [         * 4;   a  g [        U 5        g! [         a     gf = f)NFT)r-   r1  	TypeError)r  s    rj   is_convertible.GraphLowering.run_node.<locals>.is_convertible  s5    & 11#()F#'( )#()s     
--z >= z <= )r  unbacked_bindingsc              3      #    U  H8  n[         R                  R                  R                  R	                  X5      v   M:     g 7frh   )r_   r  r   unbacked_renamingsr   )rp  r  s     rj   rr  r    s5      31A %%88<<QBB1s   A Azfailed z (inductor >= fx)
fx node is: z
new operations are:

)r  !torch._inductor.compiler_bisectorr  r   r  r  r   fetch_args_kwargs_from_envrV   r0   r	  current_originsr  r_   r   r>  r?  rJ   disable_subsystemrI   rp   r  r  r  r/   'triton_kernel_default_layout_constraintrG   r   rH  r   r   r   r   r   SymFloatSymBoolr   r   r
  run_noder   
as_stridedr  as_strided_as_strided_scatterresize	resize_asr   r  r   r   rE   r  r   r  r*  any_is_symbolicmaybe_get_strideget_stride_orderrv  require_stride_orderr  ry  r   r   _prims_commonis_non_overlapping_and_denser"   r  r@  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesr,   rN   realize_hintr   mm_int_mmr   r  r   rC  _has_mkldnnr  _linear_pointwisebinarymkldnn_rnn_layeronednnqlinear_pointwiser7  binary_tensor_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrd   r   rX  rB   rC   
mark_reusehas_exceeded_max_readsrD   has_large_inner_fnLoops_post_init_setattrBufferr  MultiOutputindicesinputsr
  r  rB  r  r   r'   r  popvar_to_range _default_unspecified_value_rangeissubsetr   rx   lowerupperr  r   
setdefaultr$   r  r  ).r   rq  r  r  originsis_call_functionrd   re   r  rP  rQ  	is_outputis_user_visibleis_input_for_as_stridedr6  sym_stridesstride_orderr  denseunbacked_symbols_in_stridesr  	num_usersr  need_fixed_layoutneed_fixed_channels_last_layoutcurrnew_unbacked_defsr  r   r  r   r  i0rasvrr  rafvsmissingi1r  renamed_unbacked_bindingsr  r  r  rW  s.   ``                                        @@@rj   r  GraphLowering.run_node  s   	Hs 	Ht 	H 	Gt||,!$//2#44?2::1=LD~d33GYY&&w/1F1F2

 'HHH$4$449!<<'99"K 
 ())!((N# 'HH		 6 6 U UUBBFWWEFBB12  $H!'J#:1#Nt#Nv#NLD!//$GF++AxTR&KFLzLzK{|  !** '(FF5MELL%..%--#P  VVE]//44F"W-a0Fb	)!, 		))11		**22		1199		%%--		((00N DAGGDDI4#C#CCO&) *:;''* '# vvzz7??J	E E  &&-..0#oo33CCWM**,7#%#6#6w#?L__AA&,WFvy11v{{BKK88  4*uu||; ; #">>BB1EGffUm224G&3w<!+;**A/.A%655 " "//LLuE 1':;a? 0 8! 12a7!@!@@ / 7"$"3"3"T"T"OO-u/B/B# 73w<< 66%=1133z"KK8 8 &(__%I%I & " 3 3G <.; &J &F *1')0A 0:!U\\/J/JPQ Q)0 $ ' &(__%J%J &} &K &F Jqww/0I1}FI!>!>GGD{{&;;++- "IINN??GG!IINN--55!IINN22::-)
 ;=7#-44UYY^^5O5O5W5WX 88///- %		 0 0 B B J J %		 0 0 B B I I %		 ? ? G G %		 0 0 B B J J %		 0 0 B B I I %		 0 0 B B I I %		 0 0 B B P P2 - < %		 0 0 G G O O %		 0 0 G G N N %		 0 0 H H O O %		 0 0 Q Q Y Y %		 0 0 B B J J %		 0 0 B B I I@ ;  %xx// 1eiimm6O6O6W6W5X X 1;;*;;%'__%I%I & " 3 3AFF5M4H4H4J K.2 &J &F !KK+JJ !TYYq\ 1%'__%I%I & " 3 3$B166%=CVCV$W!"&F ww(*%fkk&6&6I8NOO"NN,w $| !!#agg,/ &),,1N1N1P1P ##% &),,FKK1T1T{{''dI.....=(y
2
/P fi((ZR]]-S-S&++**BHH55  33M1EFKK,,bii88  33M1Efkk..0A0ABBzKK$$))288H H KK$$))<<]AN v{{//@@"KK,,44!&++"2"2"9"9!"<biiHH((//2EEmUVWv&6@l<< 0 12C!=!=!?? 3//"5"67B!<!<!>> 8		# 		# 		# 44= * ((22I3, 3S 3T 3
 (((,,R4++B/ AACLLRPP)$ )4 ) &bhh//#B"((Nrd$rxxj4IJ%bhh//#B"((Nrd$rxxj4IJB/8C!D$?$??G c2**55b"=DDRH#BGGy: ) (: ''+<<' 9  **AFFJJ7JB,O! %000 )3 3*//13 )% %(AA +,D1J0K L }}/ 0**9*;)<>A 'E
 
2
 2
//ss    AM"2AM	ZAL>AAL9
TAL>!-AL>C6AL>	AMAM"L9AL>L>
AMMAMM
AM	MAM"M"
AM1c                 @   [         R                  (       a  [        S5      e[        R                  S;  a  [        S[        R                   35      eU R
                  R                  5        H  nS n[        U[        5      (       a  UR                  5       nO][        U[        R                  [        R                  [        R                  R                  R                  45      (       a  [!        U5      n[#        X R$                  5      (       a  M  [        SU 35      e   g )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform zUnsupported input dtype )r/   disable_cpp_codegenr9   sysplatformr  r  r   rE   r  r   r   r   r   r   r   r   r   rm   )r   r  rl   s      rj   !validate_can_generate_cpp_wrapper/GraphLowering.validate_can_generate_cpp_wrapper
  s    %%()BCC<<;;(+@)OPP&&--/EE%++)ejj%**2D2D2L2LM  6e<1%9I9IJJ,/Gw-OPP 0rk   is_subgraphparent_wrapper_codec                 0   U R                   R                  5       nUR                  S5        UR                  S5        [        U5      S::  d%   SR	                  SR                  U5      5      5       e[        U5      S:H  nU(       a  SOUR                  5       U l        U R                  (       a  U R                  5         [        U R                  5      U l        [        U R                  U R                  5      nUc   SU R                   S35       eUR                  XU5      U l        U R                  (       a_  U R                  R                  R                   U R                  l        U R                  R                  R"                  U R                  l        g g )	Nr  r   r.   zDoes not support mixing {}+r   zDevice z not supported)r  r  discardr   formatr   r.  rm   r   rS  r5   r)  r6   r$  r*  r  _names_itersrc_to_kernel)r   rU  r  rV  r  only_cpuwrapper_code_gen_clss          rj   init_wrapper_codeGraphLowering.init_wrapper_code  sg    ((--/U#V$< A% 	
'C'J'JHH\"(
 	
% |$)$,5,2B2B2D2241$2B2BC=d.. 
 !,	6T%%&n5	6,077(;
  -1,=,=,J,J,V,VD)!!..<< +	 rk   c                 J  ^  [        U 4S jS 5       5      (       Ga  [        R                  R                  (       a  T R	                  5       $ ST l        T R                  5       R                  nS[        [        R                  [        R                  [        R                  4   S[        [        [        [        R                  4   4S jn[        R                  R                   R#                  5       nUb  [%        [&        R(                  [*        5      (       d  UR,                  (       a  UR,                  R/                  5         UR0                   Vs/ s H
  nUc  M  UPM     nn[2        R4                  " U[&        R(                  5       Vs/ s H  nU" U5      PM     nnOU[%        [&        R(                  [*        5      (       a  T R6                  O[&        R(                   Vs/ s H  nU" U5      PM     nnT R8                  (       a  SS	KJn  [?        T R@                  5       V	V
s/ s H<  u  pU
T R8                  ;   d  M  [%        Xy   [        R                  5      (       d  M:  U	PM>     nn	n
U H3  n	Xy   n[%        U[        R                  5      (       d   eU" U5      Xy'   AM5     [        RB                  RD                  RG                  5          U" U5        SSS5        AS
T l        T RH                  R/                  5         T RJ                  R/                  5         T RL                  R/                  5         [&        RN                  RP                  RR                  R/                  5         [&        RN                  RP                  RT                  R/                  5         [V        RX                  " 5         [        RZ                  " SS05         T R	                  5       sSSS5        $ T R	                  5       $ s  snf s  snf s  snf s  sn
n	f ! , (       d  f       GN5= f! , (       d  f       g= f)zA
For GPU, Triton kernels are autotuned and stored as cubin files
c              3   @   >#    U  H  oTR                   ;   v   M     g 7frh   )r  )rp  r   r   s     rj   rr  9GraphLowering.codegen_with_cpp_wrapper.<locals>.<genexpr>G  s     Iv***r  )ro   xpuFr  rf   c                 N   U c  g [        U [        R                  [        R                  45      (       a  U R                  R
                  $ [        U [        5      (       a  [        U 5      $ [        U [        R                  5      (       d   S[        [        U 5      5      -   5       eU $ )Nz&Unknown type when creating real inputs)r   rp   r   r  r   hintr   r   r   r  r  )r  s    rj   materialize;GraphLowering.codegen_with_cpp_wrapper.<locals>.materializeQ  s     y##Aenn'EFF vv{{*#Az22%ay()u||    SCc$q'lRS    !rk   Nr.   )clone_preserve_stridesTztriton.autotune_at_compile_time).r   r/   tritonautotune_at_compile_timecodegenr   compile_to_modulecallr   rp   r   r  r   r1  r  _guardsTracingContexttry_getr   r_   real_inputsr^   output_stridesclearparams_flatrR  chainr   r0  
compile_fxri  r   r  r*  r+  r,  r$  r#  r(  rB  r  precomputed_replacementsinv_precomputed_replacementsr1   resetpatch)r   compiledrg  tracing_contextparamru  r  rr  ri  r   r  r1  mutated_inps   `            rj   codegen_with_cpp_wrapper&GraphLowering.codegen_with_cpp_wrapperC  s7    IIII}}55 ||~% $) 11388!U\\5>>5<<GH!3u||34!  #(--">">"F"F"H".zMM;8 8 '55'66<<> &5%@%@#%@E  %@   # "+amm!L#!LA $A!L   #K  *!--EE !//!"/#/  $A/   # &&B *343D3D)E*)EIC4#6#66  '{'7F )E ' *  2 '2&6)+u||DDDD+A++N('  2 [[11HHJ[) K $( $$**,''--/''--/  99??A  ==CCE\\#De"LM<<> NM <<>!y#
##*& KJ NMsB   O-O-O2!O7$O<>O<!O<	PP
P
P"c                 j   [        SSS9   SSKJn  U R                  5         U" U R                  5      U l        [
        R                  R                  U R                  U R                  R                  5        U R                  R                  U 5        U R                  R                  5         [        R                  S[
        R                  R                  5        U R                  R!                  U R"                  5      nU R                  R%                  5         UsS S S 5        $ ! , (       d  f       g = f)NzGraphLowering.codegenTlog_pt2_compile_eventr.   	SchedulerzFFinished codegen for all nodes. The list of kernel names available: %s)r   r=  r  r_  r  r_   r  draw_orig_fx_graphrI  r   r*  push_codegened_graphrl  r  rB  rQ  generater   pop_codegened_graph)r   r  r  s      rj   rl  GraphLowering.codegen  s    1N,""$&t7DNGG&&t||T^^5I5IJ2248NN""$IIX00
 &&//0A0ABF113% ONNs   DD$$
D2c                     [        SSS9   SSKJn  UR                  U l        UR                  U l        UR
                  U l        U" U R                  5      U l        U R                  R                  5         SSS5        g! , (       d  f       g= f)a  
This is a more compact version of the `codegen()` above
where we codegen this graph as a subgraph of some parent
graph. The parent graph is passed as an argument: the
intention is to inline codegening of the subgraph in
the parent graph's wrapper code (including the generated
kerenls). The wrapper code is not finalized (via `.generate()`
call), as this will be done in the parent graph's `codegen()`.
zGraphLowering.codegen_subgraphTr  r.   r  N)r   r=  r  r*  r)  r   r  rl  )r   parent_graphr  s      rj   codegen_subgraphGraphLowering.codegen_subgraph  si     :RVW, , 9 9D*55DO+77D&t7DNNN""$ XWWs   A+A??
Bc                     Sn/ n/ nU R                   R                   HL  nUR                  5       nX-  nUR                  XES-  45        UR                  XDR	                  5       45        MN     XU4$ )Nr   r  )r=  r   get_read_write_buffers_sizesr  get_estimated_runtime)r   total_bytesnode_countsnode_runtimesr   	num_bytess         rj   count_bytesGraphLowering.count_bytes  sy    
 NN((D99;I$K1n56  $(B(B(D!EF	 ) 66rk   codec                     g rh   ri   r  s    rj   save_output_codeGraphLowering.save_output_code  s     	rk   c                 p    [        SSSSS9   U R                  5       sS S S 5        $ ! , (       d  f       g = f)NzGraphLowering.compile_to_modulecode_genT,inductor_code_gen_cumulative_compile_time_us)
phase_namer  dynamo_compile_column_us)r   _compile_to_modulerh  s    rj   rm  GraphLowering.compile_to_module  s4    -!"&%S	
 **,
 
 
s   '
5c                 L  ^	^
 SSK Jn  U R                  (       a  U R                  5       OU R	                  5       u  m	n[
        R                  R                  (       aT  SU R                  R                  R                  5       -   U R                  R                  R                  5       -   S-   nUT	-   m	[        R                  T	5        [        R                  " ST	5        [         R"                  " 5       n[$        R&                  " UT	S9   U VVs/ s H  u  pVXVR(                  4PM     nnnUR+                  T	5      u  nm
[        R                  " ST
5        [-        SU
4S	 jU	4S
 jS9  [1        SSS9   UR3                  UT
U0 U R4                  EU R6                  ES9nS S S 5        Xpl        T
U l        X l        [
        R>                  (       a  WRA                  SSS9  WRB                  c   e[E        URB                  5        [F        R                  SURB                  5        [        RH                  " SURB                  5        [
        RJ                  (       a%  [M        SURB                   3[N        RP                  S9  [R        R                  RU                  URB                  5        [R        R                  RW                  [X        RZ                  R]                  URB                  5      S   S-   5        U$ s  snnf ! [.         a    [-        SU	4S jS9  e f = f! , (       d  f       GN= f)Nr.   )PyCodeCachez%"""
Compile-time auto-tuning block: 
z"""
zOutput code: 
%sr  zOutput code written to: %sinductor_output_codec                     > ST 0$ )Nfilenameri   )paths   rj   r  2GraphLowering._compile_to_module.<locals>.<lambda>  s
    T*rk   c                     > T $ rh   ri   r  s   rj   r  r        4rk   )
payload_fnc                     > T $ rh   ri   r  s   rj   r  r  
  r  rk   zPyCodeCache.load_by_key_pathTr  )linemapattrs)timesrepeatzCompiled module path: )filer   z.debug)/	codecacher  r   r  rl  r/   rj  rk  r*  kernel_autotune_defsgetvaluekernel_autotune_callsr   r  ra   r  rQ   inductor_meta_from_configrR   begin_compilestack_tracewriter   rI  r   load_by_key_pathr  r   rC  rD  rE  profile_bandwidth_outputbenchmark_compiled_module__file__rc   r  r  benchmark_kernelprintrQ  stderrr_   output_coder  osr  splitext)r   r  r  tuning_codeinductor_metaline_nor   r  modr  r  s            @@rj   r   GraphLowering._compile_to_module  s   * 04/?/?D))+T\\^ 	g ==117##88AACD ##99BBDE 	  %D&&t,148&@@B**=tD	HOP}w!1!12GP#))$/IC!!">E &*'
 8PTU..DD4+C+CD	 / C V $**))!)< ||'''%		.=93<<H""*3<<.9

K	CLL)	RWW%%cll3A6AB
S Q  	&'
 	 VUs*   K7 K1%.K7 0+L1K7 7L
L#c                     U R                    Vs/ s HU  n[        U[        R                  5      (       a  M$  [        U[        R                  5      (       a  ME  UR                  5       PMW     sn$ s  snf rh   )r   r   r0   NoneAsConstantBufferShapeAsConstantBufferr~  )r   r   s     rj   get_output_namesGraphLowering.get_output_names.  s^     **
*dB$;$;<  tR%=%=> DMMO*
 	
 
s   #A.A.A.c                 V   XR                   R                  5       ;   =(       ar    U R                   U   R                  5       S:H  =(       aK    [        U R                   U   R	                  5       5      S:H  =(       a    [        U R                   U   5      S:H  =(       d    XR                  ;   $ )Nr.   r   r  )r  r  r  r   r  r@   r  r  s     rj   is_unspec_argGraphLowering.is_unspec_arg6  s     %%**,, B!!$'113q8BD%%d+44671<B   1 1$ 78EA	3
 222	3rk   )Mr   rA  rO  rQ  rK  r   r  r  rV  rC  rE  rD  r   r  r   r"  r  r   r7  r>  r.  r  rG  r)  rm   r  rF  r  rN  r   r+  r   r  r  r4   r   r  r  r  r   r(  r  r   r   r   r   r/  r9  r&  r1  r0  r  r2  r5  r4  r'  rP  r@  r  r  rI  rU  r<  r  r8  r$  r%  r#  r   r=  r!  r  r   r   rT  r*  r  )NNNFFNNFFFNNNNNrh   )rf   NFNN)r  r   rf   N)m__name__
__module____qualname____firstlineno__r   r0   r	  __annotations__rp   r   r   r   r   r1  r   r   r   r   r   r   r   objectr&   rx   r   ExternKernelNoder   r	   r  r  r  r   r2   r[  r_  
contextlibr   r   rb  ri  staticmethodr  r  r  r,   r*   r?  r  r  property_subclassesfake_tensorFakeTensorModer  rE   r*  r  r  r  rl   r  r  r  r  r  r  r  r
   r
  r  r  r!  r&  r  r0  r   rY  r=   r]  rF   rc  r   ri  rm  r   r  r  ry  r   r}  rH  r  rS  r8   r_  r  rl  r  rS   r  r  r  r   rm  r  r  r  __static_attributes____classcell__rW  s   @rj   r   r     s   		?"# ,,# 	xc4i()8E#t)4D+EE	F# J,,	tEJJejj!11	2 6:(,"&!%) "!$7;$(26"37'f2HH  f2 !&!12f2 H%	f2
 3-f2 f2 f2 TNf2 !)d2../0#56!
f2 f2 f2 f2 %T#s(^4f2  SM!f2" /#f2$ sm%f2& "(3-0'f2( 
)f2 f2PMeoo((//=>M  M 
	M4U\\ 4 ( ((4. ( (C  ak aD aT a aF  
HH  
 U\\*
 	

 

60D1A 0dA# A$ A
Dell Dt D 5,,88GG  	%bii/0	1$FUZZ FD FQc QeBLL"))4K.L Q8S 8U[[ 8,
8S 
8U39-= 
8& & &R\\ c  FK bii d s (tCy S 
 "))!4bii!?@	  
3 
5<< 
 SM).v	@ 37

"*3-
	
# 8N SV  LL!&vL8<S&[8IL	tY$	%L\IH IC Ic3h ITW IV 5u|| 5 5 5(7(7!&r(748f4E(7	xBKK@	A(7T# S # ( # S # ( P
P
!&vP
8<S&[8IP
	P
d  $UXX]] $ $;
bllBKK/0;
  c5<<&7 8# =>;
 
r||R[[(	)	;
zG:G: *G: cN	G:
 *G: cNG: 
G:Rq%((-- qF qfQ* "'+>B	$$  }$ &&:;	$
 
$La"%T%T	:J5K0K*L a"FsDsDy)9$::; *%(7	T%)3./0$u=NPU=U7V2WW
7  s t  -: -?J ?B
$s) 
3# 3$ 3 3rk   r   c            	       v   ^  \ rS rSrSrS\S\S\SS4U 4S jjr   SS	\S
\	\
   S\	\   SS4U 4S jjjrSrU =r$ )r  iA  z
Mostly a helper class for the subgraph lowering. The main goal is to call
init_wrapper_code with the subgraph related arguments.
r  rd   re   rf   Nc                 2   > Xl         [        TU ]  " U0 UD6  g rh   )r  r
  r  )r   r  rd   re   rW  s       rj   r  SubgraphLowering.__init__G  s    $)&)rk   rU  r  rV  c                 `   > [         TU ]  SU R                  U R                  R                  S9  g )NT)rU  r  rV  )r
  r_  r  r  r*  )r   rU  r  rV  rW  s       rj   r_  "SubgraphLowering.init_wrapper_codeK  s.     	!)) $ 8 8 	" 	
rk   )r  r  )r  r  r  r  __doc__r   r   r  rx   r   r  r8   r_  r  r  r  s   @rj   r  r  A  sn    
*} *S *C *D * "'+>B	



  }

 &&:;	


 


 

rk   r  )r  rL  rR  loggingr>  r  r  rQ  r6  collectionsr   r   typesr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   rp   torch._loggingtorch.fxr   r   torch._decompr   torch._dynamo.utilsr   r   r   r   torch._prims_commonr   torch._subclasses.fake_tensorr   r   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder    r!   %torch.fx.experimental.symbolic_shapesr"   r#   r$   r%   r&   r'   r(   torch.fx.graphr)   torch.fx.noder*   torch.utils._mode_utilsr+   torch.utils._ordered_setr,   torch.utils._sympy.numbersr-   r  r/   r0   r1   codegen.commonr2   r3   r4   r5   r6   r7   codegen.wrapperr8   excr9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   ra  rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   runtimerQ   runtime.autotune_cacherR   r=  rS   r  rT   r*  rU   rV   rW   rX   rY   rZ   r[   r\   r]   virtualizedr^   r_   torch._higher_order_ops.effectsr`   torch._inductor.codecachera   	getLoggerr  r  _logginggetArtifactLoggerr  r  r   rS  r;  r-  torch._inductor.fb.utilsrc   rl   r  rx   r   r   r   rC  r^  r   r1  r   r   r   Interpreterr   r  ri   rk   rj   <module>r
     sc        	 	 
  # %             , 4 7 > 4   ? L   !  / / - ! !  2      $ 8 ( &
 
 
 ( ; 5 !00<Hyy~~$??, 	8s c d $%++ $C $D $25:: (5;;BW $  
	!
6588((+56
u 
dE#s(O6K1L 
A/A/+/eCHo0E+FA/	A/He3EHH(( e3P9
} 
rk   