
    a,j                    Z'   U d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZmZ erd dlmZ d dlmZ e j                            dd	          d	k    Zd
Zdedz  fdZdedz  fdZdedz  fdZdedz  fdZdedz  fdZdedz  fdZ defdZ!defdZ"e j                            dd	          d	k    Z#d
Z$dZ%d
Z& e            rdndZ' e(e j                            dd                    Z)e(e*d<    edddd          Z+ee*d<   d
Z,ee*d<    e            Z-edz  e*d <    e            Z.edz  e*d!<    ed"d#d$          Z/ee*d%<   dZ0ee*d&<    e            Z1edz  e*d'<    e            Z2edz  e*d(<    ed)*          Z3ee*d+<   d
Z4ee*d,<   i Z5e6e7e7f         e*d-<   dZ8e(dz  e*d.<   d/Z9ed0         e*d1<   e j                            d2d3          d	k    Z:ee*d4<   e j                            d5d3          d	k    Z;ee*d6<   e j                            d7d3          d	k    Z<ee*d8<   e j                            d9 e            rd3nd	          d	k    Z=ee*d:<   e j                            d;d	          d	k    Z>e j                            d<d	          d	k    Z?d
Z@dZAe j                            d=d	          d	k    ZBe j                            d>          d	k    ZCe j                            d?          d	k    ZDe j                            d@d	          d	k    ZEe j                            dA e            rd3nd	          d	k    ZFdZGdZHdZIe j                            dBd3          d	k    ZJe j                            dC          d	k    ZKe j                            dDdE          ZLedF         e*dG<   dZMdZN e"            ZOd
ZPd
ZQdZRd
ZSdZTe	jU        jV        jW        e*dH<   dZXe	jU        jV        jW        e*dI<   dZYe	jU        jV        jZ        e*dJ<   dZ[e	jU        jV        jW        e*dK<   dZ\e	jU        jV        jW        e*dL<   dZ]e	jU        jV        jW        e*dM<   dZ^ee_dN         ge_dN         f         dz  e*dO<   dZ`ee_dN         ge_dN         f         dz  e*dP<   dZad
Zbd
Zcd
ZddZei Zfe6e7e6e7ef         f         e*dQ<   i Zge6e7e6e7ef         f         e*dR<   dZhe j                            dSd	          d	k    Zid
ZjdZkdZld
dTdUddVZme6e7ef         e*dW<   dXZnedY         e*dZ<   d
Zog Zpe_e7ee_dN         ge_dN         f         z           e*d[<   dZqe(dz  e*d\<   dZrd
Zsd Zte(e*d]<   d^Zued_         e*d`<   dZvee(ge(f         dz  e*da<   dbZwedc         e*dd<   d^Zxede         e*df<   dZyee(ge(f         dz  e*dg<   dbZzedc         e*dh<   d^Z{ede         e*di<   dZ|ee(ge(f         dz  e*dj<   dbZ}d
Z~ee*dk<   dZdlZdmZ eddndop          Zee*dq<   e j                            dr          d	k    Ze j                            ds          d	k    Ze j                            dt          d	k    Ze j                            du          d	k    Ze j                            dv          d	k    Z e( e j        dwdl                    Z e( e j        dxdy                    Zde(dz  fdzZ e            Ze(dz  e*d{<   e j                            d|d	          d	k    Ze j                            d}d3          d	k    Ze j                            d~d3          d	k    Ze j                            d e            sd	nd3          d	k    Zee*d<   dZde*d<   g Ze_e7         e*d<   g Ze_e7         e*d<   d
Zee*d<    eddd
$          Zee*d<   g Ze_e(         e*d<   e j                            dd                                          Zde(dz  fdZ e            Ze(dz  e*d<   e j                            dd                                          Ze j                            dd                                          Zed         e*d<   e j                            dd                                          Zed         e*d<   d
Zee*d<   d
Zee*d<   dZee*d<   dZee*d<   dbZed         e*d<   dZee*d<   dZee*d<   e j                            dd3          d	k    Zee*d<   d
ZdZd
Ze j                            d          d	k    Ze j                            d          d	k    ZdZdZdZe j                            d          d	k    Z e(e j                            dd                    Z ee j                            dd                    Ze j                            d          d	k    Ze j                            d          d	k    Z e(e j                            dd	                    Zd Zd Z G d d          Z G d d          Ze j                            dd3          d	k    Zde7defdZde7defdZde7defdZe j                            dd          Ze	j        j        sd	nd3Ze j                            de          d	k    Ze j                            dd3          d	k    Ze j                            dd3          d	k    Ze j                            dd	          d	k    Ze j                            d          d	k    ZdZdZdZdZe(dz  e*d<    eddæ          Zee*d<   d
Zd
Zd
ZdZe j                            dŦ          d	k    ZdZeg df         dz  e*d<   d
Ze j                            dȦ          d	k    Zee*d<   e j                            dʦ          d	k    Zee*d<   e j                            ddͦ          Ze j                            d e            rd3nd	          d	k    Zee*d<   dZee*d<   dZe j                            dd	          d	k    ZdUZdZdZdZdZe(dz  e*d<   dZd
ZdZd
Zd
Z e j        dd	          d	k    Z e j        dצ          d	k    Z e(e j                            dd                     Ze j                            dd3          d	k    ZdZd
ZdZd
Zd
Zd
Zd
ZdUZdUZdZdZd
Zd
ZdZd
Zde	j        v pde	j        v Z e            peZe j                            dd	          d	k    ZdZe7dz  e*d<   dZe7dz  e*d<    eddͬæ          Ze7e*d<   de7fdZ e            Ze7e*d<   dZe(e*d<    eddd$          Z ee*d<    edd
æ          Zee*d<   d
ZdlZddgZe_ed         e7z           e*d<   d
Zee*d<   e j                            dd3          d	k    ZdZe(e*d<   dZe(e*d<   dZ	ee*d<    G d d          Z
 G d d          ZdefdZde(fdZ e            rdn
 e            Ze(dz  e*d<    eddd$          Zee*d <    ed          Ze(e*d<    e!            Zee*d<    ed*          Zee*d<    eddd
$          Zee*d	<   e j                            d
d3          d	k    Zee*d<    ed*          Zee*d<   e7dz  e*d<    e            r	 d dlmZ erM ej        e j                            e                    de j                  d                    Zn ej        d          Zn# ee f$ r dZY nw xY wdZdѐZ!e j                            dd	          d	k    Z"e j                            dd	          d	k    Z#d
Z$d
Z%dZ&d
Z'dZ(dZ)d
Z*dZ+d
Z,ee*d<   e j                            dd3          d	k    Z-d
Z.d
Z/d
Z0d
Z1 e             Z2ee*d<   e j                            ddͦ          Z3e3dk    Z4e3d	k    rdne3Z5e j                            dd          Z6e7dz  e*d<   e j                            d          d	k    Z7d
Z8e j                            dd3          d	k    Z9ee*d<   d
Z:ee*d<   d
Z;ee*d<   d
Z<ee*d <   d
Z=ee*d!<   d
Z>ee*d"<   d
Z?ee*d#<   d
Z@ee*d$<   d
ZAee*d%<   e j                            d&d3          d	k    ZBe j                            d'd3          d	k    ZCee*d(<   dZDee*d)<   i ZEe6e7e6e7ef         f         e*d*<    e(e j                            d+d,                    ZFe(e*d-<   d
ZGee*d.<   de7dz  fd/ZH ed0dͬæ          ZIe7e*d1<    G d2 d3          ZJ G d4 d5          ZK G d6 d7          ZL G d8 d9          ZM G d: d;          ZN G d< d=          ZO eeO           G d> d?eO                      ZP eeO           G d@ dAeO                      ZQ G dB dC          ZRd5ZSedD         e*dE<   d7ZTedF         e*dG<   dHZUedH         e*dI<   d7ZVed7         e*dJ<    G dK dL          ZW G dM dN          ZX G dO dP          ZYg dQZZe_e7         e*dR<   g dSZ[e_e7         e*dT<   dgZ\e_e7         e*dU<   g Z]e_ee	j^        e	j^        e	j^        gdf                  e*dV<    e j        dWd	          d	k    Z_ G dX dY          Z` G dZ d[          Zaerd d\lbT  G d] d^          Zce j                            d_d3          d	k    Zdee*d`<    eeje        ef                    dS (a      N)Callable)AnycastLiteralTYPE_CHECKING)	is_fbcode)Configget_tristate_envinherit_fields_frominstall_config_module)InductorChoices)CUDAGraphPolicyTORCHINDUCTOR_INPLACE_PADDING1Freturnc                       t          d          S )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
        V/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBBr   c                      t           j                            d          dk    rdS t           j                            d          dk    rdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr       sE    	z~~011S88t	z~~011S88u4r   c                       t          d          S )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr#   %   r   r   c                       t          d          S )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr&   )   s    IJJJr   c                  B    t          dt                      sdnd           S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr)   -   s'    9KK)T  r   c                       t          d          S )N&TORCHINDUCTOR_AUTOTUNE_AT_COMPILE_TIMEr   r   r   r    autotune_at_compile_time_defaultr,   4   s    DEEEr   c                      d} dt           j        v r#t           j                            d          dk    S t                      r%t          j                            d          }|| k    S dS )N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr6   8   sk    #$ /2:==z~~FGG3NN	 '<<;
 
 666 tr   c                      d} dt           j        v r#t           j                            d          dk    S t                      r'd}t          j                            |          }|| k    S dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr0   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer5   s      r   prologue_fusion_enabledr;   G   sg    %&"&"*44z~~=>>#EE	 <'<<WEE888tr   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_rank,TORCHINDUCTOR_PRECOMPILATION_TIMEOUT_SECONDSi,  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cache$TORCHINDUCTOR_FX_GRAPH_CACHE_DEFAULTTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_defaultenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITE)rB   rD   rE   non_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rT   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrapper*TORCHINDUCTOR_CPP_CACHE_PRECOMPILE_HEADERScpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAX$TORCHINDUCTOR_APPLY_GUMBEL_MAX_TRICKTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTS(TORCHINDUCTOR_RUNTIME_TRITON_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)noneri   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)rE   tritonatenr   mixed_mm_choice'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit&size_threshold_for_succ_based_strategyrj   )rj   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinatorrE   )rE   
custom_opscustom_ops_multidtypebucket_all_gathers_bucket_moderj   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinator"bucket_reduce_scatters_bucket_modebucket_all_reduces_fx.bucket_all_reduces_fx_bucket_size_determinator!runtime_estimations_mms_benchmark   g      I@*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)rE   rD   rB   use_experimental_benchmarker+TORCHINDUCTOR_DISTRIBUTED_MAX_AUTOTUNE_GEMM&TORCHINDUCTOR_PIPELINE_GEMM_AUTOTUNINGTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM%TORCHINDUCTOR_DEFAULT_AUTOTUNE_WARMUP"TORCHINDUCTOR_DEFAULT_AUTOTUNE_REPd   c                      t           j                            d          } | dS |                                 dv rd S t	          |           S )N,TORCHINDUCTOR_AUTOTUNE_NUM_CHOICES_DISPLAYED
   r   r   r   r   lowerintenv_vals    r   '_autotune_num_choices_displayed_defaultr     sD    jnnKLLGr}}/))tw<<r   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEM-TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTIONTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitionzCUDAGraphPolicy | Nonecudagraph_policycustom_should_partition_opscudagraph_unsafe_unbacked_ops#max_autotune_allow_flexible_layoutsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPc                      t           j                            dd          } |                                 dv rd S t	          |           S )N*TORCHINDUCTOR_NVGEMM_MAX_PROFILING_CONFIGS5r   r   r   s    r   %_nvgemm_max_profiling_configs_defaultr   n  s<    jnnI3OOG}}/))tw<<r   nvgemm_max_profiling_configs(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacefallback_by_defaultselective_decomposeuse_dceuse_pre_grad_passes)earlylaterE   pre_grad_pass_timinguse_joint_graph_passesuse_post_grad_passesCUTEDSL_ENABLE_AUTOTUNINGcutedsl_enable_autotuningi    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE(TORCHINDUCTOR_COLLECTIVE_BENCHMARK_NRUNS50*TORCHINDUCTOR_COLLECTIVE_BENCHMARK_TIMEOUT30'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSc                  l    t           j                            dd                              d          } | S )N#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT ,r   r   r   split)collect_envs    r    _parse_autoheuristic_collect_envr     s-    *..!FKKQQRUVVKr   c                  l    t           j                            dd                              d          } | S )NTORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmr   r   )use_envs    r   _parse_autoheuristic_use_envr     s,    jnn>
KKQQRUVVGNr   c                   B    e Zd ZdZd e            v Zd e            v ZdS )autoheuristic_collectzT
    Config for which autoheuristic optimizations should collect training data.
    pad_mmr   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r     sB          99;;;F==???HHHr   r   c                   B    e Zd ZdZd e            v Zd e            v ZdS )autoheuristic_usezU
    Config for which autoheuristic optimizations should use learned heuristics.
    r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r     sB          55777F==???HHHr   r   'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 >    t          |           pt          |           S N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &&A*;D*A*AAr   c                 j    | dk    rt           j        S | dk    rt           j        S | t                      v S Nr   r   )r   r   r   r   r   s    r   r   r     s>    x$++			$-- 79999r   c                 j    | dk    rt           j        S | dk    rt           j        S | t                      v S r   )r   r   r   r   r   s    r   r   r     s>    x ''			 )) 35555r   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT#TORCHINDUCTOR_CACHE_SDPA_CONSTRAINT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold)TORCHINDUCTOR_DELAY_REALIZE_CHEAP_OUTPUTSrD   rE   delay_realize_cheap_outputs.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTr   inductor_choices_classTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLESr   (TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusionloop_index_inversion_in_fusionr   'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   g?max_fusion_unique_io_buffersTORCHINDUCTOR_SPLIT_REDUCTIONSTORCHINDUCTOR_DETERMINISTICTORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL   devgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesTORCHINDUCTOR_DISABLED_PASSESdisabled_passesc                  n    dt           j        v rt           j        d         } nd} | dv sJ d|              | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr$    s^    #rz11z">?#     /..	  
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSElog_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tp)TORCHINDUCTOR_PARTITIONED_SCATTER_ENABLEDr.   "partitioned_scatter_min_partitions   "partitioned_scatter_max_partitionsg?!partitioned_scatter_memory_budgetc                   ,    e Zd ZU dZeed<   dZeed<   dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)r   r   r   r6  bool__annotations__r7  r   r   r   r   r5  r5  f  s3         K/9'99999r   r5  c                      e Zd ZU dZdZeed<   dZedz  ed<   dZedz  ed<   dZ	e
dz  ed<   dZedz  ed<   dZeej        j        gedz  f         dz  ed	<   d
Zed         ed<   dZed         ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZe
dz  ed<   dZeed<   dZedz  ed<   dZeed<   dZedz  ed<   dZed         dz  ed<   dZeed<   dZeed<   dZ ed         ed<   dZ!eed<   dS ) aten_distributed_optimizationszDConfiguration for distributed optimization passes on ATen FX graphs.Fenable_overlap_schedulingNcollective_bucketinginsert_overlap_depsmax_compute_pre_fetchcompute_overlap_multiplercustom_runtime_estimation
analytical)rB  	benchmarkcollective_estimatorrC  compute_estimatormax_memory_increase_gbmax_memory_increase_ratiomax_in_flight_gbmax_coll_distance!log_final_collectives_estimationsbucket_exposed_firstbucket_only_internode_commsenable_fusion_regions)rE   r   r   	coalescedbucket_modeT&prioritize_bucketing_during_scheduling
spmd_checkwarn)rR  errorspmd_mismatch!overlap_scheduling_autofix_cycles)"r   r   r   r   r<  r8  r9  r=  r>  r?  r   r@  floatrA  r   r1   fxNoderD  r   rE  rF  rG  rH  rI  rJ  rK  rL  rM  rO  rP  rQ  rT  rU  r   r   r   r;  r;  k  s        NN ',t+++ )-$+,,, (,+++ )-3:,,,.2ut|222 QUx(EFMTTT @L'";<KKK =Hw89GGG ,0EDL///.2ut|222 &*edl))) %)sTz(((.3%t333 )-$+,,, ).---
 *.4$;--- 	 MNQUU  
 48*D777
 J
 /5M7?+444
 /4%t33333r   r;  c                  T    d} d}t           j                            |          }| |k    S )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    ry   z0pytorch/inductor:enable_parallel_compile_version)r1   r2   r3   )ENABLE_PARALLEL_COMPILE_VERSIONr:   r5   s      r   #parallel_compile_enabled_internallyr[    s0     '(#@G#88AAG*g55r   c                     ddl } |                     t                    }dt          j        v r6t          t          j        d                   }|                    d|           nt          j        dk    rd}|                    d           n|t                      r&t                      sd}|                    d           nHt          j                                        }|sJ t          d	|          }|                    d
|           |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32ry   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcode    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   r[  r1   _utils	cpu_countmin)r`  logcompile_threadsrf  s       r   decide_compile_threadsrj    s    NNN 

H
%
%C&"*44bj)HIJJ4oFFFF		 	 56666	 ?@BB ?56666L**,,	yb),,,o>>>r   ri  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pool<   )rE   quiesce_async_compile_timeuse_static_cuda_launcherz/torch._inductor.config.use_static_cuda_launcheruse_static_triton_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherz2torch._inductor.config.strict_static_cuda_launcherstrict_static_triton_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   force_shape_padTORCHINDUCTOR_PERMUTE_FUSION_use_fp64_for_unbacked_floatsTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmwrap_inductor_compiled_regionsassume_aligned_inputsassume_32bit_indexing.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tableTORCHINDUCTOR_FILE_LOCK_TIMEOUT600file_lock_timeoutenable_autograd_for_aotc                      d } t                      rGt          j                            dd           }t          j                            dd          }|d| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r=   )r   r   r   r   )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr    sW    G{{ T
':DAAjnn[#66$SkSSGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                       e Zd ZU ej                            d          dk    Zeed<   dZ	e
ed<   dZe
ed<   ej                            d          # e
ej                            d                    nd	Ze
d	z  ed
<   d	S )auto_chunkerTORCHINDUCTOR_AUTO_CHUNKERr   enablei   output_size_thresholdr   amplify_ratio_threshold TORCHINDUCTOR_CHUNKER_NUM_CHUNKSN	num_chunk)r   r   r   r   r   r   r  r8  r9  r  r   r  r  r   r   r   r  r    s         :>>">??3FFDFFF "-3,,, $%S$$$ :>><==I 	BJNN=>>??? sTz     r   r  c                      e Zd ZU dZdZej                            dd          dk    Zej                            dd          dk    Z	dZ
edz  ed<    eej                            d	d
                    Zdej                            dej        dk    rdnd          fZedef         ed<   ej                            dd          dk    Zej                            dd          dk    ZdZedz  ed<   dZedz  ed<    ed          Zedz  ed<   dZed         ed<    eej                            dd                    Zej                            dd          dk    Zej                            dd          dk    Zej                            dd          Zej                            dd          dk    Z d Z! eej                            d!d                    Z"ej                            d"d          Z#ej                            d#d          Z$d$Z%d Z&ej                            d%d          dk    Z'd Z(ej                            d&d          dk    Z)ej                            d'd          dk    Z*d(Z+dS ))cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr1   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAYr{  ),r   r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr  r   r9  min_chunk_sizerc  rd  r  tuplestrenable_kernel_profileweight_prepackr  r  r
   r  r8  r  r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayuse_two_step_variance_thresholdr   r   r   r  r    s          G
 	
=sCCsJ  jnn%H#NNRUUOGS4ZS(JERRSSN 	

u3<8+C+CiiOOCtSy	    	
@#FF#M 
 Z^^$FLLPSSN
 04 #*33304!3:444 /./IJJJtJJJ 	 wHI   
 "%

EtLL" " 	
FLLPSS   	
FLLPSS   +-*..?+ +' 	
BCHHCO 
 $)  BJNN+PRUVVWW *..)PRVWW *..)PRVWW   !
 	
=sCCsJ 
  % 	
>DDK  	
FLLPSS  
 '+###r   r  c                      e Zd ZU dZej                            d          dk    ZdZdZ	dZ
eeeedf         z           dz  ed<   d	ZdZdZdZ e             Zd
ZdZedz  ed<   dZdZ edd          Zeed<   dZeed<   dZdZdZdZdZ ej                            d e            sdnd          dk    Z!eed<   dZ"edz  ed<   dZ#eed<   dZ$dZ% e&            Z'edz  ed<   dZ(eed<   dZ)eed<    ej*        dd          dk    Z+eed<   dZ,dZ-ej                            dd          dk    Z.ej                            dd          dk    Z/dZ0e1d         ed<   ej                            d d          dk    Z2ej                            d!d          dk    Z3eed"<   ej                            d#d          dk    Z4dZ5 eej                            d$d                    Z6e1d%         ed&<   ej                            d'd          dk    Z7d(Z8dZ9e:j;        j<        rd)nd*Z=eed+<   dZ>dZ?dZ@dZAeBdz  ed,<   dZCej                            d-d          dk    ZDej                            d.d          dk    ZEej                            d/d          dk    ZFdZG eej                            d0e:j;        j<        rdnd1                    ZH eej                            d2d3                    ZIej                            d4d          dk    ZJej                            d5 e            rdnd          dk    ZKd6ZLdZMedz  ed7<   ej                            d8d          dk    ZNdZOej                            d9          dk    ZPi ZQeReBef         ed:<   d;ZSej                            d<d          dk    ZTeed=<   ej                            d>          ZUeBdz  ed?<   ej                            d@d          dk    ZVeedA<   ej                            dBd          dk    ZWeedC<   ej                            dDd          dk    ZXeedE<   dS )Fr   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr   r1  r   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr  cudagraph_or_error%reorder_for_reducing_graph_partitions&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductionsTORCHINDUCTOR_NATIVE_MATMULnative_matmul!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS TORCHINDUCTOR_DECOMPOSE_SORT_OPSdecompose_sort_ops$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   ry   r.      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16   r_     spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULENABLE_TEMPLATE_TMA_STORETORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32TORCHINDUCTOR_ENABLE_PDL!TORCHINDUCTOR_MIX_ORDER_REDUCTIONry   mix_order_reduction_split_size5TORCHINDUCTOR_MIX_ORDER_REDUCTION_AUTOTUNE_SPLIT_SIZE4TORCHINDUCTOR_MIX_ORDER_REDUCTION_ALLOW_MULTI_STAGESdebug_dump_kernel_inputsr  %TORCHINDUCTOR_TRITON_PROTON_PROFILINGproton_profiling&TORCHINDUCTOR_TRITON_PROTON_OUTPUT_DIRproton_output_dir'TORCHINDUCTOR_TRITON_PROTON_GROUP_BY_SMproton_group_by_sm-TORCHINDUCTOR_TRITON_PROTON_SPLIT_INVOCATIONSproton_split_invocations-TORCHINDUCTOR_TRITON_PROTON_PER_CTA_OCCUPANCYproton_per_cta_occupancy)Yr   r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr  r  r   r9  cudagraph_min_partition_sizeslow_path_cudagraph_asserts!cudagraph_trees_history_recordingcudagraph_trees_objgraphr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  force_cudagraph_syncforce_cudagraphs_warmupr	   r  r8  r  fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr  r  r  autotune_pointwiseautotune_cublasLtr,   r  r  r  getenvr   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionsr  cooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr1   r5   hipr  use_block_ptruse_tensor_descriptor)transpose_discontiguous_tensor_descriptorr  r  codegen_upcast_to_fp32enable_persistent_tma_matmulenable_template_tma_storeskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_threshold
enable_pdlmix_order_reduction"mix_order_reduction_initial_xblockr  'mix_order_reduction_autotune_split_size#mix_order_reduction_non_strict_mode&mix_order_reduction_allow_multi_stagesr	  dictmax_kernel_dump_occurrencesr  r  r  r  r  r   r   r   r   r   y  si         
  :;;sBJ O %*! DHU3sCx#89D@GGG $%  #' ).%  % ,59;;$ +.' 67&d
666 ! $  &v9         37)4666 #( "   N 	
46UccRU	
 	
 	 d    !IsTz    #d"""   -M,L,N,NdTkNNN
 ).--- "OT!!!$ $")$A3GG3NM4NNN (,$'+$ 	
:C@@CG  	
?EEL  	 wHI    	
<cBBcI  	
93??3F     	
=sCCsJ 
 $)  ),

3S99) )L'*%   
 jnn%DcJJcQO   K "'!2:22OS::: M "
 15-
 04 #*333 " 	
5s;;sB !
 !#
/JC P PTW WJNN#:C@@CGM 6;2 !S

25=;L4VCCRV	
 	
   C

<dCC   :C@@CGJ 	
:99;;<WCCTWXX	  *+&15"C$J555

NPSTT	 , +0' 	
MNNRUU + 02d38n111
 #$ 	
>DDK d    %'JNN0% %sTz   
 	
@#FF#M    
 	
FLLPSS d    	
FLLPSS d     r   r   c                      e Zd ZU dZdZej                            dd          dk    Zej                            dd          dk    Z	ej                            dd          dk    Z
ej                            dd	          Zej                            d
d          Zed         ed<   ej                            dd          ZdZdZdZeed<   dZeed<   dZdZeed<   dZedz  ed<   dZeed<   i Zeeef         ed<   ej                            dd          dk    Zeed<   dZeed<   ej                            dd          dk    Zeed<    eej                            dd                    Z eed<   i Z!eee"f         ed<   dZ#eed <   dZ$eed!<   ej                            d"d          dk    Z%eed#<   dZ&eed$<   dZ'edz  ed%<    e(             Z)eed&<   dZ*edz  ed'<   dZ+edz  ed(<   dZ,edz  ed)<   i Z-ee.j/        j0        e1e         f         ed*<   dZ2e1e         dz  ed+<   ej                            d,d          dk    Z3dZ4eed-<   dZ5edz  ed.<   dZ6ee1e         z  dz  ed/<   dZ7edz  ed0<   dS )1aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   AOT_INDUCTOR_DEBUG_SYMBOLS!AOT_INDUCTOR_ENABLE_FRAME_POINTER&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlydynamic_linkagemetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationcheck_lowerboundDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr.   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_so package_constants_on_disk_formatprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libsAOT_INDUCTOR_ENABLE_LTOlink_libtorchcross_target_platformaoti_shim_libraryaoti_shim_library_path)8r   r   r   r   output_pathr   r   r   debug_compiledebug_symbolsenable_frame_pointercompile_wrapper_opt_levelrN  r   r9  filtered_kernel_namesserialized_in_specserialized_out_specrP  r8  rQ  use_consts_asm_buildrR  rS  rT  rU  rB  r  rW  rX  rZ  r   r\  r]  r   r^  r_  ra  rb  rc  r   rd  re  rf  rg  rh  r1   _ops
OpOverloadlistri  
enable_ltork  rl  rm  rn  r   r   r   rE  rE    s          KJNN#?EELMJNN#?EELM 	
:C@@CG  !#
0$! ! EGJNN7E E$g.@&A   
 JNN0$    */ $...  %$$$  GT$(dTk((( !OT     "Hd38n!!! 	
H#NNRUU (    "d!!!  "z~~.BCHHCOOOO s2:>>*CQGGHHKHHH !GT#s(^    $)D((( ,1"D000 	
BCHHCO !$    %)T((( 48$cDj777 $-9;;... (,+++ +/D4K... 26"C$J555 EG4
 5tCy @AFFF'+NDI$+++  93??3FJ M4 )-3:,,, 15sT#Y-444)-C$J-----r   rE  c                       e Zd ZU dZeed<   dS )aot_inductor_modeFcompile_standaloneN)r   r   r   r~  r8  r9  r   r   r   r}  r}    s%           %$$$$$r   r}  c                   d   e Zd ZU dZdZed         ed<   dZdZe	j
                            e	j                            de	j
                            e	j
                            ej                  d                              ZdZedz  ed	<   g d
Zee         ed<    eeeeef          ed e	j                            dd                              d          D                                 Zeeeef         ed<    eeeeef          ed e	j                            dd                    d eD                                                     d          D                                 Zeeeef         ed<   e	j                            dd          dk    ZdZdZeed<   e	j                            dd          dk    Ze ed<   e	j                            d          Z!e"dz  ed<   e	j                            d          Z#e"dz  ed<   e	j                            d d          Z$e"ed!<   e	j                            d"d          dk    Z%e ed#<   e	j                            d$d          dk    Z&e ed%<   e	j                            d&d'          Z'e"ed(<   d)Z(e ed*<   dZ)e ed+<   dZ*e ed,<   d)Z+e ed-<   dS ).cutlassz-
    Config specific to cutlass backend.
    -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Ncutlass_max_profiling_configs)ry   r.   r   r   %cutlass_max_profiling_swizzle_optionsc              #   4   K   | ]}t          |          V  d S r   r   .0xs     r   	<genexpr>zcutlass.<genexpr>  s<       
 
 FF
 
 
 
 
 
r   +TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_SHAPEz2,1,1r   cutlass_dynamic_cluster_shapec              #   4   K   | ]}t          |          V  d S r   r  r  s     r   r  zcutlass.<genexpr>  s<       
 
 FF
 
 
 
 
 
r   .TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_FALLBACKc              #   4   K   | ]}t          |          V  d S r   )r  )r  vs     r   r  zcutlass.<genexpr>  s(      GGAQGGGGGGr    cutlass_dynamic_cluster_fallbackCUTLASS_EPILOGUE_FUSIONr   r   ry   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen),r   r   r   r   r  r   r9  enable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer1   __file__cutlass_dirr  r   r  rz  r   r  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r8  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r  r    s          EJw@AIII  M '""

'GLL//) 	
 	
 K 15!3:444 8D||)49CCC:>$c3m 
 
Z^^=w eCjj	
 
 
 	
 	
; ;!5c3#7    >BTc3m 
 
Z^^@GG)FGGGGG  eCjj
 
 
 	
 	
	> 	>$eCcM&: 	 	 	 	
0#66#= $
  *+!3***
 	
H#NNRUU $    .0Z^^). .d
    -/JNN(- -sTz    (*z~~3S( (    	
DcJJcQ "4    	
;SAASH $     "z~~+U      
 %)T((( +0!4/// -2#T111 $(D'''''r   r  c                   h    e Zd ZU dZedz  ed<   dZedz  ed<   dZedz  ed<   dZdZ	dZ
edz  ed<   dS )cudaNarchr5   cuda_cxxF   r   )r   r   r   r  r  r9  r5   r  enable_cuda_ltoenable_ptxas_infor   r   r   r   r   r  r  	  sz         
 D#*
 GS4Z  HcDj O  01 #*00000r   r  c                       e Zd ZU dZedz  ed<   dZedz  ed<   dZedz  ed<   ej	        
                    ej                            dd                    ZdS )xpuNr  r5   oneapi_rootr  r   )r   r   r   r  r  r9  r5   r  r   r  r  r   r   r  r   r   r   r  r  5	  sx          D#* GS4Z #Kt"""'""2:>>2Mr#R#RSSKKKr   r  c                      e Zd ZU g Zee         ed<   g dZeed                  ed<   dZ	ed         ed<   dZ
dZdZdZdZd	Zed	z  ed
<   ej                            d          Zej                            dd          dk    Zeed<   d	Zed	z  ed<   d	Zed	z  ed<   d	Zed	z  ed<   dZeed<   d	Zee         d	z  ed<   dZeed<   dZeed<   d	S )rocmr  )gfx90agfx942gfx950ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_threshold)r   r   r   r  rz  r  r9  r  r   r  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   ck_dirr  r8  r  r   r  r  r  r  r  r  r   r   r   r  r  D	  s         D$s)F F FtG$@AB    	 wL   
 H J M O #( !IsTz    Z^^233F 	
FLLPSS $   
 +/S4Z... ,0cDj/// 15!3:444 ',t+++ &*L$s)d")))  s !##"""""r   r  )r  r   halidepallascpu_backend)r   r  r  cuda_backendr  tpu_backendxpu_backendc                       e Zd ZdZdS )mtiaFN)r   r   r   disable_welford_reductionr   r   r   r  r  	  s         %r   r  c                   X    e Zd ZU dZdZdZed         ed<   dZed         ed<   dZ	dZ
dZd	S )
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r9  r  assertsdebugscan_kernelsr   r   r   r  r  	  ss         J J
 	 GRS    	 7QR   
 G E LLLr   r  c            	          e Zd ZU ej                            dd          dk    Zej                            dd          dk    ZdZe	dz  e
d<   dZdZdZdZdZdZdZej                            d	d          dk    Zej                            d
d          dk    Zej                            dd          Zej                            dd          ZdZdZee	gdf         dz  e
d<   ej                            dd          dk    Z eej                            dej                            dd                              Zee
d<   dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r   r   r   r   r   r   enabledsave_real_tensorsr  r  r9  	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr   r  r   r   r   r  r  	  s        jnn2C88C?G 
'FLLPSS !IsTz    I H H   M N K JNN#=sCCsJM (>DDK jnn%CTJJO !jnn-OQUVV O 04J#%,333Z^^,BCHHCO &)S

!2:>>2G#M#M	
 	
& &s     r   r  )ztrace.upload_tarrq   rr   rs   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrn   ro   r-  rt   r   _save_config_ignore)r  zcuda.cutlass_dirzcutlass.cutlass_dirzxpu.cutlass_dirr%  ri  ro   rn   rq   rr   rs   r-  rt   r   r  r   rF   rH   rL   rM   _cache_config_ignore_prefix_cache_config_factory_keysexternal_matmul8TORCHINDUCTOR_WRITE_ARE_DETERMINISTIC_ALGORITHMS_ENABLEDc                   ^    e Zd ZU dZeeeeeef                  f         dz  ed<   dZ	e
ed<   dS )lookup_tableNtableTcheck_src_hash)r   r   r   r  rB  r  rz  r   r9  r  r8  r   r   r   r  r  H
  sP         48E4T$sCx.))*T1888  NDr   r  c                   p   e Zd ZU dZeed<   dZedz  ed<   dZeed<   dZe	dz  ed<   dZ
dZdZej                            d          Zedz  ed<   ej                            d	          Zedz  ed
<   dZdZed         dz  ed<   dZdZeed<    ej        d          dk    Z ej        dd          ZdZdZdS )test_configsF%force_extern_kernel_in_multi_templateNforce_custom_op_decompositionforce_no_impl_groupingmax_mm_configs(TORCHINDUCTOR_AUTOTUNE_CHOICE_NAME_REGEXautotune_choice_name_regex(TORCHINDUCTOR_AUTOTUNE_CHOICE_DESC_REGEXautotune_choice_desc_regex)assertrh  track_memory_lifecycleT assume_bucketing_reduces_latency,TORCHINDUCTOR_FORCE_FILTER_REDUCTION_CONFIGSr   )TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULTr   )r   r   r   r  r8  r9  r  r  r  r   runtime_triton_dtype_assertruntime_triton_shape_assertstatic_cpp_dtype_assertr   r   r   r  r  r  *graphsafe_rng_func_ignores_fallback_randomr  r   use_libtorchr  r&  force_filter_reduction_configsdistort_benchmarking_resultbisect_pre_grad_graph'bisect_keep_custom_backend_for_inductorr   r   r   r  r  S
  sU        27)4777 26!4$;555 $)D(((!%NC$J%%%"'"'# .0Z^^2. .d
    .0Z^^2. .d
    27.>BGO4t;BBB L .2$d111 		@AASH # #,")3R# # ".3+++r   r  )*c                   t    e Zd ZU ej                            dd          dk    Zeed<   dZ	eed<   dZ
eed<   dS )	eager_numerics'TORCHINDUCTOR_EMULATE_DIVISION_ROUNDINGr   r   division_roundingFdisable_ftzuse_pytorch_libdeviceN)r   r   r   r   r   r   r*  r8  r9  r+  r,  r   r   r   r(  r(  
  sh         
 	
@#FF#M t    K
 #(4'''''r   r(  %TORCHINDUCTOR_EMULATE_PRECISION_CASTSemulate_precision_casts(g  r   rc  collections.abcr   typingr   r   r   r   r1   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler	   r
   r   r   torch._inductor.choicesr   torch._inductor.cudagraph_utilsr   r   r   inplace_paddingcan_inplace_pad_graph_inputr8  r   r    r#   r&   r)   r,   r6   r;   enable_auto_functionalized_v2r  disable_progressverbose_progressworker_log_pathr   r?   r9  rF   rG   rH   rI   rK   rL   rM   rN   rP   rQ   rR   rB  r  rS   rV   rX   rZ   r\   r^   online_softmaxapply_gumbel_max_trickdcestatic_weight_shapessize_assertsnan_assertsruntime_triton_nan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  rm   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_first*epilogue_fusion_user_defined_triton_kernelpattern_matcherb2b_gemm_passrn   	_inductorcustom_graph_passCustomGraphPassTypero   rp   CustomPartitionerFnTyperq   rr   rs   rt   rz  ru   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrv   rw   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_mul keep_addmm_fused_for_half_dtypesuse_mixed_mmr~   r    reorder_for_compute_comm_overlapr   r   reorder_for_peak_memoryreorder_for_peak_memory_debugr   r   r   r   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bw
cpu_gpu_bwr   distributed_max_autotune_gemmpipeline_max_autotune_gemmmax_autotunemax_autotune_pointwisemax_autotune_gemmr&   inductor_default_autotune_warmupinductor_default_autotune_repr   r   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memtriton_disable_device_detectionr   r   r   r   r   r   r   uppermax_autotune_gemm_backendsr   r   max_autotune_conv_backendsr   r   r   r   r   r   r   r   r   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecollective_benchmark_nrunsrV  collective_benchmark_timeoutcoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr5   r1  layout_opt_defaultlayout_optimizationforce_layout_optimizationcache_sdpa_constraintkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr  r  fallback_randomalign_random_eager"fallback_embedding_bag_byte_unpackimplicit_fallbacks assume_unaligned_fallback_outputr  aggressive_fusionr  r
  enabled_metric_tablesr  r  score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemin_overlap_ratio)max_fusion_buffer_group_pairwise_attemptsr  max_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsdeterministicmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingdo_not_emit_runtime_assertionscompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapescombo_kernel_max_num_args!combo_kernel_per_subkernel_blockscombo_kernels_pointwise_onlyjoint_graph_constant_foldingdebug_index_asserts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr  r  r  r$  r%  r&  r(  r*  _fuse_ddp_communication_fuse_ddp_bucket_sizer-  r.  partitioned_scatter_enabledr0  r2  r3  r5  r;  r[  rj  ri  rl  rn  ro  rp  rr  rt  ru  libfb.pyrw  __package__get_dir_pathr  r  replaceseprv  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler|  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testingr~  _profile_varprofile_bandwidthprofile_bandwidth_regexr  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr  r  r  r  r  r  r  r  r  enable_linear_binary_foldingr  r  r  r  r  r  r  r  r  r   rE  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Tensor*write_are_deterministic_algorithms_enabledr  r  torch.utils._config_typingr(  r.  modulesr   r   r   r   <module>r     s8!   					 



 $ $ $ $ $ $ 4 4 4 4 4 4 4 4 4 4 4 4  ( ( ( ( ( ( ( ( ( (             @777777??????*..!@#FF#M# Ctd{ C C C CD4K    Ctd{ C C C CKtd{ K K K K4$;    F$+ F F F Fd    
 
 
 
 
 JNN7==D 
 	     8Ay{{L33 
 '*cJNNA6JJ' '    
 v?;1	      $) D ( ( ( &C%B%D%D td{ D D D .-// "4$;    )/MB) ) )     " d ! ! ! &C%B%D%D td{ D D D .S-R-T-T td{ T T T $V*VWWW d W W W 05 & 4 4 4 57 !4S> 6 6 6 &* d
 ) ) )  (1*    JNN#>DDKT K K K JNN=sCCsJ D    :>>"<cBBcI
D I I I JNN4YY[[6Qccc   d     >DDK JNN93??3F 
    z~~:C@@CGjnn899S@JNN=>>#E   >DDK JNN4YY[[6QcccRR
       *..!@#FF#M 
<==D HJz~~H HWCD   
    *)++   .3 *   TX 5?<P W W WTX EO=Q X X X TX u8P W W W PT u8L S S SPT 9M T T T
 OS eo7K R R R 	 	;	<=:;	= 		 	 	" 	 	;	<=:;	= 		 	 	   $)     ( 68 c4S>12 7 7 7 79 $sDcN23 8 8 8   z~~&JCPPTWW  # 
 $(    	+ + c3h    FQAB P P P $)  *  (	;	<=:;	=*    &* d
 ) ) )   %  /0 & / / / >D w9: C C CNR .#0Dt0K R R R  4!    5; 7=1 : : :RV 2HcUCZ4H44O V V V  #G4%   
 17 w}- 6 6 6NR .#0Dt0K R R R   */ !4 / / /   
 &,V?<& & & d    JNN@AASH  JNN;<<C 
 z~~:;;sB (NOOSVV  JNN#DEEL #&3BI5r::$ $   !$BI2C88! ! t     .U-T-V-V d
 V V V JNNDcJJcQ " JNNQSVWW
 / JNNBCHHCO   JNN2yy{{4SCCPSTT
     .2 * 1 1 1
 *, T#Y + + + ,. tCy - - - -2 #T 1 1 1 $V47   d    !# DI " " "  Z^^.0A %'' sTz     ,Q+P+R+R cDj R R R  Z^^. %''  DF:>>2ID D%'' (? @ 
 
 
 DF:>>2ID D%'' (? @ 
 
 
 " T ! ! ! " T ! ! !     ! T       =F g89 E E E  $  # # #! d ! ! ! JNN.44; 4   
 "      JNN455<	 jnn%HIISP 
 /3 +03 -14 . 
'LMMQTT  !SJNN=tDD  
  %uJNN?FF    
 JNN<==D  JNNJKKsR ( $'3JNN<cBB$ $    
  
@ @ @ @ @ @ @ @@ @ @ @ @ @ @ @ JNN<cBBcI 
BC BD B B B B: : : : : :6C 6D 6 6 6 6 *I  
 !& 1:SSs JNN68JKKsR  JNN+KSQQUXX  JNN8#>>#E  Z^^$FLLPSS  *..!@AASH
       !#*    %+F>% % % T       &+ "  JNNCDDK ! BF %6!67$> E E E   Z^^$@AASHd H H H(HIISP $ P P P
'LbQQ JNN299;;4OCCC   D     (,  + + + !#  JNN<cBBcI 
 $%      -/ ) ,0 cDj / / /          29=sCCsJ  	788C? BJNN#@!DDEE:>>"BCHHCO  "&   %     "'        "# &* #  %* !$   $     11OUe>O5O Y[[8$8  JNNEsKKsR #
 #' S4Z & & &%) d
 ) ) ) v2     
C 
 
 
 
 6577 S 7 7 7 &. s - - - !'7:! ! !     F   T         C Xi%83%> ?   
 ! D       JNN>DDK 
 +, "C + + + +. "C - - - ,0 !5 / / /: : : : : : : :
X4 X4 X4 X4 X4 X4 X4 X4v6T 6 6 6 6    B '0ikkOdd7M7M7O7Ot O O O $*6:=$ $ $ D    #)&# # # C    ">!=!?!? $ ? ? ? $*6
;$ $ $ D    39&IL3 3 3 )4    JNN>DDK T   
 '-f
>' ' ' t   
 *   9;; 
 $$$$$$ 	@3w3[00bf==zJJ     4w3J??$           
<cBBcI JNN8#>>#E        (- $  (            >DDK # 
 $    !  +4)++o t 5 5 5z~~5r:: B&  , 3 3""  (*z~~"D( ( #*   
 JNNHIISP 0    8#>>#E$ E E E %* T ) ) )  %  $ $ $ (-  , , ,
 $ t # # # $ t # # # 8= . < < < ,1 "D 0 0 0 49 *D 8 8 8 JNN?EEL  *..)JCPPTWW 4 W W W 37 )4 6 6 6 46 tCc3h/0 5 5 5RZ^^,MuUUVV 3 V V V %  % % %	S4Z 	 	 	 	 %+F1% % % c          "C+ C+ C+ C+ C+ C+ C+ C+LU U U U U U U Up
t. t. t. t. t. t. t. t.t% % % % % % % %( ( ( ( ( ( ( (D W1 1 1 1 17 1 1 1< WT T T T T' T T TF# F# F# F# F# F# F# F#T =BW89 A A A 7?g23 > > > "*WX ) ) ) "*WX ) ) )& & & & & & & &
       8K K K K K K K K\" " " T#Y   "* * * T#Y   D ) DI   
 UWhelELI4OPQ V V V BIH#NNRUU +
               84 84 84 84 84 84 84 84v  -,,,,( ( ( ( ( ( ( (2 JNN:C@@CG    
  ck(+ , , , , ,s   =A.|, ,|;:|;