
    ϦiK?                        S SK JrJrJrJr  S SKJrJrJrJ	r	  S SK
Jr  S SKJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S\4S	 jr\" S
S9 " S S5      5       r\ " S S\5      5       r " S S\5      rg)    )BaseBackend	GPUTargetAttrsDescriptorregister_descriptor)irpassesllvmamd)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                 D    U R                   nSU;   a  S $ SU;   a  S $ S $ )Ngfx94c                 ^    U R                  5       (       d  UR                  5       (       a  S$ S$ )N   r   r   r   r      )is_int8lhsTyperhsTypes     [/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/triton/backends/amd/compiler.py<lambda>min_dot_size.<locals>.<lambda>   s%    9J9JgooN_N_(qfq(q    gfx9c                     g)Nr    r   s     r   r   r      s    r    c                     g)Nr   r#   r   s     r   r   r      s    Lr    )arch)r   arch_strs     r   min_dot_sizer'      s/    {{H (qq3300r    T)frozenc                      \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   S	r
\\S
'   S	r\\S'   S	r\\S'   S	r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   Sr\\S'   S	r\\S '   Sr\\S!'   Sr\\S"'   S	r \\S#'   S$r!\\S%'   S&r"\\S''   S( r#S) r$Sr%g)*
HIPOptions      	num_warps   waves_per_eu   
num_stagesnum_ctasr   num_buffers_warp_specnum_consumer_groupsreg_dec_producerreg_inc_consumerNextern_libs)r.   r.   r.   cluster_dimsFdebugTsanitize_overflowr%   )fp8e5supported_fp8_dtypesr#   deprecated_fp8_dtypesieeedefault_dot_input_precision)r>   allowed_dot_input_precisionsenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namedefaultinstruction_sched_variantc                 "   [        [        5      R                  S-  nU R                  c  0 O[	        U R                  5      nSU R
                  ;   d   SU R
                  ;   d  SU R
                  ;   a  SOSn[        R                  U SU5        SS	/nU H  n[        X S
3-  5      X%'   M     [        R                  U S[        UR                  5       5      5        U R                  S:  a   U R                  U R                  S-
  -  S:X  d   S5       eg )Nlibgfx10gfx11gfx12    @   	warp_sizeocmlocklz.bcr7   r   r.   znum_warps must be a power of 2)r   __file__parentr7   dictr%   object__setattr__strtupleitemsr-   )selfdefault_libdirr7   rQ   libsrK   s         r   __post_init__HIPOptions.__post_init__=   s    h..6 ,,4b$t?O?O:P!TYY.'TYY2F'UYU^U^J^Bdf	4i8C">e3K#?@K 4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr    c           	          SR                  U R                  R                  5        VVs/ s H  u  pU SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )N_-utf-8)join__dict__r[   hashlibsha256encode	hexdigest)r\   namevalkeys       r   hashHIPOptions.hashJ   sa    hh9L9L9NO9NID4&#9NOP~~cjj12<<>> Ps   A7
)&__name__
__module____qualname____firstlineno__r-   int__annotations__r/   r1   r2   r3   r4   r5   r6   r7   rV   r8   rZ   r9   boolr:   r%   rY   r<   r   r=   r?   r@   rA   rB   rC   rD   rE   rG   rI   r_   rn   __static_attributes__r#   r    r   r*   r*      s   IsL#JHc!"3"  ccK#L%#E4"t"D#'2%*2(*5:*'--/9 %*9!d! !#!E3N$$)*!3*L# &/s.0?r    r*   c                   B    \ rS rSrSrSS jr\S 5       r\S 5       rSr	g)	HIPAttrsDescriptorO   pointer_range_32Nc                 .   SU R                   S'   Ub  Uc  g [        X5       VVs/ s HS  u  p4[        R                  U5      (       d  M!  UR                  (       a  M4  UR
                  (       a  MG  UR                  PMU     snnU R                  S'   g s  snnf )NrO   ztt.pointer_range)property_valueszipry   is_within2gbdo_not_specializedo_not_specialize_on_alignmentnumarg_properties)r\   paramsvaluesparamargs        r   _add_backend_properties*HIPAttrsDescriptor._add_backend_propertiesZ   s    35/0>V^ ),F(;3
(;*%?Q?^?^_b?cI++ 494X4X EII(;3
./ 3
s    B	BB/Bc                     [        U S5      (       a  U R                  5       S:*  $ S[        [        U 5      5      ;   a2  [        U S5      (       a!  U R	                  5       R                  5       S:*  $ g)N	ptr_rangeiztorch.Tensoruntyped_storageF)hasattrr   rY   typer   size)r   s    r   r   HIPAttrsDescriptor.is_within2gbd   s]    3$$==?i//Sc^+=N0O0O&&(--/9<<r    c                     [         R                  " X5      n[        R                  U 5      (       a  SOSnX#-   R	                  SS5      nU(       a  U$ S$ )NSN )r   get_property_keyry   r   replace)rl   aligngeneric_keyhip_keyrm   s        r   r   #HIPAttrsDescriptor.get_property_keym   sK    %66sB+88==#3$--c26s"s"r    r#   )NN)
rp   rq   rr   rs   	__slots__r   staticmethodr   r   rw   r#   r    r   ry   ry   O   s6     $I
   # #r    ry   c                   .  ^  \ rS rSr\S\4S j5       rS\SS4U 4S jjrS\4S jr	S r
S	 rS\\\4   4S
 jrS rS r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rS r\R6                  " 5       S 5       rSrU =r$ )
HIPBackendu   r   c                      U R                   S:H  $ )NrF   )backend)r   s    r   supports_targetHIPBackend.supports_targetw   s    ~~&&r    returnNc                 t   > [         TU ]  U5        [        UR                  [        5      (       d   eSU l        g )Nhsaco)super__init__
isinstancer%   rY   
binary_ext)r\   r   	__class__s     r   r   HIPBackend.__init__{   s.     &++s++++!r    c                    SU R                   R                  0nSU;  a]  [        [        R                  5      nU R                   R                  S;   a  UR                  SS15        [        [        U5      5      US'   SU;  a  [        R                  " SS5      S:H  US'   UR                  [        R                  R                  5        Vs0 s H  oDU;   d  M
  XAU   _M     sn5        [        S	0 UD6$ s  snf )
Nr%   r<   )gfx940gfx941gfx942fp8e4b8fp8e5b16rA   TRITON_DEFAULT_FP_FUSION1r#   )r   r%   setr*   r<   updaterZ   sortedosgetenv__dataclass_fields__keys)r\   optsargsr<   ks        r   parse_optionsHIPBackend.parse_options   s    (()!-#&z'F'F#G {{#AA$++Y
,CD+08L1M+ND'(T)')yy1KS'QUX'XD#$)H)H)M)M)O])OAX\S\ZQQZ)O]^!D!! ^s   
	C2	C2c                     UR                   UR                  UR                  UR                  S   UR                  S   UR                  S   4$ )Nr   r.   r0   )r-   r2   sharedr8   )r\   metadatas     r   pack_metadataHIPBackend.pack_metadata   sO    OO!!!$!!!$!!!$
 	
r    c                 4    S[        U R                  5      0nU$ )Nr'   )r'   r   )r\   codegen_fnss     r   get_codegen_implementation%HIPBackend.get_codegen_implementation   s    %|DKK'@Ar    c                     SSK Jn  SU0$ )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r\   r   s     r   get_module_mapHIPBackend.get_module_map   s    719==r    c                 0    [         R                  " U5        g N)r
   load_dialects)r\   ctxs     r   r   HIPBackend.load_dialects   s    #r    c                     [        X5      $ r   )ry   )r\   r   r   s      r   get_attrs_descriptorHIPBackend.get_attrs_descriptor   s    !&//r    c                 ,    [         R                  X5      $ r   )ry   r   )r   r   s     r   compute_spec_keyHIPBackend.compute_spec_key   s    !223>>r    c                  |   [         R                  " S5      n U b"  [        U 5      nUR                  5       (       a  U$ [        [        5      R
                  S-  nUR                  5       (       a  U$ [        S5      nUR                  5       (       a  U$ [        S5      nUR                  5       (       a  U$ [        S5      e)NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r   r   r   is_filerT   rU   	Exception)lld_env_pathllds     r   path_to_rocm_lldHIPBackend.path_to_rocm_lld   s     yy!67#|$C{{}}
8n##&77;;==J./;;==J$%;;==Jqrrr    c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR!                  U 5        U $ r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_combineadd_canonicalizeradd_reorder_broadcastadd_cseadd_licmadd_symbol_dceadd_loop_unrollrunmodr   optionspms       r   	make_ttirHIPBackend.make_ttir   s    __S[[)
!!"%..r2#''+))"-b!r"$$R(##B'
s
r    c                 l   [         R                  " U R                  5      nUR                  5         [        R
                  R                  USUR                   3UR                  UR                  UR                  5        UR                  U 5        [         R                  " U R                  5      nUR                  5         [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [         R                  R                  R#                  X2R                  UR$                  UR&                  5        [        R                  R                  U5        [         R                  R                  R)                  U5        [        R                  R+                  US5        [         R,                  " UR                  5      (       ai  UR.                  S:w  d   S5       e[         R                  R                  R1                  X2R.                  5        [        R2                  R5                  U5        [         R                  R                  R7                  U5        [        R                  R+                  US5        [        R                  R                  U5        [        R                  R9                  U5        [         R,                  " UR                  5      (       a)  [         R                  R                  R;                  U5        [<        R>                  RA                  SS5      S:X  aq  [         R                  R                  RC                  U5        [        R2                  R5                  U5        [         R                  R                  RE                  U5        [        R2                  R5                  U5        [        R2                  RG                  U5        [        R2                  RI                  U5        UR                  U 5        U $ )Nzhip:Tr   zTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.AMDGCN_USE_BUFFER_OPS0r   )%r   r   r   r   r   r   add_convert_to_ttgpuirr%   r-   rQ   r2   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr
   add_accelerate_matmulrB   rC   add_optimize_epilogueadd_optimize_dot_operandshas_matrix_core_featurer1   add_stream_pipelinev2r   r   insert_instruction_sched_hintsadd_reduce_data_duplicationadd_reorder_instructionsr   environgetadd_canonicalize_pointersadd_convert_to_buffer_opsr   r   r   s       r   
make_ttgirHIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00\\7C_C_ahanano44R8

00400T:&&w||44%%* Q .P Q*
 JJ44R9K9KLMM++B/

99"=00T:44R82226&&w||44JJ77;::>>1373>JJ88<MM++B/JJ88<''+b!$$R(
s
r    c                 \   U n[         R                  " UR                  5      nUR                  5         [        R
                  R                  R                  XBR                  5        Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R                  U5        [
        R                  R                  U5        [
        R                  R                  U5        Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R!                  U5        [
        R                  R#                  U5        [
        R                  R%                  U5        [
        R                  R'                  U5        [
        R                  R!                  U5        [
        R                  R#                  U5        [
        R                  R)                  U5        [        R
                  R                  R+                  XBR,                  5        [.        R0                  R3                  SS5      S:X  a  [
        R4                  R7                  U5        [        R
                  R                  R9                  XF5        UR;                  U5        [<        R>                  " 5         [<        R                  " 5       n[<        R@                  " X75      n[        RB                  " U5        [<        RD                  " U[        RF                  UR                  S5        [        RH                  " XR                  5        [        RJ                  " US5        [        RL                  " USS5        [        RL                  " US	S5        [        RL                  " US
S5        [        RL                  " USURN                  S:H  5        URQ                  5        V	s/ s H  oRS                  5       (       a  M  U	PM     n
n	U
S   RU                  [        RV                  5        U
S   RY                  SSURZ                  URN                  -   35        U
S   RY                  SUR\                   5        UR^                  (       a  SOSnU
S   RY                  SU5        [        R`                  " U
S   5        URb                  (       aS  URb                   VVs/ s H$  u  p[        Rd                  " X5      (       d  M"  UPM&     nnn[<        Rf                  " X5        [<        Rh                  " U[<        Rj                  UR                  S/ URl                  5        U Ro                  S5      US'   [        Rp                  " U5        [s        U5      $ s  sn	f s  snnf )Nr   TTRITON_DISABLE_LINE_INFOr   r   i  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rP   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr>   zdenormal-fp-math-f32ztriton_gpu.sharedr   ):r   r   r   r   r
   r   r   %add_decompose_unsupported_conversionsr%   add_optimize_lds_usageconvertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   lower_instruction_sched_hintsrI   r   r  r	  llvmiradd_di_scopeadd_builtin_func_to_llvmirr   r	   init_targets	to_moduleattach_target_tripleattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrQ   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr-   r/   rD   set_all_fn_arg_inregr7   need_extern_liblink_extern_libsoptimize_moduleOPTIMIZE_O3rA   get_int_attrcleanup_bitcode_metadatarY   )srcr   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modfnfnsdenormal_moderk   pathpathss                  r   	make_llirHIPBackend.make_llir   s   __S[[)


@@\\R 

11"llOT$$R(**2.11"5 	

((\\9E''+b!''+**2.''+b!$$R(

88=^=^_::>>4c:cAMM&&r*

55bD
s 	,,.>>#/  *x):):GLL"M 	Hll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224P4b<M<M<Or4PA > >?A8Bw?P?PQXQbQb?b>c:deA0W5I5I4JL+2+E+E6A1=A
 	  Q(.5.A.Ai.AltSEXEXYaEhT.AEi!!(2Xt'7'7r2wOgOgh !--.AB$$X.8}/ Q js   5V#V#-!V(V(c           	      V   [         R                  " SU 5      n[        U5      S:X  d   eUS   US'   [        R                  " U [
        R                  UR                  S/ UR                  S5      n[        R                  R                  SS5      S	:X  a  [        S
5        [        U5        U$ )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r.   r   rk   r   FAMDGCN_ENABLE_DUMPr   r   z!// -----// AMDGCN Dump //----- //)refindalllenr	   translate_to_asmr
   r%  r%   rA   r   r  r	  print)r5  r   r   namesamdgcns        r   make_amdgcnHIPBackend.make_amdgcnI  s    
 

QSVW5zQ 8&&sC,=,=w||RQSU\UmUmotu::>>.4;56&Mr    c                    [         R                  " XR                  S5      n[        R	                  5       n[
        R                  " 5        n[
        R                  " 5        n[        UR                  S5       nUR                  U5        S S S 5        [        R                  " USSSUR                  SUR                  /5        S S S 5        [        UR                  S5       nUR                  5       n	S S S 5        S S S 5        W	$ ! , (       d  f       N= f! , (       d  f       NX= f! , (       d  f       N:= f! , (       d  f       W	$ = f)Nr   wbz-flavorgnuz-sharedz-orb)r
   assemble_amdgcnr%   r   r   tempfileNamedTemporaryFileopenrk   write
subprocess
check_callread)
r5  r   r   r   	rocm_pathtmp_outtmp_infd_infd_outrets
             r   
make_hsacoHIPBackend.make_hsacoX  s    ##Cr://1	((*g,,.&&++t,KK& -%%y)UIv{{\`bibnbn&op / gllD)Vkkm * + 
 -, /. *) +* 
sT   D7!D8D
9DD7!D&2D7
DD
D#	D7&
D4	0D77
Ec                 l   ^ ^ UU 4S jUS'   UU 4S jUS'   UU 4S jUS'   UU 4S jUS'   UU 4S	 jUS
'   g )Nc                 (   > TR                  XT5      $ r   )r   r5  r   r   r\   s     r   r   'HIPBackend.add_stages.<locals>.<lambda>g      t~~cW/Ur    r   c                 (   > TR                  XT5      $ r   )r  ra  s     r   r   rb  h      w0Wr    ttgirc                 (   > TR                  XT5      $ r   )r>  ra  s     r   r   rb  i  rc  r    llirc                 (   > TR                  XT5      $ r   )rI  ra  s     r   r   rb  j  s    1A1A#QX1Yr    rH  c                 (   > TR                  XT5      $ r   )r]  ra  s     r   r   rb  k  re  r    r   r#   )r\   stagesr   s   ` `r   
add_stagesHIPBackend.add_stagesf  s1    UvWwUvYxWwr    c                 v    [         R                  " [        R                  5       S/SS9nU SU R                   3$ )Nz	--versionrd   )encodingrc   )rT  check_outputr   r   r   )r\   versions     r   rn   HIPBackend.hashm  s8    )):+F+F+H+*Vahi!DKK=))r    )r   )rp   rq   rr   rs   r   r   r   r   r   r   r   r   r   rY   r   r   r   r   r   r   r   r  r>  rI  r]  rl  	functools	lru_cachern   rw   __classcell__)r   s   @r   r   r   u   s   '	 ' '"y "T "
"S "
>S*_ 5 >0 ? ? s s&   % %N P Pd    X * *r    r   )triton.backends.compilerr   r   r   r   triton._C.libtritonr   r   r	   r
   dataclassesr   typingr   r   r   typesr   rg   rP  r   rB  rT  rs  pathlibr   r'   r*   ry   r   r#   r    r   <module>r|     s    a a 5 5 ! # #    	 	   
1 
1 $/? /? /?d "# "# "#J{* {*r    