
    Ϧi                        S SK r S SKrS SKrS SKJr  S SKrS SKrS SKrS SK	J
r  S SKJr  SSKJr  S SKJr  SSKJr  SS	KJr   " S
 S5      r " S S5      r\" SS9 " S S5      5       rS rS rS rS rS r\R>                  " \\R@                  /S9r!\R>                  " \\RD                  /S9r#\R>                  " \\RH                  /S9r% " S S5      r& " S S5      r'S r(S r)S r* " S S 5      r+ " S! S"\+5      r, " S# S$\+5      r-S% r.S& r/S' r0S( r1\'" 5       r2/ S)Qr3 " S* S+5      r4 " S, S-\ Rj                  5      r6 " S. S/5      r7 " S0 S15      r8g)2    N)Tuple)	dataclass   )InterpreterError)partial   )interpreter)irc                   2    \ rS rSrS rS rS rS rS rSr	g)	TensorHandle   c                 *    Xl         X l        0 U l        g)z
data: numpy array
dtype: triton type, either pointer_type or scalar_type.
we don't store block_type here because the shape information is already availale in the data field
attr: a dictionary of attributes
N)datadtypeattr)selfr   r   s      Y/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/triton/runtime/interpreter.py__init__TensorHandle.__init__   s     	
	    c                 H    [        U R                  R                  5       5      $ N)boolr   allr   s    r   __bool__TensorHandle.__bool__   s    DIIMMO$$r   c                 ~    U R                   n[        US5      (       a  UR                  n[        US5      (       a  M  U$ )N
element_ty)r   hasattrr   )r   r   s     r   get_element_tyTensorHandle.get_element_ty"   s7    

e\**$$E e\**r   c                 ^    [        U R                  R                  5       U R                  5      $ r   )r   r   copyr   r   s    r   cloneTensorHandle.clone(   s    DIINN,djj99r   c                      X R                   U'   g r   )r   )r   keyvalues      r   set_attrTensorHandle.set_attr+   s    		#r   )r   r   r   N)
__name__
__module____qualname____firstlineno__r   r   r!   r%   r*   __static_attributes__ r   r   r   r      s    	%:r   r   c                        \ rS rSrS rS rSrg)BlockPointerHandle/   c                 L    Xl         X l        X0l        X@l        XPl        X`l        g r   )baseshapestridesoffsetstensor_shapeorder)r   r6   r7   r8   r9   r:   r;   s          r   r   BlockPointerHandle.__init__1   s!    	
(
r   c                 @   U R                   R                  5       nUR                  S-  nU R                  n[        R
                  " U R                   R                  U R                  5      n[        R                  " U R                  [        S9n[        [        U5      5       H  nS/[        U5      -  nXG   X'   U R                  U   R                  [        R                  " XG   5      -   R                  U5      n	XSU	-  U R                  U   R                  -  R                  [        R                   5      -   nXq;   d  M  [        R"                  " XiU R$                  U   R                  :  5      nM     ['        XPR                   R(                  R*                  5      nXV4$ )N   r   r   )r6   r!   primitive_bitwidthr:   npbroadcast_tor   onesr   rangelenr9   arangereshaper8   astypeuint64logical_andr7   r   r   scalar)
r   boundary_checkdtype_ttn_bytesr:   ptrsmasksdim
bcast_dimsoffs
             r   materialize_pointers'BlockPointerHandle.materialize_pointers9   s;   99++---2((tyy~~t/@/@A))6\*+Cs<00J*/JO<<$))BIIl6G,HHQQR\]CS=4<<+<+A+AAII"))TTD$uDJJsO4H4H.HI , D))//"8"89{r   )r6   r9   r;   r7   r8   r:   N)r,   r-   r.   r/   r   rT   r0   r1   r   r   r3   r3   /   s    r   r3   T)frozenc                       \ rS rSr% Sr\\S'   Sr\\S'   Sr	\\S'   Sr
\\S'   S	r\\   \S
'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   Sr\\S'   Sr\\S'   Srg)InterpreterOptionsJ   Nextern_libsFdebugTsanitize_overflowarch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15supported_fp8_dtypesr1   deprecated_fp8_dtypestf32default_dot_input_precision)re   tf32x3ieeeallowed_dot_input_precisionsr   max_num_imprecise_acc_defaultr	   backend_name)r,   r-   r.   r/   rZ   dict__annotations__r[   r   r\   r]   strrc   r   rd   rf   ri   rj   intrk   r0   r1   r   r   rX   rX   J   sx    KE4"t"D#'^%*^(*5:*'--/I %*I)*!3*%L#%r   rX   c                 &   U [         R                  :X  a  [         R                  $ U [         R                  :X  a  [         R                  $ U [         R
                  :X  a  [         R                  $ U [         R                  :X  a  [         R                  $ U $ r   )	rA   uint8int8uint16int16uint32int32rI   int64r?   s    r   _get_signed_np_dtyperx   X   s[    ww		xx		xx		xxLr   c                    [        U [        R                  5      (       a$  [        R                  " [        R
                  5      $ 0 [        R                  [        R                  " [        5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                   [        R                  " [        R                   5      _[        R"                  [        R                  " [        R"                  5      _[        R
                  [        R                  " [        R
                  5      _[        R$                  [        R                  " [        R                  5      _[        R&                  [        R                  " [        R                  5      _[        R(                  [        R                  " [        R                  5      _[        R*                  [        R                  " [        R                  5      _[        R,                  [        R                  " [        R                  5      _[        R.                  [        R                  " [        R                  5      0En[        U [        R0                  5      (       a[  [        U R2                  [        R                  5      (       a$  [        R                  " [        R
                  5      $ XR2                     $ X   $ r   )
isinstancetlpointer_typerA   r   rI   int1r   float16float32float64rr   rq   rt   rs   rv   ru   rw   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )tt_dtypenp_typess     r   _get_np_dtyper   d   s   (BOO,,xx		""
$


BHHRZZ( 	

BHHRZZ( 	

BHHRZZ(	
 	"''" 	"((288$ 	"((288$ 			288BII& 	"((288$ 			288BII& 	"((288$ 			288BII& 	RXXbii(  	RXXbhh'!" 	*#$ 	rxx)%& 	rxx)'( 	*)H, (BMM**h))2??;;88BII&&++,,r   c                    [        [        SUR                   35      n[        [        SUR                   35      n[        R                  " U R	                  5       US9nXaR                  S-
  -	  S-  nUR                  UR
                  -
  S-
  nUR                  UR
                  -
  S-
  n	USUR
                  -  S-
  -  n
UR                  nUR                  nXaR
                  -	  SU-  S-
  -  R                  [        R                  5      nUS:H  n[        R                  " U5      (       a  [        R                  " U[        R                  S9n[        UR
                  5       H   nU
U-	  S-  nUR
                  U-
  UUS:H  '   M"     U
S:H  nSX   -
  X'   X-
  UUU-  '   X   X   -  SUR
                  -  S-
  -  X'   [        R                  " S[        R                  " X-
  U-   SU	-  S-
  5      5      nUR                  U5      nUR                  U5      nUR                  UR                  :  a  XR
                  UR
                  -
  -	  SUR
                  -  S-
  -  nU[        R                  R                   :X  a*  U
SUR
                  UR
                  -
  S-
  -  -  nUUS:  -   nUR                  U5      nO>U
R                  U5      UR
                  UR
                  -
  -  SUR
                  -  S-
  -  nUS:H  n[        R                  " U5      (       a  XaR
                  -	  SU-  S-
  -  R                  [        R                  5      nUS:g  nUU-  n[        R                  " U[        R                  S9nSU-
  X   U-
  -
  UU'   UU   UU   -	  SUR
                  UU   -
  -  -  UU'   UUR                  S-
  -  UUR
                  -  -  U-  nUR#                  U R$                  5      $ )Nuintr?   r   r   )getattrrA   r@   
frombuffertobytesfp_mantissa_widthexponent_biasrH   rv   any
zeros_likerD   maximumminimum_irROUNDING_MODERTNErG   r7   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs                             r   _convert_floatr      s   rT+*H*H)I#JK tL,K,K+L%MNemmo5EFI881<=ED&99K<Y<YY\]](;;l>\>\\_``[%B%B BaGHK**J,,K;;;FZAZ^_@_`hhikiqiqrH!mO	vvo
 --	:{445A%*d2I&1&C&Ca&GGIN# 6 "-!1$%(@$@!=G=U'/9:(3(DH`(`+///14(6$ jjBJJ0E0SWX\qWquvVv$wxO%,,-?@O++01K%%(G(GG).K.KlNlNl.lm,000A57C--222!Q;+H+H<KiKi+ilm+m%noG!3w{!C/667IJ)001CD+==@]@]]_#$(F(F#F!"KM &*O	vvo
 "?"??QJ^E^bcDcdllmomumuv"*a-),CCirxx8"#k/h6OR\6\!]o/A//RV[\kVl/l,0053IIJ/L?+l==AB<999;=OPF>>%++&&r   c                 .    [         R                  " U 5      $ r   )matherfxs    r   _erfr      s    88A;r   c                 6    [        U 5      [        U5      -  S-	  $ )N@   )ro   )abs     r   
_umulhi_64r      s     FSVO""r   )otypesc                   $    \ rS rSr\S 5       rSrg)ExtraFunctions   c                 d    [         R                  " UR                  U R                  X5      U5      $ r   )r{   tensorcreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding_builders       r   _convert_custom_types$ExtraFunctions._convert_custom_types   s%    yy11%,,]_effr   r1   N)r,   r-   r.   r/   staticmethodr   r0   r1   r   r   r   r      s    g gr   r   c                   2   \ rS rSr\R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  0r\R                  R                  \R                  R                  \R                  R                  \R                  R                  \R                  R                   \R                  R                   \R                  R"                  \R                  R"                  \R                  R$                  \R                  R$                  \R                  R&                  \R                  R&                  \R                  R(                  \R                  R(                  \R                  R*                  \R                  R*                  \R                  R,                  \R                  R,                  \R                  R.                  \R                  R.                  0
rSS jrS rS rS rS rS rS	 rS
 r S r!S r"S r#S r$S r%S r&S r'S r(S r)S r*S r+S r,S r-S r.S r/S r0S r1S r2S r3S r4S r5S  r6S! r7S" r8S# r9S$ r:S% r;S& r<S' r=S( r>S) r?S* r@S+ rAS, rBS- rCS. rDS/ rES0 rFS1 rGS2 rHS3 rIS4 rJS5 rKS6 rLS7 rMS8 rNS9 rOS: rPS; rQS< rRS= rSS> rTS? rUS@ rVSA rWSB rXSC rYSD rZSE r[SF r\SG r]SH r^SI r_SJ r`SK raSL rbSM rcSN rdSO reSP rfSQ rgSR rhSS riST rjSU rkSV rlSW rmSX rnSY roSZ rpS[ rqS\ rrS] rsS^ rtS_ ruS` rvSa rwSb rxSc rySd rzSe r{Sf r|\Kr}\Kr~Sg rSh rSi rSj rSk rSl rSm rSn rSo rSp rSq rSr rSs rSt rSu rSv rSw rSx rSy rSz rS{ rS| rS} rS~ rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rSrg)InterpreterBuilder   Nc                     S U l         [        5       U l        0 U l        [        R
                  U R                  S'   S U R                  S'   g )Nconvert_custom_typesc                     g)N)   r   r   r1   )lhsTyperhsTypes     r   <lambda>-InterpreterBuilder.__init__.<locals>.<lambda>   s    Lr   min_dot_size)r]   rX   optionscodegen_fnsr   r   r   s    r   r   InterpreterBuilder.__init__   sB    	)+3A3W3W/0+P(r   c                     XR                   S   :  d  [        S5      eX R                   S   :  d  [        S5      eX0R                   S   :  d  [        S5      eXU4U l        g )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   r   yzs       r   set_grid_idxInterpreterBuilder.set_grid_idx   s^    ==##/00==##/00==##/00q	r   c                     XU4U l         g r   )r   )r   nxnynzs       r   set_grid_dimInterpreterBuilder.set_grid_dim   s    r   c                 "    [         R                  $ r   )r{   r~   r   s    r   get_half_tyInterpreterBuilder.get_half_ty      zzr   c                 "    [         R                  $ r   )r{   r   r   s    r   get_bf16_tyInterpreterBuilder.get_bf16_ty      {{r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_float_tyInterpreterBuilder.get_float_ty  r   r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_double_ty InterpreterBuilder.get_double_ty  r   r   c                 "    [         R                  $ r   )r{   rr   r   s    r   get_int8_tyInterpreterBuilder.get_int8_ty  s    wwr   c                 "    [         R                  $ r   )r{   rq   r   s    r   get_uint8_tyInterpreterBuilder.get_uint8_ty      xxr   c                 "    [         R                  $ r   )r{   rt   r   s    r   get_int16_tyInterpreterBuilder.get_int16_ty  r   r   c                 "    [         R                  $ r   )r{   rs   r   s    r   get_uint16_ty InterpreterBuilder.get_uint16_ty      yyr   c                 "    [         R                  $ r   )r{   rv   r   s    r   get_int32_tyInterpreterBuilder.get_int32_ty  r   r   c                 "    [         R                  $ r   )r{   ru   r   s    r   get_uint32_ty InterpreterBuilder.get_uint32_ty   r  r   c                 "    [         R                  $ r   )r{   rw   r   s    r   get_int64_tyInterpreterBuilder.get_int64_ty#  r   r   c                 "    [         R                  $ r   )r{   rI   r   s    r   get_uint64_ty InterpreterBuilder.get_uint64_ty&  r  r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_fp8e4nv_ty!InterpreterBuilder.get_fp8e4nv_ty)      }}r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_fp8e4b15_ty"InterpreterBuilder.get_fp8e4b15_ty,      ~~r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_fp8e4b8_ty!InterpreterBuilder.get_fp8e4b8_ty/  r  r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_fp8e5_tyInterpreterBuilder.get_fp8e5_ty2  r   r   c                 "    [         R                  $ r   )r{   r   r   s    r   get_fp8e5b16_ty"InterpreterBuilder.get_fp8e5b16_ty5  r  r   c                 .    [         R                  " X5      $ r   )r{   r|   )r   elt_ty
addr_spaces      r   
get_ptr_tyInterpreterBuilder.get_ptr_ty8  s    v22r   c                 .    [         R                  " X5      $ r   )r{   r   )r   r   r7   s      r   get_block_tyInterpreterBuilder.get_block_ty;  s    }}U**r   c                 z    [        [        R                  " U/[        R                  S9[        R
                  5      $ Nr?   )r   rA   arraybool_r{   r}   r   r)   s     r   get_int1InterpreterBuilder.get_int1>  s$    BHHeWBHH=rwwGGr   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rq   r{   r0  s     r   	get_uint8InterpreterBuilder.get_uint8A  $    BHHeWBHH=rxxHHr   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rr   r{   r0  s     r   get_int8InterpreterBuilder.get_int8D  s$    BHHeWBGG<bggFFr   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rs   r{   r0  s     r   
get_uint16InterpreterBuilder.get_uint16G  $    BHHeWBII>		JJr   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rt   r{   r0  s     r   	get_int16InterpreterBuilder.get_int16J  r6  r   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  ru   r{   r0  s     r   
get_uint32InterpreterBuilder.get_uint32M  r=  r   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rv   r{   r0  s     r   	get_int32InterpreterBuilder.get_int32P  r6  r   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rI   r{   r0  s     r   
get_uint64InterpreterBuilder.get_uint64S  r=  r   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  rw   r{   r0  s     r   	get_int64InterpreterBuilder.get_int64V  r6  r   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  r~   r{   r0  s     r   get_fp16InterpreterBuilder.get_fp16Y  $    BHHeWBJJ?LLr   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  r   r{   r0  s     r   get_fp32InterpreterBuilder.get_fp32\  rP  r   c                 z    [        [        R                  " U/[        R                  S9[        R                  5      $ r-  )r   rA   r.  r   r{   r0  s     r   get_fp64InterpreterBuilder.get_fp64_  rP  r   c                 T    [        [        R                  " S/[        U5      S9U5      $ Nr   r?   )r   rA   r.  r   )r   types     r   get_null_value!InterpreterBuilder.get_null_valueb  s!    BHHaSd0CDdKKr   c                     U R                   c  [        S5      e[        [        R                  " U R                   U   /[        R
                  S9[        R
                  5      $ )Nzgrid_idx is Noner?   )r   r   r   rA   r.  rv   r{   r   axiss     r   create_get_program_id(InterpreterBuilder.create_get_program_idf  sD    == /00BHHdmmD&9%:"((KRXXVVr   c                     [        [        R                  " U R                  U   /[        R                  S9[
        R                  5      $ r-  )r   rA   r.  r   rv   r{   r]  s     r   create_get_num_programs*InterpreterBuilder.create_get_num_programsk  s.    BHHdmmD&9%:"((KRXXVVr   c                     [        [        R                  " UR                  [        S9[
        R                  5      nS nU R                  XXbX45      $ r-  )r   rA   	ones_liker   r   r{   r}   create_masked_load)r   ptr_0_1is_volatilemaskothers          r   create_loadInterpreterBuilder.create_loado  s;    BLL>H&&s%RMMr   c                     [        [        R                  " UR                  [        S9[
        R                  5      nU R                  XUS S 5      $ r-  )r   rA   re  r   r   r{   r}   create_masked_store)r   rg  valrh  ri  rk  s         r   create_storeInterpreterBuilder.create_storet  s8    BLL>H''$dCCr   c                    UR                  5       n[        U5      nUc)  [        [        R                  " UR
                  US9U5      n[        R                  " UR
                  UR
                  UR
                  U5      n	[        X5      $ r-  )r!   r   r   rA   r   r   _interpreterload)
r   rO   rk  rl  cache_modifiereviction_policyrj  rM   dtype_nprets
             r   rf  %InterpreterBuilder.create_masked_loadx  sg    &&( *= tyy!I8TE		499ejj(KC**r   c                 n    [         R                  " UR                  UR                  UR                  5      $ r   )ru  storer   )r   rO   r)   rk  rw  rx  s         r   rp  &InterpreterBuilder.create_masked_store  s#    !!$))UZZCCr   c                    UR                   R                  nUR                  nU[        R                  :X  a  U[        R                  :X  d(  U[        R                  :X  aX  U[        R                  :X  aD  [        UR                  X4S 5      R                  [        U5      5      n[        XRR                  5      $ [        UR                  R                  [        U5      5      UR                  5      $ r   )r   rK   r{   r   r   r   r   viewr   r   rH   )r   srcdst_typesrc_element_typedst_element_typer   s         r   	cast_implInterpreterBuilder.cast_impl  s    99++#??+0@BJJ0N

*/?2;;/N!#((,<PTUZZ[hiq[rsDoo66h0G H(//ZZr   c                 $    U R                  X5      $ r   r  r   r  r  s      r   r   InterpreterBuilder.<lambda>      $..2Or   c                 $    U R                  X5      $ r   r  r  s      r   r   r    r  r   c                 $    U R                  X5      $ r   r  r  s      r   r   r    r  r   c                 $    U R                  X5      $ r   r  r  s      r   r   r    r  r   c                 $    U R                  X5      $ r   r  r  s      r   r   r    s    s0Mr   c                 $    U R                  X5      $ r   r  r  s      r   r   r    r  r   c                 $    U R                  X5      $ r   r  )r   r  r  	is_signeds       r   r   r    s    T^^C=Zr   c                     UR                   R                  nUR                  n[        UR                  XEU5      R	                  [        U5      5      n[        XbR                  5      $ r   )r   rK   r   r   r  r   r   )r   r  r  r   r  r  r   s          r   r   "InterpreterBuilder.create_fp_to_fp  sP    99++#??chh(8MZ__`mnv`wxD//22r   c                 r    [        UR                  R                  [        U5      5      UR                  5      $ r   )r   r   r  r   rK   r  s      r   create_bitcast!InterpreterBuilder.create_bitcast  s%    CHHMM-*ABHOOTTr   c                 x    [        U" UR                  UR                  5      UR                  R                  5      $ r   r   r   r   rK   )r   lhsrhsops       r   	binary_opInterpreterBuilder.binary_op  s(    Bsxx2CII4D4DEEr   c                 B    U R                  X[        R                  5      $ r   r  rA   addr   r  r  s      r   r   r    s    "&&)Ir   c                 B    U R                  X[        R                  5      $ r   r  rA   multiplyr  s      r   r   r        "++)Nr   c                 B    U R                  X[        R                  5      $ r   r  rA   divider  s      r   r   r    s    ")))Lr   c                 B    U R                  X[        R                  5      $ r   )r  rA   	remainderr  s      r   r   r    s    ",,)Or   c                 B    U R                  X[        R                  5      $ r   r  rA   subtractr  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r        s(Mr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    s    "))1Tr   c                 $    U R                  X5      $ r   create_idivr  s      r   r   r        )9)9#)Cr   c                 $    U R                  X5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  rA   fmodr  s      r   r   r        "'')Jr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    s    s(Hr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   )r  rA   
left_shiftr  s      r   r   r    s    s(Or   c                 B    U R                  X[        R                  5      $ r   )r  rA   right_shiftr  s      r   r   r    s    "..)Qr   c                 B    U R                  X[        R                  5      $ r   r  rA   r   r  s      r   r   r        $..2::*Nr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r        T^^Cbjj-Qr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r        DNN3RZZ,Pr   c                 B    U R                  X[        R                  5      $ r   r  rA   r   r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  rA   
less_equalr  s      r   r   r        DNN3R]],Sr   c                 B    U R                  X[        R                  5      $ r   r  rA   lessr  s      r   r   r        DNN3RWW,Mr   c                 B    U R                  X[        R                  5      $ r   r  rA   greater_equalr  s      r   r   r        DNN3REUEU,Vr   c                 B    U R                  X[        R                  5      $ r   r  rA   greaterr  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  rA   equalr  s      r   r   r    s    4>>#BHH+Mr   c                 B    U R                  X[        R                  5      $ r   r  rA   	not_equalr  s      r   r   r    s    4>>#BLL+Qr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r        DNN3RXX,Nr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r        DNN3R\\,Rr   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   r  r  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   )r  rA   bitwise_andr  s      r   r   r        s(Pr   c                 B    U R                  X[        R                  5      $ r   )r  rA   bitwise_xorr  s      r   r   r    r  r   c                 B    U R                  X[        R                  5      $ r   )r  rA   
bitwise_orr  s      r   r   r    s    t~~c'Nr   c                     [        UR                  [        R                  " UR                  UR                  5      -
  UR                  -  UR                  R
                  5      $ r   )r   r   rA   r  r   rK   r  s      r   r  InterpreterBuilder.create_idiv  sC     SXX#(((CCPRUR[R[RbRbccr   c                 >   [        UR                  R                  5      n[        UR                  R                  5      nUR                  R                  U5      Ul        UR                  R                  U5      Ul        U R	                  X[
        R                  5      $ r   )rx   r   r   rH   r  rA   r  )r   r  r  	lhs_dtype	rhs_dtypes        r   create_ashrInterpreterBuilder.create_ashr  sc    (8	(8	88??9-88??9-~~c77r   c                 V   UR                   R                  nU[        R                  :X  d  U[        R                  :X  a>  [        [        UR                   UR                   5      UR                  R                  5      $ [        [        SUR                  S-  S-   35      nUR                   R                  U5      nUR                   R                  U5      n[        R                  " XV5      UR                  S-  -	  n[        UR                  U5      UR                  R                  5      $ )Nr   r>   r   )r   r   rA   rw   rI   r   np_umulhi_u64rK   r   itemsizerH   r  )r   r  r  r   compute_dtypelhs_datarhs_dataret_datas           r   create_umulhi InterpreterBuilder.create_umulhi  s    BHH 2chh A399CSCSTT#B$u~~/AA/E.F(GHMxx}5Hxx}5H{{865>>A;MNH 6		8H8HIIr   c                     [        U" UR                  UR                  UR                  5      UR                  R                  5      $ r   r  )r   r  r  rl  r  s        r   
ternary_opInterpreterBuilder.ternary_op  s.    Bsxx5::>@R@RSSr   c                 D    U R                  XU[        R                  5      $ r   )r  rA   clip)r   arglohipropagate_nanss        r   r   r    s    doocWY[][b[b>cr   c                 D    U R                  XU[        R                  5      $ r   )r  rA   where)r   condr  r  s       r   r   r    s    CQSQYQY1Zr   c                     [        UR                  UR                  -  UR                  -   UR                  R                  5      $ r   r  r   s       r   
create_fmaInterpreterBuilder.create_fma  s,    AFFQVVOaff4aggnnEEr   c                 b    [        U" UR                  5      UR                  R                  5      $ r   r  )r   r  r  s      r   unary_opInterpreterBuilder.unary_op  s!    BsxxL#))*:*:;;r   c                 .   UR                   nUR                  S-
  n[        [        SUR                   35      nUR                  R                  U5      nSU-  S-
  nXV-  R                  [        U5      5      n[        XqR                   R                  5      $ )Nr   r   )	r   r@   r   rA   r   r  r   r   rK   )r   r  rM   mask_bitwidthnp_uint_dtyper   rk  rz  s           r   create_fabsInterpreterBuilder.create_fabs  s    99 33a7d8+F+F*G$HIxx}}]+]"a'{  x!89C!1!122r   c                 B    U R                  U[        R                  5      $ r   )r  rA   cosr   r  s     r   r   r        4==bff#=r   c                 B    U R                  U[        R                  5      $ r   )r  rA   expr&  s     r   r   r  	  r'  r   c                 B    U R                  U[        R                  5      $ r   )r  rA   exp2r&  s     r   r   r  
      DMM#rww$?r   c                 B    U R                  U[        R                  5      $ r   )r  rA   absr&  s     r   r   r    s    DMM#rvv$>r   c                 B    U R                  U[        R                  5      $ r   )r  rA   floorr&  s     r   r   r    s    T]]3%Ar   c                 B    U R                  U[        R                  5      $ r   )r  rA   ceilr&  s     r   r   r    r,  r   c                 B    U R                  U[        R                  5      $ r   )r  rA   logr&  s     r   r   r    r'  r   c                 B    U R                  U[        R                  5      $ r   )r  rA   log2r&  s     r   r   r    r,  r   c                 B    U R                  U[        R                  5      $ r   r  rA   sqrtr&  s     r   r   r    s    DMM#rww,Gr   c                 B    U R                  U[        R                  5      $ r   r8  r&  s     r   r   r    r,  r   c                 B    U R                  U[        R                  5      $ r   )r  rA   sinr&  s     r   r   r    r'  r   c                     UR                   R                  [        R                  :X  a  [	        UR                   5      O[        UR                   5      n[        X!R                  R                  5      $ r   )r   r   rA   r   np_erf_fp32np_erf_fp64r   rK   )r   r  rz  s      r   
create_erfInterpreterBuilder.create_erf  sF    '*xx~~'Ck#((#UXU]U]I^C!1!122r   c                     [        S[        R                  " UR                  5      -  UR                  R
                  5      $ Nr   )r   rA   r9  r   r   rK   r&  s     r   create_rsqrtInterpreterBuilder.create_rsqrt  s+    A 113993C3CDDr   c                 t    [        UR                  R                  U5      UR                  R                  5      $ r   )r   r   rG   r   rK   )r   r  r7   allow_reorders       r   r   r    s(    \#((JZJZ[`Jacfclclcscs=tr   c                     [        [        R                  " UR                  U5      UR                  R
                  5      $ r   )r   rA   	transposer   r   rK   )r   r  perms      r   create_transInterpreterBuilder.create_trans  s(    BLL48#)):J:JKKr   c                    UR                   nUR                   nUR                  R                  S:X  a  UR                  R                  5       (       d9  UR                  R                  S:X  a  UR                  R                  5       (       a  [	        XaR                  [
        R                  S 5      R                  [        R                  5      n[	        XrR                  [
        R                  S 5      R                  [        R                  5      n[        [        R                  " XgUR                   R                  S9UR                   -   UR                  R                  5      $ )Nr>   r?   )r   r   r@   is_floatingr   r{   r~   r  rA   r   matmulrK   )r   r   r   dinput_precisionmax_num_imprecise_acca_datab_datas           r   
create_dotInterpreterBuilder.create_dot!  s    GG&&!+0C0C0E0EGG&&!+0C0C0E0E#FGGRZZFKKBJJWF#FGGRZZFKKBJJWFBIIfAFFLLIAFFRTUT[T[TbTbccr   c                 x    [        [        R                  " X[        R                  S9[        R                  5      $ r-  )r   rA   rF   rv   r{   )r   startstops      r   create_make_range$InterpreterBuilder.create_make_range*  s"    BIIeBBHHMMr   c                 |    [        [        R                  " UR                  USU4S9S   [        R
                  5      $ )Nr   )binsrD   )r   rA   	histogramr   r{   rv   )r   r   r]  s      r   create_histogram#InterpreterBuilder.create_histogram-  s1    BLLaYOPQRTVT\T\]]r   c                     UR                  5       nUR                  n[        SUS-  5      n[        UR                  XRR                  R                  [        R                  5      -  -   UR                  5      $ )Nr   r>   )	r!   r@   maxr   r   rH   rA   rI   r   )r   rg  offsetrM   element_bitwidthelement_bytewidths         r   create_addptr InterpreterBuilder.create_addptr2  sc    %%'#66#3q#89CHH'8;;;M;Mbii;X'XXZ]ZcZcddr   c                    UR                  U5      u  pxUR                  5       n	[        U	5      n
Uc  S nOU[        R                  R
                  :X  a*  [        [        R                  " UR                  U
S9U	5      nO`U[        R                  R                  :X  a4  [        [        R                  " UR                  [        S5      U
S9U	5      nO[        SU 35      eU R                  XxXXV5      $ )Nr?   nanzunsupported padding option )rT   r!   r   r   PADDING_OPTIONPAD_ZEROr   rA   r   r   PAD_NAN	full_likefloatr   rf  )r   rg  rL   padding_optionrw  rx  rj  rO   rP   rM   ry  rl  s               r   create_tensor_pointer_load-InterpreterBuilder.create_tensor_pointer_load9  s    ..~>&&( *!Es11::: tyy!I8TEs11999 diiuX!VX`aE:>:JKLL&&tE?hhr   c                 N    UR                  U5      u  pgU R                  XbXtU5      $ r   )rT   rp  )r   rg  r)   rL   rw  rx  rO   rP   s           r   create_tensor_pointer_store.InterpreterBuilder.create_tensor_pointer_storeH  s)    ..~>''UO\\r   c                     [        [        R                  " UR                  U5      UR                  R
                  5      $ r   )r   rA   expand_dimsr   r   rK   )r   r  r^  s      r   create_expand_dims%InterpreterBuilder.create_expand_dimsL  s(    BNN388T:CII<L<LMMr   c                     [        [        R                  " UR                  U5      UR                  R
                  5      $ r   )r   rA   rB   r   r   rK   r   r  r7   s      r   create_broadcast#InterpreterBuilder.create_broadcastO  s(    BOOCHHe<cii>N>NOOr   c                     [        [        R                  " UR                  UR                  /5      UR                  R
                  5      $ r   )r   rA   concatenater   r   rK   r  s      r   
create_catInterpreterBuilder.create_catR  s/    BNNCHHchh+?@#))BRBRSSr   c                     [        [        R                  " UR                  UR                  /SS9UR                  R
                  5      $ )Nr^  )r   rA   stackr   r   rK   r  s      r   create_joinInterpreterBuilder.create_joinU  s1    BHHchh%9CSYYEUEUVVr   c                     [        UR                  S   UR                  R                  5      [        UR                  S   UR                  R                  5      4$ )N).r   ).r   r  )r   rq  s     r   create_splitInterpreterBuilder.create_splitY  sE    SXXf-syy/?/?@,sxxX^O_adajajaqaqBrssr   c           	         [        UR                  [        R                  5      (       aS  [	        [
        R                  " X!R                  S   [        UR                  5      S9UR                  R                  5      $ [	        [
        R                  " X!R                  [        UR                  5      S9UR                  R                  5      $ rX  )
rz   r   r{   r   r   rA   fullr   r   rK   rz  s      r   create_splatInterpreterBuilder.create_splat]  s    cii//xx{-PSPYPYBZ []`]f]f]m]mnnxx}SYY?W XZ]ZcZcZjZjkkr   c                    X@R                   ;  a  [        SU 35      eU R                   U   n[        [        R                  " UR
                  UR
                  UR
                  U5      UR                  R                  5      $ )Nunsupported semantic )ir_sem_to_interpreter_semr   r   ru  
atomic_casr   r   rK   )r   rg  cmprq  semscopes         r   create_atomic_cas$InterpreterBuilder.create_atomic_casc  si    4444SE:;;,,S1L33CHHchhRUVX[XaXaXhXhiir   c           	      \   XR                   ;  a  [        SU 35      eXPR                  ;  a  [        SU 35      eU R                   U   nU R                  U   n[        [        R
                  " XR                  UR                  UR                  U5      UR                  R                  5      $ )Nzunsupported rmwOp r  )	ir_rmw_op_to_interpreter_rmw_opr   r  r   ru  
atomic_rmwr   r   rK   )r   rmwOprg  rq  rk  r  r  s          r   create_atomic_rmw$InterpreterBuilder.create_atomic_rmwi  s    <<<1%9::4444SE:;;44U;,,S1L33E88SXXtyyZ]^`c`i`i`p`pqqr   c                     [        S5      e)Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   libNamelibPathsymbolargListretTypeisPures          r   create_extern_elementwise,InterpreterBuilder.create_extern_elementwiser  s    !"XYYr   c                     [        S5      e)Nz,inline_asm not supported in interpreter moder  )r   	inlineAsmconstraintsvaluesrY  r  packs          r   create_inline_asm$InterpreterBuilder.create_inline_asmu  s    !"PQQr   c                 D   SU R                   S    SU R                   S    SU R                   S    S3nU(       a  USU 3-  nU(       a  [        R                  " SS	 0S
9  U H  n[        USUR                   3-   5        M      U(       a  [        R                  " S S
9  g g )N(r   z, r   r   ) r   c                     SU S 3$ )N0x02xr1   r   s    r   r   1InterpreterBuilder.create_print.<locals>.<lambda>  s    b3Lr   )	formatter)r   rA   set_printoptionsprintr   )r   prefixhexr  isSignedmsgr)   s          r   create_printInterpreterBuilder.create_printx  s    
 $--"#2dmmA&6%7r$--:J9K1MQvh<C52H*IJE#!EJJ<(() $/ r   c                 "    U(       d   U 5       eg r   r1   )r   	conditionmessages      r   create_assert InterpreterBuilder.create_assert  s    &WI&yr   c                      U(       d   S5       eg )NzAssume failedr1   )r   r  s     r   create_assume InterpreterBuilder.create_assume  s    )/)yr   c                     g r   r1   r   s    r   create_barrier!InterpreterBuilder.create_barrier  s    r   c                 d    U Vs/ s H  owR                  5       PM     nn[        XX8XV5      $ s  snf r   )r%   r3   )	r   r6   r7   r8   r9   r:   r;   rc  new_offsetss	            r   create_make_block_ptr(InterpreterBuilder.create_make_block_ptr  s.    4;<G&||~G<!$w\YY =s   -c                    [        UR                  5      [        U5      :w  a  [        S5      eUR                   Vs/ s H  o3R                  5       PM     nn[	        UR
                  UR                  UR                  XAR                  UR                  5      n[        [        U5      5       H1  nUR                  U   =R                  X&   R                  -  sl        M3     U$ s  snf )Nz len(ptr.offsets) != len(offsets))rE   r9   r   r%   r3   r6   r7   r8   r:   r;   rD   r   )r   rg  r9   rc  r  rz  r   s          r   create_advance!InterpreterBuilder.create_advance  s    s{{s7|+?@@47KK@K&||~K@ 399ckk;P`P`bebkbkls7|$AKKN7:??2 %
	 As   C#c                     [        U5      nSUR                  ;   a*  [        [        R                  " SSUS9UR
                  5      $ [        SU 35      e)Nro   r   r  r?   zunsupported type )r   namer   rA   r  rK   	TypeError)r   rY  np_types      r   get_all_ones_value%InterpreterBuilder.get_all_ones_value  sI    %GLL 2W =t{{KK/v677r   )r]   r   r   r   r   returnN)r,   r-   r.   r/   r   MEM_SEMANTICACQUIREru  RELEASERELAXEDACQUIRE_RELEASEr  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGr  r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r  r"  r'  r*  r1  r4  r8  r;  r?  rB  rE  rH  rK  rN  rR  rU  rZ  r_  rb  rm  rr  rf  rp  r  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   r  r  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orcreate_int_to_ptrcreate_ptr_to_intr  r  r  r  create_clampfcreate_selectr  r  r"  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr@  rD  create_reshaperK  rU  rZ  r_  rf  rp  rs  rw  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r0   r1   r   r   r   r      s     ,";";"C"C  ,";";"C"C  ,";";"C"C((,*C*C*S*S	! 	<..22L//44<..22L//44<..22L//44<..22,--00<..22L//44'#Q"%
3+HIGKIKIKIMMMLW
WN
D+D[ POOOOOOOMMOOZO3UF JKNKLKOKNKMJTCKCKJKJKHJMJOJQKNLNLQOPNNLNLQOPNSNMNVNPNSNMNVNPNMMQMMNPNSNVNNNRNMNPNSNVNNNRNPJPJNI&&d8	JT dMZMF<3 >J=J?K>KAL?K=J?KG?K=J3E uNLdN^
ei]NPTWtljrZR0'*Z
8r   r   c                 2   ^ US.U4S jjn[        XU5        g )N)memberc           
      ~   > U " U0 UR                  5        VVs0 s H  u  p4US:w  d  M  X4_M     snnDST0D6$ s  snnf )Nr   )items)r3  argskwargskvbuilders        r   r   _patch_attr.<locals>.<lambda>  s[     :hMS\\^AUM[TQDEO BFM[AU:h `g:hAUs   99)setattr)objr  r3  r:  
new_members      ` r   _patch_attrr?    s    &, iJ Cz"r   c                     [         R                  " U 5       H7  u  p#[        R                  R	                  U5      (       d  M+  [        XX15        M9     g r   )inspect
getmembersr{   core
is_builtinr?  )pkgr:  r  r3  s       r   _patch_builtinrF    s8    **3/77f%%63 0r   c                 x   ^ S mS nS U l         U4S jU l        S U l        S U l        [	        U5      U l        g )Nc                 h    U R                   R                  nUR                  S:X  a  [        U5      $ S$ )Nr   T)r   r   sizer   )r   r   s     r   	_get_bool%_patch_lang_tensor.<locals>._get_bool  s,    {{ "YY!^tDz55r   c                     [         R                  R                  [        [        R
                  " U R                  R                  5      U R                  R                  5      U R                  R                  5      $ r   )
r{   rC  r   r   rA   rI  r   r   r   rK   r   s    r   _get_transpose*_patch_lang_tensor.<locals>._get_transpose  sH    ww~~l2<<8H8H+I4;;K\K\]_c_i_i_p_pqqr   c                 @    [        U R                  R                  5      $ r   )ro   r   r   r   s    r   r   $_patch_lang_tensor.<locals>.<lambda>  s    C(8(8$9r   c                    > T" U 5      $ r   r1   )r   rJ  s    r   r   rP    s	    9T?r   c                 @    [        U R                  R                  5      $ r   )reprr   r   r   s    r   r   rP    s    4(8(8#9r   c                 @    [        U R                  R                  5      $ r   )rn   r   r   r   s    r   r   rP    s    #dkk&6&6"7r   )	__index__r   __repr____str__propertyT)r   rM  rJ  s     @r   _patch_lang_tensorrZ    s9    6r :F2FO9FO7FN'FHr   c                   8    \ rS rSrS rS rS rS rS rS r	Sr
g	)
ReduceScanOpIneterfacei  c                     Xl         X l        g r   r^  
combine_fn)r   r^  r_  s      r   r   ReduceScanOpIneterface.__init__  s    	$r   c                 L    Ub!  U[        U5      :  a  [        SU SU 35      eg g )Nzaxis z out of bounds for shape )rE   r   )r   r7   r^  s      r   
check_axis!ReduceScanOpIneterface.check_axis  s4    E
 2uTF*CE7KLL !3r   c                     U Hi  n[        U[        R                  R                  5      (       d  [	        S[        U5       35      eU R                  UR                  U R                  5        Mk     g )Nzinput must be a tensor, got )	rz   r{   rC  r   r   rY  rb  r7   r^  )r   r   r  s      r   check_tensor#ReduceScanOpIneterface.check_tensor  sN    Cc277>>22 #?S	{!KLLOOCIItyy1 r   c                 P   [        US5      (       a2  UR                  (       a!  [        R                  " X!R                  5      nO1[        R
                  " U/5      R                  [        U5      5      nUn[        R                  R                  [        XR                  5      U5      $ )Nr7   )r    r7   r{   r   rA   r.  rH   r   rC  r   r   rK   )r   rz  r   ret_types       r   	to_tensor ReduceScanOpIneterface.to_tensor  si    3  SYY}}UII6H((C5/((u)=>CHww~~l3=xHHr   c                 v    [        U[        5      (       d  U4nU R                  U5        U R                  U5      $ r   )rz   tuplere  
apply_implr   r   s     r   applyReduceScanOpIneterface.apply  s3    %''IE% u%%r   c                     [        S5      e)Nzapply_impl not implementedr  rn  s     r   rm  !ReduceScanOpIneterface.apply_impl  s    !">??r   r^  N)r,   r-   r.   r/   r   rb  re  ri  ro  rm  r0   r1   r   r   r\  r\    s#    %M2I&@r   r\  c                   J   ^  \ rS rSrU 4S jrS rS rS	S jrS rS r	Sr
U =r$ )
	ReduceOpsi  c                 0   > [         TU ]  X5        X0l        g r   )superr   	keep_dims)r   r^  r_  rw  	__class__s       r   r   ReduceOps.__init__  s    *"r   c                     / nU Hh  nUb  UR                  U5        M  SnUR                  U R                  UR                  R                  R	                  5       UR
                  5      5        Mj     [        U5      U4$ )Nr   )appendri  r   r   flattenr   rl  )r   r   r^  rz  r   s        r   unravelReduceOps.unravel  sg    D

4 

4>>$++*:*:*B*B*DdjjQR  Sz4r   c                 T  ^ ^^^ T R                   nT R                  TT R                   5      u  mn/ n/ nTS   R                  R                  R                  nUSU XcS-   S  -   nT Hi  nUR                  UR                  R                  5        UR                  [        R                  " XxR                  R                  R                  S95        Mk     [        US   R                  5       GHf  n	[        R                  " X5      mTSU TUS-   S  -   m[        UUU 4S j[        U5       5       5      n
TU   S:X  aH  [        [        U5      5       H.  nX   R                  R                  R                  5       X[   T'   M0     M  [        UUU 4S j[        U5       5       5      nT R                   R"                  " / UQU
Q76 n[%        U[        5      (       d  U4OUn[        [        U5      5       H]  n[%        X   [&        R(                  R*                  5      (       a&  X   R                  R                  R                  5       OX   X[   T'   M_     GMi     / n[        U5       H  u  pT R,                  (       aM  Ub  [        R.                  " X5      nOF[        [        U5      5       H  n[        R.                  " US5      nM     OUc  UR                  5       nUR                  T R1                  UTU	   R                  5      5        M     [        U5      S:X  a  US   $ [        U5      $ )Nr   r   r?   c              3   l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr   ri  r   ).0iirP  r   input_indexr   s      r   	<genexpr>+ReduceOps.generic_reduce.<locals>.<genexpr>
  s1     s]rTYTVq~uRy O O]r   14c              3   l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr   r  )r  oior   output_indexr   s      r   r  r    s1     !w`vW\WY$..<%)//"R"R`vr  )r^  r}  r   r   r7   r{  rA   zerosr   rD   rI  unravel_indexrl  	enumeraterE   itemr_  fnrz   r{   rC  r   rw  rv  ri  )r   r   original_axisr^  
input_dataoutput_datainput_shapeoutput_shaper  r   input_tuplej	acc_tuplecombine_fn_retrz  r   _r  r  s   ``               @@r   generic_reduceReduceOps.generic_reduce  s   		ll5$))4t
Ahoo**00"1T*[-CCCcjjoo.rxxJJOO<Q<QRS  z!}))*A**1:K&q.TAXY1GGLs]fgq]rssK4 A%s;/0A3>>3H3H3M3M3R3R3TKN<0 1 "!w`iju`v!ww	!%!3!3!MY!M!M6@QV6W6W^.]k	s;/0AV`!bggnnW6 W69<3F3F3K3K3P3P3R;D<  N<0 1 +"  -GA~~ ,>>$5D"3{#34!~~dA6 5 &yy{JJt~~dE!HNN;< . SQs1v6E#J6r   c                    [        U[        5      (       a  US   OUnS nS nU(       aJ  U R                  U" UR                  R                  U R
                  U R                  S9UR                  5      nU(       aN  U R                  U" UR                  R                  U R
                  U R                  S9[        R                  5      nUb  Ub  XE4$ Ub  U$ Ub  U$ [        S5      e)Nr   r^  keepdimsz-val_reduce_op and idx_reduce_op are both None)rz   rl  ri  r   r   r^  rw  r   r{   rv   r   )r   r   val_reduce_opidx_reduce_oprq  idxs         r   min_maxReduceOps.min_max&  s    &ue44a%..u||/@/@tyy[_[i[i!jlqlwlwxC..u||/@/@tyy[_[i[i!jlnltltuC?s8O_J_JLMMr   c                     U R                  [        R                  " UR                  R                  U R
                  U R                  S9UR                  5      $ )Nr  )ri  rA   sumr   r   r^  rw  r   rn  s     r   r  ReduceOps.sum8  s<    ~~bffU\\%6%6TYYQUQ_Q_`bgbmbmnnr   c                 $   U R                   [        R                  R                  :X  a0  U R	                  US   [
        R                  [
        R                  S9$ U R                   [        R                  R                  :X  a0  U R	                  US   [
        R                  [
        R                  S9$ U R                   [        R                  R                  :X  a"  U R	                  US   [
        R                  S S9$ U R                   [        R                  R                  :X  a"  U R	                  US   [
        R                  S S9$ U R                   [        R                  R                  :X  a  U R                  US   5      $ U R                  U5      $ )Nr   )r  r  )r_  r{   standard_argmin_combine_tie_break_leftr  rA   minargmin_argmax_combine_tie_break_leftrb  argmax_elementwise_max_elementwise_min_sum_combiner  r  rn  s     r   rm  ReduceOps.apply_impl;  s   ??bkkHHH<<abii<XX__ J JJ<<abii<XX__ < <<<<ad<SS__ < <<<<ad<SS__ 8 8888E!H%% &&u--r   )rw  r   )r,   r-   r.   r/   r   r}  r  r  r  rm  r0   __classcell__rx  s   @r   rt  rt    s)    # )7VN$o. .r   rt  c                   @   ^  \ rS rSrU 4S jrS rS rS rS rSr	U =r
$ )ScanOpsiK  c                 0   > [         TU ]  X5        X0l        g r   )rv  r   reverse)r   r^  r_  r  rx  s       r   r   ScanOps.__init__M  s    *r   c                     U R                  [        R                  " UR                  R                  U R
                  S9UR                  S9/$ Nr  r?   )ri  rA   cumsumr   r   r^  r   rn  s     r   r  ScanOps.cumsumQ  s8    ryy):):KSXS^S^_``r   c                     U R                  [        R                  " UR                  R                  U R
                  S9UR                  S9/$ r  )ri  rA   cumprodr   r   r^  r   rn  s     r   r  ScanOps.cumprodT  s8    rzz%,,*;*;$))LTYT_T_`aar   c           	        ^ ^^^ / n/ nTS   R                   R                  R                  nT Hi  nUR                  UR                   R                  5        UR                  [        R
                  " XER                   R                  R                  S95        Mk     [        US   R                  5       GH  n[        R                  " Xd5      m[        UUU 4S j[        U5       5       5      nTT R                     S:X  aH  [        [        U5      5       H.  nXx   R                   R                  R                  5       X8   T'   M0     M  [        UU 4S j[        [        T5      5       5       5      m[        UUU 4S j[        U5       5       5      n	T R                  R                   " / U	QUQ76 n
[#        U
[        5      (       d  U
4OU
n	[        [        U5      5       H]  n[#        X   [$        R&                  R(                  5      (       a&  X   R                   R                  R                  5       OX   X8   T'   M_     GM     / n[        U5       H3  u  pgUR                  T R+                  UTU   R                  5      5        M5     U$ )Nr   r?   c              3   l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr   r  )r  r  rP  indexr   r   s      r   r  'ScanOps.generic_scan.<locals>.<genexpr>b  s/     fPeur%%)//BBPer  c              3   \   >#    U  H!  oTR                   :X  a  TU   S -
  OTU   v   M#     g7f)r   Nr  )r  r   r  r   s     r   r  r  h  s-     "kYjTU		>58a<uQx#OYjs   ),c              3   l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr   r  )r  r  r  r   
prev_indexr   s      r   r  r  i  s1     !u^tUZUW$..:b	"P"P^tr  )r   r   r7   r{  rA   r  r   rD   rI  r  rl  r  r^  rE   r  r_  r  rz   r{   rC  r   ri  )r   r   r  r  r7   r  r   r   r  r  r  rz  r  r  s   ``          @@r   generic_scanScanOps.generic_scanW  s   
a$$**Ccjjoo.rxxZZ__5J5JKL  z!}))*A$$Q.EfPYZdPeffDTYY1$s;/0A,0GNN,?,?,D,D,FKN5) 1 #"kY^_bch_iYj"kk
!!u^ghs^t!uu	!%!3!3!FY!F!F6@QV6W6W^.]k	s;/0AOY!bggnnP6 P6IL,?,?,D,D,I,I,K;D<  N5) 1 +"  -GAJJt~~dE!HNN;< .
r   c           
         / nU R                   (       af  U H_  nUR                  U R                  [        R                  " UR
                  R                  U R                  S9UR                  5      5        Ma     OUnU R                  [        R                  R                  :X  a  U R                  US   5      nONU R                  [        R                  R                  :X  a  U R                  US   5      nOU R!                  U5      nU R                   (       aK  U HE  n[        R                  " UR
                  R                  U R                  S9UR
                  l        MG     [#        U5      S:H  =(       a    US   =(       d    [%        U5      $ )Nr  r   r   )r  r{  ri  rA   flipr   r   r^  r   r_  r{   r  r  r  _prod_combiner  r  rE   rl  )r   r   	new_inputr  rz  s        r   rm  ScanOps.apply_implu  s   	<<  

dii0XZ]ZcZc!de  I??bkk666++il+C__ 9 99,,y|,C ##I.C<<"$''#**//		"J

 3x1}'Q55:5r   )r  )r,   r-   r.   r/   r   r  r  r  rm  r0   r  r  s   @r   r  r  K  s#    ab<6 6r   r  c                      SS jn SS jnU [         l        U[         l        U [         R                  l        U[         R                  l        g )Nc                 8    [        XU5      R                  U 5      $ r   )rt  ro  )r   r^  r_  rw  r7  s        r   _new_reduce'_patch_reduce_scan.<locals>._new_reduce  s    95;;EBBr   c                 8    [        XU5      R                  U 5      $ r   )r  ro  )r   r^  r_  r  r7  s        r   	_new_scan%_patch_reduce_scan.<locals>._new_scan  s    t177>>r   )F)r{   reduceassociative_scanrC  )r  r  s     r   _patch_reduce_scanr    s5    C? BI#B BGGN(BGGr   c                     S nS	S jnS
S jnS nX l         X l        X0l        [        U l        XR
                  l        [        USS9U l        [        USS9U l	        [        USS9U l
        [        5         g )Nc                 `   U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR	                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S	:X  a  UR                  5       $ U R                   S
:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR!                  5       $ U R                   S:X  a  UR#                  5       $ [%        SU  S35      e)Nvoidr}   rr   rq   rt   rs   rv   ru   rw   rI   r^   r`   rb   fp16bf16fp32fp64zfail to convert z to ir type)r  get_void_tyget_int1_tyr   r   r  r  r  r  r  r  r  r  r  r   r   r   r   r   )r   r:  s     r   
_new_to_ir$_patch_lang_core.<locals>._new_to_ir  s   99&&((YY& &&((YY& &&((YY'!''))YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY)#))++YY*$**,,YY& &&((YY& &&((YY& ''))YY& ((**+D6=>>r   c                 6    Uc  SnUc  SU pTOXpT[        XEU5      $ )Nr   r   )rD   )arg1arg2stepr7  rX  ends         r   
_new_range$_patch_lang_core.<locals>._new_range  s*    <D<D33U&&r   c                      U (       d   U5       eg r   r1   )r  r  s     r   _new_static_assert,_patch_lang_core.<locals>._new_static_assert  s    Str   c                    [        U [        R                  5      (       d  U $ [        U[        [        45      (       d  U/OUnU Vs/ s H0  n[        U[        R
                  5      (       a  UR                  OUPM2     nn[        U5      [        S[        U R                  5      5      :w  a  [        SU 35      eU R                  R                  X!5        U $ s  snf )Nr   z$len(values) != len(input.shape) for )rz   r{   r   listrl  	constexprr)   rE   rb  r7   r   r   r*   )r   r  r  r9  s       r   	_set_attr#_patch_lang_core.<locals>._set_attr  s    %++L!+FT5M!B!B&IOPAZ2<<88!''a?Pv;#aU[[!122CD6JKKd+	 Qs   7Cztt.divisiblity)r  ztt.contiguityztt.constancy)NN) )rD   static_rangestatic_assertr  static_printr   to_irr   multiple_ofmax_contiguousmax_constancyr  )langr  r  r  r  s        r   _patch_lang_corer    sp    $?P'
 J"+D!JJy/?@D!)/BD @Dr   c                    U R                   R                  5        VVs/ s H#  u  pU[        [        R                  4;   d  M!  UPM%     nnn[	        U5      S:  d   S5       eU Hq  n[        U[        5        [        UR                  [        5        U[        :X  a  [        UR                  [        5        [        UR                  5        [        U5        Ms     g s  snnf )Nr   z:triton.language must be visible from within jit'd function)__globals__r5  r{   rC  rE   rF  interpreter_builderr   r   rZ  r  )r  r  r)   langsr  s        r   _patch_langr    s    #%>>#7#7#9T#9xqUr277m=SU#9ETu:?XXX?t01t{{$782:499&9:4;;'  Us    CCc                    [        U [        5      (       GaP  [        R                  " [        R
                  R                  R                  R                  [        R
                  R                  R                  R                  U 5      5      5      n[        R                  nSU s=::  a  S:  a  O  O[        R                  nOqSU s=::  a  S:  a  O  O[        R                  nOPSU s=::  a  S:  a  O  O[        R                  nO/SU s=::  a  S:  a  O  O[        R                  nO[        SU  35      e[!        [        R"                  " U /US9U5      n[        R$                  " X15      $ ['        U S	5      (       a  [        R                  " [        R
                  R                  R                  R                  [        R
                  R                  R                  R                  U 5      5      5      n[!        [        R"                  " U R)                  5       /[        R                  S9U5      n[        R$                  " X15      $ U $ )
Ni   l        l        l         l            l            zUnsupported integer value r?   data_ptr)rz   ro   r{   	str_to_tytritonruntimejitJITFunction_type_of_key_ofrA   rv   ru   rw   rI   r   r   r.  r   r    r  )r  tyr   r   s       r   _implicit_cvtr    sz   #s\\&..,,88AA&..BTBTB`B`BhBhilBmnoS 5 HHEc!E!IIEs"U"HHEc!E!IIE9#?@@bhhuE:B?yy$$sJ\\&..,,88AA&..BTBTB`B`BhBhilBmnobhh'7ryyI2Nyy$$Jr   )	num_warps
num_stagesnum_ctasenable_fp_fusiongridmaxnregc                   ,    \ rS rSrS rS rS rS rSrg)GridExecutori  c                    SSK Jn  Xl        X l        X0l        UR
                  R                  5        VVs0 s H  u  pVXT" U5      _M     nnnU Vs/ s H  oWR                  U5      S:X  d  M  UPM     snU l        g s  snnf s  snf )Nr   )_normalize_tyr  )	r  r  r  	arg_namesr  rm   r5  get
constexprs)r   r  r  r  r  r  r  rm   s           r   r   GridExecutor.__init__  su    &"	CECUCUC[C[C]^C]xt4r!22C]^,5bID9L9LT9RVa9a4Ib _bs   A<B-Bc                 ,   / nU HF  n[        US5      (       a!  UR                  UR                  5       5        M5  UR                  U5        MH     0 nUR                  5        H.  u  pg[        US5      (       a  UR                  5       XV'   M*  XuU'   M0     X54$ Nr  )r    r{  cpur5  )r   args_devr7  args_hstr  
kwargs_hstr(   r)   s           r   _init_args_hstGridExecutor._init_args_hst  s    CsJ''	*$	  
 ,,.JCuj))"'))+
"'3	 )
 ##r   c                    [        X5       HV  u  pV[        US5      (       d  M  UR                  R                  UR	                  UR
                  5      R                  5        MX     UR                  5        HZ  u  pxXG   n	[        US5      (       d  M  UR                  R                  U	R	                  UR
                  5      R                  5        M\     g r  )zipr    r   copy_todevicer5  )
r   r  r  r7  r   arg_devarg_hstr(   	kwarg_dev	kwarg_hsts
             r   _restore_args_devGridExecutor._restore_args_dev,  s     #H 7Gw
++""7::gnn#=#B#BC !8
 %llnNC"Iy*--$$Y\\)2B2B%C%H%HI -r   c                    UR                  5        VVs0 s H  u  p4U[        ;  d  M  X4_M     nnnUR                  SS5      (       a  g U R                  X5      u  pV[	        U R
                  5        [        R                  " U R
                  /UQ70 UD6nUR                  5        VV	s0 s H"  u  pXU R                  ;   a  U	O
[        U	5      _M$     nnn	[        U R                  5      (       a  U R                  U5      OU R                  n
[        U
5      S::  d   S5       eU
SS[        U
5      -
  -  -   n
[        R                  " U
6    [        U
S   5       HU  n[        U
S   5       H@  n[        U
S   5       H+  n[        R!                  XU5        U R
                  " S	0 UD6  M-     MB     MW     U R)                  XX&5        g s  snnf s  sn	nf ! ["         a  n[%        ['        U5      5      UeS nAff = f)
NwarmupF   z#grid must have at most 3 dimensions)r   r   r   r   r1   )r5  RESERVED_KWSpopr!  r  r  rA  getcallargsr  r  callabler  rE   r  r   rD   r   	Exceptionr   rS  r,  )r   r  r7  r8  r9  r  r   r6  r  r  r  r   r   r   es                  r   __call__GridExecutor.__call__7  s   #)<<>K>41Ql5J$!$>K::h&&#228DDGG ""477DXDD^b^h^h^jk^jQZQUT__4c-:LL^jk"*499"5"5tyy4994yA~DDD~eq3t9}--(($/	347^tAwA"47^+88qA$ , ( $ 	x6F3 L l  	3"47+2	3s)   GG()G		A'G 
G3G..G3)r  r  r  r  N)	r,   r-   r.   r/   r   r!  r,  r7  r0   r1   r   r   r  r    s    c$ 	JGr   r  c                       \ rS rSrS rSrg)ASTTransformeriU  c                    / nUR                    H  nX R                  U5      /-  nM     [        U5      S:  a  [        S5      e[        R
                  " [        R                  " [        R                  " [        R                  " [        R                  " S[        R                  " 5       S9S[        R                  " 5       S9S[        R                  " 5       S9S[        R                  " 5       S9UR                  [        R                  " S	[        R                  " 5       S9[        R                  " S
S9// S9Ul	        U$ )Nr   z&Multiple assignments are not supportedr  )idctxlanguage)r)   r   r=  semanticri  r  F)r)   )funcr6  keywords)targetsvisitrE   r   astCall	AttributeNameLoadr)   Constant)r   nodenamestargets       r   visit_AssignASTTransformer.visit_AssignW  s    llFjj())E #u:>EFF XXmm--chh(
.SZdjmjrjrjtu#5;FCHHJX **chh*?SXXZP,,U+-79;
 r   r1   N)r,   r-   r.   r/   rM  r0   r1   r   r   r:  r:  U  s    r   r:  c                   L    \ rS rSr\" 5       rS rS rS rS r	S r
S rS rS	rg
)FunctionRewriterii  c                 8    Xl         X l        SU l        SU l        g )Nr  r   )r  r7  filenamedef_file_lineno)r   r  r7  s      r   r   FunctionRewriter.__init__l  s    $%r   c                 L    [         R                  " U R                  5      u  pU R	                  5       u  U l        U l        U R                  U5      U l        U R                  U5      nU R                  U5      nU R                  U5      $ ! [         a    U R                  s $ f = fr   )rA  getsourcelinesr  r5  _get_jit_fn_file_linerR  rS  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r   linesr  r  transformed_asts        r   rewrite_astFunctionRewriter.rewrite_asts  s    	--dgg6HE /3.H.H.J+t+../""5)--c2%%o66  	77N	s   "B
 
B#"B#c                 B    SSK JnJn  U" U" U R                  5      5      $ )Nr   )get_jit_fn_file_liner  )r  rb  r  r  )r   rb  r  s      r   rW  &FunctionRewriter._get_jit_fn_file_line  s    :#K$899r   c                     Sn[        U5       H0  u  p4UR                  5       R                  S5      (       d  M+  US-   nM2     U$ )Nr   zdef r   )r  strip
startswith)r   r]  rY  r   lines        r   rX  FunctionRewriter._find_def  s@    
 'GAzz|&&v..U
 ( r   c                 r    XR                   S-
  S  nSR                  U5      n[        R                  " U5      $ )Nr   r  )rY  jointextwrapdedent)r   r]  r  s      r   rZ   FunctionRewriter._prepare_source  s2    oo)*+ggens##r   c                     [         R                  " U5      nU R                  R                  U5      n[         R                  " U5        U R
                  S-
  n[         R                  " X45        U$ rC  )rD  parseast_transformerrC  fix_missing_locationsrS  increment_lineno)r   r  
parsed_astr^  
inc_linenos        r   r[  FunctionRewriter._transform_ast  sY     YYs^
..44Z@!!/2))A-
_9r   c                    [        XR                  SS9n0 U R                  EnU R                  R                  n[        5       R                  5        H  u  pVXT;  d  M  XdU'   M     [        X$U5        X0R                  R                     $ )Nexec)rR  mode)	compilerR  r7  r  r  globalsr5  rw  r,   )r   r^  compiled_codelocal_namespace
fn_globalsr(   r)   s          r   r\  "FunctionRewriter._compile_and_exec  so    --fU)T[[/WW((
!)//+JC$"'3 , 	]8ww//00r   )rS  rY  rR  r  r7  N)r,   r-   r.   r/   r:  rp  r   r_  rW  rX  rZ  r[  r\  r0   r1   r   r   rP  rP  i  s-    $&O&7(:$
	1r   rP  c                   D    \ rS rSr0 rS	S jrS r\S 5       r S rS r	Sr
g)
InterpretedFunctioni  Nc                    ^  UT l         [        U40 UD6T l        U 4S jnUT l        [        R
                  " U5      nUR                  R                  5        Vs/ s H  oUR                  PM     snT l	        g s  snf )Nc                  h   > US   nTR                  5       n[        UTR                  U5      " U 0 UD6$ )Nr  rewriter  r  )r6  r7  r  r  r   s       r   run)InterpretedFunction.__init__.<locals>.run  s4    &>DBDNND94J6JJr   )
r  rP  rewriterr  rA  	signature
parametersr  r  r  )r   r  r7  r  r  r9  s   `     r   r   InterpretedFunction.__init__  sf    (6v6	K
 %%b)	*3*>*>*E*E*GH*GQ&&*GHHs   A7c                     U R                   U R                  ;  a1  U R                  R                  5       U R                  U R                   '   U R                  U R                      $ r   )r  rewritten_fnr  r_  r   s    r   r  InterpretedFunction.rewrite  sJ    77$+++)-)B)B)DDdgg&  ))r   c                 .    U R                   R                  $ r   )r  r,   r   s    r   r,   InterpretedFunction.__name__  s    wwr   c                 N    U R                  5       n[        X R                  U5      $ r   r  )r   r  r  s      r   __getitem__InterpretedFunction.__getitem__  s    \\^B55r   c                     [        U R                  5        U R                  5       n U" U0 UD6$ ! [         a  n[	        [        U5      5      UeS nAff = fr   )r  r  r  r5  r   rS  )r   r6  r7  r  r6  s        r   r7  InterpretedFunction.__call__  sO    DGG\\^	3t&v&& 	3"47+2	3s   / 
AAA)r  r  r  r  r  )r,   r-   r.   r/   r  r   r  rX  r  r7  r0   r1   r   r   r  r    s0    LI*
    63r   r  )9rD  rk  rA  typingr   r   numpyrA   r  triton.languager>  r{   dataclassesr   errorsr   	functoolsr   _C.libtritonr	   ru  r
   r   r   r3   rX   rx   r   r   r   r   	vectorizer   r>  r   r?  rI   r  r   r   r?  rF  rZ  r\  rt  r  r  r  r  r  r  r1  r  NodeTransformerr:  rP  r  r1   r   r   <module>r     sq   
        ! $  6 $ : 6 $
& 
& 
&	@='@
# ll45ll45Z<g gK8 K8\#4($@ @D].& ].@;6$ ;6|) K\	. )*  ^AG AGHS(( (B1 B1J%3 %3r   