
    Ϧi7                      S SK Jr  S SKrS SKJrJrJrJrJr  S SK	r	SSK
Jr  SSKJr  SSKJr  \" S	5      r " S
 S\5      rSnS jrSnS jrSoS jr    SpS jrSqSrS jjrSsS jr  St SuS jjrSvS jr    SwS jr    SwS jr    SwS jrSxS jrSxS jr    SyS jr SxS jr!SzS jr"SzS jr#S{S jr$    S|S jr%S}S jr&S}S  jr'S}S! jr(S}S" jr)S}S# jr*S~S$ jr+S}S% jr,S}S& jr-S}S' jr.SS( jr/SS) jr0SS* jr1SS+ jr2S}S, jr3S}S- jr4S}S. jr5S}S/ jr6S}S0 jr7S}S1 jr8SS2 jr9SS3 jr:SS4 jr;SS5 jr<SS6 jr=SS7 jr>SS8 jr?SS9 jr@SS: jrASS; jrBSS< jrCSS= jrDSS> jrE S   SS? jjrFS@ rGSA rHSB rISC rJSD rKSE rLSF rMSG rNSH rO            SSI jrP    SSJ jrQSSK jrR                        SSL jrSSSM jrTSN rUSO rV      SSP jrWSSQ jrX    SSR jrYSSS jrZSST jr[SSU jr\SSV jr]SSW jr^SSX jr_    SSY jr`SZ ra      SS[ jrbSS\ jrc        SS] jrdSS^ jreS_ rfSS` jrg    SSa jrhSSb jriSSc jrjSSd jrkSSe jrlSSf jrmSSg jrnSSh jroSSi jrpSj rqSqSk jrrSSl jrsSSm jrtg)    )annotationsN)ListOptionalSequenceTupleTypeVar   )ir   )core)mathTc                  (   ^  \ rS rSrU 4S jrSrU =r$ )IncompatibleTypeErrorImpl   c                   > Xl         X l        SU R                   R                  5       -   S-   U R                  R                  5       -   U l        [        [
        U ]  U R                  5        g )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      W/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/triton/language/semantic.pyr   "IncompatibleTypeErrorImpl.__init__   sT    2T[[5I5I5KKgUX\XcXcXlXlXnn'7E    )r   r   r   )__name__
__module____qualname____firstlineno__r   __static_attributes____classcell__)r   s   @r   r   r      s    F Fr   r   c                    U S;  a  [        SU  35      e[        R                  " UR                  U 5      [        R                  5      $ )Nr   r   r	   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorcreate_get_program_idint32axisbuilders     r   
program_idr/      s=    9FtfMNN99W2248"((CCr   c                    U S;  a  [        SU  35      e[        R                  " UR                  U 5      [        R                  5      $ )Nr&   z-num_programs axis must be 0, 1, or 2 but got )r'   r(   r)   create_get_num_programsr+   r,   s     r   num_programsr2   "   s=    9HOPP99W44T:BHHEEr   c                d   U R                   nUR                   nU R                  nUR                  nXE:X  a	  X#:  a  U $ U$ U[        R                  R                  R
                  :X  a	  X#:  a  U $ U$ U[        R                  R                  R
                  :X  a	  X2:  a  U$ U $ [        SU SU 35      e)Nzunexpected signedness r   )int_bitwidthint_signednessr(   dtype
SIGNEDNESSUNSIGNED	TypeError)a_tyb_tya_rankb_ranka_snb_sns         r   integer_promote_implr@   -   s    FFDD |t0D0	$$--	-'t1T1	$$--	-'t1T1
,TF%v>
??r   c                f   X:w  a  U(       a  X4OX 4u  pVUR                  5       R                  UR                  5       R                  ::  a=  U(       a4  U[        R                  [        R                  4;   a  [        R
                  $ U$ U R                  5       (       d  UR                  5       (       a  [        R                  $ U R                  5       (       d  UR                  5       (       a  [        R
                  $ U R                  5       (       d  UR                  5       (       a'  U(       a  [        R
                  $ [        R                  $ U R                  5       (       d  UR                  5       (       aa  U(       a  [        R
                  $ U R                  5       (       a%  UR                  5       (       a  [        R                  $ [        R
                  $ U R                  5       (       a,  UR                  5       (       a  X:X  a  U $ [        R                  $ U R                  5       (       a  UR                  5       (       d  [        SU  SU 35      eU(       aM  U R                  UR                  :w  a3  [        SU R                  5       -   S-   UR                  5       -   S-   5      e[!        X5      $ )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer(   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr9   r5   r   r@   )r:   a_is_scalarr;   b_is_scalar
div_or_mod	scalar_ty	tensor_tys          r   computation_type_implrU   =   s   
 !/:|	>>!!Y^^%5%;%;;yRZZ,EEzz! ||~~zz ||~~zz ||~~::::||~~::<<>>dllnn;;zz{{}}|t33;;==*4&dV<== d))T-@-@@5G'QTXTaTaTcckk l 	l  ++r   c                   [        U [        5      (       a4  [        R                  " UR	                  U 5      [        R
                  5      $ [        U [        5      (       a  SU s=::  a  S:  a  O  O[        R                  nOrSU s=::  a  S:  a  O  O[        R                  nOQSU s=::  a  S:  a  O  O[        R                  nO0SU s=::  a  S:  a  O  O[        R                  nO[        SU  S35      e[        S	XUS
9$ [        U [        5      (       an  SnSSS-  -  n[        S   " U 5      nU[        S5      :X  d  US:X  d  X :w  d  XFs=::  a  U::  a  O  O[        R                  nO[        R                   n[        S	XUS
9$ [        U [        R"                  5      (       a  [%        U R&                  U5      $ [        U [        R                  5      (       a  U $ U(       a  [)        SU  S[+        U 5       S35      eU $ )N           l                             l            zNonrepresentable integer . r6   r.   g      8g   ?r	      absinf        zcannot convert z	 of type z
 to tensor)
isinstanceboolr(   r)   get_int1int1intr+   uint32int64uint64r'   fullfloat__builtins__rH   rJ   	constexpr	to_tensorrE   r9   type)xr.   
check_typer6   min_float32max_float32abs_xs          r   rn   rn   o   s   !Tyy))!,bgg66	As		QHHEa%IIEq 5 HHEa%IIE81=>>B88	Au		!QV+U#A&E%L C<6.;.JJEJJEB88	Ar||	$	$'**	Aryy	!	!/!Id1gYjIJJHr   c                    U R                  5       (       aX  U(       d  [        X5      eUR                  5       (       a  X:w  a  [        X5      eUR                  5       (       a  [        X5      eg g N)is_ptrr   is_floating)r   r   allow_ptr_as      r   check_ptr_type_implrz      sZ    }}+F;;==?? 0+F;;+F;;   r   c                   [        U [        R                  5      n[        U[        R                  5      nU(       a  U n	[        X5      n U(       a  Un
[        X5      nU R                  R
                  nUR                  R
                  n[        XU5        [        XU5        U(       a  UR                  5       (       d  UR                  5       (       d  [        XXU5      nU(       a  W	S:  a  UR                  5       (       d"  U(       a&  W
S:  a   UR                  5       (       a  [        S5      eU(       a  [        SW	XS9O[        XU5      n U(       a  [        SW
XS9O[        XU5      n[        XU5      u  pX4$ )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.r\   r]   )rb   numbersNumberrn   ro   scalarrz   rw   rU   is_int_unsignedr'   rj   castbroadcast_impl_value)lhsrhsr.   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrR   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_implr      sR    sGNN3MsGNN3M
%
% JJ
>
>
 1 1 3 3J<M<M<O<O*:jakl
j1n1K1K1M1M Z!^
8R8R8T8T G H H CP 
*?UYZ]krUs 	 CP 
*?UYZ]krUs 	 $Cg6HC8Or   c                :   U R                   R                  R                  S:  d  UR                  R                  (       d  g U R                   R                  nUR                   R                  nXE:X  d   eUR                  5       (       d   e[        U [        R                  U5      n [        U[        R                  U5      nU" XSU5      nUR                  5       n[        R                  " UR                  U5      [        R                  5      nUR                  5       n[        R                  " UR                  U5      [        R                  5      n[        [        XgU5      [        XhU5      U5      n	SUR                   SUR                    3n
[#        XU5        g )N@   Frf   z! overflow detected for operation )ro   r~   r4   optionssanitize_overflowrO   r   r(   rh   get_int_max_valuer)   	get_int64get_int_min_valueand_
less_equalgreater_equalr   device_assert)r   r   r.   	binary_opr   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_implr      s7   
xx##r)1R1RJJ###
sBHHg
&C
sBHHg
&C
CeW
-C,,.I		'++I6AI,,.I		'++I6AI
373]3SZ5[]deD
''((I)J\J\I]
^C$W%r   c                   [        XUSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a   UR                  5       (       a  [	        S5      eUR                  5       (       aC  UR                  5       (       d.  XpU R                  R                  nUR                  R                  nUR                  5       (       aE  [
        R                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       aE  [
        R                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       a]  U(       a  [        XU[        5        [
        R                  " UR                  U R                  UR                  5      U R                  5      $ [	        SU 35      e)NTzcannot add pointers togetherrB   )r   ro   r~   rw   r9   r(   r)   create_addptrhandlerx   create_faddrO   r   add
create_add)inputotherr   r.   input_scalar_tyother_scalar_tys         r   r   r      sk   /gtTRLEjj''Ojj''OO$:$:$<$<677 (>(>(@(@u**++**++yy..u||U\\JEJJWW		$	$	&	&yy,,U\\5<<H%**UU				!	!,U7CHyy++ELL%,,GTT
&&78
99r   c           	        [        XUSS5      u  pU R                  R                  nUR                  5       (       aN  [        R
                  " UR                  U R                  [        X5      R                  5      U R                  5      $ UR                  5       (       aE  [        R
                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       a]  U(       a  [        XU[        5        [        R
                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      e)NTFrB   )r   ro   r~   rw   r(   r)   r   r   minusrx   create_fsubrO   r   sub
create_subr9   r   r   r   r.   rS   s        r   r   r      s    /gtUSLE

!!Iyy..u||U5=R=Y=YZ\a\f\fggyy,,U\\5<<H%**UU					,U7CHyy++ELL%,,GTT
&yk2
33r   c                   [        XU5      u  pU R                  R                  nUR                  5       (       aE  [        R
                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       a]  U(       a  [        XU[        5        [        R
                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      eNrB   )r   ro   r~   rx   r(   r)   create_fmulr   rO   r   mul
create_mulr9   r   s        r   r   r     s    /gFLE

!!Iyy,,U\\5<<H%**UU					,U7CHyy++ELL%,,GTT
&yk2
33r   c           	        [        XUSSSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a#  UR	                  5       (       a  [        XU5      nGOUR	                  5       (       a"  UR                  5       (       a  [        XU5      n OUR	                  5       (       aL  UR	                  5       (       a7  [        U [        R                  U5      n [        U[        R                  U5      nOlUR                  5       (       aI  UR                  5       (       a4  UR                  UR                  :  a  [        XU5      nO[        XU5      n O[        SU 35      e[        R                  " UR                  U R                  UR                  5      U R                  5      $ NFTrB   )r   ro   r~   rx   rO   r   r(   rH   fp_mantissa_widthr9   r)   create_fdivr   )r   r   r.   r   r   s        r   truedivr     sT   /gueUY[_`LEjj''Ojj''O""$$)?)?)A)AUW5				!	!o&A&A&C&CUW5				!	!o&<&<&>&>UBJJ0UBJJ0		$	$	&	&?+F+F+H+H,,/P/PP9E9E *?*;<==99W((u||DejjQQr   c           	     r   [        XUSSSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a  UR                  5       (       a  [	        X45      n[        XU5      n [        XU5      nUR                  5       (       aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      er   )r   ro   r~   rO   r@   r   is_int_signedr(   r)   create_sdivr   create_udivr9   )r   r   r.   r   r   ret_tys         r   floordivr   0  s    /gueUY[_`LEjj''Ojj''OO$:$:$<$<%oGUG,UG,!!99W00u||LejjYY99W00u||LejjYY
&&78
99r   c           	     t   U R                   R                  nUR                   R                  nUR                  5       (       a  UR                  5       (       d  [        S5      e[	        XUSSSS5      u  pUR                  U R                  UR                  5      n[        R                  " X`R                   5      $ )Nz4both operands of fdiv must have floating scalar typeFT)	ro   r~   rx   r9   r   r   r   r(   r)   )r   r   ieee_roundingr.   r   r   r   s          r   fdivr   ?  s    jj''Ojj''O&&((0K0K0M0MNOO/gueUZ\`aLE


ellELL
9C99S**%%r   c           	     <   [        XUSSSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a;  [        R
                  " [        XSU5      US9n[        U [        XQSU5      SU5      nU$ UR                  5       (       a  UR                  UR                  :w  a3  [        SUR                  5       -   S-   UR                  5       -   S-   5      eUR                  5       (       aE  [        R                  " UR!                  U R"                  UR"                  5      U R                  5      $ [        R                  " UR%                  U R"                  UR"                  5      U R                  5      $ [        SU 35      e)NFT_builderzCannot mod z by rC   rB   )r   ro   r~   rx   r   floorr   r   r   rO   r5   r9   r   r   r(   r)   create_sremr   create_urem)r   r   r.   rS   r   r   r   s          r   modr   J  sX   /gueUY[_`LE

!!Ijj''O

4eW=P%U494I
					##'E'EEMI,>,>,@@6IOLdLdLff jo o p p ""$$99W00u||LejjYY99W00u||LejjYY
&yk2
33r   c                   [        XU5      u  pU R                  nUR                  5       (       a  U[        R                  R
                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ U[        R                  R                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      eUR                  5       (       aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       aE  [        R                  " UR!                  U R                  UR                  5      U R                  5      $ [#        SU 35      eNzUnexpected propagate_nan Unexpected dtype )r   r6   rx   r(   PropagateNanALLr)   create_minimumfr   ro   NONEcreate_minnumfr'   r   create_minsir   create_minuir9   rp   ypropagate_nanr.   r6   s        r   minimumr   f  8   'g6DAGGEBOO///99W44QXXqxxH!&&QQboo22299W33AHHahhGPP8HII					yy--ahhA166JJ				 	 yy--ahhA166JJ+E7344r   c                   [        XU5      u  pU R                  nUR                  5       (       a  U[        R                  R
                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ U[        R                  R                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      eUR                  5       (       aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       aE  [        R                  " UR!                  U R                  UR                  5      U R                  5      $ [#        SU 35      er   )r   r6   rx   r(   r   r   r)   create_maximumfr   ro   r   create_maxnumfr'   r   create_maxsir   create_maxuir9   r   s        r   maximumr   x  r   r   c                X   [        XU5      u  p[        XU5      u  p[        XU5      u  pU R                  nUR                  5       (       aQ  [        R                  " UR                  U R                  UR                  UR                  U5      U R                  5      $ [        SU S35      e)Nr   z(. Only floating point clamp is supported)	r   r6   rx   r(   r)   create_clampfr   ro   r9   )rp   minmaxr   r.   r6   s         r   clampr     s    +Cg>HC)!':FA)!':FAGGEyy..qxxSZZQ^_abagaghh+E72Z[\\r   c                @   [        XU5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a  UR                  5       (       d  [	        X45      e[        X45      nXS:w  a  [        XU5      n XT:w  a  [        XU5      nX4$ rv   )r   ro   r~   rO   r   r@   r   )r   r   r.   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_implr     s    /gFLE::$$L::$$L  (;(;(=(='CC%lAJ!U0!U0<r   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ rv   )r   r(   r)   
create_andr   ro   r   r   r.   s      r   r   r     :    0wGLE99W''ellCUZZPPr   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ rv   )r   r(   r)   	create_orr   ro   r   s      r   or_r     s:    0wGLE99W&&u||U\\BEJJOOr   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ rv   )r   r(   r)   
create_xorr   ro   r   s      r   xor_r     r   r   c                   U R                   R                  5       (       d!  [        U [        R                  " S5      U5      n UR                   R                  5       (       d!  [        U[        R                  " S5      U5      n[        XU5      $ Nre   )ro   is_int1bitcastr(   r6   r   r   s      r   logical_andr     sc    ::rxx/9::rxx/9g&&r   c                   U R                   R                  5       (       d!  [        U [        R                  " S5      U5      n UR                   R                  5       (       d!  [        U[        R                  " S5      U5      n[        XU5      $ r   )ro   r   r   r(   r6   r   r   s      r   
logical_orr     sc    ::rxx/9::rxx/9uW%%r   c                    U R                   R                  5       (       d!  [        U [        R                  " S5      U5      n [        X5      $ r   )ro   r   r   r(   r6   invert)r   r.   s     r   not_r     s7    ::rxx/9%!!r   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ rv   )r   r(   r)   create_lshrr   ro   r   s      r   lshrr     :    0wGLE99W((u||DejjQQr   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ rv   )r   r(   r)   create_ashrr   ro   r   s      r   ashrr    r   r   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ rv   )r   r(   r)   
create_shlr   ro   r   s      r   shlr    r   r   c                    U $ rv   r\   )r   s    r   plusr    s    Lr   c                   U R                   R                  nUR                  5       (       a  [        SUR	                  5       -   S-   5      e[
        R                  " UR                  UR                  U5      5      U5      n[        X0SU5      $ )Nz$wrong type argument to unary minus ()T)
ro   r~   rw   r'   r   r(   r)   get_null_valueto_irr   )r   r.   r   _0s       r   r   r     su    ::$$L?,BWBWBYY\__``	7)),*<*<W*EF	UBr$((r   c                B   U R                   R                  nUR                  5       (       d  UR                  5       (       a  [	        SUR                  5       -   S-   5      e[        R                  " UR                  UR                  U5      5      U5      n[        XU5      $ )Nz%wrong type argument to unary invert (r
  )ro   r~   rw   rx   r'   r   r(   r)   get_all_ones_valuer  r   )r   r.   r   _1s       r   r   r     s    ::$$L 8 8 : :@<CXCXCZZ]``aa	7--l.@.@.IJL	YB7##r   c                    U R                   R                  5       (       d  [        R                  $ U R                   R                  n[        R
                  " [        R                  U5      $ rv   )ro   is_blockr(   re   shape
block_type)vr  s     r   
_bool_liker    s>    66??wwFFLLE==%((r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   ro   r~   rx   r(   r)   create_fcmpOGTr   r  rO   r   create_icmpSGTcreate_icmpUGTr9   r   r   r.   rS   s       r   greater_thanr        /gFLE

!!Iyy//ellKZX]M^__					""$$99W33ELL%,,OQ[\aQbcc99W33ELL%,,OQ[\aQbcc
&yk2
33r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   ro   r~   rx   r(   r)   create_fcmpOGEr   r  rO   r   create_icmpSGEcreate_icmpUGEr9   r  s       r   r   r     r  r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   ro   r~   rx   r(   r)   create_fcmpOLTr   r  rO   r   create_icmpSLTcreate_icmpULTr9   r  s       r   	less_thanr&    r  r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   ro   r~   rx   r(   r)   create_fcmpOLEr   r  rO   r   create_icmpSLEcreate_icmpULEr9   r  s       r   r   r   .  r  r   c                   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   ro   r~   rx   r(   r)   create_fcmpOEQr   r  rO   create_icmpEQr9   r  s       r   equalr.  =      /gFLE

!!Iyy//ellKZX]M^__					yy..u||U\\JJW\L]^^
&yk2
33r   c                   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   ro   r~   rx   r(   r)   create_fcmpUNEr   r  rO   create_icmpNEr9   r  s       r   	not_equalr3  I  r/  r   c                   [        U [        5      (       a  [        U[        5      (       d  [        S5      e[        U S-	  5      n[        US-	  5      nU(       d  U(       a  [        S5      eX::  a  [        S5      eX-
  nXUS-
  -  S:w  a  [        S5      eU/n[        R
                  " [        R                  U5      n[        R                  " UR                  X5      U5      $ )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)	rb   rf   r'   rc   r(   r  r+   r)   create_make_range)startendr.   is_start_int64is_end_int64ranger  r   s           r   aranger<  Z  s    eS!!C)=)=JKK%2+&Nr	?L344
|XYYKE!>??GE]]288U+F99W..u:FCCr   c                   [        U[        R                  5      (       a.  UR                  R                  S:X  d   S5       e[        XU5      nOlUc  [        S5      eUS:X  a!  UR                  UR                  U5      5      nO![        USUR                   35      nU" U5      n[        R                  " X5      n[        XU5      $ )Nr   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)rb   r(   r)   numelrE   r   r'   r  r  getattrnamesplat)r  rE   r6   r.   get_value_fns        r   rj   rj   k  s    %##{{  A%C'CC%U7+ =QRRA:**5;;w+?@E"7d5::,,?@L 'E		%'w''r   c                   U R                   R                  5       (       a   S5       e[        U5      S:X  a  U $ [        R                  " U R
                  U5      n[        R                  " UR                  U R                  U5      U5      $ )NzCannot splat a block tensorr   )	ro   r  lenr(   r  r6   r)   create_splatr   )rE   r  r.   r   s       r   rB  rB    sg    zz""$$C&CC$
5zQ]]5;;.F99W))%,,>GGr   c                "   SnU H  nXE-  nM	     U R                   R                  U:w  a  [        S5      e[        R                  " U R                   R
                  U5      n[        R                  " UR                  U R                  X5      U5      $ )Nr   z:reshape() cannot change total number of elements in tensor)	ro   r?  r'   r(   r  r~   r)   create_reshaper   )r   	dst_shapecan_reorderr.   r?  sr   s          r   reshaperL    ss    E
 zz5 UVV]]5::,,i8F99W++ELL)QSYZZr   c                   U R                    Vs/ s H  n[        R                  " U5      PM     nnUR                  US5        U R                  R                  5       (       d
  [        XUS9$ [        R                  " U R                  R                  U5      n[        R                  " UR                  U R                  U5      U5      $ s  snf )Nr   )r  r.   )r  r(   _constexpr_to_valueinsertro   r  rB  r  r~   r)   create_expand_dimsr   )r   r-   r.   rp   rI  r   s         r   expand_dimsrQ    s    49KK@Kq''*KI@T1::  UW==]]5::,,i8F99W//dCVLL As    Cc                \   U(       d   S5       e[        U R                  5      S:X  d   e[        R                  " U R                  R
                  U R                  S   UR                  S   -   /5      n[        R                  " UR                  U R                  UR                  5      U5      $ )Nz;current implementation of `cat` always may reorder elementsr   r   )	rE  r  r(   r  ro   r~   r)   
create_catr   )r   r   rJ  r.   ret_types        r   catrU    s|    UUU;syy>Q}}SXX__syy|ciil/J.KLH99W''

CJJ?JJr   c                    [        XU5      u  pU R                  / :H  nU(       a  [        U SU5      n [        USU5      n[        U R                  S   [        R
                  5      (       a  [        R
                  " S5      nOSnU R                  U/-   n[        R                  " U R                  R                  U5      n[        R                  " UR                  U R                  UR                  5      U5      nU(       a  [        US/SUS9nU$ )Nr   r	   FrJ  r.   )r   r  rQ  rb   r(   rm   r  ro   r~   r)   create_joinr   rL  )abr.   
was_rank_1two	new_shaperT  r   s           r   joinr_    s    g.DA BJ1g&1g&!''"+r||,,ll1o3%I}}QVV]]I6H
))G''!((;X
FCcA3E7CJr   c                   [        U R                  5      S:  d   e[        R                  " U R                  S   5      S:X  d   eU R                  S S n[        R                  " U R
                  R                  U5      nUR                  U R                  5      u  pE[        R                  " XC5      [        R                  " XS5      4$ )Nr   rW  r	   )
rE  r  r(   rN  r  ro   r~   create_splitr   r)   )rZ  r.   r^  rT  outLHSoutRHSs         r   splitrd    s    L1""1772;/1454I}}QVV]]I6H))!((3NF
		&#
		&# r   c                   [        U R                  5      [        U5      :w  a  [        S5      e[        S U 5       5      [	        [        [        U5      5      5      :w  a  [        SU 35      e[        R                  " U R                  R                  U Vs/ s H  o0R                  U   PM     sn5      n[        R                  " UR                  U R                  U5      U5      $ s  snf )Nz5permute dims must have the same length as input shapec              3  N   #    U  H  n[         R                  " U5      v   M     g 7frv   )r(   rN  ).0ds     r   	<genexpr>permute.<locals>.<genexpr>  s     6Ab$$Q''s   #%z?permute dims must be a permutation of 0, 1, ..., n-1, but were )rE  r  r'   sortedlistr;  r(   r  ro   r~   r)   create_transr   )r   dimsr.   rh  rT  s        r   permutero    s    
5;;3t9$PQQ666$uSY?O:PPZ[_Z`abb}}UZZ..0NAQ0NOH99W))%,,=xHH 1Os   C&
c                   U R                   R                  5       (       dR  [        R                  " U R                   U5      n[        R                  " UR                  U R                  U5      U5      $ U R                   R                  5       n[        U5      [        U5      :w  a  [        SU SU 35      eX:X  a  U $ [        U5       H1  u  pVX   U:w  d  M  US:w  d  M  [        SX    SU SU SU SU 3
5      e   [        R                  " U R                   R                  U5      n[        R                  " UR                  U R                  U5      U5      $ )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )ro   r  r(   r  r)   rF  r   get_block_shapesrE  r'   	enumerater~   create_broadcast)r   r  r.   r   	src_shapeiitems          r   broadcast_impl_shaperx    s0   ::  uzz51yy--ellEBFKK

++-I
9~U#<YKr%QRRY'8t	RSXS[R\ ]??Cf E!!"2i[5'; < < (
 ]]5::,,e4F99W--ellEBFKKr   c           	     z   U R                   nUR                   nUR                  5       (       a  UR                  5       (       dm  [        R                  " UR                  UR
                  5      n[        R                  " UR                  UR                  UR                  5       5      U5      nX4$ UR                  5       (       d  UR                  5       (       am  [        R                  " UR                  UR
                  5      n[        R                  " UR                  U R                  UR                  5       5      U5      n X4$ UR                  5       (       Ga  UR                  5       (       Ga  UR                  5       nUR                  5       n[        U5      [        U5      :  a  [        [        U5      [        U5      5       Hs  n[        R                  " UR                  U R                  S5      [        R                  " UR                  S/U-   5      5      n U R                   nUR                  5       nMu     O[        U5      [        U5      :  a  [        [        U5      [        U5      5       Hs  n[        R                  " UR                  UR                  S5      [        R                  " UR                  S/U-   5      5      nUR                   nUR                  5       nMu     [        U5      [        U5      :X  d   e/ n[        U5       Hs  u  pXi   nU
S:X  a  UR                  U5        M"  US:X  d  X:X  a  UR                  U
5        M@  [        S[!        U	5      -   S-   [!        U
5      -   S-   [!        U5      -   5      e   XX:w  aR  [        R                  " UR                  U5      n[        R                  " UR#                  U R                  U5      U5      n Xh:w  aR  [        R                  " UR                  U5      n[        R                  " UR#                  UR                  U5      U5      nX4$ )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index rq  r   )ro   r  r(   r  r~   r  r)   rF  r   rr  rE  r;  rP  rs  appendr'   strrt  )r   r   r.   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaperv  leftrightr   s                r   r   r     sB   XXFXXF !2!2v}}fll;ii,,SZZ9P9P9RSU[\V 8OS __6??#4#4v}}fll;ii,,SZZ9P9P9RSU[\N 8OK 
		v00++-	++-	y>C	N*3y>3y>:ii : :3::q I "fmmaS9_ MO"335		 ;
 ^c)n,3y>3y>:ii : :3::q I "fmmaS9_ MO"335		 ;
 9~Y///	 +GALEqy  '1*%-  &  "-/21v"68<"=?B4y"IKR"SUXY^U_"` a a , !]]6==)<F))G44SZZKVTC!]]6==)<F))G44SZZKVTC8Or   c                    U c  g U S:X  a  [         R                  R                  $ U S:X  a  [         R                  R                  $ [	        SU  S35      e)NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r
   ROUNDING_MODERTNERTZr'   )rounding_modes    r   _str_to_rounding_moder  "  sU    $$$###
.}o=mn
oor   c                f   U R                   nUR                  5       (       a9  [        R                  " UR                  U R                   R                  5       5      nX1:X  a  U $ UR                  nUR                  nUR                  5       (       d  UR                  5       (       a  [        XU5      $ UR                  nUR                  nXg:w  a&  [        S[        U5      -   S-   [        U5      -   5      e[        R                  " UR                  U R                  UR                  U5      5      U5      $ )Nz!Cannot bitcast data-type of size z to data-type of size )ro   r  r(   r  r~   rr  rw   r   primitive_bitwidthr'   r{  r)   create_bitcastr   r  )r   dst_tyr.   src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   r   ,  s    ZZFv}}ejj.I.I.KLJJj//11E7++,,H,,H<s8}L P. .03H> ? 	?99W++ELL&,,w:OPRXYYr   c                "   U R                   n[        U[        R                  5      (       a  UR                  n[        U[        R                  5      (       a  UR                  nUR                  5       (       a9  [        R                  " UR                  U R                   R                  5       5      nXA:X  a  U $ UR                  nUR                  n[        U5      nSnUR                  5       (       an  UR                  5       (       aY  UR                  UR                  :  a?  Uc  [        R                  R                  nOJU[        R                  R                  :w  a  SnO)Ub&  [        S[!        U5      -   S-   [!        U5      -   5      eUR#                  5       (       d  UR#                  5       (       a8  UR$                  R'                  S5       c   S5       eUR$                  S   " XX2S9$ UR)                  5       (       a  UR                  5       (       d1  UR                  5       (       a  UR)                  5       (       d  U(       aA  [        R*                  " UR-                  U R.                  UR1                  U5      U5      U5      $ UR3                  5       (       a  UR5                  5       (       a*  UR7                  5       (       a:  UR5                  5       (       d%  [9        [9        U [        R:                  U5      Xb5      $ UR                  5       =(       a0    UR                  5       =(       a    UR                  UR                  :  nU(       a@  [        R*                  " UR=                  U R.                  UR1                  U5      5      U5      $ UR                  5       =(       a0    UR                  5       =(       a    UR                  UR                  :  n	U	(       a@  [        R*                  " UR?                  U R.                  UR1                  U5      5      U5      $ URA                  5       (       Ga#  URA                  5       (       Ga  URB                  URB                  :w  d  URD                  URD                  :w  a  URG                  5       =(       a    URI                  5       (       + n
URI                  5       (       aW  U RJ                  R1                  U5      n[        R*                  " URM                  U5      U RJ                  5      n[O        XU5      $ [        R*                  " URQ                  U R.                  UR1                  U5      U
5      U5      $ URS                  5       (       Ga  URA                  5       (       Ga  URI                  5       (       aW  U RJ                  R1                  U5      n[        R*                  " URM                  U5      U RJ                  5      n[O        XU5      $ URG                  5       (       a@  [        R*                  " URU                  U R.                  UR1                  U5      5      U5      $ [        R*                  " URW                  U R.                  UR1                  U5      5      U5      $ URA                  5       (       a  URS                  5       (       a  URI                  5       (       d  URG                  5       (       d@  [        R*                  " URY                  U R.                  UR1                  U5      5      U5      $ [        R*                  " UR[                  U R.                  UR1                  U5      5      U5      $ UR]                  5       (       a  URA                  5       (       a  URB                  nUS:X  a@  [        R*                  " UR_                  U R.                  UR1                  U5      5      U5      $ US	:X  aX  [O        [9        U [        R`                  U5      [        R*                  " URc                  S
5      [        R`                  5      U5      $ URA                  5       (       aU  UR]                  5       (       a@  [        R*                  " URe                  U R.                  UR1                  U5      5      U5      $ UR]                  5       (       aU  UR]                  5       (       a@  [        R*                  " URg                  U R.                  UR1                  U5      5      U5      $  SU  SU 35       e)NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.r   r   r   r   zcannot cast z to )4ro   rb   r(   rm   rE   r  r  r~   rr  r  rx   r  r
   r  r  r'   r{  is_fp8e4b15codegen_fnsgetrN   r)   create_fp_to_fpr   r  rL   rK   rM   r   rH   create_fp_trunccreate_fp_extrO   r4   r5   r   is_boolr6   r  r3  create_int_castis_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprw   create_ptr_to_intrh   r   create_int_to_ptrr  )r   r  r.   fp_downcast_roundingr  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr  bitwidths                 r   r   r   ?  s   ZZF&",,''&55399v}}ejj.I.I.KLJJ 11EFJ$:$: % %

'
'**G*G
G'@P@P@U@U)=!R%5%5%:%::RV<O+ 68;JHJefhklvhwx y y 	  J$:$:$<$<""&&"$+/0 	d1c	d 0""#9:5J^qq 	
 6 6 8 8  Z%6%6%8%8yy00v||G?TVjkmstt 	Z%7%7%9%9Z%7%7%9%9D

G4jJJ
 ((* F F%%
(E(EE  yy00v||G?TUW]^^ ##% F F%%
(E(EE  yy..u||V\\'=RSU[\\ z0022:#:#::j>W>W[e[t[t>t ..0M9K9K9M5M""7+B711"5u{{CBU0099W44U\\6<<PWCXZefhnoo &&((Z->->-@-@""7+B711"5u{{CBU00%%''99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z>>@@z'?'?'A'A99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z0022**r>99W66u||V\\RYEZ[]cddq=T%7;RYYwGXGXYZG[]_]e]e=fhopp z0022yy225<<gAVWY_`` z0022yy//fll7>STV\]]4LtF8445r   c                2   [         R                  R                  nU (       au  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R
                  nU$ [        SU  S35      eU$ )Nz.ca.cgz.cvCache modifier  not supported)r
   CACHE_MODIFIERr   CACGCVr'   cache_modifiercaches     r   _str_to_load_cache_modifierr    s    ""EU"%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                v   [         R                  R                  nU (       a  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R
                  nU$ U S:X  a  [         R                  R                  nU$ [        SU  S35      eU$ )Nz.wbr  z.csz.wtr  r  )r
   r  r   WBr  CSWTr'   r  s     r   _str_to_store_cache_modifierr    s    ""EU"%%((E L u$%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                    [         R                  R                  nU (       aS  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ [        SU  S35      eU$ )N
evict_lastevict_firstzEviction policy r  )r
   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr'   )eviction_policyevictions     r   _str_to_eviction_policyr    su    !!((Hl*))44H
 O	 -))55H O //@OPPOr   c                    S nU (       aS  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ [	        SU  S35      eU$ )NzeronanzPadding option r  )r
   PADDING_OPTIONPAD_ZEROPAD_NANr'   )padding_optionpaddings     r   _str_to_padding_optionr    sh    GV#''00G
 N	 u$''//G N ~.>nMNNNr   c                v   [         R                  R                  nU (       a  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R
                  nU$ [        SU  S35      eU$ )Nacquirereleaseacq_relrelaxedMemory semantic r  )r
   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr'   )
sem_optionsems     r   _str_to_semr    s    
//
)
)C"//))C J 9$//))C J 9$//11C
 J	 9$//))C J /
|>JKKJr   c                2   [         R                  R                  nU (       au  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ [        SU  S35      eU$ )Ngpuctasysr  r  )r
   MEM_SYNC_SCOPEGPUCTASYSTEMr'   )scope_optionscopes     r   _str_to_scoper    s    !!E5 %%))E L U"%%))E
 L	 U"%%,,E L /~^LMMLr   c                   U (       a  [        U S5      (       d  U /n U  Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     n nU  H3  n[        U[
        5      (       a  SUs=::  a  [        U5      :  a  M0   e   e   [        U 5      S:  d   e[        U 5      [        [        U 5      5      :X  d   S5       e[        U 5      $ gs  snf )N__iter__r   z'Duplicate dimension in `boundary_check`r\   )	hasattrrb   r(   rm   rE   rf   rE  setrk  )boundary_checkblock_shapeelemdims       r   _canonicalize_boundary_checkr    s    ~z22,-N]kl]kUY
4(F(F$**DP]kl!Cc3''A,Gs;7G,GGG,GGG ">"Q&&&>"c#n*=&>>i@ii>n%% ms   7Cc	           
        Uc  Ub  [        S5      eU R                  R                  R                  n	U	[        R                  :w  d   S5       eU	R                  5       (       a)  U[        R                  R                  :X  a  [        S5      eU R                  R                  n
[        X:R                  5       5      n[        R                  " UR                  U R                  X4XVU5      U
5      $ )NK`mask` and `other` arguments cannot be specified for loading block pointers3`tl.int1` should be rewrited in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r'   ro   
element_tyr(   re   rO   r
   r  r  r  rr  r)   create_tensor_pointer_loadr   )ptrmaskr   r  r  r  r  is_volatiler.   elt_tyr  s              r   _load_block_pointerr    s     5,fggXX  ++FRWWSSS}}7b&7&7&?&??[\\ XX  F 2.BYBYB[\N 99**3::~PUalmouw wr   c	           
        U R                   R                  R                  5       (       d'  [        SU R                   R	                  5        S35      eUc  Ub  [        S5      eU(       d  U(       a  [        S5      eU R                   R                  5       (       db  U(       a*  UR                   R                  5       (       a  [        S5      eU(       a*  UR                   R                  5       (       a  [        S5      eU R                   R                  5       (       aN  Ub$  [        XR                   R                  5       U5      nUb$  [        X R                   R                  5       U5      nU R                   R                  n	U	R                  n
U
[        R                  :H  nU(       a<  [        R                  n
[        R                  " XR                  5      n	[        X	U5      n Ub  [        X*U5      nU R                   R                  5       (       a1  U R                   R                  5       n[        R                  " X5      nOU
nUc3  [        R                   " UR#                  U R$                  XVU5      U5      nOQ[        R                   " UR'                  U R$                  UR$                  U(       a  UR$                  OS XVU5      U5      nU(       a  [        U[        R                  U5      nU$ )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)ro   r~   rw   r'   r   r  rx  rr  r  r(   re   int8pointer_typeaddress_spacer   r  r)   create_loadr   create_masked_load)r  r  r   r  r  r  r  r  r.   ptr_tyr  r  r  r  r   s                  r   _load_legacyr  *  sB   88??!!##01B1B1D0E]STT |)DEE. T U 	U
 88DII&&((deeUZZ((**eff xx'hh.G.G.I7SD(0I0I0KWUE XX__FF G)=)=>3( UG, xx))+v-  |ii++CJJUW]^ii&&szz4;;PU[_af'245;= 3)Jr   c	                   [        U5      n	[        U5      n
[        U5      nU R                  R	                  5       (       a8  U R                  R
                  R                  5       (       a  [        XX#XXU5	      $ [        XX#XXU5	      $ rv   )	r  r  r  ro   rw   r  r  r  r  )r  r  r   r  r  r  r  r  r.   r  r  r  s               r   loadr  h  sv     (7E&7H$^4G
xxSXX0099;;"3eWU]lstt Cughelmmr   c           	         [        XQSS9nUR                  U R                  XR                  U5      [	        U5      [        U5      5      n[        R                  " Xd5      $ NFrequire_i64)_convert_to_ir_valuescreate_descriptor_loadr   r  r  r  r(   r)   )desc_ptroffsetsr  r  ro   r.   rp   s          r   descriptor_loadr  x  sQ    #G%HG&&xGAT'B>'R'>'O	QA 99Qr   c                    [        X2SS9n[        R                  " UR                  U R                  UR                  U5      [        R
                  5      $ r
  )r  r(   r)   create_descriptor_storer   void)r  rE   r  r.   s       r   descriptor_storer    s>    #G%HG99W44X__ellT[\^`^e^effr   c                   U(       a#  US   R                   [        R                  :X  d   e[        R                  " U
R	                  U R
                  UR
                  U Vs/ s H  oR
                  PM     snU Vs/ s H  oR
                  PM     snU Vs/ s H  oR
                  PM     snU Vs/ s H  oR
                  PM     snUUUU	5
      [        R                  5      $ s  snf s  snf s  snf s  snf )Nr   )r6   r(   rh   r)   create_tensormap_creater   r  )r  global_addressbox_dim
global_dimglobal_strideelement_stride	elem_typeinterleave_layoutswizzle_mode	fill_moder.   rp   s               r   tensormap_creater!    s     a 0 6 6"(( BBB99''OO!!&'w!XXw')*z!XXz*,-}!XX}--.~!XX~.	
 	  (*-.s    C$;C)C.1C3c                ~    [         R                  " UR                  U R                  5      [         R                  5      $ rv   )r(   r)   #create_tensormap_fenceproxy_acquirer   r  )r  r.   s     r   tensormap_fenceproxy_acquirer$    s)    99W@@QSUSZSZ[[r   c           	        Ub  [        S5      eU R                  R                  R                  5       nUR                  R	                  5       (       d  [        XU5      nUR                  R	                  5       (       d   S5       eXqR                  R                  5       :X  d&   SU SUR                  R                  5        S35       eU R                  R                  R                  UR                  R                  :X  d@   SU R                  R                  R                   SUR                  R                   S35       eU R                  R                  R                  nU[        R                  :w  d   S5       e[        X75      n[        XU5      n[        R                  " UR                  U R                  UR                  X4U5      [        R                  5      $ )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r'   ro   r  rr  r  rx  r(   re   r  r   r)   create_tensor_pointer_storer   r  )	r  valr  r  r  r  r.   r  r  s	            r   _store_block_pointerr(    s    fgg ((%%668K88"3W=88O OO((33   ]	k]"4SXX5N5N5P4QQ[\] 88))SXX-@-@@  qDWX[X`X`XkXkXvXvWw  xQ  RU  RZ  RZ  Re  Re  Qf  fp  Cq  q@XX  ++FRWWSSS 2.NN sG
$C 99W88SZZQ_hpqWW r   c           	     >   U R                   R                  R                  5       (       d'  [        SU R                   R	                  5        S35      eU(       a  [        S5      eU R                   R                  5       (       d[  UR                   R                  5       (       a  [        S5      eU(       a*  UR                   R                  5       (       a  [        S5      eU R                   R                  5       (       aK  [        XR                   R                  5       U5      nUb$  [        X R                   R                  5       U5      nU R                   R                  nUR                  nU[        R                  :X  a<  [        R                  n[        R                  " XR                  5      n[        XU5      n [        XU5      nU(       dJ  [        R                  " UR!                  U R"                  UR"                  XE5      [        R$                  5      $ UR                   R                  R'                  5       (       d  [        S5      e[        R                  " UR)                  U R"                  UR"                  UR"                  XE5      [        R$                  5      $ )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)ro   r~   rw   r'   r   r  rx  rr  r  r(   re   r   r  r  r   r)   create_storer   r  r  create_masked_store)	r  r'  r  r  r  r  r.   r  r  s	            r   _store_legacyr,    s   88??!!##01B1B1D0E^TUU  A B 	B
 8888effDII&&((dee xx"3(A(A(CWM'hh.G.G.I7SDXX__FF )=)=>3( sG
$C yy--cjj#**eVXZX_X_``99##%%=>>99W00SZZV[fhjhohoppr   c           	        [        U5      n[        U5      nU R                  R                  5       (       d)  U R                  R                  R                  5       (       a  [        S5      eU R                  R                  5       (       a7  U R                  R                  R                  5       (       a  [        XX#XxU5      $ [        XX#XxU5      $ )N"Cannot store to a constant pointer)r  r  ro   is_constr~   r'   rw   r  r  r(  r,  )	r  r'  r  r  r  r  r.   r  r  s	            r   storer0    s     )8E&7H
xxchhoo6688=>>
xxSXX0099;;#CdEU\]] StUgVVr   c           	     F   [        U5      n[        U5      nU R                  R                  R                  nUR
                  S;  a  [        S5      e[        R                  " UR                  U R                  UR                  UR                  X45      UR                  5      $ )N)   r5  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  ro   r~   r  r  r'   r(   r)   create_atomic_casr   )r  cmpr'  r  r  r.   r  s          r   
atomic_casr5    sy    
c
C% E++J$$L8TUU99W..szz3::szzSV^`c`h`hiir   c                T   U R                   R                  R                  5       (       d&  [        SU R                   R	                  5       -   5      eU R                   R                  5       (       d)  U R                   R                  R                  5       (       a  [        S5      eU R                   R                  R                  nU[        R                  L a  US:w  a  [        SU-   S-   5      eU[        R                  [        R                  [        R                  [        R                  4;   a  [        SU-   S-   [        U5      -   5      eU R                   R                  5       (       aN  Ub$  [        X R                   R!                  5       U5      nUb$  [        XR                   R!                  5       U5      n[#        XR                   R                  R                  U5      nU(       d  UR%                  S5      n[        R                  nU R                   R                  5       (       af  UR'                  X`R                   R!                  5       5      n[        R(                  " [        R                  U R                   R!                  5       5      n[        R*                  " Xg5      nXU4$ )Nz)Pointer argument of store instruction is r.  r   atomic_z does not support fp16z does not support T)ro   r~   rw   r'   r   r/  r  r(   rF   re   r   int16rG   r{  r  rx  rr  r   rd   rF  r  r)   )r  r'  r  opr.   r  mask_irmask_tys           r   atom_red_typechecking_implr<    s   88??!!##DsxxGXGXGZZ[[
xxchh11::<<=>>++JRZZB%KR*BBCCbggrww"++>>R*>>ZPQQ
xx'hh.G.G.I7SD?&sHH,E,E,GQC
sHHOO..
8C""4(''88**7HH4M4M4OPGmmBGGSXX-F-F-HIGyy*T>r   c                   [        XUSU5      u  pn[        U5      n[        U5      nUR                  R                  nUR                  5       (       a  UR                  5       (       aj  [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ U[        R                  [        R                   1;  a  [#        SU 35      e[%        / SXe5      nU[        R                  :X  a  [        R&                  O[        R(                  n[+        XU5      n	[+        U [        R,                  " US5      U5      n
U[        R                  :X  a  [        R.                  O[        R0                  n[+        XU5      n[+        U [        R,                  " US5      U5      n[3        XU5      n[5        XU5      n[        R                  " UR                  [        R                  R                  U
R                  U	R                  [7        X.U5      R                  X45      U	R                  5      n[        R                  " UR                  [        R                  R8                  UR                  UR                  [7        X/U5      R                  X45      UR                  5      n[;        UUUU5      n[+        UXe5      $ )Nr   z#atomic_max not supported for dtype ra   r   )r<  r  r  ro   r~   rO   r   r(   r)   create_atomic_rmwr
   	ATOMIC_OPMAXr   UMAXrH   rJ   r9   rj   r+   rh   r   r  rg   ri   r   r&  r   UMINwherer  r'  r  r  r  r.   sca_tyr  i_typei_vali_ptrui_typeui_valui_ptrposnegpos_retneg_retr   s                      r   
atomic_maxrP  ,  |   /$wONCd
c
C% EXX__F}}!!99))",,*:*:CJJ

TXT_T_adlnqnvnvx x 99))",,*;*;SZZUYU`U`bemorowowy y
 bjj"**--=fXFGGC)D2::-RXX288FC)EC3W=E!RZZ/biiRYYGS7+FS"//'15w?F
7
+C
Cw
'Cii!!",,"2"2ELL%,,"&t'":"A"A3	OPUPZPZ\G ii!!",,"3"3V]]FMM"&t'":"A"A3	OPVP[P[]G Wgw
/C3((r   c                   [        XUSU5      u  pn[        U5      n[        U5      nUR                  R                  nUR                  5       (       a  UR                  5       (       aj  [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ U[        R                  [        R                   1;  a  [#        SU 35      e[%        / SXe5      nU[        R                  :X  a  [        R&                  O[        R(                  n[+        XU5      n	[+        U [        R,                  " US5      U5      n
U[        R                  :X  a  [        R.                  O[        R0                  n[+        XU5      n[+        U [        R,                  " US5      U5      n[3        XU5      n[5        XU5      n[        R                  " UR                  [        R                  R                  U
R                  U	R                  [7        X.U5      R                  X45      U	R                  5      n[        R                  " UR                  [        R                  R8                  UR                  UR                  [7        X/U5      R                  X45      UR                  5      n[;        UUUU5      n[+        UXe5      $ )Nr   z#atomic_min not supported for dtype ra   r   )r<  r  r  ro   r~   rO   r   r(   r)   r>  r
   r?  MINr   rB  rH   rJ   r9   rj   r+   rh   r   r  rg   ri   r   r&  r   rA  rC  rD  s                      r   
atomic_minrT  S  rQ  r   c           
        [        XUSU5      u  pn[        U5      n[        U5      nUR                  R                  nUR                  5       (       a  [        R                  R                  O[        R                  R                  n[        R                  " UR                  XpR                  UR                  UR                  X45      UR                  5      $ )Nr   )r<  r  r  ro   r~   rx   r
   r?  FADDADDr(   r)   r>  r   )r  r'  r  r  r  r.   rE  r9  s           r   
atomic_addrX  z  s    /$wONCd
c
C% EXX__F$0022		8H8HB99W..r::szz4;;X[cehememnnr   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nand)r<  r  r  r(   r)   r>  r
   r?  ANDr   ro   r  r'  r  r  r  r.   s         r   
atomic_andr]    q    /$wONCd
c
C% E99W..r||/?/?SZZY]YdYdfiqXX r   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nor)r<  r  r  r(   r)   r>  r
   r?  ORr   ro   r\  s         r   	atomic_orrb    so    /$gNNCd
c
C% E99W..r||

CJJX\XcXcehpXX r   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nxor)r<  r  r  r(   r)   r>  r
   r?  XORr   ro   r\  s         r   
atomic_xorrf    r^  r   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nxchg)r<  r  r  r(   r)   r>  r
   r?  XCHGr   ro   r\  s         r   atomic_xchgrj    sq    /$PNCd
c
C% E99W..r||/@/@#**cjjZ^ZeZegjrXX r   c                    U R                  5       UR                  R                  ;   d!   SUR                  R                   SU  35       eU R                  5       n U S:X  a  Sn [	        [
        R                  U 5      $ )Nzinput_precision must be one of z. Got TF32X3TF32x3)lowerr   allowed_dot_input_precisionsupperr@  r
   INPUT_PRECISION)input_precisionr.   s     r   _str_to_dot_input_precisionrs    sx      "goo&R&RR p
)'//*V*V)WW]^m]nopR%++-O(""2%%77r   c           
        U R                   R                  5       (       a  UR                   R                  5       (       d   eU R                  R                  5       (       a!  UR                  R                  5       (       a  GOU R                  [        R
                  [        R                  [        R                  [        R                  [        R                  4;   d   SU R                   35       eUR                  [        R
                  [        R                  [        R                  [        R                  [        R                  4;   d   SUR                   35       eU R                  UR                  :X  d!   SU R                   SUR                   35       eU R                  R                  5       (       d  UR                  R                  5       (       a6  [        U [        R                  U5      n [        U[        R                  U5      nUc  UR                  R                  n[        X65      n[        U R                   5      n[        UR                   5      nXxs=:X  a  S:X  d2  O  Xxs=:X  a  S:X  d$  O   SU R                    SUR                    S	35       eU R                   S
   R"                  UR                   S   R"                  :X  dV   SU R                    SUR                    SU R                   S
   R"                   SUR                   S   R"                   S	3	5       eUR$                  R'                  S5      c   S5       eUR$                  S   " U R                   UR                   5      n	U R                   S   R"                  U	S   :  a@  U R                   S
   R"                  U	S   :  a   UR                   S
   R"                  U	S   :  d   SU	S    SU	S    SU	S    35       eU R                   R(                  R+                  5       (       aQ  U R                   R(                  [        R
                  :X  d   S5       eUR-                  S5      n
[        R.                  nOUR1                  5       (       a  [3        S5      eU R                   R(                  R5                  5       (       d)  U R                   R(                  R1                  5       (       a"  UR7                  S5      n
[        R                  nO9UR9                  5       (       a  UR;                  S5      OUR7                  S5      n
UnU R                   R                   S   nUR                   R                   S
   nU R                   R                   S
   nUS:X  a  U R                   R                   S   OS n[        R<                  " X(       a  XU/OX/5      nUc  UR?                  X(       a  XU/OX/5      nOUR@                  nUR                   U:X  d   eUcX  U R                  R                  5       (       a6  UR                  R                  5       (       a  UR                  RB                  nOXSnOUU R                  R                  5       (       a6  UR                  R                  5       (       a  XN:  a  [3        SU SU S	35      e[        RD                  " URG                  U R@                  UR@                  UX45      U5      $ )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   r	      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r
  rW  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()$ro   r  r6   rN   r(   r   uint8rF   rG   rH   r  r   r   default_dot_input_precisionrs  rE  r  rE   r  r  r~   rO   	get_int32r+   rM   r'   rK   get_fp32rL   get_fp16r  rF  r   max_num_imprecise_acc_defaultr)   
create_dot)r   r   accrr  max_num_imprecise_acc	out_dtyper.   lhs_rankrhs_rankry  r  ret_scalar_tyMNKBr   
acc_handles                     r   dotr    s   88388#4#4#6#666
yycii..00yyRWWbhh

BKKZZ) ) 	O,B399+*N	O )yyRWWbhh

BKKZZ) ) 	O,B399+*N	O )yyCII%k)OPSPYPY{Z_`c`i`i_j'kk%
yy#))"7"7"9"93

G,3

G,!//EE1/KO399~H399~H$1$(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA99R=#))
#E q(3LSYYK  XU  VY  V_  V_  `b  Vc  Vi  Vi  Uj  jZ  [^  [d  [d  eg  [h  [n  [n  Zo  op  qq "">2>t@tt>&&~6sxxJL99R=,q/1ciim6I6I\Z[_6\IIbM<?2r,\!_,=W\RS_DUU_`lmn`o_pqr 3 xxxx"'')A+AA)q!					vx 	x		 	 	"	"chhoo&=&=&?&?a 

$-$5$5$7$7Wa W=M=Ma=P!rArArA%]qA]]=q1)qfEF
{))"1qQi1&I
ZZ
xx6!!! $99#))"2"2"4"4$+OO$Q$Q!$%!99#))"2"2"4"49N9R67L6MM]^_]``abcc99W''

CJJ
Os r   c                `   U S:X  a  [         R                  R                  $ U S:X  a  [         R                  R                  $ U S:X  a  [         R                  R                  $ U S:X  a  [         R                  R
                  $ U S:X  a  [         R                  R                  $ [        SU  S35      e)Ne4m3e5m2e2m3e3m2e2m1zInvalid float format: r[   )r
   F8F6F4TYE4M3E5M2E2M3E3M2E2M1r'   )float_formats    r   _str_to_fp_typer    s    v{{v{{v{{v{{v{{
-l^1=
>>r   c	                J   U R                   R                  5       (       a  UR                   R                  5       (       d   e[        U R                  5      n	[        UR                  5      n
Xs=:X  a  S:X  d2  O  Xs=:X  a  S:X  d$  O   SU R                   SUR                   S35       e[	        U5      n[	        U5      nUS;   d
   SU 35       eUS;   d
   S	U 35       e[        U[        R                  5      =(       a    UR                  S L nU(       d   S
5       eU R                   R                  S   nUR                   R                  SS  u  nnUS:X  a  SOSnUUU R                   R                  S   -  :X  d"   SU R                   SUR                   S35       eUS:  d   SU< 35       eU	S:X  a  U R                   R                  S   OS n[        R                  " UU(       a  UUU/OUU/5      nUR                  S5      nUc!  UR                  UU(       a  UUU/OUU/5      nOUR                  nUR                   U:X  d   eU(       a  S OUR                  n[        R                  " UR                  U R                  UR                  XR                  UUU5      U5      $ )Nr	   ru  rv  rw  r
  )r  r  r  zNYI: lhs_format )r  r  zNYI: rhs_format zNYI: rhs_scale not supportedrx  r  r   rW  zCReduction dimension should pack the same number of elements; (lhs: r   z!scaled_dot NYI for K < 64. Got K=r   )ro   r  rE  r  r  rb   r(   rm   rE   r  r}  rF  r   r)   create_dot_scaled)r   	lhs_scale
lhs_formatr   	rhs_scale
rhs_formatr  r  r.   r  r  lhs_format_enumrhs_format_enumrhs_scale_is_noner  r  r  PACKEDr  r   r  r  rhs_scale_handles                          r   
dot_scaledr    s   88388#4#4#6#666399~H399~H$1$(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA%j1O%j1O11R5Ej\3RR1))J-=j\+JJ)"9bll;W	SW@W<<<rA88>>"#DAq&QAF
   tRSVS\S\R]]fgjgpgpfqqrst 7:8aT::7%]qA]]91q!Qi1a&AF			!	B
{))"1q!Qi1a&I
ZZ
xx6!!!0ti6F6F99!!#**i.>.>Q[Q[]m"1:	?@FH Hr   c                   U R                   [        R                  :w  a#  [        R                  " SU R                    35        [        U [        R                  U5      n [        XUSS5      u  pU R                  R                  5       (       a  [        XU5      u  p[        XU5      u  pO[        XU5      u  pUR                  n[        R                  " UR                  U R                  UR                  UR                  5      U5      $ )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r6   r(   re   warningswarnr   r   ro   r  r   r)   create_selectr   )	conditionrp   r   r.   r  r   s         r   rC  rC  0  s    "''!uv  wF  wF  vG  H	
 Y1I'gtTBDA~~  +I'B	#A'21+I'B	VVF99W**9+;+;QXXqxxPRXYYr   c                n    U(       a  [         R                  " X5      nOUn[         R                  " X5      $ rv   )r(   r  r)   )rp   rS   r  res_tys       r   wrap_tensorr  F  s)    y4 99Qr   c                (  ^ ^^^	^
 Uc  [        U4S jT  5       5      m SnT S   R                  R                  m
[        T
5      nX:  d   SU S35       e[	        T
5       VVs/ s H  u  pVXQ:w  d  M  UPM     snnm	[        U
4S jT  5       5      (       d   S5       eTR                  T  Vs/ s H  owR                  PM     snU5      mU" T5        TR                  5         [        U UU	4S j[        [        T 5      5       5       5      $ s  snnf s  snf )Nc              3  d   >#    U  H%  n[        XR                  R                  /S TS9v   M'     g7f)TrX  N)rL  r?  rE   )rg  tr.   s     r   ri  reduction.<locals>.<genexpr>Q  s'     f_eZ[wq77==/tWU_es   -0r   z&reduction axis must be < inputs rank (r
  c              3  T   >#    U  H  oR                   R                  T:H  v   M     g 7frv   )ro   r  )rg  r  r  s     r   ri  r  X  s     5fvv||u$fs   %(z-all reduction inputs must have the same shapec              3     >#    U  H8  n[        TR                  U5      TU   R                  R                  T5      v   M:     g 7frv   r  
get_resultro   r~   )rg  rv  inputs	reduce_opr  s     r   ri  r  ^  s9     tas\]Y11!4fQinn6K6KYWWas   A A)
tuplero   r  rE  rs  allcreate_reducer   verifyr;  )r  r-   region_builder_fnr.   rankrv  rK  r  r  r  r  s   `  `    @@@r   	reductionr  O  s    |f_eff1INN  Eu:D;H@aHH;(/=/tq19/=I5f555f7ff5%%&@Axx&@$GIi tafgjkqgrasttt > 'As   "D	1D	+Dc                  ^ ^^ T S   R                   R                  m[        T5      nU* Us=::  a  U:  d  O   SU SU S35       eUS:  a  X-  nT  H$  nUR                   R                  T:X  a  M   S5       e   UR                  T  Vs/ s H  ofR                  PM     snX5      mU" T5        TR                  5         [        U UU4S j[        [        T 5      5       5       5      $ s  snf )Nr   z
scan axis z must be < inputs rank (r
  z(all scan inputs must have the same shapec              3     >#    U  H8  n[        TR                  U5      TU   R                  R                  T5      v   M:     g 7frv   r  )rg  rv  r  scan_opr  s     r   ri  #associative_scan.<locals>.<genexpr>w  s9     n[mVWW//2F1INN4I4I5QQ[mr  )ro   r  rE  create_scanr   r  r  r;  )	r  r-   r  reverser.   r  r  r  r  s	   `      @@r   associative_scanr  f  s    1INN  Eu:D5D4S:dV3KD6QR!SSaxvv||u$P&PP$  !!V"<V88V"<dLGgNNn[`adekal[mnnn	 #=s   	C'c                8   [        U R                  5      S:X  d   S5       eU R                  R                  5       (       d   S5       e[        R
                  " UR                  U R                  U5      [        R                  " [        R                  U45      5      $ )Nr   z histogram only supports 1D inputz%histogram only supports integer input)
rE  r  r6   rO   r(   r)   create_histogramr   r  r+   )r   num_binsr.   s      r   	histogramr    sv    u{{q D"DD ;;H!HH99W--ellHEr}}UWU]U]`h_kGlmmr   c                   [        S[        U R                  5      5      [        U5      :w  a  [        S5      eU R                  R                  S[        R                  " XR                  R                  5       5      5        U $ )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   rE  r  r'   r   set_attrr
   	make_attrget_contextrp   valuess     r   multiple_ofr    sY    
1c!''ls6{*\]]HH'fhh>R>R>T)UVHr   c                    [        U R                  5      [        U5      :w  a  [        S5      eU R                  R	                  S[
        R                  " XR                  R                  5       5      5        U $ )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityrE  r  r'   r   r  r
   r  r  r  s     r   max_contiguousr    sQ    
177|s6{"_``HHor||FHH<P<P<R'STHr   c                    [        U R                  5      [        U5      :w  a  [        S5      eU R                  R	                  S[
        R                  " XR                  R                  5       5      5        U $ )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s     r   max_constancyr    sQ    
177|s6{"^__HHnbll688;O;O;Q&RSHr   c                h    [         R                  " U R                  5       [         R                  5      $ rv   )r(   r)   create_barrierr  )r.   s    r   debug_barrierr    s     99W++-rww77r   c           	     j   U R                  S5      (       d  U(       a  U S-  n U R                  S5      (       d  U(       a  U S S S-   n [        U 5      S:  a  U R                  S5      (       d  SU -   n U Vs/ s H  oDR                  PM     nnU Vs/ s H\  oDR                  [
        R                  [
        R                  [
        R                  [
        R                  [
        R                  4;   PM^     nn[
        R                  " UR                  XXV5      [
        R                  5      $ s  snf s  snf )N rq  rW  r	   )endswithrE  
startswithr   r6   r(   re   r   r8  r+   rh   r)   create_printr  )prefixargshexr.   argnew_args	is_signeds          r   device_printr    s     ??3D#??4  Tt#
6{Qv0055v&*+ds

dH+Z^_Z^SVrww288RXXNNZ^I_99W))&xKRWWUU ,_s   6D+A#D0c                    UR                   R                  (       d  g [        R                  " UR	                  U R
                  U5      [        R                  5      $ rv   )r   debugr(   r)   create_assertr   r  )r   r   r.   s      r   r   r     s8    ??  99W**4;;<bggFFr   c                ~    [         R                  " UR                  U R                  5      [         R                  5      $ rv   )r(   r)   create_assumer   r  )r   r.   s     r   assumer    s&    99W**4;;7AAr   c                   [        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       a  U(       aI  SUR                  s=::  a  S:  d  O   SUR                   S35       eU R                  UR                  5      $ SUR                  s=::  a  S:  d  O   SUR                   S35       eU R                  UR                  5      $ [        U[        R                  5      (       a  UR                  R                  S:X  d   S	5       eUR                  R                  5       (       d   S
5       eUR                  [        R                  :w  aJ  U(       aC  U R                  UR                  U R                  5       UR                  R                  5       5      $ UR                  [        R                   :w  a  U(       d   S5       eUR                  $  S[#        U5       35       e)NrY   rZ   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerW   rX   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rb   rf   r(   rm   rE   r   r|  r)   r?  r6   rO   rh   r  r   get_int64_tyr   r+   ro   )r.   r  r  s      r   _convert_elem_to_ir_valuer    s   $||D!$%%TZZ/%/ F 4#zzl*D2F F/$$TZZ00TZZ/%/ F 4#zzl*D2F F/$$TZZ00	D"))	$	$zz1$R&RR$zz  ""^$^^"::!k**4;;8L8L8NPTPZPZPhPhPjkkZZ288#KS S S5{{TGT
|TT5r   c                    [        US5      (       a  U Vs/ s H  n[        XU5      PM     sn$ [        XU5      /$ s  snf )Nr  )r  r  )r.   	list_liker  r  s       r   r  r    sD    y*%%R[\R[$)'ER[\\%g+FGG ]s   <c           	       ^ [        Xa5      n[        Xb5      n[        XcSS9nU R                  R                  5       (       a)  U R                  R                  R	                  5       (       a  [        S5      eU R                  R                  [        R                  :X  aD  [        U [        R                  " [        R                  U R                  R                  5      U5      n [        TS5      (       d  T/mT Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     snm[!        S T 5       5      (       d   S5       e[        US5      (       d  U/nU Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     nn[#        U5      [%        ['        [)        U5      5      5      :X  d   S5       e[!        U4S jXX54 5       5      (       d   S	5       eUR+                  U R,                  XUTU5      n[        R.                  " U[        R                  " [        R0                  " U R                  R                  T5      5      5      $ s  snf s  snf )
NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c              3  z   #    U  H1  n[        U[        5      =(       a    S Us=:*  =(       a    S:  Os  v   M3     g7f)rW   rX   N)rb   rf   )rg  r  s     r   ri  !make_block_ptr.<locals>.<genexpr>  s,     XKDz$$?4)?)?%)??Ks   9;zGExpected a list of constant integers (`int32_t` range) in `block_shape`z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  R   >#    U  H  n[        T5      [        U5      :H  v   M     g 7frv   )rE  )rg  r  r  s     r   ri  r    s!     dCcis;3y>1Ccs   $'zBExpected shape/strides/offsets/block_shape to have the same length)r  ro   rw   r  r  r'   r(   re   r   r  r   r  r  rb   rm   rE   r  rk  rl  r;  rE  create_make_block_ptrr   r)   r  )	baser  stridesr  r  orderr.   r  r   s	       `    r   make_block_ptrr    s    "'1E#G5G#G%HG 99!5!5!>!>!@!@hii yyrww&D"//"''4993J3JKWU ;
++"mVabVadD",,!?!?4::TIVabKXKXXX RQRX 5*%%PUVPU:dBLL99TZZtCPUEV%=Ds5z!233s5ss3 dET[Ccddd MLMd **4;;Q\^cdF99VR__R]]499;O;OQ\-]^__% c Ws   97I))7I.c                    [        X!SS9n[        R                  " UR                  U R                  U5      U R
                  5      $ r
  )r  r(   r)   create_advancer   ro   )r  r  r.   s      r   advancer    s6    #G%HG 99W++DKKA499MMr   )r-   rf   r.   
ir.builderreturn	tl.tensor)r:   tl.dtyper;   r  r  r  )r:   r  rP   rc   r;   r  rQ   rc   rR   rc   r  r  )T)rq   rc   )r   r  r   r  ry   rc   r  None)FFTF)r   tl.tensor | numbers.Numberr   r  r.   r  r  Tuple[tl.tensor, tl.tensor])r   r   r   r   r.   r  r   callable)
r   r  r   r  r   rc   r.   r  r  r   )r   r  r   r  r.   r  r  r   )
r   r  r   r  r   rc   r.   r  r  r   )rp   r   r   r   r   tl.PropagateNanr.   r  )
rp   r   r   r   r   r   r   r  r.   r  )r   r   r   r   r.   r  r  r  )r   r   r   r   r.   r  r  r   )r   r   r.   r  )r   r   r  r   )r   r   r.   r  r  r   )r   r   r.   r   r  r   )r  r   r  ztl.block_type)r7  rf   r8  rf   r.   r  r  r   )r  	List[int]r6   r  r.   r  r  r   )rE   r   r  r  r.   r  r  r   )
r   r   rI  r  rJ  rc   r.   r  r  r   )r   r   r-   rf   r.   r  r  r   )
r   r   r   r   rJ  rc   r.   r  r  r   )rZ  r   r[  r   r.   r  r  r   )rZ  r   r.   r  r  r  )r   r   rn  z
Tuple[int]r.   r  r  r   )r   r   r  r  r.   r  r  r   )r   r   r   r   r.   r  r  r   )r  Optional[str])r   r   r  r  r.   r  r  r   rv   )
r   r   r  r  r.   r  r  r  r  r   )r  r   r  Optional[tl.tensor]r   r	  r  r   r  r{  r  r{  r  r{  r  rc   r.   r  r  r   )
r  r   r  r{  r  r{  r.   r  r  r   )r  r   rE   r   r.   r  r  r   )r  r   r  r   r  List[tl.tensor]r  r
  r  r
  r  r
  r  rf   r  rf   r  rf   r   rf   r.   r  r  r   )r  r   r.   r  r  r   )r  r   r'  r   r  r	  r  r{  r  r{  r.   r  r  r   )r  r   r4  r   r'  r   r  r{  r  r{  r.   r  r  r   )r  r   r'  r   r  r   r9  r{  r.   r  r  z&Tuple[tl.tensor, tl.tensor, tl.tensor])r  r   r'  r   r  r   r  r{  r  r{  r.   r  r  r   )r   r   r   r   r  r   rr  r  r  rf   r  r  r.   r  r  r   )r  r  )r   r   r  r   r   r   r  r	  r  ztl.tensor | Noner  r  r.   r  r  r   )
r  r   rp   r   r   r   r.   r  r  r   )r  Sequence[tl.tensor]r-   rf   r.   r  r  Tuple[tl.tensor, ...])
r  r  r-   rf   r  rc   r.   r  r  r  )r   r   r  rf   r.   r  r  r   )rp   r   r  r  r  r   )r.   r  r  r   )
r  r{  r  r
  r  rc   r.   r  r  r   )r   r   r   r{  r.   r  r  r   )r  r   r.   r  r  r   )u
__future__r   r  typingr   r   r   r   r   r|   _C.libtritonr
    r   r(   r   r   	Exceptionr   r/   r2   r@   rU   rn   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r   r  r  r   r&  r   r.  r3  r<  rj   rB  rL  rQ  rU  r_  rd  ro  rx  r   r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r!  r$  r(  r,  r0  r5  r<  rP  rT  rX  r]  rb  rf  rj  rs  r  r  r  rC  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r\   r   r   <module>r     s#   "  ; ;    CLF	 FDF@ /,&*/,/7/,d#V	< ]a,16QB&&:: ):644 )4$44 )4R4:&&!*&485$5$	]"+5:UQ
P
Q
'&"R
R
Q)$)4444	4	4"D"(.H[MK2
IL$2tpZ( 04l5,l58Al5h 		 
w,;|nn.1nDGnVZnn!*n ',5g
   	
 # $      >\:)qXWW)3W8AW,j(27]6$)N$)No#(18EE&0E5>EP? H 0 H=E HPZ H_h HPZ, u.o(o-Bo2n8VGBU0H$`NNr   