
    ϦiH                        S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  \R                  R!                  \R                  R#                  \5      5      r\R                  R'                  \S5      /rS r\ R,                  " 5       S	 5       rS
 r " S S\5      rS rS r " S S\5      r " S S\5      rg)    N)Path)_build)get_cache_manager)	GPUTarget)	GPUDriverincludec                   ^ ^^^^ SS K nUR                  5       S:w  a  g SS KmSSKJnJnJnJmJmJn   " UU4S jSTR                  5      nTR                  X5" U5      U" U5      U" U5      5      n TR                  S5      R                  nUT/Ul        X8l        SmTR                  TS-   5      n	UU U4S	 jn
U" U" U
5      U	5      (       a%  [         R"                  " TR%                  U	5      5      $ g !    g = f)
Nr   Linux)c_charc_intc_size_tc_void_pc_char_pPOINTERc                   *   > \ rS rSrS Y4S Y 4/rSrg)8_find_already_mmapped_dylib_on_linux.<locals>.DlPhdrInfo   	dlpi_addr	dlpi_name N)__name__
__module____qualname____firstlineno___fields___static_attributes__)r   r   s   Y/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/triton/backends/amd/driver.py
DlPhdrInfor      s    (#(#
    r   z	libc.so.6i      c           
         > U R                   R                  n[        [        R                  " U5      5      nTUR
                  ;   a&  TR                  X#[        T[        U5      5      5        gg)Nr    r   )	contentsr   r   osfsdecodenamememmoveminlen)infosizedatar   pctypeslib_namemax_path_lengths        r   callback6_find_already_mmapped_dylib_on_linux.<locals>.callback2   sP    MM++	Y'(qvvNN4CY,PQr   )platformsystemr-   r   r   r   r   r   r   	Structure	CFUNCTYPECDLLdl_iterate_phdrargtypesrestypecreate_string_bufferr#   r$   	string_at)r.   r2   r   r   r   r   r   
callback_tr7   pathr0   r   r   r-   r/   s   `          @@@@r   $_find_already_mmapped_dylib_on_linuxr>      s    G#
 KK
 
V%% 
 !!%)<gh>OQXY_Q`aJ ++k2BB !+H5O#O&&':;D z(+T22{{6++D122+s   2C4 4C8c                     Sn [         R                  " S5      nU(       aM  UR                  U 5      (       a&  [         R                  R	                  U5      (       a  U$ [        SU SU  35      e[        U 5      nU(       a7  [         R                  R	                  U5      (       a  U$ [        SU SU  35      e/ nSS KnUR                  5       nUR                  5       nUR                  (       a  U/U-   nU H^  n[         R                  R                  USS	U 5      n[         R                  R	                  U5      (       a  Us  $ UR                  U5        M`     [         R                  " S
5      nU(       ap  UR                  S5       H[  n	[         R                  R                  X5      n
[         R                  R	                  U
5      (       a  U
s  $ UR                  U
5        M]     [        R                  " SS/5      R!                  5       nUR#                  5        Vs/ s H;  oR%                  5       R                  U 5      (       d  M(  UR                  5       S   PM=     nnU H<  n[         R                  R	                  U5      (       a  Us  $ UR                  U5        M>     [         R                  R                  SU 5      n[         R                  R	                  U5      (       a  U$ UR                  U5        [        SU  SU 35      es  snf )Nzlibamdhip64.soTRITON_LIBHIP_PATHzTRITON_LIBHIP_PATH 'z' does not point to a valid zmemory mapped 'z'' in process does not point to a valid r   torchlibLD_LIBRARY_PATH:z/sbin/ldconfigz-pz/opt/rocm/lib/zcannot locate z after attempted paths )r#   getenvendswithr=   existsRuntimeErrorr>   sitegetsitepackagesgetusersitepackagesENABLE_USER_SITEjoinappendsplit
subprocesscheck_outputdecode
splitlinesstrip)r.   env_libhip_pathmmapped_pathpathsrJ   site_packages	user_siter=   env_ld_library_pathdflibslinelocsloccommon_install_paths                   r   _get_path_to_hip_runtime_dylibrc   @   sp   H ii 45O##H--"''..2Q2Q""1/1BB^_g^hijj 8AL77>>,''_\N:abjaklmmE ((*M((*I"m3ww||D'5(;77>>$KT	  ))$56$**3/AQ)Aww~~a  LLO	 0 ""$4d#;<CCED *.):^):jjl>S>ST\>]DJJL):D^77>>#JS  '',,'7B	ww~~)**""	LL$%
z1HP
QQ _s   'L
L
c           	         [         R                  " U R                  S5      5      R                  5       n[	        U5      nUR                  U S35      nUc  [        R                  " 5        n[        R                  R                  US5      n[        US5       nUR                  U 5        S S S 5        [        XU/ [        / 5      n[        US5       nUR                  UR!                  5       U S3SS9nS S S 5        S S S 5        SS Kn	U	R$                  R'                  X5      n
U	R$                  R)                  U
5      nU
R*                  R-                  U5        U$ ! , (       d  f       N= f! , (       d  f       N~= f! , (       d  f       N= f)	Nzutf-8z.sozmain.cwrbT)binaryr   )hashlibsha256encode	hexdigestr   get_filetempfileTemporaryDirectoryr#   r=   rN   openwriter   include_dirputreadimportlib.utilutilspec_from_file_locationmodule_from_specloaderexec_module)srcr%   keycache
cache_pathtmpdirsrc_pathr]   so	importlibspecmods               r   compile_module_from_srcr      s)   
..G,
-
7
7
9Cc"E4&-J((*fww||FH5Hh$ %KDBb$1"YYqvvxD6dYK
   + >>11$CD
..
)
)$
/CKKC J %$   +*s<   +-E=E*'E=#E,4E=
E)	%E=,
E:	6E==
Fc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HIPUtils   c                 n   > [        U S5      (       d  [        [        U ]  U 5      U l        U R                  $ )Ninstance)hasattrsuperr   __new__r   )cls	__class__s    r   r   HIPUtils.__new__   s-    sJ'' 37<CL||r   c                    [        5       n[        [        R                  R	                  [
        S5      5      R                  5       nUR                  SUS5      n[        US5      nUR                  U l	        UR                  U l
        g )Nzdriver.cz/*py_libhip_search_path*/r    	hip_utils)rc   r   r#   r=   rN   dirname	read_textreplacer   load_binaryget_device_properties)selflibhip_pathrz   r   s       r   __init__HIPUtils.__init__   sg    46277<<45??A kk5{AF%c;7??%(%>%>"r   )r   r   )r   r   r   r   r   r   r   __classcell__r   s   @r   r   r      s    
	? 	?r   r   c                 @    U S   S:X  a  gSSSSSSS	S
SSSSSSSS.U    $ )Nr   *hipDeviceptr_tint32_tint8_tint16_tint64_tuint32_tuint8_tuint16_tuint64_tfloatdoublei1i8i16i32i64u1u8u16u32u64fp16bf16fp32f32fp64r   tys    r   	ty_to_cppr      sQ    	!u|  	!
 
r   c                    [        U5      nSR                  S UR                  5        5       5      nS nS nSR                  UR                  5        Vs/ s H  o" U" U5      5      PM     sn5      n	SU	-   n
[        U5      S:  a)  SSR                  S UR                  5        5       5      -   OSn[	        5       nUR                  5        Vs/ s H  oU ;  d  M
  UPM     nnS	U S
[        U5      S:  a  SU-   OS SSR                  S U 5       5       SU SSR                  UR                  5        VVs/ s H  u  pU" U5       SU S3PM     snn5       SU
 SU SSR                  UR                  5        VVs/ s H  u  pUS   S:X  a  SU SU SU SU S3	OSPM!     snn5       S[        U5      S:  a)  SSR                  S UR                  5        5       5      -   OS S3nU$ s  snf s  snf s  snnf s  snnf )Nz, c              3   F   #    U  H  u  p[        U5       S U 3v   M     g7f)z argN)r   .0ir   s      r   	<genexpr> make_launcher.<locals>.<genexpr>   s#     SARYr]O4s3ARs   !c                 @    U S   S:X  a  gSSSSSSS	S
SSSSSSSS.U    $ )Nr   r   	PyObject*r   r   r   r   r   r   r   r   r   r   r   r   r   s    r   _extracted_type&make_launcher.<locals>._extracted_type   sQ    a5C<
  ! 	r   c                 &    SSSSSSSSSS	S
SS.U    $ )NOr]   r\   lbhr   BHIK)r   r   r   longr   r   r   r   r   r   r   r   r   r   s    r   	format_of make_launcher.<locals>.format_of   s:    
  	r    	iiiKKOOOOr   c              3   0   #    U  H  u  pS U 3v   M     g7f)z&_argNr   r   s      r   r   r      s      L:K5:Ks   a;  
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <Python.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <dlfcn.h>

// The list of paths to search for the HIP runtime library. The caller Python
// code should substitute the search path placeholder.
static const char *hipLibSearchPaths[] = {"a  "};

// The list of HIP dynamic library symbols and their signature we are interested
// in this file.
#define HIP_SYMBOL_LIST(FOR_EACH_ERR_FN, FOR_EACH_STR_FN)                     \
  FOR_EACH_STR_FN(hipGetErrorString, hipError_t hipError)                     \
  FOR_EACH_ERR_FN(hipModuleLaunchKernel, hipFunction_t f,                     \
                  unsigned int gridDimX, unsigned int gridDimY,               \
                  unsigned int gridDimZ, unsigned int blockDimX,              \
                  unsigned int blockDimY, unsigned int blockDimZ,             \
                  unsigned int sharedMemBytes, hipStream_t stream,            \
                  void **kernelParams, void **extra)                          \
  FOR_EACH_ERR_FN(hipPointerGetAttribute, void *data,                         \
                  hipPointer_attribute attribute, hipDeviceptr_t ptr)

// The HIP symbol table for holding resolved dynamic library symbols.
struct HIPSymbolTable {
#define DEFINE_EACH_ERR_FIELD(hipSymbolName, ...)                             \
  hipError_t (*hipSymbolName)(__VA_ARGS__);
#define DEFINE_EACH_STR_FIELD(hipSymbolName, ...)                             \
  const char *(*hipSymbolName)(__VA_ARGS__);

  HIP_SYMBOL_LIST(DEFINE_EACH_ERR_FIELD, DEFINE_EACH_STR_FIELD)
};

static struct HIPSymbolTable hipSymbolTable;

bool initSymbolTable() {
  // Use the HIP runtime library loaded into the existing process if it exits.
  void *lib = dlopen("libamdhip64.so", RTLD_NOLOAD);
  if (lib) {
    // printf("[triton] chosen loaded libamdhip64.so in the process\n");
  }

  // Otherwise, go through the list of search paths to dlopen the first HIP
  // driver library.
  if (!lib) {
    int n = sizeof(hipLibSearchPaths) / sizeof(hipLibSearchPaths[0]);
    for (int i = 0; i < n; ++i) {
      void *handle = dlopen(hipLibSearchPaths[i], RTLD_LAZY | RTLD_LOCAL);
      if (handle) {
        lib = handle;
        // printf("[triton] chosen %s\n", hipLibSearchPaths[i]);
      }
    }
  }
  if (!lib) {
    PyErr_SetString(PyExc_RuntimeError, "cannot open libamdhip64.so");
    return false;
  }

  // Resolve all symbols we are interested in.
  dlerror(); // Clear existing errors
  const char *error = NULL;
#define QUERY_EACH_FN(hipSymbolName, ...)                                     \
  *(void **)&hipSymbolTable.hipSymbolName = dlsym(lib, #hipSymbolName);       \
  error = dlerror();                                                          \
  if (error) {                                                               \
    PyErr_SetString(PyExc_RuntimeError,                                       \
                    "cannot query " #hipSymbolName " from libamdhip64.so");   \
    dlclose(lib);                                                             \
    return false;                                                             \
  }

  HIP_SYMBOL_LIST(QUERY_EACH_FN, QUERY_EACH_FN)

  return true;
}

static inline void gpuAssert(hipError_t code, const char *file, int line)
{
   if (code != HIP_SUCCESS)
   {
      const char* prefix = "Triton Error [HIP]: ";
       const char* str = hipSymbolTable.hipGetErrorString(code);
      char err[1024] = {0};
      snprintf(err, 1024, "%s Code: %d, Messsage: %s", prefix, code, str );
      PyErr_SetString(PyExc_RuntimeError, err);
   }
}

#define HIP_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int clusterDimX, int clusterDimY, int clusterDimZ, int shared_memory, hipStream_t stream, hipFunction_t functionz>) {
  // printf("_launch hip kernel\n");
  void *params[] = { c              3   ,   #    U  H
  nS U 3v   M     g7f)z&argNr   )r   r   s     r   r   r   M  s      <V4sVs   zw };
  if (gridX*gridY*gridZ > 0) {
      HIP_CHECK(hipSymbolTable.hipModuleLaunchKernel(function, gridX, gridY, gridZ, aw  *num_warps, 1, 1, shared_memory, stream, params, 0));
    }
  }

typedef struct _DevicePtrInfo {
    hipDeviceptr_t dev_ptr;
    bool valid;
} DevicePtrInfo;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }
  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }
  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if(ptr){
    PyObject *empty_tuple = PyTuple_New(0);
    PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
    Py_DECREF(empty_tuple);
    Py_DECREF(ptr);
    if (!PyLong_Check(ret)) {
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
    if(!ptr_info.dev_ptr)
      return ptr_info;
    uint64_t dev_ptr;
    hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == hipErrorInvalidValue) {
        PyErr_Format(PyExc_ValueError,
                     "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
        ptr_info.valid = false;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
    Py_DECREF(ret);
    return ptr_info;
  }
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  return ptr_info;
}

static PyObject* launch(PyObject* self, PyObject* args) {
   // printf("launch\n");
  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *kernel_metadata = NULL;
  PyObject *launch_metadata = NULL;
   z _argz; z
  if(!PyArg_ParseTuple(args, "z", &gridX, &gridY, &gridZ, &_stream, &_function,
                                           &kernel_metadata, &launch_metadata,
                                           &launch_enter_hook, &launch_exit_hook a=  )) {
    return NULL;
  }

  // extract kernel metadata
  int num_warps, num_ctas, shared_memory, clusterDimX, clusterDimY, clusterDimZ;
  if (!PyArg_ParseTuple(kernel_metadata, "iiiiii", &num_warps, &num_ctas, &shared_memory, &clusterDimX, &clusterDimY, &clusterDimZ)) {
    return NULL;
  }
  // extract launch metadata
  if (launch_enter_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_enter_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }


  // raise exception asap
  r   zDevicePtrInfo ptr_infoz = getPointer(_argz); if (!ptr_infoz.valid) return NULL;z;
  _launch(gridX, gridY, gridZ, num_warps, num_ctas, clusterDimX, clusterDimY, clusterDimZ, shared_memory, (hipStream_t)_stream, (hipFunction_t)_functionc              3   N   #    U  H  u  pUS    S:X  a  SU S3OSU 3v   M     g7f)r   r   ptr_infoz.dev_ptr_argNr   r   s      r   r   r     s       j~  l}  ch  cd  EG  HI  EJ  LO  EO  mu  vw  ux  x@  kA  W[  \]  [^  U_  k_  l}s   #%an  );

  if(launch_exit_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_exit_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }

  if(PyErr_Occurred()) {
    return NULL;
  }
  // return None
  Py_INCREF(Py_None);
  return Py_None;
}

static PyMethodDef ModuleMethods[] = {
  {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
  {NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {
  PyModuleDef_HEAD_INIT,
  "__triton_launcher",
  NULL, //documentation
  -1, //size
  ModuleMethods
};

PyMODINIT_FUNC PyInit___triton_launcher(void) {
  if (!initSymbolTable()) {
    return NULL;
  }
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {
    return NULL;
  }
  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}
)r(   rN   itemsvaluesrc   keys)	constants	signatureids	warp_size
start_desc	arg_declsr   r   r   args_formatformat	args_listr   r   paramsrz   s                   r   make_launcherr      s   YJ		SARSSI*  ''IDTDTDVWDVb9_R%89DVWXK;&FPST]P^abPbtyy L)//:K LLLhjI02K #)@)Ai-?a)F@
- .9M S:Cf X[  \e  Xf  ij  Xj  DH  KT  DT  pr  Cs syy <V <<= >UU^T_ ;`v 88Y__=NO=NEA#$E!B/=NOPQ R  &x (RR[Q\ ]( 99  R[  Ra  Ra  Rc  d  Rc  IN  IJoqrsotx{o{&qc);A3bCSTUSVVjk  BD  D  Rc  d  e  f fY BE  FO  BP  ST  BT  Z^  ae  aj  aj  j~  lu  l{  l{  l}  j~  a~  Z~  Z\  Y] *]i^C~ JO X Az P. ds   G-	G2G28G7?&G=(c                        \ rS rSrS rS rSrg)HIPLauncheri  c                   ^ S[        TS5      (       a  TR                  R                  O	[        5       0n[        TS5      (       a  TR                  O	[        5       nU4S jnUR                  5        VVs0 s H  u  pgU" U5      U_M     nnnTR                  R                  5        VVs0 s H  u  pgU" U5      U_M     nnn[        XHX2R                  5      m[        TS5      n	U	R                  U l        g s  snnf s  snnf )Nids_of_const_exprsfnr   c                 |   > [        U [        5      (       a%  TR                  R                  R	                  U 5      $ U $ N)
isinstancestrr   	arg_namesindex)r   rz   s    r   <lambda>&HIPLauncher.__init__.<locals>.<lambda>  s/    As9K9KCFF,,2215RQRRr   __triton_launcher)r   r   
constexprstupler   dictr   r   r   r   r   launch)
r   rz   metadatar   r   cst_keyr{   valuer   r   s
    `        r   r   HIPLauncher.__init__  s    #'#t:L:LSVV%6%6RWRYZ%,S+%>%>CMMDF	R;D??;LM;LZSWS\5(;L	M;>==;N;N;PQ;PZSWS\5(;P	QI#7I7IJ%c+>?jj	 NQs   5C6*C<c                 (    U R                   " U0 UD6  g r   r   )r   argskwargss      r   __call__HIPLauncher.__call__  s    T$V$r   r  N)r   r   r   r   r   r  r   r   r   r   r   r     s    !%r   r   c                   P   ^  \ rS rSrU 4S jrS r\S 5       rS rS r	S r
SrU =r$ )		HIPDriveri  c                 V   > [         TU ]  5         [        5       U l        [        U l        g r   )r   r   r   utilsr   launcher_cls)r   r   s    r   r   HIPDriver.__init__  s    Z
'r   c                 "    SS K nUR                  $ Nr   )rA   cuda)r   rA   s     r   get_device_interfaceHIPDriver.get_device_interface  s    zzr   c                  :    SS K n U R                  R                  S L$ r  )rA   versionhip)rA   s    r   	is_activeHIPDriver.is_active  s    }}  ,,r   c                     U R                  5       nU R                  R                  U5      nUS   nUS   n[        SUR	                  S5      S   U5      $ )NarchwarpSizer  rD   r   )get_current_devicer	  r   r   rP   )r   devicedevice_propertiesr  r   s        r   get_current_targetHIPDriver.get_current_target  sU    ((* JJ<<VD (%j1	

3 2I>>r   c                     SSK Jn  U$ )Nr   )do_bench)triton.testingr  )r   r  s     r   get_benchmarkerHIPDriver.get_benchmarker  s
    +r   c                 \    SS K nSnUR                  [        US-  5      UR                  SS9$ )Nr   i      r  )dtyper  )rA   emptyint)r   rA   
cache_sizes      r   get_empty_cache_for_benchmark'HIPDriver.get_empty_cache_for_benchmark  s.     '
{{3zQ/uyy{PPr   )r
  r	  )r   r   r   r   r   r  staticmethodr  r  r!  r)  r   r   r   s   @r   r  r    s7    (
 - -?Q Qr   r  )	functoolsr#   rh   rQ   rm   pathlibr   triton.runtime.buildr   triton.runtime.cacher   triton.backends.compilerr   triton.backends.driverr   r=   r   realpath__file__rN   rq   r>   	lru_cacherc   r   objectr   r   r   r   r  r   r   r   <module>r6     s     	     ' 2 . ,
''//"''**84
5ww||GY/0-` ;R ;R|&?v ?(
,Qh%& %  Q	  Qr   