
    Αi                   N   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	J
r
Jr  S SKJr  S SKrS SKJr  S SKJrJr  S SKJrJrJrJr  S SKJrJrJrJrJrJ r J!r!J"r"J#r#J$r$  S	S
K%J&r&J'r'  \	(       a  S SK(J)r)  S SKJ*r*  S SKJ+r,J-r.  S SK/J0r0  S SK1J2r2  \
\Rf                  \Rh                  \Rj                  4   r6\
\Rn                  \Rp                  \Rr                  4   r:S SKJ;r;J<r<  S SK=J>r>  \
\;\<\?\@4   rAS\BS'   S rC\R.                  " 5       (       a!  S	SK&JDrEJFrGJHrHJIrIJJrKJLrLJMrMJNrNJOrOJPrPJQrQJRrRJSrSJTrT  O\R                  " 5       (       a  S	SK'JDrEJFrGJHrHJIrIJLrLJMrMJNrNJOrOJPrPJQrQJRrRJSrSJTrT  Oz\V" \S5      (       a  \R                  " 5       rXO/ rX\X(       a;  \R                  " \XS    5      (       a!  S	SKZJDrEJFrGJHrHJIrIJJrKJLrLJMrMJNrNJOrOJPrPJQrQJRrRJSrSJTrT  OS	rCS	SK[JHrHJLrLJMrMJNrNJOrOJRrRJSrSJTrT  / SQr\Sq]SQS jrYSRS jr^SSS jr+SRS jrUSTS jr-SRS jr_SRS jr`SUS  jraSVSWS" jjrbSXS# jrc " S$ S!5      rdSYS% jreSZS[S& jjrfS\S' jrg\SVS]S( jj5       rh\S^S) j5       rhSVS_S* jjrhS`S+ jriSaS, jrjSbS- jrkScS. jrWSbS/ jrlScS0 jrm SV   SdS1 jjrJSVSeS2 jjrn SV   SfS3 jjro SV   SgS4 jjrpShS5 jrq " S6 S75      rr " S8 S95      rs SV SiS: jjrtSVSjS; jjruSkS< jrv " S= S>5      rw\wrxSlS? jry " S@ SA5      rzSVSmSB jjr{SnSC jr| SV     SoSD jjr}SpSE jr~ " SF SG5      r " SH SI5      r " SJ SK5      rSVSqSL jjr " SM SN\?5      r " SO SP\GR                  5      r\GR                  \   r\" \\GR                  5      r\GR                  GR                  \GR                  5        \\GR                  \'   g)r    )annotationsN)TYPE_CHECKINGUnionoverload)	TypeAlias)autocast)core	framework)is_compiled_with_cinnis_compiled_with_cudais_compiled_with_distributeis_compiled_with_rocm)
BFloat16Tensor
BoolTensor
ByteTensor
CharTensorDoubleTensorFloatTensor
HalfTensor	IntTensor
LongTensorShortTensor   )cudaxpu)AbstractContextManager)TracebackType)IPUPlaceXPUPlace)	PlaceLike)Place)	CUDAPlaceCustomPlace)_customDevicePropertiesr   _CustomPlaceLike)create_eventcreate_streamdevice_countempty_cacheget_device_propertiesget_rng_statemanual_seedmax_memory_allocatedmax_memory_reservedmemory_allocatedmemory_reservedreset_max_memory_allocatedreset_max_memory_reservedset_rng_state)r&   r'   r(   r)   r+   r,   r-   r.   r/   r0   r1   r2   r3   get_all_custom_device_type)r(   r+   r,   r-   r.   r1   r2   r3   )7get_cudnn_version
set_device
get_devicer   r   is_compiled_with_xpuis_compiled_with_ipur   r   r   r   is_compiled_with_custom_deviceget_all_device_typer4   get_available_deviceget_available_custom_devicer*   StreamEventcurrent_stream
set_streamstream_guarddevice_guardsynchronizer(   r)   r-   r.   r1   r2   r/   r0   is_availableis_current_stream_capturingget_device_nameget_device_capabilityr+   r3   r   r   r   r   r   r   r   r   r   r   deviceis_bf16_supportedr,   reset_peak_memory_statsipc_collectget_stream_from_externalStreamContextc                .    [         R                  " U 5      $ )a\  

Whether paddle was built with Paddle_CUSTOM_DEVICE .

Args:
    device_type (str): the registered device type, like "npu".

Return:
    bool, ``True`` if CustomDevice is supported, otherwise ``False``.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> support_npu = paddle.device.is_compiled_with_custom_device("npu")

)r	   r:   )device_types    V/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/device/__init__.pyr:   r:      s    $ ..{;;    c                 ,    [         R                  " 5       $ )a   

Whether paddle was built with WITH_IPU=ON to support Graphcore IPU.

Returns (bool): `True` if IPU is supported, otherwise `False`.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> support_ipu = paddle.is_compiled_with_ipu()

)r	   r9    rR   rQ   r9   r9           $$&&rR   c                 ,    [         R                  " 5       $ )z

Return a Graphcore IPU Place

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:IPU)

        >>> import paddle
        >>> paddle.device.set_device('ipu')
        >>> place = paddle.device.IPUPlace()

)r	   r   rT   rR   rQ   r   r      s     ==?rR   c                 ,    [         R                  " 5       $ )a   

Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun

Returns (bool): whether paddle was built with WITH_XPU=ON

Examples:
    .. code-block:: python

        >>> import paddle
        >>> support_xpu = paddle.device.is_compiled_with_xpu()

)r	   r8   rT   rR   rQ   r8   r8     rU   rR   c                .    [         R                  " U 5      $ )a  

Return a Baidu Kunlun Place

Args:
    dev_id(int): Baidu Kunlun device id

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:XPU)

        >>> import paddle
        >>> paddle.device.set_device('xpu')
        >>> place = paddle.device.XPUPlace(0)

)r	   r   )dev_ids    rQ   r   r     s    $ ==  rR   c                     [        5       S:  $ )a  
Check whether **any supported device** is available in the current environment.

This function checks whether Paddle is built with support for at least one
type of accelerator (e.g., CUDA, XPU, CustomDevice) and whether there is
at least one device of that type available.

If any supported device is available, this function returns True. Otherwise,
it returns False.

Returns:
    bool: True if there is at least one available device (GPU/XPU/CustomDevice),
    False otherwise.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> if paddle.device.is_available():
        ...     print("At least one device is available")
        ... else:
        ...     print("No supported devices available")
r   )r(   rT   rR   rQ   rE   rE   2  s    2 >QrR   c                 ,    [         R                  " 5       $ )a  
Check whether the current stream is in CUDA graph capturing state.

Returns:
    bool: True if the current stream is capturing, False otherwise.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> if paddle.device.is_available():
        ...     graph = paddle.device.cuda.graphs.CUDAGraph()
        ...     graph.capture_begin()
        ...     print(paddle.device.is_current_stream_capturing())  # True
        ...     graph.capture_end()
)r	   is_cuda_graph_capturingrT   rR   rQ   rF   rF   N  s    " ''))rR   c                     [         R                  " 5       (       d  g[        c-  [        [         R                  " 5       5      n U q[        S:  a  gU $ [        $ )a  

This function return the version of cudnn. the return value is int which represents the
cudnn version. For example, if it return 7600, it represents the version of cudnn is 7.6.

Returns:
    int: A int value which represents the cudnn version. If cudnn version is not installed, it return None.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> cudnn_version = paddle.device.get_cudnn_version()



Nr   )r	   r   _cudnn_versionintcudnn_version)r`   s    rQ   r5   r5   b  sK    ( %%''D..01&A  rR   rI   c                2    [        U 5      n [        U 5      n U $ )zQ
Convert input device(Place | int | str | None) into corresponding Place object.
)_device_to_paddle_convert_to_placerI   s    rQ   device_to_placere     s     v&Fv&FMrR   c                   [        U [        5      (       d  U $ U R                  5       nUR                  S5      (       a  UR	                  SS5      nU [
        R                  " 5       ;   aP  [        R                  " SU  S3S5      R                  S5      n[        US   5      n[
        R                  " X5      nU$ US:X  a  [
        R                  " 5       nU$ US:X  d  US	:X  ac  [
        R                  " 5       (       d  [        S
5      e[
        R                  " [         R"                  R%                  5       R&                  5      nU$ US:X  aq  [
        R(                  " 5       (       d  [        S5      e[        R                  " SS5      R                  S5      n[        US   5      n[
        R*                  " U5      nU$ US:X  a<  [
        R,                  " 5       (       d  [        S5      e[
        R.                  " 5       nU$ [0        R2                  " SU5      =(       d    [0        R2                  " SU5      n[0        R2                  " SU5      nU(       aa  [
        R                  " 5       (       d  [        SU S35      eU R                  SS5      nUS   n[        U5      n[
        R                  " U5      nU(       aa  [
        R(                  " 5       (       d  [        SU S35      eU R                  SS5      nUS   n[        U5      n[
        R*                  " U5      nU(       d  U(       d  U R                  SS5      nUS   n	U	[
        R                  " 5       ;   a(  US   n[        U5      n[
        R                  " X5      nU$ [        SR5                  SR7                  S SS	SS/[
        R                  " 5       Q 5       5      5      5      eW$ )Nr   gpuFLAGS_selected_s0,r   cpudcuzLThe device should not be 'gpu', since PaddlePaddle is not compiled with CUDAr   zKThe device should not be 'xpu', since PaddlePaddle is not compiled with XPUFLAGS_selected_xpusipuzKThe device should not be 'ipu', since PaddlePaddle is not compiled with IPUzgpu:\d+zdcu:\d+zxpu:\d+zThe device should not be z., since PaddlePaddle is not compiled with CUDA:r   z-, since PaddlePaddle is not compiled with XPUz3The device must be a string which is like 'cpu', {}z, c              3  4   #    U  H  nS U SU S3v   M     g7f)'z', 'z:x'NrT   ).0xs     rQ   	<genexpr>$_convert_to_place.<locals>.<genexpr>  s*      	"&  s$qc-&s   npu)
isinstancestrlower
startswithreplacer	   r4   osgetenvsplitr_   r#   CPUPlacer   
ValueErrorr"   paddledistributedParallelEnvrY   r8   r   r9   r   rematchformatjoin)
rI   lower_deviceselected_devices	device_idplaceselected_xpusavailable_gpu_deviceavailable_xpu_devicedevice_info_listrP   s
             rQ   rc   rc     s   fc""<<>Lv&&#++FE:002299vha%@#FLL
 (+,	  3R LQ 
	N LM 
	,%"7))++?  v11==?FFG@ L 
	((**>  		"7=CCCHa()	i(n Lm 
	((**>  ` L]  "xx
LA  
RXXF
  "xx
LA--// /0D/E F- -   &||C3(+IIINN9-E,,.. /0D/E F, ,   &||C3(+IIIMM),E#,@%||C3*1-Kd==??,Q/		N	((@  L !IPP		 	" !& % % %	&
 "&!@!@!B&	" 	  LrR   c                  F    \ rS rSrSrSS	S jjrS r        S
S jrSrg)rI   i  a0  Context-manager that changes the selected device.

Args:
    device (paddle.Place, int or str): device index to select.

Examples:
    .. code-block:: python
        >>> import paddle

        >>> print(paddle.device.get_device())  # gpu:0
        >>> with paddle.device.device("cpu"):
        ...     print(paddle.device.get_device())  # cpu

        >>> # paddle.cuda.device is an alias of paddle.device.device
        >>> with paddle.cuda.device("cpu"):
        ...     print(paddle.device.get_device())  # cpu
        >>> print(paddle.device.get_device())
Nc                2    [        U5      U l        SU l        g )Nz-1)re   r   prev_place_strselfrI   s     rQ   __init__device.__init__  s    $V,
"rR   c                L    [        5       U l        [        U R                  5        g N)r7   r   r6   r   r   s    rQ   	__enter__device.__enter__  s    (l4::rR   c                .    [        U R                  5        gNF)r6   r   )r   exc_type	exc_value	tracebacks       rQ   __exit__device.__exit__   s     	4&&'rR   )r   r   r   )rI   Place | int | str | None)r   type[BaseException] | Noner   BaseException | Noner   ztypes.TracebackType | Nonereturnzbool | None)	__name__
__module____qualname____firstlineno____doc__r   r   r   __static_attributes__rT   rR   rQ   rI   rI     s<    &#, ( .	
 
rR   c                 h    [        5       n SU ;   a  [        U R                  S5      S   5      nU$ SnU$ )a  
Return the index of a currently selected device.

Returns:
    int: The index of the currently selected device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> device_id = paddle.device.current_device() # this is equivalent to paddle.cuda.current_device()
        >>> print(f"Current device index: {device_id}")
rp   r   r   )r7   r_   r   )
device_strr   s     rQ   current_devicer   
  sD      J j
((-a01	
  	rR   c                   [         R                  " [        R                  R	                  5       5      (       a  g[        5       (       d  g[        5       nU (       d  g [        R                  " S/[        R                  US9  g!    g= f)aa  
Return a bool indicating if the current CUDA/ROCm device supports dtype bfloat16.

Args:
    including_emulation (bool = True): Whether to treat software-emulated BF16 as supported; if False, only native hardware BF16 support is considered.

Returns:
    bool: A boolean value which indicates whether the current CUDA/ROCm device supports dtype bfloat16.

Examples:

    .. code-block:: python

        >>> import paddle

        >>> paddle.device.is_bf16_supported()
        >>> # paddle.cuda.is_bf16_supported() is an alias of paddle.device.is_bf16_supported()
        >>> paddle.cuda.is_bf16_supported()

TFg      ?)dtyperI   )	r	   is_bfloat16_supportedr   r
   _current_expected_place_rE   r7   tensorbfloat16)including_emulationrI   s     rQ   rJ   rJ   '  sj    , !!&"2"2"K"K"MNN >>\F se6??6Bs   %B Bc                H    [        U 5      n[        R                  " U5        U$ )a  

Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU.
They are represented by string identifiers. This function can specify the global device
which the OP will run.

Args:
    device(str, Place or int): This parameter determines the specific running device.
        It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``,
        where ``x`` is the index of the GPUs, XPUs or NPUs.

Returns:
    Place,the Place to set.

Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        >>> paddle.device.set_device("cpu")
        >>> x1 = paddle.ones(name='x1', shape=[1, 2], dtype='int32')
        >>> print(x1.place)
        Place(cpu)

        >>> paddle.device.set_device("gpu:0")
        >>> x2 = paddle.zeros(name='x2', shape=[1, 2], dtype='int32')
        >>> print(x2.place)
        Place(gpu:0)
        >>> # x1 is still on cpu
        >>> print(x1.place)
        Place(cpu)

)re   r
   _set_expected_placerI   r   s     rQ   r6   r6   S  s"    H F#E!!%(LrR   c                    g r   rT   inputs    rQ   r7   r7   |  s    +.rR   c                    g r   rT   r   s    rQ   r7   r7     s    -0rR   c                X   [        U [        R                  5      (       a4  S[        U R                  5      ;   a  gU R                  R                  5       $ Sn[        R                  " 5       n[        U[        R                  5      (       a  SnU$ [        U[        R                  5      (       a   UR                  5       nS[        U5      -   nU$ [        U[        R                  5      (       a   UR                  5       nS[        U5      -   nU$ [        U[        R                  5      (       a   [        R                  " 5       nSUS-
   S3nU$ [        U[        R                  5      (       a3  UR                  5       nUR!                  5       nUS	-   [        U5      -   nU$ [#        S
U S35      e)a  

This function can get the current global device of the program is running.
It's a string which is like 'cpu', 'gpu:x', 'xpu:x' and 'npu:x'. if the global device is not
set, it will return a string which is 'gpu:x' when cuda is available or it
will return a string which is 'cpu' when cuda is not available.

Returns:
    if input is Tensor, this function will return the device ID where the given Tensor is located.
    int:
        - -1, if the Tensor is on CPU.
        - The device ID (e.g., 0, 1, ...) if the Tensor is on GPU.

    if input is not Tensor, this function will return the device name where the program is running.
    str:
        - 'cpu': If the program is running on CPU.
        - 'gpu:x': If the program is running on GPU, where `x` is the index of the GPU.
        - 'xpu:x': If the program is running on XPU, where `x` is the index of the XPU.
        - 'npu:x': If the program is running on NPU, where `x` is the index of
Examples:

    .. code-block:: python

        >>> import paddle
        >>> device = paddle.device.get_device()

        >>> x_cpu = paddle.to_tensor([1, 2, 3], place=paddle.CPUPlace())
        >>> id = paddle.get_device(x_cpu) # -1



rl    gpu:zxpu:zipus:{0-r   }rp   zThe device specification z is invalid)rx   r   Tensorry   r   gpu_device_idr
   r   r	   r   r"   get_device_idr   r   get_ipu_device_countr#   get_device_typer   )r   rI   r   r   num_devicesrP   s         rQ   r7   r7     sv   B %''C$${{((**F..0E%''" M! 
E4>>	*	*'')	#i.( M 
E4==	)	)'')	#i.( M 
E4==	)	)//1[1_-R0 M 
E4++	,	,'')	++-s"S^3 M 4UG;GHHrR   c                 ^    [         R                  " [        5       R                  SS5      5      $ )z
Returns:
    str: The default device for PaddlePaddle.
Example:
    .. code-block:: pycon

        >>> import paddle

        >>> print(paddle.get_default_device())
rg   r   )r   rI   r7   r|   rT   rR   rQ   get_default_devicer     s"     ==--eV<==rR   c                    [        U 5        g)a  
Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU.
This function can specify the global device which the OP will run.

Args:
    device(str, Place or int): This parameter determines the specific running device.
        It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``,
        where ``x`` is the index of the GPUs, XPUs or NPUs.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.device.set_device("cpu")
N)r6   rd   s    rQ   set_default_devicer     s      vrR   c                 ,    [         R                  " 5       $ )a  

Get all available device types.

Returns:
    A list of all available device types.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.device.get_all_device_type()

        >>> # Case 1: paddlepaddle-cpu package installed, and no custom device registered.
        >>> # Output: []

        >>> # Case 2: paddlepaddle-gpu package installed, and no custom device registered.
        >>> # Output: ['gpu']

        >>> # Case 3: paddlepaddle-cpu package installed, and custom device 'CustomCPU' is registered.
        >>> # Output: ['CustomCPU']

        >>> # Case 4: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
        >>> # Output: ['gpu', 'CustomCPU', 'CustomGPU']

)r	   r;   rT   rR   rQ   r;   r;     s    6 ##%%rR   c                 ,    [         R                  " 5       $ )a  

Get all available custom device types.

Returns:
    A list of all available custom device types.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.device.get_all_custom_device_type()

        >>> # Case 1: paddlepaddle-gpu package installed, and no custom device registered.
        >>> # Output: None

        >>> # Case 2: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
        >>> # Output: ['CustomCPU', 'CustomGPU']

)r	   r4   rT   rR   rQ   r4   r4     s    * **,,rR   c                 ,    [         R                  " 5       $ )a  

Get all available devices.

Returns:
    A list of all available devices.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.device.get_available_device()

        >>> # Case 1: paddlepaddle-cpu package installed, and no custom device registered.
        >>> # Output: []

        >>> # Case 2: paddlepaddle-gpu package installed, and no custom device registered.
        >>> # Output: ['gpu:0', 'gpu:1']

        >>> # Case 3: paddlepaddle-cpu package installed, and custom device 'CustomCPU' is registered.
        >>> # Output: ['CustomCPU']

        >>> # Case 4: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
        >>> # Output: ['gpu:0', 'gpu:1', 'CustomCPU', 'CustomGPU:0', 'CustomGPU:1']

)r	   r<   rT   rR   rQ   r<   r<     s    6 $$&&rR   c                 ,    [         R                  " 5       $ )a  

Get all available custom devices.

Returns:
   A list of all available custom devices.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.device.get_available_custom_device()

        >>> # Case 1: paddlepaddle-gpu package installed, and no custom device registered.
        >>> # Output: None

        >>> # Case 2: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
        >>> # Output: ['CustomCPU', 'CustomGPU:0', 'CustomGPU:1']

)r	   r=   rT   rR   rQ   r=   r=   5  s    * ++--rR   c                .    [        U 5      n [        U 5      $ )a  

Return the properties of given device.

Args:
    device(|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
        the string name of device like npu:x' which to get the properties of the
        device from. If device is None, the device is the current device.
        Default: None.

Returns:
   _customDeviceProperties: The properties of the device which include ASCII string
    identifying device, major compute capability, minor compute capability, global
    memory available and the number of multiprocessors on the device.

Examples:
    .. code-block:: python

        >>> # import paddle
        >>> # paddle.device.set_device('npu')
        >>> # paddle.device.get_device_properties('npu:0')
        >>> # _customDeviceProperties(name='', major=0, minor=0, total_memory=0MB, multi_processor_count=0)

        >>> # paddle.device.get_device_properties('npu')
        >>> # _customDeviceProperties(name='', major=0, minor=0, total_memory=0MB, multi_processor_count=0)
)rb   _get_device_propertiesrd   s    rQ   r*   r*   M  s    : v&F!&))rR   c                d   [        U 5      n [        U [        5      (       a  U R                  5       R	                  S5      S   n 1 SknU S;   a  [
        R                  $ U S:X  a  [
        R                  R                  $ X;   a  [
        R                  R                  $ U S:X  a  [
        R                  R                  $ [        SU  35      eU c  [
        R                  R                  5       O
[        U 5      n[        R                   [
        R                  [        R"                  [
        R                  R                  [        R$                  [
        R                  R                  [        R&                  [
        R                  0nUR)                  5        H  u  pE[        X$5      (       d  M  Us  $    g)	aD  
Returns the Paddle module associated with a given device.

Args:
    device (_CustomPlaceLike, optional): The device to query.
        Can be one of the following:
            - paddle.Place object (e.g., paddle.CUDAPlace(0))
            - str (e.g., "gpu:0", "xpu", "npu")
            - int (device index, e.g., 0 -> "gpu:0")
            - None (use current expected place)

Returns:
    module: The corresponding Paddle device module (e.g., paddle.cuda, paddle.device.xpu)

Raises:
    RuntimeError: If the device type is CPU (Paddle does not expose `paddle.cpu`)
                  or if no matching device module is found.

Example:
    .. code-block:: python
    >>> paddle.get_device_module("gpu:0")
    <module 'paddle.cuda' ...>

    >>> # paddle.get_device_module(paddle.XPUPlace(0))
    >>> # <module 'paddle.device.xpu' ...>
rp   r   >   gcumlumpsrw   sdaa	biren_gpu	intel_gpu	intel_hpu	metax_gpu
custom_cpuiluvatar_gpu)r   rg   r   rl   Unsupported device type: N)rb   rx   ry   rz   r   r   r   rI   r   custom_devicerl   RuntimeErrorr
   r   rc   r	   r"   r#   r   r   items)rI   custom_device_typesr   place_to_module
place_typemodules         rQ   get_device_moduler   n  sD   6 v&F&#%%c*1-
 _$;;u_==$$$*==...u_==$$$!:6(CDD > 	113v& 
 	&--55v}}((v}}	O .335
e((M 6rR   c                ,    [        U 5      R                  $ )a.  

Return the properties of given device.

Args:
    device(|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
        the string name of device like npu:x' which to get the properties of the
        device from. If device is None, the device is the current device.
        Default: None.

Returns:
    str: The name of the CUDA device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle
        >>> name = paddle.device.get_device_name()
        >>> print(name)
)r*   namerd   s    rQ   rG   rG     s    0 !(---rR   c                H    [        U 5      nUR                  UR                  4$ )aC  

Return the device_capability of given device.

Args:
    device(|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
        the string name of device like npu:x' which to get the properties of the
        device from. If device is None, the device is the current device.
        Default: None.

Returns:
    str: The device_capability of given device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle
        >>> cap = paddle.device.get_device_capability()
        >>> print(cap)
)r*   majorminor)rI   props     rQ   rH   rH     s!    0 !(D::tzz!!rR   c           	        U c  [         R                  " 5       $ [        U [        5      (       a  U n[         R                  " 5       (       a  SnGOSn[         R
                  " 5       n[        U5      S:X  a!  U S:X  a  US   nOD[        SU  SUS    S35      eU H)  nUR                  S5      u  pg[        U5      U :X  d  M'  UnM+     Uc  [        SU  S	U 35      eO[        U [         R                  5      (       a  SnU R                  5       nO[        U [         R                  5      (       a!  U R                  5       nU R                  5       nO[        U [        5      (       a]  U R                  S
5      (       a  [        U SS 5      nOJSU ;   a   U R                  SS5      u  p8[        U5      nO$[        SU  SU S35      e[        SU  SU S35      eUS:  d   SU S35       e[         R                  " 5       (       a&  U[        5       :  d   SU S[        5        35       e U$ U[         R                   " W5      :  d$   SU SU S[         R                   " U5       35       eU$ )a~  
Return the id of the given device. It is just a utility that will not be exposed to users.

Args:
    device(paddle.CUDAPlace|paddle.CustomPlace|int|str): The device, the id of the device or
        the string name of device like 'gpu:x' or 'custom_device:x'.
        Default: None.

Return:
    int: The id of the given device. If device is None, return the id of current device.
Nrg   r   r   z
Device id z) not found in available_custom_devices: [z:0]rp   z( not found in available_custom_devices: r      zThe current string z is not expected. Because zi only support string which is like 'gpu:x' or '<custom_device>:x'. Please input appropriate string again!zThe device type z only support int, str (format 'gpu:x' or '<custom_device>:x'), paddle.CUDAPlace or paddle.CustomPlace. Please input appropriate device again!z4The device id must be not less than 0, but got id = .zThe device id z exceeds gpu card number z	 exceeds z device card number )r	   get_cuda_current_device_idrx   r_   r   r=   lenr   r   r"   r   r#   r   ry   r{   r(   get_custom_device_count)	rI   op_namer   rP   available_custom_devicesddev_typerY   device_id_strs	            rQ   extract_device_idr     s    ~..00&#	%%''KK'+'G'G'I$+,1Q;":1"=K$$VH,UVnopVqUrruv  2A'(wws|$H6{f,&. 2 "  (PQiPjk  # 
FDNN	+	+((*		FD,,	-	-,,.((*		FC	 	 V$$F12JI6M)/c1)=&KM*I%fX-Gy Q9 9 
 vh&@	 J5 5
 	

 > 
>ykK> !!##<>) 	
YK'@@PQ	
)  477DD 	
YKy=QRVRnRnozR{Q|}	
D rR   c                      \ rS rSr% SrS\S'   S\S'   S\S'      S       SS	 jjrSSS jjrSS jrSS jr	SS jr
SS jrSrg
)r?   i;  a  

A device event wrapper around StreamBase.

Args:
    enable_timing (bool, optional): indicates if the event should measure time, default is False
    blocking (bool, optional): if True, ``wait`` will be blocking, default is False
    interprocess (bool): if True, the event can be shared between processes, default is False

Returns:
    Event: The event.

Note:
    The `device` parameter has been removed in the latest version. The event will always use the current device context.
    Previously, you could specify the device like:
    ```python
    # Old usage (no longer supported)
    e = paddle.device.Event(device="gpu:0")
    ```
    Now it will automatically use the current device:
    ```python
    # New usage
    paddle.set_device("gpu:0")  # Set device first
    e = paddle.device.Event()  # Will use gpu:0
    ```

    paddle.device.Event is equivalent to paddle.cuda.Event.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> e1 = paddle.device.Event()  # Uses current device (custom_cpu)
        >>>
        >>> # Old usage (no longer supported):
        >>> # e2 = paddle.device.Event('custom_cpu')
        >>> # e3 = paddle.device.Event('custom_cpu:0')
        >>> # e4 = paddle.device.Event(paddle.CustomPlace('custom_cpu', 0))
        >>>
        >>> # New equivalent usage:
        >>> paddle.set_device('custom_cpu:0')
        >>> e5 = paddle.device.Event()  # Uses custom_cpu:0

PlaceLike | NonerI   boolenable_timing_InitEventBase
event_basec                J   [         R                  R                  5       U l        [	        U R                  S5      (       a  U R                  R                  5       OS n[	        U R                  S5      (       a  U R                  R                  5       OS n[        UUUUUS9U l        g )Nr   r   )r   blockinginterprocessrP   r   )	r   r
   r   rI   hasattrr   r   _create_event_baser   )r   r   r  r  r   rP   s         rQ   r   Event.__init__p  s     &&??A t{{O44 KK%%' 	 t{{$566 KK'') 	 -'%#
rR   Nc                ~    Uc  [        U R                  5      nU R                  R                  UR                  5        g)a  

Records the event in a given stream.

Args:
    stream(Stream, optional): The given stream. By default, stream is None,
    event will be recorded in current_stream.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> e = paddle.device.Event()
        >>> e.record()

        >>> s = paddle.device.Stream()
        >>> e.record(s)

N)r@   rI   r   recordstream_baser   streams     rQ   r  Event.record  s/    4 >#DKK0Fv112rR   c                6    U R                   R                  5       $ )a  

Checks if all work currently captured by event has completed.

Returns:
    bool: Whether all work currently captured by event has completed.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> e = paddle.device.Event()
        >>> e.record()
        >>> e.query()

)r   queryr   s    rQ   r  Event.query  s    ( $$&&rR   c                L    U R                   R                  UR                   5      $ )a  

Returns the time elapsed in milliseconds after the event was
recorded and before the end_event was recorded.

Returns:
    int: The time.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> e1 = paddle.device.Event()
        >>> e1.record()

        >>> e2 = paddle.device.Event()
        >>> e2.record()
        >>> e1.elapsed_time(e2)

)r   elapsed_time)r   	end_events     rQ   r  Event.elapsed_time  s    0 ++I,@,@AArR   c                8    U R                   R                  5         g)a  

Waits for the event to complete.
Waits until the completion of all work currently captured in this event.
This prevents the CPU thread from proceeding until the event completes.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> e = paddle.device.Event()
        >>> e.record()
        >>> e.synchronize()

N)r   rD   r   s    rQ   rD   Event.synchronize  s    , 	##%rR   c                <    SU R                    SU R                   S3$ )NzEvent(device=z, event_base=)rI   r   r   s    rQ   __repr__Event.__repr__  s     t{{m=8IKKrR   r  )FFF)r   r   r  r   r  r   r   Noner   r
  Stream | Noner   r  r   r   )r  r?   r   r_   r   r  r   ry   )r   r   r   r   r   __annotations__r   r  r  r  rD   r  r   rT   rR   rQ   r?   r?   ;  sm    .`  $"	

 
 	

 

63>',B4&0LrR   r?   c                     ^  \ rS rSr% SrS\S'   S\S'   SSSSS	.rS
\S'      S       SS jjrSS jrSS jr	SSS jjr
SS jrSS jr\S 5       rS rS U 4S jjrS!S jrS"S jrSrU =r$ )#r>   i  aZ  

A device stream wrapper around StreamBase.
paddle.cuda.Stream() is equivalent to paddle.device.Stream().

Args:
    device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)|None): Which device the stream run on. If device is None, the device is the current device. Default: None.
        It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
        where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
    priority(int, optional): priority of the CUDA stream. Can be either
        1 or -1 (high priority) or 0 or 2 (low priority). By default, streams have
        priority 2.

Returns:
    Stream: The stream.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s1 = paddle.device.Stream()
        >>> s2 = paddle.device.Stream('custom_cpu')
        >>> s3 = paddle.device.Stream('custom_cpu:0')
        >>> s4 = paddle.device.Stream(paddle.CustomPlace('custom_cpu', 0))

_InitStreamBaser  PlaceLike | intrI   r      )r   r   r   r$  zdict[int, int]_priority_mapc                "   Uba  [        U[        R                  [        R                  [        R                  45      (       a  X0l        UR                  U l        g [        S5      e[        U5      U l        [        U R                  S5      (       a  U R                  R                  5       OS n[        U R                  S5      (       a  U R                  R                  5       OS nU R                  R                  US5      n[        UUSUS9U l        g )Nz?stream_base should be CUDAStream, XPUStream, CustomDeviceStreamr   r   r$  F)r   priorityr  rP   )rx   r	   
CUDAStreamCustomDeviceStream	XPUStreamr  r   rI   	TypeErrorre   r  r   r   r%  get_create_stream_base)r   rI   r'  r  r   rP   s         rQ   r   Stream.__init__  s     "$"9"94>>J  $/ )//
   U  &f- t{{O44 KK%%' 	 t{{$566 KK'') 	
 %%))(A6.#	
rR   c                N    U R                   R                  UR                  5        g)a  

Makes all future work submitted to the stream wait for an event.

Args:
    event (Event): an event to wait for.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s1 = paddle.device.Stream()
        >>> s2 = paddle.device.Stream()
        >>> e = paddle.device.Event()
        >>> e.record(s1)
        >>> s2.wait_event(e)

N)r  
wait_eventr   r   events     rQ   r0  Stream.wait_event?  s    2 	##E$4$45rR   c                N    U R                   R                  UR                   5        g)a  

Synchronizes with another stream.
All future work submitted to this stream will wait until all kernels
submitted to a given stream at the time of call complete.

Args:
    stream (Stream): a stream to synchronize.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s1 = paddle.device.Stream()
        >>> s2 = paddle.device.Stream()
        >>> s1.wait_stream(s2)

N)r  wait_streamr	  s     rQ   r5  Stream.wait_streamZ  s    2 	$$V%7%78rR   c                B    Uc
  [        5       nUR                  U 5        U$ )a  

Records an event.

Args:
    event (Event, optional): event to record. If not given, a new one
    will be allocated.

Returns:
    Event: Recorded event.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s = paddle.device.Stream()
        >>> e1 = s.record_event()

        >>> e2 = paddle.device.Event()
        >>> s.record_event(e2)

)r?   r  r1  s     rQ   record_eventStream.record_eventu  s!    4 =GETrR   c                6    U R                   R                  5       $ )a]  

Checks if all the work submitted has been completed.

Returns:
    bool: Whether all kernels in this stream are completed.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s = paddle.device.Stream()
        >>> s.query()

)r  r  r   s    rQ   r  Stream.query  s    & %%''rR   c                8    U R                   R                  5         g)a1  

Wait for all the kernels in this stream to complete.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s = paddle.device.Stream()
        >>> s.synchronize()

N)r  rD   r   s    rQ   rD   Stream.synchronize  s    & 	$$&rR   c                   [        U R                  [        R                  5      (       a*  [        R
                  " U R                  R                  5      $ [        U R                  [        R                  5      (       a*  [        R
                  " U R                  R                  5      $ [        R
                  " U R                  R                  5      $ r   )
rx   r  r	   r(  ctypesc_void_pcuda_streamr*  
xpu_stream
raw_streamr   s    rQ   _as_parameter_Stream._as_parameter_  s    d&&88??4#3#3#?#?@@(($..99??4#3#3#>#>????4#3#3#>#>??rR   c                2    SU R                   R                  4$ )z
CUDA Stream protocol described at
https://nvidia.github.io/cuda-python/cuda-core/latest/interoperability.html#cuda-stream-protocol

Returns a tuple of (protocol_version, cudaStream_t)
r   )r  rC  r   s    rQ   __cuda_stream__Stream.__cuda_stream__  s     4##..//rR   c                N   > [        U[        5      (       a  [        TU ]  U5      $ gr   )rx   r>   super__eq__)r   o	__class__s     rQ   rK  Stream.__eq__  s"    a  7>!$$rR   c                D    [        U R                  U R                  45      $ r   )hashr  rI   r   s    rQ   __hash__Stream.__hash__  s    T%%t{{344rR   c                R    SU R                    SU R                  R                  S S3$ )Nz<paddle.device.Stream device=z stream=z#x>)rI   rD  valuer   s    rQ   r  Stream.__repr__  s,    .t{{m8DDWDWD]D]^`CaabccrR   )rI   r  )Nr$  N)rI   PlaceLike | int | Noner'  r_   r  z_InitStreamBase | Noner   r  )r2  r?   r   r  )r
  r>   r   r  r   )r2  zEvent | Noner   r?   r  r  )rL  r  r   r   r   r_   r  )r   r   r   r   r   r   r%  r   r0  r5  r8  r  rD   propertyrD  rG  rK  rQ  r  r   __classcell__)rM  s   @rQ   r>   r>     s    < ! )*qQ1$=M>= *..2	$
&$
 $
 ,	$

 
$
L6696>(*'* @ @0
5d drR   r>   c                p   [        U [        5      (       aT  U S:  a  [        SU  35      e[        5       nUS:X  a  U S:w  a  [        SU  35      egUR	                  S5      S   nU SU  3$ [        U [
        5      (       a*  U R                  5       nSU;   a  UR                  SS5      $ U$ U c
  [        5       $ U $ )Nr   z'Device index must be non-negative, got rl   z&CPU device only supports index 0, got rp   zcuda:r   )rx   r_   r   r7   r   ry   stripr|   )devcurrent_placerP   cleaned_devices       rQ   rb   rb     s     #s7FseLMM"E!ax #I#!OPP#))#.q1au%%	C		 .( ""7F3	
  	

 
|
rR   c                   U c  [         R                  R                  5       nO7[        U [        5      (       a   [         R
                  R                  U 5      nOU n[         R                  " 5       (       aJ  [        U[         R                  5      (       a+  [        [        R                  " UR                  5       5      S9$ [         R                  " 5       (       aJ  [        U[         R                  5      (       a+  [        [        R                  " UR                  5       5      S9$ [        U[         R                   5      (       a:  [        [        R"                  " UR%                  5       UR                  5       5      S9$ ['        SR)                  SR+                  [         R
                  R-                  5       5      5      5      e)a  

Return the current stream by the device.

Args:
    device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from.  If device is None, the device is the current device. Default: None.
        It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
        where ``x`` is the index of the GPUs, CustomDevices. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).

Returns:
    Stream: The stream to the device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s1 = paddle.device.current_stream()
        >>> s2 = paddle.device.current_stream("custom_cpu:0")
        >>> place = paddle.CustomPlace('custom_cpu', 0)
        >>> s3 = paddle.device.current_stream(place)

r  device should be gpu, xpu, {}rk   )r   r
   r   rx   ry   rI   rc   r   r"   r>   r	   _get_current_streamr   r8   r   _xpu_get_current_streamr#   !_get_current_custom_device_streamr   r+  r   r   r4   r   s     rQ   r@   r@     sJ   4 ~  99;	FC	 	 //7##%%*UF<L<L*M*M001D1D1FG
 	
 
	$	$	&	&:eV__+M+M44U5H5H5JK
 	
 
E6--	.	.>>%%')<)<)>
 	
 +22AACD
 	
rR   c                   [        U R                  R                  5      n[        R                  " 5       (       aU  [        U R                  R                  [        R                  5      (       a"  [        R                  " U R                  5        U$ [        R                  " 5       (       a_  [        U R                  R                  [        R                  5      (       a,  [        R                  " U R                  R                  5        U$ [        U R                  R                  [        R                  5      (       ah  [        R                  " U R                  R                  R                  5       U R                  R                  R!                  5       U R                  5        U$ [#        SR%                  SR'                  [        R(                  R+                  5       5      5      5      e)a  

Set the current stream.

Args:
    stream(Stream): The selected stream.

Returns:
    Stream: The previous stream.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s = paddle.device.Stream()
        >>> # paddle.cuda.set_stream(s) is equivalent to paddle.device.set_stream(s)
        >>> paddle.device.set_stream(s)

rb  rk   )r@   r  r   r   r   rx   r"   r	   _set_current_streamr8   r   _xpu_set_current_streamidxr#   !_set_current_custom_device_streamr   r   r+  r   r   rI   r4   )r
  prev_streams     rQ   rA   rA   -  sf   0 !!3!3!9!9:K##%%*  &"2"2+ + 	  !3!34$ # 
	$	$	&	&:  &//, , 	$$V%7%7%;%;<  
F&&,,f.@.@	A	A..$$446$$224	
  +22AACD
 	
rR   c                  V    \ rS rSr% SrS\S'   S
SS jjrSS jr        SS jrS	r	g)rB   i_  aL  

Notes:
    This API only supports dynamic graph mode currently.
A context manager that specifies the current stream context by the given stream.

Args:
    stream(Stream, optional): the selected stream. If stream is None, just yield.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('custom_cpu')
        >>> s = paddle.device.Stream()
        >>> data1 = paddle.ones(shape=[20])
        >>> data2 = paddle.ones(shape=[20])
        >>> data3 = data1 + data2
        >>> with paddle.device.stream_guard(s):# this is equivalent to paddle.cuda.StreamContext(s) and paddle.device.StreamContext(s)
        ...     s.wait_stream(paddle.device.default_stream()) # type: ignore[attr-defined]
        ...     data4 = data1 + data3

r  r
  Nc                    Xl         g r   r
  r	  s     rQ   r   stream_guard.__init__  s    rR   c                   U R                   nUc  g [        UR                  5      U l        U R                  R                  UR                  :w  a  [        R
                  R                  R                  5       U l        [        R
                  R                  R                  UR                  5        [        UR                  5      U l
        [        U5        g [        U5        g r   )r
  r@   rI   src_prev_streamr   baser
   r   	tmp_placer   dst_prev_streamrA   )r   
cur_streams     rQ   r   stream_guard.__enter__  s    [[
-j.?.?@&&**;*;;#[[22KKMDNKK!!55j6G6GH#1*2C2C#DD z"z"rR   c                R   U R                   nUc  g U R                  R                  UR                  :w  a^  [        U R                  5        [
        R                  R                  R                  U R                  5        [        U R                  5        g [        U R                  5        g r   )
r
  rq  rI   rA   rt  r   rr  r
   r   rs  )r   r   exc_valexc_tbru  s        rQ   r   stream_guard.__exit__  sv     [[
&&**;*;;t++,KK!!55dnnEt++,t++,rR   )rt  rq  r
  rs  r   r  r  r   r   rx  r   ry  zTracebackType | Noner   r  
r   r   r   r   r   r   r   r   r   r   rT   rR   rQ   rB   rB   _  sD    : #-,- &- %	-
 
-rR   rB   c                    [        U 5      $ )a  

Notes:
    This API only supports dynamic graph mode currently.
A context manager that specifies the current stream context by the given stream.

Args:
    stream(Stream, optional): the selected stream. If stream is None, just yield.

Returns:
    None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> paddle.set_device('cuda')
        >>> s = paddle.device.Stream()
        >>> data1 = paddle.ones(shape=[20])
        >>> data2 = paddle.ones(shape=[20])
        >>> data3 = data1 + data2

        >>> with paddle.device.stream(s): # this is equivalent to paddle.cuda.stream(s)
        ...     s.wait_stream(paddle.cuda.current_stream())
        ...     data4 = data1 + data3
        >>> print(data4)

)rN   rn  s    rQ   r
  r
    s    >   rR   c                  \    \ rS rSr% SrS\S'   S\S'   SS jrSS jr        SS jrS	r	g
)rC   i  a  

Notes:
    This API only supports dynamic graph mode currently.

A context manager that specifies the current device context by the given device.

Args:
    device(PlaceLike): The specified device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        >>> # Set the global default device to CPU
        >>> paddle.set_device("cpu")
        >>> # Temporarily switch to GPU:0 using device_guard with string input
        >>> with paddle.device.device_guard("gpu:0"):
        ...     x = paddle.randn([4, 4])       # Create a Tensor on GPU:0
        ...     x = x.tanh() * 2               # Perform computation on GPU:0
        ...     print(x.place)                 # Check the device of the Tensor
        Place(gpu:0)

        >>> # Set the global default device to GPU:0
        >>> paddle.set_device("gpu:0")
        >>> # Temporarily switch to CPU using device_guard with Place object (CPUPlace)
        >>> cpu_place = paddle.CPUPlace()
        >>> with paddle.device.device_guard(cpu_place):
        ...     x = paddle.randn([4, 4])       # Create a Tensor on CPU
        ...     x = x.tanh() * 2               # Perform computation on CPU
        ...     print(x.place)
        Place(cpu)
r!   _target_place_original_placec                   [        U[        5      (       a%  [        R                  R	                  U5      U l        g [        U[        R                  R                  R                  5      (       a  Xl        g [        S[        U5       35      e)Nz_'device' must be a string or an instance of a subclass of paddle.base.libpaddle.Place, but got )rx   ry   r   rI   rc   r  rr  	libpaddler!   r   typer   s     rQ   r   device_guard.__init__  sj    fc""!'!@!@!HD 5 5 ; ;<<!'88<V~G rR   c                    [         R                  R                  5       U l        U R                  U R                  :w  a*  [         R                  R                  U R                  5        g g r   )r   r
   r   r  r  r   r   s    rQ   r   device_guard.__enter__  sL    %//HHJ4#5#55001C1CD 6rR   c                    U R                   U R                  :w  a*  [        R                  R	                  U R                   5        g g r   )r  r  r   r
   r   )r   r   rx  ry  s       rQ   r   device_guard.__exit__  s7     4#5#55001E1EF 6rR   )r  r  N)rI   r    r   r  r  r{  r|  rT   rR   rQ   rC   rC     sQ    "H 	E
G,G &G %	G
 
GrR   rC   c                \   U c  [         R                  R                  5       nO7[        U [        5      (       a   [         R
                  R                  U 5      nOU n[         R                  " 5       (       aD  [        U[         R                  5      (       a%  [        R                  " UR                  5       5        g[         R                  " 5       (       aD  [        U[         R                  5      (       a%  [        R                  " UR                  5       5        g[        U[         R                  5      (       a4  [        R                   " UR#                  5       UR                  5       5        g[%        SR'                  SR)                  [         R
                  R+                  5       5      5      5      e)ap  
Wait for the compute on the given device to finish.
Args:
    device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for.  If device is None, the device is the current device. Default: None.
        It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
        where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).
Examples:
    .. code-block:: python
        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle
        >>> paddle.set_device('custom_cpu')
        >>> paddle.device.synchronize()
        >>> paddle.device.synchronize("custom_cpu:0")
        >>> place = paddle.CustomPlace('custom_cpu', 0)
        >>> paddle.device.synchronize(place)
Nrb  rk   )r   r
   r   rx   ry   rI   rc   r   r"   r	   _device_synchronizer   r8   r   _xpu_device_synchronizer#   _synchronize_custom_devicer   r+  r   r   r4   r   s     rQ   rD   rD   	  s   $ ~  99;	FC	 	 //7##%%*UF<L<L*M*M  !4!4!67		$	$	&	&:eV__+M+M$$U%8%8%:;	E6--	.	.''!!#U%8%8%:	
 +22AACD
 	
rR   c                 T    [         R                  R                  R                  5         g)aX  
Force collects GPU memory after it has been released by CUDA IPC.
This function checks if any sent CUDA tensors could be cleaned from the memory.
Force closes shared memory file used for reference counting if there is no active counters.
Useful when the producer process stopped actively sending tensors and want to release unused memory.
Returns:
    None
Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # Force collect expired IPC memory
        >>> paddle.device.ipc_collect() #this is equivalent to paddle.cuda.ipc_collect()
N)r   rr  r  _ipc_collectrT   rR   rQ   rL   rL   2  s      KK&&(rR   c                
   Uc  [         R                  R                  5       nO7[        U[        5      (       a   [         R
                  R                  U5      nOUn[        [        R                  " XR                  5       5      S9$ )aT  
Return a :class:`Stream` from an externally allocated CUDA stream.

This function is used to wrap streams allocated in other libraries in order
to facilitate data exchange and multi-library interactions.

.. note::
    This function doesn't manage the stream life-cycle, it is the user
    responsibility to keep the referenced stream alive while this returned
    stream is being used.

Args:
    data_ptr(int): Integer representation of the CUDA stream handle (``cudaStream_t``)
        that is allocated externally.
    device(str|paddle.CUDAPlace(n), optional):
        The CUDA device where the stream was originally allocated.
        If device is None, the current CUDA device is used.
        It can be ``gpu``, ``gpu:x``, or ``paddle.CUDAPlace(n)``.

Returns:
    Stream: The wrapped CUDA stream corresponding to the given external pointer.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> # doctest: +SKIP('original_raw_ptr not exist')
        >>> original_raw_ptr = 77777
        >>> external_stream = paddle.device.get_stream_from_external(original_raw_ptr,"cuda:0")
ra  )r   r
   r   rx   ry   rI   rc   r>   r	   _get_stream_from_externalr   )data_ptrrI   r   s      rQ   rM   rM   E  sk    B ~  99;	FC	 	 //722))+
 rR   c                0    [         R                  " U 5        g)a  

Sets the seed for global default generator, which manages the random number generation.

Args:
    seed(int): The random seed to set.

Returns:
    None

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.device.manual_seed_all(102)

N)r   seed)r  s    rQ   manual_seed_allr  t  s    $ KKrR   c                  H    \ rS rSr\S\R                  S4 SS jj5       rSrg)_AutocastModei  Tc                    [        SXS9$ )a  
Create a context which enables auto-mixed-precision(AMP) of operators executed in dynamic graph mode.
If enabled, the input data type (float32, float16 or bfloat16) of each operator is decided
by autocast algorithm for better performance.

Commonly, it is used together with `GradScaler` and `decorator` to achieve Auto-Mixed-Precision in
imperative mode.

Args:
    device_type(str, optional): Device type. But because the paddle does not distinguish between devices, this parameter does not work.
    enable(bool, optional): Enable auto-mixed-precision or not. Default is True.
    dtype(str, optional): Whether to use 'float16' or 'bfloat16'. Default is 'float16'.
    cache_enabled(bool, optional): whether to enable cache or not. Default is True. But this parameter is not used

Note:
    paddle.cuda.amp.

Examples:

    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        >>> conv2d = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)
        >>> data = paddle.rand([10, 3, 32, 32])

        >>> with paddle.device.amp.auto_cast():
        ...     conv = conv2d(data)
        ...     print(conv.dtype)
        >>> # doctest: +SKIP("This has diff in xdoctest env")
        paddle.float16
        >>> # doctest: -SKIP

        >>> with paddle.device.amp.auto_cast(enable=False):
        ...     conv = conv2d(data)
        ...     print(conv.dtype)
        >>> # doctest: +SKIP("This has diff in xdoctest env")
        paddle.float32
        >>> # doctest: -SKIP

r   )rP   enabledr   )	_autocast)r  r   cache_enableds      rQ   r   _AutocastMode.autocast  s    \ VWJJrR   rT   N)r   r   )	r   r   r   r   staticmethodr   float16r   r   rT   rR   rQ   r  r    s*    FNN$-K	-K -KrR   r  c                  J    \ rS rSrSr\" \R                  5      r\" 5       rSr	g)ampi  z$Namespace for amp marker operations.rT   N)
r   r   r   r   r   r  r  r   autocast_moder   rT   rR   rQ   r  r    s    .M223H!OMrR   r  c                  <    \ rS rSrSr\SS j5       r\S 5       rSrg)nvtxi  z%Namespace for NVTX marker operations.c                V    [         R                  R                  R                  U 5        g)aj  
Push an NVTX range marker with the given message.

Args:
    msg (str): The name of the NVTX range.
Example:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # paddle.device.nvtx.range_push("test") is equivalent to paddle.cuda.nvtx.range_push("test")
        >>> paddle.device.nvtx.range_push("test")

N)r   rr  r	   nvprof_nvtx_push)msgs    rQ   
range_pushnvtx.range_push  s      	))#.rR   c                 T    [         R                  R                  R                  5         g)a#  
Pop the most recent NVTX range marker.
Example:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # paddle.device.nvtx.range_pop("test") is equivalent to paddle.cuda.nvtx.range_pop("test")
        >>> paddle.device.nvtx.range_pop()
N)r   rr  r	   nvprof_nvtx_poprT   rR   rQ   	range_popnvtx.range_pop  s     	((*rR   rT   N)r  ry   )	r   r   r   r   r   r  r  r  r   rT   rR   rQ   r  r    s+    // /" + +rR   r  c                    [        5         g)a  
Resets all devices' peak memory statistics.

This method resets the peak memory usage recorded for each device during the execution of the program.
It sets the peak memory usage back to zero for all devices.

Example:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')  # or '<custom_device>'

        >>> # paddle.cuda.reset_max_memory_allocated() is equivalent to paddle.device.reset_max_memory_allocated()

        >>> paddle.device.reset_max_memory_allocated(paddle.CUDAPlace(0))
        >>> paddle.device.reset_max_memory_allocated(0)
        >>> paddle.device.reset_max_memory_allocated("gpu:0")
N)r1   rd   s    rQ   rK   rK     s
    (  rR   c                  z    \ rS rSrSr/ r1 Skr S   SS jjr\S 5       r	\S 5       r
SS jrSS	 jrS
 rS rSrg)Devicei  a  
Paddle computing device.

This class represents a computing device in Paddle, such as CPU, GPU (CUDA), or XPU,
and can be passed directly to Paddle tensor creation APIs.

Note:
    - Only device types "cpu", "gpu", "cuda", and "xpu" are supported.
    - The string representation of the device (e.g., "cuda:0") can be used directly
      in Paddle APIs that accept a device argument.
    - This class supports context manager usage to temporarily set the default device.

Args:
    type (str|int, optional): The device type or a legacy device index.
        - str: "cpu", "cuda", "cuda:0", "gpu:1", "xpu:0"
        - int: legacy, interpreted as the default GPU device index
    index (int, optional): The device index, used with `type` string. Ignored for CPU.

Attributes:
    type (str): Device type ("cpu", "cuda", "gpu", "xpu").
    index (int|None): Device index. None for CPU.

Examples:
    .. code-block:: python

        >>> import paddle

        # String initialization
        >>> d1 = paddle.device("cpu")
        >>> d2 = paddle.device("cuda:0")
        >>> d3 = paddle.device("xpu", 1)

        # Type + index initialization
        >>> d4 = paddle.device(type="cuda", index=0)

        # Legacy int initialization
        >>> d5 = paddle.device(0)  # equivalent to paddle.device("cuda", 0)

        # Copy from another device
        >>> d6 = paddle.device(d2)

        # Using as context manager
        >>> with paddle.device("cuda:1"):
        ...     x = paddle.zeros([2, 3])  # created on CUDA device 1

        >>> print(d2.type)   # "cuda"
        >>> print(d2.index)  # 0
        >>> print(d1)        # "cpu"
        >>> print(d2)        # "cuda:0"
>   rl   rg   r   r   Nc                   [        U[        R                  R                  R                  5      (       a  UR                  5       (       a  SnS nGOUR                  5       (       a  SnUR                  5       nGO]UR                  5       (       a  SnUR                  5       nGO4UR                  5       (       a!  UR                  5       nUR                  5       nO[        SU 35      e[        U[        5      (       a  UR                  5       nXPR                  ;  a  SU;  a  [        SU 35      eUb  UnUS:w  a  UOS nOSU;   aL  UR!                  S5      u  p6UR                  5       nX0R                  ;  a  [        SU 35      e[#        U5      nOFUnUS:w  a  SOS nO9[        U["        5      (       a  SnUnOUc  Ub  [        S5      e[%        S	U 35      eUS:w  a  U SU 3OSn[        R'                  X5      nX8l        XHl        U$ )
Nrl   r   r   zUnknown place type: rp   r   r   z/Device type must be specified if index is givenzUnsupported type for Device: )rx   r   rr  r  r!   is_cpu_placeis_gpu_placer   is_xpu_placeis_custom_placer   r   r   ry   rz   _SUPPORTED_TYPESr   r_   r+  __new__	_dev_type_index)	clsr  indexr   	dev_indextri  ri   objs	            rQ   r  Device.__new__4  s    dFKK117788  ""  	""$$! ..0	""$$  ..0	%%''//1 ..0	 #7v!>??c""

A,,,A #<QC!@AA %&%ZET	!8$%GGCLMH'~~/H';';;(+DXJ)OPP #CI H%&%ZTIc""HI\e/NOO ;D6BCC)1U):xj)%kk#! 

rR   c                    U R                   $ r   )r  r   s    rQ   r  Device.typei  s    ~~rR   c                    U R                   $ r   )r  r   s    rQ   r  Device.indexm  s    {{rR   c                <   U R                   S:X  a  [        R                  " 5       $ U R                   S;   a   [        R                  " U R                  5      $ U R                   S:X  a   [        R
                  " U R                  5      $ [        SU R                    35      e)Nrl   >   rg   r   r   r   )r  r	   r   r"   r  r   r   r   s    rQ   	_to_placeDevice._to_placeq  so    99==?"YY/)>>$**--YY%==,,8DEErR   c                >    U R                  5       R                  5       $ r   )r  __dlpack_device__r   s    rQ   r  Device.__dlpack_device__{  s    ~~1133rR   c                    [         R                  " 5       n[        R                  R	                  U5        [         R
                  " [        U 5      5        U $ r   )r   r7   r  _DEFAULT_DEVICE_STACKappendr6   ry   )r   r   s     rQ   r   Device.__enter__~  s;    **,$$++N;#d)$rR   c                l    [         R                  R                  5       n[        R                  " U5        g r   )r  r  popr   r6   )r   r   rx  ry  previous_devices        rQ   r   Device.__exit__  s$     66::</*rR   rT   )NN)r  rW  r  
int | None)r   z
core.Place)r   tuple[int, int])r   r   r   r   r   r  r  r  rY  r  r  r  r  r   r   r   rT   rR   rQ   r  r    sp    1f 4 GK3)39C3j    F4+rR   r  c                  ,    \ rS rSrSrSS jrSS jrSrg)	_DeviceModulei  z<A callable package module: paddle.device(...) -> Device(...)c                    [        U0 UD6$ r   )r  )r   argskwargss      rQ   __call___DeviceModule.__call__  s    t&v&&rR   c                     [         R                  " U R                   SU 35      n[        XU5        U$ ! [         a  n[        U5      UeS nAff = f)Nr   )	importlibimport_moduler   setattrModuleNotFoundErrorAttributeError)r   r   modes       rQ   __getattr___DeviceModule.__getattr__  sR    	.))T]]O1TF*CDCD$J" 	. &A-	.s   25 
AAArT   N)r   r  )r   ry   )r   r   r   r   r   r  r  r   rT   rR   rQ   r  r    s    F'.rR   r  )rP   ry   r   r   r  )r   	_IPUPlace)rY   r_   r   	_XPUPlace)r   r  r   )rI   r   r   r!   )rI   r    r   r!   rX  )T)r   r   r   r   )rI   r#  r   r    )r   r  r   ry   )r   zpaddle.Tensorr   r_   )r   zpaddle.Tensor | Noner   z	str | int)r   zpaddle.device)rI   r#  r   r  )r   z	list[str])r   zlist[str] | None)rI   _CustomPlaceLike | Noner   r$   )rI   r%   )rI   r  r   ry   )rI   r  r   r  )rI   r%   r   ry   r   r_   )r]  r   )rI   r   r   r>   )r
  r>   r   r>   )r
  r  r   rB   )rI   r   r   r  r  )r  r_   rI   r   r   r>   )r  r_   r   r  )rI   rW  r   r  )
__future__r   r?  r  r}   r   systypestypingr   r   r   typing_extensionsr   r   
paddle.ampr   r  paddle.baser	   r
   paddle.base.frameworkr   r   r   r   paddle.tensor.creationr   r   r   r   r   r   r   r   r   r   r   r   r   
contextlibr   r   r   r  r   r  paddle._typing.device_liker    paddle.base.corer!   r(  r)  r*  r"  	CUDAEventCustomDeviceEventXPUEventr   r"   r#   paddle.base.libpaddler$   ry   r_   r%   r   current_device_is_cpur&   r  r'   r-  r(   r)   r*   r   r+   r,   r-   r.   r/   r0   r1   r2   r3   r8   r  r4   	dev_typesr:   r   rl   __all__r^   r9   rE   rF   r5   re   rc   rI   r   rJ   r6   r7   r   r   r;   r<   r=   r   rG   rH   r   r?   r>   rb   r@   rA   rB   rN   r
  rC   rD   rL   rM   r  r  r  r  rK   r  
ModuleTyper  modulesr   _selfr   _proxy__dict__updaterT   rR   rQ   <module>r     s    #   	 	 
  1 1 '  , '   
 1#C4&00$..@O ..=N .="'	#i         
       t122335		T881FF	
 	
 	
 	
 	
" !"		
 		
 		
8t <*'"$'"!*8*(BUp# #L:)X&R 
 . 
 . 
 0 
 09x>&&<-0'<.2 '+*#**BER '+.#..8 '+"#""8K\xL xLvcd cdN %)	!44
n/d@- @-F !D?G ?GD&
R)( /3,,+,,^*/K /Kd$ $!+ !+H!.I+S I+X.E$$ .  	H	x	/   u~~ &H rR   