
    Αi0u                    r   S SK Jr  S SKJrJr  S SKrS SKJrJrJr	J
r
  S SKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  S SKJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(  \(       a  \\R                  RR                  \*\+S4   r,S$S jrS%S&S jjr-S%S'S	 jjr.S$S
 jrS%S(S jjr/S%S)S jjr0S%S*S jjr1S+S jrS%S,S jjr2 S%     S-S jjr3 " S S5      r4S r5 " S S\65      r7S.S jr8S%S*S jjr9S/S jr:S0S jr;S$S jr<S%S1S jjr=S%S1S jjr>S%S1S jjr?S%S2S jjr@S%S2S jjrAS%S1S  jjrBS&S! jrC S%     S3S" jjrD/ S#QrEg)4    )annotations)TYPE_CHECKINGUnionN)basecoredevice	framework)EventStreamStreamContext_device_to_paddleampcurrent_devicer   ipc_collectis_availableis_bf16_supportedis_current_stream_capturingmanual_seedmanual_seed_allreset_peak_memory_stats
set_streamstream)
BFloat16Tensor
BoolTensor
ByteTensor
CharTensorDoubleTensorFloatTensor
HalfTensor	IntTensor
LongTensorShortTensorc                     [        5       $ )a  
Check whether **any supported device** is available in the current environment.

This function checks whether Paddle is built with support for at least one
type of accelerator (e.g., CUDA, XPU, CustomDevice) and whether there is
at least one device of that type available.

If any supported device is available, this function returns True. Otherwise,
it returns False.

Returns:
    bool: True if there is at least one available device (GPU/XPU/CustomDevice),
    False otherwise.

Examples:
    .. code-block:: python

        >>> import paddle

        >>> if paddle.cuda.is_available():
        ...     print("At least one device is available")
        ... else:
        ...     print("No supported devices available")
)_device_is_available     T/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/cuda/__init__.pyr   r   :   s    2  !!r&   c                F    [        U 5      n[        R                  " U5        g)aT  
Wait for all streams on a given device to complete.

This function blocks the calling thread until all the operations
on the specified device have finished. It is useful for ensuring
synchronization between CPU and GPU or across multiple devices.

Args:
    device (CUDAPlace | CustomPlace | int | str | None, optional): The target device to synchronize.
        - None: Synchronize the current device.
        - int: Device index, e.g., ``2`` means ``gpu:2``.
        - str: Device string, e.g., ``'cuda:0'`` or ``'gpu:0'``.
        - CUDAPlace: A Paddle CUDA place object.
        - CustomPlace: A Paddle custom device place object.

Returns:
    None

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        # synchronize the current device
        >>> paddle.cuda.synchronize()
N)r   paddle_devicesynchronizer   devs     r'   r*   r*   V   s    8 F
#Cc"r&   c                D    [        U 5      n[        R                  " U5      $ )aI  
Return the current stream for the given device.

Args:
    device (int | str | paddle.CUDAPlace | paddle.CustomPlace | None, optional):
        The target device to query.

        - None: use the current device.
        - int: device index (e.g., 0 -> 'gpu:0').
        - str: device string (e.g., "cuda:0", "gpu:1").
        - CUDAPlace or CustomPlace: Paddle device objects.

Returns:
    core.CUDAStream: The current CUDA stream associated with the given device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        # Get the current stream on the default CUDA device
        >>> s1 = paddle.cuda.current_stream()
        >>> print(s1)

        # Get the current stream on device cuda:0
        >>> s2 = paddle.cuda.current_stream("cuda:0")
        >>> print(s2)
)r   r)   current_streamr+   s     r'   r.   r.   v   s    < F
#C'',,r&   c                     [        5       $ )a  
Check whether the current stream is in CUDA graph capturing state.

Returns:
    bool: True if the current stream is capturing, False otherwise.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> if paddle.device.is_available():
        ...     graph = paddle.device.cuda.graphs.CUDAGraph()
        ...     graph.capture_begin()
        ...     print(paddle.cuda.is_current_stream_capturing())  # True
        ...     graph.capture_end()
)_is_current_stream_capturingr%   r&   r'   r   r      s    " ())r&   c                .    [         R                  " U 5      $ )a  
Get the properties of a CUDA device.

Args:
    device (int | str | paddle.CUDAPlace | paddle.CustomPlace | None, optional):
        The target device to query.

        - None: use the current device.
        - int: device index (e.g., 0 -> 'gpu:0').
        - str: device string (e.g., "cuda:0", "gpu:1").
        - CUDAPlace or CustomPlace: Paddle device objects.

Returns:
    DeviceProperties: An object containing the device properties, such as
    name, total memory, compute capability, and multiprocessor count.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        # Get the properties of the current device
        >>> props = paddle.cuda.get_device_properties()
        >>> print(props)

)r)   get_device_propertiesr   s    r'   r2   r2      s    8 ..v66r&   c                .    [         R                  " U 5      $ )a  
Get the name of a device.

Args:
    device (int | str | paddle.CUDAPlace | paddle.CustomPlace | None, optional):
        The target device to query.

        - None: use the current device.
        - int: device index (e.g., 0 -> 'gpu:0').
        - str: device string (e.g., "cuda:0", "gpu:1").
        - CUDAPlace or CustomPlace: Paddle device objects.

Returns:
    str: The name of the CUDA device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        # Get the name of the current CUDA device
        >>> name = paddle.cuda.get_device_name()
        >>> print(name)

        # Get the name of device cuda:0
        >>> name0 = paddle.cuda.get_device_name("cuda:0")
        >>> print(name0)
)r)   get_device_namer3   s    r'   r5   r5      s    < ((00r&   c                .    [         R                  " U 5      $ )a  
Get the compute capability (major, minor) of a device.

Args:
    device (int | str | paddle.CUDAPlace | paddle.CustomPlace | None, optional):
        The target device to query.

        - None: use the current device.
        - int: device index (e.g., 0 -> 'gpu:0').
        - str: device string (e.g., "cuda:0", "gpu:1").
        - CUDAPlace or CustomPlace: Paddle device objects.

Returns:
    tuple[int, int]: A tuple ``(major, minor)`` representing the compute capability of the CUDA device.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle

        # Get compute capability of the current CUDA device
        >>> capability = paddle.cuda.get_device_capability()
        >>> print(capability)  # e.g., (8, 0)

        # Get compute capability of device cuda:0
        >>> capability0 = paddle.cuda.get_device_capability("cuda:0")
        >>> print(capability0)
)r)   get_device_capabilityr3   s    r'   r7   r7      s    < ..v66r&   c                    [        U 5        g)a
  

Sets the seed for global default generator, which manages the random number generation.

Args:
    seed(int): The random seed to set.

Returns:
    None

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.cuda.manual_seed_all(102)

N)device_manual_seed_all)seeds    r'   r   r     s    $ 4 r&   c                .    [         R                  " U 5      $ )a  
Return the random number generator state of the specified device.

Args:
    device (DeviceLike, optional): The device to retrieve the RNG state from.
        If not specified, uses the current default device (as returned by paddle.framework._current_expected_place_()).
        Can be a device object, integer device ID, or device string.

Returns:
    core.GeneratorState: The current RNG state of the specified device.

Examples:
    .. code-block:: python

        >>> import paddle
        >>> paddle.cuda.get_rng_state()
)r)   get_rng_stater3   s    r'   r<   r<   "  s    & &&v..r&   c                0    [         R                  " X5        g)a"  
Set the random number generator state of the specified device.

Args:
    new_state (core.GeneratorState): The desired RNG state to set.
        This should be a state object previously obtained from ``get_rng_state()``.
    device (DeviceLike, optional): The device to set the RNG state for.
        If not specified, uses the current default device (as returned by ``paddle.framework._current_expected_place_()``).
        Can be a device object, integer device ID, or device string.

Returns:
    None

Examples:
    .. code-block:: python

        >>> import paddle
        >>> # Save RNG state
        >>> state = paddle.cuda.get_rng_state()
        >>> # Do some random operations
        >>> x = paddle.randn([2, 3])
        >>> # Restore RNG state
        >>> paddle.cuda.set_rng_state(state)
N)r)   set_rng_state)	new_stater   s     r'   r>   r>   8  s    6 	2r&   c                  <    \ rS rSrSr\SS j5       r\S 5       rSrg)nvtxiV  z%Namespace for NVTX marker operations.c                V    [         R                  R                  R                  U 5        g)ag  
Push an NVTX range marker with the given message.

Args:
    msg (str): The name of the NVTX range.
Example:
    .. code-block:: python
        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # paddle.device.nvtx.range_push("test") is equivalent to paddle.cuda.nvtx.range_push("test")
        >>> paddle.cuda.nvtx.range_push("test")

N)paddler   r   nvprof_nvtx_push)msgs    r'   
range_pushnvtx.range_pushY  s     	))#.r&   c                 T    [         R                  R                  R                  5         g)a   
Pop the most recent NVTX range marker.
Example:
    .. code-block:: python
        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # paddle.device.nvtx.range_pop("test") is equivalent to paddle.cuda.nvtx.range_pop("test")
        >>> paddle.cuda.nvtx.range_pop()
N)rC   r   r   nvprof_nvtx_popr%   r&   r'   	range_popnvtx.range_popj  s     	((*r&   r%   N)rE   str)	__name__
__module____qualname____firstlineno____doc__staticmethodrF   rJ   __static_attributes__r%   r&   r'   rA   rA   V  s+    // /  
+ 
+r&   rA   c                 6    [         R                  R                  $ )a  Retrieves the CUDA runtime API module.

This function initializes the CUDA runtime environment if it is not already
initialized and returns the CUDA runtime API module (_cudart). The CUDA
runtime API module provides access to various CUDA runtime functions.

Args:
    ``None``

Returns:
    module: The CUDA runtime API module (_cudart).

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle
        >>> from paddle.cuda import cudart, check_error
        >>> import os
        >>>
        >>> os.environ['CUDA_PROFILE'] = '1'
        >>>
        >>> def perform_cuda_operations_with_streams():
        >>>     stream = paddle.cuda.Stream()
        >>>     with paddle.cuda.stream(stream):
        >>>         x = paddle.randn((100, 100), device='cuda')
        >>>         y = paddle.randn((100, 100), device='cuda')
        >>>         z = paddle.mul(x, y)
        >>>     return z
        >>>
        >>> paddle.cuda.synchronize()
        >>> # print("====== Start nsys profiling ======")
        >>> check_error(cudart().cudaProfilerStart())
        >>> paddle.core.nvprof_start()
        >>> paddle.core.nvprof_nvtx_push("Test")
        >>> result = perform_cuda_operations_with_streams()
        >>> paddle.core.nvprof_nvtx_pop()
        >>> # print("CUDA operations completed.")
        >>> check_error(paddle.cuda.cudart().cudaProfilerStop())
        >>> # print("====== End nsys profiling ======")
)r   	libpaddle_cudartr%   r&   r'   cudartrW   x  s    T >>!!!r&   c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )	CudaErrori  c                   > [         R                  R                  R                  [         R                  R                  R	                  U5      5      n[
        TU ]  U SU S35        g )Nz ())r   rU   rV   cudaGetErrorString	cudaErrorsuper__init__)selfcoderE   	__class__s      r'   r_   CudaError.__init__  sQ    nn$$77NN"",,T2
 	C54&*+r&   r%   )ra   intreturnNone)rM   rN   rO   rP   r_   rS   __classcell__)rb   s   @r'   rY   rY     s    , ,r&   rY   c                ~    U [         R                  R                  R                  R                  :w  a  [        U 5      eg)a  Check the return code of a CUDA runtime API call.

This function validates whether the given result code from a CUDA
runtime call indicates success. If the result code is not
:data:`base.libpaddle._cudart.cudaError.success`, it raises a
:class:`CudaError`.

Args:
    res (int): The CUDA runtime return code.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> from paddle.cuda import check_error
        >>> check_error(0) # check for cuda success code # will not raise Error
        >>> # check_error(1) # check for cuda error code 1(invalid argument), will raise Error
        >>> # check_error(2) # check for cuda error code 2(out of memory), will raise Error
N)r   rU   rV   r]   successrY   )ress    r'   check_errorrk     s1    ( dnn$$..666n 7r&   c                0   U c  [         R                  " 5       n [        U [        5      (       a  [         R                  " U 5      n [        U [
        5      (       a  U nO[        U [        R                  5      (       a5  [        U [        R                  5      (       a$  U R                  " 5       (       d  [        SU  35      e[        U [        R                  5      (       a  U R                  " 5       OU R                  " 5       n[        5       R                  U5      $ )a,  Return the free and total GPU memory (in bytes) for a given device using ``cudaMemGetInfo``.

This function queries the CUDA runtime for the amount of memory currently
available and the total memory capacity of the specified device.

Args:
    device (DeviceLike, optional): The target device. If ``None`` (default),
        the current device, as returned by ``paddle.device.get_device``
        will be used.

Returns:
    tuple[int, int]: A tuple ``(free, total)``, where
        - ``free`` (int): The number of free bytes of GPU memory available.
        - ``total`` (int): The total number of bytes of GPU memory.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> from paddle.cuda import mem_get_info
        >>> free_bytes, total_bytes = mem_get_info()
z!Expected a cuda device, but got: )r)   
get_device
isinstancerL   _convert_to_placerd   r   	CUDAPlacePlaceis_gpu_place
ValueErrorget_device_idgpu_device_idrW   cudaMemGetInfo)r   	device_ids     r'   mem_get_inforx     s    . ~#..0&#*<<VD&#	&$..11vtzz**63F3F3H3H@IJJ &$..11   "%%' 	
 8""9--r&   c                 ,    [         R                  " 5       $ )a2  
Return the number of devices available.

Returns:
    int: The number of devices available.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> count = paddle.cuda.device_count()
        >>> print(f"Number of devices available: {count}")
)r)   device_countr%   r&   r'   rz   rz     s    " %%''r&   c                 .    [         R                  " 5         g)a7  
Release all unoccupied cached memory currently held by the caching allocator so that those can be used in other application and visible in nvidia-smi.

Returns:
    None

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # Create a tensor to allocate memory
        >>> tensor = paddle.randn([1000, 1000], device='cuda')
        >>> # Delete the tensor to free memory (but it may still be cached)
        >>> del tensor
        >>> # Release the cached memory
        >>> paddle.cuda.empty_cache()
N)r)   empty_cacher%   r&   r'   r|   r|     s    * r&   c                    [         R                  " 5       n [         R                  " 5       nSn[        R                  " 5       nU(       a)  U H"  n[         R
                  " U5      (       d  M   Sn  O   OSnU =(       d    U=(       d    U$ )al  
Return whether device has been initialized.

Returns:
    bool: True if any device (CUDA, XPU, or Custom) has been initialized, False otherwise.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> initialized = paddle.cuda.is_initialized()
        >>> print(f"Device initialized: {initialized}")
FT)r   is_compiled_with_cudais_compiled_with_xpur)   get_all_custom_device_typeis_compiled_with_custom_device)cuda_initializedxpu_initializedcustom_device_initializedcustom_device_typesdevice_types        r'   is_initializedr     sz    " 113//1O !&'BBD.K22;??,0) /
 %*! KK2KKr&   c                .    [         R                  " U 5      $ )aL  
Return the current device memory occupied by tensors in bytes for a given device.

Args:
    device (DeviceLike, optional): The device to query. If None, use the current device.
        Can be paddle.CUDAPlace, paddle.CustomPlace, paddle.XPUPlace, int (device index), or str (device string).

Returns:
    int: The current memory occupied by tensors in bytes.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # Get memory allocated for current device
        >>> mem_allocated = paddle.cuda.memory_allocated()
        >>> print(f"Memory allocated: {mem_allocated} bytes")
        >>>
        >>> # Get memory allocated for specific device
        >>> mem_allocated = paddle.cuda.memory_allocated(0)
        >>> print(f"Memory allocated on device 0: {mem_allocated} bytes")
)r)   memory_allocatedr3   s    r'   r   r   B  s    4 ))&11r&   c                .    [         R                  " U 5      $ )a^  
Return the peak size of memory that is allocated to tensor of the given device.

Note:
    The size of memory allocated to tensor is 256-byte aligned in Paddle, which may larger than the memory size that tensor actually need.
    For instance, a float32 0-D Tensor with shape [] will take up 256 bytes memory, even though storing a float32 data requires only 4 bytes.

Args:
    device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
        the string name of device like 'gpu:x'. If device is None, the device is the current device.
        Default: None.

Return:
    int: The peak size of memory that is allocated to tensor of the given device, in bytes.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')  # or '<custom_device>'

        >>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(paddle.CUDAPlace(0))
        >>> max_memory_allocated_size = paddle.cuda.max_memory_allocated(0)
        >>> max_memory_allocated_size = paddle.cuda.max_memory_allocated("gpu:0")
)r)   max_memory_allocatedr3   s    r'   r   r   _  s    6 --f55r&   c                .    [         R                  " U 5      $ )a8  
Return the peak size of memory that is held by the allocator of the given device.

Args:
    device(paddle.Place|int|str|None, optional): The device, the id of the device or
        the string name of device like 'gpu:x'. If device is None, the device is the current device.
        Default: None.

Return:
    int: The peak size of memory that is held by the allocator of the given device, in bytes.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')  # or '<custom_device>'

        >>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(paddle.CUDAPlace(0))
        >>> max_memory_reserved_size = paddle.cuda.max_memory_reserved(0)
        >>> max_memory_reserved_size = paddle.cuda.max_memory_reserved("gpu:0")
)r)   max_memory_reservedr3   s    r'   r   r   }  s    . ,,V44r&   c                .    [         R                  " U 5      $ )a  
Reset the peak size of memory that is allocated to tensor of the given device.

Args:
    device(paddle.Place|int|str|None, optional): The device, the id of the device or
        the string name of device like 'gpu:x'. If device is None, the device is the current device.
        Default: None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')  # or '<custom_device>'

        >>> paddle.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0))
        >>> paddle.cuda.reset_max_memory_allocated(0)
        >>> paddle.cuda.reset_max_memory_allocated("gpu:0")
)r)   reset_max_memory_allocatedr3   s    r'   r   r     s    * 33F;;r&   c                .    [         R                  " U 5      $ )a  
Reset the peak size of memory that is held by the allocator of the given device.

Args:
    device(paddle.Place|int|str|None, optional): The device, the id of the device or
        the string name of device like 'gpu:x'. If device is None, the device is the current device.
        Default: None.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> paddle.device.set_device('gpu')  # or '<custom_device>'

        >>> paddle.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0))
        >>> paddle.cuda.reset_max_memory_reserved(0)
        >>> paddle.cuda.reset_max_memory_reserved("gpu:0")
)r)   reset_max_memory_reservedr3   s    r'   r   r     s    ( 226::r&   c                .    [         R                  " U 5      $ )a\  
Return the current device memory managed by the caching allocator in bytes for a given device.

Args:
    device (DeviceLike, optional): The device to query. If None, use the current device.
        Can be paddle.CUDAPlace, paddle.CustomPlace, paddle.XPUPlace, int (device index), or str (device string).

Returns:
    int: The current memory managed by the caching allocator in bytes.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> # Get memory reserved for current device
        >>> mem_reserved = paddle.cuda.memory_reserved()
        >>> print(f"Memory reserved: {mem_reserved} bytes")
        >>>
        >>> # Get memory reserved for specific device
        >>> mem_reserved = paddle.cuda.memory_reserved(0)
        >>> print(f"Memory reserved on device 0: {mem_reserved} bytes")
)r)   memory_reservedr3   s    r'   r   r     s    4 ((00r&   c                V   [        U [        5      (       a  [        R                  " 5       n[        U[        R
                  5      (       a  SU  3nGOC[        U[        R                  5      (       a  UR                  5        SU  3nGO[        U[        R                  5      (       a  SU  3nO[        S5      e[        U [        5      (       a  U nO[        U [        R
                  5      (       a  SU R                  " 5        3nO[        U [        R                  5      (       a&  U R                  " 5        SU R                  " 5        3nOL[        U [        R                  5      (       a  SU R                  " 5        3nO[        S[        U 5       S35      e[        R                  " U5        g)a  
Set the current device.

Args:
    device (DeviceLike): The device to set as current.
        Can be paddle.CUDAPlace, paddle.CustomPlace, paddle.XPUPlace,
        int (device index), or str (device string).

Returns:
    None

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle
        >>> # Set current device to GPU:0
        >>> paddle.cuda.set_device(0)
        >>> # Set current device to GPU:0
        >>> paddle.cuda.set_device('gpu:0')
        >>> # Set current device to a specific CUDAPlace
        >>> place = paddle.CUDAPlace(0)
        >>> paddle.cuda.set_device(place)
zgpu::zxpu:zTPaddle-CPU is not supported. Please use PaddlePaddle with CUDA, XPU or Custom DevicezUnsupported device type: z9. Expected int, str, CUDAPlace, XPUPlace, or CustomPlace.N)rn   rd   r	   _current_expected_place_r   rp   CustomPlaceget_device_typeXPUPlacers   rL   rt   typer)   
set_device)r   device_place
device_strs      r'   r   r     s[   6 &# 99;lDNN33xJd&6&677(88:;1VHEJdmm44xJf  
FC	 	 
	FDNN	+	+F00234
	FD,,	-	-..0163G3G3I2JK
	FDMM	*	*F00234
'V~5no
 	

 Z(r&   c                H    [        U5      n[        R                  " X5      nU$ )a2  
Wrap an externally allocated CUDA stream into a Paddle :class:`paddle.cuda.Stream` object.

This function allows integrating CUDA streams allocated by other libraries
into Paddle, enabling multi-library interoperability and data exchange.

Note:
    - This function does not manage the lifetime of the external stream.
      It is the caller's responsibility to ensure the external stream remains valid
      while the returned Paddle stream is in use.
    - Providing an incorrect `device` may result in errors during kernel launches.

Args:
    data_ptr (int): Integer representation of the external `cudaStream_t`.
    device (DeviceLike, optional): The device where the external stream was created.
        Can be a Paddle device string (e.g., "cuda:0"), an int index (e.g., 0),
        or a PaddlePlace (CUDAPlace). Default: None (current device).

Returns:
    paddle.cuda.Stream: A Paddle Stream object that wraps the external CUDA stream.

Examples:
    .. code-block:: python
        >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
        >>> import paddle

        >>> # Assume an external library provides a stream pointer:original_raw_ptr
        >>> # doctest: +SKIP('original_raw_ptr not exist')
        >>> original_raw_ptr = 77777
        >>> external_stream = paddle.cuda.get_stream_from_external(original_raw_ptr)
)r   r)   get_stream_from_external)data_ptrr   	stream_exs      r'   r   r      s%    F v&F66xHIr&   )*rW   rk   r   r   rx   r*   r.   r2   r5   r7   r   r   r   r   rz   r|   r   r   r   r   r   r   r<   r>   r   r   r   r   r   r   r"   r    r!   r   r   r   r   r   r   r
   r   r   )re   bool)N)r   
DeviceLikere   rf   )r   r   re   r   )r   r   )r   r   re   rL   )r   r   re   ztuple[int, int])r:   rd   re   rf   )r   DeviceLike | Nonere   core.GeneratorState)r?   r   r   r   re   rf   )rj   rd   re   rf   )re   rd   )re   rf   )r   r   re   rd   )r   r   re   rf   )r   rd   r   r   re   r   )F
__future__r   typingr   r   rC   r   r   r   r)   r	   paddle.devicer
   r   r   r   r   r   r   r   r$   r   r   r0   r   r   r9   r   r   r   paddle.tensor.creationr   r   r   r   r   r   r   r    r!   r"   rq   rd   rL   r   r*   r.   r2   r5   r7   r<   r>   rA   rW   RuntimeErrorrY   rk   rx   rz   r|   r   r   r   r   r   r   r   r   r   __all__r%   r&   r'   <module>r      s<  " # '  A A    $   v{{((#sD89J"8#@-D*(7>1B7B!*/. AE3"3,=3	3<+ +D*"Z, ,0*.Z(( 0!LH2:6<54<0;.1::)| )-&&%&&R+r&   