
    ϑi<                        S SK Jr  S SKrS SKJrJrJrJr  S SKr	SSK
Jr  SSKJrJrJr   " S S\\\      5      r " S	 S
\\S      5      r " S S\5      rg)    )annotationsN)IterableIteratorSequenceSized   )IterableDataset)RandomSamplerSamplerSequenceSamplerc                      \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'        S           SS jjrSS jrSS jrSr	g
)BatchSampler   a  
A base implement of batch sampler used by `paddle.io.DataLoader`
which yield mini-batch indices(a list/tuple with length as
mini-batch size and holds sample indices) iterably.

Batch sampler used by :code:`paddle.io.DataLoader` should be a subclass
of :code:`paddle.io.BatchSampler`, BatchSampler subclasses should
implement following methods:

:code:`__iter__`: return mini-batch indices iterably.

:code:`__len__`: get mini-batch number in an epoch.


Args:
    dataset(Dataset, optional): this should be an instance of a subclass of :ref:`api_paddle_io_Dataset` or
            :ref:`api_paddle_io_IterableDataset` or other python object which implemented
            :code:`__len__` for BatchSampler to get indices as the
            range of :attr:`dataset` length. Default None, disabled.
    sampler (Sampler, Iterable, optional): this should be a :ref:`api_paddle_io_Sample` or Iterable
            instance which implemented :code:`__iter__` to generate
            sample indices. :attr:`sampler` and :attr:`dataset`
            can not be set in the same time.  If :attr:`sampler`
            is set, :attr:`dataset` should not be set. Default None, disabled.
    shuffle(bool, optional): whether to shuffle indices order before generating
            batch indices. Default False, don't shuffle indices before generating batch indices.
    batch_size(int, optional): sample indice number in a mini-batch indices. default 1, each mini-batch includes 1 sample.
    drop_last(bool, optional): whether drop the last incomplete (less than 1 mini-batch) batch dataset. Default False, keep it.
see :ref:`api_paddle_io_DataLoader`

Returns:
    BatchSampler: an iterable object for indices iterating

Examples:

    .. code-block:: python

        >>> import numpy as np
        >>> from paddle.io import RandomSampler, BatchSampler, Dataset

        >>> np.random.seed(2023)
        >>> # init with dataset
        >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
        ...     def __init__(self, num_samples):
        ...         self.num_samples = num_samples
        ...
        ...     def __getitem__(self, idx):
        ...         image = np.random.random([784]).astype('float32')
        ...         label = np.random.randint(0, 9, (1, )).astype('int64')
        ...         return image, label
        ...
        ...     def __len__(self):
        ...         return self.num_samples
        ...
        >>> bs = BatchSampler(dataset=RandomDataset(100),
        ...                     shuffle=False,
        ...                     batch_size=16,
        ...                     drop_last=False)
        ...
        >>> for batch_indices in bs:
        ...     print(batch_indices)
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        ...
        [96, 97, 98, 99]
        >>> # init with sampler
        >>> sampler = RandomSampler(RandomDataset(100))
        >>> bs = BatchSampler(sampler=sampler,
        ...                     batch_size=8,
        ...                     drop_last=True)
        ...
        >>> for batch_indices in bs:
        ...     print(batch_indices)
        [56, 12, 68, 0, 82, 66, 91, 44]
        ...
        [53, 17, 22, 86, 52, 3, 92, 33]
zSampler[int] | Iterable[int]samplerint
batch_sizeboolshuffle	drop_lastNc                \   UcM  Uc   S5       e[        U[        [        45      (       d   S[        U5       35       eU(       a   S5       eX l        Ov[        U[
        5      (       a   S5       eUb   S5       e[        U[        5      (       d   S[        U5       35       eU(       a  [        U5      U l        O[        U5      U l        [        U[        5      (       a  US:  d
   SU 35       eX@l
        X0l        [        U[        5      (       d   S	[        U5       35       eXPl        S
U l        g )Nz'either dataset or sampler should be setz@sampler should be either paddle.io.Sampler or Iterable, but got z+shuffle should be False when sampler is setz1dataset should not be a paddle.io.IterableDatasetz'should not set both dataset and samplerz+shuffle should be a boolean value, but got r   z1batch_size should be a positive integer, but got z-drop_last should be a boolean value, but got r   )
isinstancer   r   typer   r	   r   r
   r   r   r   r   r   
_acc_steps)selfdatasetr   r   r   r   s         b/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/io/dataloader/batch_sampler.py__init__BatchSampler.__init__m   sJ    ?& 9& g':;; RSWX_S`Rab; M MM;"L!'?;; C; ?M$MM?gt,, =d7m_M, ,W5.w7*c**zA~ 	
?
|L	
= %)T** 	
;DO;LM	
* #     c              #    #    U R                   U R                  -  n/ nU R                   H+  nUR                  U5        [	        U5      U:X  d  M%  Uv   / nM-     U R
                  (       d  [	        U5      S:  a  Uv   g g g 7f)Nr   )r   r   r   appendlenr   )r   local_batch_sizebatch_indicesidxs       r   __iter__BatchSampler.__iter__   st     ??T__<<<C  %=!%55## "	  
 ~~#m"4q"8 #9~s   A	B 1B c                    U R                   U R                  -  n[        U R                  5      nU[	        U R
                  (       + 5      US-
  -  -  nX!-  $ Nr   )r   r   r"   r   r   r   r   r#   num_sampless      r   __len__BatchSampler.__len__   sM    ??T__<$,,'st~~-.2BQ2FGG..r   )r   r   r   r   r   )NNFr   F)r   zSized | Noner   zSampler | Iterable[int] | Noner   r   r   r   r   r   returnNoner.   zIterator[list[int]]r.   r   )
__name__
__module____qualname____firstlineno____doc____annotations__r   r&   r,   __static_attributes__ r   r   r   r      s{    KZ *)OMO !%26** 0* 	*
 * * 
*X	 /r   r   c                  B    \ rS rSr% S\S'   S\S'   S
SS jjrSS jrSrg	)_InfiniteIterableSampler   r	   r   r   r   c                T    [        U[        5      (       d   S5       eXl        X l        g )Nz:dataset should be an instance of paddle.io.IterableDataset)r   r	   r   r   )r   r   r   s      r   r   !_InfiniteIterableSampler.__init__   s+    '?33 	
H	
3 $r   c              #  4   #     S /U R                   -  v   M  7f)N)r   )r   s    r   r&   !_InfiniteIterableSampler.__iter__   s     &4??** s   )r   r   N)r   )r   r	   r   r   r.   r/   )r.   zIterator[list[None]])r2   r3   r4   r5   r7   r   r&   r8   r9   r   r   r;   r;      s    O%+r   r;   c                      \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'       S             SS jjrSS jrSS jrSS jr	Sr
g)DistributedBatchSampler   a  Sampler that restricts data loading to a subset of the dataset.

In such case, each process can pass a DistributedBatchSampler instance
as a DataLoader sampler, and load a subset of the original dataset that
is exclusive to it.

.. note::
    Dataset is assumed to be of constant size.

Args:
    dataset(Dataset): this could be an instance of subclass of :ref:`api_paddle_io_Dataset`
                 or other python object which implemented
                 `__len__` for BatchSampler to get indices of samples.
    batch_size(int): sample size of each mini-batch.
    num_replicas(int, optional): process number in distributed training.
        If :attr:`num_replicas` is None, :attr:`num_replicas` will be
        retrieved from :ref:`api_paddle_distributed_ParallelEnv` .
        Default None.
    rank(int, optional): the rank of the current process among :attr:`num_replicas`
        processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
        :ref:`api_paddle_distributed_ParallelEnv`. Default None.
    shuffle(bool, optional): whether to shuffle indices order before generating
        batch indices. Default False.
    drop_last(bool, optional): whether drop the last incomplete(less than a mini-batch) batch dataset size.
        Default False.

Returns:
    DistributedBatchSampler, return an iterable object for indices iterating.

Examples:
    .. code-block:: python

        >>> import numpy as np

        >>> from paddle.io import Dataset, DistributedBatchSampler

        >>> # init with dataset
        >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
        ...     def __init__(self, num_samples):
        ...         self.num_samples = num_samples
        ...
        ...     def __getitem__(self, idx):
        ...         image = np.random.random([784]).astype('float32')
        ...         label = np.random.randint(0, 9, (1, )).astype('int64')
        ...         return image, label
        ...
        ...     def __len__(self):
        ...         return self.num_samples
        ...
        >>> dataset = RandomDataset(100)
        >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

        >>> for data in sampler:
        ...     # do something
        ...     break
r   r   r   r   r   r   nranksepoch
local_rankr+   
total_sizeNc                   Xl         [        U[        5      (       a  US:  d   S5       eX l        [        U[        5      (       d   S5       eXPl        [        U[        5      (       d   S5       eSSKJn  Ub)  [        U[        5      (       a  US:  d   S5       eX0l        OU" 5       R                  U l        Ub)  [        U[        5      (       a  US:  d   S5       eX@l	        OU" 5       R                  U l	        X`l
        SU l        [        [        R                  " [        U R                   5      S-  U R                  -  5      5      U l        U R                  U R                  -  U l        S	U l        g )
Nr   z'batch_size should be a positive integerz!shuffle should be a boolean valuez$drop_last should be a boolean number)ParallelEnvz)num_replicas should be a positive integerz%rank should be a non-negative integerg      ?r   )r   r   r   r   r   r   paddle.distributedrI   rD   rF   r   rE   mathceilr"   r+   rG   r   )r   r   r   num_replicasrankr   r   rI   s           r   r    DistributedBatchSampler.__init__   sJ    *c**zA~ 	
5	
= %'4((M*MM()T** 	
2	
* 	3#lC00\A5E ;E 'K%-..DKdC((TQY 76 #O)m66DO"
tyyT\\):S)@4;;)NOP**T[[8 r   c              #    ^ #    T R                   T R                  -  n[        T R                  5      n[        R
                  " U5      R                  5       nT R                  [        U5      -
  nU[        U5      ::  a  X3S U -  nO*X3[        R                  " U[        U5      -  5      -  S U -  n[        U5      T R                  :X  d   eT R                  (       aM  [        R                  R                  T R                  5      R                  U5        T =R                  S-  sl        U 4S jnT R                  S:  a  U" U5      n[        U5      T R                  :X  d   e[!        U5      n/ nU H+  nUR#                  U5        [        U5      U:X  d  M%  Uv   / nM-     T R$                  (       d  [        U5      S:  a  Uv   g g g 7f)Nr   c                   > / nTR                   TR                  TR                  -  -  nUTR                  -  S:X  d   eUTR                  -  n[        TR                  TR                  -  [        U 5      U-
  TR                  TR                  -  5       H#  nUR                  XUTR                  -    5        M%     U [        U 5      U-
  S  n UR                  U TR                  U-  TR                  S-   U-   5        U$ )Nr   r   )rG   r   rD   rangerF   r"   extend)indicessubsampled_indiceslast_batch_sizelast_local_batch_sizeir   s        r   _get_indices_by_batch_sizeDDistributedBatchSampler.__iter__.<locals>._get_indices_by_batch_sizeA  s    !#"oo4;;1NOO"T[[0A555$3t{{$B!$//1G.$++-
 #))'a$//6I*JK c'l_<>?G%%OO&;;!++?, &%r   r   )r   r   r"   r   nparangetolistrG   rK   rL   r   randomRandomStaterE   rD   r+   iterr!   r   )	r   r#   r+   rT   padding_sizerY   _sample_iterr$   r%   s	   `        r   r&    DistributedBatchSampler.__iter__.  sx    ??T__<$,,'))K(//1W53w<'}--G$))L3w<,G"HH G 7|t...<<II!!$**-55g>JJ!OJ	&2 ;;?09G7|t/////G}C  %=!%55## "	  
 ~~#m"4q"8 #9~s   FG1Gc                    U R                   U R                  -  nU R                  nU[        U R                  (       + 5      US-
  -  -  nX!-  $ r)   )r   r   r+   r   r   r*   s      r   r,   DistributedBatchSampler.__len__i  sJ    ??T__<&&st~~-.2BQ2FGG..r   c                    Xl         g)a  
Sets the epoch number. When :attr:`shuffle=True`, this number is used
as seeds of random numbers. By default, users may not set this, all
replicas (workers) use a different random ordering for each epoch.
If set same number at each epoch, this sampler will yield the same
ordering at all epochs.

Arguments:
    epoch (int): Epoch number.

Examples:
    .. code-block:: python

        >>> import numpy as np

        >>> from paddle.io import Dataset, DistributedBatchSampler

        >>> # init with dataset
        >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
        ...     def __init__(self, num_samples):
        ...         self.num_samples = num_samples
        ...
        ...     def __getitem__(self, idx):
        ...         image = np.random.random([784]).astype('float32')
        ...         label = np.random.randint(0, 9, (1, )).astype('int64')
        ...         return image, label
        ...
        ...     def __len__(self):
        ...         return self.num_samples
        ...
        >>> dataset = RandomDataset(100)
        >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

        >>> for epoch in range(10):
        ...     sampler.set_epoch(epoch)
N)rE   )r   rE   s     r   	set_epoch!DistributedBatchSampler.set_epocho  s
    J 
r   )
r   r   r   r   rE   rF   rD   r+   r   rG   )NNFF)r   r   r   r   rM   
int | NonerN   ri   r   r   r   r   r.   r/   r0   r1   )rE   r   r.   r/   )r2   r3   r4   r5   r6   r7   r   r&   r,   rg   r8   r9   r   r   rB   rB      s    7r NOOKJOO $(.. . !	.
 . . . 
.`9 v/%r   rB   )
__future__r   rK   collections.abcr   r   r   r   numpyr[   r   r	   r   r
   r   r   r   r   r;   rB   r9   r   r   <module>rm      sZ    #  ? ?  $ < <N/78C=) N/b+wx~6 + Yl Yr   