
    x-j<                        d dl mZ d dlZd dlmZmZmZmZ d dlZ	ddl
mZ ddlmZmZmZ  G d deee                            Z G d	 d
eed                            Z G d de          ZdS )    )annotationsN)IterableIteratorSequenceSized   )IterableDataset)RandomSamplerSamplerSequenceSamplerc                  `    e Zd ZU dZded<   ded<   ded<   ded<   	 	 	 	 	 dddZddZddZd	S )BatchSamplera  
    A base implement of batch sampler used by `paddle.io.DataLoader`
    which yield mini-batch indices(a list/tuple with length as
    mini-batch size and holds sample indices) iterably.

    Batch sampler used by :code:`paddle.io.DataLoader` should be a subclass
    of :code:`paddle.io.BatchSampler`, BatchSampler subclasses should
    implement following methods:

    :code:`__iter__`: return mini-batch indices iterably.

    :code:`__len__`: get mini-batch number in an epoch.


    Args:
        dataset(Dataset, optional): this should be an instance of a subclass of :ref:`api_paddle_io_Dataset` or
                :ref:`api_paddle_io_IterableDataset` or other python object which implemented
                :code:`__len__` for BatchSampler to get indices as the
                range of :attr:`dataset` length. Default None, disabled.
        sampler (Sampler, Iterable, optional): this should be a :ref:`api_paddle_io_Sample` or Iterable
                instance which implemented :code:`__iter__` to generate
                sample indices. :attr:`sampler` and :attr:`dataset`
                can not be set in the same time.  If :attr:`sampler`
                is set, :attr:`dataset` should not be set. Default None, disabled.
        shuffle(bool, optional): whether to shuffle indices order before generating
                batch indices. Default False, don't shuffle indices before generating batch indices.
        batch_size(int, optional): sample indice number in a mini-batch indices. default 1, each mini-batch includes 1 sample.
        drop_last(bool, optional): whether drop the last incomplete (less than 1 mini-batch) batch dataset. Default False, keep it.
    see :ref:`api_paddle_io_DataLoader`

    Returns:
        BatchSampler: an iterable object for indices iterating

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import RandomSampler, BatchSampler, Dataset

            >>> np.random.seed(2023)
            >>> # init with dataset
            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> bs = BatchSampler(dataset=RandomDataset(100),
            ...                     shuffle=False,
            ...                     batch_size=16,
            ...                     drop_last=False)
            ...
            >>> for batch_indices in bs:
            ...     print(batch_indices)
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
            ...
            [96, 97, 98, 99]
            >>> # init with sampler
            >>> sampler = RandomSampler(RandomDataset(100))
            >>> bs = BatchSampler(sampler=sampler,
            ...                     batch_size=8,
            ...                     drop_last=True)
            ...
            >>> for batch_indices in bs:
            ...     print(batch_indices)
            [56, 12, 68, 0, 82, 66, 91, 44]
            ...
            [53, 17, 22, 86, 52, 3, 92, 33]
    zSampler[int] | Iterable[int]samplerint
batch_sizeboolshuffle	drop_lastNFr   datasetSized | NoneSampler | Iterable[int] | NonereturnNonec                   |V|
J d            t          |t          t          f          sJ dt          |                       |r
J d            || _        nt          |t
                    r
J d            |
J d            t          |t                    sJ dt          |                       |rt          |          | _        nt          |          | _        t          |t                    r|dk    sJ d|             || _
        || _        t          |t                    sJ d	t          |                       || _        d
| _        d S )Nz'either dataset or sampler should be setz@sampler should be either paddle.io.Sampler or Iterable, but got z+shuffle should be False when sampler is setz1dataset should not be a paddle.io.IterableDatasetz'should not set both dataset and samplerz+shuffle should be a boolean value, but got r   z1batch_size should be a positive integer, but got z-drop_last should be a boolean value, but got r   )
isinstancer   r   typer   r	   r   r
   r   r   r   r   r   
_acc_steps)selfr   r   r   r   r   s         b/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/io/dataloader/batch_sampler.py__init__zBatchSampler.__init__m   s    ?&&9 '&& g':;;  bSWX_S`S`bb ; MM MMM;"DLL!'?;;  C ; ??$M???gt,,  Md7mmMM ,  8,W55.w77*c** 	
zA~~~L
LL 0>~= %)T** 	
 	
MDOOMM	
 	
* #     Iterator[list[int]]c              #     K   | j         | j        z  }g }| j        D ]0}|                    |           t	          |          |k    r|V  g }1| j        st	          |          dk    r|V  d S d S d S )Nr   )r   r   r   appendlenr   )r   local_batch_sizebatch_indicesidxs       r   __iter__zBatchSampler.__iter__   s      ?T_<< 	# 	#C  %%%=!!%555#### "~ 	 #m"4"4q"8"8	  	 "8"8r!   c                    | j         | j        z  }t          | j                  }|t	          | j                   |dz
  z  z  }||z  S Nr   )r   r   r%   r   r   r   r   r&   num_sampless      r   __len__zBatchSampler.__len__   sN    ?T_<$,''st~-..2BQ2FGG...r!   )NNFr   F)r   r   r   r   r   r   r   r   r   r   r   r   r   r"   r   r   )__name__
__module____qualname____doc____annotations__r    r)   r.    r!   r   r   r      s         K KZ *)))OOOMMMOOO !%26* * * * *X	  	  	  	 / / / / / /r!   r   c                  6    e Zd ZU ded<   ded<   dddZdd
ZdS )_InfiniteIterableSamplerr	   r   r   r   r   r   r   c                `    t          |t                    s
J d            || _        || _        d S )Nz:dataset should be an instance of paddle.io.IterableDataset)r   r	   r   r   )r   r   r   s      r   r    z!_InfiniteIterableSampler.__init__   s=    '?33 	
 	
H	
 	
3 $r!   Iterator[list[None]]c              #  $   K   	 d g| j         z  V  )N)r   )r   s    r   r)   z!_InfiniteIterableSampler.__iter__   s%      	+&4?****	+r!   N)r   )r   r	   r   r   r   r   )r   r:   )r1   r2   r3   r5   r    r)   r6   r!   r   r8   r8      sX         OOO% % % % %+ + + + + +r!   r8   c                      e Zd ZU dZded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   	 	 	 	 dddZddZddZddZdS )DistributedBatchSampleray	  Sampler that restricts data loading to a subset of the dataset.

    In such case, each process can pass a DistributedBatchSampler instance
    as a DataLoader sampler, and load a subset of the original dataset that
    is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Args:
        dataset(Dataset): this could be an instance of subclass of :ref:`api_paddle_io_Dataset`
                     or other python object which implemented
                     `__len__` for BatchSampler to get indices of samples.
        batch_size(int): sample size of each mini-batch.
        num_replicas(int, optional): process number in distributed training.
            If :attr:`num_replicas` is None, :attr:`num_replicas` will be
            retrieved from :ref:`api_paddle_distributed_ParallelEnv` .
            Default None.
        rank(int, optional): the rank of the current process among :attr:`num_replicas`
            processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
            :ref:`api_paddle_distributed_ParallelEnv`. Default None.
        shuffle(bool, optional): whether to shuffle indices order before generating
            batch indices. Default False.
        drop_last(bool, optional): whether drop the last incomplete(less than a mini-batch) batch dataset size.
            Default False.

    Returns:
        DistributedBatchSampler, return an iterable object for indices iterating.

    Examples:
        .. code-block:: python

            >>> import numpy as np

            >>> from paddle.io import Dataset, DistributedBatchSampler

            >>> # init with dataset
            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> dataset = RandomDataset(100)
            >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

            >>> for data in sampler:
            ...     # do something
            ...     break
    r   r   r   r   r   r   nranksepoch
local_rankr-   
total_sizeNFnum_replicas
int | Nonerankr   r   r   c                   || _         t          |t                    r|dk    s
J d            || _        t          |t                    s
J d            || _        t          |t                    s
J d            ddlm} |-t          |t                    r|dk    s
J d            || _        n |            j        | _        |-t          |t                    r|dk    s
J d            || _	        n |            j	        | _	        || _
        d| _        t          t          j        t          | j                   dz  | j        z                      | _        | j        | j        z  | _        d	| _        d S )
Nr   z'batch_size should be a positive integerz!shuffle should be a boolean valuez$drop_last should be a boolean number)ParallelEnvz)num_replicas should be a positive integerz%rank should be a non-negative integerg      ?r   )r   r   r   r   r   r   paddle.distributedrF   r>   r@   r   r?   mathceilr%   r-   rA   r   )r   r   r   rB   rD   r   r   rF   s           r   r    z DistributedBatchSampler.__init__   s    *c** 	
zA~~~5 0>~= %'4((MM*MMM()T** 	
 	
2	
 	
* 	322222#lC00 \A5E5E5E; 6F5EE 'DKK%+--.DKdC(( TQYYY7 .7Y6 #DOO)kmm6DO"
tyT\):):S)@4;)NOOPP*T[8 r!   r"   c              #  x   K    j          j        z  }t           j                  }t	          j        |                                          } j        t          |          z
  }|t          |          k    r||d |         z  }n2||t          j	        |t          |          z            z  d |         z  }t          |           j        k    sJ  j
        rGt          j                             j                  
                    |            xj        dz  c_         fd} j        dk    r ||          }t          |           j        k    sJ t!          |          }g }|D ]0}|                    |           t          |          |k    r|V  g }1 j        st          |          dk    r|V  d S d S d S )Nr   c                   g }j         j        j        z  z  }|j        z  dk    sJ |j        z  }t          j        j        z  t          |           |z
  j        j        z            D ]'}|                    | ||j        z                       (| t          |           |z
  d          } |                    | j        |z  j        dz   |z                      |S )Nr   r   )rA   r   r>   ranger@   r%   extend)indicessubsampled_indiceslast_batch_sizelast_local_batch_sizeir   s        r   _get_indices_by_batch_sizezDDistributedBatchSampler.__iter__.<locals>._get_indices_by_batch_sizeA  s   !#"o4;1NOO"T[0A5555$3t{$B!$/1G.$+-  L L
 #))'!a$/6I2I*JKKKKc'll_<>>?G%%O&;;!++?, ,   &%r!   r   )r   r   r%   r   nparangetolistrA   rH   rI   r   randomRandomStater?   r>   r-   iterr$   r   )	r   r&   r-   rN   padding_sizerS   _sample_iterr'   r(   s	   `        r   r)   z DistributedBatchSampler.__iter__.  s     ?T_<$,'')K((//11W53w<<''w}}--GG$)L3w<<,G"H"HH G 7||t....< 	I!!$*--55g>>>JJ!OJJ	& 	& 	& 	& 	&2 ;??0099G7||t/////G}} 	# 	#C  %%%=!!%555#### "~ 	 #m"4"4q"8"8	  	 "8"8r!   c                t    | j         | j        z  }| j        }|t          | j                   |dz
  z  z  }||z  S r+   )r   r   r-   r   r   r,   s      r   r.   zDistributedBatchSampler.__len__i  sG    ?T_<&st~-..2BQ2FGG...r!   c                    || _         dS )a  
        Sets the epoch number. When :attr:`shuffle=True`, this number is used
        as seeds of random numbers. By default, users may not set this, all
        replicas (workers) use a different random ordering for each epoch.
        If set same number at each epoch, this sampler will yield the same
        ordering at all epochs.

        Arguments:
            epoch (int): Epoch number.

        Examples:
            .. code-block:: python

                >>> import numpy as np

                >>> from paddle.io import Dataset, DistributedBatchSampler

                >>> # init with dataset
                >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
                ...     def __init__(self, num_samples):
                ...         self.num_samples = num_samples
                ...
                ...     def __getitem__(self, idx):
                ...         image = np.random.random([784]).astype('float32')
                ...         label = np.random.randint(0, 9, (1, )).astype('int64')
                ...         return image, label
                ...
                ...     def __len__(self):
                ...         return self.num_samples
                ...
                >>> dataset = RandomDataset(100)
                >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

                >>> for epoch in range(10):
                ...     sampler.set_epoch(epoch)
        N)r?   )r   r?   s     r   	set_epochz!DistributedBatchSampler.set_epocho  s    J 


r!   )NNFF)r   r   r   r   rB   rC   rD   rC   r   r   r   r   r   r   r/   r0   )r?   r   r   r   )	r1   r2   r3   r4   r5   r    r)   r.   r^   r6   r!   r   r=   r=      s         7 7r NNNOOOOOOKKKJJJOOOOOO $(. . . . .`9  9  9  9 v/ / / /% % % % % %r!   r=   )
__future__r   rH   collections.abcr   r   r   r   numpyrT   r   r	   r   r
   r   r   r   r   r8   r=   r6   r!   r   <module>rb      s/   # " " " " "  ? ? ? ? ? ? ? ? ? ? ? ?     $ $ $ $ $ $ < < < < < < < < < <N/ N/ N/ N/ N/78C=) N/ N/ N/b+ + + + +wx~6 + + + Y Y Y Y Yl Y Y Y Y Yr!   