
    ёiD                       S SK Jr  S SKrS SKJrJr  S SKrS SKJr  S SKJ	r	  S SK
JrJr  S SKJr  \(       a  S SKJr  S S	KJrJr  S S
KJr   " S S\R*                  R,                  5      r " S S\R*                  R0                  5      rg)    )annotationsN)TYPE_CHECKINGLiteral)_C_ops)LayerHelper)in_dynamic_or_pir_modeno_grad)_BatchNormBase)Tensor)DataLayoutNDParamAttrLike)Layerc                  x   ^  \ rS rSrSr       S                 SU 4S jjjrS	S jrS
S jrSrU =r	$ )	BatchNorm#   a  
Applies Batch Normalization over a SparseCooTensor as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift .

When use_global_stats = False, the :math:`\mu_{\beta}`
and :math:`\sigma_{\beta}^{2}` are the statistics of one mini-batch.
Calculated as follows:

..  math::

    \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &//\
    \ mini-batch\ mean \\
    \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \
    \mu_{\beta})^2 \qquad &//\ mini-batch\ variance \\

When use_global_stats = True, the :math:`\mu_{\beta}`
and :math:`\sigma_{\beta}^{2}` are not the statistics of one mini-batch.
They are global or running statistics (moving_mean and moving_variance). It usually got from the
pre-trained model. Calculated as follows:

.. math::
    moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global \ mean \\
    moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global \ variance \\

The normalization function formula is as follows:

..  math::

    \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\
    y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift

- :math:`\epsilon` : add a smaller value to the variance to prevent division by zero
- :math:`\gamma` : trainable proportional parameter
- :math:`\beta` : trainable deviation parameter

Parameters:
    num_features(int): Indicate the number of channels of the input ``Tensor``.
    momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
    epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5.
    weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale`
        of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
        will create ParamAttr as weight_attr. If it is set to False, the weight is not learnable.
        If the Initializer of the weight_attr is not set, the parameter is initialized with Xavier. Default: None.
    bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of batch_norm.
        If it is set to None or one attribute of ParamAttr, batch_norm
        will create ParamAttr as bias_attr. If it is set to False, the weight is not learnable.
        If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None.
    data_format(str, optional): Specify the input data format, may be "NDHWC" or "NHWC". Default "NDHWC".
    use_global_stats(bool|None, optional): Whether to use global mean and variance. If set to False, use the statistics of one mini-batch, if set to True, use the global statistics, if set to None, use global statistics in the test phase and use the statistics of one mini-batch in the training phase. Default: None.
    name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`..

Shape:
    - x: A SparseCooTensor with layout = 'NDHWC' or 'NHWC'.
    - output: SparseCooTensor with same shape as input x.

Returns:
    None.


Examples:
    .. code-block:: pycon

        >>> import paddle
        >>> paddle.seed(123)
        >>> channels = 3
        >>> x_data = paddle.randn((1, 6, 6, 6, channels)).astype('float32')
        >>> dense_x = paddle.to_tensor(x_data)
        >>> sparse_x = dense_x.to_sparse_coo(4)
        >>> batch_norm = paddle.sparse.nn.BatchNorm(channels)
        >>> batch_norm_out = batch_norm(sparse_x)
        >>> print(batch_norm_out.shape)
        paddle.Size([1, 6, 6, 6, 3])
c	                .   > [         T	U ]  UUUUUUUUS9  g )N)momentumepsilonweight_attr	bias_attrdata_formatuse_global_statsnamesuper__init__)
selfnum_featuresr   r   r   r   r   r   r   	__class__s
            [/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/sparse/nn/layer/norm.pyr   BatchNorm.__init__m   s/     	##- 	 		
    c                &    US;  a  [        S5      eg )N)NDHWCNHWCz:sparse BatchNorm only support layout of "NDHWC" and "NHWC")
ValueError)r   inputs     r    _check_data_formatBatchNorm._check_data_format   s     ))L  *r"   c                6   U R                  U R                  5        U R                  (       a  [        R                  " S5        U R
                  c  U R                  (       + U l        SnOU R
                  (       + nU R                  S   S:X  a  SOSn[        5       (       a~  [        R                  " UU R                  U R                  U R                  U R                  U R                  (       + U R                  U R                  UU R
                  U5      u  n        nU$ UU R                  U R                  U R                  U R                  S.nU R                  U R                  UU R                  (       + U R
                  USS.nS	n[        U5      n	UR                   n
U	R#                  U
S
S9nU	R#                  U
S
S9nU	R#                  U
S
S9nU	R#                  U
S
S9nU	R#                  U
S
S9nU	R%                  U
5      nUUUUUUS.nU	R'                  XUUS9  U$ )Nz<When training, we now always track global mean and variance.F   CNCHWr%   )xscalebiasmeanvariance)r   r   data_layoutis_testr   trainable_statisticsfuse_with_relusparse_batch_normT)dtypestop_gradient)outmean_outvariance_out
saved_meansaved_variancereserve_space)typeinputsoutputsattrs)r(   _data_formattrainingwarningswarn_use_global_statsr   r   sparse_batch_norm__mean	_varianceweightr0   	_momentum_epsilonr   r8   "create_variable_for_type_inference)create_sparse_variable_for_type_inference	append_op)r   r'   r5   r   batch_norm_out_rA   rC   op_typehelperr8   r;   r<   r=   r>   r?   r:   rB   s                     r    forwardBatchNorm.forward   s=    1 12==MMN !!))-%6D"#( '+'='=#=  $ 1 1! 4 ;f!##,2,E,E

		MM!&&$-)NAq!Q "! 		

 NNF !NN==*#}},$($:$:(<"'E *G )FKKE@@4 A H "DD4 E L  BB4 C J $FF4 G N #EE4 F M BB5IC$ ,("0!.G WE   Jr"   )rH   )?h㈵>NNr$   NN)r   intr   floatr   r[   r   ParamAttrLike | Noner   r\   r   Literal['NDHWC', 'NHWC']r   zbool | Noner   
str | NonereturnNone)r'   r]   r_   r`   )r'   r   r_   r   )
__name__
__module____qualname____firstlineno____doc__r   r(   rV   __static_attributes____classcell__r   s   @r    r   r   #   s    GX ,0*.07(,

 
 	

 *
 (
 .
 &
 
 

 
,N Nr"   r   c                  |   ^  \ rS rSrSr      S               SU 4S jjjrS	S jr\S
S j5       rSr	U =r
$ )SyncBatchNorm   a  
This interface is used to construct a callable object of the ``SyncBatchNorm`` class.
It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can
be used as a normalizer function for other operations, such as conv2d and fully connected
operations.
The data is normalized by the mean and variance of the channel based on whole mini-batch
, which including data in all gpus.
Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
for more details.

When model in training mode, the :math:`\\mu_{\\beta}`
and :math:`\\sigma_{\\beta}^{2}` are the statistics of whole mini-batch data in all gpus.
Calculated as follows:

..  math::

    \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &//\
    \ mini-batch\ mean \\
    \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \
    \mu_{\beta})^2 \qquad &//\ mini-batch\ variance \\

- :math:`x` : whole mini-batch data in all gpus
- :math:`m` : the size of the whole mini-batch data

When model in evaluation mode, the :math:`\\mu_{\\beta}`
and :math:`\sigma_{\beta}^{2}` are global statistics (moving_mean and moving_variance,
which usually got from the pre-trained model). Global statistics calculated as follows:

.. math::
    moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global \ mean \\
    moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global \ variance \\

The formula of normalization is as follows:

..  math::

    \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\
    \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\
    y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift

- :math:`\epsilon` : add a smaller value to the variance to prevent division by zero
- :math:`\gamma` : trainable scale parameter vector
- :math:`\beta` : trainable shift parameter vector

Note:
    If you want to use container to pack your model and has ``SyncBatchNorm`` in the
    evaluation phase, please use ``nn.LayerList`` or ``nn.Sequential`` instead of
    ``list`` to pack the model.

Parameters:
    num_features(int): Indicate the number of channels of the input ``Tensor``.
    epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5.
    momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
    weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale`
         of this layer. If it is set to None or one attribute of ParamAttr, this layer
         will create ParamAttr as param_attr. If the Initializer of the param_attr
         is not set, the parameter is initialized with Xavier. If it is set to False,
         this layer will not have trainable scale parameter. Default: None.
    bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of this layer.
         If it is set to None or one attribute of ParamAttr, this layer
         will create ParamAttr as bias_attr. If the Initializer of the bias_attr
         is not set, the bias is initialized zero. If it is set to False, this layer will not
         have trainable bias parameter. Default: None.
    data_format(str, optional): Specify the input data format, may be "NCHW". Default "NCHW".
    name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`..

Shapes:
    input: Tensor that the dimension from 2 to 5.

    output: Tensor with the same shape as input.

Examples:
    .. code-block:: python

        >>> # doctest: +REQUIRES(env:GPU)
        >>> import paddle
        >>> import paddle.sparse.nn as nn
        >>> paddle.device.set_device('gpu')

        >>> x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]], dtype='float32')
        >>> x = x.to_sparse_coo(len(x.shape)-1)

        >>> if paddle.is_compiled_with_cuda():
        ...     sync_batch_norm = nn.SyncBatchNorm(2)
        ...     hidden1 = sync_batch_norm(x)
        ...     print(hidden1)
        Tensor(shape=[1, 2, 2, 2], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=False,
               indices=[[0, 0, 0, 0],
                        [0, 0, 1, 1],
                        [0, 1, 0, 1]],
               values=[[-0.40730840, -0.13725480],
                        [-0.40730840, -1.20299828],
                        [ 1.69877410, -0.23414057],
                        [-0.88415730,  1.57439375]])
c           	     0   > [         TU ]  UUUUUUU5        g )Nr   )	r   r   r   r   r   r   r   r   r   s	           r    r   SyncBatchNorm.__init__<  s'     		
r"   c                   U R                  5         [        R                  " UU R                  U R                  U R
                  U R                  U R                  (       + U R                  U R                  U R                  SS5      u  n        nU$ )NF)r(   r   sparse_sync_batch_norm_rJ   rK   rL   r0   rE   rM   rN   rD   )r   r.   sync_batch_norm_outrS   s       r    rV   SyncBatchNorm.forwardP  sz    !-3-K-KJJNNKKIINNMM.
*Q1a #"r"   c           	        Un[        U[        5      (       Ga#  UR                  b^  [        UR                  [        5      (       d?  UR                  R                  b(  UR                  R                  S-   UR                  l        UR
                  b^  [        UR
                  [        5      (       d?  UR
                  R                  b(  UR
                  R                  S-   UR
                  l        [        U[        5      (       aX  [        UR                  UR                  UR                  UR                  UR
                  UR                  UR                  5      nOk[        R                  R                  UR                  UR                  UR                  UR                  UR
                  UR                  UR                  5      nUR                  SLaD  UR
                  SLa5  [        5          UR                   Ul        UR"                  Ul        SSS5        UR$                  Ul        UR&                  Ul        UR)                  5        H%  u  p4UR+                  X0R-                  U5      5        M'     AU$ ! , (       d  f       Nl= f)an  
Helper function to convert :class: `paddle.sparse.nn.BatchNorm` layers in the model to :class: `paddle.sparse.nn.SyncBatchNorm` layers.

Parameters:
    layer(paddle.nn.Layer): model containing one or more `BatchNorm` layers.

Returns:
    The original model with converted SyncBatchNorm layers. If BatchNorm layer in the model, use SyncBatchNorm layer instead.

Examples:

    .. code-block:: python

        >>> import paddle
        >>> import paddle.sparse.nn as nn

        >>> model = paddle.nn.Sequential(nn.Conv3D(3, 5, 3), nn.BatchNorm(5))
        >>> sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
N_syncF)
isinstancer
   _weight_attrboolr   
_bias_attrr   rj   _num_featuresrM   rN   rD   _namepaddlennr	   rL   r0   rJ   rK   named_childrenadd_sublayerconvert_sync_batchnorm)clslayerlayer_outputr   sublayers        r    r~   $SyncBatchNorm.convert_sync_batchnorma  s   , e^,,""."5#5#5t<<&&++7*/*<*<*A*AG*K""'  ,"5#3#3T::$$))5(-(8(8(=(=(G  % %++,''OONN&&$$&&KK   &yy66''OONN&&$$&&KK  ""%/$$E1Y*/,,L'(-

L%  "'L%*__L"#224ND%%00: 5  Ys   /#I88
J )rX   rY   NNr-   N)r   rZ   r   r[   r   r[   r   r\   r   r\   r   r   r   r^   r_   r`   )r.   r   r_   r   )r   r   r_   r   )ra   rb   rc   rd   re   r   rV   classmethodr~   rf   rg   rh   s   @r    rj   rj      s    _H ,0*.$*

 
 	

 *
 (
 "
 
 

 
(#" J Jr"   rj   )
__future__r   rF   typingr   r   rz   r   paddle.base.layer_helperr   paddle.frameworkr   r	   paddle.nn.layer.normr
   r   paddle._typingr   r   	paddle.nnr   r{   BatchNorm1Dr   rj   r   r"   r    <module>r      s`    #  )   0 < /  t		%% tnRFII++ Rr"   