
    RЦi!                        S r SSKJrJrJr  SSKrSSKJr  SSKJs  J	r
  SSKJr  SSKJrJrJr   " S S\R"                  5      r\ " S	 S
\R"                  5      5       r " S S\R"                  5      r\ " S S\R"                  5      5       rg)a  Convolution with Weight Standardization (StdConv and ScaledStdConv)

StdConv:
@article{weightstandardization,
  author    = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille},
  title     = {Weight Standardization},
  journal   = {arXiv preprint arXiv:1903.10520},
  year      = {2019},
}
Code: https://github.com/joe-siyuan-qiao/WeightStandardization

ScaledStdConv:
Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
    - https://arxiv.org/abs/2101.08692
Official Deepmind JAX code: https://github.com/deepmind/deepmind-research/tree/master/nfnets

Hacked together by / copyright Ross Wightman, 2021.
    )OptionalTupleUnionN   )register_notrace_module)get_paddingget_padding_valuepad_samec                      ^  \ rS rSrSr        SS\S\S\\\\\4   4   S\\\\\4   4   S\\\\\\4   4      S\\\\\4   4   S	\S
\	S\
4U 4S jjjrS rSrU =r$ )	StdConv2d   zConv2d with Weight Standardization. Used for BiT ResNet-V2 models.

Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
    https://arxiv.org/abs/1903.10520v2

in_channelout_channelskernel_sizestridepaddingdilationgroupsbiasepsc                 R   > Uc  [        X4U5      n[        TU ]	  XX4XVXxXS9
  Xl        g )Nr   r   r   r   r   devicedtype)r   super__init__r   )selfr   r   r   r   r   r   r   r   r   r   r   	__class__s               S/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/layers/std_conv.pyr   StdConv2d.__init__#   s=     ?!+x@GkvQW 	 	f     c           	      b   [         R                  " U R                  R                  SU R                  S5      S S SSU R
                  S9R                  U R                  5      n[         R                  " XU R                  U R                  U R                  U R                  U R                  5      nU$ Nr   T        )trainingmomentumr   )F
batch_normweightreshaper   r   
reshape_asconv2dr   r   r   r   r   r   xr*   s      r   forwardStdConv2d.forward8   s    KK4#4#4b9
 *T[[
! 	 HHQ		4;;dmmUYU`U`ar!   )r   )r   Nr   r   Fư>NN)__name__
__module____qualname____firstlineno____doc__intr   r   r   boolfloatr   r0   __static_attributes____classcell__r   s   @r   r   r      s     34=A45  sE#s(O34	
 #uS#X./ eCsCx$89: CsCx01    *
 
r!   r   c                      ^  \ rS rSrSr        SS\S\S\\\\\4   4   S\\\\\4   4   S\S\\\\\4   4   S	\S
\	S\
4U 4S jjjrS rSrU =r$ )StdConv2dSameE   zConv2d with Weight Standardization. TF compatible SAME padding. Used for ViT Hybrid model.

Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
    https://arxiv.org/abs/1903.10520v2
r   r   r   r   r   r   r   r   r   c                 X   > [        XSXFS9u  p\[        TU ]	  XX4XVXxXS9
  Xl        Xl        g )Nr   r   r   )r	   r   r   same_padr   )r   r   r   r   r   r   r   r   r   r   r   r   
is_dynamicr   s                r   r   StdConv2dSame.__init__L   s?     0Vgk'V 	 	B #r!   c           	         U R                   (       a+  [        XR                  U R                  U R                  5      n[
        R                  " U R                  R                  SU R                  S5      S S SSU R                  S9R                  U R                  5      n[
        R                  " XU R                  U R                  U R                  U R                  U R                  5      nU$ r#   )rC   r
   r   r   r   r(   r)   r*   r+   r   r   r,   r-   r   r   r   r.   s      r   r0   StdConv2dSame.forwarda   s    ==,,dkk4==IAKK4#4#4b9
 *T[[
! 	 HHQ		4;;dmmUYU`U`ar!   )r   rC   )r   SAMEr   r   Fr2   NN)r3   r4   r5   r6   r7   r8   r   r   strr9   r:   r   r0   r;   r<   r=   s   @r   r?   r?   E   s     34!45  sE#s(O34	
 #uS#X./  CsCx01    * r!   r?   c                      ^  \ rS rSrSr          SS\S\S\\\\\4   4   S\\\\\4   4   S\\\\\\4   \	4      S\\\\\4   4   S	\S
\
S\S\S\4U 4S jjjrSU 4S jjrS rSrU =r$ )ScaledStdConv2dp   a$  Conv2d layer with Scaled Weight Standardization.

Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
    https://arxiv.org/abs/2101.08692

NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
in_channelsr   r   r   r   r   r   r   gammar   	gain_initc           	      Z  > XS.nUc  [        X4U5      n[        TU ]  " XU4XEUXxS.UD6  XR                  S   R	                  5       S-  -  U l        Xl        Xl        [        R                  " [        R                  " U R                  SSS440 UD65      U l        U R                  5         g )Nr   r   r   r   r   r   r   r         r   )r   r   r   r*   numelscaler   rO   nn	Parametertorchemptyr   gainreset_parameters)r   rM   r   r   r   r   r   r   r   rN   r   rO   r   r   ddr   s                  r   r   ScaledStdConv2d.__init__y   s      /?!+x@G{	,;A]e	,(*	, [[^113t;;
"LLd.?.?Aq-I!PR!PQ	r!   c                    > [        U S5      (       aM  [        R                  R                  R	                  U R
                  U R                  5        [        TU ]!  5         g g NrZ   	hasattrrX   rV   init	constant_rZ   rO   r   r[   r   r   s    r   r[    ScaledStdConv2d.reset_parameters   @    4  HHMM##DIIt~~>G$& !r!   c           
         [         R                  " U R                  R                  SU R                  S5      S S U R
                  U R                  -  R                  S5      SSU R                  S9R                  U R                  5      n[         R                  " XU R                  U R                  U R                  U R                  U R                  5      $ Nr   r$   Tr%   )r*   r&   r'   r   )r(   r)   r*   r+   r   rZ   rU   viewr   r,   r-   r   r   r   r   r   r.   s      r   r0   ScaledStdConv2d.forward   s    KK4#4#4b9II

*004
 *T[[
! 	 xx499dkk4<<X\XcXcddr!   )r   rZ   rO   rU   )
r   Nr   r   T      ?r2   rk   NNreturnN)r3   r4   r5   r6   r7   r8   r   r   r   rI   r9   r:   r   r[   r0   r;   r<   r=   s   @r   rK   rK   p   s     34BF45"     sE#s(O34	 
 #uS#X./  eCsCx#$=>?  CsCx01             <'
e 
er!   rK   c                      ^  \ rS rSrSr          SS\S\S\\\\\4   4   S\\\\\4   4   S\S\\\\\4   4   S	\S
\	S\
S\
S\
4U 4S jjjrSU 4S jjrS rSrU =r$ )ScaledStdConv2dSame   aL  Conv2d layer with Scaled Weight Standardization and Tensorflow-like SAME padding support

Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
    https://arxiv.org/abs/2101.08692

NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
rM   r   r   r   r   r   r   r   rN   r   rO   c           	      `  > XS.n[        XSXFS9u  p_[        TU ]  " XU4XEUXxS.UD6  XR                  S   R	                  5       S-  -  U l        Xl        Xl        Xl        [        R                  " [        R                  " U R                  SSS440 UD65      U l        U R                  5         g )NrQ   rB   rR   r   rS   r   )r	   r   r   r*   rT   rU   rC   r   rO   rV   rW   rX   rY   r   rZ   r[   )r   rM   r   r   r   r   r   r   r   rN   r   rO   r   r   r\   rD   r   s                   r   r   ScaledStdConv2dSame.__init__   s      //Vg{	,;A]e	,(*	, [[^113t;;
""LLd.?.?Aq-I!PR!PQ	r!   c                    > [        U S5      (       aM  [        R                  R                  R	                  U R
                  U R                  5        [        TU ]!  5         g g r_   r`   rd   s    r   r[   $ScaledStdConv2dSame.reset_parameters   rf   r!   c           
      $   U R                   (       a+  [        XR                  U R                  U R                  5      n[
        R                  " U R                  R                  SU R                  S5      S S U R                  U R                  -  R                  S5      SSU R                  S9R                  U R                  5      n[
        R                  " XU R                   U R                  U R"                  U R                  U R$                  5      $ rh   )rC   r
   r   r   r   r(   r)   r*   r+   r   rZ   rU   ri   r   r,   r-   r   r   r   r.   s      r   r0   ScaledStdConv2dSame.forward   s    ==,,dkk4==IAKK4#4#4b9II

*004
 *T[[
! 	 xx499dkk4<<X\XcXcddr!   )r   rZ   rO   rC   rU   )
r   rH   r   r   Trk   r2   rk   NNrl   )r3   r4   r5   r6   r7   r8   r   r   rI   r9   r:   r   r[   r0   r;   r<   r=   s   @r   ro   ro      s     34!45"     sE#s(O34	 
 #uS#X./    CsCx01             <'e er!   ro   )r7   typingr   r   r   rX   torch.nnrV   torch.nn.functional
functionalr(   _fxr   r   r   r	   r
   Conv2dr   r?   rK   ro    r!   r   <module>r~      s   $ * )     ( = =%		 %P 'BII ' 'T9ebii 9ex ;e")) ;e ;er!   