
    QЦi}                     R   % S SK r S SKJr  S SKJr  S SKJrJrJrJ	r	J
r
Jr  S SKrS SKJr  SSKJrJr  SSKJrJr  SSKJr  S	S
KJrJrJr  S	SKJr  S	SKJrJr  / SQr  " S S\
5      r! " S S\5      r" " S S\RF                  5      r$ " S S\RF                  5      r% " S S\RF                  5      r&S\'S\'S\'S\'S\'S\\   S\(S\S \&4S! jr)S"\0r*\\+\4   \,S#'   0 \*ES$S%S&.Er- " S' S(\5      r. " S) S*\5      r/ " S+ S,\5      r0 " S- S.\5      r1 " S/ S0\5      r2\" 5       \" S1\.Rf                  4S29SS3S4.S\\.   S\(S\S \&4S5 jj5       5       r4\" 5       \" S1\/Rf                  4S29SS3S4.S\\/   S\(S\S \&4S6 jj5       5       r5\" 5       \" S1\0Rf                  4S29SS3S4.S\\0   S\(S\S \&4S7 jj5       5       r6\" 5       \" S1\1Rf                  4S29SS3S4.S\\1   S\(S\S \&4S8 jj5       5       r7\" 5       \" S9S29SS3S4.S\\2   S\(S\S \&4S: jj5       5       r8  SAS;\'S\'S<S=S>\+S?\(S S=4S@ jjr9g)B    N)OrderedDict)partial)AnyCallableDictList
NamedTupleOptional   )Conv2dNormActivationMLP)ImageClassificationInterpolationMode)_log_api_usage_once   )register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)VisionTransformerViT_B_16_WeightsViT_B_32_WeightsViT_L_16_WeightsViT_L_32_WeightsViT_H_14_Weightsvit_b_16vit_b_32vit_l_16vit_l_32vit_h_14c                       \ rS rSr% \\S'   \\S'   \\S'   \R                  r\	S\R                  4   \S'   \R                  r\	S\R                  4   \S'   Srg	)
ConvStemConfig    out_channelskernel_sizestride.
norm_layeractivation_layer N)__name__
__module____qualname____firstlineno__int__annotations__nnBatchNorm2dr)   r   ModuleReLUr*   __static_attributes__r+       d/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchvision/models/vision_transformer.pyr$   r$       sJ    K+->>Jbii(913hsBII~.8r7   r$   c                   L   ^  \ rS rSrSrSrS\S\S\4U 4S jjrU 4S jr	S	r
U =r$ )
MLPBlock(   zTransformer MLP block.r   in_dimmlp_dimdropoutc                 v  > [         TU ]  XU/[        R                  S US9  U R	                  5        H  n[        U[        R                  5      (       d  M$  [        R                  R                  UR                  5        UR                  c  M\  [        R                  R                  UR                  SS9  M     g )N)r*   inplacer>   ư>std)super__init__r2   GELUmodules
isinstanceLinearinitxavier_uniform_weightbiasnormal_)selfr<   r=   r>   m	__class__s        r8   rE   MLPBlock.__init__-   s}    6!2RWWVZdklA!RYY''''166%GGOOAFFO5	  r7   c           	        > UR                  SS 5      nUb  US:  aN  [        S5       H?  n	S H6  n
U SU	S-    SU
 3nU SU	-   SU
 3nX;   d  M#  UR                  U5      X'   M8     MA     [        TU ]  UUUUUUU5        g )Nversionr   )rL   rM   linear_r   .   )getrangepoprD   _load_from_state_dict)rO   
state_dictprefixlocal_metadatastrictmissing_keysunexpected_keys
error_msgsrT   itypeold_keynew_keyrQ   s                r8   r[   MLPBlock._load_from_state_dict6   s     !$$Y5?gk1X.D!'!uAdV<G!'1Qtf5G,.8nnW.E
+	 /  	%	
r7   r+   )r,   r-   r.   r/   __doc___versionr0   floatrE   r[   r6   __classcell__rQ   s   @r8   r:   r:   (   s/     H6s 6S 65 6
 
r7   r:   c                      ^  \ rS rSrSr\" \R                  SS94S\S\S\S\	S	\	S
\
S\R                  R                  4   4U 4S jjjrS\R                  4S jrSrU =r$ )EncoderBlockV   zTransformer encoder block.rA   eps	num_heads
hidden_dimr=   r>   attention_dropoutr)   .c                    > [         TU ]  5         Xl        U" U5      U l        [        R
                  " X!USS9U l        [        R                  " U5      U l        U" U5      U l	        [        X#U5      U l        g )NT)r>   batch_first)rD   rE   rr   ln_1r2   MultiheadAttentionself_attentionDropoutr>   ln_2r:   mlp)rO   rr   rs   r=   r>   rt   r)   rQ   s          r8   rE   EncoderBlock.__init__Y   sh     	" z*	 33JSdrvwzz'* z*	J9r7   inputc                 *   [         R                  " UR                  5       S:H  SUR                   35        U R	                  U5      nU R                  X"USS9u  p#U R                  U5      nX!-   nU R                  U5      nU R                  U5      nX$-   $ )NrW   2Expected (batch_size, seq_length, hidden_dim) got F)need_weights)	torch_assertdimshaperw   ry   r>   r{   r|   )rO   r~   x_ys        r8   forwardEncoderBlock.forwardn   s    eiikQ&*\]b]h]h\i(jkIIe""1"?LLOIIIaLHHQKur7   )r>   rw   r{   r|   rr   ry   r,   r-   r.   r/   rh   r   r2   	LayerNormr0   rj   r   r   r4   rE   Tensorr   r6   rk   rl   s   @r8   rn   rn   V   s    $ 6=R\\t5T:: : 	:
 : !: S%((//12: :*	U\\ 	 	r7   rn   c                      ^  \ rS rSrSr\" \R                  SS94S\S\S\S\S	\S
\	S\	S\
S\R                  R                  4   4U 4S jjjrS\R                  4S jrSrU =r$ )Encoderz   z?Transformer Model Encoder for sequence to sequence translation.rA   rp   
seq_length
num_layersrr   rs   r=   r>   rt   r)   .c	           	        > [         TU ]  5         [        R                  " [        R
                  " SX5      R                  SS95      U l        [        R                  " U5      U l	        [        5       n	[        U5       H  n
[        UUUUUU5      U	SU
 3'   M     [        R                  " U	5      U l        U" U5      U l        g )Nr   g{Gz?rB   encoder_layer_)rD   rE   r2   	Parameterr   emptyrN   pos_embeddingrz   r>   r   rY   rn   
Sequentiallayersln)rO   r   r   rr   rs   r=   r>   rt   r)   r   rc   rQ   s              r8   rE   Encoder.__init__}   s     	  \\%++a*P*X*X]a*X*bczz'*.9mz"A+7!,F^A3'( # mmF+Z(r7   r~   c                     [         R                  " UR                  5       S:H  SUR                   35        XR                  -   nU R                  U R                  U R                  U5      5      5      $ )NrW   r   )r   r   r   r   r   r   r   r>   )rO   r~   s     r8   r   Encoder.forward   sZ    eiikQ&*\]b]h]h\i(jk***wwt{{4<<#6788r7   )r>   r   r   r   r   rl   s   @r8   r   r   z   s    I 6=R\\t5T)) ) 	)
 ) ) ) !) S%((//12) ):9U\\ 9 9r7   r   c                   .  ^  \ rS rSrSrSSSS\" \R                  SS9S4S\S	\S
\S\S\S\S\	S\	S\S\
\   S\S\R                  R                  4   S\
\\      4U 4S jjjrS\R"                  S\R"                  4S jrS\R"                  4S jrSrU =r$ )r      z;Vision Transformer as per https://arxiv.org/abs/2010.11929.        i  NrA   rp   
image_size
patch_sizer   rr   rs   r=   r>   rt   num_classesrepresentation_sizer)   .conv_stem_configsc                   > [         TU ]  5         [        U 5        [        R                  " X-  S:H  S5        Xl        X l        XPl        X`l        Xl	        Xpl
        Xl        Xl        Xl        Ub  [        R                  " 5       nSn[!        U5       He  u  nnUR#                  SU 3[%        UUR&                  UR(                  UR*                  UR                  UR,                  S95        UR&                  nMg     UR#                  S[        R.                  " XSS95        Xl        O[        R.                  " SXRUS	9U l        X-  S
-  n[        R2                  " [        R4                  " SSU5      5      U l        US-  n[9        UUUUUUUU5      U l        UU l        [?        5       nU
c  [        R@                  " XY5      US'   OJ[        R@                  " XZ5      US'   [        RB                  " 5       US'   [        R@                  " X5      US'   [        R                  " U5      U l"        [G        U R0                  [        R.                  5      (       a  U R0                  RH                  U R0                  R(                  S   -  U R0                  R(                  S   -  n[        RJ                  RM                  U R0                  RN                  [P        RR                  " SU-  5      S9  U R0                  RT                  b3  [        RJ                  RW                  U R0                  RT                  5        GOU R0                  RX                  Gb  [G        U R0                  RX                  [        R.                  5      (       a  [        RJ                  R[                  U R0                  RX                  RN                  S[P        RR                  " SU R0                  RX                  R&                  -  5      S9  U R0                  RX                  RT                  b=  [        RJ                  RW                  U R0                  RX                  RT                  5        []        U RD                  S5      (       a  [G        U RD                  R^                  [        R@                  5      (       a  U RD                  R^                  R`                  n[        RJ                  RM                  U RD                  R^                  RN                  [P        RR                  " SU-  5      S9  [        RJ                  RW                  U RD                  R^                  RT                  5        [G        U RD                  Rb                  [        R@                  5      (       a{  [        RJ                  RW                  U RD                  Rb                  RN                  5        [        RJ                  RW                  U RD                  Rb                  RT                  5        g g )Nr   z&Input shape indivisible by patch size!rW   conv_bn_relu_)in_channelsr&   r'   r(   r)   r*   	conv_lastr   )r   r&   r'   )r   r&   r'   r(   r   head
pre_logitsactrB   r   g       @)meanrC   )2rD   rE   r   r   r   r   r   rs   r=   rt   r>   r   r   r)   r2   r   	enumerate
add_moduler   r&   r'   r(   r*   Conv2d	conv_projr   zerosclass_tokenr   encoderr   r   rI   TanhheadsrH   r   rJ   trunc_normal_rL   mathsqrtrM   zeros_r   rN   hasattrr   in_featuresr   )rO   r   r   r   rr   rs   r=   r>   rt   r   r   r)   r   seq_projprev_channelsrc   conv_stem_layer_configr   heads_layersfan_inrQ   s                       r8   rE   VisionTransformer.__init__   s?    	D!j-24\]$$$!2&#6 $(}}HM-67H-I))###A3'($1%;%H%H$:$F$F5<<#9#D#D)?)P)P
 !7 C C .J RYY=ghi )1NYYJWaDN !.14
 <<Aq*(EFa
	
 %4?M&#%99Z#EL )+:)SL&"$'')L#%99-@#NL ]]<0
dnnbii00^^//$..2L2LQ2OORVR`R`RlRlmnRooFGG!!$.."7"7TYYq6z=R!S~~"".t~~223^^%%1jAYAY[][d[d6e6eGGOO((//ctyyt~~OgOgOtOtIt?u   ~~'',,8t~~77<<=4::|,,DJJ<Q<QSUS\S\1]1]ZZ**66FGG!!$**"7"7">">DIIaRXjDY!ZGGNN4::00556djjooryy11GGNN4::??112GGNN4::??//0 2r7   r   returnc                    UR                   u  p#pEU R                  n[        R                  " X@R                  :H  SU R                   SU S35        [        R                  " XPR                  :H  SU R                   SU S35        XF-  nXV-  nU R                  U5      nUR                  X R                  Xx-  5      nUR                  SSS5      nU$ )NzWrong image height! Expected z	 but got !zWrong image width! Expected r   r   r   )	r   r   r   r   r   r   reshapers   permute)	rO   r   nchwpn_hn_ws	            r8   _process_input VisionTransformer._process_input  s    WW
aOOa??*.KDOOK\\efgehhi,jka??*.J4??J[[defdggh,ijff NN1IIa#)4 IIaAr7   c                    U R                  U5      nUR                  S   nU R                  R                  USS5      n[        R
                  " X1/SS9nU R                  U5      nUS S 2S4   nU R                  U5      nU$ )Nr   r   r   )r   r   r   expandr   catr   r   )rO   r   r   batch_class_tokens       r8   r   VisionTransformer.forward!  s~    "GGAJ !,,33Ar2>II(,!4LLO adGJJqMr7   )rt   r   r   r>   r   r   rs   r   r=   r)   r   r   r   r   )r,   r-   r.   r/   rh   r   r2   r   r0   rj   r
   r   r   r4   r   r$   rE   r   r   r   r6   rk   rl   s   @r8   r   r      s   E #&-15<R\\t5T<@g1g1 g1 	g1
 g1 g1 g1 g1 !g1 g1 &c]g1 S%((//12g1 $D$89g1 g1R  *  r7   r   r   r   rr   rs   r=   weightsprogresskwargsr   c           
      p   Ubh  [        US[        UR                  S   5      5        UR                  S   S   UR                  S   S   :X  d   e[        USUR                  S   S   5        UR                  SS5      n[	        SUU UUUUS.UD6n	U(       a  U	R                  UR                  US	S
95        U	$ )Nr   
categoriesmin_sizer   r   r      )r   r   r   rr   rs   r=   T)r   
check_hashr+   )r   lenmetarZ   r   load_state_dictget_state_dict)
r   r   rr   rs   r=   r   r   r   r   models
             r8   _vision_transformerr   4  s     fmSl9S5TU||J'*gll:.Fq.IIIIflGLL4LQ4OPL#.J  E g44hSW4XYLr7   r   _COMMON_METAz(https://github.com/facebookresearch/SWAGz:https://github.com/facebookresearch/SWAG/blob/main/LICENSE)recipelicensec                       \ rS rSr\" S\" \SS90 \ESSSSS	S
S.0SSSS.ES9r\" S\" \SS\	R                  S90 \ESSSSSS.0SSSS.ES9r\" S\" \SS\	R                  S90 \ESSSSSSS.0SSS S!.ES9r\rS"rg#)$r   i_  z9https://download.pytorch.org/models/vit_b_16-c867db91.pthr   	crop_sizei(r   r   zNhttps://github.com/pytorch/vision/tree/main/references/classification#vit_b_16ImageNet-1KgS㥛DT@g1ZW@zacc@1zacc@5gMb1@g(\t@
                These weights were trained from scratch by using a modified version of `DeIT
                <https://arxiv.org/abs/2012.12877>`_'s training recipe.
            
num_paramsr   r   _metrics_ops
_file_size_docsurl
transformsr   z>https://download.pytorch.org/models/vit_b_16_swag-9ac1b537.pth  r   resize_sizeinterpolationi^-)r   r   g~jtSU@giX@gˡEK@g|?5^t@
                These weights are learnt via transfer learning by end-to-end fine-tuning the original
                `SWAG <https://arxiv.org/abs/2201.08371>`_ weights on ImageNet-1K data.
            r   r   r   r   r   r   zAhttps://download.pytorch.org/models/vit_b_16_lc_swag-4e70ced5.pth+https://github.com/pytorch/vision/pull/5793gbX9xT@gQX@
                These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
                weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
            r   r   r   r   r   r   r   r+   Nr,   r-   r.   r/   r   r   r   r   IMAGENET1K_V1r   BICUBIC_COMMON_SWAG_METAIMAGENET1K_SWAG_E2E_V1IMAGENET1K_SWAG_LINEAR_V1DEFAULTr6   r+   r7   r8   r   r   _  s#   G.#>

""f##  !
M, %L+33	


""##  !
4 !(O+33	


C""##  !
!6 Gr7   r   c                   T    \ rS rSr\" S\" \SS90 \ESSSSS	S
S.0SSSS.ES9r\r	Sr
g)r   i  z9https://download.pytorch.org/models/vit_b_32-d86f8d99.pthr   r   i1Br   zNhttps://github.com/pytorch/vision/tree/main/references/classification#vit_b_32r   g|?5^R@gW@r   gA`Т@gl	u@r   r   r   r+   Nr,   r-   r.   r/   r   r   r   r   r  r  r6   r+   r7   r8   r   r     s\    G.#>

""f##  !
M, Gr7   r   c                       \ rS rSr\" S\" \SSS90 \ESSSS	S
SS.0SSSS.ES9r\" S\" \SS\	R                  S90 \ESSS	SSS.0SSSS.ES9r\" S\" \SS\	R                  S90 \ESSSS	SS S.0SSS!S".ES9r\rS#rg$)%r   i  z9https://download.pytorch.org/models/vit_l_16-852ce7e3.pthr      )r   r   i#r   zNhttps://github.com/pytorch/vision/tree/main/references/classification#vit_l_16r   g|?5^S@gFԨW@r   gףp=
N@g;O$@a  
                These weights were trained from scratch by using a modified version of TorchVision's
                `new training recipe
                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
            r   r   z>https://download.pytorch.org/models/vit_l_16_swag-4f3808c9.pth   r   i0)r  r  gjtV@gT㥛ĠX@gƟv@gy&11@r   r   zAhttps://download.pytorch.org/models/vit_l_16_lc_swag-4d563306.pthr   gMbXIU@g^I[X@r   r  r+   Nr  r+   r7   r8   r   r     s%   G.#3O

#"f##  "
M. %L+33	


#"##  "
4 !(O+33	


C#"##  "
!6 Gr7   r   c                   T    \ rS rSr\" S\" \SS90 \ESSSSS	S
S.0SSSS.ES9r\r	Sr
g)r   i  z9https://download.pytorch.org/models/vit_l_32-c7638314.pthr   r   i[Er   zNhttps://github.com/pytorch/vision/tree/main/references/classification#vit_l_32r   g|?5>S@gGzDW@r   gK7.@gE@r   r   r   r+   Nr
  r+   r7   r8   r   r     s\    G.#>

#"f#"  "
M, Gr7   r   c                       \ rS rSr\" S\" \SS\R                  S90 \	ESSSSS	S
.0SSSS.ES9r
\" S\" \SS\R                  S90 \	ESSSSSSS
.0SSSS.ES9r\
rSrg)r   i2  z>https://download.pytorch.org/models/vit_h_14_swag-80465313.pth  r   i%)r  r  r   gS#V@g#~jX@r   g~jŏ@gK7I@r   r   r   zAhttps://download.pytorch.org/models/vit_h_14_lc_swag-c1eb923e.pthr   r   i@%r   gZd;OmU@gQnX@g=
ףpd@gIk֢@r   r  r+   N)r,   r-   r.   r/   r   r   r   r   r  r  r  r  r  r6   r+   r7   r8   r   r   2  s    $L+33	


#"##  "
4 !(O+33	


C#"##  "
!6 %Gr7   r   
pretrained)r   T)r   r   c                 R    [         R                  U 5      n [        SSSSSSU US.UD6$ )au  
Constructs a vit_b_16 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_B_16_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_B_16_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_B_16_Weights
    :members:
            r   r   rr   rs   r=   r   r   r+   )r   verifyr   r   r   r   s      r8   r   r   k  E    ( %%g.G 		 	 	r7   c                 R    [         R                  U 5      n [        SSSSSSU US.UD6$ )au  
Constructs a vit_b_32 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_B_32_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_B_32_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_B_32_Weights
    :members:
r%   r  r  r  r  r+   )r   r  r   r  s      r8   r   r     r  r7   c                 R    [         R                  U 5      n [        SSSSSSU US.UD6$ )au  
Constructs a vit_l_16 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_L_16_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_L_16_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_L_16_Weights
    :members:
r           r  r+   )r   r  r   r  s      r8   r    r      E    ( %%g.G 		 	 	r7   c                 R    [         R                  U 5      n [        SSSSSSU US.UD6$ )au  
Constructs a vit_l_32 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_L_32_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_L_32_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_L_32_Weights
    :members:
r%   r  r  r  r  r  r+   )r   r  r   r  s      r8   r!   r!     r   r7   )r  Nc                 R    [         R                  U 5      n [        SSSSSSU US.UD6$ )au  
Constructs a vit_h_14 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_H_14_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_H_14_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_H_14_Weights
    :members:
   r%   r  i   i   r  r+   )r   r  r   r  s      r8   r"   r"     r   r7   r   model_statezOrderedDict[str, torch.Tensor]interpolation_modereset_headsc                    US   nUR                   u  pgnUS:w  a  [        SUR                    35      eX-  S-  S-   n	X:w  Ga3  US-  nU	S-  n	USS2SS2SS24   n
USS2SS2SS24   nUR                  SSS5      n[        [        R
                  " U5      5      nX-  U:w  a  [        SX-   SU 35      eUR                  SXU5      nX-  n[        R                  R                  UUUS	S
9nUR                  SX5      nUR                  SSS5      n[        R                  " X/SS9nXS'   U(       aC  [        5       nUR                  5        H#  u  nnUR                  S5      (       a  M  UUU'   M%     UnU$ )aw  This function helps interpolate positional embeddings during checkpoint loading,
especially when you want to apply a pre-trained model on images with different resolution.

Args:
    image_size (int): Image size of the new model.
    patch_size (int): Patch size of the new model.
    model_state (OrderedDict[str, torch.Tensor]): State dict of the pre-trained model.
    interpolation_mode (str): The algorithm used for upsampling. Default: bicubic.
    reset_heads (bool): If true, not copying the state of heads. Default: False.

Returns:
    OrderedDict[str, torch.Tensor]: A state dict which can be loaded into the new model.
zencoder.pos_embeddingr   z%Unexpected position embedding shape: r   Nr   zPseq_length is not a perfect square! Instead got seq_length_1d * seq_length_1d = z and seq_length = T)sizemodealign_cornersr   r   )r   
ValueErrorr   r0   r   r   r   r2   
functionalinterpolater   r   r   items
startswith)r   r   r$  r%  r&  r   r   r   rs   new_seq_lengthpos_embedding_tokenpos_embedding_imgseq_length_1dnew_seq_length_1dnew_pos_embedding_imgnew_pos_embeddingmodel_state_copykvs                      r8   interpolate_embeddingsr:    s   *   78M - 3 3A:Av@ATAT@UVWW .14q8N
 #a
!+Arr1H5)!QR(3 .55aA>DIIj12(J6bcp  dA  cC  CU  V`  Ua  b 
 .55aTab&4 !# 9 9"#	 !: !
 !6 = =a \ !6 = =aA F!II':&RXYZ/@+,AL#))+1||G,,*+$Q' , +Kr7   )bicubicF):r   collectionsr   	functoolsr   typingr   r   r   r   r	   r
   r   torch.nnr2   ops.miscr   r   transforms._presetsr   r   utilsr   _apir   r   r   _metar   _utilsr   r   __all__r$   r:   r4   rn   r   r   r0   boolr   r   strr1   r  r   r   r   r   r   r  r   r   r    r!   r"   r:  r+   r7   r8   <module>rI     sh    #  B B   0 H ' 6 6 ' B9Z 9+
s +
\!299 !H#9bii #9LQ		 Qh  	
  k"   B & d38n 8K L{ L^{ 4M{ M`{ 46%{ 6%r ,0@0N0N!OP6:T "23 d ]` ev  Q @ ,0@0N0N!OP6:T "23 d ]` ev  Q @ ,0@0N0N!OP6:T "23 d ]` ev  Q @ ,0@0N0N!OP6:T "23 d ]` ev  Q @ !566:T "23 d ]` ev  7 H (KKK 2K 	K
 K &Kr7   