
    RЦiH                        S r SSKrSSKJr  SSKJrJrJrJrJ	r	J
r
  SSKrSSKJr  SSKJs  Jr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJrJrJr  SSK J!r!J"r"  SS	K#J$r$  SS
K%J&r&  SSK'J(r(  SSK)J*r*J+r+  S/r,S\-S\	\R\                     4S jr/\(S\\-   S\R`                  S\R`                  4S j5       r1S\R`                  S\\-   S\\-   S\R`                  4S jr2 " S S\R\                  5      r3 " S S\R\                  5      r4 " S S\R\                  5      r5 " S S\R\                  5      r6 " S  S!\R\                  5      r7 " S" S\R\                  5      r8SKS# jr9\!" 0 S$\9" S%S&S'9_S(\9" S%S&SS)9_S*\9" S%S&S'9_S+\9" S%S&SS)9_S,\9" S%S&S'9_S-\9" S%S&SS)9_S.\9" S%S&S'9_S/\9" S%S&SS)9_S0\9" S%S&S'9_S1\9" S%S&SS)9_S2\9" S%S&S'9_S3\9" S%S&SS)9_S4\9" S%S5S6S79_S8\9" S%S5S6S79_S9\9" S%S:S5S6S;9_S<\9" S%S:S5S6S;9_S=\9" S5S6S>9_5      r:SLS? jr;SMS@\<SA\=S\84SB jjr>\"SMSC j5       r?\"SMSD j5       r@\"SMSE j5       rA\"SMSF j5       rB\"SMSG j5       rC\"SMSH j5       rD\"SMSI j5       rE\"SMSJ j5       rFg)NzqAn PyTorch implementation of Hiera

Adapted for timm from originals at https://github.com/facebookresearch/hiera
    N)partial)DictListOptionalTupleTypeUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathcalculate_drop_path_ratesMlp
LayerScaleClNormMlpClassifierHeaduse_fused_attn_assertget_norm_layer	to_2tupleinit_weight_vitinit_weight_jax   )generate_default_cfgsregister_model)build_model_with_cfg)feature_take_indices)register_notrace_function)named_apply
checkpointHieranreturnc                     [         R                  [         R                  [         R                  [         R                  /U    $ )z
Returns a conv with nd (e.g., Conv2d for n=2). Work up to n=3.
If you wanted a 4d Hiera, you could probably just implement this for n=4. (no promises)
)nnIdentityConv1dConv2dConv3d)r!   s    P/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/hiera.pyconv_ndr*   :   s(    
 KKBIIryy9!<<    target_sizemaskc                     Uc  U$ [        [        UR                  SS  5      [        U 5      :H  S5        UR                  SS  U :w  a#  [        R                  " UR                  5       U S9$ U$ )N   z.mask spatial shape and target_size must match.)size)r   lenshapeFinterpolatefloatr,   r-   s     r)   get_resized_maskr7   B   s`     |C

123{#335efzz!"~$}}TZZ\<<Kr+   xr2   mu_shapec                     [        U5      nU R                  S   U R                  S   pT[        X5       VVs/ s H	  u  pgXg-  PM     nnnU R                  " U/UQUQUP76 n S/[	        [        [        SSU-   5      [        SU-   SSU-  -   5      5       V	s/ s H  n	[        U	5      PM     sn	/ 5      -   [        U R                  5      S-
  /-   n
U R                  U
5      R                  " U/UQUP76 n U $ s  snnf s  sn	f )a  
Restore spatial organization by undoing windowed organization of mask units.

Args:
    x: organized by mask units windows, e.g. in 2d [B, #MUy*#MUx, MUy, MUx, C]
    shape: current spatial shape, if it were not organized into mask unit
        windows, e.g. in 2d [B, #MUy*MUy, #MUx*MUx, C].
    mu_shape: current mask unit shape, e.g. in 2d [MUy, MUx]
Returns:
    x: e.g. in 2d, [B, #MUy*MUy, #MUx*MUx, C]
r   r   r/   )	r1   r2   zipviewsumrangelistpermutereshape)r8   r2   r9   DBCsmunum_MUsprA   s              r)   undo_windowingrJ   O   s     	E
A771:qwwr{q$'$89$851qw$8G9	q)7)X)q)A 

E!QUOU1q5!a!e)5L MN M1tAw MNPR
S	Tqww<!
	 
 	
		'""10u0a0AH : Os   C5C;
c            	          ^  \ rS rSrSrS\\S4   S\\S4   S\\\S4      4U 4S jjrS\	R                  S	\	R                  4S
 jrSrU =r$ )Unrollp   a  
Reorders the tokens such that patches are contiguous in memory.
E.g., given [B, (H, W), C] and stride of (Sy, Sx), this will re-order the tokens as
                       [B, (Sy, Sx, H // Sy, W // Sx), C]

This allows operations like Max2d to be computed as x.view(B, Sx*Sy, -1, C).max(dim=1).
Not only is this faster, but it also makes it easy to support inputs of arbitrary
dimensions in addition to patch-wise sparsity.

Performing this operation multiple times in sequence puts entire windows as contiguous
in memory. For instance, if you applied the stride (2, 2) 3 times, entire windows of
size 8x8 would be contiguous in memory, allowing operations like mask unit attention
computed easily and efficiently, while also allowing max to be applied sequentially.

Note: This means that intermediate values of the model are not in HxW order, so they
need to be re-rolled if you want to use the intermediate values as a HxW feature map.
The last block of the network is fine though, since by then the strides are all consumed.

input_size.patch_strideunroll_schedulec                    > [         TU ]  5         [        X5       VVs/ s H	  u  pEXE-  PM     snnU l        X0l        g s  snnf N)super__init__r<   r0   schedule)selfrN   rO   rP   irF   	__class__s         r)   rT   Unroll.__init__   s<     	(+J(EF(EQV(EF	' Gs   >r8   r"   c           
         UR                   u  p#nU R                  nUR                  " U/U-   U/-   6 nU R                   H  n[	        XV5       VVs/ s H	  u  pxXx-  PM     nnnU/[        [	        XV5       VVs/ s H  u  pxXx/PM
     snn/ 5      -   U/-   n	UR                  U	5      n[        U	5      n
S/[        [        SU
S-
  S5      5      -   [        [        SU
S-
  S5      5      -   U
S-
  /-   nUR                  U5      nUR                  S[        U5      5      nU[        R                  " U5      -  nM     UR                  S[        R                  " U R                  5      U5      nU$ s  snnf s  snnf )z
Input: Flattened patch embeddings [B, N, C]
Output: Patch embeddings [B, N, C] permuted such that [B, 4, N//4, C].max(1) etc. performs MaxPoolNd
r   r/   r   r;   )r2   r0   r=   rU   r<   r>   r1   r@   r?   rA   flattenmathprodrB   )rV   r8   rD   _rE   cur_sizestridesrW   rF   	new_shapeLrA   s               r)   forwardUnroll.forward   s]   
 ''a99FFaS8^qc)+}}G
 ,/x+AB+A41+AHBcc(6L"M6LdaA66L"MrRRVWUXXIy!A IAcDq!a%!344tE!QUA<N7OOSTWXSXRYYG		'"A 		!S\*A7##A# %& IIb$))DII.2 C"Ms   E,8E2rU   r0   __name__
__module____qualname____firstlineno____doc__r   intr   rT   torchTensorrc   __static_attributes____classcell__rX   s   @r)   rL   rL   p   sf    &(c3h(  S/( "%S/2	( %,,  r+   rL   c            
          ^  \ rS rSrSrS\\S4   S\\S4   S\\\S4      S\\   S\4
U 4S	 jjr SS
\	R                  S\S\	R                  S\	R                  4S jjrSrU =r$ )Reroll   zI
Undos the "unroll" operation so that you can use intermediate features.
rN   .rO   rP   
stage_endsq_poolc                   > [         T
U ]  5         [        X5       VVs/ s H	  u  pgXg-  PM     snnU l        0 U l        U R                  n[        US   S-   5       HU  nX84U R                  U'   XdS U ;   d  M  [        U5      S:  a$  [        XS   5       V	Vs/ s H	  u  pX-  PM     nn	nUSS  nMW     g s  snnf s  snn	f )Nr;   r   r   )rS   rT   r<   r0   rU   r?   r1   )rV   rN   rO   rP   ru   rv   rW   rF   r0   r!   rX   s             r)   rT   Reroll.__init__   s     	(+J(EF(EQV(EF	 yyz"~)*A.4DMM!w'''!+/249K/LM/LtqAF/LDM"1!""5 + G Ns   B5B;r8   	block_idxr-   r"   c                    U R                   U   u  pEUR                  u  pgn[        U5      n	S/U	-  n
U H  nUR                  " U/UQU[        R
                  " U5      -  PU
QUP76 n[        UR                  5      nSSU	-   /[        [        [        SSU	-   5      [        SU	-   S-   US-
  5      5       Vs/ s H  n[        U5      PM     sn/ 5      -   US-
  /-   nUR                  U5      n[        U	5       H  nX==   X   -  ss'   M     UR                  " US/U
QUP76 nUR                  S   nM     UR                  " Xg/U
QUP76 nUb  U$ [        XU
5      nU$ s  snf )z
Roll the given tensor back up to spatial order assuming it's from the given block.

If no mask is provided:
    - Returns [B, H, W, C] for 2d, [B, T, H, W, C] for 3d, etc.
If a mask is provided:
    - Returns [B, #MUs, MUy, MUx, C] for 2d, etc.
r   r   r;   )rU   r2   r1   r=   r\   r]   r>   r<   r?   r@   rA   rB   rJ   )rV   r8   ry   r-   rU   r0   rD   NrE   rC   cur_mu_shaper`   rb   rI   rA   rW   s                   r)   rc   Reroll.forward   s|    y1''aIsQwGqN7NA7);$;NlNANA AGGAAE
E!QUOU1q519aRSe=T(UV(U1tAw(UVXZ[\q5' 
 		'"A 1X7:- 		!R2,22A
A%  * FF1*,** H 1L1+ Ws   5Ere   rR   rf   rq   s   @r)   rs   rs      s    6c3h6  S/6 "%S/2	6
 S	6 66 "&	2||2 2 ,,	2
 
2 2r+   rs   c                      ^  \ rS rSr% Sr\R                  R                  \   \	S'        SS\
S\
S\
S\
S\
S	\4U 4S
 jjjrS\R                  S\R                  4S jrSrU =r$ )MaskUnitAttention   z
Computes either Mask Unit or Global Attention. Also is able to perform q pooling.

Note: this assumes the tokens have already been flattened and unrolled into mask units.
See `Unroll` for more details.

fused_attndimdim_outheadsq_stridewindow_sizeuse_mask_unit_attnc	                 B  > XxS.n	[         T
U ]  5         Xl        X l        X0l        X@l        X#-  U l        U R                  S-  U l        [        5       U l	        [        R                  " USU-  40 U	D6U l        [        R                  " X"40 U	D6U l        XPl        X`l        g)a^  
Args:
- dim, dim_out: The input and output feature dimensions.
- heads: The number of attention heads.
- q_stride: If greater than 1, pool q with this stride. The stride should be flattened (e.g., 2x2 = 4).
- window_size: The current (flattened) size of a mask unit *after* pooling (if any).
- use_mask_unit_attn: Use Mask Unit or Global Attention.
devicedtypeg         N)rS   rT   r   r   r   r   head_dimscaler   r   r$   Linearqkvprojr   r   )rV   r   r   r   r   r   r   r   r   ddrX   s             r)   rT   MaskUnitAttention.__init__  s    & /
 (]]d*
(*99S!g+44IIg5"5	&"4r+   r8   r"   c                    UR                   u  p#nU R                  (       a  X0R                  U R                  -  -  OSnU R	                  U5      R                  USUSU R                  U R                  5      R                  SSSSSS5      nUR                  S5      u  pxn	U R                  S:  a?  UR                  X R                  XPR                  SU R                  5      R                  SS9nU R                  (       a  [        R                  " XxU	5      nO4XpR                  -  UR!                  SS	5      -  n
U
R#                  SS9n
X-  nUR!                  SS5      R                  USU R$                  5      nU R'                  U5      nU$ )
z4Input should be of shape [batch, tokens, channels]. r   r;   r   r      r/      r   )r2   r   r   r   r   rB   r   r   rA   unbindr=   amaxr   r3   scaled_dot_product_attentionr   	transposesoftmaxr   r   )rV   r8   rD   r{   r^   num_windowsr   qkvattns              r)   rc   MaskUnitAttention.forward+  sB   ''aCGCZCZq]]T-=-==>`ahhqk!!!RaT]]S[[\]_`bcefhiklm**Q-a==1q**k=="dmmTYY^_Y`A??..qQ7A

Nakk"b&99D<<B<'DAKK1%%aT\\:IIaLr+   )r   r   r   r   r   r   r   r   r   r   r   )r   r   FNN)rg   rh   ri   rj   rk   rm   jitFinalbool__annotations__rl   rT   rn   rc   ro   rp   rq   s   @r)   r   r      s     		%%  ',!5!5 !5 	!5
 !5 !5 !%!5 !5F %,,  r+   r   c                     ^  \ rS rSrSSS\R
                  \R                  SSSSSS4S	\S
\S\S\S\S\	\   S\
\R                     S\
\R                     S\S\S\S\4U 4S jjjrS\R                  S\R                  4S jrSrU =r$ )
HieraBlockiC        @        Nr   r   TFr   r   r   	mlp_ratio	drop_pathinit_values
norm_layer	act_layerr   r   use_expand_projr   c                   > XS.n[         TU ]  5         Xl        X l        U" U40 UD6U l        X:w  a=  SU l        U(       a  [        R                  " X40 UD6U l        O X!S-  :X  d   eS U l        OSU l        S U l        [        UUUU	U
U40 UD6U l
        Ub  [        U4SU0UD6O[        R                  " 5       U l        US:  a  [        U5      O[        R                  " 5       U l        U" U40 UD6U l        [#        U[%        X$-  5      4SU0UD6U l        Ub  [        U4SU0UD6O[        R                  " 5       U l        US:  a  [        U5      U l        g [        R                  " 5       U l        g )Nr   Tr/   Fr   r   r   )rS   rT   r   r   norm1	do_expandr$   r   r   r   r   r   r%   ls1r   
drop_path1norm2r   rl   mlpls2
drop_path2)rV   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rX   s                   r)   rT   HieraBlock.__init__D  s`   " /*r*
>!DNIIc9b9	'))) 	"DNDI%
 
	 JUI`:gE;E"Efhfqfqfs1:Q(9-BKKM.2.
wG$7 8TITQSTITI`:gE;E"Efhfqfqfs1:Q(9-BKKMr+   r8   r"   c           
      l   U R                  U5      nU R                  (       Ga  U R                  ba  U R                  U5      nUR                  UR                  S   U R
                  R                  SUR                  S   5      R                  SS9nO[        R                  " UR                  UR                  S   U R
                  R                  SUR                  S   5      R                  SS9UR                  UR                  S   U R
                  R                  SUR                  S   5      R                  SS9/SS9nXR                  U R                  U R                  U5      5      5      -   nXR                  U R                  U R                  U R!                  U5      5      5      5      -   nU$ )Nr   r;   r   r   )r   r   r   r=   r2   r   r   r   rm   catmeanr   r   r   r   r   r   )rV   r8   x_norms      r)   rc   HieraBlock.forwardv  sR   A>>>yy$IIf%FF1771:tyy'9'92qwwr{KPPUVPWIIFF1771:tyy'9'92qwwr{KPPUVPWFF1771:tyy'9'92qwwr{KPPUVPW 	 6): ;<< $**Q-)@ ABBr+   )r   r   r   r   r   r   r   r   r   r   r   r   )rg   rh   ri   rj   r$   	LayerNormGELUrl   r5   r   r   Moduler   rT   rm   rn   rc   ro   rp   rq   s   @r)   r   r   C  s      #"+/*,,,)+ $(',0R0R 0R 	0R
 0R 0R "%0R RYY0R BII0R 0R 0R "0R !%0R 0Rd %,,  r+   r   c                      ^  \ rS rSrSr   SS\S\S\\S4   S\\S4   S\\S4   S	\4U 4S
 jjjr SS\	R                  S\\	R                     S\	R                  4S jjrSrU =r$ )
PatchEmbedi  zHPatch embed that supports any number of spatial dimensions (1d, 2d, 3d).dim_inr   kernel.stridepaddingrB   c	                    > XxS.n	[         T
U ]  5         [        U5      U l        X`l        [        U R                  5      " UU4UUUS.U	D6U l        g )Nr   )kernel_sizer   r   )rS   rT   r1   spatial_dimsrB   r*   r   )rV   r   r   r   r   r   rB   r   r   r   rX   s             r)   rT   PatchEmbed.__init__  s`     /KD--.
 
 
	r+   r8   r-   r"   c                 Z   UbH  [        UR                  SS  US9nU R                  XR                  [        R
                  5      -  5      nOU R                  U5      nU R                  (       a=  UR                  UR                  S   UR                  S   S5      R                  SS5      nU$ )Nr/   r6   r   r   r;   )r7   r2   r   torm   r   rB   r   rV   r8   r-   s      r)   rc   PatchEmbed.forward  s    
 #$GD		!ggejj112A		!A<<		!''!*aggaj"5??1EAr+   )r   rB   r   )TNNrR   )rg   rh   ri   rj   rk   rl   r   r   rT   rm   rn   r   rc   ro   rp   rq   s   @r)   r   r     s    R !

 
 #s(O	

 #s(O
 38_
 
 
8 ,0|| 5<<( 
	 r+   r   c            =         ^  \ rS rSr                               S@S\\S4   S\S\S\S\S	\S
\\S4   S\S\\S4   S\\S4   S\\S4   S\S\S\S\\S4   S\\S4   S\\S4   S\S\S\	\   S\S\S\
\\\R                     4   S\S\S\S\S\S \\\4   4:U 4S! jjjrS" r\R"                  R$                  S# 5       r\R"                  R$                  SAS$\S%\4S& jj5       r\R"                  R$                  SBS'\S%S4S( jj5       r\R"                  R$                  S) 5       rSCS\S	\	\   S*\4S+ jjrS,\R2                  S-\S%\R2                  4S. jrS%\R2                  4S/ jr       SDS,\R2                  S0\	\R2                     S1\	\
\\\   4      S2\S3\S4\S5\S$\S%\
\\R2                     \\R2                  \\R2                     4   4   4S6 jjr    SES1\
\\\   4   S7\S8\S$\4S9 jjr  SCS,\R2                  S0\	\R2                     S:\S%\R2                  4S; jjrSAS<\S%\R2                  4S= jjr  SFS,\R2                  S0\	\R2                     S%\R2                  4S> jjr!S?r"U =r#$ )Gr    i  Nimg_size.in_chans	embed_dim	num_headsnum_classesglobal_poolstagesrv   r   mask_unit_sizemask_unit_attnr   dim_mulhead_mulpatch_kernelrO   patch_paddingr   drop_path_rater   fix_initweight_initr   	drop_ratepatch_drop_ratehead_init_scalesep_pos_embedabs_win_pos_embedglobal_pos_sizec                    > [         T/U ]  5         UUS.n XPl        X l        SU l        [        U5      n[        U[        5      (       a  [        U5      nUU l	        [        UU5       V!V"s/ s H  u  n!n"U!U"-  PM     sn"n!U l        [        R                  " U R                  5      n#[        R                  " U
5      n$[        R                  " U	5      n%U[        U5      :  d   eXsU l        U l        U$U
sU l        U l        [        U R                  U R$                  5       V!V"s/ s H  u  n!n"U!U"-  PM     sn"n!U l        [)        S[        U5      S-   5       V!s/ s H  n![+        US U! 5      S-
  PM     sn!U l        UU l        [1        UUUUU40 U D6U l        S U l        S U l        S U l        S U l        U(       a  [<        R>                  " [@        RB                  " SU R                  S   U R                  S   -  U40 U D65      U l        [<        R>                  " [@        RB                  " SU R                  S   U40 U D65      U l        OU(       ai  [<        R>                  " [@        RB                  " SU/UQ70 U D65      U l        [<        R>                  " [@        RB                  " SU/U
Q70 U D65      U l        O2[<        R>                  " [@        RB                  " SU#U40 U D65      U l        [E        UUU	/[        U R,                  S S 5      -  5      U l#        [I        UUU	/[        U R,                  S S 5      -  U R,                  U5      U l%        U R,                  S U  V&s/ s H  n&U&S-   PM
     n'n&Sn([+        U5      n)[M        UU)5      n*[<        RN                  " 5       U l(        / U l)        [)        U)5       H  n!Un+UU(   n,U!S-
  U R,                  ;   a*  [        X=-  5      n+[        XN-  5      nU(S-  n(U!U';   a  U$U%-  n$[U        SUU+UUU*U!   UUU!U';   a  U%OSU$UU,S.U D6n-U+nU!U R,                  ;   a5  U =RR                  [W        U+SU(S-   -  SU R,                  U(    3S	9/-  sl)        U RP                  RY                  U-5        M     U=U l-        U l.        [_        UU4UUUS
S.U D6U l0        U(       aQ  [<        Rb                  Re                  U R8                  SS9  [<        Rb                  Re                  U R:                  SS9  OjU R4                  b(  [<        Rb                  Re                  U R4                  SS9  U R6                  b(  [<        Rb                  Re                  U R6                  SS9  US:w  a(  US:X  a  [f        O[h        n.[k        U.SS9n.[m        U.U 5        U(       a  U Ro                  5         [        U R`                  Rp                  [<        Rr                  5      (       as  U R`                  Rp                  Rt                  Rv                  Ry                  U5        U R`                  Rp                  Rz                  Rv                  Ry                  U5        g g s  sn"n!f s  sn"n!f s  sn!f s  sn&f )Nr   Fr   r/   r   r;   )r   r   r   r   r   r   r   r   r   r   r   zblocks.)num_chs	reductionmoduleNLC)	pool_typer   r   	input_fmtg{Gz?)stdskipjaxhead.fc)classifier_name )>rS   rT   r   r   grad_checkpointingr   
isinstancerl   r   rO   r<   tokens_spatial_shaper\   r]   r1   rv   r   mu_sizer   mask_spatial_shaper?   r>   ru   r   r   patch_embed	pos_embedpos_embed_winpos_embed_spatialpos_embed_temporalr$   	Parameterrm   zerosrL   unrollrs   rerollr   
ModuleListblocksfeature_infor   dictappendnum_featureshead_hidden_sizer   headinittrunc_normal_r   r   r   r   fix_init_weightfcr   weightdatamul_bias)0rV   r   r   r   r   r   r   r   rv   r   r   r   r   r   r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rW   rF   
num_tokensflat_mu_sizeflat_q_strider8   q_pool_blocks	cur_stagedepthdprr   r   blockinit_fnrX   s0                                                  r)   rT   Hiera.__init__  s   F 	/& "'#J/
h$$ *H(8;Hl8S$T8S1Q!V8S$T!YYt889
yy0		(+F###%+"T],8.)d)69$:S:SUYUhUh6i"j6ida166i"j8=aVq8QR8Q13vbqz?Q.8QR.%
 
 26599=:>%'\\At88;d>W>WXY>ZZ\elikl&D" ')llAt88;YM"M'D# !!#ekk!Y._._\^._!`%'\\%++a2b^2b_a2b%c"!#ekk!Z.YVX.Y!Z JT__Sb122

 JT__Sb122OO
 )-(@A(@1Q(@A 	F'>mmouAG "0	!:1u'i12	 45	Q	% ]2L #a&'%+,+=-1( /#5 E  IDOO#!!A	!4DwW[WfWfgpWqVrMst&v v!KKu%A D 5>=D1+
 "!
 
	 GG!!$"8"8d!CGG!!$"9"9t!D~~)%%dnn$%?!!-%%d&8&8d%C& )4)=o?GgyAG&  "diillBII..IILL$$))/:IILL""''8 /u %U #kRZ Bs   )Y!YY"Y'c                    S n[        U R                  5       Hm  u  p#U" UR                  R                  R                  R
                  US-   5        U" UR                  R                  R                  R
                  US-   5        Mo     g )Nc                 T    U R                  [        R                  " SU-  5      5        g )N       @)div_r\   sqrt)param	_layer_ids     r)   rescale&Hiera.fix_init_weight.<locals>.rescaled  s    JJtyyy12r+   r   )	enumerater  r   r   r  r  r   fc2)rV   r%  layer_idlayers       r)   r  Hiera.fix_init_weightc  sb    	3  )5OHEJJOO**//A>EIIMM((--x!|<  6r+   c                 L    U R                   b  S/$ U R                  b  SS/$ SS/$ )Nr   pos_embed_absr   r   r   )r   r-  rV   s    r)   no_weight_decayHiera.no_weight_decayk  s7    >>%= +#_55')=>>r+   coarser"   c                     [        SSS/S9$ )NzW^pos_embed|pos_embed_spatial|pos_embed_temporal|pos_embed_abs|pos_embed_win|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr  )r  )rV   r1  s     r)   group_matcherHiera.group_matchert  s    k-/CD
 	
r+   enablec                     Xl         g rR   )r   )rV   r6  s     r)   set_grad_checkpointingHiera.set_grad_checkpointing{  s    "(r+   c                 .    U R                   R                  $ rR   )r  r  r.  s    r)   get_classifierHiera.get_classifier  s    yy||r+   reset_otherc                 D    Xl         U R                  R                  XUS9  g )Nr=  )r   r  reset)rV   r   r   r=  s       r)   reset_classifierHiera.reset_classifier  s    &		kJr+   r8   
mask_ratioc                    UR                   S   n[        R                  " U R                  5      n[	        USU-
  -  5      n[
        R                  " X4UR                  S9n[
        R                  " USS9n[
        R                  " USS9n[
        R                  " X4/UR                  S9n	SU	SS2SU24'   [
        R                  " U	SUS9n	U	R                  5       $ )zr
Generates a random mask, mask_ratio fraction are dropped.
1 is *keep*, 0 is *remove*. Useful for MAE, FLIP, etc.
r   r   )r   r   N)r   index)r2   r\   r]   r   rl   rm   randr   argsortr  gatherr   )
rV   r8   rC  rD   r   len_keepnoiseids_shuffleids_restorer-   s
             r)   get_random_maskHiera.get_random_mask  s    
 GGAJii 7 78{a*n56

1!((; mmEq1mmKQ7 {{A+AHH=Q		\||Da{;yy{r+   c                    U R                   by  U R                   R                  U R                  5      n[        R                  " U R
                  UR                  SS  SSS9nX2-   nUR                  S5      R                  SS5      nOU R
                  b  U R
                  nOhU R                  R                  SU R                  S   S5      [        R                  " U R                  U R                  S   U R                  S   -  SS9-   nX-   nU$ )	Nr   bicubicT)r0   mode	antialiasr/   r   r   r   )r   tiler   r3   r4   r   r2   r[   r   r   repeatr   rm   repeat_interleaver   )rV   r8   r   r   s       r)   
_pos_embedHiera._pos_embed  s   ) !..33D4K4KLM"((-	I "1I!))!,66q!<I^^'I &&--a1J1J11MqQ''++--a043L3LQ3OO  Mr+   r-   indicesnorm
stop_early
output_fmtintermediates_onlyc	           	         U(       a   S5       eUS;   d   S5       eU(       aO  [        [        U R                  5      U5      u  pU	 Vs/ s H  oR                  U   PM     n	nU R                  U
   n
O![        [        U R                  5      U5      u  pUb,  UR                  " UR
                  S   S/U R                  Q76 nOSnU R                  XS9nU R                  U5      nU R                  U5      nUbZ  XS   R                  SU R                  UR
                  S	   5         R	                  UR
                  S   S
UR
                  S
   5      n/ n[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SU
S-    n[        U5       H  u  pU R                   (       a/  [        R                  R                  5       (       d  [#        X5      nOU" U5      nX;   d  MT  U R%                  XUS9nUR'                  US:X  a  UR)                  SSSS	5      OU5        M     U(       a  U$ X4$ s  snf )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

z'normalization of features not supported)NCHWNHWCz(Output format must be one of NCHW, NHWC.Nr   r   r-   .Nr/   r;   r^  r   )r   r1   ru   r  r=   r2   r   r   rV  r  rS  r   rm   r   is_scriptingr'  r   r   r  r  rA   )rV   r8   r-   rX  rY  rZ  r[  r\  r1  take_indices	max_indexrW   
patch_maskintermediatesr  blkx_ints                    r)   forward_intermediatesHiera.forward_intermediates  s   . BBBx--Y/YY-&:3t;OQX&Y#L8DE1OOA.LE	2I&:3t{{;KW&U#L1771:qK43J3JKJJQ0OOAKKN y/&&q$,,
CDII!''RS*VXZ[ZaZabdZefA99!!##:[[F[[)a-0F'FA&&uyy/E/E/G/Gs&F At4$$*PVBVU]]1aA%>\ab (   E Fs   I
prune_norm
prune_headc                 (   U(       a1  [        [        U R                  5      U5      u  pVU R                  U   nO![        [        U R                  5      U5      u  pVU R                  SUS-    U l        U(       a  U R                  R                  SSS9  U$ )z?Prune layers not required for specified intermediates.
        Nr   r   Tr?  )r   r1   ru   r  r  r@  )rV   rX  rk  rl  r1  rc  rd  s          r)   prune_intermediate_layersHiera.prune_intermediate_layers  sy     &:3t;OQX&Y#L	2I&:3t{{;KW&U#Lkk.9q=1IIOOA4O0r+   return_intermediatesc           	      X   U R                   (       a.  U R                  S:  a  Ub   eU R                  XR                  S9nUb,  UR                  " UR                  S   S/U R
                  Q76 nOSnU R                  XS9nU R                  U5      nU R                  U5      nUbZ  XS   R                  SU R                  UR                  S   5         R                  UR                  S   SUR                  S   5      n/ n[        U R                  5       H  u  pgU R                  (       a/  [        R                  R!                  5       (       d  [#        Xq5      nOU" U5      nU(       d  MV  X`R$                  ;   d  Mg  UR'                  U R)                  XUS95        M     U(       a  X4$ U$ )	z
mask should be a boolean tensor of shape [B, #MUt*#MUy*#MUx] where #MU are the number of mask units in that dim.
Note: 1 in mask is *keep*, 0 is *remove*; mask.sum(dim=-1) should be the same across the batch.
r   N)rC  r   r`  ra  r/   r;   )trainingr   rM  r=   r2   r   r   rV  r  rS  r   r'  r  r   rm   r   rb  r   ru   r  r  )rV   r8   r-   rp  re  rf  rW   rg  s           r)   forward_featuresHiera.forward_features  sq    ==T11A5<<''6J6J'KD1771:qK43J3JKJJQ0OOAKKN y/&&q$,,
CDII!''RS*VXZ[ZaZabdZefA,FA&&uyy/E/E/G/Gs&F##__(<$$T[[D[%AB -  ##r+   
pre_logitsc                 X    U(       a  U R                  XS9nU$ U R                  U5      nU$ )N)ru  )r  )rV   r8   ru  s      r)   forward_headHiera.forward_head:  s/    3=DIIaI/ DH99Q<r+   c                 L    U R                  XS9nUc  U R                  U5      nU$ )Nr`  )rs  rw  r   s      r)   rc   Hiera.forward>  s1    
 !!!!/<!!!$Ar+   )r  r  r   r  r
  r   r   r   r   r   r	  r   r   rO   r   r   r   r   rv   r   r  ru   r   r  ))   r{  r   `   r     avgr/   r      r   r   )r/   r/   )   r  )TTFFTr   r   )   r  )r   r   )r   r   r   r   NT r   r   r   gMbP?FF)   r  NNF)T)NF)NNFTr^  FT)r   FTTrR   )$rg   rh   ri   rj   r   rl   strr   r5   r   r	   r   r$   r   rT   r  rm   r   ignorer/  r   r4  r8  r;  rA  rn   rM  rV  r   ri  rn  rs  rw  rc   ro   rp   rq   s   @r)   r    r      s    )3#$&3(..4/I$( !,2,2-3"$'+/!!6A"%(%*"'&+/7Ci9CHoi9 i9 	i9
 i9 i9 i9 #s(Oi9 i9 CHoi9 "#s(Oi9 "$),i9 "i9 i9  !i9"  S/#i9$  S/%i9& !c?'i9( )i9* "+i9, "%-i9. /i90 1i92 c4		?233i94 5i96 #7i98 #9i9:  ;i9<  $=i9> #38_?i9 i9V= YY? ? YY
D 
T 
 
 YY)T )T ) ) YY KC Khsm Kae K 5 U\\ 0u|| > ,07;#$',= ||=  5<<(=  eCcN34	= 
 =  =  =  !%=  =  
tELL!5tELL7I)I#JJ	K= B ./$#3S	>*  	
 , ,0).	+||+ 5<<(+ #'	+
 
+Z$ 5<<  ,0|| 5<<( 
	 r+   c                 4    U SSS SSS[         [        SSSS	.UE$ )
Nr}  )r   r{  r{  g?rP  Tzpatch_embed.projr   z
apache-2.0)urlr   rN   	pool_sizecrop_pctinterpolationfixed_input_sizer   r   
first_conv
classifierlicenser
   )r  kwargss     r)   _cfgr  I  s5    =t%.B(	  r+   zhiera_tiny_224.mae_in1k_ft_in1kztimm/zcc-by-nc-4.0)	hf_hub_idr  zhiera_tiny_224.mae)r  r  r   z hiera_small_224.mae_in1k_ft_in1kzhiera_small_224.maezhiera_base_224.mae_in1k_ft_in1kzhiera_base_224.maez$hiera_base_plus_224.mae_in1k_ft_in1kzhiera_base_plus_224.maez hiera_large_224.mae_in1k_ft_in1kzhiera_large_224.maezhiera_huge_224.mae_in1k_ft_in1kzhiera_huge_224.maez.hiera_small_abswin_256.sbb2_e200_in12k_ft_in1k)r      r  gffffff?)r  rN   r  z1hiera_small_abswin_256.sbb2_pd_e200_in12k_ft_in1kz&hiera_small_abswin_256.sbb2_e200_in12ki-.  )r  r   rN   r  z)hiera_small_abswin_256.sbb2_pd_e200_in12kzhiera_base_abswin_256.untrained)rN   r  c                 J   U R                  SU 5      n 0 nU R                  5        Hz  u  p4SU;   a  UR                  SS5      nUR                  S5      (       a  UR                  SS5      nO(UR                  S5      (       a  UR                  SS5      nUS:X  a  SnXBU'   M|     U$ )	Nmodel_statezhead.projection.zhead.fc.zencoder_norm.z
head.norm.znorm.r-  r   )getitemsreplace
startswith)
state_dictmodeloutputr   r   s        r)   checkpoint_filter_fnr    s    z:JF  " "		,j9A<<((		/<8A\\'""		'<0AAq	% #& Mr+   variant
pretrainedc           	      h    UR                  SS5      n[        [        U U4[        [	        USS9S.UD6$ )Nout_indicesr   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)popr   r    r  r  )r  r  r  r  s       r)   _create_hierar    sF    **]A.K 2[hG  r+   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )Nr|  r   )r   r/   r  r/   r   r   r   r  )hiera_tiny_224r  r  r  r  
model_argss      r)   r  r    s.    aEJ_j_DD^W]D^__r+   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )Nr|  r   r   r/      r/   r  r  )hiera_small_224r  r  s      r)   r  r    s.    aFJ`z`T*E_X^E_``r+   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )Nr|  r   r  r  r  )hiera_base_224r  r  s      r)   r  r    s.    aFJ_j_DD^W]D^__r+   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )NrM   r/   r  r  r  )hiera_base_plus_224r  r  s      r)   r  r    s.    qGJd:djIc\bIcddr+   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )N   r/   r/      $   r   r  r  )hiera_large_224r  r  s      r)   r  r    s.    qGJ`z`T*E_X^E_``r+   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )Nr  r   r  r  r  )hiera_huge_224r  r  s      r)   r  r    s.    qGJ_j_DD^W]D^__r+   c                 P    [        SSSSSSSSS	9n[        SS
U 0[        U40 UD6D6$ )Nr|  r   r  T)r  r  h㈵>r   F)r   r   r   r   r   r   r   r   r  )hiera_small_abswin_256r  r  s      r)   r  r    s@    -4aieUJ gjgDQ[Lf_eLfggr+   c           	      L    [        SSSSSSS9n[        S	SU 0[        U40 UD6D6$ )
Nr|  r   r  Tr  r   )r   r   r   r   r   r   r  )hiera_base_abswin_256r  r  s      r)   r  r    s;    -4]aotvJfZf4PZKe^dKeffr+   )r  rR   r  )Grk   r\   	functoolsr   typingr   r   r   r   r   r	   rm   torch.nnr$   torch.nn.functional
functionalr3   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   	_registryr   r   _builderr   	_featuresr   _features_fxr   _manipulater   r   __all__rl   r   r*   rn   r7   rJ   rL   rs   r   r   r   r    r  default_cfgsr  r  r   r  r  r  r  r  r  r  r  r  r   r+   r)   <module>r     s  0   ; ;     A    = * + 3 0 )=s =tBII = 	$s) 	5<< 	ELL 	 	<<Cy s) \\	B;RYY ;|NRYY NbA		 AHE EP( (VPBII Pf	 % S&%t(S&
 $S& ')S& 4S&* &t(+S&2 $3S&> +D-?S&F t GS&R ')SS&Z 4[S&f &t(gS&n $oS&z 5d 47{S&B 8 4:CS&J -d 4/KS&T 0 42US&^ &t 4(_S& Sl2
3 
D 
u 
 ` `
 a a
 ` `
 e e
 a a
 ` `
 h h g gr+   