
    RЦix&                    z<   S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
JrJrJrJrJrJrJrJr   SSK	Jr  SSKrSSKJr  SSKJs  Jr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$  SS	K%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;  S
SK<J=r=  S
SK>J?r?  S
SK@JArAJBrBJCrCJDrD  S
SKEJFrFJGrGJHrH  S/rI\R                  " \K5      rL\&\&\'S.rM        GSS\:S\NS\NS\OS\OS\OS\OS\PS\PS\\\R                        S\NS\R                  4S jjrR " S S \R                  5      rS " S! S"\R                  5      rT " S# S$\R                  5      rU " S% S&\R                  5      rV " S' S(\R                  5      rW   GSS)\R                  S*\YS+\NS,\O4S- jjrZ " S. S\R                  5      r[GSS0\R                  S1\YS2\OSS4S3 jjr\   GSS0\R                  S1\YS4\PS2\OSS4
S5 jjr]GSS0\R                  S1\YS2\OSS4S6 jjr^GSS0\R                  S1\YS2\OSS4S7 jjr_GSS8\YS4\PS2\OS\4S9 jjr`    GSS;\R                  S<\R                  S+\NS=\\N\N4   S>\YS?\OS\R                  4S@ jjra\R                  " 5       GSSA\[SB\YSC\YSD\OSS4
SE jj5       rc GSSF\\Y\R                  4   SA\[SC\YS\\Y\R                  4   4SG jjrdSF\\Y\R                  4   SA\[S\\Y\R                  4   4SH jreSF\\Y\R                  4   SA\[S\\Y\R                  4   4SI jrfSF\g4SJ jrh   GSSF\\Y\R                  4   SA\[SK\OS>\YS?\OS\\Y\R                  4   4SL jjriGSSM\YS\\Y\
4   4SN jjrj0 SO\j" SPSQ9_SR\j" SPSQ9_SS\j" STSPSSU9_SV\j" SWSPSSXSYSZ9_S[\j" S\SPSSU9_S]\j" S^SPSSXSYSZ9_S_\j" S`SPSSU9_Sa\j" SbSPSSXSYSZ9_Sc\j" SdSPSSU9_Se\j" SfSPSSXSYSZ9_Sg\j" ShSPSSU9_Si\j" SjSPSSXSYSZ9_Sk\j" SlSPSSU9_Sm\j" SnSPSSU9_So\j" SpSPSSXSYSZ9_Sq\j" SrSPSs9_St\j" SuSPSXSYSv9_0 Sw\j" SxSPSXSYSv9_Sy\j" SzSPSSU9_S{\j" S|SPSSXSYSZ9_S}\j" S~SPSSU9_S\j" SSPSSXSYSZ9_S\j" SSPSSU9_S\j" SSPSSXSYSZ9_S\j" S/S9_S\j" S/S9_S\j" S/S9_S\j" S/S9_S\j" SPSS9_S\j" SPSS9_S\j" SPSS9_S\j" SPSS9_S\j" SPSS9_S\j" SSPSSS9_E0 S\j" SSPSSS9_S\j" SSPSSS9_S\j" SSPSSS9_S\j" SSPSSS9_S\j" SSPSSS9_S\j" SSPSSS9_S\j" SSSPS9_S\j" SSSPS9_S\j" SSP\\ SS9_S\j" SSP\\ SS9_S\j" SSP\\ SS9_S\j" SSP\\ SS9_S\j" SSPS\\ SSSYS9_S\j" SSPS\\ SSSYS9_S\j" SSPS\\ SSSYS9_S\j" SSPS\\ SSSYS9_S\j" SSPS\\ SSSYS9_E0 S\j" SSPS\\ SSSYS9_S\j" SSPS\\ SSSYS9_S\j" SSPS\\ SSSYS9_S\j" SSPSSSSSS9_S\j" SSPSSSSS9_S\j" SSPSs9_S\j" SPSSSS9_S\j" SPSSS9_S\j" SPSXSSS9_S\j" SSS9_S\j" 5       _S\j" SP\#\$S9_S\j" SP\#\$SYSXS9_S\j" SP\#\$SYSS9_S\j" SP\#\$SS9_S\j" SP\#\$SYSXSS9_S\j" SP\!\"SYS9_E0 S\j" SP\!\"SYSSS9_S\j" SP\#\$SYS9_S\j" SP\#\$SYSSS9_S\j" \#\$S9_S\j" SP\#\$SSXSS9_S\j" SP\#\$SS9_S\j" SP\#\$SSXSS9_S\j" SP\#\$SYS9_S\j" SP\#\$SYSSS9_S\j" SP\#\$S9_S\j" SP\#\$SYS9_S\j" SP\#\$SYSXSS9_S\j" SP\!\"SYS9_S\j" SP\!\"SYSSS9_S\j" SP\#\$SYS9_S\j" S/\#\$SYSSS9_S\j" SP\#\$S9_E0 S\j" SP\#\$S9_S\j" SP\#\$SYSXSS9_S\j" SP\#\$SYS9_S\j" SP\#\$SS9_S\j" SP\!\"SYSGS 9_GS\j" SP\#\$SYSGS 9_GS\j" SP\#\$SS9_GS\j" SP\#\$SYSGS 9_GS\j" SP\#\$GSS9_GS\j" SP\#\$SYGSGS 9_GS\j" SP\!\"SYGSGS 9_GS	\j" SP\#\$SYGS
GS 9_GS\j" SP\#\$SYGS
GS 9_GS\j" SP\#\$SYGSGS 9_GS\j" SPGSGS\#\$GSGS9_GS\j" SPGS\#\$SYGSGS9_GS\j" SPGS\#\$SSYGSGS9_E0 GS\j" SPGS\#\$SYGSGS9_GS\j" SP\#\$SYGSGS 9_GS\j" SP\#\$SYSGSGS9_GS\j" SP\#\$SYGSGS 9_GS\j" SP\#\$SYGSGS 9_GS\j" SPGS\#\$SYGSGS9_GS\j" SPGS\#\$SYGSGS9_GS \j" SPGSGS\#\$SYGSGS!9_GS"\j" SPGSGS\#\$SYGS
GS!9_GS#\j" SP\#\$GSGSSYGS$GS
GS%9_GS&\j" SPGS'GS\#\$SYGS
GS!9_GS(\j" SPGS'\#\$GS$SYSGS
GS)9_GS*\j" SPGS'\#\$SYGSGS9_GS+\j" SPGS'\#\$GS$SYSGSGS)9_GS,\j" SPGS'GS\#\$SYGSGS!9_GS-\j" SPGS'GS\#\$SYGSGS!9_GS.\j" SPGS'GS\#\$SYGSGS!9_E0 GS/\j" SPGS'GS\#\$SYGS
GS!9_GS0\j" SPGS'\#\$SYGS
GS9_GS1\j" SPGS'GS\#\$SYGSGS!9_GS2\j" SPGS'GS\#\$SYGSGS!9_GS3\j" SPGS'GS\#\$SYGSGS!9_GS4\j" SPGS'GS\#\$SYGSGS!9_GS5\j" SPGS\#\$GSGS69_GS7\j" SPGS\#\$GSGS69_GS8\j" SPGS\#\$SYGSGS99_GS:\j" SPGS\#\$SYSGSGS;9_GS<\j" SPGS\#\$SYGS=GS>9_GS?\j" S/SSGS@9_GSA\j" S/SSGS@9_GSB\j" S/S9_GSC\j" S/S9_GSD\j" S/S9_GSE\j" SPGS\#\$GSFSYGSG9_E0 GSH\j" SPGS\#\$SSYSGSI9_GSJ\j" SPGS\#\$GSFSYGSG9_GSK\j" SPGS\#\$SSYSGSI9_GSL\j" GSMSSPSSGSN9_GSO\j" GSPSSPSSGSN9_GSQ\j" GSRSSPSSGSN9_GSS\j" GSTSSPSSGSN9_GSU\j" GSVSSPSSGSN9_GSW\j" GSXSSPSSGSN9_GSY\j" GSZSSPSSSGS[9_GS\\j" GS]SSPSSSGS[9_GS^\j" GS_SSPSSGSN9_GS`\j" GSaSSPSSGSN9_GSb\j" GScSSPSSGSN9_GSd\j" GSeSSPSSSGS[9_GSf\j" GSgSSPSSSGS[9_GSh\j" S/S9_E0 GSi\j" S/S9_GSj\j" S/S9_GSk\j" GSlSPGS'\\ SGSm9_GSn\j" GSoSPGS'\\ SGSm9_GSp\j" GSqSPGS'\\ SGSm9_GSr\j" GSsGS'\\ SGSt9_GSu\j" GSvGS'\\ SGSt9_GSw\j" GSxGS'SSY\\ SGSy9_GSz\j" GS{GS'\\ SGSt9_GS|\j" SPSSGS}9_GS~\j" SPSS9_GS\j" SPSS9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPSXSGS}9_E0 GS\j" SPGSSGS}9_GS\j" SPGSSGS}9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPSXSGS}9_GS\j" SPGSSGS}9_GS\j" SPSS9_GS\j" SPSS9_GS\j" SPGS$SGS}9_GS\j" SPGS$SGS}9_GS\j" SPSXSGS}9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPGSSGS}9_GS\j" SPSSGS}9_E0 GS\j" SPSXSGS}9_GS\j" SPSSGS}9_GS\j" SPSS9_GS\j" SPSS9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPSXSGS}9_GS\j" SPGSSGS}9_GS\j" SPGSSGS}9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPSXSGS}9_GS\j" SPGSSGS}9_GS\j" SPSS9_E0 GS\j" SPSS9_GS\j" SPSS9_GS\j" SPSS9_GS\j" SPSS9_GS\j" SPSS9_GS\j" SPGS$SGS}9_GS\j" SPGS$SYSS9_GS\j" SPSXSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPSSYSS9_GS\j" SPGSSYSS9_E0 GS\j" SPGSSYSS9_GS\j" SPGSSYSS9_GS\j" SPGSSYSS9_GS\j" SPGSSYSS9_GS\j" SPSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPGSSGS}9_GS\j" SPSSGS}9_GS\j" SPSXSGS}9_GS\j" SPGS$SYSS9_GS\j" SPGS$SYSS9_GS\j" SPGS\#\$GSGS9_GS\j" SPGS\#\$GSGS9_GS\j" SPGS\#\$GSGS9_GS\j" SPGS\#\$GSGS9_GS\j" SPSSS9_E0 GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSSGS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSSGS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSSGS9_GS\j" SPSSSGS9_E0 GS\j" SPSXSYS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSS9_GS\j" SPSSSGS9_GS\j" SPSSSGS9_GS\j" SPSXSYS9_GS\j" SGS9_GS\j" SPSSS9_GS\j" SPSSSGS9_GS\j" SPSXSYS9_GS\j" SGS9_GS\j" SPSSYS9_GS\j" SPSSSYGS9_GS\j" SPSXSYS9_GS\j" SPSSYSS9_E0 GS\j" SPGS\\ SSYSGS9_GS\j" SP\\ SSYSGS9_GS\j" SP\#\$GSSYSGS9_GS\j" SP\#\$GSSYSGS9_GS\j" SP\#\$GSSYSGS9_GS\j" SP\#\$GSSYSGS9_GS\j" SP\#\$GSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS \j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SP\#\$GSSSYSGS9_GS\j" SPGSSS9_E\j" SPGSSS9\j" SPGSSS9\j" GSSS9\j" SP\\ SYS9\j" SP\\ SYS9\j" SP\\ SYS9\j" SP\\ SYS9\j" S/\\ SYGS9\j" S/S\\ SYGS9\j" SP\\ SYSGS 9\j" SP\\ SYSGS 9\j" SP\\ SYSGS 9\j" SP\\ SYSGS 9GS.Erk\kR                  5        V Vs/ s H1  u  pUR                  GSS:5      (       d  M  GS	UGS   S   ;   d  M/  U PM3     snn rn\n HB  ro\R                  " \k\o   5      rq\qGS
   SP:X  a	  SP\o-   \qGS
'   \q\k\oR                  GSGS5      '   MD     \F" \k5      rk\R                  R                  GSGS5      R                  5       GS:H  ru  GSGS\YGS\OGS\\O   S\\[GS4   4GS jjrv\GGSGS\OS\[4GS jj5       rw\GGSGS\OS\[4GS jj5       rx\GGSGS\OS\[4GS jj5       ry\GGSGS\OS\[4GS jj5       rz\GGSGS\OS\[4GS jj5       r{\GGSGS\OS\[4GS jj5       r|\GGSGS\OS\[4GS jj5       r}\GGSGS\OS\[4GS jj5       r~\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS  jj5       r\GGSGS\OS\[4GS! jj5       r\GGSGS\OS\[4GS" jj5       r\GGSGS\OS\[4GS# jj5       r\GGSGS\OS\[4GS$ jj5       r\GGSGS\OS\[4GS% jj5       r\GGSGS\OS\[4GS& jj5       r\GGSGS\OS\[4GS' jj5       r\GGSGS\OS\[4GS( jj5       r\GGSGS\OS\[4GS) jj5       r\GGSGS\OS\[4GS* jj5       r\GGSGS\OS\[4GS+ jj5       r\GGSGS\OS\[4GS, jj5       r\GGSGS\OS\[4GS- jj5       r\GGSGS\OS\[4GS. jj5       r\GGSGS\OS\[4GS/ jj5       r\GGSGS\OS\[4GS0 jj5       r\GGSGS\OS\[4GS1 jj5       r\GGSGS\OS\[4GS2 jj5       r\GGSGS\OS\[4GS3 jj5       r\GGSGS\OS\[4GS4 jj5       r\GGSGS\OS\[4GS5 jj5       r\GGSGS\OS\[4GS6 jj5       r\GGSGS\OS\[4GS7 jj5       r\GGSGS\OS\[4GS8 jj5       r\GGSGS\OS\[4GS9 jj5       r\GGSGS\OS\[4GS: jj5       r\GGSGS\OS\[4GS; jj5       r\GGSGS\OS\[4GS< jj5       r\GGSGS\OS\[4GS= jj5       r\GGSGS\OS\[4GS> jj5       r\GGSGS\OS\[4GS? jj5       r\GGSGS\OS\[4GS@ jj5       r\GGSGS\OS\[4GSA jj5       r\GGSGS\OS\[4GSB jj5       r\GGSGS\OS\[4GSC jj5       r\GGSGS\OS\[4GSD jj5       r\GGSGS\OS\[4GSE jj5       r\GGSGS\OS\[4GSF jj5       r\GGSGS\OS\[4GSG jj5       r\GGSGS\OS\[4GSH jj5       r\GGSGS\OS\[4GSI jj5       r\GGSGS\OS\[4GSJ jj5       r\GGSGS\OS\[4GSK jj5       r\GGSGS\OS\[4GSL jj5       r\GGSGS\OS\[4GSM jj5       r\GGSGS\OS\[4GSN jj5       r\GGSGS\OS\[4GSO jj5       r\GGSGS\OS\[4GSP jj5       r\GGSGS\OS\[4GSQ jj5       r\GGSGS\OS\[4GSR jj5       r\GGSGS\OS\[4GSS jj5       r\GGSGS\OS\[4GST jj5       r\GGSGS\OS\[4GSU jj5       r\GGSGS\OS\[4GSV jj5       r\GGSGS\OS\[4GSW jj5       r\GGSGS\OS\[4GSX jj5       r\GGSGS\OS\[4GSY jj5       r\GGSGS\OS\[4GSZ jj5       r\GGSGS\OS\[4GS[ jj5       r\GGSGS\OS\[4GS\ jj5       r\GGSGS\OS\[4GS] jj5       r\GGSGS\OS\[4GS^ jj5       r\GGSGS\OS\[4GS_ jj5       r\GGSGS\OS\[4GS` jj5       r\GGSGS\OS\[4GSa jj5       r\GGSGS\OS\[4GSb jj5       r\GGSGS\OS\[4GSc jj5       r\GGSGS\OS\[4GSd jj5       r\GGSGS\OS\[4GSe jj5       r\GGSGS\OS\[4GSf jj5       r\GGSGS\OS\[4GSg jj5       r\GGSGS\OS\[4GSh jj5       r\GGSGS\OS\[4GSi jj5       r\GGSGS\OS\[4GSj jj5       r\GGSGS\OS\[4GSk jj5       r\GGSGS\OS\[4GSl jj5       r\GGSGS\OS\[4GSm jj5       r\GGSGS\OS\[4GSn jj5       r\GGSGS\OS\[4GSo jj5       r\GGSGS\OS\[4GSp jj5       r\GGSGS\OS\[4GSq jj5       r\GGSGS\OS\[4GSr jj5       r\GGSGS\OS\[4GSs jj5       r\GGSGS\OS\[4GSt jj5       r\GGSGS\OS\[4GSu jj5       r\GGSGS\OS\[4GSv jj5       r\GGSGS\OS\[4GSw jj5       r\GGSGS\OS\[4GSx jj5       r\GGSGS\OS\[4GSy jj5       r\GGSGS\OS\[4GSz jj5       r\GGSGS\OS\[4GS{ jj5       r\GGSGS\OS\[4GS| jj5       r\GGSGS\OS\[4GS} jj5       r\GGSGS\OS\[4GS~ jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       r\GGSGS\OS\[4GS jj5       Gr \GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr	\GGSGS\OS\[4GS jj5       Gr
\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\GGSGS\OS\[4GS jj5       Gr\H" \K0 GSS_GSS_GSS_GSS_GSS_GSS_GSS_GSS_GSS_GSGS_GSGS_GSS_GSS_GSS_GSS_GSS_GSGS_GSGS	GSGS.E5        g! \ a
    SSKJr   GNf = fs  snn f (  a  Vision Transformer (ViT) in PyTorch

A PyTorch implement of Vision Transformers as described in:

'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
    - https://arxiv.org/abs/2010.11929

`How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
    - https://arxiv.org/abs/2106.10270

`FlexiViT: One Model for All Patch Sizes`
    - https://arxiv.org/abs/2212.08013

The official jax code is released and available at
  * https://github.com/google-research/vision_transformer
  * https://github.com/google-research/big_vision

Acknowledgments:
  * The paper authors for releasing code and weights, thanks!
  * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch
  * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
  * Bert reference code checks against Huggingface Transformers and Tensorflow Bert

Hacked together by / Copyright 2020, Ross Wightman
    N)OrderedDict)partial)	AnyCallableDictOptionalSetTupleTypeUnionList)Literal)Final)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)	AttentionDiffAttentionAttentionPoolLatent
PatchEmbedMlpSwiGLUPackedSwiGLU	LayerNormRmsNormDropPathcalculate_drop_path_ratesPatchDropouttrunc_normal_lecun_normal_resample_patch_embedresample_abs_pos_embeduse_fused_attnget_act_layerget_norm_layermaybe_add_mask	LayerType
LayerScale   )build_model_with_cfg)feature_take_indices)named_apply
checkpointcheckpoint_seqadapt_input_conv)generate_default_cfgsregister_modelregister_model_deprecationsVisionTransformer) attndiffT
attn_layerdim	num_headsqkv_biasqk_norm
scale_norm	proj_bias	attn_drop	proj_drop
norm_layerdepthreturnc                     [        U [        5      (       a#  [        R                  U S 5      n U c
   SU  35       e[	        U [
        5      (       a  XS'   U " U4UUUUUUUU	S.UD6$ )NzUnknown attn_layer: rD   )r<   r=   r>   r?   r@   rA   rB   rC   )
isinstancestrATTN_LAYERSget
issubclassr   )r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   kwargss               ]/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/timm/models/vision_transformer.py_create_attnrN   Z   s     *c"" __Z6
%J)=j\'JJ% *m,,w      c            %       b  ^  \ rS rSrSrSSSSSSSSSS\R                  \\\	SSS4S	\
S
\
S\S\S\S\S\S\S\S\S\\   S\S\\R                     S\\R                     S\\R                     S\S\
SS4$U 4S jjjrS S\R&                  S\\R&                     S\R&                  4S jjrSrU =r$ )!Block~   z)Transformer block with pre-normalization.      @FT        Nr   r;   r<   	mlp_ratior=   r>   scale_attn_normscale_mlp_normr@   rB   rA   init_values	drop_path	act_layerrC   	mlp_layerr:   rD   rE   c                 `  > [         TU ]  5         UUS.nU" U40 UD6U l        [        UU4UUUUUU
U	UUS.	UD6U l        U(       a  [        U4SU0UD6O[        R                  " 5       U l        US:  a  [        U5      O[        R                  " 5       U l
        U" U40 UD6U l        U" SU[        X-  5      UU(       a  UOSUU	S.UD6U l        U(       a  [        U4SU0UD6O[        R                  " 5       U l        US:  a  [        U5      U l        g[        R                  " 5       U l        g)a  Initialize Block.

Args:
    dim: Number of input channels.
    num_heads: Number of attention heads.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    qk_norm: If True, apply normalization to query and key.
    proj_bias: If True, add bias to output projection.
    proj_drop: Projection dropout rate.
    attn_drop: Attention dropout rate.
    init_values: Initial values for layer scale.
    drop_path: Stochastic depth rate.
    act_layer: Activation layer.
    norm_layer: Normalization layer.
    mlp_layer: MLP layer.
    attn_layer: Attention layer type (class or string).
    depth: Block index, passed to attention layer for depth-dependent init.
devicedtype	r<   r=   r>   r?   r@   rA   rB   rC   rD   rX   rT   Nin_featureshidden_featuresrZ   rC   biasdrop )super__init__norm1rN   r8   r+   nnIdentityls1r   
drop_path1norm2intmlpls2
drop_path2selfr;   r<   rU   r=   r>   rV   rW   r@   rB   rA   rX   rY   rZ   rC   r[   r:   rD   r^   r_   dd	__class__s                        rM   rh   Block.__init__   s;   R 	/*r*
 
  &!
 
	 FQ:cA{AbAVXVaVaVc1:R(9-R[[]*r*
 
0%3z
 
 FQ:cA{AbAVXVaVaVc1:R(9-R[[]rO   x	attn_maskc           
         XR                  U R                  U R                  U R                  U5      US95      5      -   nXR	                  U R                  U R                  U R                  U5      5      5      5      -   nU$ Nry   )rm   rl   r8   ri   rr   rq   rp   rn   rt   rx   ry   s      rM   forwardBlock.forward   sa    4::a=I)V WXX$**Q-)@ ABBrO   )r8   rm   rr   rl   rq   rp   ri   rn   N__name__
__module____qualname____firstlineno____doc__rj   GELUr   r   r   ro   floatboolr   r   Moduler*   rh   torchTensorr~   __static_attributes____classcell__rv   s   @rM   rQ   rQ   ~   ss   3  ""!$)#("!!+/!)+*3),$-)ISIS IS 	IS
 IS IS "IS !IS IS IS IS "%IS IS BIIIS RYYIS  BII!IS" "#IS$ %IS* 
+IS ISV (5<<2H TYT`T`  rO   rQ   c            %       h  ^  \ rS rSrSSSSSSSSSS\R
                  \\\SSS4S\	S	\	S
\
S\S\S\S\S\S\
S\
S\\
   S\
S\\R                     S\\R                     S\\R                     S\S\	SS4$U 4S jjjrS S jrS!S\R&                  S\\R&                     S\R&                  4S jjrSrU =r$ )"ResPostBlock   rS   FTrT   Nr   r;   r<   rU   r=   r>   rV   rW   r@   rB   rA   rX   rY   rZ   rC   r[   r:   rD   rE   c                   > [         TU ]  5         UUS.nXl        [        UU4UUUUUU
U	UUS.	UD6U l        U" U40 UD6U l        US:  a  [        U5      O[        R                  " 5       U l	        U" SU[        X-  5      UU(       a  UOS UU	S.UD6U l        U" U40 UD6U l        US:  a  [        U5      O[        R                  " 5       U l        U R                  5         g )Nr]   r`   rT   ra   rf   )rg   rh   rX   rN   r8   ri   r   rj   rk   rm   ro   rp   rn   rr   init_weightsrs   s                        rM   rh   ResPostBlock.__init__   s    , 	/& 
  &!
 
	  *r*
1:R(9-R[[] 
0%3z
 
  *r*
1:R(9-R[[]rO   c                    U R                   b}  [        R                  R                  U R                  R
                  U R                   5        [        R                  R                  U R                  R
                  U R                   5        g g r   )rX   rj   init	constant_ri   weightrn   rt   s    rM   r   ResPostBlock.init_weights  s[    'GGdjj//1A1ABGGdjj//1A1AB (rO   rx   ry   c           
          XR                  U R                  U R                  XS95      5      -   nXR                  U R	                  U R                  U5      5      5      -   nU$ r{   )rm   ri   r8   rr   rn   rp   r}   s      rM   r~   ResPostBlock.forward  sN    

499Q9+L MNN

488A; 788rO   )r8   rm   rr   rX   rp   ri   rn   rE   Nr   )r   r   r   r   rj   r   r   r   r   ro   r   r   r   r   r   r*   rh   r   r   r   r~   r   r   r   s   @rM   r   r      sN   
  ""!$)#("!!+/!)+*3),$-)77 7 	7
 7 7 "7 !7 7 7 7 "%7 7 BII7 RYY7  BII!7" "#7$ %7* 
+7 7rC (5<<2H TYT`T`  rO   r   c            '         ^  \ rS rSr% Sr\\   \S'   SSSSSSSSSS\R                  \
SSS	SSS4S
\S\S\S\S\S\S\S\S\S\S\\   S\S\\R                     S\\R                     S\\\R                        S\\   S\S\SS4&U 4S jjjrS#S jrS$S\R(                  S \\R(                     S\R(                  4S! jjrS"rU =r$ )%ParallelScalingBlocki  zParallel ViT block (MLP & Attention in parallel)
Based on:
  'Scaling Vision Transformers to 22 Billion Parameters` - https://arxiv.org/abs/2302.05442

fused_attnrS   FTrT   Nr   r;   r<   rU   r=   r>   rV   rW   r@   rB   rA   rX   rY   rZ   rC   r[   r:   rD   fuse_out_projrE   c                 &  > [         TU ]  5         UUS.nX-  S:X  d   S5       eU(       d  U(       a   S5       eX l        X-  U l        U R                  S-  U l        [        5       U l        [        X1-  5      nUSU-  -   nU" U40 UD6U l        [        R                  " UU4SU0UD6U l        U/U/S-  -   U l        U(       a  U R                  SS 5        O0[        R                  " [        R                   " U40 UD65      U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        [        R*                  " U
5      U l        [        R*                  " U	5      U l        U" 5       U l        U(       a1  [        R                  " UU-   U4SU0UD6U l        S U l        S U l        ODS U l        [        R                  " X4SU0UD6U l        [        R                  " UU4SU0UD6U l        Ub  [9        U4S	U0UD6O[        R$                  " 5       U l        US
:  a  [=        U5      O[        R$                  " 5       U l        U RA                  5         g )Nr]   r   $dim should be divisible by num_headsScale norms not supported         rd   mlp_biasrX   rT   )!rg   rh   r<   head_dimscaler&   r   ro   in_normrj   Linearin_projin_splitregister_parameter	Parameterr   emptyr   rk   q_normk_normDropoutrA   mlp_dropmlp_actout_projattn_out_projmlp_out_projr+   lsr   rY   reset_parameters)rt   r;   r<   rU   r=   r>   rV   rW   r@   rB   rA   rX   rY   rZ   rC   r[   r:   rD   r   r^   r_   ru   mlp_hidden_dimin_proj_out_dimrv   s                           rM   rh   ParallelScalingBlock.__init__  s$   . 	/!#K%KK#">V;VV9"(]]d*
(*Y_-(1s72!#,,yyoKHKK'(C5194##J5LL^)Jr)JKDM9@j5"5bkkm9@j5"5bkkmI.

9- {IIcN&:CViVSUVDM!%D $D !DM!#3!J)!Jr!JD "		.# TI TQS TDDOD[*S@k@R@acalalan09B),BKKM 	rO   c                 r    U R                   b*  [        R                  R                  U R                   5        gg)"Initialize parameters and buffers.N)r   rj   r   zeros_r   s    rM   r   %ParallelScalingBlock.reset_parametersb  s%    ==$GGNN4==) %rO   rx   ry   c                 "   UR                   u  p4nU R                  U5      nU R                  U5      n[        R                  " X`R
                  SS9u  pxpU R                  b  XpR                  -   nU R                  UR                  X4U R                  U R                  5      5      R                  SS5      nU R                  U	R                  X4U R                  U R                  5      5      R                  SS5      n	U
R                  X4U R                  U R                  5      R                  SS5      n
U R                  (       a?  [        R                  " XU
UU R                   (       a  U R"                  R$                  OSS9nOQXR&                  -  nXR                  SS5      -  n[)        X5      nUR+                  SS9nU R#                  U5      nX-  nUR                  SS5      R-                  X4U5      nU R/                  U5      nU R1                  U5      nU R2                  b&  U R3                  [        R4                  " X4SS95      nO#U R7                  U5      U R9                  U5      -   nXR;                  U R=                  U5      5      -   nU$ )Nr;   r,      rT   ry   	dropout_p)shaper   r   r   splitr   r   r   viewr<   r   	transposer   r   Fscaled_dot_product_attentiontrainingrA   pr   r)   softmaxreshaper   r   r   catr   r   rY   r   )rt   rx   ry   BNCyx_mlpqkvx_attnr8   s                rM   r~   ParallelScalingBlock.forwardg  s   ''a LLOLLOQ2>!==$MM)E KKqT^^T]]CDNNqRSTKKqT^^T]]CDNNqRSTFF17AA!QG??33a#.2mm$..**F JJA{{2r**D!$2D<<B<'D>>$'DXF!!!Q'//a8 U#e$ ==$eiiR@AA""6*T->->u-EEA twwqz**rO   )rA   r   rY   r   r   r   r   r   r   r   r   r   r   r   r<   r   r   r   r   r   )r   r   r   r   r   r   r   __annotations__rj   r   r   ro   r   r   r   r   r*   rh   r   r   r   r~   r   r   r   s   @rM   r   r     s    d  ""!$)#("!!+/!)+*337.2"'+A A  A  	A 
 A  A  "A  !A  A  A  A  "%A  A  BIIA  RYYA    RYY0!A " !+#A $ %A &  'A , 
-A  A F*
* *(5<<2H *TYT`T` * *rO   r   c            '         ^  \ rS rSr% Sr\\   \S'   SSSSSSSSSS\R                  \
SSS	SSS4S
\S\S\S\S\S\S\S\S\S\S\\   S\S\\R                     S\\R                     S\\\R                        S\\   S\S\SS4&U 4S jjjrS\4S jrS%S jrS\R*                  4S  jrS&S!\R*                  S"\\R*                     S\R*                  4S# jjrS$rU =r$ )'DiffParallelScalingBlocki  a<  Parallel ViT block with Differential Attention (MLP & Attention in parallel).

Combines the parallel MLP+Attention structure from 'Scaling Vision Transformers to
22 Billion Parameters' (https://arxiv.org/abs/2302.05442) with differential attention
from 'Differential Transformer' (https://arxiv.org/abs/2410.05258).
r   rS   FTrT   Nr   r;   r<   rU   r=   r>   rV   rW   r@   rB   rA   rX   rY   rZ   rC   r[   r:   rD   dual_lambdarE   c                   > [         TU ]  5         UUS.nX-  S:X  d   S5       eU(       d  U(       a   S5       eX l        X-  S-  U l        U R                  S-  U l        [        5       U l        [        X1-  5      nUSU-  -   nU" U40 UD6U l        [        R                  " UU4SU0UD6U l        U/U/S-  -   U l        U(       a  U R                  S	S 5        O0[        R                  " [        R                   " U40 UD65      U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        [        R*                  " U
5      U l        Xl        [1        SU R                  -  4S
S0UD6U l        UU l        U(       a  [        R                  " [        R                   " S[        R6                  US95      U l        [        R                  " [        R                   " S[        R6                  US95      U l        S =U l        =U l        =U l         U l!        GO*S =U l        U l        [        R                  " [        R                   " U R                  [        R6                  US95      U l        [        R                  " [        R                   " U R                  [        R6                  US95      U l        [        R                  " [        R                   " U R                  [        R6                  US95      U l         [        R                  " [        R                   " U R                  [        R6                  US95      U l!        [        R*                  " U	5      U l"        U" 5       U l#        [        R                  " UU-   U4SU0UD6U l$        Ub  [K        U4SU0UD6O[        R$                  " 5       U l&        US:  a  [O        U5      O[        R$                  " 5       U l(        SU l)        U RU                  U5        U RW                  5         g )Nr]   r   r   r   r   r   r   rd   r   epsh㈵>rf   )r_   r^   rX   rT   皙?),rg   rh   r<   r   r   r&   r   ro   r   rj   r   r   r   r   r   r   r   r   rk   r   r   r   rA   attn_drop_pr   sub_normr   float32lambda_alambda_b	lambda_q1	lambda_k1	lambda_q2	lambda_k2r   r   r   r+   r   r   rY   lambda_initset_lambda_initr   )rt   r;   r<   rU   r=   r>   rV   rW   r@   rB   rA   rX   rY   rZ   rC   r[   r:   rD   r   r^   r_   ru   r   r   rv   s                           rM   rh   !DiffParallelScalingBlock.__init__  s#   . 	/!#K%KK#">V;VV9"(A-]]d*
(*Y_-(1s72!#,,yyoKHKK'(C5194##J5LL^)Jr)JKDM9@j5"5bkkm9@j5"5bkkmI.$  DMM 1BtBrB&LLRu}}U[)\]DMLLRu}}U[)\]DMPTTDNTT^Tdnt~,00DMDM\\%++dmm5==ag*hiDN\\%++dmm5==ag*hiDN\\%++dmm5==ag*hiDN\\%++dmm5==ag*hiDN

9- { 		#"6R)RrRDOD[*S@k@R@acalalan09B),BKKMU# 	rO   c                 L    SS[         R                  " SU-  5      -  -
  U l        g )Nr   g333333?g333333ӿ)mathexpr   )rt   rD   s     rM   r   (DiffParallelScalingBlock.set_lambda_init  s!    txxu'=!==rO   c                    U R                   b)  [        R                  R                  U R                   5        U R                  (       aS  [        R                  R                  U R
                  5        [        R                  R                  U R                  5        g[        R                  R                  U R                  SSS9  [        R                  R                  U R                  SSS9  [        R                  R                  U R                  SSS9  [        R                  R                  U R                  SSS9  g)r   Nr   皙?meanstd)r   rj   r   r   r   r   r   normal_r   r   r   r   r   s    rM   r   )DiffParallelScalingBlock.reset_parameters  s    ==$GGNN4==)GGNN4==)GGNN4==)GGOODNNO<GGOODNNO<GGOODNNO<GGOODNNO<rO   c                    U R                   bA  [        R                  " U R                   5      n[        R                  " U R                  5      nO[        R                  " [        R                  " U R
                  U R                  -  SS9R                  5       5      n[        R                  " [        R                  " U R                  U R                  -  SS9R                  5       5      nX-
  U R                  -   $ )Nr   r   )r   r   r   r   sumr   r   r   r   r   r   )rt   lambda_1lambda_2s      rM   _compute_lambda(DiffParallelScalingBlock._compute_lambda  s    ==$yy/Hyy/Hyy4>>DNN+JPR!S!Y!Y![\Hyy4>>DNN+JPR!S!Y!Y![\H"T%5%555rO   rx   ry   c                    UR                   u  p4nU R                  U5      nU R                  U5      n[        R                  " X`R
                  SS9u  pxpU R                  b  XpR                  -   nUR                  X4SU R                  -  U R                  5      R                  SS5      nU	R                  X4SU R                  -  U R                  5      R                  SS5      n	U
R                  X4U R                  SU R                  -  5      R                  SS5      n
U R                  U5      U R                  U	5      pU R                  5       R                  U5      nU R                  (       a  UR                  X0R                  SX@R                  5      nU	R                  X0R                  SX@R                  5      n	UR!                  S5      u  pU	R!                  S5      u  pU R"                  (       a  U R$                  OSn[&        R(                  " XXUS9n[&        R(                  " XXUS9nUUU-  -
  nOXR*                  -  nXR                  SS5      -  n[-        UU5      nUR/                  SS9nU R1                  U5      nUR3                  X0R                  SXD5      nUS S 2S S 2S4   UUS S 2S S 2S4   -  -
  nUU
-  nU R5                  U5      nUSU R6                  -
  -  nUR                  SS5      R                  X4U5      nU R9                  U5      nU R;                  U5      nU R=                  [        R>                  " UU4SS95      nXRA                  U RC                  U5      5      -   nU$ )	Nr   r   r   r,   rT   r   r   r   )"r   r   r   r   r   r   r   r   r<   r   r   r   r   r  type_asr   unbindr   r   r   r   r   r)   r   rA   r   r   r   r   r   r   r   rY   r   )rt   rx   ry   r   r   r   r   r   r   r   r   lambda_fullq1q2k1k2r   attn1attn2r   r8   s                        rM   r~    DiffParallelScalingBlock.forward  s   ''a LLOLLOQ2>!==$MM)E IIaA.>HHANIIaA.>HHANIIaDNNA,=>HHAN{{1~t{{1~1**,44Q7??		!^^Q==AA		!^^Q==AAXXa[FBXXa[FB,0MM((sI2221]fgE2221]fgE[500FJJA{{2r**D!$	2D<<B<'D>>$'D99Q18D1a=;aAg#>>DAXFv&1t///0!!!Q'//a8 U#e$ MM%))VUO<= twwqz**rO   )rA   r   rY   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r<   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   rj   r   r   ro   r   r   r   r   r*   rh   r   r   r   r   r  r~   r   r   r   s   @rM   r   r     s    d  ""!$)#("!!+/!)+*337.2 %+K K  K  	K 
 K  K  "K  !K  K  K  K  "%K  K  BIIK  RYYK    RYY0!K " !+#K $ %K & 'K , 
-K  K Z>S >=6 66 6(5<<2H 6TYT`T` 6 6rO   r   c            '       h  ^  \ rS rSrSrSSSSSSSSSSS\R                  \\\	S	SS4S
\
S\
S\
S\S\S\S\S\S\S\\   S\S\S\S\\R                     S\\R                     S\\R                     S\S\
SS4&U 4S jjjrS"S\R&                  S\\R&                     S\R&                  4S  jjrS!rU =r$ )#ParallelThingsBlocki<  zParallel ViT block (N parallel attention followed by N parallel MLP)
Based on:
  `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
r   rS   FTNrT   r   r;   r<   num_parallelrU   r=   r>   rV   rW   r@   rX   rB   rA   rY   rZ   rC   r[   r:   rD   rE   c                   > UUS.n[         TU ]  5         X0l        [        R                  " 5       U l        [        R                  " 5       U l        [        U5       GHh  nU R
                  R                  [        R                  " [        SU" U40 UD64S[        UU4UUUUU	UUUUS.	UD64SU
(       a  [        U4SU
0UD6O[        R                  " 5       4SUS:  a  [        U5      O[        R                  " 5       4/5      5      5        U R                  R                  [        R                  " [        SU" U40 UD64S	U" U4[        X-  5      UU(       a  UOS U	US
.UD64SU
(       a  [        U4SU
0UD6O[        R                  " 5       4SUS:  a  [        U5      O[        R                  " 5       4/5      5      5        GMk     g )Nr]   normr8   r`   r   rX   rY   rT   rp   )rc   rZ   rC   rd   re   )rg   rh   r  rj   
ModuleListattnsffnsrangeappend
Sequentialr   rN   r+   rk   r   ro   )rt   r;   r<   r  rU   r=   r>   rV   rW   r@   rX   rB   rA   rY   rZ   rC   r[   r:   rD   r^   r_   ru   _rv   s                          rM   rh   ParallelThingsBlock.__init__A  s   . /(]]_
MMO	|$AJJbmmKC.2./ (%#.''')   z#E;E"EZ\ZeZeZghY^hy1W#9 -  & IIR]];C.2./	$'$8'-;z""   z#E;E"EZ\ZeZeZghY^hy1W8 ,  ) %rO   rx   ry   c           	      V   Ub  / nU R                    HV  nUR                  U5      nUR                  XRS9nUR                  U5      nUR	                  U5      nUR                  U5        MX     U[        R                  " U5      R                  SS9-   nOFU[        R                  " U R                    Vs/ s H
  oD" U5      PM     sn5      R                  SS9-   nU[        R                  " U R                   Vs/ s H
  of" U5      PM     sn5      R                  SS9-   nU$ s  snf s  snf )Nr|   r   r   )
r  r  r8   r   rY   r  r   stackr   r  )rt   rx   ry   attn_outr8   r   ffns          rM   r~   ParallelThingsBlock.forward  s     H

16?/' # EKK)--!-44AEKKTZZ @ZTaZ @AEE!ELLAtyy9ySVy9:>>1>EE !A9s   2D!
8D&
)r  r  r  r   r   r   s   @rM   r  r  <  sZ    !"!"!$)#("+/!!!)+*3),$-+== = 	=
 = = = "= != = "%= = = = BII=  RYY!=" BII#=$ "%=& '=, 
-= =~ (5<<2H TYT`T`  rO   r  rx   	pool_typenum_prefix_tokensreduce_include_prefixc                 4   U(       d  U $ US:X  a  U S S 2S4   n U $ U(       a  U O
U S S 2US 24   n US:X  a  U R                  SS9n U $ US:X  a$  SU R                  SS9U R                  SS9-   -  n U $ US:X  a  U R                  SS9n U $ U(       a
   S	U 35       eU $ )
Ntokenr   avgr,   r   avgmaxg      ?maxzUnknown pool type )r   amax)rx   r!  r"  r#  s       rM   global_pool_nlcr*    s     GadG H 'AAa1B1C.C,D1A H ("qvv!v}qvv!v}45A H %1A H !B$6yk"BB=HrO   c            S       :  ^  \ rS rSr% Sr\\   \S'   SSSSSS	S
S
SSSSSSSSSSSSSSSSSSSSSSSSS\SSS\	\
\SS4*S\\\\\4   4   S\\\\\4   4   S\S\S\S   S\S\S\S\S\S\S\S \S!\S"\\   S#\S$\S%\S&\S'\S(\S)\\   S*\S\S+\S,\S-\S.\S/\S0\S1\S2\S3   S4\S5\S6\\   S7\\   S8\\   S9\\R.                     S:\\R.                     S;\S<S4RU 4S= jjjrSgS> jrShS?\S@\S<S4SA jjrSB\R.                  S<S4SC jr\R:                  R=                  5       SiSD\SE\S<S4SF jj5       r\R:                  R<                  S<\ \   4SG j5       r!\R:                  R<                  SjSH\S<\"\\\\#4   4   4SI jj5       r$\R:                  R<                  SkSJ\S<S4SK jj5       r%\R:                  R<                  S<\R.                  4SL j5       r&SlS\S\\   S<S4SM jjr'  SmS\\\\4      S\\\\4      S<S4SN jjr(SO\RR                  S<\RR                  4SP jr*        SnSO\RR                  SQ\\\\#\   4      SR\SS\ST\SU\SV\SW\SX\\RR                     S<\\#\RR                     \\RR                  \#\RR                     4   \"\\+4   4   4SY jjr,   SoSQ\\\#\   4   SZ\S[\S<\#\   4S\ jjr-     SpSO\RR                  S]\\\#\   \\   4   S^\SR\SS\SX\\RR                     S<\#\RR                     4S_ jjr.SlSO\RR                  SX\\RR                     S<\RR                  4S` jjr/SlSO\RR                  Sa\\   S<\RR                  4Sb jjr0SjSO\RR                  Sc\S<\RR                  4Sd jjr1SlSO\RR                  SX\\RR                     S<\RR                  4Se jjr2Sfr3U =r4$ )qr6   i  zVision Transformer

A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`
    - https://arxiv.org/abs/2010.11929
dynamic_img_size      r     r%        rS   TFNlearnr   rT   r7   img_size
patch_sizein_chansnum_classesglobal_poolr7   r&  r'  r(  r%  map	embed_dimrD   r<   rU   r=   r>   rV   rW   r@   rX   class_token	pos_embedno_embed_class
reg_tokenspre_norm
final_normfc_normpool_include_prefixdynamic_img_pad	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)skipresetjaxjax_nlhbmocor7   fix_initembed_layerembed_norm_layerrC   rZ   block_fnr[   r:   rE   c+                 P	  > [         T3U ]  5         U)U*S.n+US;   d   eU(       d  US:w  d   eUS;   d   eUc  US;   OUn,[        U$5      =(       d    [        n$[        U#5      n#[	        U%5      =(       d    [
        R                  n%X@l        X0l        XPl	        U=U l
        =U l        U l        U(       a  SOSU l        U =R                  U-  sl        UU l        UU l        UU l        UU l        UU l        S	U l        0 n-U(       a  U-R)                  [+        S	S
S95        U#b  U#U-S'   U"" S)UUUUU(       + US.U-DU+D6U l        U R,                  R.                  n.[1        U R,                  S5      (       a  U R,                  R3                  5       OUn/U(       a-  [
        R4                  " [6        R8                  " SSU40 U+D65      OSU l        U(       a-  [
        R4                  " [6        R8                  " SUU40 U+D65      OSU l        U(       a  U.OU.U R                  -   n0U(       a  US:X  a  SU l        O2[
        R4                  " [6        R8                  " SU0U40 U+D65      U l        [
        R@                  " US9U l!        US:  a  [E        UU R                  S9U l#        O[
        RH                  " 5       U l#        U(       a	  U$" U40 U+D6O[
        RH                  " 5       U l%        [M        UU5      n1[
        RN                  " [Q        U5       V2s/ s HA  n2U&" S)0 SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_SU1U2   _SU$_SU%_SU'_S U(_S!U2_U+D6PMC     sn26 U l)        [Q        U5       V2s/ s H  n2[+        S"U2 3UU/S#9PM     sn2U l*        U(       a  U,(       d	  U$" U40 U+D6O[
        RH                  " 5       U l+        US$:X  a!  [Y        U R                  4UU	U$U%S%.U+D6U l-        OSU l-        U(       a  U,(       a	  U$" U40 U+D6O[
        RH                  " 5       U l.        [
        R@                  " U5      U l/        US:  a"  [
        R`                  " U R                  U40 U+D6O[
        RH                  " 5       U l1        U S&:X  a  S'OU U l2        U!U l3        U S&:w  a  U Ri                  S	S(9  ggs  sn2f s  sn2f )*a  
Args:
    img_size: Input image size.
    patch_size: Patch size.
    in_chans: Number of image input channels.
    num_classes: Number of classes for classification head.
    global_pool: Type of global pooling for final sequence (default: 'token').
    embed_dim: Transformer embedding dimension.
    depth: Depth of transformer.
    num_heads: Number of attention heads.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: Enable bias for qkv projections if True.
    init_values: Layer-scale init values (layer-scale enabled if not None).
    class_token: Use class token.
    no_embed_class: Don't include position embeddings for class (or reg) tokens.
    reg_tokens: Number of register tokens.
    pre_norm: Enable norm after embeddings, before transformer blocks (standard in CLIP ViT).
    final_norm: Enable norm after transformer blocks, before head (standard in most ViT).
    fc_norm: Move final norm after pool (instead of before), if None, enabled when global_pool == 'avg'.
    drop_rate: Head dropout rate.
    pos_drop_rate: Position embedding dropout rate.
    attn_drop_rate: Attention dropout rate.
    drop_path_rate: Stochastic depth rate.
    weight_init: Weight initialization scheme.
    fix_init: Apply weight initialization fix (scaling w/ layer index).
    embed_layer: Patch embedding layer.
    embed_norm_layer: Normalization layer to use / override in patch embed module.
    norm_layer: Normalization layer.
    act_layer: MLP activation layer.
    block_fn: Transformer block layer.
r]   r8  r%  )r7   noner2  N)r&  r'  r(  r,   r   FNHWC)strict_img_size
output_fmtrC   )r3  r4  r5  r:  rd   rC  
feat_ratiorU  )r   )r"  r;   r<   rU   r=   r>   rV   rW   r@   rX   rB   rA   rY   rZ   r[   r:   rD   blocks.)modulenum_chs	reductionr9  )r<   rU   rC   rZ   rK  rL  needs_resetrf   )5rg   rh   r(   r   r'   rj   r   r6  r5  r7  num_featureshead_hidden_sizer:  r"  num_reg_tokenshas_class_tokenr=  rB  r,  grad_checkpointingupdatedictpatch_embednum_patcheshasattrrY  r   r   r   	cls_token	reg_tokenr<  r   pos_dropr!   
patch_droprk   norm_prer    r  r  blocksfeature_infor  r   	attn_poolrA  	head_dropr   headweight_init_moderP  r   )4rt   r3  r4  r5  r6  r7  r:  rD   r<   rU   r=   r>   rV   rW   r@   rX   r;  r<  r=  r>  r?  r@  rA  rB  r,  rC  rD  rE  rF  rG  rH  rI  rJ  rP  rQ  rR  rC   rZ   rS  r[   r:   r^   r_   ru   use_fc_norm
embed_argsrh  r]  	embed_lendprirv   s4                                                      rM   rh   VisionTransformer.__init__  s   X 	/JJJJkW4441111AHk%==V]#J/<9
)*:;!),7	& &ENNND1DN&1q*,(*,#6  0"'
d5VLM''7J|$& 	
!+	
 	
 	
 &&225<T=M=M|5\5\D$$//1bl	MXekk!Q	&HR&HI^bV`ekk!Z&Qb&QRfj#1K{TE[E[7[	I/!DN\\%++aI*TQS*TUDN

]3Q*"&"8"8DO
 !kkmDO7?
933R[[]'>mm* 5\+&#* ")  # $ "	
   !0  . $ ( ) ) a& & $ $  &!" %( "+&# $. Y^^cXdfXdSTD'!yINXdf3=kJy/B/WYWbWbWd	 %0##%# DN "DN6@[z)2r2VXVaVaVcI.DORSOBIIdnnk@R@Y[YdYdYf	+6&+@k & %0 ![&#,fs   AR$R#c                    [         R                  " 5          [        U R                  5       H  u  p[        R
                  " SUS-   -  5      nUR                  R                  R                  R                  U5        UR                  R                  R                  R                  U5        M     SSS5        g! , (       d  f       g= f)z9Apply weight initialization fix (scaling w/ layer index).g       @r,   N)r   no_grad	enumeratero  r   sqrtr8   projr   div_rp   fc2)rt   layer_idlayerr   s       rM   fix_init_weight!VisionTransformer.fix_init_weighti  sy    ]]_#,T[[#9		#A"67

&&++E2		$$))%0 $: __s   BB88
Cmoder_  c                    U=(       d    U R                   nUS;   d   eSU;   a!  [        R                  " U R                  5      * OSnU R                  b  [        U R                  SS9  U R                  b(  [        R                  R                  U R                  SS9  U R                  b(  [        R                  R                  U R                  SS9  [        [        XUS9U 5        U R                  (       a  U R                  5         gg)	a  Initialize model weights.

Args:
    mode: Weight initialization mode ('jax', 'jax_nlhb', 'moco', or '').
    needs_reset: If True, call reset_parameters() on modules that have it.
        Set to False when modules have already self-initialized in __init__.
)rM  rN  rO  rL  r7   nlhbrT   N{Gz?r   ư>r^  )rt  r   logr6  r<  r"   rj  rj   r   r   rk  r/   get_init_weights_vitrP  r  )rt   r  r_  	head_biass       rM   r   VisionTransformer.init_weightsq  s     ,t,,????39T>TXXd..//r	>>%$..c2>>%GGOODNNO5>>%GGOODNNO5(kRTXY==  " rO   mc                     [        U5        g)z>Initialize weights for a single module (compatibility method).N)init_weights_vit_timm)rt   r  s     rM   _init_weightsVisionTransformer._init_weights  s     	a rO   checkpoint_pathprefixc                     [        XU5        g)zqLoad pretrained weights.

Args:
    checkpoint_path: Path to checkpoint.
    prefix: Prefix for state dict keys.
N)_load_weights)rt   r  r  s      rM   load_pretrained!VisionTransformer.load_pretrained  s     	dV4rO   c                 
    1 Sk$ )z3Set of parameters that should not use weight decay.>   rj  r<  
dist_tokenrf   r   s    rM   no_weight_decay!VisionTransformer.no_weight_decay  s
     87rO   coarsec                     [        SSS/S9$ )zCreate regex patterns for parameter grouping.

Args:
    coarse: Use coarse grouping.

Returns:
    Dictionary mapping group names to regex patterns.
z ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemro  )rf  )rt   r  s     rM   group_matcherVisionTransformer.group_matcher  s     4-/CD
 	
rO   enablec                 ~    Xl         [        U R                  S5      (       a  U R                  R                  U5        gg)zgEnable or disable gradient checkpointing.

Args:
    enable: Whether to enable gradient checkpointing.
set_grad_checkpointingN)rd  ri  rg  r  )rt   r  s     rM   r  (VisionTransformer.set_grad_checkpointing  s7     #)4##%=>>33F; ?rO   c                     U R                   $ )zGet the classifier head.)rs  r   s    rM   get_classifier VisionTransformer.get_classifier  s     yyrO   c                 (   Xl         UbB  US;   d   eUS:X  a  U R                  c   S5       eUS:w  a  U R                  b  SU l        X l        US:  a'  [        R                  " U R
                  U5      U l        g[        R                  " 5       U l        g)zReset the classifier head.

Args:
    num_classes: Number of classes for new classifier.
    global_pool: Global pooling type.
Nr8  r9  z=Cannot currently add attention pooling in reset_classifier().r   )r6  rq  r7  rj   r   r:  rk   rs  )rt   r6  r7  s      rM   reset_classifier"VisionTransformer.reset_classifier  s     '""NNNNe#(>]]]u%$..*D!%*>IAoBIIdnnk:	SUS^S^S`	rO   c           
         U R                   R                  nU R                   R                  XS9  U R                  b  U R                  (       a  SOU R
                  nU R                   R                  U-   nXPR                  R                  S   :w  aE  [        R                  " [        U R                  U R                   R                  UUSS95      U l        ggg)zUpdate the input image resolution and patch size.

Args:
    img_size: New input resolution, if None current resolution is used.
    patch_size: New patch size, if None existing patch size is used.
)r3  r4  Nr   r,   T)new_sizeold_sizer"  verbose)rg  	grid_sizeset_input_sizer<  r=  r"  rh  r   rj   r   r%   )rt   r3  r4  prev_grid_sizer"  num_new_tokenss         rM   r   VisionTransformer.set_input_size  s     ))33'''Q>>%%)%8%8d>T>T!--99<MMN!5!5a!88!#.DNN!--77+&7 / " 9 &rO   rx   c                 x   U R                   c-  UR                  UR                  S   SUR                  S   5      $ U R                  (       am  UR                  u  p#pEU R                  R
                  n[        U R                   X44UU R                  (       a  SOU R                  S9nUR                  USU5      nOU R                   n/ nU R                  b9  UR                  U R                  R                  UR                  S   SS5      5        U R                  b9  UR                  U R                  R                  UR                  S   SS5      5        U R                  (       a$  X-   nU(       a  [        R                  " X/-   SS9nO#U(       a  [        R                  " X/-   SS9nX-   nU R                  U5      $ )z$Apply positional embedding to input.r   r   )r  r  r"  r,   r   )r<  r   r   r,  rg  r  r%   r=  r"  rj  r  expandrk  r   r   rl  )	rt   rx   r   HWr   r  r<  to_cats	            rM   
_pos_embedVisionTransformer._pos_embed  s_   >>!66!''!*b!''"+66  JA!!--77N.''+':':!@V@V	I q"a AI>>%MM$..//
BCD>>%MM$..//
BCD AIIfsl2 IIfsl2A}}QrO   indicesreturn_prefix_tokensr  
stop_earlyrX  intermediates_onlyoutput_dictry   c
           	         US;   d   S5       eUS:H  n
/ n[        [        U R                  5      U5      u  pUR                  u  pnnU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      n[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SUS-    n[        U5       H  u  nnU	b  U" XS9nOIU R                  (       a0  [        R                  R                  5       (       d  [        UU5      nOU" U5      nUU;   d  Ma  UR                  U(       a  U R                  U5      OU5        M     U R                   (       aK  U Vs/ s H  nUSS2SU R                   24   PM     nnU Vs/ s H  nUSS2U R                   S24   PM     nnOSnU
(       ad  U R                  R#                  UU45      u  nnU Vs/ s H7  nUR%                  UUUS5      R'                  SS	SS
5      R)                  5       PM9     nnU(       a5  0 nUUS'   Ub  U(       a  UUS'   U(       d  U R                  U5      nUUS'   U$ [        R                  R                  5       (       d  U(       a  Ub  [+        [-        UU5      5      nU(       a  U$ U R                  U5      nX4$ s  snf s  snf s  snf )aj  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    return_prefix_tokens: Return both prefix and spatial intermediate tokens
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
    output_dict: Return outputs as a dictionary with 'image_features' and 'image_intermediates' keys
    attn_mask: Optional attention mask for masked attention (e.g., for NaFlex)
Returns:
    A tuple with (final_features, intermediates), a list of intermediate features, or a dictionary containing
    'image_features' and 'image_intermediates' (and optionally 'image_intermediates_prefix')
)NCHWNLCz)Output format must be one of NCHW or NLC.r  Nr,   r|   r   r   r   r   image_intermediatesimage_intermediates_prefiximage_features)r.   lenro  r   rg  r  rm  rn  r   jitis_scriptingr}  rd  r0   r  r  r"  dynamic_feat_sizer   permute
contiguouslistzip)rt   rx   r  r  r  r  rX  r  r  ry   r   intermediatestake_indices	max_indexr   r  heightwidthro  ry  blkr   prefix_tokensr  r  result_dictx_finals                              rM   forward_intermediates'VisionTransformer.forward_intermediates  s   8 _,Y.YY,&"6s4;;7G"Q  ggfeQOOAOOAMM!99!!##:[[F[[)a-0F'FAs$/((1G1G1I1IsA&FL $$TTYYq\qA ( !!ERS]Qq!D$:$:"::;]MSDQRMqQq$"8"8"99:MMRM M##55vuoFDAq^kl^kYZQYYq!Q3;;Aq!QGRRT^kMl K1>K-.(-A<I89 &))A,07,- yy%%'',@]E^ ]M!BCM  IIaLG TR ms   K&K5>K
prune_norm
prune_headc                    [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a,  [        R                  " 5       U l        U R                  SS5        U$ )a  Prune layers not required for specified intermediates.

Args:
    indices: Indices of intermediate layers to keep.
    prune_norm: Whether to prune normalization layer.
    prune_head: Whether to prune the classifier head.

Returns:
    List of indices that were kept.
Nr,   r   r7   )r.   r  ro  rj   rk   r  rA  r  )rt   r  r  r  r  r  s         rM   prune_intermediate_layers+VisionTransformer.prune_intermediate_layersk  sh      #7s4;;7G"Qkk.9q=1DI;;=DL!!!R(rO   nr   c           
      <    U R                  XUUU(       a  SOSSUS9$ )ax  Get intermediate layer outputs (DINO interface compatibility).

NOTE: This API is for backwards compat, favour using forward_intermediates() directly.

Args:
    x: Input tensor.
    n: Number or indices of layers.
    reshape: Reshape to NCHW format.
    return_prefix_tokens: Return prefix tokens.
    norm: Apply normalization.

Returns:
    List of intermediate features.
r  r  T)r  r  rX  r  ry   )r  )rt   rx   r  r   r  r  ry   s          rM   get_intermediate_layers)VisionTransformer.get_intermediate_layers  s2    . ))!5!(ve# * 
 	
rO   c                    U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUb  U R                   H	  nU" XS9nM     O\U R
                  (       a:  [        R                  R                  5       (       d  [        U R                  U5      nOU R	                  U5      nU R                  U5      nU$ )z\Forward pass through feature layers (embeddings, transformer blocks, post-transformer norm).r|   )rg  r  rm  rn  ro  rd  r   r  r  r1   r  )rt   rx   ry   r  s       rM   forward_features"VisionTransformer.forward_features  s    QOOAOOAMM! {{/ #$$UYY-C-C-E-Et{{A.AAAIIaLrO   r!  c                     U R                   b9  U R                  (       d  USS2U R                  S24   nU R                  U5      nU$ Uc  U R                  OUn[	        UUU R                  U R                  S9nU$ )zApply pooling to feature tokens.

Args:
    x: Feature tensor.
    pool_type: Pooling type override.

Returns:
    Pooled features.
N)r!  r"  r#  )rq  rB  r"  r7  r*  )rt   rx   r!  s      rM   poolVisionTransformer.pool  s}     >>%++a//001q!AH(1(9D$$y	"44"&":":	
 rO   
pre_logitsc                     U R                  U5      nU R                  U5      nU R                  U5      nU(       a  U$ U R                  U5      $ )zForward pass through classifier head.

Args:
    x: Feature tensor.
    pre_logits: Return features before final classifier.

Returns:
    Output tensor.
)r  rA  rr  rs  )rt   rx   r  s      rM   forward_headVisionTransformer.forward_head  sA     IIaLLLONN1q0DIIaL0rO   c                 F    U R                  XS9nU R                  U5      nU$ r{   )r  r  r}   s      rM   r~   VisionTransformer.forward  s*    !!!!9a rO   )rq  ro  rj  r,  r:  rA  rp  rP  r7  rd  rc  rs  rr  ra  r5  r=  r  rn  r6  r`  r"  rb  rm  rg  rB  rl  r<  rk  rt  r   r7   Tr7   F)Tr   )NN)NFFFr  FFN)r,   FT)r,   FFFN)5r   r   r   r   r   r   r   r   r   rQ   r   r   r   ro   r
   r   r   r   rH   r   r*   r   rj   r   rh   r  r   r  r   r  ignorer  r	   r  r   r   r  r  r  r  r  r   r  r   r  r  r  r  r  r  r~   r   r   r   s   @rM   r6   r6     s   
 Dk! 5868#OV !!!$)#("+/ $$#("#&*(-%*$)!#%%'$&$&$&SU"$.48.2-1(-),$-Wv1CsCx01v1 c5c?23v1 	v1
 v1 !!KLv1 v1 v1 v1 v1 v1 v1 "v1 !v1 v1  "%!v1" #v1$ %v1& !'v1( )v1* +v1, -v1. d^/v10 "&1v12 #3v14 "5v16 7v18 !9v1: #;v1< "=v1> "?v1@ "Av1B !!OPCv1D Ev1F "Gv1H 'y1Iv1J !+Kv1L  	*Mv1N 299oOv1P BIIQv1R "Sv1X 
Yv1 v1p1# # # #.!ryy !T !
 YY5s 5C 5 5 5 YY8S 8 8 YY
D 
T#uS$Y?O:O5P 
 
 YY<T <T < < YY		  aC ahsm aW[ a& 3748uS#X/ !sCx1 
	2% ELL % U\\ % T 8<).$$', %04] ||]  eCcN34]  #'	] 
 ]  ]  ]  !%]  ]   -]  
tELL!5tELL7I)I#JDQTVYQYNZ	[] B ./$#	3S	>*  	
 
c8 45!).04
||
 S$s)U3Z/0
 	

 #'
 
  -
 
ell	
@%,, 8ELL;Q ]b]i]i &ell x}  01ell 1 1 1 (5<<2H TYT`T`  rO   r7   r[  namer_  c                 r   [        U [        R                  5      (       aL  [        U R                  SS9  U R
                  b*  [        R                  R                  U R
                  5        gg[        U S5      (       a  U R                  5         gU(       a#  [        U S5      (       a  U R                  5         ggg)zViT weight initialization, original timm impl (for reproducibility).

Args:
    module: Module to initialize.
    name: Module name for context.
    needs_reset: If True, call reset_parameters() on modules that have it.
r  r  Nr   r   )rG   rj   r   r"   r   rd   r   r   ri  r   r   r[  r  r_  s      rM   r  r    s     &"))$$fmm-;;"GGNN6;;' #		(	(	);<<! =rO   r  c                    [        U [        R                  5      (       a  UR                  S5      (       aT  [        R                  R                  U R                  5        [        R                  R                  U R                  U5        g[        R                  R                  U R                  5        U R                  bX  SU;   a(  [        R                  R                  U R                  SS9O([        R                  R                  U R                  5        gg[        U [        R                  5      (       aM  [        U R                  5        U R                  b*  [        R                  R                  U R                  5        gg[        U S5      (       a  U R                  5         gU(       a#  [        U S5      (       a  U R                  5         ggg)zViT weight initialization, matching JAX (Flax) impl.

Args:
    module: Module to initialize.
    name: Module name for context.
    head_bias: Bias value for head layer.
    needs_reset: If True, call reset_parameters() on modules that have it.
rs  Nrp   r  r  r   r   )rG   rj   r   
startswithr   r   r   r   rd   xavier_uniform_r   Conv2dr#   ri  r   r   )r[  r  r  r_  s       rM   init_weights_vit_jaxr    s&    &"))$$??6""GGNN6==)GGfkk95GG##FMM2{{&:?4-6RWW^^\b\g\gMh '	FBII	&	&fmm$;;"GGNN6;;' #		(	(	);<<! =rO   c                    [        U [        R                  5      (       a  SU;   a  [        R                  " S[        U R                  R                  S   S-  U R                  R                  S   -   5      -  5      n[        R                  R                  U R                  U* U5        O)[        R                  R                  U R                  5        U R                  b*  [        R                  R                  U R                  5        gg[        U S5      (       a  U R                  5         gU(       a#  [        U S5      (       a  U R                  5         ggg)	zViT weight initialization, matching moco-v3 impl minus fixed PatchEmbed.

Args:
    module: Module to initialize.
    name: Module name for context.
    needs_reset: If True, call reset_parameters() on modules that have it.
qkvg      @r   r   r,   Nr   r   )rG   rj   r   r   r~  r   r   r   r   uniform_r  rd   r   ri  r   r   )r[  r  r_  vals       rM   init_weights_vit_mocor    s     &"))$$D=))Bv}}':':1'='BV]]EXEXYZE['[!\\]CGGV]]SD#6GG##FMM2;;"GGNN6;;' #		(	(	);<<! =rO   c                 X    U(       a#  [        U S5      (       a  U R                  5         g g g )Nr   )ri  r   r  s      rM   init_weights_reset_parametersr  -  s%    wv'9::! ;{rO   r  c                     U R                  S5      (       a  [        [        XS9$ U R                  S5      (       a  [        [        US9$ U S:X  a  [        [        US9$ [        [
        US9$ )NrM  )r  r_  rO  r^  rL  )r  r   r  r  r  r  )r  r  r_  s      rM   r  r  2  sa    u+yZZ		 	 ,+FF	4+NN ,+FFrO   rf   posemb
posemb_newgs_newinterpolation	antialiasc           
         UR                   S   U-
  nU R                   S   U-
  n[        [        R                  " U5      5      /S-  n[	        U5      (       d#  [        [        R                  " U5      5      /S-  n[        XUUUUSS9$ )zRescale the grid of position embeddings when loading from state_dict.
*DEPRECATED* This function is being deprecated in favour of using resample_abs_pos_embed
r,   r   T)r"  r  r	  r  )r   ro   r   r~  r  r%   )	r  r  r"  r  r  r	  ntok_newntok_oldgs_olds	            rM   resize_pos_embedr  ?  s     "%66H||A!22H$))H%&'!+Fv;;dii)*+a/!+# rO   modelr  r  load_bfloat16c                   ^^^^ SSK mT(       a
  SSKJ m  SSKmSIUUUU4S jjnT(       a  TR                  U5      nOTR                  U5      nSnSnSnU(       d  SU;   a  SnOS	U;   a  S
nSnO
SU;   a  SnSn[	        U R
                  S5      (       Ga  U R
                  R                  n	[	        U	S5      (       + n
U
(       a  U	OU	R                  nUR                  R                  R                  [        UR                  R                  R                  S   U" XR S3   5      5      5        UR                  R                  R                  U" XR S3   5      5        UR                  R                  R                  U" XR S3   5      5        U
(       Gd  [        U	R                   5       GH  u  p[        UR"                  5       GH  u  pU SUS-    SUS-    S3n[%        S5       H  n['        USUS-    35      R                  R                  U" UU SUS-    S3   5      5        ['        USUS-    35      R                  R                  U" UU SUS-    S3   5      5        ['        USUS-    35      R                  R                  U" UU SUS-    S3   5      5        M     UR(                  c  M  UR(                  R                  R                  R                  U" UU S3   5      5        UR(                  R                  R                  R                  U" UU S3   5      5        UR(                  R                  R                  R                  U" UU S3   5      5        GM     GM     U" XR S 3   5      nOB[        U R
                  R*                  R                  R                  S   U" XR S 3   5      5      nUR                  S!S U R
                  R*                  R                  R                  S!S :w  a8  [-        UU R
                  R*                  R                  R                  S!S UUSS"9nU R
                  R*                  R                  R                  U5        U R
                  R*                  R                  R                  U" XR S#3   5      5        U R.                  b%  U R.                  R                  U" XR S$3   SS%95        U(       a  U" XR S&3   SS%9nOU" XR S'3   SS%9nUR                  U R0                  R                  :w  aC  ['        U S(S5      (       a  SO['        U S)S5      n[3        UU R
                  R4                  UUUSS*9nU R0                  R                  U5        U R                  R                  R                  U" XR S+3   5      5        U R                  R                  R                  U" XR S,3   5      5        [7        U R8                  [:        R<                  5      (       a  U S-3U;   a  U R8                  R                  R                  S   XR S-3   R                  S.   :X  a`  U R8                  R                  R                  U" XR S/3   5      5        U R8                  R                  R                  U" XR S-3   5      5        U R>                  Gbi  U S03nUS1-   nU R>                  R@                  R                  U" UU S23   SS%95        U R>                  RB                  R                  R                  [D        RF                  " S3 Vs/ s H+  nU" UU U S3   SS%9RI                  S5      RJ                  PM-     sn5      5        U R>                  RB                  R                  R                  [D        RF                  " S3 Vs/ s H!  nU" UU U S3   SS%9RM                  S.5      PM#     sn5      5        U R>                  RN                  R                  R                  U" UU S43   SS%9RI                  S5      RJ                  5        U R>                  RN                  R                  R                  U" UU S53   SS%9RM                  S.5      5        U R>                  R*                  R                  R                  U" UU S63   5      RI                  S5      5        U R>                  R*                  R                  R                  U" UU S73   5      5        U R>                  R                  R                  R                  U" UU S83   5      5        U R>                  R                  R                  R                  U" UU S93   5      5        [%        S:5       H  n['        U R>                  RP                  S;US-    35      R                  R                  U" UU S<U S3   5      5        ['        U R>                  RP                  S;US-    35      R                  R                  U" UU S<U S3   5      5        M     U(       a  S=OS>u  nnn[        U R"                  RS                  5       5       GH  u  pU S?3U;   a  U S@3nUnO
U SAU S3nSnUSBU S3-   nURT                  R                  R                  U" UU S83   USC95        URT                  R                  R                  U" UU S93   USC95        URV                  RX                  R                  R                  [D        RF                  " SD Vs/ s H,  nU" UU U S3   SUSE9RI                  S5      RJ                  PM.     sn5      5        URV                  RX                  R                  R                  [D        RF                  " SD Vs/ s H"  nU" UU U S3   SUSE9RM                  S.5      PM$     sn5      5        URV                  R*                  R                  R                  U" UU S63   USC9RI                  S5      5        URV                  R*                  R                  R                  U" UU S73   USC95        URZ                  R                  R                  U" UU SFU S3   USC95        URZ                  R                  R                  U" UU SFU S3   USC95        [%        S:5       H  n['        URP                  S;US-    35      R                  R                  U" UU SGU SHU S3   USC95        ['        URP                  S;US-    35      R                  R                  U" UU SGU SHU S3   USC95        M     GM      gs  snf s  snf s  snf s  snf )JzULoad weights from .npz checkpoints for official Google Brain Flax implementation
    r   NTc                 z  > Ub  X   n T(       aE  U R                  TR                  5      R                  TR                  5      n TR	                  U 5      n U R
                  S:X  aN  U R                  S   U R                  S   s=:X  a  U R                  S   s=:X  a  S:X  a  O  OU R                  5       n U(       ak  U R
                  S:X  a  U R                  / SQ5      n OGU R
                  S:X  a  U R                  / SQ5      n O#U R
                  S:X  a  U R                  SS/5      n [        R                  " U 5      n U $ )N   r   r,   r   )r   r   r   r,   r   )r   r   r,   )r   bfloat16astyper   arrayndimr   flattenr   r   
from_numpy)_wtidxjnpr  	ml_dtypesnps      rM   _n2p_load_weights.<locals>._n2pa  s    ?B++,33CKK@B"B77a<BHHQK288A;J"((1+JJBww!|\\,/A\\),A\\1a&)b!	rO   bilinearFzopt/target/embedding/kernelzopt/target/zparams/embedding/kernelzparams/zparams/img/embedding/kernelzparams/img/backboner  r,   zconv_root/kernelzgn_root/scalezgn_root/biasblockz/unit/r   convz/kernelr  gnz/scalez/biaszconv_proj/kernelzgn_proj/scalezgn_proj/biaszembedding/kernelr   r  r	  r  zembedding/biascls)r  pos_embeddingz(Transformer/posembed_input/pos_embeddingr=  r"  r  r"  r  r	  r  zTransformer/encoder_norm/scalezTransformer/encoder_norm/biasz	head/biasr   zhead/kernelz
MAPHead_0/zMultiHeadDotProductAttention_0/probe)keyvaluezquery/kernelz
query/biasz
out/kernelzout/biaszLayerNorm_0/scalezLayerNorm_0/biasr   fczMlpBlock_0/Dense_)r   r   r,   )r,   r   r   z*Transformer/encoderblock/LayerNorm_0/scalezTransformer/encoderblock/zTransformer/encoderblock_MultiHeadDotProductAttention_)r  )queryr-  r.  )r  r  
LayerNorm_	MlpBlock_z/Dense_)TN).numpy	jax.numpyr  loadri  rg  r#  r  r&  r   copy_r2   r   r  rd   r}  stagesro  r  getattr
downsampler  r$   rj  r<  r%   r  rG   rs  rj   r   rq  latentkvr   r   r  Tr   r   rp   childrenri   r8   r  rn   )r  r  r  r  r   wr  r	  
big_visionr#  	stem_onlyr  ry  stagejr$  bprembed_conv_wpos_embed_wr"  block_prefix
mha_prefixr  mha_subb_subln1_subr  r  r  r  s      `                        @@@rM   r  r  X  s:     * HH_%GGO$MIJ(A-"F&!+FJ*a/"FJu  *--$$--&11	$x(--		/		0@0@0F0Fq0I4PQT\\lRmPnKopq		tA&>$?@A		T!hl$;"<=>%hoo6 )%,, 7HA"85QuQUG1=B"1Xa!eW~6==CCDbTQUVWZ[V[U\\cKdIeDfga!eW~6==CCDbTQSTUXYTYSZZ`KaIbDcda!eW~6;;AA$qB4rRSVWRWQXX]I^G_B`a & ''3((--44::4RDHXBY@Z;[\((--44::4RDBV@W;XY((--2288a2$l@S>T9UV !8 7 A(89:;'""))//2DXEU;V9W4XZ"#%"3"3"8"8"?"?"E"Ebc"JJ+""))//4'
 
!!''5	%%d1x~-F+G&HI"d1xs^#4>?1x}56%@1x'OPQUZ[EOO111!(0@%!H!HAgV[]prsNt,&&00/'
 
OO+&	JJDX-K#L!MNO	JJOO$q8+H!IJKL5::ryy))hi A%JJOO!!!$HI*>(?(E(Eb(II

Q'=%> ?@

d1xy%9#:;<
 " ,!&EF
$$T!|nE,B*Cu%MN!!''		N^3`N^Dj\!G,-7??BDDN^3` )a 	b%%eiiK[1]K[aDj\!E*+u5==bAK[1] '^ 	_  &&tAL.I,Je'T'\'\]^'_'a'ab$$T!zl*,E*F%%P%X%XY[%\]##))$qJ<z1J/K*L*T*TUV*WX!!''Q*X/F-G(HI##))$qL>AR1S/T*UV!!''Q,?O/P-Q(RSqAEOO''2a!eW6==CCDl^[lmnloovKwIxDyzEOO''2a!eW6;;AA$qL>YjkljmmrIsGtBuv  ,6i9GUGell3356X?@AE$X%>?LC$X%>qcCLC!&CG9A$NN
  a<.8I(J&KQT!UVtA6F&G$HcRS

##EIIWp/rWpRSDj\!G,-C@HHKMMWp/r %s 	t

!!%))Tm-oTmqDj\!E*+u#>FFrJTm-o #p 	q

$$T!zl*,E*FC%P%X%XYZ%[\

""4ZL*A(B#LM  a<.
7)6(R&SY\!]^tAj	&O$PVYZ[qAEIIAE7|,3399Q,ywqcIJPSTVEIIAE7|,1177Q,ywqcGHcRT % 73`1]./r-os   "2u	(u3u)u
state_dictc                    0 n/ SQnU R                  5        H  u  pVUR                  U5      (       d  M  UR                  US5      nU H  nUR                  US   US   5      nM     US:X  a;  SnUR                  SS5      n[        R
                  " UR                  S   5      US'   O@US:X  a#  S	nUR                  S5      R                  S5      nOUS
:X  a  UR                  S5      nXcU'   M     U$ )N)
)conv1patch_embed.proj)positional_embeddingr<  )ztransformer.resblocks.rZ  )ln_prern  )ln_postr  )ln_r  )in_proj_zqkv.r   r  )zmlp.c_fcmlp.fc1)z
mlp.c_projzmlp.fc2r7   r   r,   r  head.weight	head.biasclass_embeddingrj  r<  )itemsr  replacer   r   zerosr   	unsqueeze)rM  r  r  out_dictswapsr   r   sps           rM   _convert_openai_cliprb    s    
 HE   "||F##IIfb!B		"Q%A'A  ;AAq!A$)KK
$;H[!##AA((+A+AA! #" OrO   c                    SS K n0 nU R                  SS 5        SU ;   aR  U R                  S5      US'   U R                  S5      U S   S S 2S4   -   US'   U R                  S5      S S 2SS 24   US'   U R                  5        Hc  u  pEUR                  SU5      (       a  XSUR	                  S	S
5      '   M2  UR                  SU5      (       a  XSUR	                  SS5      '   M_  XSU'   Me     U$ )Nr   
mask_tokenregister_tokensrk  rj  r<  r,   z(blocks\.(\d+)\.mlp\.w12\.(?:weight|bias)w12fc1z'blocks\.(\d+)\.mlp\.w3\.(?:weight|bias)w3r  )repopr[  matchr\  )rM  r  ri  r_  r   r   s         rM   _convert_dinov2rl    s     HNN<&J& */@ A *{ ;j>UVWYZVZ>[ [ *{ ;AqrE B  "88?CC01QYYue,-XX@!DD/0QYYtU+, # OrO   c                 d   0 nU R                  5        H  u  p4UR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  S	S
5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nXBU'   M     U$ )Nnorm_1ri   norm_2rn   zpreprocessor.patchifier.patch_embed.zpreprocessor.pos_embedr<  ztrunk.r7   zpost_trunk_norm.norm.rW  z	mlp.fc1_gzmlp.fc3z	mlp.fc1_x)r[  r\  )rM  r  r_  r   r   s        rM   _convert_aimv2rr  0  s     H  "IIh(IIh(II0.AII.<IIh#II('2IIi-IIi- # OrO   c                    SSK nU R                  SU 5      n S H  nU R                  US5        M     / SQn0 nU R                  5        HM  u  p6SU;   a  M  U H  u  pxUR	                  XxU5      nM     US:X  a  USS R                  S5      US	'   MI  XeU'   MO     0 0 pUR                  S
5      nUR                  5        H  u  p6UR                  U5      nU(       d  XiU'   M#  UR                  5       u  pnU
R                  X40 5      nUUU'   [        U5      S:X  d  M_  [        R                  " US   US   US   /SS9U	SU SU 3'   M     U	$ )zH
Turn a BEiT-3 checkpoint into a standard VisionTransformer state-dict.
r   Nr  )zbeit3.text_embed.weightzbeit3.vision_embed.mask_token))zbeit3\.r7   )zvision_embed\.cls_tokenrj  )zvision_embed\.rp  )zembed_positions\.z
pos_embed.)z	encoder\.r7   )zlayers\.rZ  )zffn_layernorm\.rq  )zffn\.zmlp.)zself_attn_layer_norm\.znorm1.)zself_attn\.zattn.)zfinal_layer_norm\.znorm2.)inner_attn_lnr  rV  )z\.A\..z.B.zpos_embed.weightr   r<  z1blocks\.(\d+)\.attn\.(q|k|v)_proj\.(weight|bias)$r   r   r   r   r   rZ  z
.attn.qkv.)ri  rJ   rj  r[  subr^  compile	fullmatchgroups
setdefaultr  r   r   )rM  r  ri  r   rulestmpr   oldnewoutbufpatr  r  whichkindstashs                    rM   _convert_beit3r  B  s_    4J Jq$ JE  C  "A:HCs#A "" uq1CF # 2
**I
JC		MM!F88:D{B/eu:?3899sU3Zs4!4C'#j/0  JrO   adapt_layer_scalec           
          SSK n0 nU R                  SU 5      n U R                  SU 5      n SnSU ;   a  [        X5      n OSU ;   a  [        XSS	9n OS
U ;   a  [        X5      n O[	        S U R                  5        5       5      (       a  [        X5      n OSU ;   a  U S   n SnOSU ;   d  SU ;   ac  SnSU ;   aZ  [        UR                  [        R                  5      (       a1  U S   US'   [        R                  " U S   R                  S   5      US'   OSU ;   a  SnOSU ;   a  [        X5      n U(       aI  U R                  5        VV	s0 s H,  u  pUR!                  U5      (       d  M  U[#        U5      S U	_M.     n nn	U R                  5        GHi  u  pSU;   a  UR$                  R&                  R(                  R                  u  pp[#        U	R                  5      S:  a@  UR$                  R&                  R(                  R                  u  ppU	R+                  U
SX5      n	U	R                  S   U:w  d  U	R                  S   U:w  a  [-        U	X4UUSS9n	OUS:X  an  U	R                  S   UR.                  R                  S   :w  aD  [1        USS 5      (       a  SO[1        US!S5      n[3        U	UR$                  R4                  UUUSS"9n	O*U(       a  S#U;   a  UR7                  S$S%U5      nO	S&U;   a  GMe  XU'   GMl     U$ s  sn	nf )'zIconvert patch embedding weight from manual patchify + linear proj to convr   Nr  rM  r7   zvisual.class_embeddingzmodule.visual.class_embeddingzmodule.visual.)r  rd  c              3   ,   #    U  H
  nS U;   v   M     g7f)zbeit3.Nrf   ).0r   s     rM   	<genexpr>'checkpoint_filter_fn.<locals>.<genexpr>  s     6$5qX]$5s   encoderzmodule.zvisual.trunk.pos_embedz"visual.trunk.blocks.0.norm1.weightzvisual.trunk.zvisual.head.proj.weightrX  rY  zmodule.visual.trunk.pos_embedzmodule.visual.trunk.z#preprocessor.patchifier.proj.weightzpatch_embed.proj.weightr  r   r   Tr(  r<  r,   r=  Fr"  r+  gamma_zgamma_([0-9])z
ls\1.gammar  )ri  rJ   rb  rl  anykeysr  rG   rs  rj   r   r   r]  r   rr  r[  r  r  rg  r  r   r   r$   r<  r9  r%   r  rv  )rM  r  r  r  r	  ri  r_  r  r   r   OIr  r  r"  s                  rM   checkpoint_filter_fnr  ~  s    H4Jj9JF:-)*<
	(J	6)*DTU
		#$Z7
	6JOO$56	6	6#J6
	j	 	*
	!Z	/3W[e3e $
2z%**bii7X7X&01J&KH]#$)KK
;T0U0[0[\]0^$_H[!	(J	6'	.*	<#J6
5?5E5E5G`5GTQ1<<X^K_(aFoq(5G
`  "$)**//66<<JA!177|a"..33::@@
aIIaQ*wwr{a1772;!#3(F"/'  +!''!*0E0Ea0H"H%,U4De%L%LRYZ_atvwRx&**44"3+#A 8q=':AQA #B OG as   K:$K:urlc                 4    U SSS SSS[         [        SSSS	.UE$ )
Nr/  )r   r-  r-  g?bicubicTrP  rs  
apache-2.0)r  r6  
input_size	pool_sizecrop_pctr  fixed_input_sizer   r   
first_conv
classifierlicense)r   r   )r  rL   s     rM   _cfgr    s:    #" '%(  rO   z*vit_base_patch16_224.augreg2_in21k_ft_in1kztimm/)	hf_hub_idz)vit_base_patch8_224.augreg2_in21k_ft_in1kz)vit_tiny_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz)r  r  custom_loadz)vit_tiny_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz)r     r        ?)r  r  r  r  r  z*vit_small_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz*vit_small_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz)vit_base_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz)vit_base_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz(vit_base_patch8_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz'vit_base_patch16_224.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth)r  r  z'vit_base_patch16_384.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_384-83fb41ba.pth)r  r  r  r  z(vit_large_patch32_384.orig_in21k_ft_in1kzphttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pthz!vit_small_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz!vit_small_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch32_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch32_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzzvit_large_patch14_224.untrained)r  zvit_huge_patch14_224.untrainedzvit_giant_patch14_224.untrainedz"vit_gigantic_patch14_224.untrainedzvit_base_patch32_224.orig_in21k)r  r6  zvit_base_patch16_224.orig_in21kz vit_large_patch32_224.orig_in21kz vit_large_patch16_224.orig_in21kzvit_huge_patch14_224.orig_in21kz!vit_tiny_patch16_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npziSU  )r  r  r  r6  z"vit_small_patch32_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz"vit_small_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch32_224.augreg_in21kzohttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz vit_base_patch8_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz"vit_large_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npzzvit_base_patch32_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_32.npz)r  r  r  zvit_base_patch16_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_16.npzzvit_small_patch16_224.dinoz[https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth)r  r  r   r   r6  zvit_small_patch8_224.dinozYhttps://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_pretrain.pthzvit_base_patch16_224.dinozWhttps://dl.fbaipublicfiles.com/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pthzvit_base_patch8_224.dinozUhttps://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pthz vit_small_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pthr  )r     r  )r  r  r  r   r   r6  r  r  zvit_base_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pthz vit_large_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pthz vit_giant_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pthz%vit_small_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_reg4_pretrain.pthz$vit_base_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_reg4_pretrain.pthz%vit_large_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pthz%vit_giant_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_reg4_pretrain.pthzvit_base_patch16_224_miil.in21kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_in21k_miil-887286df.pth)rT   rT   rT   )r  r  r  g      ?r"  i+  )r  r  r   r   r  r  r6  z'vit_base_patch16_224_miil.in21k_ft_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_1k_miil_84_4-2deb18e3.pth)r  r  r   r   r  r  z vit_base_patch16_rpn_224.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_base_patch16_rpn_224-sw-3b07e89d.pthz#vit_medium_patch16_gap_240.sw_in12k)r      r  gffffff?i-.  )r  r  r  r6  z+vit_medium_patch16_gap_256.sw_in12k_ft_in1k)r      r  )r  r  r  z+vit_medium_patch16_gap_384.sw_in12k_ft_in1ksquash)r  r  r  	crop_modez%vit_betwixt_patch16_gap_256.untrained)r  r  z"vit_base_patch16_gap_224.untrainedz/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k)r  r   r   z/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k)r  r   r   r  r  z/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k)r     r  z/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k)r  r   r   r  z/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k)r  r   r   r  r  r  z0vit_large_patch14_clip_224.laion2b_ft_in12k_in1kz0vit_large_patch14_clip_336.laion2b_ft_in12k_in1k)r   P  r  z/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1kz/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1kz.vit_base_patch32_clip_224.openai_ft_in12k_in1kr   z.vit_base_patch32_clip_384.openai_ft_in12k_in1kz.vit_base_patch16_clip_224.openai_ft_in12k_in1kz.vit_base_patch16_clip_384.openai_ft_in12k_in1kz/vit_large_patch14_clip_224.openai_ft_in12k_in1kz/vit_large_patch14_clip_336.openai_ft_in12k_in1kz)vit_base_patch32_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_384.laion2b_ft_in1kz*vit_large_patch14_clip_224.laion2b_ft_in1kz*vit_large_patch14_clip_336.laion2b_ft_in1kz)vit_huge_patch14_clip_224.laion2b_ft_in1kz)vit_huge_patch14_clip_336.laion2b_ft_in1kz(vit_base_patch32_clip_224.openai_ft_in1kz(vit_base_patch16_clip_224.openai_ft_in1kz(vit_base_patch16_clip_384.openai_ft_in1kz)vit_large_patch14_clip_224.openai_ft_in1kz*vit_base_patch16_clip_224.laion2b_ft_in12k)r  r   r   r6  z+vit_large_patch14_clip_224.laion2b_ft_in12k)r  r   r   r  r6  z*vit_huge_patch14_clip_224.laion2b_ft_in12kz)vit_base_patch16_clip_224.openai_ft_in12kz*vit_large_patch14_clip_224.openai_ft_in12kz!vit_base_patch32_clip_224.laion2b   z!vit_base_patch16_clip_224.laion2bz"vit_large_patch14_clip_224.laion2br0  z!vit_huge_patch14_clip_224.laion2b   z"vit_giant_patch14_clip_224.laion2bz%vit_gigantic_patch14_clip_224.laion2b   z'vit_base_patch32_clip_224.laion400m_e32mit)zDnatively QuickGELU, use quickgelu model variant for original results)r  r  notesr   r   r6  z'vit_base_patch16_clip_224.laion400m_e32)r  r  r   r   r  r6  z,vit_base_patch16_plus_clip_240.laion400m_e32  )r  r  r   r   r  r  r6  z(vit_large_patch14_clip_224.laion400m_e32z$vit_base_patch32_clip_224.datacompxlz$vit_base_patch32_clip_256.datacompxl)r  r   r   r  r  r6  z$vit_base_patch16_clip_224.datacompxlz%vit_large_patch14_clip_224.datacompxlzvit_base_patch16_clip_224.dfn2bz
apple-asclz%vit_large_patch14_clip_224.dfn2b_s39bz vit_large_patch14_clip_224.dfn2b)r  r  r  r   r   r  r6  zvit_huge_patch14_clip_224.dfn5bzvit_huge_patch14_clip_378.dfn5b)r   z  r  )r  r   r   r  r  r  r  r6  z-vit_huge_patch14_clip_224.metaclip2_worldwidezcc-by-nc-4.0z-vit_huge_patch14_clip_378.metaclip2_worldwide)r  r  r   r   r  r  r  r6  z1vit_gigantic_patch14_clip_224.metaclip2_worldwidez1vit_gigantic_patch14_clip_378.metaclip2_worldwidez(vit_base_patch32_clip_224.metaclip_2pt5bz(vit_base_patch16_clip_224.metaclip_2pt5bz)vit_large_patch14_clip_224.metaclip_2pt5bz(vit_huge_patch14_clip_224.metaclip_2pt5bz-vit_huge_patch14_clip_224.metaclip_altogetherz,vit_gigantic_patch14_clip_224.metaclip_2pt5bz'vit_base_patch32_clip_224.metaclip_400mz'vit_base_patch16_clip_224.metaclip_400mz(vit_large_patch14_clip_224.metaclip_400mz vit_base_patch32_clip_224.openai)r  r  r   r   r6  z vit_base_patch16_clip_224.openaiz!vit_large_patch14_clip_224.openai)r  r  r   r   r  r6  z!vit_large_patch14_clip_336.openai)r  r  r   r   r  r  r6  z/vit_large_patch14_clip_224.apple_mclip2_dfndr2bz
apple-amlr)r  r6  r   r   r  r  z#vit_base_patch32_plus_256.untrained)r  r  r  z#vit_base_patch16_plus_240.untrainedz$vit_small_patch16_36x1_224.untrainedz$vit_small_patch16_18x2_224.untrainedz#vit_base_patch16_18x2_224.untrainedz)eva_large_patch14_196.in22k_ft_in22k_in1k)r      r  )r  r  r   r   r  r  z)eva_large_patch14_336.in22k_ft_in22k_in1k)r  r  r   r   r  r  r  z#eva_large_patch14_196.in22k_ft_in1kz#eva_large_patch14_336.in22k_ft_in1kzflexivit_small.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k.npz)r  r  r  r  r  zflexivit_small.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_600ep.npzzflexivit_small.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_300ep.npzzflexivit_base.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k.npzzflexivit_base.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_600ep.npzzflexivit_base.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_300ep.npzzflexivit_base.1000ep_in21kzMhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_1000ep.npz)r  r  r  r  r  r6  zflexivit_base.300ep_in21kzLhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_300ep.npzzflexivit_large.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k.npzzflexivit_large.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_600ep.npzzflexivit_large.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_300ep.npzzflexivit_base.patch16_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b16_i21k_300ep.npzzflexivit_base.patch30_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b30_i21k_300ep.npzz!vit_base_patch16_xp_224.untrainedz"vit_large_patch14_xp_224.untrainedz!vit_huge_patch14_xp_224.untrainedzvit_base_patch16_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth)r  r  r  r   r   r6  zvit_large_patch16_224.maezFhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pthzvit_huge_patch14_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_huge.pthz#vit_huge_patch14_gap_224.in1k_ijepaz?https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.14-300e.pth.tar)r  r  r   r   r6  z$vit_huge_patch14_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.h.14-900e.pth.tarz#vit_huge_patch16_gap_448.in1k_ijepazEhttps://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.16-448px-300e.pth.tar)r  r  r  r  r   r   r6  z%vit_giant_patch16_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.g.16-600e.pth.tarz$vit_base_patch32_siglip_256.v2_webli)r  r  r6  z$vit_base_patch16_siglip_224.v2_webliz!vit_base_patch16_siglip_224.webliz$vit_base_patch16_siglip_256.v2_webliz!vit_base_patch16_siglip_256.webliz&vit_base_patch16_siglip_256.webli_i18nz$vit_base_patch16_siglip_384.v2_webliz!vit_base_patch16_siglip_384.webliz$vit_base_patch16_siglip_512.v2_webli)r   r  r  z!vit_base_patch16_siglip_512.webliz%vit_large_patch16_siglip_256.v2_webliz"vit_large_patch16_siglip_256.webliz%vit_large_patch16_siglip_384.v2_webliz"vit_large_patch16_siglip_384.webliz%vit_large_patch16_siglip_512.v2_webliz&vit_so400m_patch14_siglip_224.v2_webliz#vit_so400m_patch14_siglip_224.webliz&vit_so400m_patch14_siglip_378.v2_webliz#vit_so400m_patch14_siglip_378.webliz#vit_so400m_patch14_siglip_384.webliz&vit_so400m_patch16_siglip_256.v2_webliz(vit_so400m_patch16_siglip_256.webli_i18nz&vit_so400m_patch16_siglip_384.v2_webliz&vit_so400m_patch16_siglip_512.v2_webliz(vit_giantopt_patch16_siglip_256.v2_webliz(vit_giantopt_patch16_siglip_384.v2_webliz(vit_base_patch32_siglip_gap_256.v2_webliz(vit_base_patch16_siglip_gap_224.v2_webliz%vit_base_patch16_siglip_gap_224.webliz(vit_base_patch16_siglip_gap_256.v2_webliz%vit_base_patch16_siglip_gap_256.webliz*vit_base_patch16_siglip_gap_256.webli_i18nz(vit_base_patch16_siglip_gap_384.v2_webliz%vit_base_patch16_siglip_gap_384.webliz(vit_base_patch16_siglip_gap_512.v2_webliz%vit_base_patch16_siglip_gap_512.webliz)vit_large_patch16_siglip_gap_256.v2_webliz&vit_large_patch16_siglip_gap_256.webliz)vit_large_patch16_siglip_gap_384.v2_webliz&vit_large_patch16_siglip_gap_384.webliz)vit_large_patch16_siglip_gap_512.v2_webliz*vit_so400m_patch14_siglip_gap_224.v2_webliz'vit_so400m_patch14_siglip_gap_224.webliz*vit_so400m_patch14_siglip_gap_224.pali_mixz)vit_so400m_patch14_siglip_gap_224.pali_ptz-vit_so400m_patch14_siglip_gap_224.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_224.pali2_10b_ptz*vit_so400m_patch14_siglip_gap_378.v2_webliz'vit_so400m_patch14_siglip_gap_378.webliz'vit_so400m_patch14_siglip_gap_384.webliz*vit_so400m_patch14_siglip_gap_448.pali_mixz)vit_so400m_patch14_siglip_gap_448.pali_ptz2vit_so400m_patch14_siglip_gap_448.pali_refcoco_segz-vit_so400m_patch14_siglip_gap_448.pali_ocrvqaz-vit_so400m_patch14_siglip_gap_448.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_448.pali2_10b_ptz0vit_so400m_patch14_siglip_gap_448.pali2_3b_docciz1vit_so400m_patch14_siglip_gap_448.pali2_10b_docciz)vit_so400m_patch14_siglip_gap_896.pali_pt)r     r  z2vit_so400m_patch14_siglip_gap_896.pali_refcoco_segz-vit_so400m_patch14_siglip_gap_896.pali_ocrvqaz-vit_so400m_patch14_siglip_gap_896.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_896.pali2_10b_ptz*vit_so400m_patch16_siglip_gap_256.v2_webliz,vit_so400m_patch16_siglip_gap_256.webli_i18nz*vit_so400m_patch16_siglip_gap_384.v2_webliz*vit_so400m_patch16_siglip_gap_512.v2_webliz,vit_giantopt_patch16_siglip_gap_256.v2_webliz,vit_giantopt_patch16_siglip_gap_384.v2_webliz+vit_so400m_patch14_siglip_378.webli_ft_in1kz/vit_so400m_patch14_siglip_gap_378.webli_ft_in1kz,vit_xsmall_patch16_clip_224.tinyclip_yfcc15m)r  r  r   r   r6  z.vit_medium_patch32_clip_224.tinyclip_laion400mz,vit_medium_patch16_clip_224.tinyclip_yfcc15mz/vit_betwixt_patch32_clip_224.tinyclip_laion400mz%vit_wee_patch16_reg1_gap_256.sbb_in1kz/vit_dwee_patch16_reg1_gap_256.sbb_nadamuon_in1kz&vit_dwee_patch16_reg1_gap_256.sbb_in1kz&vit_pwee_patch16_reg1_gap_256.sbb_in1kz0vit_dpwee_patch16_reg1_gap_256.sbb_nadamuon_in1kz'vit_dpwee_patch16_reg1_gap_256.sbb_in1kz1vit_little_patch16_reg1_gap_256.sbb_in12k_ft_in1kz)vit_little_patch16_reg1_gap_256.sbb_in12k)r  r6  r  r  z(vit_little_patch16_reg4_gap_256.sbb_in1kz2vit_dlittle_patch16_reg1_gap_256.sbb_nadamuon_in1kz(vit_medium_patch16_reg1_gap_256.sbb_in1kz1vit_medium_patch16_reg4_gap_256.sbb_in12k_ft_in1kz(vit_medium_patch16_reg4_gap_256.sbb_in1kz)vit_medium_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1kz0vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_mediumd_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz)vit_betwixt_patch16_reg1_gap_256.sbb_in1kz8vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1kz)vit_betwixt_patch16_reg4_gap_256.sbb_in1kz0vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_betwixt_patch16_reg4_gap_256.sbb_in12kz8vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz'vit_base_patch16_reg4_gap_256.untrained)r  z6vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k_ft_in1kz.vit_so150m_patch16_reg4_gap_256.sbb_e250_in12kz6vit_so150m_patch16_reg4_gap_384.sbb_e250_in12k_ft_in1kz)vit_so150m_patch16_reg4_map_256.untrainedz7vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1kz/vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12kz7vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1kz7vit_so150m2_patch16_reg1_gap_448.sbb_e200_in12k_ft_in1kz$vit_intern300m_patch14_448.ogvl_distz$vit_intern300m_patch14_448.ogvl_2pt5)r  r   r   r  r  r6  z aimv2_large_patch14_224.apple_pt)r  r   r   r  r  r6  z%aimv2_large_patch14_224.apple_pt_distzaimv2_huge_patch14_224.apple_ptzaimv2_1b_patch14_224.apple_ptzaimv2_3b_patch14_224.apple_ptz aimv2_large_patch14_336.apple_pt)r  r   r   r  r  r  r6  z%aimv2_large_patch14_336.apple_pt_distzaimv2_huge_patch14_336.apple_ptzaimv2_1b_patch14_336.apple_ptzaimv2_3b_patch14_336.apple_ptz aimv2_large_patch14_448.apple_ptzaimv2_huge_patch14_448.apple_ptzaimv2_1b_patch14_448.apple_ptzaimv2_3b_patch14_448.apple_ptztest_vit.r160_in1k)r      r  )r  r   r   r  )r  r  r   r   r  )ztest_vit2.r160_in1kztest_vit3.r160_in1kztest_vit4.r160_in1kz$beit3_base_patch16_224.in22k_ft_in1kz-beit3_base_patch16_224.indomain_in22k_ft_in1kz%beit3_large_patch16_224.in22k_ft_in1kz.beit3_large_patch16_224.indomain_in22k_ft_in1kz!beit3_giant_patch14_224.untrainedz!beit3_giant_patch14_336.untrainedzbeit3_base_patch16_224.ptz"beit3_base_patch16_224.indomain_ptzbeit3_large_patch16_224.ptz#beit3_large_patch16_224.indomain_ptr  	quickgelur  _clip__clip_quickgelu_TIMM_USE_NAFLEXVITfalsetruevariant
pretrained
use_naflex	NaFlexVitc           
      8   Uc  [         nU(       a  SSKJn  U" X40 UD6$ UR                  SS5      nSU ;   a  [	        [
        SSS9nO[
        nUR                  S	S
5      nSU ;   a  UR                  SS 5      S:w  a  Sn[        [        U U4UU[        USS9S.UD6$ )Nr,   )_create_naflexvit_from_classicout_indicesr   flexir"  F)r  r	  pretrained_strictTsiglipr7  r9  getter)r  feature_cls)pretrained_filter_fnr  feature_cfg)
_USE_NAFLEX_DEFAULT	naflexvitr  rj  r   r  rJ   r-   r6   rf  )r  r  r  rL   r  r  
_filter_fnstricts           rM   _create_vision_transformerr    s     (
=-gLVLL**]A.K' 1W\]
)
 ZZ+T2F7vzz->%G ( [hG  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Tiny (Vit-Ti/16)
    r.     r1  r   r4  r:  rD   r<   r  )vit_tiny_patch16_224rf  r  r  rL   
model_argsr  s       rM   r  r    8     s"JJ&s*sX\]gXrkqXrsELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )z$ViT-Tiny (Vit-Ti/16) @ 384x384.
    r.  r  r1  r   r  r  )vit_tiny_patch16_384r  r  s       rM   r  r    r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Small (ViT-S/32)
        r  r1     r  r  )vit_small_patch32_224r  r  s       rM   r  r    8     s"JJ&t:tY]^hYslrYstELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )z%ViT-Small (ViT-S/32) at 384x384.
    r  r  r1  r  r  r  )vit_small_patch32_384r  r  s       rM   r  r    r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )ViT-Small (ViT-S/16)
    r.  r  r1  r  r  r  )vit_small_patch16_224r  r  s       rM   r  r    r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )r  r.  r  r1  r  r  r  )vit_small_patch16_384r  r  s       rM   r  r    r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Small (ViT-S/8)
       r  r1  r  r  r  )vit_small_patch8_224r  r  s       rM   r  r    s8     cqIJ&s*sX\]gXrkqXrsELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k, source https://github.com/google-research/vision_transformer.
r  r0  r1  r  r  )vit_base_patch32_224r  r  s       rM   r  r    8    
 s"KJ&s*sX\]gXrkqXrsELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r  r0  r1  r  r  )vit_base_patch32_384r  r  s       rM   r  r    r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
r.  r0  r1  r  r  )vit_base_patch16_224r  r  s       rM   r  r  "  r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r.  r0  r1  r  r  )vit_base_patch16_384r  r  s       rM   r  r  ,  r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
r  r0  r1  r  r  )vit_base_patch8_224r  r  s       rM   r  r  6  s8    
 crJJ&rrW[\fWqjpWqrELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )znViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
    r  r     r.  r  r  )vit_large_patch32_224r  r  s       rM   r  r  @  8     t2LJ&t:tY]^hYslrYstELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r  r  r  r.  r  r  )vit_large_patch32_384r  r  s       rM   r  r  I  8    
 t2LJ&t:tY]^hYslrYstELrO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
r.  r  r  r  r  )vit_large_patch16_224r  r  s       rM   r  r  S  r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r.  r  r  r  r  )vit_large_patch16_384r  r  s       rM   r  r  ]  r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/14)
       r  r  r.  r  r  )vit_large_patch14_224r  r  s       rM   r  r  g  r  rO   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zVViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
    r  r  r  r.  r  r  )vit_huge_patch14_224r  r  s       rM   r   r   p  s8     t2LJ&s*sX\]gXrkqXrsELrO   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	zpViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    tE]t@(   r.  r4  r:  rU   rD   r<   r  )vit_giant_patch14_224r  r  s       rM   r  r  y  s;     tuBZ\]J&t:tY]^hYslrYstELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zpViT-Gigantic (big-G) model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    ;;@0   r.  r  r  )vit_gigantic_patch14_224r  r  s       rM   r  r    sG     tuBZ\]J&"Y/9Y=A*=WPV=WYELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
r.  r0  r1  F)r4  r:  rD   r<   r=   r  )vit_base_patch16_224_miilr  r  s       rM   r  r    sG    
 s"UZ[J&#Z0:Z>B:>XQW>XZELrO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zAViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 240x240
    r.  r  r1  r  Fr&  r  	r4  r:  rD   r<   r;  r7  r=   rX   rA  r  )vit_medium_patch16_gap_240r  r  s       rM   r  r    U     B!EtULJ '$[1;[?CJ?YRX?Y[ELrO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zAViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 256x256
    r.  r  r1  r  Fr&  r  r  r  )vit_medium_patch16_gap_256r  r  s       rM   r  r    r  rO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zAViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 384x384
    r.  r  r1  r  Fr&  r  r  r  )vit_medium_patch16_gap_384r  r  s       rM   r  r    r  rO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zBViT-Betwixt (ViT-b/16) w/o class token, w/ avg-pool @ 256x256
    r.  r  r1  
   Fr&  r  r  r  )vit_betwixt_patch16_gap_256r  r  s       rM   r  r    sU     B"%EtULJ '%\2<\@DZ@ZSY@Z\ELrO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	z?ViT-Base (ViT-B/16) w/o class token, w/ avg-pool @ 224x224
    r.  r0  r1  Fr&  r4  r:  rD   r<   r;  r7  rA  r  )vit_base_patch16_gap_224r  r  s       rM   r  r    sP     B"%]blqsJ&"Y/9Y=A*=WPV=WYELrO   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
z:ViT-Huge model (ViT-H/14) w/ no class token, avg pool
    r  r  r  r.  Fr&  r  r  )vit_huge_patch14_gap_224r  r  s       rM   r  r    P     R25^cmrtJ&"Y/9Y=A*=WPV=WYELrO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zDViT-Huge model (ViT-H/16) w/ no class token, avg pool @ 448x448
    r.  r  r  Fr&  r  r  )vit_huge_patch16_gap_448r  r  s       rM   r   r     r  rO   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
zGViT-Giant (little-gg) model (ViT-g/16) w/ no class token, avg pool
    r.  r  r  r  Fr&  r4  r:  rD   r<   rU   r;  r7  rA  r  )vit_giant_patch16_gap_224r  r  s       rM   r#  r#    sR     R2ue=J '#Z0:Z>B:>XQW>XZELrO   c                 h    [        SSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r  r  Tr   r   r:  rD   r<   r?  rC   r  )vit_xsmall_patch16_clip_224rf  r   r   r  r  s       rM   r'  r'    P     2TV]^gmqVrsJ&%\2<\@DZ@ZSY@Z\ELrO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r1  r  Tr   r%  r4  r:  rD   r<   r?  rC   r  )vit_medium_patch32_clip_224r(  r  s       rM   r,  r,    sV     B!dW^_hnrWsuJ&%\2<\@DZ@ZSY@Z\ELrO   c                 h    [        SSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r1  r  Tr   r%  r&  r  )vit_medium_patch16_clip_224r(  r  s       rM   r.  r.    r)  rO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r1  r  Tr   r%  r+  r  )vit_betwixt_patch32_clip_224r(  r  s       rM   r0  r0    sV     B"tX_`iosXtvJ&&]3=]AEjA[TZA[]ELrO   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
(ViT-B/32 CLIP image tower @ 224x224
    r  r0  r1  Tr   r%  r+  r  )vit_base_patch32_clip_224r(  r  s       rM   r3  r3    V     B"tX_`iosXtvJ&#Z0:Z>B:>XQW>XZELrO   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/32 CLIP image tower @ 256x256
    r  r0  r1  Tr   r%  r+  r  )vit_base_patch32_clip_256r(  r  s       rM   r6  r6  %  r4  rO   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/32 CLIP image tower @ 384x384
    r  r0  r1  Tr   r%  r+  r  )vit_base_patch32_clip_384r(  r  s       rM   r8  r8  0  r4  rO   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/32 CLIP image tower @ 448x448
    r  r0  r1  Tr   r%  r+  r  )vit_base_patch32_clip_448r(  r  s       rM   r:  r:  ;  r4  rO   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
zViT-B/16 CLIP image tower
    r.  r0  r1  Tr   r%  r+  r  )vit_base_patch16_clip_224r(  r  s       rM   r<  r<  F  r4  rO   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/16 CLIP image tower @ 384x384
    r.  r0  r1  Tr   r%  r+  r  )vit_base_patch16_clip_384r(  r  s       rM   r>  r>  Q  r4  rO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z4ViT-Base (ViT-B/16+) CLIP image tower @ 240x240
    r.  r  r1  r  Tr   r%  r+  r  )vit_base_patch16_plus_clip_240r(  r  s       rM   r@  r@  \  sV     B"tX_`iosXtvJ&(_5?_CG
C]V\C]_ELrO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z0ViT-Large model (ViT-L/14) CLIP image tower
    r  r  r  r.  Tr   r%  r+  r  )vit_large_patch14_clip_224r(  r  s       rM   rB  rB  g  V     R2Y`ajptYuwJ&$[1;[?CJ?YRX?Y[ELrO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z:ViT-Large model (ViT-L/14) CLIP image tower @ 336x336
    r  r  r  r.  Tr   r%  r+  r  )vit_large_patch14_clip_336r(  r  s       rM   rE  rE  r  rC  rO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z0ViT-Huge model (ViT-H/14) CLIP image tower.
    r  r  r  r.  Tr   r%  r+  r  )vit_huge_patch14_clip_224r(  r  s       rM   rG  rG  }  V     R2Y`ajptYuwJ&#Z0:Z>B:>XQW>XZELrO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z9ViT-Huge model (ViT-H/14) CLIP image tower @ 336x336
    r  r  r  r.  Tr   r%  r+  r  )vit_huge_patch14_clip_336r(  r  s       rM   rJ  rJ    rH  rO   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z9ViT-Huge model (ViT-H/14) CLIP image tower @ 378x378
    r  r  r  r.  Tr   r%  r+  r  )vit_huge_patch14_clip_378r(  r  s       rM   rL  rL    rH  rO   c                 l    [        SSSSSS[        [        SS9S	9n[         SS
U 0[        U40 UD6D6nU$ )zViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
Pretrained weights from CLIP image tower.
r  r  r  r  r.  Tr   r%  r4  r:  rU   rD   r<   r?  rC   r  )vit_giant_patch14_clip_224r(  r  s       rM   rO  rO    sX    
 bBY]9$/J '$[1;[?CJ?YRX?Y[ELrO   c                 l    [        SSSSSS[        [        SS9S	9n[         SS
U 0[        U40 UD6D6nU$ )ViT-bigG model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
Pretrained weights from CLIP image tower.
r  r  r	  r
  r.  Tr   r%  rN  r  )vit_gigantic_patch14_clip_224r(  r  s       rM   rR  rR    X    
 bBY]9$/J ''^4>^BFzB\U[B\^ELrO   c                 l    [        SSSSSS[        [        SS9S	9n[         SS
U 0[        U40 UD6D6nU$ )rQ  r  r  r	  r
  r.  Tr   r%  rN  r  )vit_gigantic_patch14_clip_378r(  r  s       rM   rU  rU    rS  rO   c                 l    [        SSSSS[        [        SS9SS9n[         S
S	U 0[        U40 UD6D6nU$ )r2  r  r0  r1  Tr   r%  
quick_gelur4  r:  rD   r<   r?  rC   rZ   r  )#vit_base_patch32_clip_quickgelu_224r(  r  s       rM   rY  rY    W     B"t9$/<J '-d:DdHLZHb[aHbdELrO   c                 l    [        SSSSS[        [        SS9SS9n[         S
S	U 0[        U40 UD6D6nU$ )z/ViT-B/16 CLIP image tower w/ QuickGELU act
    r.  r0  r1  Tr   r%  rW  rX  r  )#vit_base_patch16_clip_quickgelu_224r(  r  s       rM   r\  r\    rZ  rO   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zAViT-Large model (ViT-L/14) CLIP image tower w/ QuickGELU act
    r  r  r  r.  Tr   r%  rW  rX  r  )$vit_large_patch14_clip_quickgelu_224r(  r  s       rM   r^  r^    W     R29$/<J '.e;EeIMjIc\bIceELrO   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zKViT-Large model (ViT-L/14) CLIP image tower @ 336x336 w/ QuickGELU act
    r  r  r  r.  Tr   r%  rW  rX  r  )$vit_large_patch14_clip_quickgelu_336r(  r  s       rM   ra  ra    r_  rO   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zAViT-Huge model (ViT-H/14) CLIP image tower w/ QuickGELU act.
    r  r  r  r.  Tr   r%  rW  rX  r  )#vit_huge_patch14_clip_quickgelu_224r(  r  s       rM   rc  rc    W     R29$/<J '-d:DdHLZHb[aHbdELrO   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zJViT-Huge model (ViT-H/14) CLIP image tower @ 378x378 w/ QuickGELU act
    r  r  r  r.  Tr   r%  rW  rX  r  )#vit_huge_patch14_clip_quickgelu_378r(  r  s       rM   rf  rf  	  rd  rO   c                 n    [        SSSSSS[        [        SS9S	S
9n[         SSU 0[        U40 UD6D6nU$ )z/ViT-bigG model (ViT-G/14) w/ QuickGELU act
    r  r  r	  r
  r.  Tr   r%  rW  )r4  r:  rU   rD   r<   r?  rC   rZ   r  )'vit_gigantic_patch14_clip_quickgelu_224r(  r  s       rM   rh  rh    s[     bBY]9$/<J '1h>HhLPQ[Lf_eLfhELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-Base (ViT-B/32+)
    r  r  r1  r  r   r4  r:  rD   r<   rX   r  )vit_base_patch32_plus_256r  r  s       rM   rk  rk  %  G     s"X\]J&#Z0:Z>B:>XQW>XZELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-Base (ViT-B/16+)
    r.  r  r1  r  r   rj  r  )vit_base_patch16_plus_240r  r  s       rM   rn  rn  /  rl  rO   c                 `    [        SSSSSSS[        SS9	n[         S	SU 0[        U40 UD6D6nU$ )
z.ViT-Base (ViT-B/16) w/ residual post-norm
    r.  r0  r1  Fr   r&  )	r4  r:  rD   r<   r=   rX   r;  rS  r7  r  )vit_base_patch16_rpn_224)rf  r   r  r  s       rM   rp  rp  9  sV     B"uZ^LeEJ '"Y/9Y=A*=WPV=WYELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	a  ViT-Base w/ LayerScale + 36 x 1 (36 block serial) config. Experimental, may remove.
Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
r.  r  $   r  r   rj  r  )vit_small_patch16_36x1_224r  r  s       rM   rs  rs  E  sG     s"W[\J&$[1;[?CJ?YRX?Y[ELrO   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )	a  ViT-Small w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
r.  r     r  r   r4  r:  rD   r<   rX   rS  r  )vit_small_patch16_18x2_224rf  r  r  r  s       rM   rw  rw  Q  sM     B!XkmJ&$[1;[?CJ?YRX?Y[ELrO   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )	zViT-Base w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
r.  r0  ru  r1  r   rv  r  )vit_base_patch16_18x2_224rx  r  s       rM   rz  rz  ^  sM    
 B"$YlnJ&#Z0:Z>B:>XQW>XZELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zFEVA-large model https://arxiv.org/abs/2211.07636 /via MAE MIM pretrainr  r  r  r.  r&  r4  r:  rD   r<   r7  r  )eva_large_patch14_196r  r  s       rM   r}  r}  j  sF     t2Y^_J&V,6V:>z:TV:TVELrO   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	zEEVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrainr  r  r  r.  r&  r|  r  )eva_large_patch14_336r  r  s       rM   r  r  s  s;     t2Y^_J&t:tY]^hYslrYstELrO   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	zFlexiViT-Small
    r.  r  r1  r  Tr4  r:  rD   r<   r=  r  )flexivit_smallr  r  s       rM   r  r  {  s;     s"Z^_J&mJmRVWaRlekRlmELrO   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )zFlexiViT-Base
    r.  r0  r1  Tr  r  )flexivit_baser  r  s       rM   r  r    s;     s"[_`J&l:lQUV`QkdjQklELrO   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )zFlexiViT-Large
    r.  r  r  Tr  r  )flexivit_larger  r  s       rM   r  r    s;     t2\`aJ&mJmRVWaRlekRlmELrO   c                 j    [        SSSSSS[        [        SSS9
n[         SSU 0[        U40 UD6D6nU$ )	GViT-Large model (ViT-L/14) w/ parallel blocks and qk norm enabled.
    r.  r0  r1  TF
r4  r:  rD   r<   r?  r=  rC   rS  r=   r>   r  )vit_base_patch16_xp_224rf  r   r   r  r  s       rM   r  r    sX     B"t\`%9ESWJ '!X.8X<@<Vv<VXELrO   c                 j    [        SSSSSS[        [        SSS9
n[         S	SU 0[        U40 UD6D6nU$ )
r  r  r  r  r.  TFr  r  )vit_large_patch14_xp_224r  r  s       rM   r  r    sY     R2]a%9ESWJ '"Y/9Y=A*=WPV=WYELrO   c                 j    [        SSSSSS[        [        SSS9
n[         S	SU 0[        U40 UD6D6nU$ )
zFViT-Huge model (ViT-H/14) w/ parallel blocks and qk norm enabled.
    r  r  r  r.  TFr  r  )vit_huge_patch14_xp_224r  r  s       rM   r  r    sX     R2]a%9ESWJ '!X.8X<@<Vv<VXELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-S/14 for DINOv2
    r  r  r1  r  r   rj  r  )vit_small_patch14_dinov2r  r  s       rM   r  r    sG     s"W[\J&"Y/9Y=A*=WPV=WYELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )zViT-B/14 for DINOv2
    r  r0  r1  r   rj  r  )vit_base_patch14_dinov2r  r  s       rM   r  r    sF     s"X\]J&!X.8X<@<Vv<VXELrO   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-L/14 for DINOv2
    r  r  r  r.  r   rj  r  )vit_large_patch14_dinov2r  r  s       rM   r  r    sG     t2Y]^J&"Y/9Y=A*=WPV=WYELrO   c                 z    [        SSSSSS[        [        R                  S9n[	         S	SU 0[        U40 UD6D6nU$ )
ViT-G/14 for DINOv2
    r     r  r  r   h˹WU@)r4  r:  rD   r<   rX   rU   r[   rZ   r  )vit_giant_patch14_dinov2rf  r   rj   SiLUr  r  s       rM   r  r    sV     R24J '"Y/9Y=A*=WPV=WYELrO   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )z'ViT-S/14 for DINOv2 w/ 4 registers
    r  r  r1  r  r   r  Tr4  r:  rD   r<   rX   r>  r=  r  )vit_small_patch14_reg4_dinov2r  r  s       rM   r  r    sP     B!TJ ''^4>^BFzB\U[B\^ELrO   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
z'ViT-B/14 for DINOv2 w/ 4 registers
    r  r0  r1  r   r  Tr  r  )vit_base_patch14_reg4_dinov2r  r  s       rM   r  r    sP     B"$TJ '&]3=]AEjA[TZA[]ELrO   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )z'ViT-L/14 for DINOv2 w/ 4 registers
    r  r  r  r.  r   r  Tr  r  )vit_large_patch14_reg4_dinov2r  r  s       rM   r  r    sP     R24TJ ''^4>^BFzB\U[B\^ELrO   c                 ~    [        SSSSSS[        [        R                  SSS	9
n[	         SS
U 0[        U40 UD6D6nU$ )r  r  r  r  r  r   r  r  T)
r4  r:  rD   r<   rX   rU   r[   rZ   r>  r=  r  )vit_giant_patch14_reg4_dinov2r  r  s       rM   r  r    s\     R24[f"''aPTJ ''^4>^BFzB\U[B\^ELrO   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r0  r1  Fr9  	gelu_tanhr4  r:  rD   r<   r;  r7  rZ   r  )vit_base_patch32_siglip_256r  r  s       rM   r  r  $  sO    B"%]bJ '%\2<\@DZ@ZSY@Z\ELrO   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr.  r0  r1  Fr9  r4  r:  rD   r<   r;  r7  r  )vit_base_patch16_siglip_224r  r  s       rM   r  r  /  L    B"%]bJ '%\2<\@DZ@ZSY@Z\ELrO   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr.  r0  r1  Fr9  r  r  )vit_base_patch16_siglip_256r  r  s       rM   r  r  9  r  rO   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr.  r0  r1  Fr9  r  r  )vit_base_patch16_siglip_384r  r  s       rM   r  r  C  r  rO   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr.  r0  r1  Fr9  r  r  )vit_base_patch16_siglip_512r  r  s       rM   r  r  M  r  rO   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr.  r  r  Fr9  r  r  )vit_large_patch16_siglip_256r  r  s       rM   r  r  W  L    R25^cJ '&]3=]AEjA[TZA[]ELrO   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr.  r  r  Fr9  r  r  )vit_large_patch16_siglip_384r  r  s       rM   r  r  a  r  rO   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr.  r  r  Fr9  r  r  r  )vit_large_patch16_siglip_512r  r  s       rM   r  r  k  sO    R25^cJ '&]3=]AEjA[TZA[]ELrO   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr       r.  爅ZӼ@Fr9  r4  r:  rD   r<   rU   r;  r7  r  )vit_so400m_patch14_siglip_224r  r  s       rM   r  r  v  O    R2]bpuJ ''^4>^BFzB\U[B\^ELrO   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r  r.  r  Fr9  r  r  )vit_so400m_patch14_siglip_378r  r  s       rM   r  r    sQ     R2]bpuJ ''^4>^BFzB\U[B\^ELrO   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r  r.  r  Fr9  r  r  )vit_so400m_patch14_siglip_384r  r  s       rM   r  r    r  rO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr.  r  r  r  Fr9  r  r4  r:  rD   r<   rU   r;  r7  rZ   r  )vit_so400m_patch16_siglip_256r  r  s       rM   r  r    R    R2]bpuJ ''^4>^BFzB\U[B\^ELrO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr.  r  r  r  Fr9  r  r  r  )vit_so400m_patch16_siglip_384r  r  s       rM   r  r    r  rO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr.  r  r  r  Fr9  r  r  r  )vit_so400m_patch16_siglip_512r  r  s       rM   r  r    r  rO   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr.  r  r  Fr9  r  r  r  )vit_giantopt_patch16_siglip_256r  r  s       rM   r  r    O    R25^cJ ')`6@`DHD^W]D^`ELrO   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr.  r  r  Fr9  r  r  r  )vit_giantopt_patch16_siglip_384r  r  s       rM   r  r    r  rO   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r0  r1  Fr&  r  r4  r:  rD   r<   r;  r7  rA  rZ   r  )vit_base_patch32_siglip_gap_256r  r  s       rM   r  r    sR    B"%]blqJ ')`6@`DHD^W]D^`ELrO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	]A SigLIP variant of ViT with global average pooling (GAP) instead of attention pooling (MAP).r.  r0  r1  Fr&  r  r  )vit_base_patch16_siglip_gap_224r  r  s       rM   r  r    Q     B"%]blqJ ')`6@`DHD^W]D^`ELrO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r.  r0  r1  Fr&  r  r  )vit_base_patch16_siglip_gap_256r  r  s       rM   r  r    r  rO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r.  r0  r1  Fr&  r  r  )vit_base_patch16_siglip_gap_384r  r  s       rM   r  r    r  rO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r.  r0  r1  Fr&  r  r  )vit_base_patch16_siglip_gap_512r  r  s       rM   r  r    r  rO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r.  r  r  Fr&  r  r  ) vit_large_patch16_siglip_gap_256r  r  s       rM   r  r    Q     R25^cmrJ '*a7AaEI*E_X^E_aELrO   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r.  r  r  Fr&  r  r  ) vit_large_patch16_siglip_gap_384r  r  s       rM   r  r    r  rO   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr.  r  r  Fr&  r  r  r  ) vit_large_patch16_siglip_gap_512r  r  s       rM   r  r    sP    R255KJ '*a7AaEI*E_X^E_aELrO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r.  r  Fr&  r"  r  )!vit_so400m_patch14_siglip_gap_224r  r  s       rM   r  r  $  R     R2ueJ '+b8BbFJ:F`Y_F`bELrO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r.  r  Fr&  r"  r  )!vit_so400m_patch14_siglip_gap_378r  r  s       rM   r  r  0  r  rO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r.  r  Fr&  r"  r  )!vit_so400m_patch14_siglip_gap_384r  r  s       rM   r  r  <  r  rO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r.  r  Fr&  r"  r  )!vit_so400m_patch14_siglip_gap_448r  r  s       rM   r  r  H  r  rO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r.  r  Fr&  r"  r  )!vit_so400m_patch14_siglip_gap_896r  r  s       rM   r  r  T  r  rO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )r  r.  r  r  r  Fr&  r  	r4  r:  rD   r<   rU   r;  r7  rA  rZ   r  )!vit_so400m_patch16_siglip_gap_256r  r  s       rM   r  r  `  sT     R2ue{J '+b8BbFJ:F`Y_F`bELrO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )Nr.  r  r  r  Fr&  r  r  r  )!vit_so400m_patch16_siglip_gap_384r  r  s       rM   r  r  l  S    R2]b5KJ '+b8BbFJ:F`Y_F`bELrO   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )Nr.  r  r  r  Fr&  r  r  r  )!vit_so400m_patch16_siglip_gap_512r  r  s       rM   r  r  w  r  rO   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr.  r  r  Fr&  r  r  r  )#vit_giantopt_patch16_siglip_gap_256r  r  s       rM   r  r    P    R255KJ '-d:DdHLZHb[aHbdELrO   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr.  r  r  Fr&  r  r  r  )#vit_giantopt_patch16_siglip_gap_384r  r  s       rM   r  r    r  rO   c                 Z    [        SSSSSSSSS	S
S9
n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r  r      FTr,   r&  
r4  r:  rD   r<   rX   rU   r;  r=  r>  r7  r  )vit_wee_patch16_reg1_gap_256r  r  s       rM   r  r    sU    B!YZ$1%J '&]3=]AEjA[TZA[]ELrO   c                 \    [        SSSSSSSSS	S
SS9n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r  r   r   FTr,   r&  r9   r4  r:  rD   r<   rX   rU   r;  r=  r>  r7  r:   r  )vit_dwee_patch16_reg1_gap_256r  r  s       rM   r  r    sX    B!YZ$1%\bJ ''^4>^BFzB\U[B\^ELrO   c                 d    [        SSSSSSSSSS	[        S
9n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r   r   FTr,   r&  r4  r:  rD   r<   rX   rU   r;  r=  r>  r7  rS  r  )vit_pwee_patch16_reg1_gap_256)rf  r   r  r  s       rM   r  r    sX    B!YZ$1%ZnJ ''^4>^BFzB\U[B\^ELrO   c                 d    [        SSSSSSSSSS	[        S
9n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r   r   FTr,   r&  r  r  )vit_dpwee_patch16_reg1_gap_256)rf  r   r  r  s       rM   r
  r
    sX    B!YZ$1%ZrJ '(_5?_CG
C]V\C]_ELrO   c                 Z    [        SSSSSSSSS	S
S9
n[         SSU 0[        U40 UD6D6nU$ )Nr.  @  r  r   r   ffffff@FTr,   r&  r  r  )vit_little_patch16_reg1_gap_256r  r  s       rM   r  r    U    B!Y\$1%J ')`6@`DHD^W]D^`ELrO   c                 \    [        SSSSSSSSS	S
SS9n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r   r   r  FTr,   r&  r9   r  r  ) vit_dlittle_patch16_reg1_gap_256r  r  s       rM   r  r    sX    B!Y\$1%\bJ '*a7AaEI*E_X^E_aELrO   c                 Z    [        SSSSSSSSS	S
S9
n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r   r   r  FTr  r&  r  r  )vit_little_patch16_reg4_gap_256r  r  s       rM   r  r    r  rO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r1  r  r   FTr,   r&  	r4  r:  rD   r<   rX   r;  r=  r>  r7  r  )vit_medium_patch16_reg1_gap_256r  r  s       rM   r  r    R    B!$1%J ')`6@`DHD^W]D^`ELrO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r1  r  r   FTr  r&  r  r  )vit_medium_patch16_reg4_gap_256r  r  s       rM   r  r    r  rO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r     r  r   FTr  r&  r  r  ) vit_mediumd_patch16_reg4_gap_256r  r  s       rM   r  r    R    B!$1%J '*a7AaEI*E_X^E_aELrO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r  r  r   FTr  r&  r  r  ) vit_mediumd_patch16_reg4_gap_384r  r  s       rM   r  r    r  rO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r1  r  r   FTr,   r&  r  r  ) vit_betwixt_patch16_reg1_gap_256r  r  s       rM   r!  r!    R    B"$$1%J '*a7AaEI*E_X^E_aELrO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r1  r  r   FTr  r&  r  r  ) vit_betwixt_patch16_reg4_gap_256r  r  s       rM   r$  r$    r"  rO   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr.  r  r1  r  r   FTr  r&  r  r  ) vit_betwixt_patch16_reg4_gap_384r  r  s       rM   r&  r&  '  r"  rO   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr.  r0  r1  FTr&  r  )r4  r:  rD   r<   r;  r=  r7  r>  r  )vit_base_patch16_reg4_gap_256r  r  s       rM   r(  r(  2  sP    B"%1J ''^4>^BFzB\U[B\^ELrO   c                 V    [        SSSSSSSSS	9n[         SS
U 0[        U40 UD6D6nU$ )ESO150M (shape optimized, but diff than paper def, optimized for GPU) r.  r  ru  r  ~jt@Fr  r9  )r4  r:  rD   r<   rU   r;  r>  r7  r  )vit_so150m_patch16_reg4_map_256r  r  s       rM   r,  r,  =  sR     B"aUJ ')`6@`DHD^W]D^`ELrO   c                 X    [        SSSSSSSSSS	9	n[         SS
U 0[        U40 UD6D6nU$ )r*  r.  r  ru  r  r+  Fr  r&  	r4  r:  rD   r<   rU   r;  r>  r7  rA  r  )vit_so150m_patch16_reg4_gap_256r  r  s       rM   r/  r/  I  T     B"aUEJ ')`6@`DHD^W]D^`ELrO   c                 X    [        SSSSSSSSSS	9	n[         SS
U 0[        U40 UD6D6nU$ )r*  r.  r  ru  r  r+  Fr  r&  r.  r  )vit_so150m_patch16_reg4_gap_384r  r  s       rM   r2  r2  U  r0  rO   c                 Z    [        SSSSSSSSSS	S
9
n[         SSU 0[        U40 UD6D6nU$ )HSO150M v2 (shape optimized, but diff than paper def, optimized for GPU) r.  @        NN@r   Fr,   r&  
r4  r:  rD   r<   rU   rX   r=   r;  r>  r7  r  ) vit_so150m2_patch16_reg1_gap_256r  r  s       rM   r:  r:  a  W     B"[_EaUJ '*a7AaEI*E_X^E_aELrO   c                 Z    [        SSSSSSSSSS	S
9
n[         SSU 0[        U40 UD6D6nU$ )r4  r.  r5  r6  r7  r8  r   Fr,   r&  r9  r  ) vit_so150m2_patch16_reg1_gap_384r  r  s       rM   r=  r=  m  r;  rO   c                 Z    [        SSSSSSSSSS	S
9
n[         SSU 0[        U40 UD6D6nU$ )r4  r.  r5  r6  r7  r8  r   Fr,   r&  r9  r  ) vit_so150m2_patch16_reg1_gap_448r  r  s       rM   r?  r?  y  r;  rO   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r  r.  r   FT)r4  r:  rD   r<   rX   r@  r,  r  )vit_intern300m_patch14_448r  r  s       rM   rA  rA    sN    R2EDJ '$[1;[?CJ?YRX?Y[ELrO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )ViT Large AIM-v2 model
    r  r  r  r  F      @r&  silur   r%  r4  r:  rD   r<   r;  rA  rU   r7  r=   r@   rZ   rC   rR  r[   r  )aimv2_large_patch14_224rf  r   r   r   r  r  s       rM   rG  rG    p     R1%Y^EEUV\7-UY@ZflJ
 '!X.8X<@<Vv<VXELrO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )ViT Huge AIM-v2 model
    r  r  r  r1  FAfU@r&  rE  r   r%  rF  r  )aimv2_huge_patch14_224rH  r  s       rM   rM  rM    sp    
 R25Z_eeuX^7-UY@ZflJ
 ' W-7W;?
;Uf;UWELrO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )ViT 1B AIM-v2 model
    r     r  r.  FrD  r&  rE  r   r%  rF  r  )aimv2_1b_patch14_224rH  r  s       rM   rQ  rQ    p     R25Z_EEUV\7-UY@ZflJ
 'U+5U9=j9SF9SUELrO   c                     [        SSSSSSSSSSS[        [        SS	9[        [        SS	9[        S
9n[	         SSU 0[        U40 UD6D6nU$ )ViT 3B AIM-v2 model
    r     r  FrL  r&  rE  r   r%  rF  r  )aimv2_3b_patch14_224rH  r  s       rM   rV  rV    p     R25Z_eeuX^7-UY@ZflJ
 'U+5U9=j9SF9SUELrO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rC  r  r  r  r  FrD  r&  rE  r   r%  rF  r  )aimv2_large_patch14_336rH  r  s       rM   rY  rY    rI  rO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rK  r  r  r  r1  FrL  r&  rE  r   r%  rF  r  )aimv2_huge_patch14_336rH  r  s       rM   r[  r[    p     R25Z_eeuX^7-UY@ZflJ
 ' W-7W;?
;Uf;UWELrO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rO  r  rP  r  r.  FrD  r&  rE  r   r%  rF  r  )aimv2_1b_patch14_336rH  r  s       rM   r^  r^    rR  rO   c                     [        SSSSSSSSSSS[        [        SS	9[        [        SS	9[        S
9n[	         SSU 0[        U40 UD6D6nU$ )rT  r  rU  r  FrL  r&  rE  r   r%  rF  r  )aimv2_3b_patch14_336rH  r  s       rM   r`  r`    rW  rO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rC  r  r  r  r  FrD  r&  rE  r   r%  rF  r  )aimv2_large_patch14_448rH  r  s       rM   rb  rb    rI  rO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rK  r  r  r  r1  FrL  r&  rE  r   r%  rF  r  )aimv2_huge_patch14_448rH  r  s       rM   rd  rd    r\  rO   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rO  r  rP  r  r.  FrD  r&  rE  r   r%  rF  r  )aimv2_1b_patch14_448rH  r  s       rM   rf  rf    rR  rO   c                     [        SSSSSSSSSSS[        [        SS	9[        [        SS	9[        S
9n[	         SSU 0[        U40 UD6D6nU$ )rT  r  rU  r  FrL  r&  rE  r   r%  rF  r  )aimv2_3b_patch14_448rH  r  s       rM   rh  rh  +  rW  rO   c           	      P    [        SSSSSSS9n[        S	SU 0[        U40 UD6D6nU$ )
ViT Test
    r.  @   r  r   r   T)r4  r:  rD   r<   rU   r,  r  )test_vitr  r  s       rM   rl  rl  9  s=     raSTgklJ&gjgDQ[Lf_eLfgELrO   c                 X    [        SSSSSSSSS	S
S9
n[        SSU 0[        U40 UD6D6nU$ )rj  r.  rk  r  r   r   Fr,   r&  r   T)
r4  r:  rD   r<   rU   r;  r>  r7  rX   r,  r  )	test_vit2r  r  s       rM   rn  rn  B  sK     1QaU_ceJ 'hzhTR\Mg`fMghELrO   c                 X    [        SSSSSSSSS	S
S9
n[        SSU 0[        U40 UD6D6nU$ )rj  r.  `   	   r   r   Fr,   r9  Tr   )
r4  r:  rD   r<   rU   r;  r>  r7  rB  rX   r  )	test_vit3r  r  s       rM   rr  rr  M  sL     1QaUPTbfhJ 'hzhTR\Mg`fMghELrO   c                 Z    [        SSSSSSSSSS	S
S9n[        SSU 0[        U40 UD6D6nU$ )rj  r.  rp  rq  r   Fr,   r&  r   Trmsnorm)r4  r:  rD   r<   rU   r;  r>  r7  rX   r,  rC   r  )	test_vit4r  r  s       rM   ru  ru  X  sM     1QaU_cJ
 'hzhTR\Mg`fMghELrO   c                 p    [        SSSSSSSSS[        [        SS9S	9
n[        SS
U 0[        U40 UD6D6nU$ )zlBEiT3 Base model (ViT-Base size) with patch size 16x16.
Remapped to VisionTransformer with scale_norm=True.
r.  r0  r1  r  Tr&  r   r%  
r4  r:  rD   r<   rU   rV   rW   r;  r7  rC   r  )beit3_base_patch16_224r(  r  s       rM   rx  rx  e  sS    
 B"TtQV9$/J
 'uJuZ^_iZtmsZtuELrO   c                 p    [        SSSSSSSSS[        [        SS9S	9
n[        SS
U 0[        U40 UD6D6nU$ )znBEiT3 Large model (ViT-Large size) with patch size 16x16.
Remapped to VisionTransformer with scale_norm=True.
r.  r  r  r  Tr&  r   r%  rw  r  )beit3_large_patch16_224r(  r  s       rM   rz  rz  s  sS    
 R2TtQV9$/J
 'vZv[_`j[unt[uvELrO   c                 p    [        SSSSSSSSS[        [        SS	9S
9
n[        SSU 0[        U40 UD6D6nU$ )z]BEiT3 Giant model with patch size 14x14.
Remapped to VisionTransformer with scale_norm=True.
r  r  r  r.  8mt@Tr&  r   r%  rw  r  )beit3_giant_patch14_224r(  r  s       rM   r}  r}    sS    
 R2TtQV9$/J
 'vZv[_`j[unt[uvELrO   c                 r    [        SSSSSSSSSS[        [        S	S
9S9n[        SSU 0[        U40 UD6D6nU$ )ztBEiT3 Giant model with patch size 14x14 and image size 336x336.
Remapped to VisionTransformer with scale_norm=True.
r  r  r  r  r.  r|  Tr&  r   r%  )r3  r4  r:  rD   r<   rU   rV   rW   r;  r7  rC   r  )beit3_giant_patch14_336r(  r  s       rM   r  r    sV    
 t2W]TtQV9$/J
 'vZv[_`j[unt[uvELrO   vit_tiny_patch16_224_in21kvit_small_patch32_224_in21kvit_small_patch16_224_in21kvit_base_patch32_224_in21kvit_base_patch16_224_in21kvit_base_patch8_224_in21kvit_large_patch32_224_in21kvit_large_patch16_224_in21kvit_huge_patch14_224_in21kvit_base_patch32_224_samzvit_base_patch32_224.samvit_base_patch16_224_samzvit_base_patch16_224.samvit_small_patch16_224_dinovit_small_patch8_224_dinovit_base_patch16_224_dinovit_base_patch8_224_dinovit_base_patch16_224_miil_in21k!vit_base_patch32_224_clip_laion2b)"vit_large_patch14_224_clip_laion2b!vit_huge_patch14_224_clip_laion2b"vit_giant_patch14_224_clip_laion2b)FFFTrT   rT   Nr   )r%  r,   Fr  )r7   rT   T)rM  rT   T)r,   rf   r  F)r7   F)zvisual.)Fr  Tr  )FNr  (  r   copyloggingr   oscollectionsr   	functoolsr   typingr   r   r   r   r	   r
   r   r   r   r   ImportErrortyping_extensionsr   torch.nnrj   torch.nn.functional
functionalr   	torch.jitr   	timm.datar   r   r   r   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   _builderr-   	_featuresr.   _manipulater/   r0   r1   r2   	_registryr3   r4   r5   __all__	getLoggerr   _loggerrI   ro   r   r   r   rN   rQ   r   r   r   r  r   rH   r*  r6   r  r  r  r  r  r  r|  r  rb  rl  rr  rf  r  r  r  default_cfgsr[  rJ   _quick_gelu_cfgsr  deepcopycr\  environlowerr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r   r#  r'  r,  r.  r0  r3  r6  r8  r:  r<  r>  r@  rB  rE  rG  rJ  rL  rO  rR  rU  rY  r\  r^  ra  rc  rf  rh  rk  rn  rp  rs  rw  rz  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r  r  r!  r$  r&  r(  r,  r/  r2  r:  r=  r?  rA  rG  rM  rQ  rV  rY  r[  r^  r`  rb  rd  rf  rh  rl  rn  rr  ru  rx  rz  r}  r  )r  r  s   00rM   <module>r     s@  2    	 #  O O O*      
     0 + + R R Y Y
 

H
% 	  04!!! ! 	!
 ! ! ! ! ! T"))_-! ! YY!HQBII QhC299 CLy299 yxeryy ePQ")) Ql !!"&+	<<   $	2x		 xv"")) "3 "$ "Z^ "(  	"		"" " 	"
 
"@"")) "3 "$ "Z^ "."")) "3 "RV "bf "

Gs 
Gu 
GQU 
Gai 
G  "#"$&LL  c3h	
   \\2 WT* WTS WT# WTcg WTtx WT WTz  #ell*+# # # 
#u||
	#Lell*+  
#u||
.ell*+  
#u||
$9t 9~ #(&Lell*+L L  L 	L
 L 
#u||
L^c T#s(^ "| 1$3|
 02| 0 d2| 0 d]S2B|" 1$ e3#|* 1$ e]S3B+|2 1$ e33|: 1$ e]S3B;|B 0 f2C|J 0 d]S2BK|R 0 e2S|Z 0 e]S2B[|b / d1c|j 1$ e3k|r 1$ e]S3Bs|~ .t}0|D .t} 300E|L /~ 310M|X ( d*Y|` ( d]S*Ba|h ' d)i|p ' d]S)Bq|x ' d)y|@ ' d]S)BA|J &t|K|L %drlM|N &t|O|P )$2,Q|V &t(W|^ &t(_|f ')g|n ')o|v &t(w|@ ({e*-A|H )$|e+-I|P )$|e+-Q|X (}e*-Y|` (|e*-a|h '{e)-i|p )$|e+-q|| $THVZ&}|B $THVZ&C|L !$i"(<!#MM|T  g"(<!"MU|\  e"(<!"M]|d c"(<!!Me|p '\"(<! 3)0q|| &t\"(<! 3(0}|H '\"(<! 3)0I|T '\"(<! 3)0U|d ,Ta"(<! 3.0e|p +Da"(<! 3-0q|| ,Ta"(<! 3.0}|H ,Ta"(<! 3.0I|X &t L|e:ch(jY|` .t N|e:0Wa|l ' L)m|r *4 4U,Ds|x 24 441y|~ 24 484E|D ,T 4.1E|H )$&I|N 6t?84O|T 6t?S]8\U|Z 6t?S]8\[|` 6t?T8Ca|f 6t?(8Dg|n 7$*@39Po|t 7$*@(9Du|| 6t?S8B}|B	 6t?(8DC	|L	 5d?74M	|R	 5d?-87ES	|Z	 5d?T7C[	|`	 5d?-87Ea	|h	 6t?S8Bi	|n	 6t?(8Do	|x	 0?24y	|~	 0?S2B	|D
 0?(2DE
|L
 1$$*@33PM
|R
 1$$*@(3DS
|Z
 0?S2B[
|`
 0?(2Da
|j
 /?14k
|p
 /?14q
|v
 /?(1Dw
|~
 0?S2B
|F 1$?3GG|L 24$*@3\a4cM|R 1$?Se3US|Z 0?2G[|` 1$?Se3Ua|h (?*Ei|n (?Sc*So|t )$$*@3\_+au|z (?Sd*T{|@ )$?Sd+TA|F ,T?Sd.TG|N .tW?	0EO|X .t?Sc0SY|` 3D? 3C	5Aa|j /?Sc1Sk|t +D?Sc-Su|z +D?C-A{|B +D?Sc-SC|H ,T?Sc.SI|P &t?Sc(SQ|X ,T?Sc.SY|` 'W?Sc	)Sa|j &tW?Sd	(Tk|t &t?WD(Bu|L 4TW?Sd	6TM|V 4T? 3(PT	6VW|` 8?Sd:Ta|h 8? 3(PT	:Vi|t /W?Sc	1Su|~ /W?Sc	1S|H 0W?Sc	2SI|R /W?Sd	1TS|\ 4T?Sd6T]|d 3DW?Sd	5Te|n .tW?Sc	0So|x .tW?Sc	0Sy|B /W?Sc	1SC|N 'W?)EO|V 'W?)EW|^ (W?Sc*S_|f (W?C	*Ag|r 6t?S	8s|B *4B=[_+`C|D *4B=[_+`E|F +DRLG|H +DRLI|J *4B<K|R 05? 3	20S|\ 05? 3(	2D]|f *45? 3	,0g|p *45? 3(	,Dq|| !$Sae 4#1}|D  Ygk 4"1E|L  Ygk 4"1M|V  Sae 4"1W|^ Ygk 4!1_|f Ygk 4!1g|n !$[im 4U#Do|v  Zhl 4U"Dw|@ !$Sae 4#1A|H  Ygk 4"1I|P  Ygk 4"1Q|Z "4Wei 4U$D[|b "4Wei 4U$Dc|l ("m|n )$2,o|p ("q|t S"(<!	!Mu|~  T"(<!	"M|H S"(<!	!MI|T *4M"(<!	,MU|^ +DN"(<!	-M_|h *4S 3"(<!,Mi|t ,TN"(<!	.Mu|@ +D -A|H +D-I|N (*O|T +D -U|\ ( *]|d -d /e|l +D -m|t ( *u|| +D -}|D ( *E|L ,T .M|T )$ +U|\ ,T .]|d )$ +e|l ,T .m|t -d/u|z *4,{|@ -d /A|H *4 ,I|P *4 ,Q|X -d /Y|` / 1a|h -d /i|p -d /q|x / 1y|@ / 1A|J / 1K|R /1S|X ,T.Y|^ / 1_|f ,T .g|n 1$ 3o|v / 1w|~ ,T .|F / 1G|N ,T .O|V 0 2W|^ -d /_|f 0 2g|n -d /o|v 0 2w|~ 1$3|D .t0E|J 1$3K|P 02Q|V 4T6W|\ 5d7]|l 1$ 3m|t .t 30u|| .t 30}|D 1$ 33E|L 0 32M|T 9$ 3;U|\ 4T 36]|d 4T 36e|l 5d 37m|@ 7 39A|H 8 3:I|P 0 32Q|X 9$ 3;Y|` 4T 36a|h 4T 36i|p 5d 37q|D 1$ 3E|L 3D 5M|T 1$ 3U|\ 1$ 3]|d 3D 5e|l 3D 5m|v 24 3(4w|~ 6t 3(8|H 3D?5EI|P 5d?7EQ|X 3D?5EY|` 6t?8Ea|j ,T 4.1k|p 6t 481q|v -d 4/1w|| -d 4/1}|B 7 491C|H .t 401I|N 8 4:1O|T 0 421U|\ / 411]|b 9$ 4;1c|h / 411i|n 8 4:1o|t / 411u|z 0 421{|B  ? 4A1C |H  9$ 4;1I |N  7 491O |V  1$ 431W |^  ? 3A0_ |d  0 421e |j  ? 4A1k |p  9$ 4;1q |v  0 421w ||  7 491} |D! 1$ 431E!|L! ? 3A0M!|R! .t 0"S!|X! =d 4?1Y!|^! 5d 471_!|f! =d 3?0g!|l! 0 2"m!|p! >t 3@0q!|v! 6t 380w!|~! >t 3@0!|D" >t 3(@DE"|L" +D"(< 3A	-M"|X" +D"(< 3A-Y"|d" '?L!)%e"|l" ,T?L!.%m"|t" &t?L!(%u"||" $T?L!&%}"|D# $T?L!&%E#|L# '?L 3A)?M#|T# ,T?L 3A.?U#|\# &t?L 3A(?]#|d# $T?L 3A&?e#|l# $T?L 3A&?m#|t# '?L 3A)?u#||# &t?L 3A(?}#|D$ $T?L 3A&?E$|L$ $T?L 3A&?M$|V$ $ 41W$|\$   41   41   41 -1"(<s-L 6:"(<s6L .2"(<s.L 7;"(<s7L *.*0Ds*T)-=/DJ^il*n!%"(<s"
 +/"(<s+
 #'"(<s#
 ,0"(<s,o%||% #/"4"4"6n"6$!!%%:LAQ\`abi`jkl`mQmA"6n 	Al1o&A~  1+<=L8%789 
 %\2 jjnn%97CIIKvU  !%)### TN#
 k)*#L T @Q   T @Q   d AR   d AR   d AR   d AR   T @Q   T @Q   T @Q   T @Q   T @Q   D ?P   d AR   d AR   d AR   d AR   d AR   T @Q   d AR    DU   $ EV   4 FW   4 FW   4 FW   D GX    DU    DU    DU   $ EV   D GX   D GX   D GX   T HY   $ EV   $ EV   $ EV   $ EV   $ EV   $ EV   t J[   4 FW   4 FW   $ EV   $ EV   $ EV   
4 
FW 
 
 
d 
IZ 
 
 
d 
IZ 
 
 	D 	O` 	 	 	D 	O` 	 	 	T 	Pa 	 	 	T 	Pa 	 	 	D 	O` 	 	 	D 	O` 	 	 	 	Sd 	 	 $ EV   $ EV    DU   4 FW   	4 	FW 	 	 $ EV   d AR   d AR   t :K   d 9J   t :K   	 	CT 	 	 	 	DU 	 	 	 	CT 	 	  DU    CT    DU    DU    	d 	IZ 	 	 	T 	HY 	 	 	d 	IZ 	 	 d IZ    D GX   D GX   D GX   D GX   D GX   T HY   T HY   T HY   d IZ   d IZ   d IZ   d IZ   d IZ   d IZ    K\    K\    K\    K\    K\    K\    K\    L]    L]    L]   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   D O`   D O`   T HY   d IZ   d IZ   t J[    K\    L]    K\    K\    K\    L]    L]    L]    L]    L]   d IZ    K\    K\    K\    L]    L]    L]   4 FW   
 
CT 
 
 t BS   
T 
@Q 
 
 
T 
@Q 
 
 
 
CT 
 
 
t 
BS 
 
 
T 
@Q 
 
 
T 
@Q 
 
 
 
CT 
 
 
t 
BS 
 
 
T 
@Q 
 
 
T 
@Q 
 
  4E   $ 5F   $ 5F   	$ 	5F 	 	 
t 
BS 
 
 
 
CT 
 
 
 
CT 
 
 
 
CT 
 
 H ' "E'!#G' "#G' !"E	'
 !"E'  !C' "#E' "#G' !"C'  :'  :' !">'  !<'  !<'  :'  &'H!'" ()L#'$ +O)L*N)' uS  *))*r[ os.   Ax$ FAx7F%Ax7F5Ax7x$Ax4x3Ax4