
    jAQ                     
   U d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ ddlmZ ddlmZmZ ddlmZmZ ddgZdddded         ddiZ e!e"e!e"ef         f         e#d<    G d dej$                  Z% G d dej$                  Z& G d deej$                  Z' G d de          Z(	 d*d e"d!e)d"ee)gej$        f         d#e"d$e*e"         dz  d%ed&e'fd'Z+d+d!e)d%ed&e'fd)Z,dS ),    N)Callable)deepcopy)permutations)Any)nn)
functional)IntermediateLayerGetter)VOCABS)MultiHeadAttentionPositionwiseFeedForward   )vit_s)_bf16_to_float32load_pretrained_params   )_PARSeq_PARSeqPostProcessorPARSeqparseq)gh|?5?g=
ףp=?gV-?)gA`"?gl?g$C?r          frenchzIhttps://doctr-static.mindee.com/models?id=v0.7.0/parseq-56125471.pt&src=0)meanstdinput_shapevocaburldefault_cfgsc                   P     e Zd ZdZdedef fdZdej        dej        fdZ xZ	S )CharEmbeddingzImplements the character embedding module

    Args:
        vocab_size: size of the vocabulary
        d_model: dimension of the model
    
vocab_sized_modelc                     t                                                       t          j        ||          | _        || _        d S N)super__init__r   	Embedding	embeddingr#   )selfr"   r#   	__class__s      j/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/models/recognition/parseq/pytorch.pyr'   zCharEmbedding.__init__.   s7    j'::    xreturnc                 `    t          j        | j                  |                     |          z  S r%   )mathsqrtr#   r)   )r*   r.   s     r,   forwardzCharEmbedding.forward3   s%    y&&):):::r-   )
__name__
__module____qualname____doc__intr'   torchTensorr3   __classcell__r+   s   @r,   r!   r!   &   s{         3       
; ;%, ; ; ; ; ; ; ; ;r-   r!   c                   b     e Zd ZdZ	 	 	 	 ddededed	ed
ef
 fdZ	 ddej        dz  fdZ	 xZ
S )PARSeqDecodera   Implements decoder module of the PARSeq model

    Args:
        d_model: dimension of the model
        num_heads: number of attention heads
        ffd: dimension of the feed forward layer
        ffd_ratio: depth multiplier for the feed forward layer
        dropout: dropout rate
             皙?r#   	num_headsffd	ffd_ratiodropoutc                 h   t                                                       t          |||          | _        t          |||          | _        t          |||z  |t          j                              | _        t          j	        |d          | _
        t          j	        |d          | _        t          j	        |d          | _        t          j	        |d          | _        t          j        |          | _        t          j        |          | _        t          j        |          | _        d S )N)rF   gh㈵>)eps)r&   r'   r   	attentioncross_attentionr   r   GELUposition_feed_forward	LayerNorm
query_normcontent_normfeed_forward_normoutput_normDropoutattention_dropoutcross_attention_dropoutfeed_forward_dropout)r*   r#   rC   rD   rE   rF   r+   s         r,   r'   zPARSeqDecoder.__init__B   s    	+IwPPP1)WgVVV%<WcIoW^`b`g`i`i%j%j",wD999Ld;;;!#g4!@!@!@<T:::!#G!4!4')z'':':$$&Jw$7$7!!!r-   Ntarget_maskc           	      F   |                      |          }|                     |          }|                                |                     |                     ||||                    z   }|                                |                     |                     |                      |          ||                    z   }|                                |                     |                     | 	                    |                              z   }| 
                    |          S )N)mask)rN   rO   clonerS   rI   rT   rJ   rU   rL   rP   rQ   )r*   targetcontentmemoryrV   rN   rO   s          r,   r3   zPARSeqDecoder.forwardW   s     __V,,
((11$"8"8NN:|\NTT#
 #
 
 $">">  !8!8&&II#
 #
 
 $";";D<V<VW[WmWmntWuWu<v<v"w"ww'''r-   )r?   r@   rA   rB   r%   )r4   r5   r6   r7   r8   floatr'   r9   r:   r3   r;   r<   s   @r,   r>   r>   7   s          8 88 8 	8
 8 8 8 8 8 8 84 ,0( (
 \D(( ( ( ( ( ( ( (r-   r>   c                       e Zd ZdZ	 	 	 	 	 	 	 	 d)d
edededededededeeeef         dede	ee
f         d	z  dd	f fdZdede
dd	fdZdej        dej        fdZdej        deej        ej        f         fdZ	 	 d*dej        dej        dej        d	z  d ej        d	z  dej        f
d!Zd+d"ej        d#ed	z  dej        fd$Z	 	 	 d,d%ej        dee         d	z  d&ed'ede	ee
f         f
d(Z xZS )-r   a  Implements a PARSeq architecture as described in `"Scene Text Recognition
    with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966>`_.
    Slightly modified implementation based on the official Pytorch implementation: <https://github.com/baudm/parseq/tree/main`_.

    Args:
        feature_extractor: the backbone serving as feature extractor
        vocab: vocabulary used for encoding
        embedding_units: number of embedding units
        max_length: maximum word length handled by the model
        dropout_prob: dropout probability for the decoder
        dec_num_heads: number of attention heads in the decoder
        dec_ff_dim: dimension of the feed forward layer in the decoder
        dec_ffd_ratio: depth multiplier for the feed forward layer in the decoder
        input_shape: input shape of the image
        exportable: onnx exportable returns only logits
        cfg: dictionary containing information about the model
    r   rB   r?     rA   r   FNr   embedding_units
max_lengthdropout_probdec_num_heads
dec_ff_dimdec_ffd_ratior   
exportablecfgr/   c                    t                                                       || _        |
| _        || _        || _        t          |          | _        t          j	        
                                | _        || _        t          |||||          | _        t          j        || j        dz             | _        t%          | j        dz   |          | _        t          j        t+          j        d| j        dz   |                    | _        t          j        |          | _        t5          | j                  | _        t          j                            | j        d           |                                 D ]i\  }}|                    d          rtA          |t          j                  rRt          j                            |j!        d           |j"        $t          j        #                    |j"                   tA          |t          j$                  rWt          j                            |j!        d           |j%        )|j!        j&        |j%                 '                                 tA          |t          j(        t          j)        f          rJt          j        *                    |j!        d           t          j        *                    |j"        d           kd S )	Nr   r   )p)r   g{Gz?)r   zfeat_extractor.r   )+r&   r'   r   rf   rg   ra   lenr"   nprandomdefault_rngrngfeat_extractorr>   decoderr   Linearheadr!   embed	Parameterr9   r:   pos_queriesrR   rF   PARSeqPostProcessorpostprocessorinittrunc_normal_named_modules
startswith
isinstanceweightbiaszeros_r(   padding_idxdatazero_BatchNorm2d	GroupNorm	constant_)r*   feature_extractorr   r`   ra   rb   rc   rd   re   r   rf   rg   nmr+   s                 r,   r'   zPARSeq.__init__}   sR    	
$$e**9((**/$_mZQ^`lmmIot/BCC	"4?Q#6HH
<Q!8K_(](]^^zL1110tzBBB
d.D999&&(( 	- 	-DAq||-.. !RY'' 
-%%ahD%9996%GNN16***Ar|,, -%%ahD%999=,HM!-066888A=>> -!!!(A...!!!&!,,,	- 	-r-   path_or_urlkwargsc                     |                     d          g |d<   |d                             g d           t          | |fi | dS )zLoad pretrained parameters onto the model

        Args:
            path_or_url: the path or URL to the model parameters (checkpoint)
            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
        ignore_keysN)zdecoder.attention_norm.weightzdecoder.attention_norm.biasz#decoder.cross_attention_norm.weightz!decoder.cross_attention_norm.bias)getextendr   )r*   r   r   s      r,   from_pretrainedzPARSeq.from_pretrained   sl     ::m$$,$&F=!}$$ &
 &
 &
 	 	 	 	t[;;F;;;;;r-   seqlenc           	         t                                                                                    t          j        j                  g}t          j                  dz  }t          d|          }dk     rdk    rg d}nt          t          |                    }t          j        t          t          t                                        j                  |         }|dd          }t          j        |          }t          |          rV| j                            t          |          |t          |          z
  d	          }t          j        |||         g          }nS|                    fd
t          |t          |          z
            D                        t          j        |          }|                    d          }	t          j        ||	g                              dd                              d          }t          j        t          |          dj                  }
t          j        t          |          dfdz   j                  }t          j        |
|dz   |gd                                           }t          |          dk    r+dz   t          j        dz   j                  z
  |ddd f<   |S )Ndevice   r      rA   )r   r   rA      	   
   r?                  r   F)sizereplacec                 F    g | ]}t          j        j                   S )r   )r9   randpermr   ).0_max_num_charsr   s     r,   
<listcomp>z0PARSeq.generate_permutations.<locals>.<listcomp>   s7       HI}V]CCC  r-   r   dim)r8   maxitemr9   aranger   r1   	factorialminlistrange	as_tensorr   stackrj   rn   choicecatr   flip	transposereshapezerosfull)r*   r   perms	max_permsnum_gen_permsselector	perm_poolfinal_permsicompsos_idxeos_idxcombinedr   s    `           @r,   generate_permutationszPARSeq.generate_permutations   s   
 FJJLL--//00mFMBBBCN=11Q6	Ay))1 !!FFFi 0 011\%:N:NP]-^-^(_(_hnhuvvvI "!""I+e,,K9~~ EHOOC	NN[IYIY9YchOii#iil(CDDLL     MRS`cfglcmcmSmMnMn       +e,,K##k;"566@@AFFNNrS`aa+c+..&-HHH*c+..2MA4Efm\\\9g{Q@aHHHLLNNx==1+a/%,}q?PY_Yf2g2g2ggHQUOr-   permutationc                    |j         d         }t          j        ||f|j                  }t	          |          D ]}||         }||dz   d          }d|||f<   |d dd df                                         }d|t          j        |t          j        |j                  <   |dd d df         }|                                |                                fS )Nr   r   r           r   dtyper   )	shaper9   onesr   r   rY   eyeboolr8   )	r*   r   szrX   r   	query_idxmasked_keyssource_maskrV   s	            r,   %generate_permutations_attention_masksz,PARSeq.generate_permutations_attention_masks   s    q!z2r(;+=>>>r 	/ 	/A#AI%a!egg.K+.DK'((3B38n**,,KNUYrK4FGGGH122ss7m  +//"3"333r-   rZ   r\   rV   target_queryc                    |j         \  }}|                     |ddddf                   }| j        ddd|dz
  f         |                     |ddddf                   z   }|                     t	          j        ||gd                    }|(| j        ddd|f                             |dd          }|                     |          }|                     ||||          S )zRAdd positional information to the target sequence and pass it through the decoder.Nr   r   r   )r   rs   ru   rF   r9   r   expandrp   )	r*   rZ   r\   rV   r   
batch_sizesequence_lengthnull_ctxr[   s	            r,   decodezPARSeq.decode   s     '-l#
O::fQQQUm,,"111&;!(;&;#;<tzz&QRQRQRTUTVTVQV-?X?XX,,uy(G)<!DDDEE+AAA/?/?,?@GG
TVXZ[[L||L11||L'6;GGGr-   featuresmax_lenc                 v   ||n| j         }t          || j                   dz   }t          j        |                    d          |f| j        dz   t          j        |j                  }| j        dz   |dddf<   | j        ddd|f         	                    |                    d          dd          }t          j
        t          j        ||f|j                  d                              t          j        	                                          }g }t          |          D ]}|                     |ddd|dz   f         ||||dz   d|dz   f         |dd||dz   f         
          }	|                     |	          }
|                    |
           |dz   |k     rm|
                                                    d          |dd|dz   f<   | j        s5|3|| j        k                        d                                          r nt          j        |d          }d|t          j        t          j        ||t          j        |j                  d          <   t          j        |                    d          df| j        dz   t          j        |j                  }t          j        ||ddddf                             d          gd          }|| j        k                                                        d          dk                        d                              d           }|                                |ddd|j        d         f                                         z                                  }|                     |                     ||||
                    }|S )z,Generate predictions for the given features.Nr   r   r   r   r   r   )diagonalr   )r   r   )ra   r   r9   r   r   r"   longr   ru   r   trilr   tor   r8   r   r   rr   appendsqueezeargmaxrf   anyallr   triucumsum	unsqueezer   )r*   r   r   ra   ysru   
query_mask
pos_logitsr   tgt_outpos_problogitssostarget_pad_maskrX   s                  r,   decode_autoregressivezPARSeq.decode_autoregressive  s    ' 3WW
T_559
Z]]1z*DOa,?uzZbZi
 
 
 ?Q&111a4&qqq+:+~6==hmmA>N>NPRTVWW Juz:z":8?SSS^_```ccjojtcuu
#%% 	 
z"" 	 	Akk111gAg:1q1u9gAg-.(AAI6	 "  G yy))Hh'''1uz!!'//1188<<111a!e8  7?do@U?Z?Z_a?Z?b?b?f?f?h?h?E:1--- st
5:ejZuzZbZijjjlmnno j(--**A.!0C5:^f^mnnnYVAAAssF^222667Q???  4?27799@@DDqHSSTUVV``abccc$$&&AAA}!}4D)E)J)J)L)LLQQSS4;;r8T;TTUUr-   r.   return_model_outputreturn_predsc           	                                |          d         }|d d dd d d f         } j        r|t          d          |                     |          \  }}t	          j        |                              t          j                                      |j                  t	          j	        |                              |j                  }	}|d d d t          |	                                                                          dz   f         } j        r3                     |	          }
|d d d df         }|d d dd f         }| j        dz   k    | j        k    z  
                                                    d          dk                        d                              d           }t	          j	        d|j        	          }d}| j        dz   k                                                                    }t%          |
          D ]2\  }}                     |          \  }}|                                |                                z  
                                }                                          |||                                        d
          }||t1          j        ||                                 j        dz             z  z  }||z  }|dk    rYt	          j        | j        k     j        dz   |          }| j        dz   k                                                                    }4||z  }n|d d dd f         }|j        d         dz
  }                     ||          }t1          j        |                    d
          |                                 j        dz             }n                     |          }t;          |          }i } j        r||d<   |S |r||d<   ||r[t          j        j         dt          j!        dtD          tF          tH          tJ          f                  f fd            } ||          |d<   |||d<   |S )Nr   r   z&Need to provide labels during trainingr   r   r   r   r   r   )end_dim)ignore_indexr   out_mapr/   c                 .                         |           S r%   )rw   )r   r*   s    r,   _postprocessz$PARSeq.forward.<locals>._postprocess  s    ))&111r-   predsloss)&ro   training
ValueErrorbuild_targetr9   
from_numpyr   r   r   tensorr8   r   r   r   r"   r   r   sum	enumerater   r   rr   r   flattenFcross_entropywherer   r   r   rf   compilerdisabler:   r   tuplestrr]   )r*   r.   rZ   r   r   r   _gt_seq_lengtseq_len	tgt_permsgt_ingt_outpadding_maskr   
loss_numelr   r   permr   rV   rX   r   r   outr   s   `                         r,   r3   zPARSeq.forwardC  sb    &&q))*5AAAqrr111H%= 	GV^EFFF --f55MC*3//222DDGGQQSXS_`hSiSiSlSlmnmuSvSvBAAA6W[[]]//1122Q6667B} #r 66w??	111crc6
AAAqrrE t22u7OPUUWW^^_abbeff)A,,yy|| , |C@@@*+
t227799>>@@(33 I IGAt%)%O%OPT%U%UNA{',,..1B1B1D1DDIIKKD!YYt{{5(D'I'IJJRR[\R]]FA8H8HW[WfijWj k k kkkD!OJ Avv!&Vt-FZ[H[]c!d!d#t'::??AAFFHH
" 122Y(1+/33HgFFv~~a~'@'@"**,,]a]lop]pqqq//99F!&)) ? 	"CMJ 	$#C	N>\>^#2U\ 2d5e;L6M 2 2 2 2 2 $#2 (<//CLCK
r-   )r   rB   r?   r_   rA   r   FN)NNr%   )NFF)r4   r5   r6   r7   r  r8   r]   r
  r   dictr   r'   r   r9   r:   r   r   r   r   r   r3   r;   r<   s   @r,   r   r   j   s        . !,8 %)/- /- /- 	/-
 /- /- /- /- /- 3S=)/- /- #s(^d"/- 
/- /- /- /- /- /-b<3 <# <$ < < < <((EL (U\ ( ( ( (T4 4RWX]XdfkfrXrRs 4 4 4 4& ,0,0H HH H \D(	H
 lT)H 
H H H H$4 4el 4S4Z 4[`[g 4 4 4 4r $($)"Q Q<Q S	D Q "	Q
 Q 
c3hQ Q Q Q Q Q Q Qr-   c                   H    e Zd ZdZdej        deeee	f                  fdZ
dS )rv   zPost processor for PARSeq architecture

    Args:
        vocab: string containing the ordered sequence of supported characters
    r   r/   c                 b    |                     d          }t          j        |d                              d          d          fd|                                                                D             }fdt          |          D             }t          t          ||                    S )Nr   r   r   c                     g | ]<}d                      fd|D                                           d          d         =S ) c              3   2   K   | ]}j         |         V  d S r%   )
_embedding)r   idxr*   s     r,   	<genexpr>z:PARSeqPostProcessor.__call__.<locals>.<listcomp>.<genexpr>  s*      @@SDOC(@@@@@@r-   z<eos>r   )joinsplit)r   encoded_seqr*   s     r,   r   z0PARSeqPostProcessor.__call__.<locals>.<listcomp>  s^     
 
 
 GG@@@@K@@@@@FFwOOPQR
 
 
r-   c                     g | ]Z\  }}|rQ|d t          |          f                             dd                                                                          nd[S )Nr   r   r   )rj   clipr   r   )r   r   word
preds_probs      r,   r   z0PARSeqPostProcessor.__call__.<locals>.<listcomp>  sq     
 
 
U\UVX\4PJq+CII+~&++Aq116688==???S
 
 
r-   )	r   r9   softmaxr   cpunumpyr  r   zip)r*   r   out_idxsword_valuesprobsr&  s   `    @r,   __call__zPARSeqPostProcessor.__call__  s    
 ==$$]62..22r2::1=

 
 
 
'||~~3355
 
 


 
 
 
`iju`v`v
 
 
 CU++,,,r-   N)r4   r5   r6   r7   r9   r:   r   r
  r  r]   r.   r-   r,   rv   rv     sT         -- 
eCJ	 - - - - - -r-   rv   arch
pretrainedbackbone_fnlayerr   r   r/   c                 b   t          t          |                    }|                    d|d                   |d<   |                    d|d                   |d<   |                    dd          }|d         |d<   |d         |d<   t           |d|d         |          |di          }|                    dd            |                    dd            t          |fd	|i|}	|rI|d         t          |          d         k    r|nd }
|	                    t          |          d
         |
           |	S )Nr   r   
patch_sizerA      F)r   r5  r   pretrained_backbonerg   r   )r   )r   r   r   r	   popr   r   )r0  r1  r2  r3  r   r   _cfgr5  ro   model_ignore_keyss              r,   _parseqr=    sK    L&''DJJwW66DM **]D4GHHDL&11J7mF7O /F= -EtM':zRRR	
 N JJ|T"""
JJ$d+++ >66t6v66E S '+7m|D7I'7R&R&R{{X\l407\RRRLr-   Fc                 8    t          d| t          dfddg dd|S )a(  PARSeq architecture from
    `"Scene Text Recognition with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966>`_.

    >>> import torch
    >>> from doctr.models import parseq
    >>> model = parseq(pretrained=False)
    >>> input_tensor = torch.rand((1, 3, 32, 128))
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: keyword arguments of the PARSeq architecture

    Returns:
        text recognition architecture
    r   1r_   r6  )zembed.embedding.weightzhead.weightz	head.bias)r`   r5  r   )r=  r   )r1  r   s     r,   r   r     sG    " 		
 JJJ	 	 	 	 	r-   r%   )F)-r1   collections.abcr   copyr   	itertoolsr   typingr   r)  rk   r9   r   torch.nnr   r  torchvision.models._utilsr	   doctr.datasetsr
    doctr.models.modules.transformerr   r   classificationr   utilsr   r   baser   r   __all__r   r  r  __annotations__Moduler!   r>   r   rv   r   r   r=  r   r/  r-   r,   <module>rN     s    $ $ $ $ $ $       " " " " " "                  $ $ $ $ $ $ = = = = = = ! ! ! ! ! ! X X X X X X X X # # # # # # = = = = = = = = / / / / / / / /X
 %$#!Z +d3S#X&'   ; ; ; ; ;BI ; ; ;"0( 0( 0( 0( 0(BI 0( 0( 0(fj j j j jWbi j j jZ	- - - - -. - - -B %)$ $
$$ 4&")+,$ 	$
 cT!$ $ $ $ $ $N t s v      r-   