
    j=                     
   U d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dlm	Z
 d dlmZ d dlmZ d	d
lmZ d	dlmZmZ ddlmZmZ ddgZdddded         ddiZeeeeef         f         ed<    G d dej                  Z G d dej                  Z G d dej                  Z G d dej        e          Z  G d de          Z!	 	 d,d!ed"e"d#ee"gej        f         d$ed%e"d&e#e         dz  d'ed(e fd)Z$d-d"e"d'ed(e fd+Z%dS ).    )Callable)deepcopy)AnyN)nn)
functional)IntermediateLayerGetter)VOCABS   )resnet31)_bf16_to_float32load_pretrained_params   )RecognitionModelRecognitionPostProcessorSARsar_resnet31)gh|?5?g=
ףp=?gV-?)gA`"?gl?g$C?r
          frenchzOhttps://doctr-static.mindee.com/models?id=v0.7.0/sar_resnet31-9a1deedf.pt&src=0)meanstdinput_shapevocaburldefault_cfgsc            	       V     e Zd Zd
dedededdf fdZdej        dej        fd	Z xZ	S )
SAREncoder        in_feats	rnn_unitsdropout_probreturnNc                     t                                                       t          j        ||dd|          | _        t          j        ||          | _        d S )Nr   T)batch_firstdropout)super__init__r   LSTMrnnLinearlinear)selfr    r!   r"   	__class__s       g/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/models/recognition/sar/pytorch.pyr(   zSAREncoder.__init__#   sL    78Yt\ZZZi	955    xc                 |    |                      |          d         }|                     |d d dd d f                   S )Nr   )r*   r,   )r-   r1   encodeds      r/   forwardzSAREncoder.forward(   s:    ((1++a.{{7111b!!!8,---r0   )r   )
__name__
__module____qualname__intfloatr(   torchTensorr5   __classcell__r.   s   @r/   r   r   "   s        6 6 6 6E 6TX 6 6 6 6 6 6
. .%, . . . . . . . .r0   r   c                   b     e Zd Zdedededdf fdZdej        dej        dej        fd	Z xZS )
AttentionModule
feat_chansstate_chansattention_unitsr#   Nc                     t                                                       t          j        ||dd          | _        t          j        ||dd          | _        t          j        |ddd          | _        d S )Nr
      )kernel_sizepaddingF)rF   bias)r'   r(   r   Conv2d	feat_conv
state_convattention_projector)r-   rA   rB   rC   r.   s       r/   r(   zAttentionModule.__init__0   sq    :AWXYYY)KaV[\\\#%9_aQUZ#[#[#[   r0   featureshidden_statec                 b   |j         dd          \  }}|                     |          }|                    |                    d          |                    d          dd          }|                     |          }|                    dd||          }t          j        ||z             }|                     |          }|                                \  }}	}
}t          j	        |                    |d          d                              ||	|
|          }||z  
                    d          S )Nr   r   rE   r3   dim)r   r
   )shaperJ   viewsizerK   expandr;   tanhrL   softmaxsum)r-   rM   rN   H_fW_ffeat_projectionstate_projectionattention_weightsBCHWs               r/   r5   zAttentionModule.forward7   s0   
 >!""%S ..22#(():):1)=)=|?P?PQR?S?SUVXYZZ??<88+222r3DD!J9I'IJJ 445FGG&++--
1a "M*;*@*@B*G*GRPPPUUVWYZ\]_`aa,,11f1===r0   )	r6   r7   r8   r9   r(   r;   r<   r5   r=   r>   s   @r/   r@   r@   /   s        \3 \S \3 \SW \ \ \ \ \ \>,> l> 
	> > > > > > > >r0   r@   c                        e Zd ZdZ	 	 ddededededed	ed
eddf fdZ	 ddej        dej        dej        dz  dej        fdZ	 xZ
S )
SARDecoderaY  Implements decoder module of the SAR model

    Args:
        rnn_units: number of hidden units in recurrent cells
        max_length: maximum length of a sequence
        vocab_size: number of classes in the model alphabet
        embedding_units: number of hidden embedding units
        attention_units: number of hidden attention units

       r   r!   
max_length
vocab_sizeembedding_unitsrC   rA   r"   r#   Nc                    t                                                       || _        || _        t	          j        | j        dz   |          | _        t	          j        || j        dz             | _        t          |||          | _
        t	          j        ||          | _        t	          j        d|z  | j        dz             | _        t	          j        |          | _        d S )NrE   r   )r'   r(   rf   re   r   r+   embed	Embedding	embed_tgtr@   attention_moduleLSTMCell	lstm_celloutput_denseDropoutr&   )	r-   r!   re   rf   rg   rC   rA   r"   r.   s	           r/   r(   zSARDecoder.__init__\   s     	$$Yt2ODD
ot7JKK /
I W WY	::Ia)mT_q5HIIz,//r0   rM   holisticgtc                 T   ||                      |          }g }t          | j        dz             D ]}|dk    rQt          j        |                    d          |                    d          |j        |j                  x}}||}
}	|}n|dk    rSt          j        |                    d          | j        dz   |j        |j                  }| 	                    |          }nr|*| j
        r#| 	                    |d d |dz
  f                   }nF||dz
                               d          }| 	                    |                      |                    }|                     |||f          \  }}|                     ||	|
f          \  }	}
|                     ||	          }t          j        |	|gd          }|                     |          }|                    |                     |                     t          j        |dd                                        ddd          S )NrE   r   )devicedtyper   r3   rP   )rk   rangere   r;   zerosrT   rt   ru   rf   ri   trainingargmaxrn   rl   catr&   appendro   stackpermute)r-   rM   rq   rr   gt_embeddinglogits_listthidden_state_initcell_state_initrN   
cell_stateprev_symbolindexglimpselogitss                  r/   r5   zSARDecoder.forwardq   s0    >>>"--L*,t*++ "	: "	:AAvv6;kMM!$$hmmA&6&6xV^Vd7 7 7 !O ,=oj&a $kMM!$$do&9(/YaYg   #jj55>dm>"&**\!!!QU(-C"D"DKK (A.55b99E"&**T^^E-B-B"C"CK 26N_apMq1r1r.'+~~6G,XbIc'd'd$L*++HlCCGYg6A>>>F\\&))Ft00889999 {;qrr?++33Aq!<<<r0   )rd   r   N)r6   r7   r8   __doc__r9   r:   r(   r;   r<   r5   r=   r>   s   @r/   rc   rc   P   s        	 	$ !0 00 0 	0
 0 0 0 0 
0 0 0 0 0 02 #'	0= 0=,0= ,0= L4	0=
 
0= 0= 0= 0= 0= 0= 0= 0=r0   rc   c                   :    e Zd ZdZ	 	 	 	 	 	 	 	 dded	ed
ededededeeeef         dede	ee
f         dz  ddf fdZdede
ddfdZ	 	 	 d dej        dee         dz  dedede	ee
f         f
dZedej        dej        dej        dej        fd            Z xZS )!r   a  Implements a SAR architecture as described in `"Show, Attend and Read:A Simple and Strong Baseline for
    Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.

    Args:
        feature_extractor: the backbone serving as feature extractor
        vocab: vocabulary used for encoding
        rnn_units: number of hidden units in both encoder and decoder LSTM
        embedding_units: number of embedding units
        attention_units: number of hidden units in attention module
        max_length: maximum word length handled by the model
        dropout_prob: dropout probability of the encoder LSTM
        exportable: onnx exportable returns only logits
        cfg: dictionary containing information about the model
    rd      r   r   FNr   r!   rg   rC   re   r"   r   
exportablecfgr#   c                 B   t                                                       || _        |	| _        |
| _        |dz   | _        || _        | j                                         t          j	                    5  |                     t          j
        dg|R                     d         j        }d d d            n# 1 swxY w Y   | j                                         t          |d         ||          | _        t          || j        t!          | j                  |||          | _        t%          |          | _        |                                 D ]\  }}|                    d          rt-          |t.          j                  r(t.          j                            |j        dd           ]t-          |t.          j        t.          j        f          rJt.          j                            |j        d           t.          j                            |j        d	           d S )
NrE   rM   )r"   )r   zfeat_extractor.fan_outrelu)modenonlinearityr   ) r'   r(   r   r   r   re   feat_extractorevalr;   no_gradrw   rR   trainr   encoderrc   lendecoderSARPostProcessorpostprocessornamed_modules
startswith
isinstancer   rI   initkaiming_normal_weightBatchNorm2d	GroupNorm	constant_rH   )r-   feature_extractorr   r!   rg   rC   re   r"   r   r   r   	out_shapenmr.   s                 r/   r(   zSAR.__init__   s8    	
$$q./ 	  """]__ 	^ 	^++EK8I[8I8I,J,JKKJW]I	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	^ 	!!###!)A,	<HH!O
OO%
 
 
 .E:::&&(( 	- 	-DAq||-.. !RY'' -''yv'VVVVA=>> -!!!(A...!!!&!,,,	- 	-s   47B77B;>B;path_or_urlkwargsc                 "    t          | |fi | dS )zLoad pretrained parameters onto the model

        Args:
            path_or_url: the path or URL to the model parameters (checkpoint)
            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
        N)r   )r-   r   r   s      r/   from_pretrainedzSAR.from_pretrained   s"     	t[;;F;;;;;r0   r1   targetreturn_model_outputreturn_predsc                                           |          d         }|                    d          j        }|                    ddd                                          }                     |          }|                     |          \  }}	t          j        |          	                    t          j
                  t          j        |	          }}
|
	                    |j                  |	                    |j                  }}
 j        r|t          d          t                               |||d n|
	                    }i } j        r||d
<   |S |r||d<   ||r[t          j        j        dt          j        dt*          t,          t.          t0          f                  f fd            } ||          |d<   |                     ||
|          |d<   |S )NrM   rP   r   r   rE   ru   z:Need to provide labels during training for teacher forcing)rr   r   out_mapdecoded_featuresr#   c                 .                         |           S r   )r   )r   r-   s    r/   _postprocessz!SAR.forward.<locals>._postprocess  s    ))*:;;;r0   predsloss)r   maxvaluesr}   
contiguousr   build_targetr;   
from_numpytolongtensorrt   rx   
ValueErrorr   r   r   compilerdisabler<   listtuplestrr:   compute_loss)r-   r1   r   r   r   rM   pooled_featuresr4   _gt_seq_lenrr   seq_lenr   outr   s   `              r/   r5   zSAR.forward   s    &&q))*5 #,,2,..5)11!Q::EEGG,,// --f55MC*3//222DDelS[F\F\B%%//7::ah+?+?B= 	[V^YZZZ+DLL7W]WettkmL,n,noo ? 	,CMJ 	.-C	N>\>^#<u| <U3PU:EV@W < < < < < $#< (<(899CL++,<b'JJCK
r0   model_outputrr   r   c                 x   | j         d         }|dz   }t          j        |                     ddd          |d          }t	          j        || j                  dddf         |dddf         k    }d||<   |                    d          |                    | j	                  z  }|
                                S )	al  Compute categorical cross-entropy loss for the model.
        Sequences are masked after the EOS character.

        Args:
            model_output: predicted logits of the model
            gt: the encoded tensor with gt labels
            seq_len: lengths of each gt word inside the batch

        Returns:
            The loss of the model on the batch
        rE   r   r   none)	reduction)rt   Nr   )rR   Fcross_entropyr}   r;   arangert   rX   r   ru   r   )r   rr   r   	input_lenccemask_2dce_losss          r/   r   zSAR.compute_loss  s    $ !&q)	A+ ol221a;;R6RRR,y1DEEEdAAAgNRYZ[Z[Z[]aZaRbbG''!**wzz0BzCCC||~~r0   )rd   rd   rd   r   r   r   FN)NFF)r6   r7   r8   r   r   r9   r:   r   booldictr   r(   r   r;   r<   r   r5   staticmethodr   r=   r>   s   @r/   r   r      s        & ""!,8 %)1- 1- 1- 	1-
 1- 1- 1- 1- 3S=)1- 1- #s(^d"1- 
1- 1- 1- 1- 1- 1-f<3 <# <$ < < < < $($)"- -<- S	D - "	-
 - 
c3h- - - -^ lL  
	   \    r0   c                   H    e Zd ZdZdej        deeee	f                  fdZ
dS )r   zPost processor for SAR architectures

    Args:
        vocab: string containing the ordered sequence of supported characters
    r   r#   c           	      |    |                     d          }t          j        t          j        |d          d|                    d                                        d          }|                    d          j                                        	                                } fd|                                	                                
                                D             }t          t          ||
                                                    dd                                                              S )Nr3   rE   rP   c                     g | ]<}d                      fd|D                                           d          d         =S ) c              3   2   K   | ]}j         |         V  d S r   )
_embedding).0idxr-   s     r/   	<genexpr>z7SARPostProcessor.__call__.<locals>.<listcomp>.<genexpr>R  s*      @@SDOC(@@@@@@r0   z<eos>r   )joinsplit)r   encoded_seqr-   s     r/   
<listcomp>z-SARPostProcessor.__call__.<locals>.<listcomp>Q  s^     
 
 
 GG@@@@K@@@@@FFwOOPQR
 
 
r0   r   )ry   r;   gatherrW   	unsqueezesqueezeminr   detachcpunumpyr   zipcliptolist)r-   r   out_idxsprobsword_valuess   `    r/   __call__zSARPostProcessor.__call__E  s   
 ==$$U]6266H<N<Nr<R<RSS[[\^__		a	  '..004466
 
 
 
'004466<<>>
 
 

 CU[[]]%7%71%=%=%D%D%F%FGGHHHr0   N)r6   r7   r8   r   r;   r<   r   r   r   r:   r    r0   r/   r   r   >  s\         II 
eCJ	 I I I I I Ir0   r   Tarch
pretrainedbackbone_fnlayerpretrained_backboneignore_keysr   r#   c                    |o| }t          t          |                    }|                    d|d                   |d<   |                    d|d                   |d<   t           ||          |di          }|d         |d<   |d         |d<   t	          |fd|i|}	|rI|d         t          |          d         k    r|nd }
|	                    t          |          d         |
           |	S )Nr   r   rM   r   r   )r   )r   r   getr   r   r   )r   r   r   r   r   r   r   _cfgr   model_ignore_keyss              r/   _sarr  Y  s    .@j. L&''DJJwW66DM **]D4GHHD -'((	
 N 7mF7O /F= 33D3F33E S '+7m|D7I'7R&R&R{{X\l407\RRRLr0   Fc                 4    t          d| t          dfdg di|S )aa  SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read:A Simple and Strong
    Baseline for Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.

    >>> import torch
    >>> from doctr.models import sar_resnet31
    >>> model = sar_resnet31(pretrained=False)
    >>> input_tensor = torch.rand((1, 3, 32, 128))
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: keyword arguments of the SAR architecture

    Returns:
        text recognition architecture
    r   10r   )zdecoder.embed.weightzdecoder.embed_tgt.weightzdecoder.output_dense.weightzdecoder.output_dense.bias)r  r   )r   r   s     r/   r   r   }  sE    " 	 

 
 
   r0   )TN)F)&collections.abcr   copyr   typingr   r;   r   torch.nnr   r   torchvision.models._utilsr   doctr.datasetsr	   classificationr   utilsr   r   corer   r   __all__r   r   r   __annotations__Moduler   r@   rc   r   r   r   r   r  r   r   r0   r/   <module>r     s   % $ $ $ $ $ $                    $ $ $ $ $ $ = = = = = = ! ! ! ! ! ! & & & & & & = = = = = = = = = = = = = = = =.
! %$#!` +d3S#X&'   
. 
. 
. 
. 
. 
. 
. 
.> > > > >bi > > >BQ= Q= Q= Q= Q= Q= Q= Q=hW W W W W")% W W WtI I I I I/ I I I@ !%$(! !
!! 4&")+,! 	!
 ! cT!! ! 	! ! ! !H T S S      r0   