
    j1                        U d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dlm	Z
 d dlmZ d dlmZ d d	lmZ d d
lmZmZ ddlmZmZ ddlmZmZ ddgZdddded         ddiZeeeeef         f         ed<    G d deej                  Z  G d de          Z!	 	 d'dede"dee"gej        f         ded e"d!e#e         dz  d"ed#e fd$Z$d(de"d"ed#e fd&Z%dS ))    )Callable)deepcopy)AnyN)nn)
functional)IntermediateLayerGetter)VOCABS)magc_resnet31)DecoderPositionalEncoding   )_bf16_to_float32load_pretrained_params   )_MASTER_MASTERPostProcessorMASTERmaster)gh|?5?g=
ףp=?gV-?)gA`"?gl?g$C?r          frenchzIhttps://doctr-static.mindee.com/models?id=v0.7.0/master-fde31e4a.pt&src=0)meanstdinput_shapevocaburldefault_cfgsc                       e Zd ZdZ	 	 	 	 	 	 	 	 	 d(dej        dededededededede	eeef         de
deeef         d
z  dd
f fdZdej        dej        de	ej        ej        f         fdZedej        dej        dej        dej        fd            Zded edd
fd!Z	 	 	 d)d"ej        dee         d
z  d#e
d$e
deeef         f
d%Zd&ej        dej        fd'Z xZS )*r   a_  Implements MASTER as described in paper: <https://arxiv.org/pdf/1910.02562.pdf>`_.
    Implementation based on the official Pytorch implementation: <https://github.com/wenwenyu/MASTER-pytorch>`_.

    Args:
        feature_extractor: the backbone serving as feature extractor
        vocab: vocabulary, (without EOS, SOS, PAD)
        d_model: d parameter for the transformer decoder
        dff: depth of the pointwise feed-forward layer
        num_heads: number of heads for the mutli-head attention module
        num_layers: number of decoder layers to stack
        max_length: maximum length of character sequence handled by the model
        dropout: dropout probability of the decoder
        input_shape: size of the image inputs
        exportable: onnx exportable returns only logits
        cfg: dictionary containing information about the model
             r   2   皙?r   FNfeature_extractorr   d_modeldff	num_heads
num_layers
max_lengthdropoutr   
exportablecfgreturnc           	         t                                                       |
| _        || _        || _        || _        || _        t          |          | _        || _	        t          | j        ||	d         |	d         z            | _        t          || j        || j        dz   ||| j                  | _        t          j        | j        | j        dz             | _        t#          | j                  | _        |                                 D ]\  }}|                    d          rt+          |t          j                  r(t          j                            |j        dd	
           ]t+          |t          j        t          j        f          rJt          j                            |j        d           t          j                            |j        d           d S )Nr      )max_lenr   )r)   r&   r(   
vocab_sizer'   r+   maximum_position_encoding)r   zfeat_extractor.fan_outrelu)modenonlinearityr   )super__init__r,   r*   r&   r   r-   lenr2   feat_extractorr   positional_encodingr   decoderr   LinearlinearMASTERPostProcessorpostprocessornamed_modules
startswith
isinstanceConv2dinitkaiming_normal_weightBatchNorm2d	GroupNorm	constant_bias)selfr%   r   r&   r'   r(   r)   r*   r+   r   r,   r-   nm	__class__s                 j/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/models/recognition/master/pytorch.pyr9   zMASTER.__init__6   s    	$$
e**/#5dlGU`abUcfqrsftUt#u#u#u !L*&*o
 
 
 ido.ABB0tzBBB&&(( 	- 	-DAq||-.. !RY'' -''yv'VVVVA=>> -!!!(A...!!!&!,,,	- 	-    sourcetargetc                    || j         dz   k                        d                              d          }|                    d          }t          j        t          j        ||f|j                  d                              t          j                  }t          j        ||                    d          ft          j	        |j                  }||z  }||
                                fS )Nr0   r   devicer   )diagonaldtyperZ   rW   )r2   	unsqueezesizetorchtrilonesrW   tobooluint8int)rM   rS   rT   target_pad_masktarget_lengthtarget_sub_masksource_masktarget_masks           rQ   make_source_and_target_maskz"MASTER.make_source_and_target_maskg   s     "T_q%88CCAFFPPQRSSA  *UZ0NW]Wd%e%e%epqrrruu* v 
 
 j-Q!@\b\ijjj%7KOO----rR   model_outputgtseq_lenc                    | j         d         }|dz   }t          j        | ddddddf                             ddd          |ddddf         d          }t	          j        |dz
  | j                  dddf         |dddf         k    }d||<   |                    d          |                    | j	        	          z  }|
                                S )
al  Compute categorical cross-entropy loss for the model.
        Sequences are masked after the EOS character.

        Args:
            gt: the encoded tensor with gt labels
            model_output: predicted logits of the model
            seq_len: lengths of each gt word inside the batch

        Returns:
            The loss of the model on the batch
        r   Nr   r0   none)	reductionrV   rY   )shapeFcross_entropypermuter^   arangerW   sumra   rZ   r   )rk   rl   rm   	input_lenccemask_2dce_losss          rQ   compute_losszMASTER.compute_loss{   s    $ !&q)	A+ ol111crc11195==aAFF111abb5	]cddd,y1}\5HIII$PQPQPQ'RV]^_^_^_ae^eVffG''!**wzz0BzCCC||~~rR   path_or_urlkwargsc                 "    t          | |fi | dS )zLoad pretrained parameters onto the model

        Args:
            path_or_url: the path or URL to the model parameters (checkpoint)
            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
        N)r   )rM   r}   r~   s      rQ   from_pretrainedzMASTER.from_pretrained   s"     	t[;;F;;;;;rR   xreturn_model_outputreturn_predsc                 (                          |          d         }|j        \  }}}}	|                    ||||	z                                d          }                     |          }
i } j        r|t          d          |ى                     |          \  }}t          j	        |          
                    t          j                  t          j        |          }}|
                    |j                  |
                    |j                  }}                     |
|          \  }}                     ||
||          }                     |          }n                     |
          }t%          |          } j        r||d<   |S |                     |||          |d<   |r||d<   |r[t          j        j        dt          j        d	t0          t2          t4          t6          f                  f fd
            } ||          |d<   |S )a7  Call function for training

        Args:
            x: images
            target: list of str labels
            return_model_output: if True, return logits
            return_preds: if True, decode logits

        Returns:
            A dictionary containing eventually loss, logits and predictions.
        features)r   r0   r   Nz&Need to provide labels during trainingrY   logitslossout_mapr.   c                 .                         |           S N)rA   )r   rM   s    rQ   _postprocessz$MASTER.forward.<locals>._postprocess   s    ))&111rR   preds)r;   rr   viewru   r<   training
ValueErrorbuild_targetr^   
from_numpyra   longtensorrW   rj   r=   r?   decoder   r,   r|   compilerdisableTensorlisttuplestrfloat)rM   r   rT   r   r   r   bchwencodedout_gt_seq_lenrl   rm   rh   ri   outputr   r   s   `                    rQ   forwardzMASTER.forward   s   & &&q))*5^
1a==Aq1u--55i@@**844 = 	GV^EFFF --f55MC*3//222DDelS[F\F\B%%//7::ah+?+?B (,'G'GQS'T'T$K\\"g{KHHF[[((FF[[))F!&))? 	"CMJ++FB@@CK 	$#C	N 	0^#2U\ 2d5e;L6M 2 2 2 2 2 $#2 (<//CL
rR   r   c                    |                     d          }t          j        || j        f| j        dz   t          j        |j                  }| j        dz   |dddf<   t          | j        dz
            D ]}|                     ||          \  }}| 	                    ||||          }| 
                    |          }t          j        |d          }	t          j        |	d          j        }
|
dd|f         |dd|dz   f<   |S )zDecode function for prediction

        Args:
            encoded: input tensor

        Returns:
            A tuple of torch.Tensor: predictions, logits
        r   r0   r[   r   Nro   dim)r]   r^   fullr*   r2   r   rW   rangerj   r=   r?   softmaxmaxindices)rM   r   r   ysirh   ri   r   r   prob
next_tokens              rQ   r   zMASTER.decode   s    LLOO ZDO,do.A\c\jkkk?Q&111a4 t*++ 	, 	,A'+'G'GQS'T'T$K\\"g{KHHF[[((F=R000D4R0008J%aaad+Bqqq!a%xLL rR   )	r    r!   r"   r   r#   r$   r   FN)NFF)__name__
__module____qualname____doc__r   Moduler   rd   r   r   rb   dictr   r9   r^   r   rj   staticmethodr|   r   r   r   r   __classcell__)rP   s   @rQ   r   r   $   sE        * ,8 %)/- /-9/- /- 	/-
 /- /- /- /- /- 3S=)/- /- #s(^d"/- 
/- /- /- /- /- /-b.l.,1L.	u|U\)	*. . . .( lL  
	   \<<3 <# <$ < < < < $($)"B B<B S	D B "	B
 B 
c3hB B B BHel u|        rR   c                   H    e Zd ZdZdej        deeee	f                  fdZ
dS )r@   z'Post processor for MASTER architecturesr   r.   c           	      X    |                     d          }t          j        t          j        |d          d|                    d                                        d          }|                    d          j                                        	                                } fd|	                                
                                D             }t          t          ||
                                                    dd                                                              S )Nro   r   r   c                     g | ]<}d                      fd|D                                           d          d         =S ) c              3   2   K   | ]}j         |         V  d S r   )
_embedding).0idxrM   s     rQ   	<genexpr>z:MASTERPostProcessor.__call__.<locals>.<listcomp>.<genexpr>  s*      @@SDOC(@@@@@@rR   z<eos>r   )joinsplit)r   encoded_seqrM   s     rQ   
<listcomp>z0MASTERPostProcessor.__call__.<locals>.<listcomp>  s^     
 
 
 GG@@@@K@@@@@FFwOOPQR
 
 
rR   r   )argmaxr^   gatherr   r\   squeezeminvaluesdetachcpunumpyr   zipcliptolist)rM   r   out_idxsprobsword_valuess   `    rQ   __call__zMASTERPostProcessor.__call__  s    
 ==$$U]6266H<N<Nr<R<RSS[[\^__		a	  '..004466
 
 
 
'||~~3355
 
 

 CU[[]]%7%71%=%=%D%D%F%FGGHHHrR   N)r   r   r   r   r^   r   r   r   r   r   r    rR   rQ   r@   r@     sX        11II 
eCJ	 I I I I I IrR   r@   Tarch
pretrainedbackbone_fnlayerpretrained_backboneignore_keysr~   r.   c                    |o| }t          t          |                    }|                    d|d                   |d<   |                    d|d                   |d<   |d         |d<   |d         |d<   t           ||          |di          }t	          |fd|i|}	|rI|d         t          |          d         k    r|nd }
|	                    t          |          d         |
           |	S )Nr   r   r   r-   r   )r   )r   r   getr   r   r   )r   r   r   r   r   r   r~   _cfgr;   model_ignore_keyss              rQ   _masterr     s    .@j. L&''D **]D4GHHDJJwW66DM7mF7O /F= -'((	
 N >66t6v66E S '+7m|D7I'7R&R&R{{X\l407\RRRLrR   Fc                 4    t          d| t          dfdg di|S )a  MASTER as described in paper: <https://arxiv.org/pdf/1910.02562.pdf>`_.

    >>> import torch
    >>> from doctr.models import master
    >>> model = master(pretrained=False)
    >>> input_tensor = torch.rand((1, 3, 32, 128))
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: keywoard arguments passed to the MASTER architecture

    Returns:
        text recognition architecture
    r   10r   )zdecoder.embed.weightzlinear.weightzlinear.bias)r   r
   )r   r~   s     rQ   r   r   >  sE      	 

 
 
   rR   )TN)F)&collections.abcr   copyr   typingr   r^   r   torch.nnr   rs   torchvision.models._utilsr   doctr.datasetsr	   doctr.models.classificationr
    doctr.models.modules.transformerr   r   utilsr   r   baser   r   __all__r   r   r   __annotations__r   r   r@   rb   r   r   r   r   rR   rQ   <module>r      si   % $ $ $ $ $ $                    $ $ $ $ $ $ = = = = = = ! ! ! ! ! ! 5 5 5 5 5 5 H H H H H H H H = = = = = = = = / / / / / / / /X
 %$#!Z +d3S#X&'   ] ] ] ] ]Wbi ] ] ]@I I I I I. I I I8 !%$(   
   4&")+,  	 
   cT!           F t s v      rR   