
    j/                        U d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d	d
lmZmZmZ d	dlmZ ddlmZmZ g dZddded         ddddded         ddddded         dddZeeeeef         f         ed<    G d de          Z G d dee	j                  Z 	 	 d*dede!d eege	j        f         d!e!d"e"e         dz  d#ed$e fd%Z#d+de!d#ed$e fd'Z$d+de!d#ed$e fd(Z%d+de!d#ed$e fd)Z&dS ),    )Callable)deepcopy)groupby)AnyN)nn)
functional)VOCABSdecode_sequence   )mobilenet_v3_large_rmobilenet_v3_small_r
vgg16_bn_rload_pretrained_params   )RecognitionModelRecognitionPostProcessor)CRNNcrnn_vgg16_bncrnn_mobilenet_v3_smallcrnn_mobilenet_v3_large)gh|?5?g=
ףp=?gV-?)gA`"?gl?g$C?r          frenchzQhttps://doctr-static.mindee.com/models?id=v0.12.0/crnn_vgg16_bn-0417f351.pt&src=0)meanstdinput_shapevocaburlz]https://doctr-static.mindee.com/models?id=v0.3.1/crnn_mobilenet_v3_small_pt-3b919a02.pt&src=0z]https://doctr-static.mindee.com/models?id=v0.3.1/crnn_mobilenet_v3_large_pt-f5259ec2.pt&src=0)r   r   r   default_cfgsc                       e Zd ZdZeed         dfdej        dede	de
eeef                  fd            Zdej        de
eeef                  fd	Zd
S )CTCPostProcessorzPostprocess raw prediction of the model (logits) to a list of words using CTC decoding

    Args:
        vocab: string containing the ordered sequence of supported characters
    r   r   logitsr   blankreturnc                 :   t          j        | d                              d          j                            d          j        }fdt          j        | d          D             }t          t          ||	                                                    S )am  Implements best path decoding as shown by Graves (Dissertation, p63), highly inspired from
        <https://github.com/githubharald/CTCDecoder>`_.

        Args:
            logits: model output, shape: N x T x C
            vocab: vocabulary to use
            blank: index of blank label

        Returns:
            A list of tuples: (word, confidence)
        dim   c           	          g | ]=}t          fd t          |                                          D                       >S )c                 &    g | ]\  }}|k    |S  r.   ).0k_r%   s      h/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/models/recognition/crnn/pytorch.py
<listcomp>z=CTCPostProcessor.ctc_best_path.<locals>.<listcomp>.<listcomp>M   s"    OOO41aAJJQJJJ    )r
   r   tolist)r/   seqr%   r   s     r2   r3   z2CTCPostProcessor.ctc_best_path.<locals>.<listcomp>L   sY     
 
 
 OOOO73::<<+@+@OOOQVWW
 
 
r4   )
Fsoftmaxmaxvaluesmintorchargmaxlistzipr5   )r$   r   r%   probswordss    ``  r2   ctc_best_pathzCTCPostProcessor.ctc_best_path7   s    $ 	&b)))--"-55<@@Q@GGN
 
 
 
 
|F333
 
 

 Cu||~~..///r4   c                 `    |                      || j        t          | j                            S )a9  Performs decoding of raw output with CTC and decoding of CTC predictions
        with label_to_idx mapping dictionary

        Args:
            logits: raw output of the model, shape (N, C + 1, seq_len)

        Returns:
            A tuple of 2 lists: a list of str (words) and a list of float (probs)

        )r$   r   r%   )rB   r   len)selfr$   s     r2   __call__zCTCPostProcessor.__call__S   s)     !!tzTZ!YYYr4   N)__name__
__module____qualname____doc__staticmethodr	   r<   Tensorstrintr>   tuplefloatrB   rF   r.   r4   r2   r#   r#   0   s           H%0 000 0 
eCJ	 	0 0 0 \06Zu| ZU3:5F0G Z Z Z Z Z Zr4   r#   c                   4    e Zd ZU dZg dZee         ed<   	 	 	 	 ddej	        d	ed
e
dee
e
e
f         dedeeef         dz  ddf fdZdededdfdZdej        dee         dej        fdZ	 	 	 ddej        dee         dz  dededeeef         f
dZ xZS )r   a  Implements a CRNN architecture as described in `"An End-to-End Trainable Neural Network for Image-based
    Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.

    Args:
        feature_extractor: the backbone serving as feature extractor
        vocab: vocabulary used for encoding
        rnn_units: number of units in the LSTM layers
        exportable: onnx exportable returns only logits
        cfg: configuration dictionary
    )feat_extractordecoderlinearpostprocessor_children_namesr   r   FNfeature_extractorr   	rnn_unitsr   
exportablecfgr&   c                    t                                                       || _        || _        d| _        || _        || _        t          j                    5  |                     t          j	        dg|R                     j
        }d d d            n# 1 swxY w Y   |d         |d         z  }t          j        ||ddd          | _        t          j        d|z  t          |          dz             | _        t#          |          | _        |                                 D ]\  }	}
|	                    d          rt+          |
t          j                  rRt          j                            |
j        j        d	d
           |
j        |
j        j                                         t+          |
t          j                  r=|
j        j                            d           |
j        j                                         d S )Nr   r+   r   T)
input_sizehidden_sizebatch_first
num_layersbidirectional)in_featuresout_features)r   zfeat_extractor.fan_outrelu)modenonlinearityg      ?)super__init__r   rZ   
max_lengthrY   rR   r<   inference_modezerosshaper   LSTMrS   LinearrD   rT   r#   rU   named_modules
startswith
isinstanceConv2dinitkaiming_normal_weightdatabiaszero_BatchNorm2dfill_)rE   rW   r   rX   r   rY   rZ   	out_shapelstm_innm	__class__s              r2   rh   zCRNN.__init__p   s-    	
$/ !## 	R 	R++EK8I[8I8I,J,JKKQI	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	RA,1-w!
 
 
 iA	ME

UVWWW-E:::&&(( 
	$ 
	$DAq||-.. !RY'' $''ITZ'[[[6%FK%%'''Ar~.. $##C(((!!###
	$ 
	$s   1BBBpath_or_urlkwargsc                 "    t          | |fi | dS )zLoad pretrained parameters onto the model

        Args:
            path_or_url: the path or URL to the model parameters (checkpoint)
            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
        Nr   )rE   r   r   s      r2   from_pretrainedzCRNN.from_pretrained   s"     	t[;;F;;;;;r4   model_outputtargetc           	         |                      |          \  }}|j        d         }|j        d         t          j        |ft          j                  z  }|                    ddd          }t          j        |d          }t          j        |t          j	        |          |t          j
        |t          j                  t          | j                  d	          }	|	S )
zCompute CTC loss for the model.

        Args:
            model_output: predicted logits of the model
            target: list of target strings

        Returns:
            The loss of the model on the batch
        r   r+   )sizedtyper   r(   r)   )r   T)zero_infinity)build_targetrl   r<   onesint32permuter7   log_softmaxctc_loss
from_numpytensorrN   rD   r   )
rE   r   r   gtseq_len	batch_leninput_lengthr$   r@   r   s
             r2   compute_losszCRNN.compute_loss   s     ''//G &q)	#)!,uz	|SXS^/_/_/__%%aA..f"---:R  L	222
OO
 
 
 r4   xreturn_model_outputreturn_predsc                      j         r|t          d                               |          }|j        d         |j        d         |j        d         }}}t	          j        |d||z  |f          }	t	          j        |	dd          }	                     |	          \  }
}                     |
          }
i } j	        r|
|d<   |S |r|
|d<   ||r[t          j
        j        dt          j        d	t          t          t          t           f                  f fd
            } ||
          |d<   |                     |
|          |d<   |S )Nz&Need to provide labels during trainingr+   r   r   r(   )rl   r$   out_mapr&   c                 .                         |           S )N)rU   )r$   rE   s    r2   _postprocessz"CRNN.forward.<locals>._postprocess   s    ))&111r4   predsloss)training
ValueErrorrR   rl   r<   reshape	transposerS   rT   rY   compilerdisablerL   r>   rO   rM   rP   r   )rE   r   r   r   r   featureschwfeatures_seqr$   r1   outr   s   `             r2   forwardzCRNN.forward   sk    = 	GV^EFFF&&q)).#X^A%6q8Ia1}Xb!a%^DDD|Q::LL..	V$$ ? 	"CMJ 	$#C	N>\>^#2U\ 2d5e;L6M 2 2 2 2 2 $#2 (<//CL++FF;;CK
r4   )r   r   FN)NFF)rG   rH   rI   rJ   rV   r>   rM   __annotations__r   ModulerN   rO   booldictr   rh   r   r<   rL   r   r   __classcell__)r   s   @r2   r   r   b   s        	 	 "Z!Y!YOT#YYYY ,8 %),$ ,$9,$ ,$ 	,$
 3S=),$ ,$ #s(^d",$ 
,$ ,$ ,$ ,$ ,$ ,$\<3 <# <$ < < < <l S	 
	   D $($)"& &<& S	D & "	&
 & 
c3h& & & & & & & &r4   r   Tarch
pretrainedbackbone_fnpretrained_backboneignore_keysr   r&   c                    |o| } ||          j         }|                    dt          |          d                   |d<   |                    dt          |          d                   |d<   t          t          |                    }|d         |d<   |d         |d<   t	          |fd|i|}|r>|d         t          |          d         k    r|nd }	|                    |d         |	           |S )N)r   r   r   rZ   r    )r   )r   getr!   r   r   r   )
r   r   r   r   r   r   rR   _cfgmodel_ignore_keyss
             r2   _crnnr      s    .@j. ![,?@@@INjj,t*<W*EFFF7O"JJ}l46H6WXXF=L&''D7ODM /D 44T4V44E E '+7m|D7I'7R&R&R{{X\d5k|DDDLr4   Fc                 2    t          d| t          fdddgi|S )a~  CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
    Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.

    >>> import torch
    >>> from doctr.models import crnn_vgg16_bn
    >>> model = crnn_vgg16_bn(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    r   r   linear.weightlinear.bias)r   r   r   r   s     r2   r   r     s*    " *jqqXeFfqjpqqqr4   c                 2    t          d| t          fdddgi|S )a  CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based
    Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.

    >>> import torch
    >>> from doctr.models import crnn_mobilenet_v3_small
    >>> model = crnn_mobilenet_v3_small(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    r   r   r   r   )r   r   r   s     r2   r   r   #  =    " !  %m4	
   r4   c                 2    t          d| t          fdddgi|S )a  CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based
    Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.

    >>> import torch
    >>> from doctr.models import crnn_mobilenet_v3_large
    >>> model = crnn_mobilenet_v3_large(pretrained=True)
    >>> input_tensor = torch.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    r   r   r   r   )r   r   r   s     r2   r   r   =  r   r4   )TN)F)'collections.abcr   copyr   	itertoolsr   typingr   r<   r   torch.nnr   r7   doctr.datasetsr	   r
   classificationr   r   r   utilsr   corer   r   __all__r!   r   rM   r   r#   r   r   r   r>   r   r   r   r   r.   r4   r2   <module>r      s   % $ $ $ $ $ $                          $ $ $ $ $ $ 2 2 2 2 2 2 2 2 T T T T T T T T T T + + + + + + = = = = = = = =
Y
Y
Y &$#!b  &$#!n    &$#!n   + +d3S#X&'   2/Z /Z /Z /Z /Z/ /Z /Z /ZdJ J J J JRY J J Jb !%$( 
 3%*+ 	
 cT!  
   @r rd rc rd r r r r(       4         r4   