
    j                     n   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZmZmZ d	d
lmZ g dZ ed          Z	 d(dedededefdZdededee         fdZdej        ee         z  dedefdZ 	 	 	 	 	 d)dee         dededz  dededz  dedz  de!dej        fd Z"d!ed"ej        e#ee
f         z  de$ee#ee
f         ej        z  f         fd#Z%d$eez  d%ej        deej                 fd&Z&d"e$ej        ef         de$ej        e#eef         f         fd'Z'dS )*    N)Sequence)partial)Path)AnyTypeVar)Image)get_img_shape)convert_to_relative_coordsextract_cropsextract_rcrops   )VOCABS)	translateencode_stringdecode_sequenceencode_sequencespre_transform_multiclasscrop_bboxes_from_imageconvert_target_to_relativeImageTensor   ■input_string
vocab_nameunknown_charreturnc                 F   t          j        |          t          d          d}| D ]x}|t           |         vrb|t          j        v r t          j        d|                              dd                              d          }|dk    s|t           |         vr|}||z  }y|S )a,  Translate a string input in a given vocabulary

    Args:
        input_string: input string to translate
        vocab_name: vocabulary to use (french, latin, ...)
        unknown_char: unknown character for non-translatable characters

    Returns:
        A string translated in a given vocab
    Nz.output vocabulary must be in vocabs dictionary NFDasciiignore)	r   getKeyErrorstring
whitespaceunicodedata	normalizeencodedecode)r   r   r   
translatedchars        W/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/utils.pyr   r   #   s     z*%GHHHJ  vj)))v((((55<<WhOOVVW^__DrzzT
);;;#d

    vocabc           
          	 t          t          j        |                     S # t          $ r:}fd| D             }t          dt	          |           d|  d d          |d}~ww xY w)a  Given a predefined mapping, encode the string to a sequence of numbers

    Args:
        input_string: string to encode
        vocab: vocabulary (string), the encoding is given by the indexing of the character sequence

    Returns:
        A list encoding the input_string
    c                     g | ]}|v|	S  r0   ).0r*   r-   s     r+   
<listcomp>z!encode_string.<locals>.<listcomp>U   s#    LLL$$e:K:K:K:K:Kr,   z,Some characters cannot be found in 'vocab': z!.
Please check the input string `z` and the vocabulary ``N)listmapindex
ValueErrorset)r   r-   emissing_charss    `  r+   r   r   E   s    C\22333   LLLL,LLL[3};M;M [ [.:[ [RW[ [ [
 
 	s   !% 
A)5A$$A)	input_seqmappingc                 x   t          | t          t          j        f          st	          d          t          | t          j                  rI| j        t          j        k    s%|                                 t          |          k    rt          d          d
                    t          |j        |                     S )a  Given a predefined mapping, decode the sequence of numbers to a string

    Args:
        input_seq: array to decode
        mapping: vocabulary (string), the encoding is given by the indexing of the character sequence

    Returns:
        A string, decoded from input_seq
    zInvalid sequence typez>Input must be an array of int, with max less than mapping sizer   )
isinstancer   npndarray	TypeErrordtypeint_maxlenAssertionErrorjoinr5   __getitem__)r;   r<   s     r+   r   r   \   s     i(BJ!788 1/000)RZ(( _io.H.HIMMOO_bcj_k_kLkLk]^^^773w*I66777r,   F	sequencestarget_sizeeossospaddynamic_seq_lengthc           
      t   d|cxk    rt          |          k     rn nt          d          t          |t                    r|rwt	          d | D                       dz   }t          |t                    r|dz  }t          |t                    r|dz  }t          |t                    s|nt          ||          }t          |t                    r/d|cxk    rt          |          k     rn nt          d          |}n|}t          j        t          |           |g|t          j                  }	t          t          t          t          |          |                     D ]v\  }
}t          |t                    r|                    |           |dt          t          |          |                   |	|
dt          t          |          |          f<   wt          |t                    rJd|cxk    rt          |          k     rn nt          d	          t          j        |	d          }	||	dddf<   |	S )
ag  Encode character sequences using a given vocab as mapping

    Args:
        sequences: the list of character sequences of size N
        vocab: the ordered vocab to use for encoding
        target_size: maximum length of the encoded data
        eos: encoding of End Of String
        sos: optional encoding of Start Of String
        pad: optional encoding for padding. In case of padding, all sequences are followed by 1 EOS then PAD
        dynamic_seq_length: if `target_size` is specified, uses it as upper bound and enables dynamic sequence size

    Returns:
        the padded encoded data as a tensor
    r   z<argument 'eos' needs to be outside of vocab possible indicesc              3   4   K   | ]}t          |          V  d S )N)rE   )r1   ws     r+   	<genexpr>z#encode_sequences.<locals>.<genexpr>   s(      33AQ333333r,   r   z<argument 'pad' needs to be outside of vocab possible indicesrB   )r-   Nz<argument 'sos' needs to be outside of vocab possible indices)rE   r7   r>   intrD   minr?   fullint32	enumerater5   r   r   appendroll)rJ   r-   rK   rL   rM   rN   rO   
max_lengthdefault_symbolencoded_dataidxseqs               r+   r   r   q   sY   . 	C#e**WXXXk3'' g+= g3333333a7
c3 	!OJc3 	!OJ(2;(D(Dfjj#jZeJfJf #s     c%jj     [\\\!wI'Dn\^\deeeL c'-u"E"E"EyQQRR \ \Sc3 	JJsOOO:=>ZCHHk@Z@Z>Z:[S6CC+666677#s !    c%jj     [\\\w|Q// QQQTr,   imgtargetc                     t          |t          j                  rt          |t	          |                     }n&t          |d         t	          |                     |d<   | |fS )a  Converts target to relative coordinates

    Args:
        img: tf.Tensor or torch.Tensor representing the image
        target: target to convert to relative coordinates (boxes (N, 4) or polygons (N, 4, 2))

    Returns:
        The image and the target in relative coordinates
    boxes)r>   r?   r@   r
   r	   )ra   rb   s     r+   r   r      s\     &"*%% Z+FM#4F4FGG4VG_mTWFXFXYYw;r,   img_pathgeomsc                    t          j        |           5 }t          j        |                    d                    }ddd           n# 1 swxY w Y   |j        dk    r<|j        dd         dk    r)t          ||                    t                              S |j        dk    r:|j        d         dk    r)t          ||                    t                              S t          d	          )
zCrop a set of bounding boxes from an image

    Args:
        img_path: path to the image
        geoms: a array of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)

    Returns:
        a list of cropped images
    RGBN   r   )      rT   rk   rj   zInvalid geometry format)r   openr?   asarrayconvertndimshaper   astyperU   r   r7   )re   rf   pil_imgra   s       r+   r   r      s    
H		 =*W__U%;%;<<= = = = = = = = = = = = = = = zQ5;qrr?f44c5<<c<#:#:;;;zQ5;q>Q..S%,,S,"9"9:::
.
/
//s   (A		AAc                 J   t          |d         t          |                     }|d         }d t          t          |                    D             }t	          ||          D ] \  }}||                             |           !d |                                D             }| |fS )zConverts multiclass target to relative coordinates.

    Args:
        img: Image
        target: tuple of target polygons and their classes names

    Returns:
        Image and dictionary of boxes, with class names as keys
    r   r   c                     i | ]}|g S r0   r0   )r1   ks     r+   
<dictcomp>z,pre_transform_multiclass.<locals>.<dictcomp>   s    BBB!2BBBr,   c                 B    i | ]\  }}|t          j        |d           S )r   )axis)r?   stack)r1   ru   vs      r+   rv   z,pre_transform_multiclass.<locals>.<dictcomp>   s-    HHHTQ!RXaa(((HHHr,   )r
   r	   sortedr8   ziprZ   items)ra   rb   rd   boxes_classes
boxes_dictru   polys          r+   r   r      s     'vay-2D2DEEE1IMBBvc-.@.@'A'ABBBJ}e,, # #41T""""HHZ5E5E5G5GHHHJ
?r,   )r   )NrI   NNF)(r#   r%   collections.abcr   SequenceType	functoolsr   pathlibr   typingr   r   numpyr?   PILr   doctr.io.imager	   doctr.utils.geometryr
   r   r   vocabsr   __all__r   strr   r4   rU   r   r@   r   boolr   dicttupler   r   r   r0   r,   r+   <module>r      s        $ $ $ $ $ $ 4 4 4 4 4 4                               ( ( ( ( ( ( Z Z Z Z Z Z Z Z Z Z         gm$$    		   D 
#Y   .8zL--88 	8 8 8 80 #$9 9Cy99 t9 
	9
 
t9 
t9 9 Z9 9 9 9x	 j4S>9
;S#X334   &0S4Z 0
 0tBJGW 0 0 0 0(%
D0@*A eBJX\]`bf]fXgLgFh      r,   