
    je                         d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlZ	d dl
mZ d dlmZ ddlmZ ddlmZmZ d	gZ G d
 d	e          ZdS )    N)Path)Any)Image)tqdm   )AbstractDataset)convert_target_to_relativecrop_bboxes_from_imageIMGUR5Kc                   j     e Zd ZdZ	 	 	 	 ddededededed	ed
eddf fdZdefdZdeddfdZ	 xZ
S )r   a  IMGUR5K dataset from `"TextStyleBrush: Transfer of Text Aesthetics from a Single Example"
    <https://arxiv.org/abs/2106.08385>`_ |
    `repository <https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/imgur5k-grid.png&src=0
        :align: center
        :width: 630
        :height: 400

    >>> # NOTE: You need to download/generate the dataset from the repository.
    >>> from doctr.datasets import IMGUR5K
    >>> train_set = IMGUR5K(train=True, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images",
    >>>                     label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json")
    >>> img, target = train_set[0]
    >>> test_set = IMGUR5K(train=False, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images",
    >>>                    label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json")
    >>> img, target = test_set[0]

    Args:
        img_folder: folder with all the images of the dataset
        label_path: path to the annotations file of the dataset
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `AbstractDataset`.
    TF
img_folder
label_pathtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           
      
    t                      j        |fd|st          nd i| |r|rt          d          t          j                            |          rt          j                            |          s3t          dt          j                            |          s|n|           g | _        || _	        t          j        }t	          j        |          }	t          t          |	          dz            }
| j	        rt          |
          nt          |
d           }| j	        rdnd}|rd|z   n|}t          j                            t          j                            | j                  |          }d}|r6t          j                            |          r|                     |           d S |r5t          j                            |          st	          j        |d	
           t-          |          5 }t/          j        |          d d d            n# 1 swxY w Y   t3          |	|         dt          |	|                             D ]}t5          ||          }|                    d          d         }t          j                            t          j                            | j        |                    s5t          dt          j                            | j        |                     |d                                         vrd         |         }fd|D             }d |D             }d |D             }d |D             }|sd |D             }t          |          dk    r|rYt;          t          j                            | j        |          t          j        ||                    }t?          ||          D ]\  }}|j         d         dk    r|j         d         dk    rt          |          dk    rt          |          dk     rd|vrt-          t          j                            || d          d          5 }|!                    |           tE          j#        |          }|$                    t          j                            || d                     |dz  }d d d            n# 1 swxY w Y   |r2| j        %                    |t          j        ||          f           | j        %                    |tM          t          j        ||          |          f           |r|                     |           d S d S )Npre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.zunable to locate g?IMGUR5K_recognition_trainIMGUR5K_recognition_testPoly_r   F)exist_okPreparing and Loading IMGUR5Kiterabledesctotal.index_to_ann_mapc                 ,    g | ]}d          |         S )ann_id ).0a_idannotation_files     Y/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/imgur5k.py
<listcomp>z$IMGUR5K.__init__.<locals>.<listcomp>q   s#    OOOt?84T:OOO    c                 6    g | ]}|d          dk    |d          S )wordr    r$   r%   anns     r(   r)   z$IMGUR5K.__init__.<locals>.<listcomp>s   s*    OOOcCK3<N<Nc&k<N<N<Nr*   c           
          g | ]\}|d          dk    t          t          t          |d                             d                              d                              ]S )r,   r    bounding_boxz[ ]z, )listmapfloatstripsplitr-   s     r(   r)   z$IMGUR5K.__init__.<locals>.<listcomp>u   sd       v;#%% SN 3 9 9% @ @ F Ft L LMMNN%%%r*   c                     g | ];}t          j        |d          |d         f|d         |d         f|d         f          <S )r   r            )cv2	boxPoints)r%   boxs     r(   r)   z$IMGUR5K.__init__.<locals>.<listcomp>{   sI    iii[^3=3q63q6*:SVSV<LcRSf)UVViiir*   c                     g | ]@}t          j        |                    d           |                    d           fd          AS )r   )axis)npconcatenateminmax)r%   pointss     r(   r)   z$IMGUR5K.__init__.<locals>.<listcomp>   sB    qqq[ar~vzz!}}fjjmm.LSUVVVqqqr*   )dtype)img_pathgeomsr       .txtwz.png)boxeslabels)'super__init__r	   
ValueErrorospathexistsFileNotFoundErrordatar   r@   float32listdirintlenslicejoindirnamerootisdir_read_from_foldermakedirsopenjsonloadr   r   r5   keysr
   asarrayzipshapewriter   	fromarraysaveappenddict)selfr   r   r   r   r   r   r   np_dtype	img_namestrain_samples	set_slicereco_folder_namereco_folder_pathreco_images_counterfimg_namerF   img_idann_idsannotationsrM   _boxesbox_targetscropscroplabeltmp_imgr'   	__class__s                               @r(   rO   zIMGUR5K.__init__4   s    		
 	
IY'c'A'A_c	
gm	
 	
 	
  	 	c   w~~j)) 	x
1K1K 	x#$v"'..YcJdJd8t

jt$v$vwww]_	
:Jz**	C	NNS011,0JVE-(((E-QU<V<V	 ;?*d66Jd9E[7%555K[7<<	(B(BDTUU 	:.> ? ? 	:""#3444F 	:bgmm4D&E&E 	:K(59999* 	+"illO	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ y)0OWZ[den[oWpWp
 
 
 6	u 6	uH J11H^^C((+F 7>>"',,ty("C"CDD a'(_BGLLT\<]<](_(_``` _-?@EEGGGG%&89&AGOOOOwOOOKOO[OOOF &  F jibhiiiK rqqepqqq ;!### u2!#di!B!B"*U`hpJqJqJq  E (+5&'9'9 9 9e JqMA-- $
1 1 1 #E

Q #E

R #5 0 0 "&bgll3CH[EaEaEa&b&bdg!h!h 9lm !*//$*?*? 'RW\\:JObLhLhLh-i-i j j j 3q 8 3	9 9 9 9 9 9 9 9 9 9 9 9 9 9 99 $ uI$$h
;h0W0W0W%XYYYYI$$h2:kYa;b;b;bkq0r0r0r%sttt 	5""#344444	5 	5s%   5HHHA%R>>SSc                     d| j          S )Nztrain=)r   )rm   s    r(   
extra_reprzIMGUR5K.extra_repr   s    $
$$$r*   rR   c           	         t          j         t          j                            |d                    }t	          |dt          |                    D ]}t          t          j                            |t          j                            |          d d          d          d          5 }| j        	                    ||
                                f           d d d            n# 1 swxY w Y   d S )Nz*.pngr   r   rJ   r)globrQ   rR   r[   r   rY   ra   basenamerU   rk   read)rm   rR   	img_pathsrF   ru   s        r(   r_   zIMGUR5K._read_from_folder   s   Ibgll499::	i6U]`aj]k]klll 	7 	7Hbgll4BG,<,<X,F,Fss,K)Q)Q)QRRTWXX 7\]	  (AFFHH!56667 7 7 7 7 7 7 7 7 7 7 7 7 7 7	7 	7s   */C%%C)	,C)	)TFFF)__name__
__module____qualname____doc__strboolr   rO   r   r_   __classcell__)r   s   @r(   r   r      s         @ "!&$g5 g5g5 g5 	g5
 g5 g5 g5 g5 
g5 g5 g5 g5 g5 g5R%C % % % %7c 7d 7 7 7 7 7 7 7 7r*   )r   rb   rQ   pathlibr   typingr   r:   numpyr@   PILr   r   datasetsr   utilsr	   r
   __all__r   r$   r*   r(   <module>r      s      				             



                 % % % % % % E E E E E E E E+M7 M7 M7 M7 M7o M7 M7 M7 M7 M7r*   