
    j                         d dl Z d dlZd dlmZ d dlZd dlmZ d dlm	Z
 d dlmZ ddlmZ ddlmZmZ d	gZ G d
 d	e          ZdS )    N)Any)Image)io)tqdm   )VisionDataset)convert_target_to_relativecrop_bboxes_from_image	SynthTextc                   n     e Zd ZdZdZdZdZ	 	 	 	 ddeded	ed
ededdf fdZ	de
fdZde
ddfdZ xZS )r   a  SynthText dataset from `"Synthetic Data for Text Localisation in Natural Images"
    <https://arxiv.org/abs/1604.06646>`_ | `"repository" <https://github.com/ankush-me/SynthText>`_ |
    `"website" <https://www.robots.ox.ac.uk/~vgg/data/scenetext/>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/svt-grid.png&src=0
        :align: center

    >>> from doctr.datasets import SynthText
    >>> train_set = SynthText(train=True, download=True)
    >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `VisionDataset`.
    z>https://thor.robots.ox.ac.uk/~vgg/data/scenetext/SynthText.zip@28ab030485ec8df3ed612c568dd71fb2793b9afbfa3a9d9c6e792aef33265bf1)z67/fruits_129_z194/window_19_TFtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           
      p
    t                      j        | j        d fd d|st          nd d| |r|rt	          d          || _        g | _        t          j        }| j	        r%t          j                            | j        d          n| j        }| j        rdnd}|rd|z   n|}t          j                            ||          }	d}
|r6t          j                            |	          r|                     |	           d S |r5t          j                            |	          st          j        |	d	
           t#          j        t          j                            |d                    }t'          t)          |d         d                   dz            }| j        rt+          |          nt+          |d           }|d         d         |         }|d         d         |         }|d         d         |         }~t-          t/          |||          dt)          |                    D ]\  }}}t          j                            t          j                            ||d                             s6t3          dt          j                            ||d                              d |                                D             }|j        dk    r|                    ddd          n)t          j        |                    dd          d          }|s@t          j        |                    d          |                     d          fd          }|rFtC          t          j                            ||d                   |          }t/          ||          D ]\  }}|j"        d         dk    r|j"        d         dk    rt)          |          dk    rt)          |          dk     rd|vrtG          t          j                            |	|
 d          d          5 }|$                    |           tK          j&        |          }|'                    t          j                            |	|
 d                     |
dz  }
d d d            n# 1 swxY w Y   u|r8| j        (                    |d         t          j)        ||          f           | j        (                    |d         tU          t          j)        ||          |          f           |r|                     |	           || _        d S ) NT)	file_hashextract_archivepre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.r   SynthText_recognition_trainSynthText_recognition_testPoly_r   F)exist_okzgt.matimnamesg?wordBBtxtPreparing and Loading SynthTextiterabledesctotalzunable to locate c                 @    g | ]}|                                 D ]}|S  )split).0wordelts      [/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/synthtext.py
<listcomp>z&SynthText.__init__.<locals>.<listcomp>i   s-    KKKddjjllKKscKKKK          r   )axis)img_pathgeoms    .txtwz.png)dtype)boxeslabels)+super__init__URLr	   
ValueErrorr   datanpfloat32SHA256ospathjoinrootisdir_read_from_foldermakedirssioloadmatintlenslicer   zipexistsFileNotFoundErrortolistndim	transposeexpand_dimsconcatenateminmaxr
   shapeopenwriter   	fromarraysaveappendasarraydict)selfr   r   r   r   r   np_dtypetmp_rootreco_folder_namereco_folder_pathreco_images_countermat_datatrain_samples	set_slicepathsr7   r8   r0   
word_boxesr   cropscroplabelftmp_img	__class__s                            r*   r:   zSynthText.__init__2   s{    	H	
  =MW55SW	
 	
 	
 	
 	
  	 	c  
 
VX	: <@;U27<<	;777DI<@Jh88Lh9E[7%555K[7<<2BCC 	:.> ? ? 	:""#3444F 	:bgmm4D&E&E 	:K(59999;rw||Hh??@@C 3A 677#=>>,0JVE-(((E-QU<V<V	#A&y1"1%i0%#I.)-v..5V^abg^h^h*
 *
 *
 &	s &	s%Hj# 7>>"',,x!"E"EFF c'(aBGLLS[\]S^<_<_(a(abbbKKcjjllKKKF ?a'' $$Q1---^J$8$8A$>$>QGGG    f^Z^^^-C-CZ^^YZ^E[E[,\cdeee
 s.XxXY{8[8[cmnnn#&uf#5#5 5 5KD%
1)) JqMA--JJNNJJOOu,, ""',,/?DWA]A]A]"^"^`cdd 5hiGGENNN&+od&;&;G#LL6FK^HdHdHd)e)efff/14/	5 5 5 5 5 5 5 5 5 5 5 5 5 5 55   s	  (1+rz*H/U/U/U!VWWWW	  (1+t"*ZW_:`:`:`io/p/p/p!qrrrr 	5""#3444			s   A%R		RRc                     d| j          S )Nztrain=)r   )r_   s    r*   
extra_reprzSynthText.extra_repr   s    $
$$$r,   rB   c           	         t          j         t          j                            |d                    }t	          |dt          |                    D ]}t          t          j                            |t          j                            |          d d          d          d          5 }| j        	                    ||
                                f           d d d            n# 1 swxY w Y   d S )Nz*.pngr   r    r4   r)globrA   rB   rC   r   rK   rX   basenamer=   r\   read)r_   rB   	img_pathsr0   rm   s        r*   rF   zSynthText._read_from_folder   s   Ibgll499::	i6W_bcl_m_mnnn 	7 	7Hbgll4BG,<,<X,F,Fss,K)Q)Q)QRRTWXX 7\]	  (AFFHH!56667 7 7 7 7 7 7 7 7 7 7 7 7 7 7	7 	7s   */C%%C)	,C)	)TFFF)__name__
__module____qualname____doc__r;   r@   	BLACKLISTboolr   r:   strrq   rF   __classcell__)ro   s   @r*   r   r      s         & KCOFI "!&$[ [[ [ 	[
 [ [ 
[ [ [ [ [ [z%C % % % %7c 7d 7 7 7 7 7 7 7 7r,   )ru   rA   typingr   numpyr>   PILr   scipyr   rH   r   datasetsr   utilsr	   r
   __all__r   r%   r,   r*   <module>r      s     				                             # # # # # # E E E E E E E E-A7 A7 A7 A7 A7 A7 A7 A7 A7 A7r,   