
    j                         d dl Z d dlmZ d dlZd dlmZ d dlm	Z	 d dl
m
Z
 ddlmZ ddlmZ dgZ G d	 de          ZdS )
    N)Any)Image)tqdm   )VisionDataset)convert_target_to_relativeIIIT5Kc                   Z     e Zd ZdZdZdZ	 	 	 	 ddededed	ed
eddf fdZde	fdZ
 xZS )r	   a  IIIT-5K character-level localization dataset from
    `"BMVC 2012 Scene Text Recognition using Higher Order Language Priors"
    <https://cdn.iiit.ac.in/cdn/cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/home/mishraBMVC12.pdf>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/iiit5k-grid.png&src=0
        :align: center

    >>> # NOTE: this dataset is for character-level localization
    >>> from doctr.datasets import IIIT5K
    >>> train_set = IIIT5K(train=True, download=True)
    >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `VisionDataset`.
    zVhttps://cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/IIIT5K-Word_V3.0.tar.gz@7872c9efbec457eb23f3368855e7738f72ce10927f52a382deb4966ca0ffa38eTFtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           
      x    t                      j        | j        d f| j        d|st          nd d| |r|rt          d          || _        | j        r%t          j        	                    | j
        d          n| j
        }| j        rdnd}t          j        t          j        	                    || d                    |         d         }g | _        t          j        }	t!          |d	t#          |          
          D ]\  }
}}|
d         }|d         }t          j                            t          j        	                    ||                    s0t'          dt          j        	                    ||                     |rd |D             }nd |D             }|rd|vrt)          j        t          j        	                    ||                    5 }| j                            t          j        |                    d                    |f           d d d            n# 1 swxY w Y   2|r2| j                            |t          j        ||	          f           f| j                            |t5          t          j        ||	          t7          |                    f           || _
        d S )NT)	file_hashextract_archivepre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.r	   trainCharBoundtestCharBoundz.matr   zPreparing and Loading IIIT5K)iterabledesctotalzunable to locate c           	          g | ]d}|d          |d         g|d          |d         z   |d         g|d          |d         z   |d         |d         z   g|d          |d         |d         z   ggeS r   r          .0boxs     X/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/iiit5k.py
<listcomp>z#IIIT5K.__init__.<locals>.<listcomp>X   s         QQ(Q#a&#a&1Q#a&#a&3q6/:QQ#a&1	      c                 n    g | ]2}|d          |d         |d          |d         z   |d         |d         z   g3S r   r   r    s     r#   r$   z#IIIT5K.__init__.<locals>.<listcomp>c   sB    iiiVYAAAQQ#a&Qiiir%    RGB)dtype)boxeslabels)super__init__URLSHA256r   
ValueErrorr   ospathjoinrootsioloadmatdatanpfloat32r   lenexistsFileNotFoundErrorr   openappendarrayconvertasarraydictlist)selfr   r   r   r   r   tmp_rootmat_filemat_datanp_dtypeimg_pathlabelbox_targets	_raw_path
_raw_labelpil_img	__class__s                   r#   r-   zIIIT5K.__init__,   s5    	H	
 k =MW55SW	
 	
 	
 	
 	
  	 	c  
 
 9=R27<<	8444'+zF##;rw||H6G6G6GHHII(STUVVX	:,0$B#h---
 -
 -
 $	 $	(He[ !IqJ 7>>"',,x"C"CDD a'(_BGLLS\<]<](_(_``` j   +   ji]hiii j((BGLL9$E$EFF Y'	(("(7??53I3I*J*JJ)WXXXY Y Y Y Y Y Y Y Y Y Y Y Y Y Y 	  )RZ8-T-T-T!UVVVV 	  rz+XFFFtT^O_O_```"    
 			s   AH$$H(	+H(	c                     d| j          S )Nztrain=)r   )rD   s    r#   
extra_reprzIIIT5K.extra_reprt   s    $
$$$r%   )TFFF)__name__
__module____qualname____doc__r.   r/   boolr   r-   strrQ   __classcell__)rO   s   @r#   r	   r	      s         ( cCOF "!&$F FF F 	F
 F F 
F F F F F FP%C % % % % % % % %r%   )r1   typingr   numpyr8   scipy.ioior5   PILr   r   datasetsr   utilsr   __all__r	   r   r%   r#   <module>ra      s    
			                             # # # # # # - - - - - -*a% a% a% a% a%] a% a% a% a% a%r%   