
    j
                     `    d dl Z d dlmZ d dlmZ d dlmZ ddlmZ dgZ G d de          Z	dS )	    N)sample)Any)tqdm   )AbstractDatasetIIITHWSc                   H     e Zd ZdZ	 ddededededdf
 fd	Zdefd
Z xZ	S )r   al  IIITHWS dataset from `"Generating Synthetic Data for Text Recognition"
    <https://arxiv.org/pdf/1608.04224.pdf>`_ | `"repository" <https://github.com/kris314/hwnet>`_ |
    `"website" <https://cvit.iiit.ac.in/research/projects/cvit-projects/matchdocimgs>`_.

    >>> # NOTE: This is a pure recognition dataset without bounding box labels.
    >>> # NOTE: You need to download the dataset.
    >>> from doctr.datasets import IIITHWS
    >>> train_set = IIITHWS(img_folder="/path/to/iiit-hws/Images_90K_Normalized",
    >>>                     label_path="/path/to/IIIT-HWS-90K.txt",
    >>>                     train=True)
    >>> img, target = train_set[0]
    >>> test_set = IIITHWS(img_folder="/path/to/iiit-hws/Images_90K_Normalized",
    >>>                    label_path="/path/to/IIIT-HWS-90K.txt")
    >>>                    train=False)
    >>> img, target = test_set[0]

    Args:
        img_folder: folder with all the images of the dataset
        label_path: path to the file with the labels
        train: whether the subset should be the training one
        **kwargs: keyword arguments from `AbstractDataset`.
    T
img_folder
label_pathtrainkwargsreturnNc                     t                      j        |fi | t          j                            |          rt          j                            |          s3t          dt          j                            |          s|n|           g | _        || _        t          |          5 }|	                                }d d d            n# 1 swxY w Y   t          |t          |                    }t          t          |          dz            }| j        rt          |          nt          |d           }t          ||         dt          ||                             D ]]}	|	                                dd         \  }
}t          j                            ||
          }
| j                            |
|f           ^d S )Nzunable to locate g?zPreparing and Loading IIITHWS)iterabledesctotalr      )super__init__ospathexistsFileNotFoundErrordatar   open	readlinesr   lenintslicer   splitjoinappend)selfr
   r   r   r   fannotationstrain_samples	set_slice
annotationimg_pathlabel	__class__s               Y/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/iiithws.pyr   zIIITHWS.__init__)   s    	..v... w~~j)) 	x
1K1K 	x#$v"'..YcJdJd8t

jt$v$vwww+-	
* 	(++--K	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( [#k*:*:;;C,,s233,0JVE-(((E-QU<V<V	 +2QY\]hir]sYtYt
 
 
 	0 	0J )..0015OHew||J99HIh.////	0 	0s   *CCCc                     d| j          S )Nztrain=)r   )r#   s    r,   
extra_reprzIIITHWS.extra_reprI   s    $
$$$    )T)
__name__
__module____qualname____doc__strboolr   r   r.   __classcell__)r+   s   @r,   r   r      s         6 	0 00 0 	0
 0 
0 0 0 0 0 0@%C % % % % % % % %r/   )
r   randomr   typingr   r   datasetsr   __all__r    r/   r,   <module>r<      s    
			                   % % % % % %+9% 9% 9% 9% 9%o 9% 9% 9% 9% 9%r/   