
    jB                     \    d dl Z d dlZd dlmZ d dlmZ ddlmZ dgZ G d de          Z	dS )    N)Path)Any   )AbstractDatasetRecognitionDatasetc                   D     e Zd ZdZdedededdf fdZdeddfd	Z xZ	S )
r   a  Dataset implementation for text recognition tasks

    >>> from doctr.datasets import RecognitionDataset
    >>> train_set = RecognitionDataset(img_folder="/path/to/images",
    >>>                                labels_path="/path/to/labels.json")
    >>> img, target = train_set[0]

    Args:
        img_folder: path to the images folder
        labels_path: path to the json file containing all labels (character sequences)
        **kwargs: keyword arguments from `AbstractDataset`.
    
img_folderlabels_pathkwargsreturnNc                     t                      j        |fi | g | _        t          |d          5 }t	          j        |          }d d d            n# 1 swxY w Y   |                                D ]\  }}t          j        	                    t          j        
                    | j        |                    s5t          dt          j        
                    | j        |                     | j                            ||f           d S )Nzutf-8)encodingzunable to locate )super__init__dataopenjsonloaditemsospathexistsjoinrootFileNotFoundErrorappend)	selfr	   r
   r   flabelsimg_namelabel	__class__s	           ]/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/recognition.pyr   zRecognitionDataset.__init__   s5    	..v...+-	+000 	"AYq\\F	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"  &||~~ 	0 	0OHe7>>"',,ty("C"CDD a'(_BGLLT\<]<](_(_```Ih.////		0 	0s   AAAdsc                      fd j         D              _         t          d           _        |j         D ]S\  }} j                             t	          t          |j                                      |                    |f           Td S )Nc                     g | ];\  }}t          t          j                                      |                    |f<S  )strr   r   joinpath).0img_pathr!   r   s      r#   
<listcomp>z4RecognitionDataset.merge_dataset.<locals>.<listcomp>2   sA    ggg/(TYc$ty//228<<==uEggg    /)r   r   r   r   r(   r)   )r   r$   r+   r!   s   `   r#   merge_datasetz RecognitionDataset.merge_dataset0   s    gggg]a]fggg	II	!w 	M 	MOHeIc$rw--"8"8"B"BCCUKLLLL	M 	Mr-   )
__name__
__module____qualname____doc__r(   r   r   r   r/   __classcell__)r"   s   @r#   r   r      s         00 0 	0
 
0 0 0 0 0 0$M MD M M M M M M M Mr-   )
r   r   pathlibr   typingr   datasetsr   __all__r   r'   r-   r#   <module>r9      s     				             % % % % % %
 'M 'M 'M 'M 'M 'M 'M 'M 'M 'Mr-   