
    j	                     d    d dl Z d dlZd dlmZ d dlmZ d dlZddlm	Z	 dgZ
 G d de	          ZdS )    N)Path)Any   )AbstractDataset
OCRDatasetc                   <     e Zd ZdZ	 d
dededededdf
 fd	Z xZS )r   a  Implements an OCR dataset

    >>> from doctr.datasets import OCRDataset
    >>> train_set = OCRDataset(img_folder="/path/to/images",
    >>>                        label_file="/path/to/labels.json")
    >>> img, target = train_set[0]

    Args:
        img_folder: local path to image folder (all jpg at the root)
        label_file: local path to the label file
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        **kwargs: keyword arguments from `AbstractDataset`.
    F
img_folder
label_fileuse_polygonskwargsreturnNc           
          t                      j        |fi | g | _        t          j        }t          |d          5 }t          j        |          }d d d            n# 1 swxY w Y   |                                D ]V\  }}	t          |          }t          j                            t          j                            | j        |                    s5t          dt          j                            | j        |                     t!          |	d                   dk    r@| j                            |t%          t          j        d|          g           f           d |	d         D             }
|rd	 |
D             }
d
 |	d         D             }| j                            |t%          t          j        |
|          |          f           Xd S )Nrbzunable to locate typed_wordsr   )r      )dtype)boxeslabelsc           
      n    g | ]2}t          t          t          |d          dd                             3S )geometryNr   )listmapfloat.0objs     U/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/ocr.py
<listcomp>z'OCRDataset.__init__.<locals>.<listcomp><   s7    aaasT#eS_RaR%899::aaa    c                 r    g | ]4}|d d         |d         |d         g|dd          |d         |d         gg5S )N   r   r       )r   geoms     r   r   z'OCRDataset.__init__.<locals>.<listcomp>?   s[        "1"XQa148d1gtAw=OP  r   c                     g | ]
}|d          S )valuer#   r   s     r   r   z'OCRDataset.__init__.<locals>.<listcomp>D   s    OOOSCLOOOr   )super__init__datanpfloat32openjsonloaditemsr   ospathexistsjoinrootFileNotFoundErrorlenappenddictzerosasarray)selfr	   r
   r   r   np_dtypefr)   img_nameannotationsgeomstext_targets	__class__s               r   r(   zOCRDataset.__init__!   s    	..v... 8:	:*d## 	 q9Q<<D	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  &*ZZ\\ 	m 	m!HkH~~H7>>"',,ty("C"CDD a'(_BGLLT\<]<](_(_``` ;}-..!33	  (Drxh7W7W7W`b,c,c,c!deeeaakR_F`aaaE   %  
 POK4NOOOLIh2:e83T3T3T]i(j(j(jkllll-	m 	ms   A  A$'A$)F)	__name__
__module____qualname____doc__strboolr   r(   __classcell__)rB   s   @r   r   r      s         $ #	%m %m%m %m 	%m
 %m 
%m %m %m %m %m %m %m %m %m %mr   )r-   r0   pathlibr   typingr   numpyr*   datasetsr   __all__r   r#   r   r   <module>rO      s     				                 % % % % % %.4m 4m 4m 4m 4m 4m 4m 4m 4m 4mr   