
    j]                         d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZ dgZ G d	 de
          ZdS )
    N)Path)Any)tqdm   )AbstractDataset)convert_target_to_relativecrop_bboxes_from_imageWILDRECEIPTc                   Z     e Zd ZdZ	 	 	 	 ddededededed	ed
eddf fdZdefdZ xZ	S )r
   aL  
    WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
    <https://arxiv.org/abs/2103.14470v1>`_ |
    `"repository" <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.7.0/wildreceipt-dataset.jpg&src=0
        :align: center

    >>> # NOTE: You need to download the dataset first.
    >>> from doctr.datasets import WILDRECEIPT
    >>> train_set = WILDRECEIPT(train=True, img_folder="/path/to/wildreceipt/",
    >>>                     label_path="/path/to/wildreceipt/train.txt")
    >>> img, target = train_set[0]
    >>> test_set = WILDRECEIPT(train=False, img_folder="/path/to/wildreceipt/",
    >>>                    label_path="/path/to/wildreceipt/test.txt")
    >>> img, target = test_set[0]

    Args:
        img_folder: folder with all the images of the dataset
        label_path: path to the annotations file of the dataset
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `AbstractDataset`.
    TF
img_folder
label_pathtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           
      ^    t                      j        |fd|st          nd i| |r|rt          d          t          j                            |          rt          j                            |          s3t          dt          j                            |          s|n|           |}|| _        t          j
        }	g | _        t          |d          5 }
|
                                }d d d            n# 1 swxY w Y   |                                                    d          }t!          |dt#          |                    D ][}g }t%          j        |          }|d         }|d	         }|D ]}|d
         }|rSt          j        |d         |d         g|d         |d         g|d         |d         g|d         |d         gg|	          }nP|d d d         |dd d         }}t+          |          t+          |          t-          |          t-          |          g}|                    |d         |f           t1          | \  }}|rt3          t          j                            ||          t          j        |t8                                        d                    }t1          |t=          |                    D ]'\  }}|r d|vr| j                            ||f           (|rK| j                            |t          j        |t8                                        d          f           | j                            |t?          t          j        |t8                                        d          t=          |                    f           ]|| _         d S )Npre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.zunable to locate r
z!Preparing and Loading WILDRECEIPT)iterabledesctotal	file_nameannotationsboxr   r                     )dtypetext)min)img_pathgeoms )boxeslabels)!super__init__r   
ValueErrorospathexistsFileNotFoundErrorr   npfloat32dataopenreadstripsplitr   lenjsonloadsarrayr&   maxappendzipr	   joinasarrayintcliplistdictroot)selfr   r   r   r   r   r   r   tmp_rootnp_dtypefiler5   json_stringsjson_string_targets	json_datar'   r   
annotationcoordinatesr   xytext_targetsbox_targetscropscroplabel	__class__s                               ]/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/wildreceipt.pyr-   zWILDRECEIPT.__init__0   s)    		
 	
IY'c'A'A_c	
gm	
 	
 	
  	 	c   w~~j)) 	x
1K1K 	x#$v"'..YcJdJd8t

jt$v$vwww
:]_	*c"" 	d99;;D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 zz||))$//  !(KSVWcSdSd
 
 
 '	 '	K H
;//I -H#M2K) ; ;
(/ ;((^[^<(^[^<(^[^<(^[^<	 '  CC 'sss+[A->qAq663q663q663q66:CF!3S 9::::(+X%L+ .W\\(H==RZP[cfEgEgEgElElqrElEsEs   $'ud<.@.@#A#A 8 8KD% 8E!1!1	(($7778   	  (BJ{#,N,N,N,S,SXY,S,Z,Z![\\\\	  rz+SAAAFF1FMMVZ[gVhVhiii"     			s   C88C<?C<c                     d| j          S )Nztrain=)r   )rH   s    rZ   
extra_reprzWILDRECEIPT.extra_repr}   s    $
$$$    )TFFF)
__name__
__module____qualname____doc__strboolr   r-   r\   __classcell__)rY   s   @rZ   r
   r
      s         > "!&$K KK K 	K
 K K K K 
K K K K K KZ%C % % % % % % % %r]   )r;   r/   pathlibr   typingr   numpyr3   r   datasetsr   utilsr   r	   __all__r
    r]   rZ   <module>rl      s     				                       % % % % % % E E E E E E E E/j% j% j% j% j%/ j% j% j% j% j%r]   