
    j                         d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZ dgZ G d	 de
          ZdS )
    N)Path)Any)tqdm   )VisionDataset)convert_target_to_relativecrop_bboxes_from_imageCORDc                   Z     e Zd ZdZdZdZ	 	 	 	 ddededed	ed
eddf fdZde	fdZ
 xZS )r
   a  CORD dataset from `"CORD: A Consolidated Receipt Dataset forPost-OCR Parsing"
    <https://openreview.net/pdf?id=SJl3z659UH>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/cord-grid.png&src=0
        :align: center

    >>> from doctr.datasets import CORD
    >>> train_set = CORD(train=True, download=True)
    >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `VisionDataset`.
    )zEhttps://doctr-static.mindee.com/models?id=v0.1.1/cord_train.zip&src=0@45f9dc77f126490f3e52d7cb4f70ef3c57e649ea86d19d862a2757c9c455d7f8zcord_train.zip)zDhttps://doctr-static.mindee.com/models?id=v0.1.1/cord_test.zip&src=0@8c895e3d6f7e1161c5b7245e3723ce15c04d84be89eaa6093949b75a66fb3c58zcord_test.zipTFtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc                    |r| j         n| j        \  }}} t                      j        |||dfd|st          nd i| |r|rt          d          t          j                            | j	        d          }	g | _
        || _        t          j        }
t          t          j        |	          dt!          t          j        |	                              D ]}t          j                            t          j                            |	|                    s0t%          dt          j                            |	|                     t'          |          j        }g }t+          t          j                            | j	        d| d	          d
          5 }t-          j        |          }|d         D ]B}|d         D ]5}t!          |d                   dk    r|d         d         |d         d         |d         d         |d         d         f}|d         d         |d         d         |d         d         |d         d         f}|rSt          j        |d         |d         g|d         |d         g|d         |d         g|d         |d         gg|
          }n:t3          |          t3          |          t5          |          t5          |          g}|                    |d         |f           7D	 d d d            n# 1 swxY w Y   t9          | \  }}|rt;          t          j                            |	|          t          j        |t>                                         d                    }t9          |tC          |                    D ]%\  }}d|vr| j
                            ||f           &|rK| j
                            |t          j        |t>                                         d          f           2| j
                            |tE          t          j        |t>                                         d          tC          |                    f           |	| _	        d S ) NTpre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.imagezPreparing and Loading CORD)iterabledesctotalzunable to locate jsonz.jsonrb
valid_linewordstextr   quadx1x2x3x4y1y2y3y4r         )dtype)min)img_pathgeoms )boxeslabels)#TRAINTESTsuper__init__r   
ValueErrorospathjoinrootdatar   npfloat32r   listdirlenexistsFileNotFoundErrorr   stemopenr   loadarrayr+   maxappendzipr	   asarrayintcliplistdict)selfr   r   r   r   r   urlsha256nametmp_rootnp_dtyper,   rA   _targetsflabellinewordxyboxtext_targetsbox_targetscropscrop	__class__s                            V/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/cord.pyr4   zCORD.__init__3   s    +0>DJJTYVT		
 	

 >NW55SW	
 	
 	
 	
  	 	c   7<<	733VX	
:Z))0LTWXZXbckXlXlTmTm
 
 
 0	 0	H 7>>"',,x"B"BCC `'(^BGLLS[<\<\(^(^___>>&DHbgll49fnnnEEtLL APQ	!!,/ A AD $W A AtF|,,q00 $VT 2DL4FVUYHZ\`ag\him\n nA $VT 2DL4FVUYHZ\`ag\him\n nA+ G&(h)*1qt)*1qt)*1qt)*1qt	%& +3'" '" '" (+1vvs1vvs1vvs1vv&F$OOT&\3,?@@@'AAA A A A A A A A A A A A A A A0 ),X%L+ .W\\(H==RZP[cfEgEgEgElElqrElEsEs   $'ud<.@.@#A#A 8 8KD%%''	(($7778   	  (BJ{#,N,N,N,S,SXY,S,Z,Z![\\\\	  rz+SAAAFF1FMMVZ[gVhVhiii"    
 			s   E!K>>L	L	c                     d| j          S )Nztrain=)r   )rM   s    r`   
extra_reprzCORD.extra_repr   s    $
$$$    )TFFF)__name__
__module____qualname____doc__r1   r2   boolr   r4   strrb   __classcell__)r_   s   @r`   r
   r
      s         $ED "!&$N NN N 	N
 N N 
N N N N N N`%C % % % % % % % %rc   )r   r6   pathlibr   typingr   numpyr;   r   datasetsr   utilsr   r	   __all__r
    rc   r`   <module>rr      s     				                       # # # # # # E E E E E E E E(p% p% p% p% p%= p% p% p% p% p%rc   