
    jN                         d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZ dgZ G d	 de
          ZdS )
    N)Path)Any)tqdm   )VisionDataset)convert_target_to_relativecrop_bboxes_from_imageSROIEc                   Z     e Zd ZdZdZdZ	 	 	 	 ddededed	ed
eddf fdZde	fdZ
 xZS )r
   a  SROIE dataset from `"ICDAR2019 Competition on Scanned Receipt OCR and Information Extraction"
    <https://arxiv.org/pdf/2103.10213.pdf>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/sroie-grid.png&src=0
        :align: center

    >>> from doctr.datasets import SROIE
    >>> train_set = SROIE(train=True, download=True)
    >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `VisionDataset`.
    )zPhttps://doctr-static.mindee.com/models?id=v0.1.1/sroie2019_train_task1.zip&src=0@d4fa9e60abb03500d83299c845b9c87fd9c9430d1aeac96b83c5d0bb0ab27f6fzsroie2019_train_task1.zip)zIhttps://doctr-static.mindee.com/models?id=v0.1.1/sroie2019_test.zip&src=0@41b3c746a20226fddc80d86d4b2a903d43b5be4f521dd1bbe759dbf8844745e2zsroie2019_test.zipTFtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           	      j   |r| j         n| j        \  }}} t                      j        |||dfd|st          nd i| |r|rt          d          || _        t          j        	                    | j
        d          }	g | _        t          j        t          t          j        |	          dt!          t          j        |	                              D ]V}
t          j                            t          j        	                    |	|
                    s0t%          dt          j        	                    |	|
                     t'          |
          j        }t+          t          j        	                    | j
        d| d	          d
          5 }d t-          t/          j        |d                    D             }d d d            n# 1 swxY w Y   d |D             }t          j        fd|D             d          }|s@t          j        |                    d          |                    d          fd          }|rt;          t          j        	                    |	|
          |          }t=          ||          D ]V\  }}|j        d         dk    r@|j        d         dk    r/t!          |          dk    r| j                             ||f           W|r| j                             |
|f           +| j                             |
tC          ||          f           X|	| _
        d S )NTpre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.imageszPreparing and Loading SROIE)iterabledesctotalzunable to locate annotationsz.txtlatin)encodingc                 8    g | ]}t          |          d k    |S )r   )len.0rows     W/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/sroie.py
<listcomp>z"SROIE.__init__.<locals>.<listcomp>X   s%    [[[cRUhhYZlllll    ,)	delimiterc                 H    g | ]}d                      |dd                    S )r%      N)joinr   s     r"   r#   z"SROIE.__init__.<locals>.<listcomp>Z   s,    999Cchhs122w''999r$   c                     g | ]S}t          j        t          t          t          |d d                                                           d          TS )Nr(   )dtype)      )nparraylistmapintreshape)r    r!   np_dtypes     r"   r#   z"SROIE.__init__.<locals>.<listcomp>^   sQ    dddWZ$s3BQB0011BBBJJ6RRdddr$   r   )axisr   )img_pathgeoms)boxeslabels)"TRAINTESTsuper__init__r   
ValueErrorr   ospathr)   rootdatar.   float32r   listdirr   existsFileNotFoundErrorr   stemopenr0   csvreaderstackconcatenateminmaxr	   zipshapeappenddict)selfr   r   r   r   r   urlsha256nametmp_rootr6   rG   f_rowsr9   coordscropscroplabelr4   	__class__s                      @r"   r=   zSROIE.__init__2   s    +0>DJJTYVT		
 	

 >NW55SW	
 	
 	
 	
  	 	c  
 
7<<	844VX	:Z))0MUXY[YcdlYmYmUnUn
 
 
 	P 	PH 7>>"',,x"B"BCC `'(^BGLLS[<\<\(^(^___>>&Dbgll49m]]]KKV]^^^ \bc[[SZS-I-I-I(J(J[[[\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ :95999F "$dddd^cdddkl" " "F   Z););VZZQZ=O=O(PWXYYY P.Xx8X8X`fggg#&uf#5#5 8 8KD%z!}q((TZ]Q->->3u::PQ>>	(($7778   P	  (F!34444	  (Dvf,M,M,M!NOOOO			s   .G

G	G	c                     d| j          S )Nztrain=)r   )rS   s    r"   
extra_reprzSROIE.extra_reprq   s    $
$$$r$   )TFFF)__name__
__module____qualname____doc__r:   r;   boolr   r=   strr`   __classcell__)r^   s   @r"   r
   r
      s         $E
D "!&$= == = 	=
 = = 
= = = = = =~%C % % % % % % % %r$   )rI   r?   pathlibr   typingr   numpyr.   r   datasetsr   utilsr   r	   __all__r
    r$   r"   <module>ro      s    


 				                       # # # # # # E E E E E E E E)^% ^% ^% ^% ^%M ^% ^% ^% ^% ^%r$   