
    j}                         d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZ dgZ G d	 de
          ZdS )
    N)Path)Any)tqdm   )VisionDataset)convert_target_to_relativecrop_bboxes_from_imageFUNSDc                   ^     e Zd ZdZdZdZdZ	 	 	 	 ddeded	ed
ededdf fdZ	de
fdZ xZS )r
   a  FUNSD dataset from `"FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents"
    <https://arxiv.org/pdf/1905.13538.pdf>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/funsd-grid.png&src=0
        :align: center

    >>> from doctr.datasets import FUNSD
    >>> train_set = FUNSD(train=True, download=True)
    >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `VisionDataset`.
    z2https://guillaumejaume.github.io/FUNSD/dataset.zip@c31735649e4f441bcbb4fd0f379574f7520b42286e80b01d80b445649d54761fz	funsd.zipTFtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           
          t                      j        | j        | j        | j        dfd|st
          nd i| |r|rt          d          || _        t          j	        }t          j                            d|rdnd          }t          j                            | j        |d          }g | _        t          t          j        |          dt#          t          j        |                    	          D ]I}	t          j                            t          j                            ||	                    s0t'          d
t          j                            ||	                     t)          |	          j        }
t-          t          j                            | j        |d|
 d          d          5 }t/          j        |          }d d d            n# 1 swxY w Y   d |d         D             }t3          | \  }}|rd |D             }|rt5          t          j                            ||	          t          j        ||                    }t3          |t9          |                    D ]P\  }t;          fddD                       s0| j                            |                    dd          f           Q|r2| j                            |	t          j        ||          f           | j                            |	tA          t          j        ||          t9          |                    f           K|| _        d S )NTpre_transformsz`recognition_task` and `detection_task` cannot be set to True simultaneously. To get the whole dataset with boxes and labels leave both parameters to False.datasettraining_datatesting_dataimageszPreparing and Loading FUNSD)iterabledesctotalzunable to locate annotationsz.jsonrbc                 v    g | ]6}|d          D ]+}t          |d                   dk    |d         |d         f,7S )wordstextr   box)len).0blockwords      W/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/funsd.py
<listcomp>z"FUNSD.__init__.<locals>.<listcomp>U   sa       !'N  tF|$$q(( ftE{+ )(((    formc                     g | ]@}|d          |d         g|d         |d         g|d         |d         g|d          |d         ggAS )r   r          )r#   r!   s     r&   r'   z"FUNSD.__init__.<locals>.<listcomp>^   sl         QQ(QQ(QQ(QQ(	  r(   )dtype)img_pathgeomsc              3       K   | ]}|v V  	d S )Nr-   )r#   charlabels     r&   	<genexpr>z!FUNSD.__init__.<locals>.<genexpr>n   s'      kktu}kkkkkkr(   )u   ☑u   ☐u   οu   u    u   –-)boxeslabels)!super__init__URL	FILE_NAMESHA256r   
ValueErrorr   npfloat32ospathjoinrootdatar   listdirr"   existsFileNotFoundErrorr   stemopenjsonloadzipr	   asarraylistanyappendreplacedict)selfr   r   r   r   r   np_dtype	subfoldertmp_rootr/   rI   frE   _targetstext_targetsbox_targetscropscropr3   	__class__s                     @r&   r:   zFUNSD.__init__+   s    	HNK		
 	

 >NW55SW	
 	
 	
 	
  	 	c  
 
: GLLu,XOO.YY	 7<<	9h??VX	Z))0MUXY[YcdlYmYmUnUn
 
 
 ,	 ,	H 7>>"',,x"B"BCC `'(^BGLLS[<\<\(^(^___>>&Dbgll49i4WWY]^^ $bcy||$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ !&\  H ),X%L+ 
   +     .W\\(H==RZP[ckElElEl   $'ud<.@.@#A#A L LKD%kkkk9jkkkkk L	(($eS0I0I)JKKKL   	  (BJ{(,S,S,S!TUUUU	  rz+XFFFtT`OaOabbb"    
 			s   0GG	G	c                     d| j          S )Nztrain=)r   )rT   s    r&   
extra_reprzFUNSD.extra_reprz   s    $
$$$r(   )TFFF)__name__
__module____qualname____doc__r;   r=   r<   boolr   r:   strr`   __classcell__)r^   s   @r&   r
   r
      s         $ ?COFI "!&$M MM M 	M
 M M 
M M M M M M^%C % % % % % % % %r(   )rK   rA   pathlibr   typingr   numpyr?   r   datasetsr   utilsr   r	   __all__r
   r-   r(   r&   <module>rn      s     				                       # # # # # # E E E E E E E E)g% g% g% g% g%M g% g% g% g% g%r(   