
    j                     X    d dl Z d dlZd dlmZ d dlZddlmZ dgZ G d de          Z	dS )    N)Any   )VisionDatasetDocArtefactsc            	       V     e Zd ZdZdZdZg dZ	 	 ddeded	ed
df fdZ	d
e
fdZ xZS )r   a  Object detection dataset for non-textual elements in documents.
    The dataset includes a variety of synthetic document pages with non-textual elements.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/artefacts-grid.png&src=0
        :align: center

    >>> from doctr.datasets import DocArtefacts
    >>> train_set = DocArtefacts(train=True, download=True)
    >>> img, target = train_set[0]

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        **kwargs: keyword arguments from `VisionDataset`.
    zVhttps://doctr-static.mindee.com/models?id=v0.4.0/artefact_detection-13fab8ce.zip&src=0@13fab8ced7f84583d9dccd0c634f046c3417e62a11fe1dea6efbbaba5052471b)
backgroundqr_codebar_codelogophotoTFtrainuse_polygonskwargsreturnNc                      t                      j         j        d  j        dfi | | _        t
          j                             j        |rdnd           _        t
          j                             j        d          }t          t
          j                             j        d          d          5 }t          j        |          }d d d            n# 1 swxY w Y   g  _        t          j        |          }t          |          t          |          k    rt          d          t           j        }|                                D ]\  }	}
t
          j                            t
          j                            ||	                    s0t)          dt
          j                            ||	                     t!          j        d	 |
D             |
          }t!          j         fd|
D             t           j        
          }|rt!          j        t!          j        |d d df         |d d df         gd          t!          j        |d d df         |d d df         gd          t!          j        |d d df         |d d df         gd          t!          j        |d d df         |d d df         gd          gd          } j                            |	t3          ||          f           | _        d S )NTr   valimageszlabels.jsonrbz,the number of images and labels do not matchzunable to locate c                     g | ]
}|d          S )geometry ).0objs     _/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/doc_artefacts.py
<listcomp>z)DocArtefacts.__init__.<locals>.<listcomp>@   s    +M+M+MC
O+M+M+M    )dtypec                 P    g | ]"}j                             |d                    #S )label)CLASSESindex)r   r   selfs     r   r   z)DocArtefacts.__init__.<locals>.<listcomp>A   s.    -`-`-`SVdl.@.@W.N.N-`-`-`r   r   r   )axis      )boxeslabels)super__init__URLSHA256r   ospathjoinrootopenjsonloaddatalistdirlenAssertionErrornpfloat32itemsexistsFileNotFoundErrorasarrayint64stackappenddict)r#   r   r   r   tmp_rootfr)   img_listnp_dtypeimg_namer    r(   classes	__class__s   `            r   r+   zDocArtefacts.__init__&   s    	4dEEfEEE
 GLLu,GGG%HH	7<<	844"',,ty-88$?? 	"1Yq\\F	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"68	:h''v;;#h--'' !OPPP:%||~~ 	L 	LOHe7>>"',,x"B"BCC `'(^BGLLS[<\<\(^(^___ !#
+M+Mu+M+M+MU] ^ ^ ^E"$*-`-`-`-`Z_-`-`-`hjhp"q"q"qG 
%1+uQQQT{!;"EEE%1+uQQQT{!;"EEE%1+uQQQT{!;"EEE%1+uQQQT{!;"EEE	    Ih5(I(I(IJKKKK			s   7CCCc                     d| j          S )Nztrain=)r   )r#   s    r   
extra_reprzDocArtefacts.extra_reprP   s    $
$$$r   )TF)__name__
__module____qualname____doc__r,   r-   r!   boolr   r+   strrK   __classcell__)rI   s   @r   r   r      s           cCOFDDDG "( (( ( 	(
 
( ( ( ( ( (T%C % % % % % % % %r   )
r3   r.   typingr   numpyr9   datasetsr   __all__r   r   r   r   <module>rW      s     				           # # # # # #
@% @% @% @% @%= @% @% @% @% @%r   