
    jx                         d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZ dgZ G d	 de
          ZdS )
    N)Path)Any)tqdm   )AbstractDataset)convert_target_to_relativecrop_bboxes_from_imageCOCOTEXTc                   Z     e Zd ZdZ	 	 	 	 ddededededed	ed
eddf fdZdefdZ xZ	S )r
   a  
    COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
    <https://arxiv.org/pdf/1601.07140v2>`_ |
    `"homepage" <https://bgshih.github.io/cocotext/>`_.

    >>> # NOTE: You need to download the dataset first.
    >>> from doctr.datasets import COCOTEXT
    >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
    >>>                     label_path="/path/to/coco_text/cocotext.v2.json")
    >>> img, target = train_set[0]
    >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
    >>> label_path = "/path/to/coco_text/cocotext.v2.json")
    >>> img, target = test_set[0]

    Args:
        img_folder: folder with all the images of the dataset
        label_path: path to the annotations file of the dataset
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `AbstractDataset`.
    TF
img_folder
label_pathtrainuse_polygonsrecognition_taskdetection_taskkwargsreturnNc           
      X    t                      j        |fd|st          nd i| |r|rt          d          t          j                            |          rt          j                            |          s3t          dt          j                            |          s|n|           |}| _        t          j
        }	g | _        t          |d          5 }
t          j        |
          }d d d            n# 1 swxY w Y   fd|d                                         D             }t!          |dt#          |                    D ]W\  }t          j                            ||d	                   }t          j                            |          st          d
|           fd|d                                         D             }|sg }|D ]i}|d         \  }}}}|r/t          j        ||g||z   |g||z   ||z   g|||z   gg|	          }n||||z   ||z   g}|                    |d         |f           jt-          | \  }}|rt/          t          j                            ||          t          j        |t2                                        d                    }t-          |t7          |                    D ]'\  }}|r d|vr| j                            ||f           (|rK| j                            |t          j        |t2                                        d          f           | j                            |t9          t          j        |t2                                        d          t7          |                    f           Y|| _        d S )Npre_transformsz 'recognition' and 'detection task' cannot be set to True simultaneously.  To get the whole dataset with boxes and labels leave both parameters to False zunable to find rc                 @    g | ]}|d          d         dk    k    |S )r   setr    ).0imgr   s     [/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/datasets/coco_text.py
<listcomp>z%COCOTEXT.__init__.<locals>.<listcomp>N   s1    ```SSVE]g=UZ_<_<_S<_<_<_    imgszPreparing and Loading COCOTEXT)desctotal	file_namezUnable to locate c                 ^    g | ])}|d          t                    k    |d         dk    '|*S )image_id
legibilitylegible)int)r   annimg_ids     r   r   z%COCOTEXT.__init__.<locals>.<listcomp>Y   sG       z?c&kk11c,6G96T6T 6T6T6Tr   annsbbox)dtypeutf8_stringr   )min)img_pathgeoms )boxeslabels)super__init__r   
ValueErrorospathexistsFileNotFoundErrorr   npfloat32dataopenjsonloaditemsr   lenjoinvaluesarrayappendzipr	   asarrayr'   cliplistdictroot)selfr   r   r   r   r   r   r   tmp_rootnp_dtypefiler=   	img_itemsimg_infor/   annotations_targets
annotationxywhboxtext_targetsbox_targetscropscroplabelr)   	__class__s      `                        @r   r5   zCOCOTEXT.__init__-   si    		
 	
IY'c'A'A_c	
gm	
 	
 	
  	 	d   w~~j)) 	v
1K1K 	v#$tWaHbHb6rjjhr$t$tuuu
:]_	*c"" 	#d9T??D	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# a```DL$6$6$8$8```	 !%Y5U]`aj]k]k l l l 5	 5	FHw||J0EFFH 7>>(++ H'(FH(F(FGGG   <..00  K  H) B B
'/
1a /(FUAJUAENAJ	 '  CC aQA.CM!:C @AAAA(+X%L+ .W\\(H==RZP[cfEgEgEgElElqrElEsEs   $'ud<.@.@#A#A 8 8KD% 8E!1!1	(($7778   	  (BJ{#,N,N,N,S,SXY,S,Z,Z![\\\\	  rz+SAAAFF1FMMVZ[gVhVhiii"    
 			s   C::C>C>c                     d| j          S )Nztrain=)r   )rM   s    r   
extra_reprzCOCOTEXT.extra_repr   s    $
$$$r   )TFFF)
__name__
__module____qualname____doc__strboolr   r5   rb   __classcell__)r`   s   @r   r
   r
      s         8 "!&$[ [[ [ 	[
 [ [ [ [ 
[ [ [ [ [ [z%C % % % % % % % %r   )r?   r7   pathlibr   typingr   numpyr;   r   datasetsr   utilsr   r	   __all__r
   r   r   r   <module>rp      s     				                       % % % % % % E E E E E E E E,w% w% w% w% w% w% w% w% w% w%r   