
    jO                         d dl mZ d dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZ d dlmZmZmZmZ d dlmZ dgZ G d de          Z G d	 d
e          ZdS )    )AnyN)fclusterdata)BlockDocumentKIEDocumentKIEPageLinePage
PredictionWord)estimate_page_angleresolve_enclosing_bboxresolve_enclosing_rbboxrotate_boxes)NestedObjectDocumentBuilderc                      e Zd ZdZ	 	 	 	 ddedededed	d
f
dZedej	        d	e
ej	        ej	        f         fd            Zdej	        dee         d	eee                  fdZdej	        d	eee                  fdZedej	        deee                  d	eeee                           fd            Zdej	        dej	        dee
eef                  deeeef                  d	ee         f
dZd	efdZ	 	 ddeej	                 deej	                 deej	                 deee
eef                           dee
eef                  deeeef                  deeeef                  d
z  deeeef                  d
z  d	efdZd
S ) r   a  Implements a document builder

    Args:
        resolve_lines: whether words should be automatically grouped into lines
        resolve_blocks: whether lines should be automatically grouped into blocks
        paragraph_break: relative length of the minimum space separating paragraphs
        export_as_straight_boxes: if True, force straight boxes in the export (fit a rectangle
            box to all rotated boxes). Else, keep the boxes format unchanged, no matter what it is.
    TFQ?resolve_linesresolve_blocksparagraph_breakexport_as_straight_boxesreturnNc                 >    || _         || _        || _        || _        d S Nr   r   r   r   )selfr   r   r   r   s        W/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/doctr/models/builder.py__init__zDocumentBuilder.__init__   s*     +,.(@%%%    boxesc                    | j         dk    r^t          | t          |            dd          } t          j        |                     d          |                     d          fd          } | dddf         d	| dddf         z  t          j        | dddf         | dddf         z
            z  z                                   | fS )
a  Sort bounding boxes from top to bottom, left to right

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox)

        Returns:
            tuple: indices of ordered boxes of shape (N,), boxes
                If straight boxes are passed tpo the function, boxes are unchanged
                else: boxes returned are straight boxes fitted to the straightened rotated boxes
                so that we fit the lines afterwards to the straigthened page
           )   r$   g      @)	loc_predsangle
orig_shape	min_angle   Nr      )	ndimr   r   npconcatenateminmaxmedianargsort)r!   s    r   _sort_boxeszDocumentBuilder._sort_boxes*   s     :?? *5111'	  E NEIIaLL%))A,,#?DDEaaada%1+o	%1+aaaQRd:S0T0TTT]]__afffr    	word_idcsc                    g }fd|df                                                                          D             t                    dk     r|                               nd         g}dd         D ]`}d}||d                  }||df         |d         z
  }|| j        k     rd}|r|                    |           g }|                    |           a|                    |           |S )	a   Split a line in sub_lines

        Args:
            boxes: bounding boxes of shape (N, 4)
            word_idcs: list of indexes for the words of the line

        Returns:
            A list of (sub-)lines computed from the original line (words)
        c                      g | ]
}|         S  r7   ).0idxr4   s     r   
<listcomp>z6DocumentBuilder._resolve_sub_lines.<locals>.<listcomp>M   s    VVVYs^VVVr    r   r+   r)   NTr*   F)r2   tolistlenappendr   )	r   r!   r4   linessub_lineihoriz_breakprev_boxdists	     `      r   _resolve_sub_linesz"DocumentBuilder._resolve_sub_linesA   s    VVVVuY\/B/J/J/L/L/S/S/U/UVVV	 y>>ALL####!!~Hqrr] # #" ".QT{Xa[0$..."'K "LL***!H""""LL"""r    c                    |                      |          \  }}t          j        |dddf         |dddf         z
            }g }|d         g}||d                  ddg                                         }|dd         D ]}d}t	          ||         ddg                                         |t          |          z  z
            }	|	|dz  k     rd}|r-|                    |                     ||                     g }d}|                    |           |||         ddg                                         z  }t          |          dk    r)|                    |                     ||                     |S )zOrder boxes to group them in lines

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox

        Returns:
            nested list of box indices
        Nr#   r)   r   Tr+   F)	r3   r-   r1   meanabsr<   extendrD   r=   )
r   r!   idxsy_medr>   wordsy_center_sumr9   
vert_breaky_dists
             r   _resolve_lineszDocumentBuilder._resolve_linesg   s    &&u--e 	%1+aaad344a	T!W~q!f-22448 	6 	6CJ sQF+0022\CJJ5NNOOF	!!"
 !T44UEBBCCC LLE#J1v.33555LL u::>>LL00>>???r    r>   c                      j         dk    r!t          j         fdD                       }n, fdD             }t          j        d |D                       } j         dk    rt          j        |ddddf         |ddddf         z   dz  |ddddf         |ddddf         z   dz  |ddddf         |ddddf         z   dz  |ddddf         |ddddf         z   dz  |ddddf         |ddddf         z   dz  |ddddf         |ddddf         z   dz  fd	
          }nt          j        |dddf         |dddf         z   dz  |dddf         |dddf         z   dz  |dddf         |dddf         z   dz  |dddf         |dddf         z   dz  |dddf         |dddf         fd	
          }t	          |dddd          }i }t          |          D ]=\  }}||                                v r||                             |           7|g||<   >fd|                                D             }	|	S )zOrder lines to group them in blocks

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2)
            lines: list of lines, each line is a list of idx

        Returns:
            nested list of box indices
        r#   c                 F    g | ]}t          fd |D                       S )c           	      H    g | ]}t          |d d d d f                   S r   tupler8   r9   r!   s     r   r:   z>DocumentBuilder._resolve_blocks.<locals>.<listcomp>.<listcomp>   s4    (Q(Q(QSuS!!!QQQY/?)@)@(Q(Q(Qr    )r   r8   liner!   s     r   r:   z3DocumentBuilder._resolve_blocks.<locals>.<listcomp>   sF     0 0 0 ((Q(Q(Q(QD(Q(Q(QRR0 0 0r    c                 F    g | ]}t          fd |D                       S )c           	      t    g | ]4}t          |d df                   t          |dd f                   f5S )Nr+   rS   rU   s     r   r:   z>DocumentBuilder._resolve_blocks.<locals>.<listcomp>.<listcomp>   sJ    'g'g'g[^uS"1"W~)>)>eCQRQSQSGn@U@U(V'g'g'gr    )r   rV   s     r   r:   z3DocumentBuilder._resolve_blocks.<locals>.<listcomp>   sG        ''g'g'g'gbf'g'g'ghh  r    c                 ,    g | ]\  \  }}\  }}||||fS r7   r7   )r8   x1y1x2y2s        r   r:   z3DocumentBuilder._resolve_blocks.<locals>.<listcomp>   s.    #\#\#\9M(2rHRRR$4#\#\#\r    Nr   r)   r+   r*   )axisg?   distance	euclidean)tdepth	criterionmetricc                 ,    g | ]}fd |D             S )c                      g | ]
}|         S r7   r7   )r8   r9   r>   s     r   r:   z>DocumentBuilder._resolve_blocks.<locals>.<listcomp>.<listcomp>   s    ///#5:///r    r7   )r8   blockr>   s     r   r:   z3DocumentBuilder._resolve_blocks.<locals>.<listcomp>   s.    NNNE///////NNNr    )	r,   r-   asarraystackr   	enumeratekeysr=   values)
r!   r>   	box_lines
_box_linesbox_featuresclusters_blocksline_idxcluster_idxblockss
   ``        r   _resolve_blockszDocumentBuilder._resolve_blocks   s=    :??$&J 0 0 0 0!0 0 0 % %II
   !  J 
#\#\Q[#\#\#\]]I :??')xqqq!Qw')AAAq!G*<<Aqqq!Qw')AAAq!G*<<Aqqq!Qw')AAAq!G*<<Aqqq!Qw')AAAq!G*<<Aqqq!Qw')AAAq!G*<<Aqqq!Qw')AAAq!G*<<A 
( 
( 
(LL 8qqq!t_yA6!;qqq!t_yA6!;qqq!t_yA6!;qqq!t_yA6!;aaadOaaadO 
 
 
L  1
[fggg(*%.x%8%8 	2 	2!Hkgllnn,,$++H5555(0z$$ ONNNW^^=M=MNNNr    objectness_scores
word_predscrop_orientationsc                 f   j         d         t                    k    r-t          dj         d          dt                               j         d         dk    rg S }| j        r||                     |j        dk    r|n|ddddf                   }| j        rCt          |          dk    r0|                     |j        dk    r|n|ddddf         |          }n<|g}n8|                     |j        dk    r|n|ddddf                   d         g}|g}fd|D             }|S )	a  Gather independent words in structured blocks

        Args:
            boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
            objectness_scores: objectness scores of all detected words of the page, of shape N
            word_preds: list of all detected words of the page, of shape N
            crop_orientations: list of dictoinaries containing
                the general orientation (orientations + confidences) of the crops

        Returns:
            list of block elements
        r   Incompatible argument lengths: , r#   Nr`   r)   c                 L    g | ] }t          fd |D                       !S )c                 L    g | ] }t          fd |D                       !S )c           
         g | ]}j         d k    r`t          g |         t          d |                                         D                       t	          |                   |         R  nVt          g |         |df         |df         f|df         |d f         fft	          |                   |         R  S )r#   c              3   4   K   | ]}t          |          V  d S r   rS   r8   pts     r   	<genexpr>zQDocumentBuilder._build_blocks.<locals>.<listcomp>.<listcomp>.<listcomp>.<genexpr>  s(      FFBeBiiFFFFFFr    r   r)   r+   )r,   r   rT   r;   floatr8   r9   r!   rz   rx   ry   s     r   r:   zGDocumentBuilder._build_blocks.<locals>.<listcomp>.<listcomp>.<listcomp>   s-        zQ  #CFF%*2C2C2E2EFFFFF /455 *#.	     #CQ-sAv7%Q-sTUv9WX /455 *#.	    r    )r	   )r8   rW   r!   rz   rx   ry   s     r   r:   z<DocumentBuilder._build_blocks.<locals>.<listcomp>.<listcomp>   sl       " !          $      r    )r   )r8   r>   r!   rz   rx   ry   s     r   r:   z1DocumentBuilder._build_blocks.<locals>.<listcomp>   sl     
 
 
( '        " "#    
 
 
r    )	shaper<   
ValueErrorr   rO   r,   r   rw   r3   )	r   r!   rx   ry   rz   _boxesr>   rs   rv   s	    ````    r   _build_blockszDocumentBuilder._build_blocks   s   & ;q>S__,,bu{1~bbQTU_Q`Q`bbccc;q>QI  
	''&+2B2BqqqRTSTRTuVVE" "s5zzA~~..9I9IvvvVWVWVWY[Z[Y[V[}^cdd ' %%q0@0@fffQQQPRQRPRUmTTUVWXEgG
 
 
 
 
 
 
( !)
 
 
. r    c                 F    d| j          d| j         d| j         d| j         S )Nzresolve_lines=z, resolve_blocks=z, paragraph_break=z, export_as_straight_boxes=r   )r   s    r   
extra_reprzDocumentBuilder.extra_repr  sS    HT/ H H$BU H H#3H H(,(EH H	
r    pages
text_predspage_shapesorientations	languagesc	                     t          |          t          |          cxk    r't          |          cxk    rt          |          k    sMn t          |          t          |          cxk    r't          |          cxk    rt          |          k    rn nt          d          t          |t                    r|ndgt          |          z  }	t          |t                    r|ndgt          |          z  }
 j        r0t          |          dk    r|d         j        dk    rd |D             } fdt          |t          t          |                    ||||||	|
	  	        D             }t          |          S )ay  Re-arrange detected words into structured blocks

        Args:
            pages: list of N elements, where each element represents the page image
            boxes: list of N elements, where each element represents the localization predictions, of shape (*, 4)
                or (*, 4, 2) for all words for a given page
            objectness_scores: list of N elements, where each element represents the objectness scores
            text_preds: list of N elements, where each element is the list of all word prediction (text + confidence)
            page_shapes: shape of each page, of size N
            crop_orientations: list of N elements, where each element is
                a dictionary containing the general orientation (orientations + confidences) of the crops
            orientations: optional, list of N elements,
                where each element is a dictionary containing the orientation (orientation + confidence)
            languages: optional, list of N elements,
                where each element is a dictionary containing the language (language + confidence)

        Returns:
            document object
        7All arguments are expected to be lists of the same sizeNr   r#   c                     g | ]?}t          j        |                    d           |                    d           fd           @S )r)   )r-   r.   r/   r0   )r8   p_boxess     r   r:   z,DocumentBuilder.__call__.<locals>.<listcomp>H  s>    bbbQXQQ(H!LLbbbr    c                 x    g | ]6\	  }}}}}}}}}	t          |
                    ||||          ||||	          7S r7   )r
   r   )r8   page_idxr   
page_boxes
loc_scoresry   word_crop_orientationsorientationlanguager   s             r   r:   z,DocumentBuilder.__call__.<locals>.<listcomp>J  s{     
 
 
 udE:z:G]_jlt ""*	   
 
 
r    )	r<   r   
isinstancelistr   r,   zipranger   )r   r   r!   rx   r   r   rz   r   r   _orientations
_languages_pagess   `           r   __call__zDocumentBuilder.__call__  s   < u::Z\\\\C0A,B,B\\\\cJ[F\F\\\\\`cdi`j`jnqo
 o
 a> a> a> a>"##a> a> a> a>'*+<'='=a> a> a> a> a> VWWW '|T::SLLU@S 	 #-Y"="=VYYD6CPUJJCV
( 	cSZZ!^^Qx}!!bb\abbb
 
 
 
 y|c%jj!!!!
y 
y
 
 
6 r    )TFr   FNN)__name__
__module____qualname____doc__boolr   r   staticmethodr-   ndarrayrT   r3   r   intrD   rO   rw   strdictr   r   r   r   r   r   r7   r    r   r   r      s         #$!&).
A 
A
A 
A 	
A
 #'
A 

A 
A 
A 
A g2: g%
BJ0F*G g g g \g,$
 $tCy $TRVWZR[_ $ $ $ $L+BJ +4S	? + + + +Z ?rz ?$tCy/ ?d4PTUXPY?F[ ? ? ? \?B>z> :> sEz*+	>
  S#X/> 
e> > > >@
C 
 
 
 
 5915H  H BJH  BJH   
+	H 
 eCJ/01H  %S/*H   S#X/H  4S>*T1H  S#X'$.H  
H  H  H  H  H  H r    c                      e Zd ZdZ	 	 ddeej                 deeeej        f                  deeeej        f                  deeeee	ee
f                  f                  dee	eef                  deeeeeeef                  f                  d	eeeef                  dz  d
eeeef                  dz  defdZdej        dej        dee	ee
f                  deeeef                  dee         f
dZdS )KIEDocumentBuildera  Implements a KIE document builder

    Args:
        resolve_lines: whether words should be automatically grouped into lines
        resolve_blocks: whether lines should be automatically grouped into blocks
        paragraph_break: relative length of the minimum space separating paragraphs
        export_as_straight_boxes: if True, force straight boxes in the export (fit a rectangle
            box to all rotated boxes). Else, keep the boxes format unchanged, no matter what it is.
    Nr   r!   rx   r   r   rz   r   r   r   c	                 .    t          |          t          |          cxk    r't          |          cxk    rt          |          k    sMn t          |          t          |          cxk    r't          |          cxk    rt          |          k    rn nt          d          t          |t                    r|ndgt          |          z  }	t          |t                    r|ndgt          |          z  }
 j        rt          |          dk    rt          t          |d                                                             j        dk    rzg }|D ]s}i }|	                                D ]E\  }}t          j        |                    d          |                    d          fd          ||<   F|                    |           t|} fdt          |t!          t          |                    ||||||	|
	  	        D             }t#          |          S )a  Re-arrange detected words into structured predictions

        Args:
            pages: list of N elements, where each element represents the page image
            boxes: list of N dictionaries, where each element represents the localization predictions for a class,
                of shape (*, 5) or (*, 6) for all predictions
            objectness_scores: list of N dictionaries, where each element represents the objectness scores for a class
            text_preds: list of N dictionaries, where each element is the list of all word prediction
            page_shapes: shape of each page, of size N
            crop_orientations: list of N dictonaries, where each element is
                a list containing the general crop orientations (orientations + confidences) of the crops
            orientations: optional, list of N elements,
                where each element is a dictionary containing the orientation (orientation + confidence)
            languages: optional, list of N elements,
                where each element is a dictionary containing the language (language + confidence)

        Returns:
            document object
        r   Nr   r#   r)   c                    	 g | ]B\	  }}}	}}t          |
	fd                                 D             ||||          CS )c           
      r    i | ]3}|                     |         |         |         |                   4S r7   )r   )r8   kr   r   r   r   ry   s     r   
<dictcomp>z:KIEDocumentBuilder.__call__.<locals>.<listcomp>.<dictcomp>  sZ         t))"1"1"1.q1	   r    )r   rm   )r8   r   r   r   r   r   r   r   r   ry   r   s         @@@@r   r:   z/KIEDocumentBuilder.__call__.<locals>.<listcomp>  s     
 
 
" udE:z:G]_jlt!         (__..    
 
 
r    )r<   r   r   r   r   nextiterrn   r,   itemsr-   r.   r/   r0   r=   r   r   r   )r   r   r!   rx   r   r   rz   r   r   r   r   straight_boxesr   straight_boxes_dictr   boxr   s   `                r   r   zKIEDocumentBuilder.__call__s  st   < u::Z\\\\C0A,B,B\\\\cJ[F\F\\\\\`cdi`j`jnqo
 o
 a> a> a> a>"##a> a> a> a>'*+<'='=a> a> a> a> a> VWWW&|T::SLLU@S 	 #-Y"="=VYYD6CPUJJCV
( 	'SZZ!^^Dq**++,,1Q66>@$ ? ?G*,'")--// ] ]313SWWUVZZ@XZ[1\1\+A.."))*=>>>>&
 
 
 
" y|c%jj!!!!
y 
y#
 
 
< 6"""r    ry   c                 R   j         d         t                    k    r-t          dj         d          dt                               j         d         dk    rg S }|                     |j        dk    r|n|ddddf                   \  }}fd|D             }|S )a  Gather independent words in structured blocks

        Args:
            boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
            objectness_scores: objectness scores of all detected words of the page
            word_preds: list of all detected words of the page, of shape N
            crop_orientations: list of orientations for each word crop

        Returns:
            list of block elements
        r   r|   r}   r#   Nr`   c                    g | ]}j         d k    rtt          |         d         |         d         t          d |                                         D                       t	          |                   |                   njt          |         d         |         d         |df         |df         f|df         |d f         fft	          |                   |                   S )r#   r   r)   c              3   4   K   | ]}t          |          V  d S r   rS   r   s     r   r   z>KIEDocumentBuilder._build_blocks.<locals>.<listcomp>.<genexpr>  s(      GGRuRyyGGGGGGr    )value
confidencegeometryobjectness_scorecrop_orientationr+   )r,   r   rT   r;   r   r   s     r   r:   z4KIEDocumentBuilder._build_blocks.<locals>.<listcomp>  s!    
 
 
   zQ  oa(%c?1-GG5:3D3D3F3FGGGGG!&'8'=!>!>!23!7     oa(%c?1- a=%Q-85a=%PSUVPV-:XY!&'8'=!>!>!23!7  
 
 
r    )r   r<   r   r3   r,   )	r   r!   rx   ry   rz   r   rI   _predictionss	    ````    r   r   z KIEDocumentBuilder._build_blocks  s    $ ;q>S__,,bu{1~bbQTU_Q`Q`bbccc;q>QI ""V[A-=-=666!!!RaR%=QQa
 
 
 
 
 
 
  !
 
 
$ r    r   )r   r   r   r   r   r-   r   r   r   rT   r   r   r   r   r   r   r   r7   r    r   r   r   h  s        $ 5915Q# Q#BJQ# Dbj)*Q#  S"*_ 56	Q#
 c4c5j(9#::;<Q# %S/*Q#  S$tCH~*>%> ?@Q# 4S>*T1Q# S#X'$.Q# 
Q# Q# Q# Q#f-z- :- sEz*+	-
  S#X/- 
j	- - - - - -r    r   )typingr   numpyr-   scipy.cluster.hierarchyr   doctr.io.elementsr   r   r   r   r	   r
   r   r   doctr.utils.geometryr   r   r   r   doctr.utils.reprr   __all__r   r   r7   r    r   <module>r      s"             0 0 0 0 0 0 a a a a a a a a a a a a a a a a a a a a s s s s s s s s s s s s ) ) ) ) ) )
R  R  R  R  R l R  R  R j
K K K K K K K K K Kr    