
    {-jn                     (   g d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
ZddlmZ ddlmZ ddlmZ d	d
lmZmZ dej        dej        defdZ	 	 d8dededededef
dZd Z	 	 d9dee         dee         dedefdZ	 d:de	ej        eef         de	ej        eef         defdZd Z d Z!d  Z"d! Z#d" Z$d# Z%d$ Z&	 d;de	ee'         ee'e'e'e'f         f         de	ee'         ee'e'e'e'f         f         d%ed&edee	ee'         ee'e'e'e'f         f                  f
d'Z(	 d<d)eeeee'         f                  d*ed&edeeeeee'         f                  eeeee'         f                  f         fd+Z)d=d-Z*defd.Z+d/ Z,d0ed1e-fd2Z.d3 Z/d4 Z0d5 Z1d>d6edefd7Z2dS )?)get_sub_regions_ocr_resget_show_colorsorted_layout_boxes    N)deepcopy)DictListOptionalTupleUnion)Image   )convert_points_to_boxes)	OCRResult   )BLOCK_LABEL_MAPREGION_SETTINGS	src_boxes	ref_boxesreturnc                 Z   g }t          |           }|dk    rt          |          dk    rt          t          |                    D ]}||         }t          j        |d         | dddf                   }t          j        |d         | dddf                   }t          j        |d         | dddf                   }t          j        |d         | dddf                   }	||z
  }
|	|z
  }t          j        |
dk    |dk    z            d         }|                    |           |S )a  
    Get the indices of source boxes that overlap with reference boxes based on a specified threshold.

    Args:
        src_boxes (np.ndarray): A 2D numpy array of source bounding boxes.
        ref_boxes (np.ndarray): A 2D numpy array of reference bounding boxes.
    Returns:
        match_idx_list (list): A list of indices of source boxes that overlap with reference boxes.
    r   Nr   r      )lenrangenpmaximumminimumwhereextend)r   r   match_idx_listsrc_boxes_numrnoref_boxx1y1x2y2pub_wpub_h	match_idxs                p/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddlex/inference/pipelines/layout_parsing/utils.pyget_overlap_boxes_idxr+   !   s)    N	NNMqS^^a//Y(( 		- 		-CnGGAJ	!!!Q$88BGAJ	!!!Q$88BGAJ	!!!Q$88BGAJ	!!!Q$88BGEGE%!)	!:;;A>I!!),,,,    TFoverall_ocr_resobject_boxesflag_withinreturn_match_idxc                    i }g |d<   g |d<   g |d<   g |d<   | d         }t          ||          }t          t          |                    }t          t	          |                    D ]}|r
||v rd}nd}n	||vrd}nd}|r|d                             | d         |                    |d                             | d         |                    |d                             | d         |                    |d                             | d         |                    dD ]}	t          j        ||	                   ||	<    |r||fn|S )aX  
    Filters OCR results to only include text boxes within specified object boxes based on a flag.

    Args:
        overall_ocr_res (OCRResult): The original OCR result containing all text boxes.
        object_boxes (list): A list of bounding boxes for the objects of interest.
        flag_within (bool): If True, only include text boxes within the object boxes. If False, exclude text boxes within the object boxes.
        return_match_idx (bool): If True, return the list of matching indices.

    Returns:
        OCRResult: A filtered OCR result containing only the relevant text boxes.
    	rec_polys	rec_texts
rec_scores	rec_boxesTF)r2   r4   r5   )r+   listsetr   r   appendr   array)
r-   r.   r/   r0   sub_regions_ocr_resoverall_text_boxesr   box_no
flag_matchkeys
             r*   r   r   ;   s   $ ')$')$(*%')$(5*+=|LLN#n--..N.//00   		#''!

"

^++!

"
 	,33,V4    ,33,V4    -44-f5    ,33,V4   8 F F#%8,?,D#E#EC   	!	n-- r,   c                    t          |           }|dk    r| S t          | d           }t          |          }g }g }g }d}	 ||k    rn||         d         d         |dz  k     r?||         d         d         d	|z  d
z  k     r!|                    ||                    |dz  }nm||         d         d         d|z  d
z  k    r!|                    ||                    |dz  }n.||z  }||z  }|                    ||                    g }g }|dz  }t          |d           }t          |d           }|r||z  }|r||z  }|S )z
    Sort text boxes in order from top to bottom, left to right
    Args:
        res: List of dictionaries containing layout information.
        w: Width of image.

    Returns:
        List of dictionaries containing sorted layout information.
    r   c                 :    | d         d         | d         d         fS )N
block_bboxr   r    xs    r*   <lambda>z%sorted_layout_boxes.<locals>.<lambda>   s    aoa.@!L/RSBT-U r,   )r>   r   TrA      r   r      c                     | d         d         S NrA   r   rB   rC   s    r*   rE   z%sorted_layout_boxes.<locals>.<lambda>   s    qq/A r,   c                     | d         d         S rI   rB   rC   s    r*   rE   z%sorted_layout_boxes.<locals>.<lambda>   s    <1C r,   )r   sortedr6   r8   )	resw	num_boxessorted_boxes_boxesnew_resres_left	res_rightis	            r*   r   r   w   s    CIA~~
 ##U#UVVVL,FGHI	A	>> 1Il#A&Q..q	,'*QUQY66OOF1I&&&FAAAY|$Q'!a%!)33VAY'''FAAxGy GNN6!9%%%HIFA'* h$B$BCCCHy&D&DEEEI 8 9Nr,   
horizontalunionbbox1bbox2	directionc                 @   d\  }}|dk    rd\  }}t          | |         ||                   }t          | |         ||                   }||z
  }|dk    rdS |dk    r:t          | |         ||                   t          | |         ||                   z
  }	n}|dk    r/t          | |         | |         z
  ||         ||         z
            }	nH|dk    r/t          | |         | |         z
  ||         ||         z
            }	nt          d| d	          |	dk    r||	z  nd
S )a  
    Calculate the IoU of lines between two bounding boxes.

    Args:
        bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
        bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
        direction (str): direction of the projection, "horizontal" or "vertical".

    Returns:
        float: Line overlap ratio. Returns 0 if there is no overlap.
    )r   r   rU   )r   r   r   rV   smalllargeInvalid mode -, must be one of ['union', 'small', 'large'].        )maxmin
ValueError)
rW   rX   rY   modestart_index	end_indexintersection_startintersection_endoverlap	ref_widths
             r*   "calculate_projection_overlap_ratiorj      sn   " "KL  !%YU;/{1CDD5+U9-=>>!33G!||qwi(%	*:;;c+k 2?
 ?
 
		 
)u[1153CeKFX3X
 
		 
)u[1153CeKFX3X
 
		 ODOOO
 
 	
 #,a--7YS8r,   c                 6   t          j        | t           j                  } t          j        |t           j                  }t          j        | d         |d                   }t          j        | d         |d                   }t          j        | d         |d                   }t          j        | d         |d                   }t          j        d||z
            }t          j        d||z
            }t          j        ||t           j                  }	t          |           }
t          |          }|dk    r	|
|z   |	z
  }nK|dk    rt          j        |
|          }n/|dk    rt          j        |
|          }nt          d	| d
          |dk    rdS |	|z  S )a  
    Calculate the overlap ratio between two bounding boxes using NumPy.

    Args:
        bbox1 (np.ndarray, list or tuple): The first bounding box, format [x_min, y_min, x_max, y_max]
        bbox2 (np.ndarray, list or tuple): The second bounding box, format [x_min, y_min, x_max, y_max]
        mode (str): The mode of calculation, either 'union', 'small', or 'large'.

    Returns:
        float: The overlap ratio value between the two bounding boxes
    dtyper   r   r   r   rV   r[   r\   r]   r^   r_   )r   r9   float64r   r   multiplycalculate_bbox_arearb   )rW   rX   rc   x_min_intery_min_interx_max_intery_max_interinter_widthinter_height
inter_area
bbox1_area
bbox2_arearef_areas                r*   calculate_overlap_ratior{      s     HU"*---EHU"*---E*U1XuQx00K*U1XuQx00K*U1XuQx00K*U1XuQx00K*Qk 9::K:a{!:;;L[,bjIIIJ$U++J$U++Jw
*Z7	:j*55	:j*55ODOOO
 
 	
 1}}s  r,   c                 l   | st          d          t          j        |           }t          j        |dddf                   }t          j        |dddf                   }t          j        |dddf                   }t          j        |dddf                   }t          j        ||||g          S )a?  
    Calculate the minimum enclosing bounding box for a list of bounding boxes.

    Args:
        bboxes (list): A list of bounding boxes represented as lists of four integers [x1, y1, x2, y2].

    Returns:
        list: The minimum enclosing bounding box represented as a list of four integers [x1, y1, x2, y2].
    z$The list of bounding boxes is empty.Nr   r   r   r   )rb   r   r9   ra   r`   )bboxesbboxes_arraymin_xmin_ymax_xmax_ys         r*    calculate_minimum_enclosing_bboxr     s      A?@@@ 8F##L F<1%&&EF<1%&&EF<1%&&EF<1%&&E 8UE5%0111r,   c                 F    t          t          j        d|                     S )z#check if the char is english letterz
^[A-Za-z]$boolrematchchars    r*   is_english_letterr   (  s    --...r,   c                 F    t          t          j        d|                     S )zcheck if the char is numericz^[\d]+$r   r   s    r*   
is_numericr   -  s    T**+++r,   c                     h d}| |v S )z
    check if the char is non-breaking punctuation

    Args:
        char (str): character to check

    Returns:
        bool: True if the char is non-breaking punctuation
    >      “   、   ，   ：   ；,-"':;rB   )r   non_breaking_punctuationss     r*   is_non_breaking_punctuationr   2  s"    ! ! ! ,,,r,   c                 t    t          t          t          |                    \  }}}}d|  d| d| d| d| dS )Nzimgs/img_in__box__z.jpg)r6   mapint)labelboxx_miny_minx_maxy_maxs         r*   construct_img_pathr   M  sQ    !%c#smm!4!4E5%I%IIeIIeIIeIIeIIIIr,   c                    | ||||f         }|j         dk    rdS |j        dk    rt          j        |          S |j        d         dk    r)t          j        |dddf         ddddf                   S t          j        |          S )z
    Crop `original_img` to [y_min:y_max, x_min:x_max], clamped to image bounds.
    Returns a C-contiguous array suitable for `PIL.Image.fromarray`.
    r   Nr   r   .)sizendimr   ascontiguousarrayshape)original_imgr   r   r   r   crops         r*   _crop_image_region_for_pilr   R  s    
 eU5[01DyA~~tyA~~#D)))z"~#DbqbM#ttt)$<===%%%r,   c           
         g }t          | j        d                   t          | j        d                   }}|D ]8}|d         t          d         v r|d         }t          t	          t           |d                             \  }}}	}
t          dt          ||                    }t          dt          |	|                    }	t          dt          ||                    }t          dt          |
|                    }
|	|k    s|
|k    rt          ||d                   }t          | |||	|
          }|t          j
        |          }|                    ||||||	|
f|d         d           :|S )Nr   r   r   image_labels
coordinatescore)pathimgr   r   r   )r   r   r   r6   r   r`   ra   r   r   r   	fromarrayr8   )r   layout_det_objsimgs_in_dochrM   det_objr   r   r   r   r   img_patharr_for_pilr   s                 r*   gather_imgsr   a  s   K|!!$%%s<+=a+@'A'AqA"  7~>>>G$E)-c#w|7L.M.M)N)N&E5%3ua==))E3ua==))E3ua==))E3ua==))E~~%)%1FGGH4eUE5 K "/+..C$"#(%">$W-    r,   ratiosmallerc                     t          |           }t          |          }t          | |d          }||k    r||k    r|s||k    r|sdS dS dS )a  
    Determine if the overlap area between two bounding boxes exceeds a given ratio
    and return the smaller (or larger) bounding box based on the `smaller` flag.

    Args:
        bbox1 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the first bounding box [x_min, y_min, x_max, y_max].
        bbox2 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the second bounding box [x_min, y_min, x_max, y_max].
        ratio (float): The overlap ratio threshold.
        smaller (bool): If True, return the smaller bounding box; otherwise, return the larger one.

    Returns:
        Optional[Union[List[int], Tuple[int, int, int, int]]]:
            The selected bounding box or None if the overlap ratio is not exceeded.
    r[   rc   r   r   N)rp   r{   )rW   rX   r   r   area1area2overlap_ratios          r*   _get_minbox_if_overlap_by_ratior     se    *  &&E&&E+E5wGGGMuUNNwNEUNN7N114r,   ?blocks	thresholdc                 @   t                      }t          |           } g }t          | d                   D ]\  }}t          |dz   t	          | d                             D ]}| d         |         }||v s||v rt          |d         |d         ||          }	|	e|d         dk    }
|d         dk    }|
|k    r(|
r|n|}|                    | d         |                    n
|	dk    r|n|}|                    |           t          |d	          D ]}| d         |= | S )
a  
    Remove overlapping blocks based on a specified overlap ratio threshold.

    Args:
        blocks (List[Dict[str, List[int]]]): List of block dictionaries, each containing a 'block_bbox' key.
        threshold (float): Ratio threshold to determine significant overlap.
        smaller (bool): If True, the smaller block in overlap is removed.

    Returns:
        Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
            A tuple containing the updated list of blocks and a list of dropped blocks.
    boxesr   r   )r   Nr   imageT)reverse)	r7   r   	enumerater   r   r   r8   addrK   )r   r   r   dropped_indexesoverlap_image_blocksrT   block1jblock2overlap_box_indexis_block1_imageis_block2_image
drop_indexindexs                 r*   remove_overlap_blocksr     su    eeOfFvg// 0 0	6q1uc&/2233 	0 	0AG_Q'FO##qO';'; ?|$|$	! ! ! !,"(/W"<"(/W"<"o55&5!<1J(//w
0KLLLL&71&<&<!J##J///-	02 666 # #7OE""Mr,   bboxc                    t          j        |           } t          j        |          }t          | j                  dk    r| nt	          | g          d         }t          |j                  dk    r|nt	          |g          d         }t          |d         |d                   }t          |d         |d                   }t          |d         |d                   }t          |d         |d                   }||k    s||k    rdS |dk    rt          j        ||||g          S |dk    r,t          j        ||g||g||g||ggt           j                  S t          d	          )
a_  
    Compute the intersection of two bounding boxes, supporting both 4-coordinate and 8-coordinate formats.

    Args:
        bbox1 (tuple): The first bounding box, either in 4-coordinate format (x_min, y_min, x_max, y_max)
                       or 8-coordinate format (x1, y1, x2, y2, x3, y3, x4, y4).
        bbox2 (tuple): The second bounding box in the same format as bbox1.
        return_format (str): The format of the output intersection, either 'bbox' or 'poly'.

    Returns:
        tuple or None: The intersection bounding box in the specified format, or None if there is no intersection.
    r   r   r   r   Nr   polyrl   z.return_format must be either 'bbox' or 'poly'.)	r   r9   r   r   r   r`   ra   int16rb   )	rW   rX   return_formatrect1rect2rq   rr   rs   rt   s	            r*   get_bbox_intersectionr     sy    HUOOEHUOOE%%**EE0G0P0PQR0SE%%**EE0G0P0PQR0SE eAha))KeAha))KeAha))KeAha))K k!![K%?%?txk;LMMM	&	 	 xk*k*k*k*	 (
 
 
 	
 IJJJr,   c           
         | \  }}}}	|\  }
}}}ddddd}|
|z
  |z  ||z
  |z  ||z
  |z  |	|z
  |z  g}t          |          }t          |          }||                    |                   }t          |          dk    r| g fS t	          d          D ]}||         }| dd         }||         ||<   g g }}|D ]w}t          ||         d          }|t          j        d	d
          k    r|                    |           I|t          j        dd          k    r|                    |           xt          |          dk    rt          |          dk    r|D ]}|         }|\  }}}}	|\  }
}}}|
|z
  |z  ||z
  |z  ||z
  |z  |	|z
  |z  g}t          |          }|                    |          }||         }||         ||<   t          |||||          \  }}t          |          dk    rfd|D             }t          |          }  nA|                    |           t          |          }||                    |                   }| |fS )aG  
    Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.

    Args:
        supplement_region_bbox (list): The supplement region bbox.
        ref_region_bbox (list): The reference region bbox.
        image_width (int): The width of the image.
        image_height (int): The height of the image.
        block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
        block_bboxes (dict): The dictionary of block bboxes.

    Returns:
        list: The new region bbox and the matched block idxes.
    r   r   r   r   )r   r   r   r   Nr[   r   #match_block_overlap_ratio_thresholdg?#split_block_overlap_ratio_thresholdg?c                      g | ]
}|         S rB   rB   ).0idxblock_bboxess     r*   
<listcomp>z1shrink_supplement_region_bbox.<locals>.<listcomp>T  s    LLLCl3/LLLr,   )r   ra   r   r   r   r{   r   getr8   r`   shrink_supplement_region_bboxr   remove)supplement_region_bboxref_region_bboximage_widthimage_heightblock_idxes_setr   r#   r$   r%   r&   x1_primey1_primex2_primey2_primeindex_conversion_mapedge_distance_listedge_distance_list_tmpmin_distance	src_indexr   	dst_indextmp_region_bboxiner_block_idxessplit_block_idxes	block_idxr   split_block_idxsplit_block_bboxmax_distance
iner_idxesmatched_bboxess        `                         r*   r   r     s4   , ,NBB-<*Hh( Q133	B+%	B,&	h+%	h,&	 &&899)**L$%7%=%=l%K%KLI
?q  %r))1XX 2U 2U(3	03%4Y%?	".0"+( 	4 	4I3i!8w  M 25s      !''	2222!45s" "   "((333  1$$$%%))'8 ! !O'3O'D$%4NBB=M:Hh(!B+5!B,6h+5h,6	*& $''9#:#:L 2 8 8 F FI 4Y ?I1A)1LOI.2O''#$($3 3/OZ :!++  ,LLLL;KLLLN%En%U%U"E")),777566L,-?-E-El-S-STII!#333r,   c                    || S | \  }}}}|\  }}}}	t          t          ||                    }t          t          ||                    }t          t          ||                    }t          t          ||	                    }	||||	g}|S )zUpdate region box with bbox)r   ra   r`   )
r   
region_boxr#   r$   r%   r&   	x1_region	y1_region	x2_region	y2_regions
             r*   update_region_boxr  ^  s    NBB1;.Iy)YCI&&''ICI&&''ICI&&''ICI&&''IY	9=Jr,   formula_res_listocr_resc                 T   | D ]#}t          t          t          |d                             \  }}}}||f||f||f||fg}|d                             |           |d         }|d                             |           |d         j        dk    rt          j        |d         g          |d<   n&t          j        |d         |d         gf          |d<   |d                             d           |d                             |           |d	                             d
           %dS )zConvert formula result to OCR result format

    Args:
        formula_res_list (List): Formula results
        ocr_res (dict): OCR result
    Returns:
        ocr_res (dict): Updated OCR result
    dt_polysrec_formular3   r5   r   
rec_labelsformular2   r4   r   N)r6   r   r   r8   r   r   r9   vstack)	r  r  formula_resr   r   r   r   poly_pointsformula_res_texts	            r*   !convert_formula_res_to_ocr_formatr  p  sR    ( ( (%)#c;z3J*K*K%L%L"ueUENENENEN	
 	
"";/// +M :##$4555;$))#%8[-D,E#F#FGK  #%9%J(?'@A$ $GK  	$$Y///##K000$$Q'''''( (r,   c                 j    t          t          |           \  }}}}t          ||z
  ||z
  z            }|S )zCalculate bounding box area)r   floatabs)r   r#   r$   r%   r&   areas         r*   rp   rp     s8    %%NBBRBG$%%DKr,   c                 >    | \  }}|\  }}||z
  dz  ||z
  dz  z   dz  S )z/Calculate euclidean distance between two pointsr   g      ?rB   )point1point2r#   r$   r%   r&   s         r*   caculate_euclidean_distr     s4    FBFB"WNb2g!^+33r,   c                 &   d}d}| j         }| j        }| j        }| j        }||j        }|j        }	t          |j        |	z
            dk     }
|dk    }||j        k     o
||j         k    }|rCt          |j         |          }t          |j        |          }t          ||	z
            dk     }
d}nt          | j         |j        z
            }||z
  dk     }|
r$|r"|r |t          |j        | j                  k     rd}n||z
  dk     rd}||z
  dk     rd}||fS )a	  Get segment start flag and end flag based on previous block

    Args:
        block (Block): Current block
        prev_block (Block): Previous block

    Returns:
        seg_start_flag (bool): Segment start flag
        seg_end_flag (bool): Segment end flag
    TN
   r   r   F)	start_coordinateend_coordinateseg_start_coordinateseg_end_coordinatenum_of_linesr  ra   r`   width)block
prev_blockseg_start_flagseg_end_flagcontext_left_coordinatecontext_right_coordinater%  r&  num_of_prev_linespre_block_seg_end_coordinateprev_end_space_smallprev_lines_more_than_oneoverlap_blocksedge_distancecurrent_start_space_smalls                  r*   get_seg_flagr6    s    NL#4$3 51&3'1'D$
),HHIIBN 	 $5q#8  $j&?? G(:+FF 	  	T&)+-D' '# (+)+C( ($ ,/KKLLrQ ! MM 69R RSSM$8;R$RUW$W! !	#)	# )	# J$4ek B BBB"N"99B>>"N"44r99<''r,   r   c                     |rdddddddddd	d

}n<i dddddddddd	ddddddddddddddddddddddd dd!d"d"d#d$}d}|                     | |          S )%N)         d   )r8        r;  )f   r>  r8  r;  )r>     r8  r;  )   r8  3   r;  )      rB  r;  )r@  r   L   r;  )5         r;  )      rH  r;  )
	doc_titledoc_title_textparagraph_titlesub_paragraph_titlevisionvision_titlevision_footnotenormal_textcross_layoutcross_referencerL  rJ  table_title)r8  r8  r>  r;  figure_titlechart_titlerP  textvertical_textinline_formular  )r   r8  r   r;  display_formulaabstractcontent)(      \   r;  seal)   ra  ra  r;  table)   rc  r   r;  r   figure)      re  r;  )   r8  rc  r;  )r8        r;  )chart	referencereference_content	algorithm)r   )r   order_labellabel_colorsdefault_colors       r*   r   r     sT    /
-23#7)03,/3
 
!
3!
 -!

 /!
 0!
 /!
 3!
 %!
 .!
 /!
 '!
  /!!
" ,#!
& )'!
* (+!
. '/!
2 (3!
6 )7!
8 *-!5-A!
 !
 !
D )ME=111r,   )TF)rU   rV   )rV   )T)r   T)r   )F)3__all__r   copyr   typingr   r   r	   r
   r   numpyr   PILr   
componentsr   
ocr.resultr   settingr   r   ndarrayr+   r   r   r   r  strrj   r6   tupler{   r   r   r   r   r   r   r   r   r   r   r   r   r  dictr  rp   r   r6  r   rB   r,   r*   <module>r}     sK     
			       5 5 5 5 5 5 5 5 5 5 5 5 5 5           0 0 0 0 0 0 " " " " " " 5 5 5 5 5 5 5 5RZ BJ 4    : "	9 999 9 	9
 9 9 9 9x4 4 4t "		,9 ,9;,9;,9 ,9
 ,9 ,9 ,9 ,9d 
.! .!T5().!T5().! 	.! .! .! .!b2 2 24/ / /
, , ,
- - -6J J J
& & &  H 	 cE#sC"4556cE#sC"4556  	
 eDIuS#sC%7889:   F RV0 0c49n%&0380JN0
4S$s)^$%tDd3i,@'AAB0 0 0 0f+K +K +K +K\W4 
W4 W4 W4 W4t  $( (t ( ( ( (>  4 4 4@( @( @(F22 22# 22U 22 22 22 22 22 22r,   