
    i                     <   S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKr	SSK
JrJr   S rS rS rS$S jrS rS	 rS
 rS rS$S jrS%S jrS rS rS rS rS rS rS rS rS rS r S r!S&S jr"S r#S r$S r%S r&S r'S r( " S  S!5      r) " S" S#\)5      r*g)'zp
This code is refer from:
https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/match.py
    N)Polygon
MultiPointc                     / nU  H%  n[        U5      S:X  a  M  UR                  U5        M'     [        R                  " U5      $ )zj
remove [0., 0., 0., 0.] in structure master bboxes.
len(bboxes.shape) must be 2.
:param bboxes:
:return:
g        )sumappendnparray)bboxes
new_bboxesbboxs      n/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddleocr/ppstructure/table/table_master_match.pyremove_empty_bboxesr   #   sA     Jt9$  88J    c                 B   [        U R                  5      S:X  a\  [        R                  " U 5      nU S   U S   S-  -
  US'   U S   U S   S-  -
  US'   U S   U S   S-  -   US'   U S   U S   S-  -   US'   U$ [        U R                  5      S:X  a  [        R                  " U 5      nU S S 2S4   U S S 2S4   S-  -
  US S 2S4'   U S S 2S4   U S S 2S4   S-  -
  US S 2S4'   U S S 2S4   U S S 2S4   S-  -   US S 2S4'   U S S 2S4   U S S 2S4   S-  -   US S 2S4'   U$ [        eN   r         lenshaper   
empty_like
ValueErrorr
   r   s     r   	xywh2xyxyr   2   sZ   
6<<A]]6*
q	F1IM1
1q	F1IM1
1q	F1IM1
1q	F1IM1
1	V\\	a	]]6*
!!Q$<&A,*::
1a4!!Q$<&A,*::
1a4!!Q$<&A,*::
1a4!!Q$<&A,*::
1a4r   c                 j   [        U R                  5      S:X  ab  [        R                  " U 5      nU S   U S   U S   -
  S-  -   US'   U S   U S   U S   -
  S-  -   US'   U S   U S   -
  US'   U S   U S   -
  US'   U$ [        U R                  5      S:X  a  [        R                  " U 5      nU S S 2S4   U S S 2S4   U S S 2S4   -
  S-  -   US S 2S4'   U S S 2S4   U S S 2S4   U S S 2S4   -
  S-  -   US S 2S4'   U S S 2S4   U S S 2S4   -
  US S 2S4'   U S S 2S4   U S S 2S4   -
  US S 2S4'   U$ [        er   r   r   s     r   	xyxy2xywhr   E   sv   
6<<A]]6*
q	VAY%:a$??
1q	VAY%:a$??
1q	F1I-
1q	F1I-
1	V\\	a	]]6*
!!Q$<6!Q$<&A,+F!*KK
1a4!!Q$<6!Q$<&A,+F!*KK
1a4!!Q$<&A,6
1a4!!Q$<&A,6
1a4r   c                    [         R                  R                  U 5      (       a"  [        R                  " [        U S5      5      nU$ [         R                  R                  U 5      (       a  [        5       n[         R                  R                  U SR                  U5      5      n[        R                  " U5      nU H4  n[        R                  " [        US5      5      nUR                  U5        M6     U$ [        e)Nrbz{}_*.pkl)ospathisfilepickleloadopenisdirdictjoinformatglobupdater   )r!   prefixdatasearch_pathpklspkl	this_datas          r   pickle_loadr2   X   s    	ww~~d{{4d+, K 
t		vggll4):):6)BCyy%CDdO4IKK	" 
 K r   c                     [         R                  " SS/[         R                  S9nU S   U S   sUS'   US'   U S   U S   sUS'   US	'   U S   U S
   sUS'   US'   U S   U S
   sUS'   US'   U$ )zH
Convert two points format to four points format.
:param xyxy:
:return:
   r   )dtyper   r   )r   r   )r   r   )r   r   )r   r   r   )r   r   )r   r   )r   r   )r   r   )r   zerosfloat32)xyxynew_bboxs     r   convert_coordr:   g   s     xxAbjj1H%)!Wd1g"HTNHTN%)!Wd1g"HTNHTN%)!Wd1g"HTNHTN%)!Wd1g"HTNHTNOr   c                 `   [        U 5      R                  n[        U5      R                  n[        R                  " X45      nUR	                  U5      (       d  SnU$ UR                  U5      R                  n[        U5      R                  R                  nUS:X  a  SnU$ [        U5      U-  nU$ )Nr   )	r   convex_hullr   concatenate
intersectsintersectionarear   float)bbox1bbox2
bbox1_poly
bbox2_poly
union_polyiou
inter_area
union_areas           r   cal_iourJ   u   s    ++J++J/J  ,, J  ,,Z8==

+77<<
?C J 
#j0CJr   c                 p    U S   US   -
  nU S   US   -
  n[         R                  " US-  US-  -   5      nU$ )Nr   r   r   )mathsqrt)p1p2delta_xdelta_yds        r   cal_distancerS      sD    ebemGebemG		7A:'1*-.AHr   c                     SnSnU S   US   S   :  a  U S   US   S   ::  a  SnU S   US   S   :  a  U S   US   S   ::  a  SnU(       a  U(       a  gg)z
Find if center_point inside the bbox(corner_point) or not.
:param center_point: center point (x, y)
:param corner_point: corner point ((x1,y1),(x2,y2))
:return:
Fr   r   T )center_pointcorner_pointx_flagy_flags       r   	is_insiderZ      sw     FFQ<?1--Q<?1--Q<?1--Q<?1--&r   c                     US:X  a  SnOUS:X  a  SnO[         e/ nU  Vs/ s H  oUU   PM	     nn[        U5       H  nXv;  d  M
  UR                  U5        M     U$ s  snf )a  
Find out no match end2end bbox in previous match list.
:param match_list: matching pairs.
:param all_end2end_nums: numbers of end2end_xywh
:param type: 'end2end' corresponding to idx 0, 'master' corresponding to idx 1.
:return: no match pse bbox index list
end2endr   masterr   )r   ranger   )
match_listall_end2end_numstypeidxno_match_indexsmmatched_bbox_indexsns           r   find_no_matchrg      sq     y		O+56:aS6:6#$'""1% % 	 7s   Ac                 8    [        U S   US   -
  5      nX2:  a  gg)Nr   TF)abs)	this_bboxtarget_bbox	thresholddeltas       r   is_abs_lower_than_thresholdrn      s%    	!{1~-.Er   c                     U Vs/ s H  o"S   PM	     nn[        U5      nS/[        U5      -  nS/[        U5      -  n[        X5       H!  u  prUR                  US   5      nX&U'   XuU'   M#     XV4$ s  snf )z
Sorted the bbox in the same line(group)
compare coord 'x' value, where 'y' value is closed in the same group.
:param g: index in the same group
:param bg: bbox in the same group
:return:
r   N)sortedr   zipindex)	gbgbg_itemxs	xs_sortedg_sorted	bg_sortedg_itemrb   s	            r   sort_line_bboxr{      s     %'	'B!*BB	'r
IvI&HY'Iq:oogaj) # &
  
(s   A1c                     / n/ n[        X5       H;  u  pE[        XE5       H'  u  pgUR                  U5        UR                  U5        M)     M=     X#4$ N)rq   r   )sorted_groupssorted_bbox_groupsidxsr
   group
bbox_grouprs   rt   s           r   flattenr      sQ    DF C+EAKKNMM" , D <r   c                 <   / n/ n[        X5       H  u  pEUn[        U5      S:X  a&  UR                  U/5        UR                  U/5        M<  Sn[        X#5       H>  u  p[        XiS   5      (       d  M  UR                  U5        U	R                  U5        Sn  O   U(       a  M  UR                  U/5        UR                  U/5        M     / / p[        X#5       H4  u  p[	        X5      u  pU
R                  U5        UR                  U5        M6     S/[        U
5      -  nS/[        U5      -  nU V	s/ s H
  oS   S   PM     nn	[        U5      n[        X5       H$  u  pUR                  U	S   S   5      nXU'   XU'   M&     [        X5      u  nnUUUU4$ s  sn	f )z
This function will group the render end2end bboxes in row.
:param end2end_xywh_bboxes:
:param no_match_end2end_indexes:
:return:
r   FTNr   )rq   r   r   rn   r{   rp   rr   r   )end2end_xywh_bboxesno_match_end2end_indexesgroupsbbox_groupsrr   end2end_xywh_bboxrj   flagrs   rt   
tmp_groupstmp_bbox_groupsrx   ry   r~   r   ys	sorted_ysrb   end2end_sorted_idx_listend2end_sorted_bbox_lists                        r   	sort_bboxr      s    FK$'(@$V %	v;!MM5'"	{+DV1.yQ%@@HHUOIIi(D 2 4ug&""I;/# %W( #%bV),Q3(#y) * FS_,M#o"66,	-_rQ%(_B	-r
IZ1oobeAh'c"$3 2 9@955
 	  	  
.s   4Fc                 "   / n/ nU  H5  nUS   nUR                  U5        [        U5      nUR                  U5        M7     [        R                  " U5      n[        R                  " U5      nUS   n	[	        U	5      n	U	n
[        U	5      nUnUUUU
4$ )a
  
This function is use to convert end2end results and structure master results to
List of xyxy bbox format and List of xywh bbox format
:param end2end_result: bbox's format is xyxy
:param structure_master_result: bbox's format is xywh
:return: 4 kind list of bbox ()
r   )r   r   r   r	   r   )end2end_resultstructure_master_resultend2end_xyxy_listend2end_xywh_listend2end_itemsrc_bbox	xywh_bboxend2end_xyxy_bboxesr   
src_bboxesstructure_master_xyxy_bboxesstructure_master_xywh_bboxess               r   get_bboxes_listr     s     &'  *h'	  +	 '
 ((#45((#45 )0J$Z0J#- *%I#,  	$$	 r   c                     / n[        U 5       Hd  u  p4[        U5       HP  u  pVUS   US   pUS   US   US   US   4u  ppXx4nX4X44n[        X5      (       d  M>  UR                  X5/5        MR     Mf     U$ )a	  
Judge end2end Bbox's center point is inside structure master Bbox or not,
if end2end Bbox's center is in structure master Bbox, get matching pair.
:param end2end_xywh_bboxes:
:param structure_master_xyxy_bboxes:
:return: match pairs list, e.g. [[0,1], [1,2], ...]
r   r   r   r   )	enumeraterZ   r   )r   r   match_pairs_listiend2end_xywhjmaster_xyxy	x_end2end	y_end2end	x_master1	y_master1	x_master2	y_master2center_point_end2endcorner_point_masters                  r   center_rule_matchr   A  s     $%89'(DENA#/?LOyAAAA	:6I) %.#9 $-#9I;Q"R-CC ''/ F : r   c                     / n[        X5       Hl  u  pESnSS/n[        U5       H9  u  p[        U5      n
[        U	5      n[        X5      nX:  d  M-  XHsUS'   US'   UnM;     US   c  M[  UR	                  U5        Mn     U$ )z
Use iou to find matching list.
choose max iou value bbox as match pair.
:param end2end_xyxy_bboxes:
:param end2end_xyxy_indexes: original end2end indexes.
:param structure_master_xyxy_bboxes:
:return: match pairs list, e.g. [[0,1], [1,2], ...]
r   Nr   )rq   r   r:   rJ   r   )r   end2end_xyxy_indexesr   match_pair_listend2end_xyxy_indexend2end_xyxymax_iou	max_matchr   r   end2end_4xy
master_4xyrG   s                r   iou_rule_matchr   Z  s     O,/-( 4L	'(DENA'5K&{3J+2C}-?*	!il F Q<y)!-" r   c                    / n[        X#5       Hv  u  pV[        R                  nSS/n[        X5       H=  u  pU
S   U
S   pUS   US   pX4nX4n[        UU5      nUU:  d  M1  XsUS'   US'   UnM?     UR	                  U5        Mx     U$ )aw  
Get matching between no-match end2end bboxes and no-match master bboxes.
Use min distance to match.
This rule will only run (no-match end2end nums > 0) and (no-match master nums > 0)
It will Return master_bboxes_nums match-pairs.
:param end2end_indexes:
:param end2end_bboxes:
:param master_indexes:
:param master_bboxes:
:return: match_pairs list, e.g. [[0,1], [1,2], ...]
r   r   )rq   r   infrS   r   )end2end_indexesend2end_bboxesmaster_indexesmaster_bboxesmin_match_listr   master_bboxmin_distance	min_matchr   end2end_bboxr   r   x_mastery_masterend2end_pointmaster_pointdists                     r   distance_rule_matchr   z  s     Nn<vvF	"?COA#/?LOy!,QQh&2M$/Lm<Dl"-.*	!il#  D 	i( = r   c                 |    [        U 5      U-   n/ n[        X5       H  nXU-
     nUR                  XT/5        M     U$ )z
This function will create some virtual master bboxes,
and get match with the no match end2end indexes.
:param no_match_end2end_indexes:
:param master_bbox_nums:
:return:
)r   r^   r   )r   master_bbox_numsend_numsextra_match_listr   end2end_indexs         r   extra_matchr     sR     +,/??H#.05E1EF 23 / r   c                     [        5       nU  H9  nUS   US   pCXAR                  5       ;  a  U/X'   M&  X   R                  U5        M;     U$ )zz
Convert match_list to a dict, where key is master bbox's index, value is end2end bbox index.
:param match_list:
:return:
r   r   )r'   keysr   )r_   
match_dict
match_pairr   master_indexs        r   get_match_dictr     sT     J 
&0mZ]|00(5J$$++M: ! r   c                 r    U R                  SS5      n U R                  SS5      n U R                  SS5      n U $ )z
deal successive space character for text
1. Replace ' '*3 with '<space>' which is real space is text
2. Remove ' ', which is split token, not true space
3. Replace '<space>' with ' ', to get real text
:param text:
:return:
z   z<space>  replace)texts    r   deal_successive_spacer     s9     <<+D<<R D<<	3'DKr   c                     SnU  H   nUR                  S5      (       d  M  US-  nM"     U[        U 5      :X  aV  / nU  H6  nUR                  SS5      R                  SS5      nUR                  U5        M8     SUR	                  U5      -   S-   /$ U $ )z
convert ['<b>Local</b>', '<b>government</b>', '<b>unit</b>'] to ['<b>Local government unit</b>']
PS: maybe style <i>Local</i> is also exist, too. it can be processed like this.
:param text_list:
:param break_token:
:return:
r   <b>r   r   </b>)
startswithr   r   r   r(   )	text_listbreak_tokencountr   new_text_lists        r   reduce_repeat_bbr     s     E??5!!QJE  ID<<r*2262>D  &  ((77&@AAr   c                     [        5       nU R                  5        H<  u  pEU Vs/ s H
  oaU   S   PM     nn[        Xr5      nUR                  U5      nXU'   M>     U$ s  snf )Nr   )r'   itemsr   r(   )	r   end2end_infor   match_text_dictr   end2end_index_listr   r   r   s	            r   get_match_text_dictr     su    fO,6,<,<,>(EW
EWM'/EW 	 
 %Y<		*(,% -? 
s   Ac                    / nSnU S   S:w  a  U R                  S5        X   S:w  Ga   X   S:X  a  XS-      R                  S5      (       d  XS-      R                  S5      (       a1   SR                  XUS	-   S-    5      nUS
-  nUR                  U5        OXS-      R                  S5      (       d  XS-      R                  S5      (       a1   SR                  XUS
-   S-    5      nUS-  nUR                  U5        O2UR                  X   5        US-  nOUR                  X   5        US-  n X   S:w  a  GM  UR                  S5        U$ !   [        S5         M#  = f)zW
Merge the span style token (row span or col span).
:param master_token_list:
:return:
r   </tbody><tdr   z	 colspan=z	 rowspan=r   r   r4   r      zBreak in merge...)r   r   r(   print)master_token_listnew_master_token_listpointertmps       r   merge_span_tokenr     s    G
*  ,

$

2"	 )U2$q[1<< &{3>>{KK
 ''"3gkAo"NOCqLG)005&{3>> &{3>>{KK
 ''"3gkAo"NOCqLG)005 *001B1KLqLG%,,->-GH1A 
$

2H   ,  	%&s   A.E A&E ?E E Ec                    U R                  SS5      n U R                  SS5      n U R                  SS5      n U R                  SS5      n U R                  S	S
5      n U R                  SS5      n U R                  SS5      n U R                  SS5      n U R                  SS5      n U R                  SS5      n U R                  SS5      n U $ )aO  
post process with <eb></eb>, <eb1></eb1>, ...
emptyBboxTokenDict = {
    "[]": '<eb></eb>',
    "[' ']": '<eb1></eb1>',
    "['<b>', ' ', '</b>']": '<eb2></eb2>',
    "['\u2028', '\u2028']": '<eb3></eb3>',
    "['<sup>', ' ', '</sup>']": '<eb4></eb4>',
    "['<b>', '</b>']": '<eb5></eb5>',
    "['<i>', ' ', '</i>']": '<eb6></eb6>',
    "['<b>', '<i>', '</i>', '</b>']": '<eb7></eb7>',
    "['<b>', '<i>', ' ', '</i>', '</b>']": '<eb8></eb8>',
    "['<i>', '</i>']": '<eb9></eb9>',
    "['<b>', ' ', '\u2028', ' ', '\u2028', ' ', '</b>']": '<eb10></eb10>',
}
:param master_token:
:return:
z	<eb></eb>	<td></td>z<eb1></eb1>z
<td> </td>z<eb2></eb2>z<td><b> </b></td>z<eb3></eb3>u   <td>  </td>z<eb4></eb4>z<td><sup> </sup></td>z<eb5></eb5><td><b></b></td>z<eb6></eb6>z<td><i> </i></td>z<eb7></eb7>z<td><b><i></i></b></td>z<eb8></eb8>z<td><b><i> </i></b></td>z<eb9></eb9>z<td><i></i></td>z<eb10></eb10>u   <td><b>     </b></td>r   )master_tokens    r   deal_eb_tokenr     s    &  ''[AL''|DL''7JKL''7NOL''7NOL''7IJL''7JKL''7PQL''7QRL''7IJL'':L r   c                 n   [        U 5      n / nSnU  H  nUR                  S5      (       a[  U[        U5      S-
  :  a  US-  nM2  X1R                  5       ;  a  US-  nML  UR	                  SSR                  X   5      5      nUS-  n[        U5      nUR                  U5        M     SR                  U5      $ )zg
Insert OCR text result to structure token.
:param master_token_list:
:param match_text_dict:
:return:
r   r   r   z><z>{}<r   )	r   r   r   r   r   r)   r   r   r(   )r   r   merged_result_list
text_countr   s        r   insert_text_to_tokenr   >  s     )):;J)""5))C0144a
#7#7#99a
+33&--(CD  a
$\2!!,/ *  77%&&r   c                    Sn[         R                  " X5      nU Vs/ s H  o3R                  5       PM     nnSn/ nU Ha  n[         R                  " XW5      nUR                  5       n	U	b$  SR	                  U	5      n
UR                  U
5        MP  UR                  S5        Mc     [        Xd5       H  u  pU
b  U R                  Xz5      n M  M     U $ s  snf )z
Deal with isolate span cases in this function.
It causes by wrong prediction in structure recognition model.
eg. predict <td rowspan="2"></td> to <td></td> rowspan="2"></b></td>.
:param thead_part:
:return:
z<td></td> rowspan="(\d)+" colspan="(\d)+"></b></td>|<td></td> colspan="(\d)+" rowspan="(\d)+"></b></td>|<td></td> rowspan="(\d)+"></b></td>|<td></td> colspan="(\d)+"></b></td>zc rowspan="(\d)+" colspan="(\d)+"| colspan="(\d)+" rowspan="(\d)+"| rowspan="(\d)+"| colspan="(\d)+"Nz<td{}></td>)refinditerr   searchr)   r   rq   r   )
thead_partisolate_patternisolate_iterr   isolate_listspan_patterncorrected_listisolate_item	span_partspanStr_in_isolateItemcorrected_items              r   deal_isolate_spanr  [  s    	.  ;;;L'34|!GGI|L4	  N$IIl9	!*!2!-*112HIN!!.1!!$' % ),N(I$%#++LIJ	 )J
 5 5s   Cc                    Sn[         R                  " X5      nU Vs/ s H  o3R                  5       PM     nn/ nU H  nUR                  S5      S:  d  UR                  S5      S:  aW  UR	                  SS5      R	                  SS5      nUR	                  SS5      R	                  SS	5      nUR                  U5        M  UR                  U5        M     [        XE5       H  u  pgU R	                  Xg5      n M     U $ s  snf )
zn
Deal duplicate <b> or </b> after replace.
Keep one <b></b> in a <td></td> token.
:param thead_part:
:return:
z<td rowspan="(\d)+" colspan="(\d)+">(.+?)</td>|<td colspan="(\d)+" rowspan="(\d)+">(.+?)</td>|<td rowspan="(\d)+">(.+?)</td>|<td colspan="(\d)+">(.+?)</td>|<td>(.*?)</td>r   r   r   r   <td><td><b></td>	</b></td>)r  r  r   r   r   r   rq   )r  
td_patterntd_iterttd_listnew_td_listtd_itemnew_td_items           r   deal_duplicate_bbr    s    	  kk*1G")*'Qwwy'G* K==!#w}}V'<q'@ ooeR088DGoofi8@@+VGw'w'  !$G 9''=
 !:% +s   C<c                    Sn[         R                  " X5      c  U $ [         R                  " X5      R                  5       n[        R                  " U5      nSn[         R
                  " XB5      nU Vs/ s H  ofR                  5       PM     nn[        U5      S:  a  SOSnU(       dC  UR                  SS5      R                  SS	5      R                  S
S5      R                  SS5      nO/ n	U H$  n
U	R                  U
R                  SS5      5        M&     [        Xy5       H  u  pUR                  X5      nM     UR                  SS	5      nSnSn[         R                  " XU5      nSnSn[         R                  " XU5      nUR                  SS5      R                  S
S5      nUR                  SS5      n[        U5      n[        U5      nU R                  X25      n U $ s  snf )z
In our opinion, <b></b> always occurs in <thead></thead> text's context.
This function will find out all tokens in <thead></thead> and insert <b></b> by manual.
:param result_token:
:return:
z<thead>(.*?)</thead>zs<td rowspan="(\d)+" colspan="(\d)+">|<td colspan="(\d)+" rowspan="(\d)+">|<td rowspan="(\d)+">|<td colspan="(\d)+">r   TFr  r  r  r  z<b><b>r   z</b></b>r   >z><b>z(<b>)+z(</b>)+r   r   )r  r  r   copydeepcopyr  r   r   r   rq   subr  r  )result_tokenthead_patternr  origin_thead_partr  	span_iters	span_listhas_span_in_headreplaced_span_listsprsp
mb_patternsingle_b_stringmgb_patternsingle_gb_strings                   r   deal_bbr/    s    +M	yy-5=7==?Jj1 ILL5I$-.IqII."9~1tu vy1WWk*WXu%WZ(	 	"  B%%bjjf&=> 99GB#++B4J :  ''=
 
VVJD
!VVK:F
  ''	:BB8US
 ##$6DJ":.J #:.J''(9FLq /s   2G c                   ,    \ rS rSrS rS rS rS rSrg)Matcheri  c                 X    Xl         X l        [        USS9U l        [        USS9U l        g)z
This class process the end2end results and structure recognition results.
:param end2end_file: end2end results predict by end2end inference.
:param structure_master_file: structure recognition results predict by structure master inference.
r\   )r,   	structureN)end2end_filestructure_master_filer2   end2end_resultsstructure_master_results)selfr4  r5  s      r   __init__Matcher.__init__  s1     )%:"*<	J(3!+)
%r   c                    [        5       n[        U R                  R                  5       5       GH  u  nu  p4/ nX0R                  ;  a  M  U R                  U   n[        XF5      u  nnn	n
[        X5      nUR                  U5        [        U[        U5      SS9n[        U5      S:  a#  UU   n[        UUU
5      nUR                  U5        [        U[        U5      SS9n[        U[        U	5      SS9n[        U5      S:  a8  [        U5      S:  a)  UU   nU	U   n[        UUUU5      nUR                  U5        [        U[        U5      SS9n[        U5      S:  aS  UU   n[        UU5      u  nnnn[        U[        U	5      5      n[        R                  " U5      nUR                  U5        O[        R                  " U5      n/ n/ nUUUUS.nU R!                  UU5      nUX'   GM     U$ )z
Match process:
pre-process : convert end2end and structure master results to xyxy, xywh ndnarray format.
1. Use pseBbox is inside masterBbox judge rule
2. Use iou between pseBbox and masterBbox rule
3. Use min distance of center point rule
:return:
r\   )ra   r   r]   )r_   match_list_add_extra_matchr~   sorted_bboxes_groups)r'   r   r6  r   r7  r   r   extendrg   r   r   r   r   r   r  r  _format)r8  match_resultsrb   	file_namer   r_   r   r   r   r   r   center_rule_match_listcenter_no_match_end2end_indexscenter_no_match_end2end_xyxyiou_rule_match_list!centerIou_no_match_end2end_indexs centerIou_no_match_master_indexscenterIou_no_match_end2end_xywhcenterIou_no_match_master_xywhdistance_match_listr   no_match_end2end_xywhend2end_sorted_indexes_listend2end_sorted_bboxes_listr~   r=  r   r<  match_result_dicts                                r   matchMatcher.match  sr    09$:N:N:T:T:V0W,C,)J = ==&*&C&CI&N#  H##,, &7#&" 45 .;C 349.* 12Q6/B20, '5020'#
 !!"56 1>C 3491- 0=C <=H0, 4599:Q>2E53/ 2N42. ':5342	'# !!"56 (5C 349($ +,q0(;<T(U% 35MN/.!( $//5Q1R$  .2]]:-F**112BC .2]]:-F* "')$ ).H!.(<	! !%->	 J'8M$Q 1XT r   c                 R   U R                   U   nU R                  U   nUS   nUS   n/ nU HV  nS/n	[        U5      n
[        U
5       H  nU	R	                  S5        M     U	R	                  S5        UR                  U	5        MX     UR                  S5      nUS   S:X  a  US	S R                  U5        O_US   S:X  a4  UR	                  S5        UR                  U5        UR	                  S5        O"UR                  U5        UR	                  S5        UR                  S
U5        U$ )z
Extend the master token(insert virtual master token), and format matching result.
:param match_result:
:param file_name:
:return:
r   r~   z<tr>r   z</tr>,r   r   Nmatched_master_token_list)r6  r7  r   r^   r   r>  split
setdefault)r8  match_resultrA  r   master_infor   r~   virtual_master_token_list
line_grouptmp_list	item_nums_r   s                r   r?  Matcher._format}  s8    ++I633I>"6*$_5 %'!'JxHJI9%, &OOG$%,,X6 ( )..s3R J. cr"))*CD r"k1$$W-$$%>?$$Z0$$%>?$$Z0 	 ;=NOr   c                     [        5       nSn[        UR                  5       5       HR  u  nu  pVU R                  U   nUS   nUS   n	[	        U	5      n
[        XU5      n[        X5      n[        U5      nXU'   MT     U$ )z`
Merge the OCR result into structure token to get final results.
:param match_results:
:return:
r   rS  r<  )r'   r   r   r6  r   r   r   r/  )r8  r@  merged_resultsr   rb   rA  
match_infor   r   r_   r   r   merged_results                r   get_merge_resultMatcher.get_merge_result  s      ,5m6I6I6K,L(C()//	:L *+F G#$@AJ'
3J1*KXO01BTM#M2M(59% -M r   )r4  r6  r5  r7  N)	__name__
__module____qualname____firstlineno__r9  rO  r?  rb  __static_attributes__rU   r   r   r1  r1    s    
tl/br   r1  c                   $    \ rS rSrS rSS jrSrg)TableMasterMatcheri  c                     g r}   rU   )r8  s    r   r9  TableMasterMatcher.__init__  s    r   c                 d   U/ 0n[        X#5       H9  u  pg[        [        R                  " U5      US   S9nXT   R	                  U5        M;     XPl        U0 0n	Uu  pSR                  U
SS 5      n
XU   S'   XU   S'   Xl        U R                  5       nU R                  U5      nX   nSU-   S	-   nU$ )
Nr   )r   r   rR  r   r   r   z<html><body><table>z</table></body></html>)
rq   r'   r   r	   r   r6  r(   r7  rO  rb  )r8  structure_resdt_boxesrec_resimg_namer6  dt_boxresrR   structure_master_result_dictpred_structurespred_bboxesr@  r_  	pred_htmls                  r   __call__TableMasterMatcher.__call__  s    #R.x1KFXXf%VA %,,Q/ 2  /(0"~$'4$((?1R#899HX.v69DX.v6(D% 

..}=",	)I58PP	r   )r6  r7  N)r   )rd  re  rf  rg  r9  ry  rh  rU   r   r   rj  rj    s    r   rj  )r\   )r   )r   )+__doc__r    r  cv2r*   r  rL   r#   numpyr   shapely.geometryr   r   r   r   r   r2   r:   rJ   rS   rZ   rg   rn   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r/  r1  rj  rU   r   r   <module>r     s   
 
 	 
      0
 &&".0,7tD2@:  ,	0!f F':*Z"JIXL L^ r   