
    i,"                       S SK Jr  S SK Jr  S SK Jr  S SK Jr  S SKrS SKJr  S SKrS SK	r
S SKrS SKJrJrJr  S SKrS SKrS SKrS SKJr  S S	KJr  S S
KJr  S SKJr   " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r# " S  S!\5      r$ " S" S#\!5      r% " S$ S%\%5      r& " S& S'\5      r' " S( S)\5      r( " S* S+\5      r) " S, S-\5      r* " S. S/\5      r+ " S0 S1\5      r, " S2 S3\5      r- " S4 S5\5      r. " S6 S7\5      r/ " S8 S9\5      r0 " S: S;\5      r1 " S< S=\!5      r2 " S> S?\5      r3 " S@ SA\5      r4 " SB SC\5      r5 " SD SE\5      r6 " SF SG\5      r7 " SH SI\8\5      r9 " SJ SK\95      r: " SL SM\95      r; " SN SO\5      r<g)P    )absolute_import)division)print_function)unicode_literalsN)Enum)
LineStringPointPolygon)sample)defaultdict)
get_logger)order_by_tbyxc                        \ rS rSrS rS rSrg)ClsLabelEncode$   c                     Xl         g N
label_list)selfr   kwargss      d/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddleocr/ppocr/data/imaug/label_ops.py__init__ClsLabelEncode.__init__%   s    $    c                 n    US   nX R                   ;  a  g U R                   R                  U5      nX!S'   U$ Nlabel)r   indexr   datar   s      r   __call__ClsLabelEncode.__call__(   s8    W'%%e,Wr   r   N__name__
__module____qualname____firstlineno__r   r"   __static_attributes__ r   r   r   r   $   s    %r   r   c                   ,    \ rS rSrS rS rS rS rSrg)DetLabelEncode1   c                     g r   r*   r   r   s     r   r   DetLabelEncode.__init__2       r   c                    US   n[         R                  " U5      n[        U5      n/ / / pen[        SU5       H]  nX'   S   nX'   S   n	UR	                  U5        UR	                  U	5        U	S;   a  UR	                  S5        ML  UR	                  S5        M_     [        U5      S:X  a  g U R                  U5      n[        R                  " U[        R                  S9n[        R                  " U[        R                  S9nXAS	'   XQS
'   XaS'   U$ )Nr   r   pointstranscription*z###TFdtypepolystextsignore_tags)
jsonloadslenrangeappendexpand_points_numnparrayfloat32bool_)
r   r!   r   nBoxboxestxtstxt_tagsbnoboxtxts
             r   r"   DetLabelEncode.__call__5   s    W

5!5z "BXD>C*X&C*_-CLLKKl"%& " u:?&&u-bjj188HBHH5WW&]r   c                     [         R                  " SSS9nUR                  SS9nU[         R                  " U5         US'   U[         R                  " U5         US'   [         R
                  " U[         R                  " U5      [         R                  " U5      4SS9n[         R                  " [         R                  " U5      SS9nU[         R                  " U5         US'   U[         R                  " U5         US'   U$ )	N)      rD   r7      )axisr   rP      )rB   zerossumargminargmaxdeletediffrC   )r   ptsrectstmprY   s         r   order_points_clockwise%DetLabelEncode.order_points_clockwiseN   s    xxi0GGGObiil#Qbiil#QiibiilBIIaL9Bwwrxx}1-biio&Qbiio&Qr   c                     SnU H  n[        U5      U:  d  M  [        U5      nM!     / nU H+  nX3S   /U[        U5      -
  -  -   nUR                  U5        M-     U$ )Nr   r>   r@   )r   rG   max_points_numrK   ex_boxesex_boxs         r   rA    DetLabelEncode.expand_points_numY   sf    C3x.(!$S  CG9S(ABBFOOF#  r   r*   N)	r%   r&   r'   r(   r   r"   r^   rA   r)   r*   r   r   r,   r,   1   s    2		r   r,   c                   4    \ rS rSrSr   SS jrS rS rSrg)	BaseRecLabelEncodee   )Convert between text-label and text-indexNc                    Xl         SU l        SU l        X@l        Uc?  [	        5       nUR                  S5        SU l        [        U R                  5      nSU l        O/ U l        [        US5       nUR                  5       nU HM  n	U	R                  S5      R                  S5      R                  S	5      n	U R                  R                  U	5        MO     S S S 5        U(       a  U R                  R                  S
5        [        U R                  5      nU R                  U5      n0 U l        [        U5       H  u  pXR                  U'   M     X`l        g ! , (       d  f       N= f)NsoseoszRThe character_dict_path is None, model can only recognize number and lower letters$0123456789abcdefghijklmnopqrstuvwxyzTrbutf-8

 )max_text_lenbeg_strend_strlowerr   warningcharacter_strlistopen	readlinesdecodestripr@   add_special_chardict	enumerate	character)r   max_text_lengthcharacter_dict_pathuse_space_charrw   loggerdict_characterfinlineslineichars               r   r   BaseRecLabelEncode.__init__h   s,    ,
&\FNNd "HD!$"4"45NDJ!#D)40C!D;;w/55d;AA&ID&&--d3 " 1
 ""))#.!$"4"45N..~>	 0GAIIdO 1' 10s   0A$E
E"c                     U$ r   r*   r   r   s     r   r   #BaseRecLabelEncode.add_special_char   s    r   c                 .   [        U5      S:X  d  [        U5      U R                  :  a  gU R                  (       a  UR                  5       n/ nU H2  nX0R                  ;  a  M  UR	                  U R                  U   5        M4     [        U5      S:X  a  gU$ )a  convert text-label into text-index.
input:
    text: text labels of each image. [batch_size]

output:
    text: concatenated text index for CTCLoss.
            [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
    length: length of each text. [batch_size]
r   N)r>   rt   rw   r   r@   r   text	text_listr   s       r   encodeBaseRecLabelEncode.encode   s     t9>SY):)::::::<D	D99$ TYYt_-  y>Qr   )ru   r   ry   r   rv   rw   rt   NFF)	r%   r&   r'   r(   __doc__r   r   r   r)   r*   r   r   rh   rh   e   s     3
 !"(Hr   rh   c                   >   ^  \ rS rSrSr SU 4S jjrS rS rSrU =r	$ )CTCLabelEncode   rj   c                 .   > [         [        U ]  XU5        g r   )superr   r   r   r   r   r   r   	__class__s        r   r   CTCLabelEncode.__init__        	nd,.	
r   c                 |   US   nU R                  U5      nUc  g [        R                  " [        U5      5      US'   US/U R                  [        U5      -
  -  -   n[        R                  " U5      US'   S/[        U R
                  5      -  nU H  nX4==   S-  ss'   M     [        R                  " U5      US'   U$ )Nr   lengthr   rQ   	label_ace)r   rB   rC   r>   rt   r   )r   r!   r   r   xs        r   r"   CTCLabelEncode.__call__   s    G}{{4 <#d),XqcT..T:;;Wc$..))AHMH HHUO[r   c                     S/U-   nU$ )Nblankr*   r   s     r   r   CTCLabelEncode.add_special_char   s    !^3r   r*   NF
r%   r&   r'   r(   r   r   r"   r   r)   __classcell__r   s   @r   r   r      s!    3 IN
 r   r   c                   4   ^  \ rS rSr SU 4S jjrS rSrU =r$ )E2ELabelEncodeTest   c                 .   > [         [        U ]  XU5        g r   )r   r   r   r   s        r   r   E2ELabelEncodeTest.__init__   s     	 $0.	
r   c                    SS K n[        U R                  5      nUS   nUR                  " U5      n[        U5      n/ / / pn[	        SU5       H]  n	XI   S   n
XI   S   nUR                  U
5        UR                  U5        US;   a  UR                  S5        ML  UR                  S5        M_     [        R                  " U[        R                  S9n[        R                  " U[        R                  S9nXaS	'   XS
'   / nU HX  nUR                  5       nU R                  U5      nUc    g X/U R                  [        U5      -
  -  -   nUR                  U5        MZ     [        R                  " U5      US'   U$ )Nr   r   r3   r4   r5   TFr7   r9   r;   r:   )r<   r>   r   r=   r?   r@   rB   rC   rD   rE   rw   r   rt   )r   r!   r<   padnumr   rF   rG   rH   rI   rJ   rK   rL   
temp_textsr   s                 r   r"   E2ELabelEncodeTest.__call__   sK   TYYW

5!5z "BXD>C*X&C*_-CLLKKl"%& " bjj188HBHH5W&]
D::<D;;t$D|(d&7&7#d)&CDDDd#  ,Wr   r*   r   r%   r&   r'   r(   r   r"   r)   r   r   s   @r   r   r      s    HM
 r   r   c                        \ rS rSrS rS rSrg)E2ELabelEncodeTrain   c                     g r   r*   r/   s     r   r   E2ELabelEncodeTrain.__init__   r1   r   c                    SS K nUS   nUR                  " U5      n[        U5      n/ / / pvn[        SU5       H]  nX8   S   n	X8   S   n
UR	                  U	5        UR	                  U
5        U
S;   a  UR	                  S5        ML  UR	                  S5        M_     [
        R                  " U[
        R                  S9n[
        R                  " U[
        R                  S9nXQS	'   XaS
'   XqS'   U$ )Nr   r   r3   r4   r5   TFr7   r9   r:   r;   )	r<   r=   r>   r?   r@   rB   rC   rD   rE   )r   r!   r<   r   rF   rG   rH   rI   rJ   rK   rL   s              r   r"   E2ELabelEncodeTrain.__call__   s    W

5!5z "BXD>C*X&C*_-CLLKKl"%& " bjj188HBHH5WW&]r   r*   Nr$   r*   r   r   r   r      s    r   r   c                   R   ^  \ rS rSr S
U 4S jjrS rS rS rS rS r	S r
S	rU =r$ )KieLabelEncodei  c                   > [         [        U ]  5         [        SS05      U l        [        5       U l        [        USSS9 nSnU H&  nUR                  5       n	XpR                  U	'   US-  nM(     S S S 5        [        US5       n
U
R                  5       n[        U5       H$  u  pxUR                  S5      nXpR                  U'   M&     S S S 5        X0l	        X@l
        g ! , (       d  f       Ns= f! , (       d  f       N,= f)N r   rrp   encodingrQ   rq   )r   r   r   r   label2classid_mapr{   r~   r|   r   normdirected)r   r   
class_pathr   r   r   fridxr   r   r   r   r   s               r   r   KieLabelEncode.__init__  s     	nd,."aM	!%%sW=Czz|"%		$q  > *c"cMMOE&u-	zz$'/2&&t, . #
 	  >= #"s    /CAC-
C*-
C;c                     USS2SS24   USS2SS24   p2USS2SS24   USS2SS24   pTXB-
  S-   [         R                  " XS-
  S-   S5      pvUSS2S4   S   U-
  U R                  -  nUSS2S4   S   U-
  U R                  -  n	USS2S4   S   U-  USS2S4   S   U-  pXg-  [         R                  " U
5      -   n[         R                  " XXU/S5      n[         R
                  " X#XE/S5      R                  [         R                  5      nX4$ )	z)Compute relation between every two boxes.Nr   rQ   rP   rO         ra   )rB   maximumr   
zeros_likestackconcatenateastyperD   )r   rG   x1sy1sx2sy2swshsdxsdysxhhsxwhswhs	relationsbboxess                  r   compute_relationKieLabelEncode.compute_relation"  s   AaC=%1Q3-SAaC=%1Q3-SQ

39q=! <B1a4y$		11a4y$		11Xd^b("QT(4.2*=dgd++HHc48"=	3 4b9@@L  r   c                 4   Sn[        U Vs/ s H  n[        U5      PM     sn5      n[        R                  " [        U5      U4[        R                  5      * n[        U5       H*  u  pc[        R                  " U5      XVS[        U5      24'   M,     XT4$ s  snf )zPad text index to same length.,  N)maxr>   rB   onesint32r   rC   )r   	text_indsmax_lentext_indrecoder_lenpadded_text_indsr   s          r   pad_text_indicesKieLabelEncode.pad_text_indices/  s    CX3x=CDGGS^W$=rxxHH&y1MC57XXh5G/CM/12 2,,	 Ds   Bc           	      8   US   US   p2[         R                  " U[         R                  5      nU R                  U5      u  pEUR	                  SS5      nUb  [         R                  " U[         R                  5      nUR	                  SS5      nUb  USS2S4   n[         R                  " U5      nUSS2S4   USSS24   :H  R                  [         R                  5      nU R                  (       a$  Xv-  S:H  R                  [         R                  5      n[         R                  " US5        [         R                  " Xg/S5      nU R                  U5      u  pSn
[         R                  " U
S	/5      nUR                  u  pX[SU2SS24'   [         R                  " XS
/5      nXNSU2SU2SS24'   [         R                  " X/5      nXSU2SS24'   [         R                  " X/5      nUUSU2SUS-   24'   [         R                  " X/5      n[        US   UUUUUS9$ )z7Convert bboxes, relations, texts and labels to ndarray.r3   r   labelsNedgesrQ   ra   r   rO   r   image)r   r3   r   r:   r   tag)rB   rC   r   r   getr   r   fill_diagonalr   r   rT   shaper   )r   	ann_infosrG   r   r   r   r   r   r   r   max_numtemp_bboxesh_temp_relationstemp_padded_text_indstemp_labelsr   s                     r   list_to_numpyKieLabelEncode.list_to_numpy8  s   $X.	+0Fy) 11%8	x.XXfbhh/FMM'40E 4q$w5q>9AA"((K=="^q088BE  +<(,(=(=i(H%hh|,||#BQBE7Q"78$-rr2A2qy! "'); <'7bqb!e$hh12#)BQB!a%K hh'(G$$'
 	
r   c           
         [        U5      S:X  d   e[        U5      S:X  d   e[        S5       Vs/ s H  n[        X   X#   5      PM     nn[        U Vs/ s H  oUR                  UR
                  4PM     sn5      nUR                  u  px  n	[        S5       Vs/ s H  n[        XC   [        Xx5      /5      PM     n
n[        R                  " U
 Vs/ s H  oR                  PM     sn5      n[        R                  " U5      nUS   nUS:X  a  / SQnOUS:X  a  / SQnOUS:X  a  / SQnO/ SQnU Vs/ s H  o1U   PM	     nnU Vs/ s H  nUU   PM
     nnUU4$ s  snf s  snf s  snf s  snf s  snf s  snf )	NrO   r   )r   rQ   rP   rS   rQ   )rQ   rP   rS   r   rP   )rP   rS   r   rQ   )rS   r   rQ   rP   )r>   r?   r	   r
   r   yboundsr   rB   rC   r   argsort)r   points_xpoints_yr   r3   ppolygonmin_xmin_yr   points_to_lefttopr   	distancessort_dist_idxlefttop_idxpoint_orderssorted_points_xjsorted_points_ys                      r   convert_canonical KieLabelEncode.convert_canonicalc  sj   8}!!!8}!!!;@8D8a%X[18Dv6v!CC:v67$^^aBG(
BJQJ	5#678( 	 
 HH6GH6Gdkk6GHI	

9-#A&!'LA'LA'L'L0<=1A;=0<=18A;=/// E6
 I >=s#   E!E "E%E*2E/E4c                    [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " U5      n[        R                  " U5      n[        R                  " U5      S-  n[        R                  " U5      S-  n[        R                  " X5-
  5      n[        R                  " XF-
  5      n[        R                  " X5      S-  [        R
                  -  n	[        R                  " U	5      n
/ / p[        S5       H/  nUR                  XU      5        UR                  X*U      5        M1     U R                  X5      $ )NrO   g      ?g     f@)
r>   rB   rC   rU   arctan2pir  r?   r@   r  )r   r  r  r   r  center_xcenter_yx_arry_arranglesort_idxr  r  r   s                 r   sort_vertexKieLabelEncode.sort_vertex  s   8}!!!8}!!!HHXHHX66!9t#66!9t#&&

5(502558::e$+-rqA""8QK#89""8QK#89  %%oGGr   c           	         SS K nUS   nUR                  " U5      n/ / / / / 4u  pVpxn	U GH  n
U
S   n[        S5       Vs/ s H
  oU   S   PM     nn[        S5       Vs/ s H
  oU   S   PM     nnU R                  X5      u  nn/ n[	        UU5       H(  u  nnUR                  U5        UR                  U5        M*     UR                  U5        U
S   nUR                  U
S   5        U Vs/ s H$  nUU R                  ;   d  M  U R                  U   PM&     nnUR                  U5        SU
R                  5       ;   a"  UR                  U R                  U
S      5        O4SU
R                  5       ;   a  UR                  U
S   5        O[        S5      eU	R                  U
R                  S	S5      5        GM     [        US
   UUUU	US9nU R                  U5      $ s  snf s  snf s  snf )Nr   r   r3   rO   rQ   r4   key_clszLCannot found 'key_cls' in ann.keys(), please check your training annotation.edger   )r   r3   r:   r   r   r   )r<   r=   r?   r  zipr@   r   keysr   
ValueErrorr   r   )r   r!   r<   r   annotationsrG   r:   r   r   r   annrK   r   x_listy_listsorted_x_listsorted_y_list
sorted_boxr   r  r   cr   r   s                           r   r"   KieLabelEncode.__call__  s   Wjj'13RR1C.iCh-C).q2A!fQiF2).q2A!fQiF2+/+;+;F+K(M=JM=91!!!$!!!$ : LL$'DLL_-..2Eda499n		!dHEX&#((*$d44S\BCchhj(c)n- b  LL+,- . w-
	 !!),,= 32 Fs   GG#.G(G()r   r   r   r   )
   F)r%   r&   r'   r(   r   r   r   r   r  r  r"   r)   r   r   s   @r   r   r     s0    AF!(!-)
V0:H,&- &-r   r   c                   J   ^  \ rS rSrSr S	U 4S jjrS rS rS rS r	Sr
U =r$ )
AttnLabelEncodei  rj   c                 .   > [         [        U ]  XU5        g r   )r   r2  r   r   s        r   r   AttnLabelEncode.__init__       	ot-.	
r   c                 ^    SU l         SU l        U R                   /U-   U R                  /-   nU$ Nrl   rm   ru   rv   r   s     r   r    AttnLabelEncode.add_special_char  1    ,,.8DLL>Ir   c                 d   US   nU R                  U5      nUc  g [        U5      U R                  :  a  g [        R                  " [        U5      5      US'   S/U-   [        U R
                  5      S-
  /-   S/U R                  [        U5      -
  S-
  -  -   n[        R                  " U5      US'   U$ Nr   r   r   rQ   rP   r   r>   rt   rB   rC   r   r   r!   r   s      r   r"   AttnLabelEncode.__call__  s    G}{{4 <t9)))#d),XC4>>"Q&'( cT&&T2Q678 	 Wr   c                 L    U R                  S5      nU R                  S5      nX/$ Nbegendget_beg_end_flag_idxr   beg_idxend_idxs      r   get_ignored_tokens"AttnLabelEncode.get_ignored_tokens  +    ++E2++E2!!r   c                     US:X  a/  [         R                  " U R                  U R                     5      nU$ US:X  a/  [         R                  " U R                  U R                     5      nU$  SU-  5       eNrB  rC  z+Unsupported type %s in get_beg_end_flag_idxrB   rC   r   ru   rv   r   
beg_or_endr   s      r   rE  $AttnLabelEncode.get_beg_end_flag_idx  k    ((499T\\23C
 
	 5 ((499T\\23C 
 UG*TT5r   r8  r   r%   r&   r'   r(   r   r   r   r"   rI  rE  r)   r   r   s   @r   r2  r2    s+    3 IN
""
 r   r2  c                   P   ^  \ rS rSrSr S
U 4S jjrS rS rS rS r	S r
S	rU =r$ )RFLLabelEncodei  rj   c                 .   > [         [        U ]  XU5        g r   )r   rU  r   r   s        r   r   RFLLabelEncode.__init__  r   r   c                 ^    SU l         SU l        U R                   /U-   U R                  /-   nU$ r7  r8  r   s     r   r   RFLLabelEncode.add_special_char  r:  r   c                     S/[        U R                  5      -  nU H  nX#==   S-  ss'   M     [        R                  " U5      $ )Ng        rQ   )r>   r   rB   rC   )r   r   	cnt_labelchar_s       r   
encode_cntRFLLabelEncode.encode_cnt  s>    EC//	E! xx	""r   c                    US   nU R                  U5      nUc  g [        U5      U R                  :  a  g U R                  U5      n[        R
                  " [        U5      5      US'   S/U-   [        U R                  5      S-
  /-   S/U R                  [        U5      -
  S-
  -  -   n[        U5      U R                  :w  a  g [        R
                  " U5      US'   X1S'   U$ )Nr   r   r   rQ   rP   r[  )r   r>   rt   r]  rB   rC   r   )r   r!   r   r[  s       r   r"   RFLLabelEncode.__call__  s    G}{{4 <t9)))OOD)	#d),XC4>>"Q&'( cT&&T2Q678 	 t9)))W%[r   c                 L    U R                  S5      nU R                  S5      nX/$ rA  rD  rF  s      r   rI  !RFLLabelEncode.get_ignored_tokens  rK  r   c                     US:X  a/  [         R                  " U R                  U R                     5      nU$ US:X  a/  [         R                  " U R                  U R                     5      nU$  SU-  5       erM  rN  rO  s      r   rE  #RFLLabelEncode.get_beg_end_flag_idx  rR  r   r8  r   )r%   r&   r'   r(   r   r   r   r]  r"   rI  rE  r)   r   r   s   @r   rU  rU    s0    3 IN
#*"
 r   rU  c                   >   ^  \ rS rSrSr SU 4S jjrS rS rSrU =r	$ )SEEDLabelEncodei)  rj   c                 .   > [         [        U ]  XU5        g r   )r   rf  r   r   s        r   r   SEEDLabelEncode.__init__,  r5  r   c                 z    SU l         SU l        SU l        XR                  U R                   U R                  /-   nU$ )Npaddingrm   unknown)rj  rv   rk  r   s     r   r    SEEDLabelEncode.add_special_char3  s8      '<<t||*TTr   c                    US   nU R                  U5      nUc  g [        U5      U R                  :  a  g [        R                  " [        U5      5      S-   US'   U[        U R
                  5      S-
  /-   [        U R
                  5      S-
  /U R                  [        U5      -
  S-
  -  -   n[        R                  " U5      US'   U$ )Nr   rQ   r   rS   rP   r=  r>  s      r   r"   SEEDLabelEncode.__call__:  s    G}{{4 <t9)))#d),q0X4>>"Q&'(4>>"Q&'4+<+<s4y+H1+LMN 	
 Wr   )rv   rj  rk  r   
r%   r&   r'   r(   r   r   r   r"   r)   r   r   s   @r   rf  rf  )  s!    3 IN
 r   rf  c                   N   ^  \ rS rSrSr   S	U 4S jjrS rS rS rS r	Sr
U =r$ )
SRNLabelEncodeiK  rj   c                 .   > [         [        U ]  XU5        g r   )r   rq  r   r   s        r   r   SRNLabelEncode.__init__N  s     	nd,.	
r   c                 :    XR                   U R                  /-   nU$ r   r8  r   s     r   r   SRNLabelEncode.add_special_charY  s    '<<*FFr   c                 P   US   nU R                  U5      n[        U R                  5      nUc  g [        U5      U R                  :  a  g [        R
                  " [        U5      5      US'   X#S-
  /U R                  [        U5      -
  -  -   n[        R
                  " U5      US'   U$ )Nr   r   rQ   )r   r>   r   rt   rB   rC   )r   r!   r   char_nums       r   r"   SRNLabelEncode.__call__]  s    G}{{4 t~~&<t9t(((#d),X!|n(9(9CI(EFFWr   c                 L    U R                  S5      nU R                  S5      nX/$ rA  rD  rF  s      r   rI  !SRNLabelEncode.get_ignored_tokensj  rK  r   c                     US:X  a/  [         R                  " U R                  U R                     5      nU$ US:X  a/  [         R                  " U R                  U R                     5      nU$  SU-  5       erM  rN  rO  s      r   rE  #SRNLabelEncode.get_beg_end_flag_idxo  rR  r   r*   )   NFrS  r   s   @r   rq  rq  K  s/    3  		
"
 r   rq  c                   L    \ rS rSrSr    S
S jr\S 5       rS rS r	S r
Srg	)TableLabelEncodeiy  rj   c                    Xl         SU l        XPl        X@l        X0l        / n[        US5       n	U	R                  5       n
U
 HC  nUR                  S5      R                  S5      R                  S5      nUR                  U5        ME     S S S 5        U R                  (       a.  SU;  a  UR                  S5        SU;   a  UR                  S5        U R                  U5      n0 U l        [        U5       H  u  pXR                  U'   M     U R                  R                  5        VVs0 s H  u  pX_M	     snnU l        Xl        X`l        U R                  U R$                     U l        U R                  U R$                     U l        U R                  U R*                     U l        / SQU l        S	S
SSSSSSSSSS.U l        g ! , (       d  f       GN?= fs  snnf )NFro   rp   rq   rr   	<td></td><td>)r  <td	<eb></eb>r  r  z<eb1></eb1>z<eb2></eb2>z<eb3></eb3>z<eb4></eb4>z<eb5></eb5>z<eb6></eb6>z<eb7></eb7>z<eb8></eb8>z<eb9></eb9>z<eb10></eb10>)z[]z[' ']z['<b>', ' ', '</b>']z['\u2028', '\u2028']z['<sup>', ' ', '</sup>']z['<b>', '</b>']z['<i>', ' ', '</i>']z['<b>', '<i>', '</i>', '</b>']z#['<b>', '<i>', ' ', '</i>', '</b>']z['<i>', '</i>']z2['<b>', ' ', '\u2028', ' ', '\u2028', ' ', '</b>'])rt   rw   learn_empty_boxmerge_no_span_structurereplace_empty_cell_tokenr{   r|   r}   r~   r@   remover   r   r   itemsidx2charr   loc_reg_numru   pad_idx	start_idxrv   rH  td_tokenempty_bbox_token_dict)r   r   r   r  r  r  r  r   r   r   r   r   r   r   kvs                   r   r   TableLabelEncode.__init__|  s    ,
.'>$(@%%t,MMOE{{7+11$7==fE%%d+  - ''.0%%k2'%%f-..~>	 0GAIIdO 1*.))//*;<*;$!*;<'&yy.4<<0yy.A"$1&3(5,$1.;3@,DS&
"3 -,  =s   AG &G 
Gc                      U R                   S-   $ )NrP   rt   r   s    r   _max_text_lenTableLabelEncode._max_text_len  s      1$$r   c                    US   nUS   nU R                   (       a  U R                  U5      nU R                  (       a  U R                  X25      n/ nU H0  nUS:w  d  M  SU;   a  US   S:w  a  SU-   nUR	                  U5        M2     U R                  U5      nUc  g [        U5      US'   U R                  /U-   U R                  /-   nX0R                  /U R                  [        U5      -
  -  -   n[        R                  " U5      nX1S'   [        U5      U R                  :  a  g [        R                  " U R                  U R                  4[        R                  S9n[        R                  " U R                  S	4[        R                  S9nSn[!        U5       H  u  pU R"                  U   U R$                  ;   d  M$  S
X(   ;   ac  [        X(   S   5      S:  aO  X(   S
   R'                  5       n
[        R                  " U
[        R                  S9R)                  S5      n
XU	'   SXy'   U R*                  (       a  SXy'   US	-  nM     XaS'   XqS'   U$ )Ncells	structurer   spanr   rs   r   r7   rQ   bboxtokensra   g      ?r   
bbox_masks)r  _merge_no_span_structurer  _replace_empty_cell_tokenr@   r   r>   r  rH  r  r  rB   rC   rT   r  rD   r   r  r  copyreshaper  )r   r!   r  r  new_structuretokenr   r  bbox_idxr   r  s              r   r"   TableLabelEncode.__call__  s   W%	''55i@I((66yHIE{U?uQx3%KE$$U+	  KK.	YX^^$y0DLL>A	Y/!
 
	 HHY'	%[y>D... 4--t/?/?@

SXXt1115RZZH
!),HA}}U#t}}4U_,U_X5N1ORS1S ?62779D88D

;CCBGD $1I$'JM''$'JMA -  X'\r   c                     / nSnU[        U5      :  a8  X   nUS:X  a  SnUS-  nUR                  U5        US-  nU[        U5      :  a  M8  U$ )zz
This code is refer from:
https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/data_preprocess.py
r   r  r  rQ   rb   )r   r  r  r   r  s        r   r  )TableLabelEncode._merge_no_span_structure  sc    
 #i. LE#Q  'FA #i.  r   c                     Sn/ nU Hg  nUS;   aM  SX#   R                  5       ;  a  [        X#   S   5      nU R                  U   nUR                  U5        US-  nMV  UR                  U5        Mi     U$ )z~
This fun code is refer from:
https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/data_preprocess.py
r   )r  r  r  r  r  rQ   )r%  strr  r@   )r   
token_listr  r  add_empty_bbox_token_listr  contents          r   r  *TableLabelEncode._replace_empty_cell_token  s     $&!E44!5!5!77!%/(";<G 66w?E)007A)007   )(r   )r   r   r  rH  r  r  r  rw   rt   r  r  r  r  r  NFFFrO   )r%   r&   r'   r(   r   r   propertyr  r"   r  r  r)   r*   r   r   r  r  y  s=    3 "' %6
p % %/b )r   r  c                   N   ^  \ rS rSrSr    SU 4S jjr\S 5       rS rSr	U =r
$ )TableMasterLabelEncodei  rj   c                    > [         [        U ]
  " UUUUUU40 UD6  U R                  U R                     U l        U R                  U R                     U l        g r   )r   r  r   r   pad_strr  unknown_strunknown_idx)	r   r   r   r  r  r  r  r   r   s	           r   r   TableMasterLabelEncode.__init__  s^     	$d4$#	
 	
 yy.99T%5%56r   c                     U R                   $ r   r  r  s    r   r  $TableMasterLabelEncode._max_text_len&  s       r   c                     SU l         SU l        SU l        SU l        UnUU R                  U R                   U R                  U R                  /-   nU$ )Nz<SOS><EOS><UKN><PAD>)ru   rv   r  r  r   s     r   r   'TableMasterLabelEncode.add_special_char*  sY    "''LLLLLL	+
 
 r   )ru   rv   r  r  r  r  r  )r%   r&   r'   r(   r   r   r  r  r   r)   r   r   s   @r   r  r    s7    3 "' %7, ! ! r   r  c                   0    \ rS rSrSS jrS rS rS rSrg)	TableBoxEncodei9  c                 ,    US;   d   eXl         X l        g )N)xywhxyxyxyxyxyxyin_box_formatout_box_format)r   r  r  r   s       r   r   TableBoxEncode.__init__:  s    !====*,r   c                 j   US   R                   S S u  p#US   nU R                  U R                  :w  aS  U R                  S:X  aC  U R                  S:X  a  U R                  U5      nO!U R                  S:X  a  U R	                  U5      nUS S 2SS S24==   U-  ss'   US S 2SS S24==   U-  ss'   XAS'   U$ )	Nr   rP   r   r  r  r  r   rQ   )r   r  r  xyxyxyxy2xywh	xyxy2xywh)r   r!   
img_height	img_widthr   s        r   r"   TableBoxEncode.__call__?  s     $W 3 3BQ 7
h!4!44""f,%%3!//7F''61!^^F3Fq!$Q$w9$q!$Q$w:%Xr   c                 x   [         R                  " [        U5      S/5      nUS S 2SS S24   R                  5       US S 2S4'   US S 2SS S24   R                  5       US S 2S4'   US S 2SS S24   R	                  5       US S 2S4   -
  US S 2S4'   US S 2SS S24   R	                  5       US S 2S4   -
  US S 2S4'   U$ )NrO   r   rP   rQ   rS   )rB   rT   r>   minr   )r   rG   
new_bboxess      r   r  TableBoxEncode.xyxyxyxy2xywhN  s    XXs5z1o.
 ADqD>--/
1a4 ADqD>--/
1a4 ADqD>--/*QT2BB
1a4 ADqD>--/*QT2BB
1a4r   c                    [         R                  " U5      nUS S 2S4   US S 2S4   -   S-  US S 2S4'   US S 2S4   US S 2S4   -   S-  US S 2S4'   US S 2S4   US S 2S4   -
  US S 2S4'   US S 2S4   US S 2S4   -
  US S 2S4'   U$ )Nr   rP   rQ   rS   )rB   
empty_like)r   r   r  s      r   r  TableBoxEncode.xyxy2xywhV  s    ]]6*
"1a4L6!Q$<71<
1a4"1a4L6!Q$<71<
1a4!!Q$<&A,6
1a4!!Q$<&A,6
1a4r   r  N)r  r  )	r%   r&   r'   r(   r   r"   r  r  r)   r*   r   r   r  r  9  s    -
r   r  c                   D   ^  \ rS rSrSr SU 4S jjrS rS rS rSr	U =r
$ )	SARLabelEncodei_  rj   c                 .   > [         [        U ]  XU5        g r   )r   r  r   r   s        r   r   SARLabelEncode.__init__b  r   r   c                     SnSnSnX/-   n[        U5      S-
  U l        X/-   n[        U5      S-
  U l        [        U5      S-
  U l        X/-   n[        U5      S-
  U l        U$ Nz	<BOS/EOS>r  r  rQ   r>   r  r  rH  padding_idxr   r   beg_end_strr  padding_strs        r   r   SARLabelEncode.add_special_chari  {    !'-7~.2'-7^,q0>*Q.'-7~.2r   c                    US   nU R                  U5      nUc  g [        U5      U R                  S-
  :  a  g [        R                  " [        U5      5      US'   U R
                  /U-   U R                  /-   n[        U R                  5       Vs/ s H  o@R                  PM     nnX5S [        U5      & [        R                  " U5      US'   U$ s  snf )Nr   rQ   r   )	r   r>   rt   rB   rC   r  rH  r?   r  r   r!   r   targetr   padded_texts         r   r"   SARLabelEncode.__call__w  s    G}{{4 <t9))A--#d),X..!D(DLL>916t7H7H1IJ1IA''1IJ%+Mc&k"-W	 Ks   Cc                     U R                   /$ r   r  r  s    r   rI  !SARLabelEncode.get_ignored_tokens        !!r   )rH  r  r  r  r   )r%   r&   r'   r(   r   r   r   r"   rI  r)   r   r   s   @r   r  r  _  s&    3 IN
" "r   r  c                   N   ^  \ rS rSrSr   S	U 4S jjrS rS rS rS r	Sr
U =r$ )
SATRNLabelEncodei  rj   c                 :   > [         [        U ]  XU5        X@l        g r   )r   r  r   rw   r   r   r   r   rw   r   r   s         r   r   SATRNLabelEncode.__init__  s!     	..	
 
r   c                     SnSnSnX/-   n[        U5      S-
  U l        X/-   n[        U5      S-
  U l        [        U5      S-
  U l        X/-   n[        U5      S-
  U l        U$ r  r  r  s        r   r   !SATRNLabelEncode.add_special_char  r  r   c                     U R                   (       a  UR                  5       n/ nU H7  nUR                  U R                  R                  X0R                  5      5        M9     [        U5      S:X  a  g U$ Nr   )rw   r@   r   r   r  r>   r   s       r   r   SATRNLabelEncode.encode  sY    ::::<D	DTYY]]41A1ABC y>Qr   c                    US   nU R                  U5      nUc  g [        R                  " [        U5      5      US'   U R                  /U-   U R
                  /-   n[        U R                  5       Vs/ s H  o@R                  PM     nn[        U5      U R                  :  a  US U R                   nOX5S [        U5      & [        R                  " U5      US'   U$ s  snf )Nr   r   )	r   rB   rC   r>   r  rH  r?   rt   r  r  s         r   r"   SATRNLabelEncode.__call__  s    G}{{4 <#d),X..!D(DLL>916t7H7H1IJ1IA''1IJv;*** !44#4#45K)/#f+&-W Ks   2Cc                     U R                   /$ r   r  r  s    r   rI  #SATRNLabelEncode.get_ignored_tokens  r  r   )rH  rw   r  r  r  r   )r%   r&   r'   r(   r   r   r   r   r"   rI  r)   r   r   s   @r   r  r    s/    3
 !" "r   r  c                   @   ^  \ rS rSr SU 4S jjrS rS rS rSrU =r	$ )PRENLabelEncodei  c                 .   > [         [        U ]  XU5        g r   )r   r  r   r   s        r   r   PRENLabelEncode.__init__  r5  r   c                 J    SnSnSnX#U/U-   nSU l         SU l        SU l        U$ )Nr  r  z<UNK>r   rQ   rP   )r  rH  r  )r   r   r  rv   r  s        r   r    PRENLabelEncode.add_special_char  s=    %<~Mr   c                    [        U5      S:X  d  [        U5      U R                  :  a  g U R                  (       a  UR                  5       n/ nU HM  nX0R                  ;  a  UR	                  U R
                  5        M/  UR	                  U R                  U   5        MO     UR	                  U R                  5        [        U5      U R                  :  a(  X R                  /U R                  [        U5      -
  -  -  nU$ r  )r>   rt   rw   r   r@   r  rH  r  r   s       r   r   PRENLabelEncode.encode  s    t9>SY$*;*;;::::<D	D99$  !1!12  41	 
 	&y>D---**+t/@/@3y>/QRRIr   c                 l    US   nU R                  U5      nUc  g [        R                  " U5      US'   U$ r   )r   rB   rC   )r   r!   r   encoded_texts       r   r"   PRENLabelEncode.__call__  s9    G}{{4(.Wr   )rH  r  r  F)
r%   r&   r'   r(   r   r   r   r"   r)   r   r   s   @r   r  r    s    CH

  r   r  c                   h   ^  \ rS rSrSr       SU 4S jjrS rS rS rS r	S r
S	 rS
 rSrU =r$ )VQATokenLabelEncodei  z"
Label encode for NLP VQA methods
c	                 4  > [         [        U ]  5         SSKJn
JnJn  SSKJn  U
SS.USS.USS.S.nX l	        X   nUS	   R                  US
   5      U l        U" U5      u  U l        nX0l        Xpl        Xl        XPl        X`l        U R"                  S;   d   eg )Nr   )LayoutXLMTokenizerLayoutLMTokenizerLayoutLMv2Tokenizer)load_vqa_bio_label_mapszlayoutxlm-base-uncased)classpretrained_modelzlayoutlm-base-uncasedzlayoutlmv2-base-uncased)	LayoutXLMLayoutLM
LayoutLMv2r  r  )Ntb-yx)r   r  r   paddlenlp.transformersr	  r
  r  ppocr.utils.utilityr  contains_refrom_pretrained	tokenizerlabel2id_mapadd_special_ids
infer_mode
ocr_engineuse_textline_bbox_infoorder_method)r   r   r  r  	algorithmr  r  r  r  r   r	  r
  r  r  tokenizer_dicttokenizer_configid2label_mapr   s                    r   r   VQATokenLabelEncode.__init__  s     	!413	
 	

 	@ ,$<
 +$;
 -$=
 ')4)'2BB/0
 +B**M'<.$$&<#(  O333r   c           	      ,   UR                  5       n/ nSnUu  pxpX-
  [        U5      -  n[        U5       H[  u  p[        U5      U-  nXxX~-   U
/nUR                  U/[        UR	                  U5      5      -  5        U[        U5      S-   U-  -  nM]     U$ Nr   rQ   )splitr>   r   extendtokenize)r   r  r   r  wordstoken_bboxescurr_word_idxx1y1x2y2unit_wr   wordcurr_w	word_bboxs                   r   
split_bboxVQATokenLabelEncode.split_bbox&  s    

'SY&"5)ICY'Fb1Ic)2D2DT2J.K KL3t9q=F**B	 *
 r   c                 l   / n/ n[        U5       HR  u  pE[        US   5      S:  a'  UR                  [        R                  " U5      5        M>  UR                  US   5        MT     [        U5       H?  u  pE/ nUS    H(  nUS   U;   d	  US   U;   a  M  UR                  U5        M*     XbU   S'   MA     U$ )z/
find out the empty texts and remove the links
r4   r   idlinkingrQ   )r   r>   r@   r  deepcopy)r   ocr_infonew_ocr_infoempty_indexr   infonew_linklinks           r   filter_empty_contents)VQATokenLabelEncode.filter_empty_contents3  s     "8,IC4()A-##DMM$$78""4:.	 - #<0ICHY7k)T!W-C% ( ,4i( 1 r   c                 &	   U R                  U5      n[        [        U5      5       H(  nSX#   ;  d  M  U R                  X#   S   5      X#   S'   M*     U R                  S:X  a  [        U5      nU R                  =(       a    U R                  (       + nU(       a  U R                  U5      nUS   R                  u  pVn/ n/ n	/ n
/ n/ n/ n/ nU(       a  / n0 n0 n[        5       n[        R                  " U5      US'   U GH  nUS   n[        U5      S::  a  M  U(       ag  [        U5      S:X  a  WR                  US   5        MF  US	   WUS   '   WR                  US
    Vs/ s H  n[        [!        U5      5      PM     sn5        U R                  US   5      US'   U R"                  R%                  USSSS9nU R&                  (       d!  US   SS US'   US   SS US'   US   SS US'   U R(                  (       a  US   /[        US   5      -  nO#U R+                  US   US   U R"                  5      n[        U5      S::  a  GMI  U R-                  UXV5      nU R&                  (       a'  UR/                  S/ SQ5        UR1                  / SQ5        U R                  (       d  US	   nU R3                  UU5      nU(       a|  WS   U R4                  S   :w  ae  [        U5      WUS   '   WR7                  5       nUR1                  [        U
5      [        U
5      [        US   5      -   UR7                  5       S.5        O6UR1                  [        U
5      [        U
5      [        US   5      -   SS.5        U
R                  US   5        UR                  US   5        U	R                  U5        UR1                  U5        UR1                  [        U
5      5        U R                  (       a  GM  UR                  W5        GM      XS'   XS'   XS'   S/[        U
5      -  US'   XS'   XS'   [9        U R"                  R:                  U R"                  R<                  U R"                  R>                  S9US'   XS'   U(       a  WUS'   WUS'   WUS'   WUS'   U$ s  snf )Nr  r3   r  r   r9  r4   r   r6  r   r7  FT)pad_to_max_seq_lenreturn_attention_maskreturn_token_type_ids	input_idsrQ   ra   token_type_idsattention_mask)r   r   r   r   O)startrC  r   r   segment_offset_id)padding_sidepad_token_type_idpad_token_idtokenizer_paramsentitiesr   id2labelempty_entityentity_id_to_index_map) _load_ocr_infor?   r>   trans_poly_to_bboxr  r   r  r  r?  r   setr  r8  addr&  tuplesortedr  r   r  r  r3  _smooth_boxinsertr@   _parse_labelr  upperr   rK  rL  rM  )r   r!   r9  r   train_reheightwidthr   
words_list	bbox_listinput_ids_listtoken_type_ids_listrJ  gt_label_listrO  r   rP  rR  rQ  r<  r   l
encode_resr  r   gt_labels                             r   r"   VQATokenLabelEncode.__call__H  s   &&t,X'CX]*(,(?(?h@W(Xf% ( '$X.H ##;DOO(;11(;H=..q
	 IH%'"5L==2ZD(D4yA~t9> $$T$Z0'+G}d$  DO!LOq%q	"2O!LM224>BDL..#(&*&*	 / J ''*4[*A!B*G
;'/9:J/KAb/Q
+,/9:J/KAb/Q
+,**V~J{,C(DDL$"7 4yA~##D&8D##A|,L) ??W,,UJ? A;$"3"3C"889<X*4:6!KKMEOO%(%8#&~#6Z=T9U#U%*[[] !$^!4">2SK9P5QQ!$ !!*["9:&&z2B'CDT"d#$$S%89???$$X.O R +[!4 V"#s>':!:&X$5 !#'44"nn>>44$
 
 $Z )D'D#/D -CD)*c "Ms   R
c           	         [        [        R                  " U Vs/ s H  o"S   PM	     sn5      5      n[        [        R                  " U Vs/ s H  o"S   PM	     sn5      5      n[        [        R                  " U Vs/ s H  o"S   PM	     sn5      5      n[        [        R                  " U Vs/ s H  o"S   PM	     sn5      5      nX5XF/$ s  snf s  snf s  snf s  snf r$  )intrB   r  r   )r   polyr  r+  r-  r,  r.  s          r   rT  &VQATokenLabelEncode.trans_poly_to_bbox  s    t,t!1t,-.t,t!1t,-.t,t!1t,-.t,t!1t,-.	 -,,,s   CC>C0Cc                    U R                   (       a]  U R                  R                  US   SS9S   n/ nU H3  nUR                  US   S   U R	                  US   5      US   S.5        M5     U$ US   n[
        R                  " U5      nU$ )Nr   F)clsr   rQ   )r4   r  r3   r   )r  r  ocrr@   rT  r<   r=   )r   r!   
ocr_resultr9  resr<  	info_dicts          r   rS  "VQATokenLabelEncode._load_ocr_info  s    ??,,T'],FqIJH!),Q $ 7 7A ?"%a& " O=D

4(Ir   c                     [         R                  " U5      nUS S 2S4   S-  U-  US S 2S4'   US S 2S4   S-  U-  US S 2S4'   US S 2S4   S-  U-  US S 2S4'   US S 2S4   S-  U-  US S 2S4'   UR                  S5      R                  5       nU$ )Nr   i  rP   rQ   rS   int64)rB   rC   r   tolist)r   r   r^  r_  s       r   rY  VQATokenLabelEncode._smooth_box  s    &!ad|d*U2q!tad|d*U2q!tad|d*V3q!tad|d*V3q!tw'..0r   c                 Z   / nUR                  5       S;   a#  UR                  S/[        US   5      -  5        U$ UR                  U R                  SU-   R                  5          5        UR                  U R                  SU-   R                  5          /[        US   5      S-
  -  5        U$ )N)otherothersignorer   rE  zb-zi-rQ   )rw   r&  r>   r@   r  r\  )r   r   rf  rg  s       r   r[   VQATokenLabelEncode._parse_label  s    ;;=99OOQC#j&=">>?  OOD--te|.B.B.DEFOO""D5L#7#7#9:;z+./!35 r   )r  r  r  r  r  r  r  r  )FFr  TNFN)r%   r&   r'   r(   r   r   r3  r?  r"   rT  rS  rY  r[  r)   r   r   s   @r   r  r    sO     #-4^*~@ &
 
r   r  c                   8   ^  \ rS rSr   SU 4S jjrS rSrU =r$ )MultiLabelEncodei  c                    > [         [        U ]  XU5        [        XU40 UD6U l        X@l        Uc  [        XU40 UD6U l        g [        U5      " XU40 UD6U l        g r   )	r   r~  r   r   
ctc_encodegtc_encode_typer  
gtc_encodeeval)r   r   r   r   r  r   r   s         r   r   MultiLabelEncode.__init__  s|     	..	
 ).
DJ
  *,nHNDO #:.nHNDOr   c                    [         R                  " U5      n[         R                  " U5      n[        5       nUR                  SS 5      US'   US   US'   U R                  R                  U5      nU R                  R                  U5      nUb  Uc  g US   US'   U R                  b	  US   US'   OUS   US'   US   US'   U$ )Nimg_pathr   r   	label_ctc	label_gtc	label_sarr   )r  r8  r   r   r  r"   r  r  )r   r!   data_ctcdata_gtcdata_outctcgtcs          r   r"   MultiLabelEncode.__call__  s    ==&==&6#xx
D9 Moo&&x0oo&&x0;#+ #G+$'LH[!$'LH[! ]r   )r  r  r  )NFNr   r   s   @r   r~  r~    s     !2 r   r~  c                   >   ^  \ rS rSrSr SU 4S jjrS rS rSrU =r	$ )NRTRLabelEncodei%  rj   c                 .   > [         [        U ]  XU5        g r   )r   r  r   r   s        r   r   NRTRLabelEncode.__init__(  r5  r   c                 n   US   nU R                  U5      nUc  g [        U5      U R                  S-
  :  a  g [        R                  " [        U5      5      US'   UR                  SS5        UR                  S5        US/U R                  [        U5      -
  -  -   n[        R                  " U5      US'   U$ )Nr   rQ   r   r   rP   rS   )r   r>   rt   rB   rC   rZ  r@   r>  s      r   r"   NRTRLabelEncode.__call__/  s    G}{{4 <t9))A--#d),XAqAqcT..T:;;Wr   c                     / SQU-   nU$ )N)r   <unk><s></s>r*   r   s     r   r    NRTRLabelEncode.add_special_char=  s    :^Kr   r*   r   r   r   s   @r   r  r  %  s!    3 IN
 r   r  c                   J   ^  \ rS rSrSrSrSrSr S
U 4S jjrS r	S r
S	rU =r$ )ParseQLabelEncodeiB  rj   z[B]z[E]z[P]c                 .   > [         [        U ]  XU5        g r   )r   r  r   r   s        r   r   ParseQLabelEncode.__init__I  s     	/.	
r   c                    US   nU R                  U5      nUc  g [        U5      U R                  S-
  :  a  g [        R                  " [        U5      5      US'   U R
                  U R                     /U-   U R
                  U R                     /-   nX R
                  U R                     /U R                  [        U5      -
  -  -   n[        R                  " U5      US'   U$ )Nr   rP   r   )	r   r>   rt   rB   rC   r   BOSEOSPADr>  s      r   r"   ParseQLabelEncode.__call__P  s    G}{{4 <t9))A--#d),X		$((#$t+tyy/B.CCyy*+t/@/@3t9/LMMWr   c                 X    U R                   /U-   U R                  U R                  /-   nU$ r   )r  r  r  r   s     r   r   "ParseQLabelEncode.add_special_char]  s*    ((n4$((7KKr   r*   r   )r%   r&   r'   r(   r   r  r  r  r   r"   r   r)   r   r   s   @r   r  r  B  s0    3
C
C
C IN
 r   r  c                   B   ^  \ rS rSrSr   SU 4S jjrS rS rSrU =r	$ )ViTSTRLabelEncodeib  rj   c                 :   > [         [        U ]  XU5        X@l        g r   )r   r  r   ignore_indexr   r   r   r   r  r   r   s         r   r   ViTSTRLabelEncode.__init__e  "     	/.	
 )r   c                    US   nU R                  U5      nUc  g [        U5      U R                  :  a  g [        R                  " [        U5      5      US'   UR                  SU R                  5        UR                  S5        X R                  /U R                  S-   [        U5      -
  -  -   n[        R                  " U5      US'   U$ r<  )r   r>   rt   rB   rC   rZ  r  r@   r>  s      r   r"   ViTSTRLabelEncode.__call__r  s    G}{{4 <t9)))#d),XAt(()A(()T->->-BSY-NOOWr   c                     SS/U-   nU$ )Nr  r  r*   r   s     r   r   "ViTSTRLabelEncode.add_special_char  s    >9r   r  )NFr   r   r   s   @r   r  r  b  s%    3
 !) r   r  c                   B   ^  \ rS rSrSr   SU 4S jjrS rS rSrU =r	$ )ABINetLabelEncodei  rj   c                 :   > [         [        U ]  XU5        X@l        g r   )r   r  r   r  r  s         r   r   ABINetLabelEncode.__init__  r  r   c                 \   US   nU R                  U5      nUc  g [        U5      U R                  :  a  g [        R                  " [        U5      5      US'   UR                  S5        X R                  /U R                  S-   [        U5      -
  -  -   n[        R                  " U5      US'   U$ )Nr   r   r   rQ   )r   r>   rt   rB   rC   r@   r  r>  s      r   r"   ABINetLabelEncode.__call__  s    G}{{4 <t9)))#d),XA(()T->->-BSY-NOOWr   c                     S/U-   nU$ Nr  r*   r   s     r   r   "ABINetLabelEncode.add_special_char  s     N2r   r  )NFd   r   r   s   @r   r  r    s%    3
 !) r   r  c                   :   ^  \ rS rSr SU 4S jjrS rS rSrU =r$ )SRLabelEncodei  c                 ~  > [         [        U ]  XU5        0 U l        [	        US5       nUR                  5        H3  nUR                  5       nUR                  5       u  pxXR                  U'   M5     S S S 5        Sn	0 U l        [        [        U	5      5       H  n
XR                  X   '   M     g ! , (       d  f       NC= f)Nr   
0123456789)r   r  r   dicr{   r|   r~   r%  english_stroke_dictr?   r>   )r   r   r   r   r   r   r   r   sequenceenglish_stroke_alphabetr   r   s              r   r   SRLabelEncode.__init__  s     	mT+.	
 %s+szz|&*jjl#	&.# ( ,
 #/#% 3678EGL$$%<%CD 9 ,+s   AB..
B<c                 :   SnU H%  nX0R                   ;  a  M  X R                   U   -  nM'     US-  nUn[        U5      n[        R                  " U R                  5      R                  S5      n[        US-
  5       H  nU R                  X      XVS-   '   M     XE4$ )Nr   0ru  rQ   )r  r>   rB   rT   rt   r   r?   r  )r   r   stroke_sequencer   r   input_tensorr  s          r   r   SRLabelEncode.encode  s    I(88I#66	 
 	3Uxx 1 1299'Bvz"A"&":":58"DLQ # ##r   c                 N    US   nU R                  U5      u  p4X1S'   XAS'   Uc  g U$ )Nr   r   r  )r   )r   r!   r   r   r  s        r   r"   SRLabelEncode.__call__  s8    G}#{{40X+^<r   )r  r  r   	r%   r&   r'   r(   r   r   r"   r)   r   r   s   @r   r  r    s    HMM"$$ r   r  c                   B   ^  \ rS rSrSr   SU 4S jjrS rS rSrU =r	$ )SPINLabelEncodei  rj   c                 :   > [         [        U ]  XU5        X@l        g r   )r   r  r   rw   r  s         r   r   SPINLabelEncode.__init__  s      	ot-.	
 
r   c                 ^    SU l         SU l        U R                   /U R                  /-   U-   nU$ r7  r8  r   s     r   r    SPINLabelEncode.add_special_char  s1    ,,4<<.8>Ir   c                 l   US   nU R                  U5      nUc  g [        U5      U R                  :  a  g [        R                  " [        U5      5      US'   S/U-   S/-   n[        U R                  S-   5       Vs/ s H  nSPM     nnX5S [        U5      & [        R                  " U5      US'   U$ s  snf r<  )r   r>   rt   rB   rC   r?   r  s         r   r"   SPINLabelEncode.__call__  s    G}{{4 <t9t(((#d),Xtqc!"'(9(9A(=">?">Qq">?%+Mc&k"-W	 @s   ;B1)ru   rv   rw   )NFTro  r   s   @r   r  r    s%    3
 ! r   r  c                   8   ^  \ rS rSrSr SU 4S jjrS rSrU =r$ )VLLabelEncodei  rj   c                    > [         [        U ]  XU5        0 U l        [	        U R
                  5       H  u  pVXPR                  U'   M     g r   )r   r  r   r   r   r   )r   r   r   r   r   r   r   r   s          r   r   VLLabelEncode.__init__  sB     	mT+.	
 	 0GAIIdO 1r   c                 "   US   n[        U5      nUS::  a  g Sn[        [        U5      5      n[        XT5      S   nX&   nXcS-
  :X  a  US U nOUS:X  a  USS  nOUS U X&S-   S  -   nXS'   XqS'   XaS'   U R	                  U5      nUc  g U V	s/ s H  oS-   PM	     nn	[
        R                  " [        U5      5      US'   US/U R                  [        U5      -
  -  -   n[
        R                  " U5      US'   U R	                  U5      nU R	                  U5      nUc  / nOU V	s/ s H  oS-   PM	     nn	Uc  / nOU V	s/ s H  oS-   PM	     nn	[
        R                  " [        U5      5      US'   [
        R                  " [        U5      5      US	'   US/U R                  [        U5      -
  -  -   nUS/U R                  [        U5      -
  -  -   n[
        R                  " U5      US'   [
        R                  " U5      US'   U$ s  sn	f s  sn	f s  sn	f )
Nr   r   rQ   	label_res	label_sublabel_idr   
length_res
length_sub)r>   rz   r?   r   r   rB   rC   rt   )
r   r!   r   len_str
change_numorder	change_idr  r  r   s
             r   r"   VLLabelEncode.__call__	  s   G}d)a<
U7^$5-a0	O	1%Zi(I!^QRIZi(4A+@@I%[%[$Z{{4 <#$t!At$#d),XqcT..T:;;WKK	*	KK	*	I(12	1Q	I2I(12	1Q	I2XXc)n5\XXc)n5\t'8'83y>'I JJ	t'8'83y>'I JJ	HHY/[HHY/[) % 3 3s   H"H<H)r   r   )	r%   r&   r'   r(   r   r   r"   r)   r   r   s   @r   r  r    s    3 IN , ,r   r  c                        \ rS rSrS rS rSrg)CTLabelEncodei8  c                     g r   r*   r/   s     r   r   CTLabelEncode.__init__9  r1   r   c                 :   US   n[         R                  " U5      n[        U5      n/ / pT[        SU5       HI  nX&   S   n[        R
                  " U5      nUR                  U5        X&   S   nUR                  U5        MK     [        U5      S:X  a  g XAS'   XQS'   U$ )Nr   r   r3   r4   r9   r:   )r<   r=   r>   r?   rB   rC   r@   )	r   r!   r   rF   rG   rH   rJ   rK   rL   s	            r   r"   CTLabelEncode.__call__<  s    W

5!5z"tD>C*X&C((3-CLL*_-CKK " u:?WWr   r*   Nr$   r*   r   r   r  r  8  s    r   r  c                   >   ^  \ rS rSr   SU 4S jjrS rS rSrU =r$ )CANLabelEncodeiR  c                 .   > [         [        U ]  X!X45        g r   )r   r  r   )r   r   r   r   rw   r   r   s         r   r   CANLabelEncode.__init__S  s     	nd,.	
r   c                     / nU H>  nX0R                   ;  a  M  UR                  U R                  R                  U5      5        M@     [	        U5      S:X  a  g U$ r  )r   r@   r   r   r>   )r   text_seqtext_seq_encodedr   s       r   r   CANLabelEncode.encode_  sS    D>>)##DIIMM$$78   A%r   c                     US   n[        U[        5      (       a  UR                  5       R                  5       nUR	                  U R
                  5        U R                  U5      US'   U$ r   )
isinstancer  r~   r%  r@   rv   r   r    s      r   r"   CANLabelEncode.__call__i  sS    WeS!!KKM'')ET\\"E*Wr   r*   )r  FTr  r   s   @r   r  r  R  s"     

  r   r  c                   P   ^  \ rS rSrSr    S	U 4S jjrS rS rS rS r	Sr
U =r$ )
CPPDLabelEncodeir  rj   c                 F   > [         [        U ]  XU5        X@l        XPl        g r   )r   r  r   chr  )r   r   r   r   r  r  r   r   s          r   r   CPPDLabelEncode.__init__u  s&     	ot-.	
 (r   c                 h   US   nU R                   (       Ga  U R                  U5      u  p#nUc  g [        U5      U R                  :  a  g [        R
                  " [        U5      5      US'   S/[        U5      S-   -  S/U R                  [        U5      -
  -  -   nUR                  S5        X R                  /U R                  S-   [        U5      -
  -  -   n[        R
                  " U5      US'   [        R
                  " XE-   5      US'   [        R
                  " U5      US'   U$ U R                  U5      u  p&nUc  g [        U5      U R                  :  a  g [        R
                  " [        U5      5      US'   S/[        U5      S-   -  S/U R                  [        U5      -
  -  -   nUR                  S5        X R                  /U R                  S-   [        U5      -
  -  -   n[        R
                  " U5      US'   [        R
                  " Xe-   5      US'   [        R
                  " U5      US'   U$ )Nr   r   rQ   r   
label_nodelabel_indexlabel_order)	r  encodechr>   rt   rB   rC   r@   r  r   )r   r!   r   text_node_indextext_node_numtext_pos_nodetext_char_nodech_orders           r   r"   CPPDLabelEncode.__call__  s   G}77737==3F0D=|4y4,,,XXc$i0DNC3t9q=1QC!!CI-5 M KKN,,-1B1BQ1FT1RSSDHHTNDM!#-*G!HD"$((?";DK-1[[->*D(|4yD---XXc$i0DNC3t9q=1QC!!CI-5 M KKN,,-1B1BQ1FT1RSSDHHTNDM!#.*H!ID"$((8"4DKr   c                 2    S/U-   n[        U5      U l        U$ r  )r>   num_characterr   s     r   r    CPPDLabelEncode.add_special_char  s!     N2 0r   c                 "   [        U5      S:X  d  [        U5      U R                  :  a  gU R                  (       a  UR                  5       n[        U R                  5       Vs/ s H  nSPM     nnSUS'   / n/ nSnU H  nXpR
                  ;  a  M  UR                  U R
                  U   5        X0R
                  U   ==   S-  ss'   UR                  U R
                  U   X0R
                  U      U/5        US-  nM     / nU R                   H+  nXq;  d  M
  UR                  U R
                  U   SS/5        M-     [        R                  " U5        XX-   nUSU R                  S-    n[        U5      S:X  a  gXCUR                  5       4$ s  snf )r   r   NNNrQ   N)r>   rt   rw   r?   r  r   r@   r   randomshufflesort)	r   r   r   	text_noder   r  r  r   no_ch_orders	            r   r   CPPDLabelEncode.encode  si   t9>SY):)::#::::<D %d&8&8 9: 91Q 9	:	!	D99$TYYt_-iio&!+&OOTYYt_i		$.H%PQQJE  NND""DIIdOQ#:; # 	{#)3d//!34y>Q#X]]_44/ ;s   "Fc           
      <   [        U5      S:X  d  [        U5      U R                  :  a  gU R                  (       a  UR                  5       n0 nUR                  SS05        [	        U R
                  5       Vs/ s H  o3PM     nn/ nU Hh  nX`R                  ;  a  M  U R                  U   nUR                  U5        XrR                  5       ;   a  X'==   S-  ss'   MU  UR                  US05        Mj     [        UR                  5       5       H  nUR                  U5        M     [        US[        [        UR                  5       5      5      -
  5      n	U	 H  nSX('   M	     [        U5      n
U
 Vs/ s H  oU   PM	     nn[        U5      S:X  a  gXZU4$ s  snf s  snf )r   r   r	  rQ   %   )r>   rt   rw   updater?   r  r   r@   r%  rz   r  r   rX  )r   r   text_node_dictr   character_indexr   r   i_cicnone_char_indexr  r  r   s                r   r  CPPDLabelEncode.encodech  sx   t9>SY):)::#::::<Dq!f%&+D,>,>&?@&?1&?@	D99$))D/CS!))++#q(#%%sAh/  ~**,-B""2& . "s4@S@S@U;V7W2WX!B!"N " !04CDOq*ODy>Q#=88+ A$ Es   7F0F)r  r  r  )NFFr  )r%   r&   r'   r(   r   r   r"   r   r   r  r)   r   r   s   @r   r  r  r  s3    3
 !)(T
5>9 9r   r  c                   J    \ rS rSrS r       SS jr    S	S jrS rSrg)
LatexOCRLabelEncodei  c                 x    SSK Jn  UR                  U5      U l        / SQU l        SU l        SU l        SU l        g )Nr   	TokenizerrE  rF  rG  rQ   rP   )
tokenizersr  	from_filer  model_input_namesrM  bos_token_ideos_token_id)r   rec_char_dict_pathr   TokenizerFasts       r   r   LatexOCRLabelEncode.__init__  s:    
 	:&001CD!Rr   Nc	                    Uc  SU R                   ;   nUc  SU R                   ;   nU(       a  UR                  b  U/UR                  -   n	OU/n	[        [        5      n
U	 H  nU
S   R	                  UR
                  5        U(       a  U
S   R	                  UR                  5        U(       a  U
S   R	                  UR                  5        U(       a  U
S   R	                  UR                  5        U(       a  U
S   R	                  UR                  5        U(       d  M  U
S   R	                  [        UR
                  5      5        M     X4$ NrF  rG  rE  special_tokens_maskoffset_mappingr   r!  overflowingr   rz   r@   idstype_idsrG  r)  offsetsr>   r   r   rD  rC  return_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbose	encodingsencoding_dictes               r   _convert_encoding%LatexOCRLabelEncode._convert_encoding   s    !($48N8N$N! ($48N8N$N!$)=)=)I!
X%9%99I!
I#D)A+&--aee4$./66qzzB$./66q7G7GH)34;;A<Q<QR%./66qyyA}h'..s155z:  ''r   c                 0   UnU R                   R                  UUUS9nU Vs/ s H  nU R                  USS SSSSSS9PM     n	n0 n
U	S   S   R                  5        H*  nU	 VVVs/ s H  u  pX     H  oPM     M     nnnnXU'   M,     U
$ s  snf s  snnnf )Nadd_special_tokensis_pretokenizedFTr   rD  rC  r1  r2  r3  r4  r5  r   )r  encode_batchr9  r%  r   r   	text_pairrD  r=  is_split_into_wordsbatched_inputr6  r   tokens_and_encodingssanitized_tokenskeyitemr   r8  r   s                   r   r   LatexOCRLabelEncode.encode'  s     NN//1/ 0 
	  & 
 & ""!&+&**/+0',# # 	 & 	  
 '*1-224C&:N&:74DIqQIQ&:EN$)S! 5  # 
 Os    B%B
c                 z   U R                  U5      n[        X R                  U R                  /SS//5       H  u  p4X#    Vs/ s H  oTS   /U-   US   /-   PM     nnSnU H  n[	        U[        U5      5      nM     [        R                  " [        U5      U4SS9n	[        U5       H  u  p[        U5      nXU
   S U& M     XU'   M     [        R                  " US   5      R                  [        R                  5      [        R                  " US   5      R                  [        R                  5      W4$ s  snf )NrQ   r   ru  r7   rE  rG  )r   r$  r"  r#  r   r>   rB   rT   r   rC   r   ru  )r   eqstopkr  r  r   process_seq
max_lengthseqr   r   re  s               r   r"   LatexOCRLabelEncode.__call__H  s$   {{3 1 143D3DE1vNODA8<@1aD6A:1.K@J" SX6
 #XXs;/<GLF%k2H"%sBQ 3 G P HHT+&'..rxx8HHT*+,33BHH=
 	
 As   D8)r"  r#  r!  rM  r  NNFFFFTNFTF)	r%   r&   r'   r(   r   r9  r   r"   r)   r*   r   r   r  r    s=      #""'#($%(T #! B
r   r  c                   (    \ rS rSrSr\S 5       rSrg)ExplicitEnumi[  z;
Enum with more explicit error message for missing values.
c           
      ~    [        U SU R                   S[        U R                  R	                  5       5       35      e)Nz is not a valid z, please select one of )r&  r%   rz   _value2member_map_r%  )rn  values     r   	_missing_ExplicitEnum._missing_`  s?    g%cll^3J4PSPfPfPkPkPmKnJop
 	
r   r*   N)r%   r&   r'   r(   r   classmethodrX  r)   r*   r   r   rT  rT  [  s     
 
r   rT  c                   (    \ rS rSrSrSrSrSrSrSr	g)	TruncationStrategyig  z}
Possible values for the `truncation` argument in [`PreTrainedTokenizerBase.__call__`]. Useful for tab-completion in
an IDE.

only_firstonly_secondlongest_firstdo_not_truncater*   N)
r%   r&   r'   r(   r   
ONLY_FIRSTONLY_SECONDLONGEST_FIRSTDO_NOT_TRUNCATEr)   r*   r   r   r\  r\  g  s    
 JK#M'Or   r\  c                   $    \ rS rSrSrSrSrSrSrg)PaddingStrategyis  zz
Possible values for the `padding` argument in [`PreTrainedTokenizerBase.__call__`]. Useful for tab-completion in an
IDE.
longestrN  
do_not_padr*   N)	r%   r&   r'   r(   r   LONGEST
MAX_LENGTH
DO_NOT_PADr)   r*   r   r   rf  rf  s  s    
 GJJr   rf  c                       \ rS rSr/ SQrS rSS\4S jjrS r\	S 5       r
\	S 5       r\	S	 5       rS
 r       SS jr    SS jrS rSrg)UniMERNetLabelEncodei~  )	bos_token	eos_token	unk_token	sep_token	pad_token	cls_token
mask_tokenadditional_special_tokensc                 >   SSK Jn  SSK Jn  SU l        SU l        SU l        SU l        S U l        S U l        S U l	        / U l
        / SQU l        X l        S	U l        SU l        S
U l        SU l        SU l        SU l        S U l        [(        R*                  R-                  US5      n[(        R*                  R-                  US5      nUR/                  U5      U l        0 n0 n	UGb  [3        USS9 n
[4        R6                  " U
5      nSU;   a|  US   R9                  5        He  u  p[;        U[<        5      (       a  U" S0 UD6n[;        X5      (       a  X[?        U5      '   X[A        U5      '   MN  [C        SURD                   S35      e   XS'   URG                  S0 5      n[I        UR9                  5       S S9 VVs/ s H  u  pX;  d  M  UPM     nnnU RK                  U5      n[M        URO                  5       5      U Vs/ s H  n[A        U5      PM     sn-   nUU RP                   Vs/ s H  nUU;  d  M  X;  d  M  UPM     sn-  n[S        U5      S:  a  S n/ nU RT                  nU Hr  n[;        X5      (       a!  URV                  =(       d    [A        U5      U;   O[A        U5      U;   nUb  UU:X  a  URY                  U5        OU R[                  UUS9  U/nUnMt     U(       a  U R[                  UUS9  S S S 5        g g s  snnf s  snf s  snf ! , (       d  f       g = f)Nr   r  )
AddedTokenr  r  r  z<pad>r  rQ   rP   rightztokenizer.jsonztokenizer_config.jsonrp   r   added_tokens_decoderzFound a zV in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instancec                     U S   $ r  r*   )r   s    r   <lambda>/UniMERNetLabelEncode.__init__.<locals>.<lambda>  s    AaDr   rG  )special_tokensr*   ).r  r  rw  
_unk_token
_bos_token
_eos_token
_pad_token
_sep_token
_cls_token_mask_token_additional_special_tokensr!  max_seq_lenrM  r"  r#  rK  rr  rL  pad_to_multiple_ofospathjoinr   r  r{   r<   loadr  r  r   rj  r  r&  r   poprX  added_tokens_encoderrz   r%  all_special_tokens_extendedr>   all_special_tokensspecialr@   _add_tokens)r   r$  r  r   r%  rw  fast_tokenizer_filetokenizer_config_filery  added_tokens_maptokenizer_config_handleinit_kwargsr   r  r   tokens_to_addr  encoderis_last_specialr  r~  
is_specials                         r   r   UniMERNetLabelEncode.__init__  sN    	:)! !*,'!R&# !""& ggll+=?OP " 7!
 '001DE! ,%("ii(?@)[8&12H&I&O&O&Q
%eT22$.$7$7E%e88=BS:;@SZ8","*5??*;  <R  !S#  'R 7K23'27Mr'R$ )/,224.)!) 8	 )  ! (,'@'@AU'V$388:;,9?,95CJM?  !%!A!A"!AG+ 050J !A" 
 }%)&*OF%)%<%<N!.  *%<< #]]Jc%jN.J!$U~!= #
 +2o6S"MM%0 ,,VO,T&+WF*4 "/ (((Pe  -$!?"9 sJ   -CL8K>K>0L=LL&
L	4L	;L	B3L>L
Lreturnc                 |    U(       a  U R                   R                  U5      $ U R                   R                  U5      $ r   )r  r=  
add_tokens)r   
new_tokensr~  s      r   r   UniMERNetLabelEncode._add_tokens  s/    >>44Z@@~~((44r   c                     [        UR                  5       S S9 VVs0 s H  u  p#UR                  U_M     snn$ s  snnf )Nc                     U S   $ r  r*   )rH  s    r   r{  ;UniMERNetLabelEncode.added_tokens_encoder.<locals>.<lambda>  s	    dSTgr   r}  )rX  r  r  )r   ry  r  r  s       r   r  )UniMERNetLabelEncode.added_tokens_encoder  sE     399;AUV
V IIqLV
 	
 
s   :c                 Z    U R                    Vs/ s H  n[        U5      PM     nnU$ s  snf r   )r  r  )r   r\   all_tokss      r   r  'UniMERNetLabelEncode.all_special_tokens  s-    $($D$DE$DqCF$DE Fs   (c                 p   / n[        5       nU R                  R                  5        H  n[        U[        [
        45      (       a$  U Vs/ s H  n[        U5      U;  d  M  UPM     nnO[        U5      U;  a  U/O/ nUR                  [        [        U5      5        UR                  U5        M     U$ s  snf r   )
rU  special_tokens_map_extendedvaluesr  rz   rW  r  r  mapr&  )r   
all_tokensseenrW  r  r  s         r   r  0UniMERNetLabelEncode.all_special_tokens_extended  s    
u55<<>E%$//49 TE5SZt=SE T+.u:T+ArKKC/0m, ?  !Us   	B3 B3c                 h    0 nU R                    H  n[        U SU-   5      nU(       d  M  X1U'   M!     U$ )Nr   )SPECIAL_TOKENS_ATTRIBUTESgetattr)r   set_attrattr
attr_values       r   r  0UniMERNetLabelEncode.special_tokens_map_extended   s:    22D sTz2Jz!+ 3 r   c                    U R                   R                  nU R                   R                  nU[        R                  :X  a  Ub  U R
                  R                  5         OZUUUR                  SS.nUc  S n	O"U V
s0 s H  oUR                  U
S 5      _M     n	n
X:w  a  U R                   R                  " S0 UD6  U[        R                  :X  a  Ub  U R                   R                  5         g g U[        R                  :X  a  UOS nUU R                  U R                  U R                   U R"                  US.nXx:w  a  U R                   R$                  " S0 UD6  g g s  sn
f )Nrx  )rN  stridestrategy	direction)r   r  pad_idrr  pad_type_idr  r*   )r  
truncationrj  r\  rd  
_tokenizerno_truncationrW  r   enable_truncationrf  rk  
no_paddingrj  rK  rM  rr  rL  enable_padding)r   padding_strategytruncation_strategyrN  r  r  _truncation_paddingr  currentr  r   s               r   set_truncation_and_padding/UniMERNetLabelEncode.set_truncation_and_padding	  sI    nn//>>))"4"D"DD&--/ ) /55$	F "@FG1kooa66G 00:6:999#))+ $ //2L2LL
RV  !!..++!^^#55&8F !--77 "' Hs   :ENc	                    Uc  SU R                   ;   nUc  SU R                   ;   nU(       a  UR                  b  U/UR                  -   n	OU/n	[        [        5      n
U	 H  nU
S   R	                  UR
                  5        U(       a  U
S   R	                  UR                  5        U(       a  U
S   R	                  UR                  5        U(       a  U
S   R	                  UR                  5        U(       a  U
S   R	                  UR                  5        U(       d  M  U
S   R	                  [        UR
                  5      5        M     X4$ r(  r+  r0  s               r   r9  &UniMERNetLabelEncode._convert_encoding8  s    !($48N8N$N! ($48N8N$N!$)=)=)I!
X%9%99I!
I#D)A+&--aee4$./66qzzB$./66q7G7GH)34;;A<Q<QR%./66qyyA}h'..s155z:  ''r   c                    UnU R                  [        R                  [        R                  U R
                  SS S9  U R                  R                  UUUS9nU Vs/ s H  nU R                  USS SSSSSS9PM     n	n0 n
U	S   S   R                  5        H*  nU	 VVVs/ s H  u  pX     H  oPM     M     nnnnXU'   M,     U
$ s  snf s  snnnf )Nr   )r  r  rN  r  r  r<  FTr?  )
r  rf  ri  r\  rc  r  r  r@  r9  r%  rA  s                   r   r   UniMERNetLabelEncode.encode^  s    '',44 2 @ @''# 	( 	
 NN//1/ 0 
	" & 
 & ""!&+&**/+0',# # 	 & 	  
 '*1-224C&:N&:74DIqQIQ&:EN$)S! 5  # 
 Os    CC

c                    US   nU R                  U/5      n[        X0R                  U R                  /SS//5       H8  u  pEX4    Vs/ s H  ofPM     nnSnU H  n	[	        U[        U	5      5      nM     M:     [        R                  " US   5      R                  [        R                  5      S   US'   [        R                  " US   5      R                  [        R                  5      S   US'   U$ s  snf )Nr   rQ   r   rE  rG  )
r   r$  r"  r#  r   r>   rB   rC   r   ru  )
r   r!   rK  rL  r  r  r   rM  rN  rO  s
             r   r"   UniMERNetLabelEncode.__call__  s    7m{{C5! 1 143D3DE1vNODA&*g.g1gK.J" SX6
 # P
 k!23::288DQGW!#$/?*@!A!H!H!RST!U /s   C3)r  r  r  r  r  r  r  r  r"  r#  r  r!  r  rr  rM  rL  rK  r  r  rQ  rR  )r%   r&   r'   r(   r  r   rj  r  r  r  r  r  r  r  r9  r   r"   r)   r*   r   r   rm  rm  ~  s    	!UQn5s 5
   
 
  -8d #""'#($$(R #!' R
r   rm  )=
__future__r   r   r   r   r  enumr   r  numpyrB   stringshapely.geometryr   r	   r
   r<   r
  r   collectionsr   ppocr.utils.loggingr   ppocr.data.imaug.vqa.augmentr   objectr   r,   rh   r   r   r   r   r2  rU  rf  rq  r  r  r  r  r  r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  rT  r\  rf  rm  r*   r   r   <module>r     s   '  % ' 	     7 7     # * 6
V 
1V 1hA AH' <&+ &R& :o-V o-d-( -`7' 7t( D+' +\Q) Q)h)- )X#V #L("' ("V8") 8"v*( *ZC& CL*) *Z( :* @ *  F* D,& ,^#o #L9& 9xF 4' @}9( }9@f
& f
R	
3 	
	( 	(l S6 Sr   