
    iB                         S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
r
SSKJr  SSKJr  SSKJrJr   " S S\5      rg)	zj
This code is refer from:
https://github.com/lukas-blecher/LaTeX-OCR/blob/main/pix2tex/dataset/dataset.py
    N)Dataset   )LatexOCRLabelEncode)	transformcreate_operatorsc                   D   ^  \ rS rSrSU 4S jjrS rS rS rS rSr	U =r
$ )	LaTeXOCRDataSet"   c                 "  > [         [        U ]  5         X0l        UR	                  5       U l        US   nX   S   nX   S   nUR                  S5      nUS   U l        UR                  S5      U l        UR                  S5      U l	        UR                  S5      U l
        UR                  S	5      U l        UR                  S
5      U l        UR                  S5      U l        [        U R                  5      U l        [!        US5      n	["        R$                  " U	5      n
0 nU
 Hi  nU R                  S   US   s=::  a  U R                  S   ::  d  M/  O  M3  U R                  S   US   s=::  a  U R                  S   ::  d  M_  O  Mc  X   X'   Mk     Xl        US   U l        X@l        U R
                  S:X  a1  U R(                  (       a   [,        R*                  " U R*                  5        / U l        U R&                   GH,  n[0        R2                  " U R&                  U   [4        S9nU R
                  S:X  a0  U R(                  (       a  [6        R8                  " [;        U5      5      O[6        R<                  " [;        U5      5      n[?        S[;        U5      U R                  5       H  nXXU R                  -       n[;        UR@                  5      S:X  a	  US S S 24   n[;        U5      U R                  :  a  U R                  (       d  Md  U R.                  RC                  U5        M     GM/     U R(                  (       aF  [0        R,                  RE                  [0        R2                  " U R.                  [4        S95      U l        O([0        R2                  " U R.                  [4        S9U l        [;        U R.                  5      U l#        U RI                  U R*                  U5        [K        US   U5      U l&        URO                  SS5      U l(        SU l)        g )NGlobaldatasetloaderdatadata_dirmin_dimensionsmax_dimensionsbatch_size_per_pairkeep_smaller_batchesmax_seq_lenrec_char_dict_pathrbr   r   shuffletrain)dtype
transformsext_op_transform_idx   T)*superr	   __init__loggerlowermodepopr   r   r   	batchsizer   r   r   r   	tokenizeropenpickleloadr   
do_shuffleseedrandompairsnparrayobjectpaddlerandpermlenarangerangeshapeappendpermutationsizeset_epoch_as_seedr   opsgetr   
need_reset)selfconfigr"   r    r*   global_configdataset_configloader_configpkl_pathfiler   tempkinfopibatch	__class__s                    e/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddleocr/ppocr/data/latexocr_dataset.pyr   LaTeXOCRDataSet.__init__#   s%   ot-/JJL	x(i0X.!%%f-&z2,001AB,001AB'++,AB$2$6$67M$N!(,,];"/"3"34H"I,T-D-DEHd#{{4 A##A&!A$H$2E2Ea2HHH''*adLd6I6I!6LLL'  	'	2	99DOOKK		"
A88DIIaL7D 99'DOO D	*]]3t9- 
 1c$i8qt~~#567u{{#q(!$'NEu:.t7P7P

!!%( 9  ??..rxx

&/QRDJ$**F;DJ

O	tyy.9#N<$@-P$2$6$67Mq$Q!    c                    U R                   S:X  a   [        US   5       VVs/ s H  u  p4SU;   d  M  UPM     snnS   n[        US   5       VVs/ s H  u  p4SU;   d  M  UPM     snnS   nUb  UOSUS   U   S   S'   Ub  UOSUS   U   S   S'   g g s  snnf s  snnf ! [         a  n[        U5         S nAg S nAff = f)Nr   r   MakeBorderMapr   MakeShrinkMapepoch)r"   	enumerate	Exceptionprint)r=   r*   r@   index
dictionaryborder_map_idshrink_map_idEs           rK   r9   !LaTeXOCRDataSet.set_epoch_as_seed`   s
   99 .7~l7S-T!-T)&*4 -T! 	! .7~l7S-T!-T)&*4 -T! 	! "-T1 |,];OL
 "-T1 |,];OL  !
!  as>   B" BBB" B!B'-B" B" "
C,B<<Cc                     [         R                  " U R                  5        [         R                  " U R                  5        g N)r+   r*   r   
data_linesr=   s    rK   shuffle_data_random#LaTeXOCRDataSet.shuffle_data_randomw   s%    DIIt'rM   c                    U R                   U   nUR                  u  p4 Su  pVn/ nU H  n	[        R                  R	                  U R
                  U	5      n
SU
0n[        US   S5       nUR                  5       nXS'   [        XR                  5      nUR                  [        R                  " US   5      5        S S S 5        M     [        R                  " USS9S S 2[        R                  S S 2S S 24   nUR                  [        R                   5      nU R#                  [%        U5      5      u  nnnU R&                  U:  ad  U R(                  S:X  a-  [        R*                  R-                  U R/                  5       5      OUS-   U R/                  5       -  nU R1                  U5      $ UUU4$ ! , (       d  f       GM  = f!   U R2                  R5                  S	R7                  WS   [8        R:                  " 5       5      5        S n O= fUcd  U R(                  S:X  a-  [        R*                  R-                  U R/                  5       5      OUS-   U R/                  5       -  nU R1                  U5      $ U$ )
N)r   r   r   img_pathr   imager   )axisr   r   z1When parsing line {}, error happened with msg: {})r,   Tospathjoinr   r&   readr   r:   r6   r-   r.   concatenatenewaxisastypefloat32r%   listr   r"   r+   randint__len____getitem__r    errorformat	traceback
format_exc)r=   idxrI   eqsims	max_width
max_height
max_lengthimages_transform	file_namerb   r   fimgitemimage_concatlabelsattention_maskrnd_idxoutss                       rK   rq   LaTeXOCRDataSet.__getitem__|   s   

377"	07-I:! 	77<<yA $z*D1Q&&(C$'M$T884D$++BHHT!W,=>	 21 ! >>*:CArzzSTVWDWXL+222::>15S	1J.FNJ*, yyG+ II%%dlln5'T\\^3 
 ''00$fn== 21"	KKCJJ$i&:&:&<
 D< 99' 		!!$,,.1Ag/ 
 ##G,,s-   A	F? (AF,:C,F? 'F? ,
F<	6	F? ?AHc                     U R                   $ r\   )r8   r^   s    rK   rp   LaTeXOCRDataSet.__len__   s    yyrM   )r$   r   r   r)   r   r   r    r   r   r   r"   r<   r:   r,   r   r*   r8   r%   r\   )__name__
__module____qualname____firstlineno__r   r9   r_   rq   rp   __static_attributes____classcell__)rJ   s   @rK   r	   r	   "   s#    ;z.
/b rM   r	   )__doc__numpyr-   cv2mathrf   jsonr'   r+   rt   r0   	paddle.ior   imaug.label_opsr   imaugr   r   r	    rM   rK   <module>r      s>   
  
  	       0 .Lg LrM   