
    i)                         S SK rS SKrS SKrS SKJr  S SKrS SKrS SKrS SK	r	S SK
Jr  SSKJrJr   " S S\5      r " S S	\5      r " S
 S\5      rg)    N)Dataset)Image   )	transformcreate_operatorsc                   V   ^  \ rS rSrSU 4S jjrS rS rS rS rS r	S r
S	 rS
rU =r$ )LMDBDataSet   c                 *  > [         [        U ]  5         US   nX   S   nX   S   nUS   nUS   n	US   U l        U R	                  U	5      U l        UR                  SU	-  5        U R                  5       U l        U R                  (       a)  [        R                  R                  U R                  5        [        US   U5      U l        UR                  S	S
5      U l        UR                  SS/5      n
SU
 Vs/ s H  oS
:  PM	     sn;   U l        g s  snf )NGlobaldatasetloaderbatch_size_per_carddata_dirshufflez!Initialize indexes of datasets:%s
transformsext_op_transform_idxr   
ratio_listg      ?T)superr	   __init__
do_shuffleload_hierarchical_lmdb_dataset	lmdb_setsinfodataset_traversaldata_idx_order_listnprandomr   r   opsgetr   
need_reset)selfconfigmodeloggerseedglobal_configdataset_configloader_config
batch_sizer   r   x	__class__s               a/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddleocr/ppocr/data/lmdb_dataset.pyr   LMDBDataSet.__init__   s   k4)+x(i0X."#89
!*-'	2<<XF7(BC#'#9#9#; ??IId667#N<$@-P$2$6$67Mq$Q!#''se<
*"=*Qq5*"=="=s   7Dc           
      "   0 nSn[         R                  " US-   5       Hm  u  pEnU(       a  M  [        R                  " USSSSSS9nUR	                  SS9n[        UR                  SR                  5       5      5      n	UUUU	S	.X#'   US
-  nMo     U$ )Nr   /    TFmax_readersreadonlylock	readaheadmeminitwriteznum-samplesdirpathenvtxnnum_samplesr   )oswalklmdbopenbeginintr    encode)
r"   r   r   dataset_idxr;   dirnames	filenamesr<   r=   r>   s
             r-   r   *LMDBDataSet.load_hierarchical_lmdb_dataset1   s    	,.GGHsN,C(Gy8ii "!#! iiei,!#''-*>*>*@"AB&#.	*	& q % -D&     c                 n   [        U R                  5      nSn[        U5       H  nX R                  U   S   -  nM     [        R                  " US45      nSn[        U5       HN  nU R                  U   S   nXV-   nX4XW2S4'   [        [        U5      5      XEU2S4'   XEU2S4==   S-  ss'   XV-   nMP     U$ )Nr   r>      r   )lenr   ranger   zeroslist)r"   lmdb_numtotal_sample_numlnor   beg_idxtmp_sample_numend_idxs           r-   r   LMDBDataSet.dataset_traversalI   s    t~~&?Cs 3M BB # hh(8!'<=?C!^^C0?N.G69 236:5;P6Q 23 23q83.G # #"rJ   c                 ~    U(       d  g[         R                  " USS9nUc  g[        R                  " US5      nUc  gU$ get_img_dataNuint8)dtyper   r   
frombuffercv2imdecoder"   valueimgdataimgoris       r-   rZ   LMDBDataSet.get_img_dataY   =    --W5?gq)>rJ   c                    SnU R                    H"  n[        US5      (       d  M  [        US5      n  O   U R                   S U R                   n/ n[	        U5      U:  a  U R
                  [        R                  R                  [	        U 5      5         u  pV[        U5      n[        U5      nU R                  U R                  U   S   U5      nUc  M  Uu  pXS.n
[        X5      n
U
c  M  UR                  U
5        [	        U5      U:  a  M  U$ )Nr   ext_data_numr=   imagelabel)r   hasattrgetattrr   rM   r   r   r   randintrD   get_lmdb_sample_infor   r   append)r"   rh   opload_data_opsext_datalmdb_idxfile_idxsample_infoimgrk   datas              r-   get_ext_dataLMDBDataSet.get_ext_datae   s   ((Br>**&r>:  !<4#<#<=(ml*!%!9!9")):K:KCPTI:V!WH8}H8}H33x(/K "$JC 1DT1D|OOD! (ml* rJ   c                     SR                  5       U-  nUR                  U5      nUc  g UR                  S5      nSR                  5       U-  nUR                  U5      nXd4$ )Nz
label-%09dzutf-8z
image-%09d)rE   r    decode)r"   r=   index	label_keyrk   img_keyimgbufs          r-   ro    LMDBDataSet.get_lmdb_sample_info   sb     '')E1		"=W%%%'%/!}rJ   c                    U R                   U   u  p#[        U5      n[        U5      nU R                  U R                  U   S   U5      nUc<  U R	                  [
        R                  R                  U R                  5       5      5      $ Uu  pVXVS.nU R                  5       US'   [        XpR                  5      nUc<  U R	                  [
        R                  R                  U R                  5       5      5      $ U$ )Nr=   ri   rs   )r   rD   ro   r   __getitem__r   r   rn   __len__ry   r   r   )	r"   idxrt   ru   rv   rw   rk   rx   outss	            r-   r   LMDBDataSet.__getitem__   s    !55c:x=x=//NN8$U+X
 ##BII$5$5dlln$EFF 
-,,.Zxx(<##BII$5$5dlln$EFFrJ   c                 4    U R                   R                  S   $ Nr   r   shaper"   s    r-   r   LMDBDataSet.__len__       ''--a00rJ   )r   r   r   r   r!   r   N)__name__
__module____qualname____firstlineno__r   r   r   rZ   ry   ro   r   r   __static_attributes____classcell__)r,   s   @r-   r	   r	      s0    >*0# 
4"1 1rJ   r	   c                   0    \ rS rSrSS jrS rS rS rSrg)	LMDBDataSetSR   c                     UR                  U5      n[        R                  " 5       nUR                  U5        UR	                  S5        [
        R                  " U5      R                  U5      nU$ r   )r    ioBytesIOr9   seekr   rB   convert)r"   r=   keytyper   bufims          r-   buf2PILLMDBDataSetSR.buf2PIL   sN    jjl		&ZZ_$$T*	rJ   c                    [         R                  [         R                  [         R                  -   [         R                  [         R                  -   [         R                  [         R                  -   [         R                  -   S.nUS:X  a  UR                  5       nU H  nXCU   ;  d  M  UR                  US5      nM!     U$ )N)digitlowerupperallr    )stringdigitsascii_lowercaseascii_letterspunctuationr   replace)r"   str_voc_type
alpha_dictchars        r-   str_filtLMDBDataSetSR.str_filt   s    ]]]]V%;%;;]]V%9%99==6#7#77&:L:LL	

 w::<DDh//||D"-  rJ   c                    SU l         SU l        SU l        SU-  n[        UR	                  U5      R                  5       5      nSU-  nSU-  n U R                  XS5      nU R                  XS5      nU R                  X@R                   5      n	XxU	4$ ! [        =(       d    [        U5      U R                  :   a
    XS-      s $ f = f)	Nr   d   Fs
   label-%09ds   image_hr-%09ds   image_lr-%09dRGBr   )
r   max_lenteststrr    r|   r   IOErrorrM   r   )
r"   r=   r}   r~   word
img_HR_key
img_lr_keyimg_HRimg_lr	label_strs
             r-   ro   "LMDBDataSetSR.get_lmdb_sample_info   s    	!E)	3779%,,./%-
%-
	#\\#59F\\#59F MM$6	y(( 2#d)dll2 	#	?"	#s   $B 0CCc                    U R                   U   u  p#[        U5      n[        U5      nU R                  U R                  U   S   U5      nUc<  U R	                  [
        R                  R                  U R                  5       5      5      $ Uu  pVnXVUS.n[        XR                  5      n	U	c<  U R	                  [
        R                  R                  U R                  5       5      5      $ U	$ )Nr=   )image_hrimage_lrrk   r   rD   ro   r   r   r   r   rn   r   r   r   )
r"   r   rt   ru   rv   r   r   r   rx   r   s
             r-   r   LMDBDataSetSR.__getitem__   s    !55c:x=x=//NN8$U+X
 ##BII$5$5dlln$EFF$/!	"Kxx(<##BII$5$5dlln$EFFrJ   )r   r   r   N)r   )	r   r   r   r   r   r   ro   r   r    rJ   r-   r   r      s    ) rJ   r   c                   2    \ rS rSrS rS rS rS rS rSr	g)	LMDBDataSetTableMaster   c           	          0 nSn[         R                  " USSSSSS9nUR                  SS9n[        [        R
                  " UR                  S5      5      5      nUUUUS.X#'   U$ )	Nr   r1   TFr2   r8   s   __len__r:   )rA   rB   rC   rD   pickleloadsr    )r"   r   r   rF   r<   r=   r>   s          r-   r   5LMDBDataSetTableMaster.load_hierarchical_lmdb_dataset   sx    	ii
 iiei$&,,swwz':;<&	"
	 rJ   c                 ~    U(       d  g[         R                  " USS9nUc  g[        R                  " US5      nUc  gU$ rY   r]   ra   s       r-   rZ   #LMDBDataSetTableMaster.get_img_data   rf   rJ   c                    S n [         R                  " UR                  [        U5      R	                  S5      5      5      nUS   nUS   nUS   nUR                  5       R                  S5      nUS   US   pU
R                  S5      n
USS  nSnU Vs/ s H-  nU" UR                  5       R                  U5      5      SS	/S
.PM/     nn0 nX_S'   XS'   XS'   XoS'   U$ !    g = fs  snf )Nc                 P    / nU  H  nUR                  [        U5      5        M     U$ r   )rp   rD   )bbox_str_list	bbox_listbbox_strs      r-   convert_bboxALMDBDataSetTableMaster.get_lmdb_sample_info.<locals>.convert_bbox   s)    I)  X/ *rJ   utf8r   r   rL   
,12)bboxtokens	file_name	structurecellsrj   )r   r   r    r   rE   stripsplit)r"   r=   r}   r   rx   r   bytes
info_linesraw_dataraw_nametextr   
bbox_splitbslbboxes	line_infos                   r-   ro   +LMDBDataSetTableMaster.get_lmdb_sample_info   s   		<<E
(9(9&(A BCD
 G	Q!W
##%++D1QKQK  zz# 
 %
$ "#))+"3"3J"?@SRUJW$ 	 
 	!*+!%+#'"';	
s   =C 4C&C#c                    U R                   U   u  p#[        U5      n[        U5      nU R                  U R                  U   S   U5      nUc<  U R	                  [
        R                  R                  U R                  5       5      5      $ [        X@R                  5      nUc<  U R	                  [
        R                  R                  U R                  5       5      5      $ U$ )Nr=   r   )r"   r   rt   ru   rx   r   s         r-   r   "LMDBDataSetTableMaster.__getitem__   s    !55c:x=x=(()A%)H(S<##BII$5$5dlln$EFFxx(<##BII$5$5dlln$EFFrJ   c                 4    U R                   R                  S   $ r   r   r   s    r-   r   LMDBDataSetTableMaster.__len__,  r   rJ   r   N)
r   r   r   r   r   rZ   ro   r   r   r   r   rJ   r-   r   r      s    *
&P
1rJ   r   )numpyr   r   r?   	paddle.ior   rA   r_   r   r   PILr   imaugr   r   r	   r   r   r   rJ   r-   <module>r      sO     	 	   
    .@1' @1F5K 5pW1[ W1rJ   