
    iL                        S SK r S SKrS SKr\ R                  R	                  \5      rS SKJr  \R                  R                  \ R                  R                  \S5      5        S SKrS SKJr  S SKrS SKrS SKJr  S SKrS SKJr  S SKrS SKJr  SS jr\" S	\ R                  R                  \S
5      SS9r\R6                  " SS5      r\R6                  " SS5      rS SKJr  S SK J!r!J"r"J#r#J$r$  S SK%J&r&J'r'J(r(J)r)  S SK*J+r+  S SK,J-r-J.r.J/r/  S SK0J1r1J2r2  S SK3J4r4J5r5J6r6  S SK7J8r8J9r9  S SK:J;r;  \" 5       r</ SQr=S/r>SS/r?\ R                  R                  S\ R                  R                  S5      5      rCSrD/ S QrES!rFS"S!/rGS#S$0S#S%0S#S&0S'.S(S)S*.S+S,S*.S-S.S*.S/S0S*.S1S2S*.S3S4S*.S5S6S*.S7S8S*.S9S:S*.S;S<S*.S=S>S*.S?S@S*.SASBS*.SC.SDS#SE00SF.S#SG0S#S%0S#S&0S'.SHS)S*.SIS.S*.SJS0S*.SKS2S*.S3S4S*.SLS6S*.SMS8S*.SNS:S*.S;S<S*.SOS>S*.S?S@S*.SPSBS*.SQ.SDS#SE00SF.SDS#SR00SDSSS)S*.0SDS#SE00SF.S#ST0S#SU0S#SV0SW.SXS)S*.SYS.S*.SZS[S*.S\S]S*.S^S0S*.S_S2S*.S`S4S*.SaS6S*.SbS8S*.ScS:S*.SdS<S*.SeS>S*.SfS@S*.SgSBS*.ShSiS*.Sj.SDS#SE00SF.Sk.SlSmSnSoS*.00SpSoS*.SqSrS*.Ss.StSuS*.SvSwS*.Ss.SxSyS*.SxSyS*.Ss.Sz.S{.S|.rHSS} jrIS~ rJS rKS\L4S jrMSS jrN " S S\+R                  5      rP " S S\45      rQS rRg)    N)
try_import )deepcopy)Path)BytesIO)Imagec                     [         R                  R                  X5      n[         R                  R                  U5      nUR                  R                  U5        U(       a  U[        R                  U '   U$ N)	importlibutilspec_from_file_locationmodule_from_specloaderexec_modulesysmodules)module_name	file_pathmake_importablespecmodules        S/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddleocr/paddleocr.py_import_filer   $   sR    >>11+ID^^,,T2FKKF##)K M    toolsztools/__init__.pyT)r   ppocr	paddleocrppstructure)
get_logger)check_and_readget_image_file_listalpha_to_colorbinarize_img)maybe_downloaddownload_with_progressbaris_linkconfirm_model_dir_url)predict_system)draw_ocrstr2bool	check_gpu)	init_argsdraw_structure_result)StructureSystemsave_structure_resto_excel)sorted_layout_boxesconvert_info_docx)convert_info_markdown)
	PaddleOCRPPStructurer)   r-   r/   r%   r0   r1   r2   r3   DBCRNN
SVTR_LCNetPADDLE_OCR_BASE_DIRz~/.paddleocr/PP-OCRv4)PP-OCRPP-OCRv2PP-OCRv3r:   PP-StructureV2PP-StructureurlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tarzYhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar)chenmlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tarz./ppocr/utils/ppocr_keys_v1.txt)r@   	dict_pathzzhttps://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0rc0/PP-OCRv4_server_rec_doc_infer.tarz'./ppocr/utils/dict/ppocrv4_doc_dict.txtzJhttps://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tarz./ppocr/utils/en_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/korean_PP-OCRv4_rec_infer.tarz"./ppocr/utils/dict/korean_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/japan_PP-OCRv4_rec_infer.tarz!./ppocr/utils/dict/japan_dict.txtzXhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tarz'./ppocr/utils/dict/chinese_cht_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ta_PP-OCRv4_rec_infer.tarz./ppocr/utils/dict/ta_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/te_PP-OCRv4_rec_infer.tarz./ppocr/utils/dict/te_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ka_PP-OCRv4_rec_infer.tarz./ppocr/utils/dict/ka_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tarz!./ppocr/utils/dict/latin_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/arabic_PP-OCRv4_rec_infer.tarz"./ppocr/utils/dict/arabic_dict.txtzUhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tarz$./ppocr/utils/dict/cyrillic_dict.txtzWhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/devanagari_PP-OCRv4_rec_infer.tarz&./ppocr/utils/dict/devanagari_dict.txt)rA   ch_docrB   koreanjapanchinese_chttatekalatinarabiccyrillic
devanagarirA   zRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar)detrecclszJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tarzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tarzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tarzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tarzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tarzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tarzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tarzWhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar)rA   rB   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   zJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tarzRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tarz\https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar)rA   rB   	structurezRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tarz]https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tarzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/french_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/german_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tarzYhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tarz`https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tarzbhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tarzdhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tarzppocr/utils/dict/table_dict.txt)rA   rB   frenchgermanrF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rS   )r:   r=   r<   r;   tablerB   zahttps://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tarz)ppocr/utils/dict/table_structure_dict.txtzqhttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/paddle3.0b2/en_ppstructure_mobile_v2.0_SLANet_infer.tarzqhttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/paddle3.0b2/ch_ppstructure_mobile_v2.0_SLANet_infer.tarz,ppocr/utils/dict/table_structure_dict_ch.txt)rB   rA   zahttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tarz6ppocr/utils/dict/layout_dict/layout_publaynet_dict.txtzfhttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tarz1ppocr/utils/dict/layout_dict/layout_cdla_dict.txtzDhttps://paddleocr.bj.bcebos.com/contribution/rec_latex_ocr_infer.tarz)ppocr/utils/dict/latex_ocr_tokenizer.json)rV   layoutformula)r?   r>   )OCR	STRUCTUREc                 l   SS K n[        5       nXl        UR                  S[        SS9  UR                  S[
        SS9  UR                  S[
        SS9  UR                  S[        S	S9  UR                  S
[
        SS9  UR                  S[        [        SSS9  UR                  S[        [        SSS9  UR                   H  nUR                  S;   d  M  S Ul
        M     U (       a  UR                  5       $ 0 nUR                   H  nUR                  XCR                  '   M     UR                  " S0 UD6$ )Nr   z--langrA   )typedefaultz--detTz--recz--typeocrz
--savefileFz--ocr_versionr:   aU  OCR Model version, the current model support list is as follows: 1. PP-OCRv4/v3 Support Chinese and English detection and recognition model, and direction classifier model2. PP-OCRv2 Support Chinese detection and recognition model. 3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.)r\   choicesr]   helpz--structure_versionr>   zModel version, the current model support list is as follows: 1. PP-Structure Support en table structure model. 2. PP-StructureV2 Support ch and en table structure model.)rec_char_dict_pathtable_char_dict_pathlayout_dict_pathformula_char_dict_path )argparser,   add_helpadd_argumentstrr*   SUPPORT_OCR_MODEL_VERSIONSUPPORT_STRUCTURE_MODEL_VERSION_actionsdestr]   
parse_args	Namespace)mMainrf   parseractioninference_args_dicts        r   rn   rn   |  s>   [FO
sD9
h=
h=
sE:
8UC
)x  	 / F   //;; 
 
 "FN "   "" ooF/5~~, &!!8$788r   c                 F   / SQn/ SQn/ SQn/ SQnX;   a  Sn OX;   a  Sn OX;   a  Sn OX;   a  Sn U [         S	   [           S
   ;   d6   SR                  [         S	   [           S
   R                  5       U 5      5       eU S;   a  SnX4$ U S:X  a  SnX4$ U S;   a  SnX4$ SnX4$ )N)*afazbscscydadeesetfrgahrhuidisitkulaltlvmimsmtnlnoocpiplptrors_latinskslsqsvswtltruzvirT   rU   )arfaugur)rurs_cyrillicbebgukmnabqadykbdavadarinhchelbeleztab)himrnebhmaiangbhomahscknewgomsabgcrL   rM   rN   rO   rY   rQ   z!param lang must in {}, but got {})rA   rE   rA   rS   )rB   rL   rB   rC   )
MODEL_URLSDEFAULT_OCR_MODEL_VERSIONformatkeys)lang
latin_langarabic_langcyrillic_langdevanagari_langdet_langs         r   
parse_langr     s    +JX +KM$O 						 
5!";<UCC*11534U;@@BDC  > 
	
 >	 
	  > >r   c                    U S:X  a  [         nOU S:X  a  [        nO[        e[        U    nX;  a  UnX%U   ;  aV  X%U   ;   a  UnOK[        R                  SR                  X%U   R                  5       5      5        [        R                  " S5        X5U   U   ;  a]  X5U   U   ;   a  UnOO[        R                  SR                  UXT   U   R                  5       U5      5        [        R                  " S5        XQ   U   U   $ )NrY   rZ   z,{} models is not support, we only support {}z8lang {} is not support, we only support {} for {} models)
r   DEFAULT_STRUCTURE_MODEL_VERSIONNotImplementedErrorr   loggererrorr   r   r   exit)r\   version
model_typer   DEFAULT_MODEL_VERSION
model_urlss         r   get_model_configr     s   u} 9		 ?!!D!J 'G,,$9::+GLL>EE+@ A F F H
 HHRLg&z2234Z@@+GLLJQQ5jAFFH HHRLz*400r   contentc                     [         R                  " U [         R                  S9n[        R                  " U[        R
                  5      $ )N)dtype)np
frombufferuint8cv2imdecodeIMREAD_UNCHANGED)r   np_arrs     r   
img_decoder   7  s-    ]]7"((3F<< 4 455r   c                 z   Su  p#[        U [        5      (       a  [        U 5      n [        U [        5      (       Ga  [	        U 5      (       a  [        U S5        Sn U n[        U5      u  pnU(       Gd)  U(       Gd!  [        US5       nUR                  5       n[        U5      n SSS5        U c   [        5       n[        W5      n[        R                  " U5      n	U	R                  S5      n
U
R                  US5        UR                  S5        UR                  5       n[        [        R                  " U5      SS	9n[        R                   " U5      n["        R$                  " U["        R&                  5      n[(        R*                  " U[(        R,                  5      n U c(  [.        R1                  S
R3                  U5      5        SX#4$ [        U ["        R4                  5      (       a>  [7        U R8                  5      S:X  a%  [(        R:                  " U [(        R<                  5      n [        U ["        R4                  5      (       a7  [7        U R8                  5      S:X  a  U R8                  S   S:X  a  [?        X5      n XU4$ ! , (       d  f       GN= f!   [.        R1                  S
R3                  U5      5        SX#4s $ = f)u  
Check the image data. If it is another type of image file, try to decode it into a numpy array.
The inference network requires three-channel images, So the following channel conversions are done
    single channel image: Gray to RGB R←Y,G←Y,B←Y
    four channel image: alpha_to_color
args:
    img: image data
        file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats
        storage type: binary image, net image file, local image file
    alpha_color: Background color in images in RGBA format
    return: numpy.array (h, w, 3) or list (p, h, w, 3) (p: page of pdf), boolean, boolean
)FFtmp.jpgrbNRGBjpegr   utf-8encodingerror in loading image:{}         ) 
isinstancebytesr   ri   r&   r%   r    openreadr   r   convertsaveseekbase64	b64encode	b64decoder   r   r   r   r   IMREAD_COLORr   r   r   ndarraylenshapecvtColorCOLOR_GRAY2BGRr"   )imgalpha_colorflag_gifflag_pdf
image_filefimg_strbufimageimrgbimage_bytesdata_base64image_decode	img_arrays                  r   	check_imgr  <  s
    &H#uo#s3<<%c95C
"0"<xj$'1&&( ) ( {4!)C#G,EE*B**U+CHHS&)HHQK"%((*K"%f&6&6{&Cg"VK#)#3#3K#@L "lBHH EI,,y#2B2BCC ;LL4;;JGH++#rzz""s399~':ll3 2 23#rzz""s399~':syy|q?PS.(""9 (' 4LL!<!C!CJ!OP33s   I<-C-J <
J*J:c                   @   ^  \ rS rSrU 4S jrSSSSSS0 4S jrSrU =r$ )r4   is  c           	      :  > [        SS9nUR                  R                  " S0 UD6  UR                  [        ;   d%   SR                  [        UR                  5      5       e[        UR                  5      Ul        UR                  (       d#  [        R                  [        R                  5        UR                  U l        [        UR                  5      u  p4[!        SUR                  SU5      n[#        UR$                  [&        R(                  R+                  [,        SSU5      US   5      u  Ul        n[!        SUR                  SU5      n[#        UR.                  [&        R(                  R+                  [,        SSU5      US   5      u  Ul        n[!        SUR                  S	S
5      n	[#        UR0                  [&        R(                  R+                  [,        SS	5      U	S   5      u  Ul        n
UR                  S;   a  SUl        OSUl        UR5                  S5      b  UR5                  S5      Ul        UR6                  (       dB  [9        UR$                  U5        [9        UR.                  U5        [9        UR0                  U
5        UR:                  [<        ;  a>  [        R?                  SR                  [<        5      5        [@        RB                  " S5        URD                  [F        ;  a>  [        R?                  SR                  [F        5      5        [@        RB                  " S5        URH                  c-  [K        [M        [N        5      RP                  US   -  5      Ul$        [        RS                  U5        [T        TU ]  U5        URX                  U l,        g)zM
paddleocr package
args:
    **kwargs: other params show in paddleocr --help
Frp   z"ocr_version must in {}, but get {}rY   rP   whlr@   rQ   rR   rA   )r=   r:   z
3, 48, 320z
3, 32, 320rec_image_shapeNzdet_algorithm must in {}r   zrec_algorithm must in {}rD   re   )-rn   __dict__updateocr_versionrj   r   r+   use_gpushow_logr   setLevelloggingINFOuse_angle_clsr   r   r   r'   det_model_dirospathjoinBASE_DIRrec_model_dircls_model_dirr  getuse_onnxr$   det_algorithmSUPPORT_DET_MODELr   r   r   rec_algorithmSUPPORT_REC_MODELra   ri   r   __file__parentdebugsuper__init__page_num)selfkwargsparamsr   r   det_model_configdet_urlrec_model_configrec_urlcls_model_configcls_url	__class__s              r   r+  PaddleOCR.__init__t  s    %(((";;	
/66%v'9'9
	
; #6>>2OOGLL)#11#FKK0 ,E63E3EuhW(=  GGLL5%:U#)
%g
 ,E63E3EudS(=  GGLL5%6U#)
%g
 ,E63E3EudS(=  GGLL5%0U#)
%g
 !99%1F"%1F"::'(4%+ZZ0A%BF"6//96//96//9'88LL3::;LMNHHQK'88LL3::;LMNHHQK$$,(+X%%(8(EE)F% 	V r   TF   r9  r9  c	                   ^^^ [        U[        R                  [        [        [
        45      (       d   e[        U[        5      (       a&  US:X  a   [        R                  S5        [        S5        US:X  a%  U R                  S:X  a  [        R                  S5        [        UT5      u  pn
[        U[        5      (       aC  U
(       a<  U R                  [        U5      :  d  U R                  S:X  a  UnOUSU R                   nOU/nUUU4S jnU(       a  U(       a  / nU H  nU" U5      nU R                  XU5      u  pnU(       d  U(       d  UR                  S5        MA  [!        X5       VVs/ s H  u  nnUR#                  5       U/PM     nnnUR                  U5        M     U$ U(       a  U(       d}  / nU Hs  nU" U5      nU R%                  U5      u  nnUR&                  S:X  a  UR                  S5        MB  U Vs/ s H  nUR#                  5       PM     nnUR                  U5        Mu     U$ / n/ nU H  n[        U[        5      (       d  U" U5      nU/nU R                  (       a4  U(       a-  U R)                  U5      u  nnnU(       d  UR                  U5        U R+                  U5      u  nnUR                  U5        M     U(       d  U$ U$ s  snnf s  snf )a  
OCR with PaddleOCR

Args:
    img: Image for OCR. It can be an ndarray, img_path, or a list of ndarrays.
    det: Use text detection or not. If False, only text recognition will be executed. Default is True.
    rec: Use text recognition or not. If False, only text detection will be executed. Default is True.
    cls: Use angle classifier or not. Default is True. If True, the text with a rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance.
    bin: Binarize image to black and white. Default is False.
    inv: Invert image colors. Default is False.
    alpha_color: Set RGB color Tuple for transparent parts replacement. Default is pure white.
    slice: Use sliding window inference for large images. Both det and rec must be True. Requires int values for slice["horizontal_stride"], slice["vertical_stride"], slice["merge_x_thres"], slice["merge_y_thres"] (See doc/doc_en/slice_en.md). Default is {}.

Returns:
    If both det and rec are True, returns a list of OCR results for each image. Each OCR result is a list of bounding boxes and recognized text for each detected text region.
    If det is True and rec is False, returns a list of detected bounding boxes for each image.
    If det is False and rec is True, returns a list of recognized text for each image.
    If both det and rec are False, returns a list of angle classification results for each image.

Raises:
    AssertionError: If the input image is not of type ndarray, list, str, or bytes.
    SystemExit: If det is True and the input is a list of images.

Note:
    - If the angle classifier is not initialized (use_angle_cls=False), it will not be used during the forward process.
    - For PDF files, if the input is a list of images and the page_num is specified, only the first page_num images will be processed.
    - The preprocess_image function is used to preprocess the input image by applying alpha color replacement, inversion, and binarization if specified.
Tz.When input a list of images, det must be falser   Fz]Since the angle classifier is not initialized, it will not be used during the forward processNc                 ~   > [        U T5      n T(       a  [        R                  " U 5      n T(       a  [        U 5      n U $ r
   )r"   r   bitwise_notr#   )_imager   bininvs    r   preprocess_image'PaddleOCR.ocr.<locals>.preprocess_image  s2    #FK8F0%f-Mr   )r   r   r   listri   r   r   r   r   r  warningr  r,  r   __call__appendziptolisttext_detectorsizetext_classifiertext_recognizer)r-  r   rP   rQ   rR   r>  r?  r   slicer   r   imgsr@  ocr_resdt_boxesrec_res_boxrestmp_reselapsecls_rescls_res_tmps        ```               r   r^   PaddleOCR.ocr  sl   N #

D#u=>>>>c4  SD[LLIJG$;4--6NNo #,C"=xc4  X}}s3x'4==A+=?T]]+5D	 3G&s+'+}}Su'E$1NN4(?B8?UV?U83CJJL#.?UVw'  NG&s+#'#5#5c#: &==A%NN4(3;<8C3::<8<w'  NGG!#t,,*3/C%C%%#/3/C/CC/H,Cf{3"&"6"6s";w'  N; W =s   .K?K)r,  r  )__name__
__module____qualname____firstlineno__r+  r^   __static_attributes____classcell__r6  s   @r   r4   r4   s  s-    B(N #g gr   r4   c                   B   ^  \ rS rSrSrU 4S jr   SU 4S jjrSrU =r$ )r5   i"  zK
PPStructure class represents the structure analysis system for PaddleOCR.
c           	        > [        SS9nUR                  R                  " S0 UD6  UR                  [        ;   d%   SR                  [        UR                  5      5       e[        UR                  5      Ul        SUl        UR                  (       d#  [        R                  [        R                  5        [        UR                  5      u  p4US:X  a  SnOSnUR                  S:X  a  SUl        [#        SUR$                  S	U5      n['        UR(                  [*        R,                  R/                  [0        S
S	U5      US   5      u  Ul        n[#        SUR$                  SU5      n['        UR2                  [*        R,                  R/                  [0        S
SU5      US   5      u  Ul        n	[#        SUR                  SU5      n
['        UR4                  [*        R,                  R/                  [0        S
S5      U
S   5      u  Ul        n[#        SUR                  SU5      n['        UR6                  [*        R,                  R/                  [0        S
S5      US   5      u  Ul        n[#        SUR                  SU5      n['        UR8                  [*        R,                  R/                  [0        S
S5      US   5      u  Ul        nUR:                  (       dn  [=        UR(                  U5        [=        UR2                  U	5        [=        UR4                  U5        [=        UR6                  U5        [=        UR8                  U5        UR>                  c-  [A        [C        [D        5      RF                  US   -  5      Ul        URH                  c-  [A        [C        [D        5      RF                  U
S   -  5      Ul$        URJ                  c-  [A        [C        [D        5      RF                  US   -  5      Ul%        URL                  c-  [A        [C        [D        5      RF                  US   -  5      Ul&        [        RO                  U5        [P        TU ]  U5        g)z
Initializes the PPStructure object with the given parameters.

Args:
    **kwargs: Additional keyword arguments to customize the behavior of the structure analysis system.

Raises:
    AssertionError: If the structure version is not supported.

Fr  z(structure_version must in {}, but get {}rS   rA   rB   r?   rY   rP   r  r@   rQ   rZ   rV   rW   rX   NrD   re   )*rn   r  r  structure_versionrk   r   r+   r  moder  r   r  r  r  r   r   merge_no_span_structurer   r  r'   r  r  r  r  r  r  table_model_dirlayout_model_dirformula_model_dirr"  r$   ra   ri   r   r'  r(  rb   rc   rd   r)  r*  r+  )r-  r.  r/  r   r   
table_langr0  r1  r2  r3  table_model_config	table_urllayout_model_config
layout_urlformula_model_configformula_urlr6  s                   r   r+  PPStructure.__init__'  s    %((($$(GG	
5<<+V-E-E
	
G #6>>2!OOGLL)#FKK04<JJ##~5-2F* ,E63E3EuhW(=  GGLL5%:U#)
%g
 ,E63E3EudS(=  GGLL5%6U#)
%g
 .117J
 -B""GGLL5'2u%-
)	
 /118T
 /D##GGLL5(3&/
+
  0119d 
 1F$$GGLL5)4 '1
- + 6//96//96119=622J?633[A$$,(+X%%(8(EE)F% &&.*-X%%(:;(GG+F' ""*&)X%%(;K(HH'F# ((0,/X%%(<[(II-F) 	V r   c           	      P  > [        X5      u  pn[        U[        5      (       aq  U(       aj  / n[        U5       HW  u  p[        R                  SR                  US-   [        U5      5      5        [        TU ]%  XUS9u  pUR                  U
5        MY     U$ [        TU ]%  XUS9u  pU
$ )a  
Performs structure analysis on the input image.

Args:
    img (str or numpy.ndarray): The input image to perform structure analysis on.
    return_ocr_result_in_table (bool, optional): Whether to return OCR results in table format. Defaults to False.
    img_idx (int, optional): The index of the image. Defaults to 0.
    alpha_color (tuple, optional): The alpha color for transparent images. Defaults to (255, 255, 255).

Returns:
    list or dict: The structure analysis results.

processing {}/{} page:   img_idx)r  r   rB  	enumerater   infor   r   r*  rD  rE  )r-  r   return_ocr_result_in_tablert  r   r   r   res_listindexpdf_imgrS  rQ  r6  s               r   rD  PPStructure.__call__  s    ( #,C"=xc4  XH"+C.4;;EAIs3xPQ) *  $ #1 O!#7!S
r   re   )Fr   r8  )	rY  rZ  r[  r\  __doc__r+  rD  r]  r^  r_  s   @r   r5   r5   "  s$    ]!D $)# r   r5   c                     [        SS9n [        R                  S5        U R                  n[	        U5      (       aL  [
        R                  R                  S5      (       a  [
        R                  " S5      OS  [        US5        S/nO[        U R                  5      n[        U5      S:X  a/  [        R                  SR                  U R                  5      5        gU R                  S:X  a  [        S%0 U R                   D6nO,U R                  S	:X  a  [#        S%0 U R                   D6nO[$        eU GH=  n[
        R                  R'                  U5      R)                  S
5      S   n[        R                  SR                  SUS5      5        U R                  S:X  Gac  UR+                  UU R,                  U R.                  U R0                  U R2                  U R4                  U R6                  S9nUGb  / nU Hf  nUc  [        R9                  SU 35        M   U H@  n	[        R                  U	5        UR;                  [<        R>                  " U	5      S-   5        MB     Mh     U R@                  (       a  [
        R                  R                  U RB                  5      SL a   [
        RD                  " U RB                  5        U RB                  S-   U-   S-   n
[G        U
SSS9 nURI                  U5        SSS5        GM  GM  GM  U R                  S	:X  d  GM  [K        U5      u  pnU(       d  U(       d  [L        RN                  " U5      nU RP                  (       a  U RR                  (       a  U(       a  [U        S5        SSK+J,n  [
        R                  R[                  U RB                  SR                  U5      5      nU" U5      nUR]                  U5        UR_                  5         [        R                  SR                  U5      5        GM  U(       d/  Uc'  [        R                  SR                  U5      5        GM  XL//nO/ n[a        U5       H  u  nn[
        Rb                  " [
        R                  R[                  U RB                  U5      SS9  [
        R                  R[                  U RB                  XUS-   [e        U5      -   S-   5      n[L        Rf                  " UU5        UR;                  UU/5        M     / n[a        U5       H  u  nu  nn[        R                  SR                  US-   [        U5      5      5        U" UUS 9n[i        X`RB                  UU5        U RP                  (       d  Mk  U/ :w  d  Ms  URj                  u  nnn[m        U5      n[o        UU5      nUU-  nM     U RP                  (       aG  U/ :w  aA   [q        UUU RB                  U5        U Rr                  (       a  [u        UU RB                  U5        U H:  nURy                  S"5        URy                  S#5        [        R                  U5        M<     [        R                  S$R                  U RB                  5      5        GM@     g! , (       d  f       GMS  = f! [v         a1  n[        R                  S!R                  UU5      5         SnAGM  SnAff = f)&z
Main function for running PaddleOCR or PPStructure.

This function takes command line arguments, processes the images, and performs OCR or structure analysis based on the specified type.

Args:
    None

Returns:
    None
Tr  z-for usage help, please use `paddleocr --help`r   Nr   zno images find in {}r^   rS   .z{}{}{}z
**********)rP   rQ   rR   r>  r?  r   zNo text found in image 
F/z.txtwr   r   pdf2docx)	Converterz{}.docxzdocx save to {}r   )exist_okrQ  z.jpgrq  rr  rs  z.error in layout recovery image:{}, err msg: {}r   rS  zresult save to {}re   )=rn   r   rv  	image_dirr&   r  r  existsremover%   r!   r   r   r   r\   r4   r  r5   r   basenamesplitr^   rP   rQ   r  binarizeinvert
alphacolorrC  rE  pprintpformatsavefileoutputmkdirr   
writelinesr    r   imreadrecoveryuse_pdf2docx_apir   pdf2docx.converterr  r  r   closeru  makedirsri   imwriter/   r   r   r1   r2   recovery_to_markdownr3   	Exceptionpop)argsr  image_file_listengineimg_pathimg_nameresultlinesrS  lineoutfiler  r   r   r   r  	docx_filecv	img_pathsry  rz  pdf_img_pathall_resnew_img_pathhr  rQ  	result_cpresult_sortedexitems                                  r   mainr    s    D!D
KK?@Iy "y 9 9		)t!)Y7$+-dnn=
?q +224>>BCyyE-dmm-	k	!//!!#77##H-33C8;HOOHhAB99ZZHHHH&&MMKK OO   F !!C{)@
'KL  #D)V^^D%9D%@A !$	 " ==ww~~dkk2e;-"kkC/(:VCGgsW=U+ >=	 ! "  YY+%&4X&>#C8Hjj*}}!6!68:&8GGLLi6F6Fx6PQ	x(

9%
-44Y?@;LL!<!C!CH!MN&_-		&/nNE7KKT[[( CdS#%77<<X#~E
/JV/S$L KKg6$$lG%<= '5 G.7	.B**c4;;EAIs9~VWU3"6;;%H===Vr\!iiGAq! ( 0I$7	1$EM},G /C }}B
%c7DKKJ00-gt{{HM  D!   KK+224;;?@s $4 >=f ! LLHOO$b
 s%   Y1=A Z1
Z	
Z?%Z::Z?)F)T)r8  )Sr  r   r   r  dirnamer'  __dir__paddle.utilsr   rE  r  r   copyr   r  numpyr   pathlibr   r   ior   r  PILr   r   r   import_moduler   r   ppocr.utils.loggingr   ppocr.utils.utilityr    r!   r"   r#   ppocr.utils.networkr$   r%   r&   r'   tools.inferr(   tools.infer.utilityr)   r*   r+   ppstructure.utilityr,   r-   ppstructure.predict_systemr.   r/   r0   $ppstructure.recovery.recovery_to_docr1   r2   )ppstructure.recovery.recovery_to_markdownr3   r   __all__r$  r&  environr!  
expanduserr  r   rj   r   rk   r   rn   r   r   r   r   r  
TextSystemr4   r5   r  re   r   r   <module>r     s   
 
 
''//(
# # Wb) * 
         	RWW\\'#67	 	5%%m[A *   ' = = @ T T W K	 F \* ::>>/1C1CO1TU& J "2 #13C"D  g g v
 h!B
 X!J
 h!<
 q!E
 p!D
 v!J 
 m!A
 m!A
 m!A
 p!D
 q!E
 s!G
 u!Ic5n oEG
V g g v
 h!B
 h!<
 q!E
 p!D
 v!J 
 m!A
 m!A
 m!A
 p!D
 q!E
 s!G
 u!I[1f o}C
L g g!B o
* o y x
 p!B
 {!<
 x!E
 x!E
 x!E
 w!D
 }!J 
 t!A
 t!A
 t!A
 }!D
 ~!E
 @!G
 B!I
 y!Bs=~ oUO
od ~!L
 O!L
 O!O	 !Y
 D!T	 b!L
 b!L	+
)c[
|,9^dN"1J6 6
4#nl)) l^C/ CLyAr   