
    ՑiϷ                        S SK r S SKrS SKrS SKrS SKJr  S SKJrJrJ	r	J
r
Jr  S SKrS SKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSK J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)J*r*  SSK+J,r,J-r-J.r.  SSK/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8  / SQr9\Rt                   " S S\!5      5       r; " S S\5      r<\" S5       " S S\<5      5       r=\" S5       " S  S!\<5      5       r>g)"    N)chain)AnyDictOptionalTupleUnion)Image   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	benchmark)	HPIConfig)PaddlePredictorOption   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)CropByBoxes)merge_tables_across_pages)assign_levels_to_parsing_res)construct_img_pathgather_imgs   )
BaseResultPaddleOCRVLBlockPaddleOCRVLResult)	convert_otsl_to_htmlcrop_marginfilter_overlap_boxesmerge_blockspost_process_for_spottingpre_process_for_spottingtokenize_figure_of_tabletruncate_repetitive_contentuntokenize_figure_of_table)imageheader_imagefooter_imagec            3       T  ^  \ rS rSrSr     S6S\S\\   S\\   S\	S\\
\\\4   \4      S	\	S
S4U 4S jjjrS r S7S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\\\      S
\4S jjrS\S
\	4S jr      S8S jr                       S9S\
\\\   \R*                  \\R*                     4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\
\	S4   S\\
\\4      S\\	   S\\
\\\\4   \4      S\\   S\\   S \\	   S!\\
\S4      S\
\	S4   S"\\   S#\\   S$\\   S%\\   S&\\   S'\\   S\\	   S\\\      S(\\   S
\42S) jjrS*\S
\4S+ jr   S:S,\S-\	S.\	S/\	4S0 jjr   S:S,\S1\	S2\	S3\	4S4 jjrS5rU =r $ );_PaddleOCRVLPipeline6   z_PaddleOCRVLPipeline PipelineNconfigdevice	pp_optionuse_hpip
hpi_configinitial_predictorreturnc                   > [         TU ]  X#XES9  U(       Gaj  UR                  SS5      U l        U R                  (       a:  UR                  S0 5      R                  SSS05      nU R	                  U5      U l        UR                  SS5      U l        U R                  (       a  UR                  S	0 5      R                  S
SS05      nUR                  SS5      n	U	b  U	S;   d   S5       e0 n
UR                  SS5      =nb  XS'   UR                  SS5      =n b  XS'   UR                  SS5      =n b  XS'   UR                  SS5      =n b  XS'   U R                  " U40 U
D6U l        UR                  SS5      U l	        UR                  SS5      U l
        UR                  S	0 5      R                  SSS05      nU R                  U5      U l        UR                  SS5      U l        UR                  SS5      U l        [        UR                  SS5      S9U l        [!        SS 9U l        [%        5       U l        UR                  S!S5      U l        UR                  S"S5      U l        UR                  S#/ S$Q5      U l        gg)%a  
Initializes the class with given configurations and options.

Args:
    config (Dict): Configuration dictionary containing various settings.
    device (str, optional): Device to run the predictions on. Defaults to None.
    pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
    use_hpip (bool, optional): Whether to use the high-performance
        inference plugin (HPIP) by default. Defaults to False.
    hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
        The default high-performance inference configuration dictionary.
        Defaults to None.
    initial_predictor (bool, optional): Whether to initialize predictors.
)r/   r0   r1   r2   use_doc_preprocessorTSubPipelinesDocPreprocessorpipeline_config_errorz+config error for doc_preprocessor_pipeline!use_layout_detection
SubModulesLayoutDetectionmodel_config_errorz"config error for layout_det_model!
model_nameN)zPP-DocLayoutV2zPP-DocLayoutV3z3model_name must be PP-DocLayoutV2 or PP-DocLayoutV3	threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modeuse_chart_recognitionFuse_seal_recognitionVLRecognitionzconfig error for vl_rec_model!format_block_contentuse_ocr_for_image_block
batch_sizer   )rH   BGR)format
use_queuesmerge_layout_blocksmarkdown_ignore_labels)numberfootnoteheaderr)   footerr*   
aside_text)super__init__getr6   create_pipelinedoc_preprocessor_pipeliner:   create_modellayout_det_modelrC   rD   vl_rec_modelrF   rG   r   batch_samplerr   
img_readerr   crop_by_boxesrK   rL   rM   )selfr.   r/   r0   r1   r2   r3   doc_preprocessor_configlayout_det_configr>   layout_kwargsr?   r@   rA   rB   vl_rec_config	__class__s                   q/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddlex/inference/pipelines/paddleocr_vl/pipeline.pyrT   _PaddleOCRVLPipeline.__init__:   s   . 	 	 	
 (.

3I4(PD%((*0**^R*H*L*L%/1^+' 261E1E+2. )/

3I4(PD%(($*JJ|R$@$D$D%)+OP%! /22<F
!-* A 3 I II  !#!2!6!6{D!IIIV1:+."3"7"7d"KKJ 3=,/+<+@+@-t, ' 	
 <O"780A0E0E2D1 , 	
 AY"<=(,(9(9%))6)% *04KU)SD&(.

3I5(QD%"JJ|R8<<%'GHM
 !% 1 1- @D(.

3I5(QD%+1::6OQV+WD(!2!::lA6"D (u5DO!,D$jju=DO'-zz2G'ND$*0**(+D'G     c                 \    [        U S5      (       a  U R                  R                  5         g g )NrZ   )hasattrrZ   closer^   s    rd   ri   _PaddleOCRVLPipeline.close   s&    4((##% )rf   use_doc_orientation_classifyuse_doc_unwarpingr:   rC   rD   rG   rF   rL   rM   c
                 8   Uc  Uc  U R                   n
OUSL d  USL a  Sn
OSn
Uc  U R                  nUc  U R                  nUc  U R                  nUc  U R                  nUc  U R
                  nUc  U R                  nU	c  U R                  n	[        U
UUUUUUU	S9$ )a  
Get the model settings based on the provided parameters or default values.

Args:
    use_doc_orientation_classify (Union[bool, None]): Enables document orientation classification if True. Defaults to system setting if None.
    use_doc_unwarping (Union[bool, None]): Enables document unwarping if True. Defaults to system setting if None.

Returns:
    dict: A dictionary containing the model settings.

TF)r6   r:   rC   rD   rG   rF   rL   rM   )	r6   r:   rC   rD   rG   rF   rL   rM   dict)r^   rl   rm   r:   rC   rD   rG   rF   rL   rM   r6   s              rd   get_model_settings'_PaddleOCRVLPipeline.get_model_settings   s    . (/4E4M#'#<#< +t37HD7P'+$',$'#'#<#<  ($($>$>!'#'#<#< "*&*&B&B#'#'#<#< &"&":":!)%)%@%@"!5!5"7!5$;!5 3#9	
 		
rf   input_paramsc                 h    US   (       a(  U R                   (       d  [        R                  " S5        gg)a  
Check if the input parameters are valid based on the initialized models.

Args:
    input_params (Dict): A dictionary containing input parameters.

Returns:
    bool: True if all required models are initialized according to input parameters, False otherwise.
r6   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.FT)r6   r   error)r^   rr   s     rd   check_model_settings_valid/_PaddleOCRVLPipeline.check_model_settings_valid   s,     ./8Q8QMMd rf   c
                 4   / n
Sn[        5       nUR                  SS 5      nUb  UOSnUR                  SS 5      nUb  UOSn0 n0 n0 n[        S/-   nU(       a  / O[        R                  5       nU(       d  US/-  nUS/-  nU(       d  US/-  n[	        [        XU5      5       GHX  u  nu  nnn[        UU	5      nUS   nU R                  UUU	5      nAAU(       a  [        UUS	/-   S
9nU
R                  U5        [	        U5       GH  u  nnUS   nUS   nUU;  d  M  Uc  M  0 n Sn!UR                  SU5      nUR                  SU5      n/ n"US	:X  a;  Sn![        UUS   U5      u  nn n"UR                  SU5      nUR                  SU5      nOUS:X  a.  U(       a'  Sn!UR                  SU5      nUR                  SU5      nOSU;   aV  US:w  aP  Sn![        U5      n#U#R                  u  n$n%n&U$S:  a  U%S:  a  U#nUR                  SU5      nUR                  SU5      nOMUS:X  a  Sn!SnSnS n[        U5      nO3US:X  a-  U(       a&  S!n!UR                  S"U5      nUR                  S#U5      nX4n'U'U;  a  / / / / S$S%.UU''   UU'   S&   R                  U5        UU'   S'   R                  U!5        UU'   S(   R                  U 5        UU'   S)   R                  UU45        U'UUU4'   UR                  U"5        GM     AGM[     AAUc  0 nOUR                  S*S 5      c  S+US*'   U H}  n'U'u  pSUUS,.UEn(UU'   S&   nUU'   S'   n)[!        U R"                  R$                  " [        UU)5       VV*s/ s H  u  nn*UU*S-.PM     sn*n4S.U(       a  SOS0U(D65      n+AA)U+UU'   S/'   M     / n,/ n-/ n./ n/[	        U
5       GH  u  nn/ n0/ n10 n2[	        U5       GHQ  u  nnUS   nUS   n3US   nS0n40 n UU4U;   Gal  UUU4   n'UU'   n5U5S1   n6U6['        U5S)   5      :  a  U5S)   U6   UU4:X  d   eU5S/   U6   n7U5S&   U6   n8U5S(   U6   n U6S2-  n6U6U5S1'   U8U7S3'   U7R                  S4S05      n9U9c  S0n9US	:X  a  S5OS6n:[)        U9U:S79n9S8U9;   a  S9U9;   d  S:U9;   a  S;U9;   a  U9R+                  S<S05      n9U9R+                  S8S=5      R+                  S9S>5      R+                  S?S:5      R+                  S@S;5      R+                  S:SA5      R+                  S;SA5      n9US:X  a  U9R+                  S<S05      n9US	:X  a  [-        U95      n;U;S0:w  a  U;n9US:X  a"  UR                  S S u  n%n$[/        U9U$U%5      u  n9n2U9n4[1        UU3U4UR                  SBS 5      UR                  SCS 5      SD9n<US	:X  a  U/R                  U U<SE.5        UU;   ab  Ub_  [3        US   US   5      n=U<UU='   U=U;  a?  S$S Kn>U>R7                  UU>R8                  5      nU=[:        R<                  " U5      SF.U<l        OGM>  U0R                  U<5        A<AGMT     U/ H)  n?U?SG   nU?SH   n [A        URB                  U U5      Ul!        M+     U,R                  U05        U-R                  U15        U.R                  U25        A0A1A2GM     U,U-U.U4$ s  sn*nf )INF
min_pixelsi  
max_pixelsi P sealchartboxestable)non_merge_labelsimglabelzOCR:ocr_min_pixelsocr_max_pixelszTable Recognition:boxtable_min_pixelstable_max_pixelszChart Recognition:chart_min_pixelschart_max_pixelsformulaformula_numberzFormula Recognition:r   formula_min_pixelsformula_max_pixelsspottingz	Spotting:Ti  zSeal Recognition:seal_min_pixelsseal_max_pixelsr   )imagesqueriesfigure_token_mapsvlm_block_idscurr_vlm_block_idxr   r   r   r   max_new_tokensi   )	use_cacherx   ry   )r(   queryskip_special_tokensvlm_results r   r   r(   resulti  2   )	min_countz\(z\)z\[z\]$z $ z $z\[\[z\]\]z $$ group_idpolygon_points)r   bboxcontentr   r   )figure_token_mapblockpathr   r   r   )"setpopIMAGE_LABELScopy	enumeratezipr!   r]   r"   appendr%   r    shaper$   updaterU   listrZ   predictlenr&   replacer   r#   r   r   cv2cvtColorCOLOR_BGR2RGBr	   	fromarrayr(   r'   r   )@r^   r   layout_det_resultsimgs_in_docrC   rD   rG   
vlm_kwargsrL   layout_shape_modeblockshas_spottingdrop_figures_setrx   default_min_pixelsry   default_max_pixelsbatch_dict_by_pixelid2pixel_key_mapimage_path_to_obj_mapvis_image_labelsimage_labelsir(   layout_det_resimgs_in_doc_for_imgr|   blocks_for_imgjr   	block_imgblock_labelr   text_promptdrop_figurescrop_imgwh_	pixel_keykwargsr   r   batch_resultsparsing_res_liststable_res_listsspotting_res_listtable_blocksparsing_res_listtable_res_listspotting_res
block_bboxblock_content
pixel_infor   vl_rec_resultblock_img4vl
result_strr   html_str
block_infoimg_pathr   blk_infos@                                                                   rd   get_layout_parsing_results/_PaddleOCRVLPipeline.get_layout_parsing_results   s    5^^L$7
+5+AZv^^L$7
+5+AZw  "'6(24r,:K:K:M$WI%L	)#VH$L?HK8@
;A;~': 2.BSTN"7+E!//u>OPN"!-"\WI5M" MM.)%n55!%L	#Gnl2y7L')$"(K!+0@BT!UJ!+0@BT!UJ#%L"g-&:4 )5<9L B	#3\
 &0^^.0B&
 &0^^.0B&
 %/4I&:%/^^.0B&
 &0^^.0B&
 #k1kEU6U&<#.y#9"*..1aq5QU(0I%/^^02D&
 &0^^02D&
 %
2&1'+%+
%,
$<Y$G	$.3G&9%/^^-/A&
 &0^^-/A&
 ", 8I (;;&(')13-/23:+I6 (	28<CCIN'	29=DD[Q'	23FGNN( (	2?CJJAq6R/8$aV,$++L9Q 6R k@
l &J^^,d3;+/J'(,I%."J!(( 	F )3H=F))4Y?G !!)) -0,@
 -ALE5 &+%* -A
 2>4
 
M <I	*=91 -4 !*6!2A~!NL%n55!%L	"5\
#Gn "#% q6-- 0!Q 8I!4Y!?J)34H)I&-"?31 $_56HIaQRVST T %/}$=>P$QM#-h#78J#KL'12E'F*($ '!+&7IJ34-9M'*!.!2!28R!@J!)%'
(3w(>BI!<"i"J +0C+0C%/%7%7R%@
 '..ue<$WUD1$WXu5$WXu5$WUF3$WUF3 # '*::)3););C)DJ"g-#7
#C#r>)1J"j0(r213L&140
L %/M-%#)"YYz48#(99-=t#D
 ') ''0@%/ "22y7L1%.%,OH6@)(3'77"$'LLC<M<M$N	$,#(??9#=,
(
 ! ''
3	g 6j ) )#+,>#?  :MM#35J! ) $$%56"">2$$\2 .,E "3J 	
 	
ms   +Zinputlayout_thresholdr@   rA   rB   r   rK   prompt_labelrepetition_penaltytemperaturetop_prx   ry   r   vlm_extra_argsc              +   

  ^ ^^^^	^
^^^^^^^^^^^&^'^(^)^*^+^,^-^.^/#    T R                  TTUUUUUUU5	      m,TS:X  a  SOST,S'   T R                  T,5      (       d  SS0v   Uc  T R                  nTc  0 mT,S   (       d]  T(       a  TOS	mTR                  5       S
:X  a  ST,S'   OTR                  5       S:X  a  ST,S'   TR                  5       S;   d   ST S35       eS%UU	UUU
U,UU UU4
S jjm&UUUUU,UU UUU4
S jm'U(       Ga<  Sn[        R
                  " US9m.[        R
                  " US9m-[        R
                  " T R                  R                  U-  S9m/[        R                  " 5       m*[        R                  " 5       m)[        R                  " 5       m([        R                  " 5       m+U)U*U.U 4S jnU&U(U)U*U,U-U.U 4S jnU'U(U*U+U-U/U 4S jn[        R                  " UU4SS9nUR                  5         [        R                  " USS9nUR                  5         [        R                  " USS9n U R                  5          U(       a  T+R                  5       (       a  T/R                  5       (       df   T/R                  SS9n!U!S   (       d  [!        SU!S    SU!S     35      eU!S   v   T+R                  5       (       d  MO  T/R                  5       (       d  Mf  OdT R                  U5       HO  n"[#        T&" U"5      5      n#[%        U#5      S:X  d   [%        U#5      5       eU#S   n$T'" U$5       H  n%U%v   M	     A%A$A#A"MQ     U(       a  T*R'                  5         WR)                  S!S9  UR+                  5       (       a  [,        R.                  " S"5        WR)                  S!S9  UR+                  5       (       a  [,        R.                  " S#5        W R)                  S!S9  U R+                  5       (       a  [,        R.                  " S$5        ggg! [        R                   a    T+R                  5       (       a   M   GM  f = f! U(       a  T*R'                  5         WR)                  S!S9  UR+                  5       (       a  [,        R.                  " S"5        WR)                  S!S9  UR+                  5       (       a  [,        R.                  " S#5        W R)                  S!S9  U R+                  5       (       a  [,        R.                  " S$5        f f f = f7f)&aD  
Predicts the layout parsing result for the given input.

Args:
    input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
                                                                numpy array of an image, or list of numpy arrays.
    use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
    use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
    use_layout_detection (Optional[bool]): Whether to use layout detection. Default is None.
    use_chart_recognition (Optional[bool]): Whether to use chart recognition. Default is None.
    use_seal_recognition (Optional[bool]): Whether to use seal recognition. Default is None.
    layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
    layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to False.
    layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
        Defaults to None.
        If it's a single number, then both width and height are used.
        If it's a tuple of two numbers, then they are used separately for width and height respectively.
        If it's None, then no unclipping will be performed.
    layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to None.
    layout_shape_mode (Optional[str], optional): The mode for layout shape. Defaults to "auto", [ "rect", "quad","poly", "auto"] are supported.
    use_queues (Optional[bool], optional): Whether to use queues. Defaults to None.
    prompt_label (Optional[Union[str, None]], optional): The label of the prompt in ['ocr', 'formula', 'table', 'chart']. Defaults to None.
    format_block_content (Optional[bool]): Whether to format the block content. Default is None.
    repetition_penalty (Optional[float]): The repetition penalty parameter used for VL model sampling. Default is None.
    temperature (Optional[float]): Temperature parameter used for VL model sampling. Default is None.
    top_p (Optional[float]): Top-p parameter used for VL model sampling. Default is None.
    min_pixels (Optional[int]): The minimum number of pixels allowed when the VL model preprocesses images. Default is None.
    max_pixels (Optional[int]): The maximum number of pixels allowed when the VL model preprocesses images. Default is None.
    max_new_tokens (Optional[int]): The maximum number of new tokens. Default is None.
    merge_layout_blocks (Optional[bool]): Whether to merge layout blocks. Default is None.
    markdown_ignore_labels (Optional[list[str]]): The list of ignored markdown labels. Default is None.
    **kwargs (Any): Additional settings to extend functionality.

Returns:
    PaddleOCRVLResult: The predicted layout parsing result.
rectFTreturn_layout_polygon_pointsrt   z0the input params for model settings are invalid!Nr:   ocrr{   rC   rz   rD   )r   r   r}   r{   r   rz   zLayout detection is disabled (use_layout_detection=False). 'prompt_label' must be one of ['ocr', 'formula', 'table', 'chart'], but got 'z'.c              3   ^  >
#    U(       d  [        U 5      n[        S[        U 5      U5       GHf  nU R                  X"U-    nU R                  X"U-    nU R                  X"U-    nU R
                  X"U-    nTR                  U5      nTS   (       a  [        TR                  UTTS95      nOU V	s/ s H  n	SU	0PM	     nn	U V
s/ s H  oS   PM	     nn
TS   (       aJ  [        TR                  UTTTTTSS95      n[        X5       VVs/ s H  u  p[        XS   5      PM     nnnOd/ nU HJ  nUR                  S S STR                  5       S	SSUR                  S	   UR                  S   /S
./S.5        ML     U Vs/ s H  n/ PM     nnXEXkXU4v   GMi     g s  sn	f s  sn
f s  snnf s  snf 7f)Nr   r6   )rl   rm   
output_imgr:   F)r?   r@   rA   rB   r   r!   r|   r   )cls_idr   score
coordinate)
input_path
page_indexr|   )r   range	instancesinput_pathspage_indexespage_countsr\   r   rW   rY   r   r   r   lowerr   )
batch_datanew_batch_sizeidxr   r   r   r   image_arraysdoc_preprocessor_resultsarritemdoc_preprocessor_imagesr   
doc_pp_imgr   r   doc_preprocessor_imager   rB   r@   r   r   rA   model_settingsr   r^   rl   rm   s                     rd   _process_cv1_PaddleOCRVLPipeline.predict.<locals>._process_cvg  s:    !!$ZQJ@&00^7KL	(44S;OP)66s>=QR(44S;OP#y9!"89/366(9U.? 7 0, 8D07Cs+| - 0
 4L+3K4&3K ( + ""89)---3&6'10C5M.?16 . 
*& ;>3;#;6J $Jw0GH;   #K *,&2I.*11.2.2 341=1C1C1E12,-,-,B,H,H,K,B,H,H,K	7*	
%&*" 3J( 0B"B/A!2/AK"B!Wo  FQ  Q  QG A"0+ #8 #Cs7   B+F-.F<F-F8F-F""AF-<F(%F-c              3   <  >
#    U u  nnnnnnnTR                  UUUTS   TS   TS   TTTTTTS.TETS   TS9	u  nn	n
n[        UUUUUUU	UU
U5
       HD  u
  nnnnnnnnnnUUUUR                  S   UR                  S   UUUUUUTS	.n[        U5      v   MF     g 7f)
NrC   rD   rG   )r   r   r   rx   ry   r   rL   )	r   r   r   rC   rD   rG   r   rL   r   r   r   )r   r   
page_countwidthheightdoc_preprocessor_resr   r   r   r   r   r
  )r   r   r   r   ) 
results_cvr   r   r   r  r  r   r   r   r   r   r   r   r  r	  r  r   r   r   r   r   single_img_resr   r   ry   rx   r
  r   r^   r   r   r   s                          rd   _process_vlm2_PaddleOCRVLPipeline.predict.<locals>._process_vlm  s?     '(" //.#5'&45L&M%34J%K(67P(Q*<#."",",&4 % %33H$I"3# 0 !!@ '("!!&$ # #-",",399!<4::1=,@&4&4(8$0#6&4" (775s   BB@   )maxsizec                 P  > TR                  U 5      nTR                  5       (       d7   [        U5      nTR                  SU45        ATR                  5       (       d  M7  TR                  5         g ! [         a     M  [
         a  nTR                  SSU45         S nAMA  S nAff = f)NTFr   )r[   is_setnextputStopIteration	Exceptionr   )input_all_batch_datar   eevent_data_loading_doneevent_shutdownqueue_inputr^   s       rd   _worker_input3_PaddleOCRVLPipeline.predict.<locals>._worker_input  s    !%!3!3F!;(//11	'%).%9
 $z(:;& )//11 (++- ) $ #(;<s   A0 0
B%=	B%B  B%c                  8  > TR                  5       (       d   T	R                  SS9n U S   (       d  TR                  U 5        g  T" U S   TS   (       a   T
R                  R                  R                  OS 5       H  nTR                  SU45        AM     A TR                  5       (       d  M  g g ! [        R                   a+    TR                  5       (       a  TR	                  5          g  M  f = f! [         a  nTR                  SSU45         S nAg S nAff = f)	N      ?timeoutr   r   r:   TFcv)
r  rU   queueEmptyr   r  rY   r[   rH   r  )r  r  r   r  event_cv_processing_doner!  r"  r
  queue_cvr#  r^   s      rd   
_worker_cv0_PaddleOCRVLPipeline.predict.<locals>._worker_cv  s   (//11!*s;  7 T**5 G $22H#I !% 5 5 C C N N%)+J %LL$
);< *+ !- )//11 !;; !299;;488:! 	!( %  eT1%56s*   B/ AC1 /9C.-C.1
D;DDc                    > Sn TR                   R                  R                  nTR                  5       (       GdT  / n[        R                  " 5       nSnSn U [        R                  " 5       U-
  -
  nUS::  a  Ok TR                  US9nUS   (       d  TR                  U5        SnO=UR                  US   5        AUS   S    H  nU[        US	   5      -  nM     XQ:  a  OM  U(       a  g U(       d(  TR                  5       (       a  TR                  5         g M  [        U6  V	s/ s H"  n	[        [        R                  " U	5      5      PM$     n
n	A T" U
5       H  nTR                  SU45        AM     A
TR                  5       (       d  GMS  g g ! [        R                   a     M  f = fs  sn	f ! [          a  nTR                  SS
U45         S nAg S nAff = f)Nr'  Fr   Tr(  r      r|   vlm)rZ   r[   rH   r  timerU   r+  r,  r  r   r   r   r   r   r   from_iterabler  )MAX_QUEUE_DELAY_SECSMAX_NUM_BOXESresults_cv_list
start_timeshould_break	num_boxesremaining_timer  reslistsmerged_results_cv
result_vlmr   r  r-  r"  event_vlm_processing_doner.  	queue_vlmr^   s                rd   _worker_vlm1_PaddleOCRVLPipeline.predict.<locals>._worker_vlm2  s   '*$ $ 1 1 ? ? J J(//11&(O!%J#(L !I)= IIK*4* *Q.!"#+<<<#GD  $Aw%MM$/+/L!'..tAw7 #22#6q#9C%S\)::I $:$5!' ( $*3::<<599;!  &)/%:)%:E U0078%: & ) (*67H*IJ%MM4*<= * +J .W )//11  %{{ "!"() % !ueQ&78s0   9F $)F*$F/ F'&F'/
G9GG)targetargsdaemon)rF  rH  r'  r(  r   zException from the 'r   z
' worker: r   r3  z&Input worker did not terminate in timez#CV worker did not terminate in timez$VLM worker did not terminate in timeN)rp   ru   rK   r   r+  Queuer[   rH   	threadingEventThreadstartr  emptyrU   r,  RuntimeErrorr   r   r   joinis_aliver   warning)0r^   r   rl   rm   r:   rC   rD   rG   r   r@   rA   rB   r   rK   r   rF   r   r   r   rx   ry   r   rL   rM   r   r   max_num_batches_in_processr$  r/  rD  thread_input	thread_cv
thread_vlmr  r   r9  r  r>  r  r  r-  r!  r"  rB  r
  r.  r#  rC  s0   ` ``    ````` ` ``````  `             @@@@@@@@@@rd   r   _PaddleOCRVLPipeline.predict  s    @ 00( ! # "

 '&0Ed 	56 ..~>>NOOJ!N45+7<UL!!#w.:>67##%/9=56%%' ,  k Z  [g  Zh  hj  kk G	Q G	Q G	QRI	8 I	8V )+&++.HIK{{+EFH**558RRI '__.N&/oo&7#'0'8$(1(9%. . 82 2h %++$E8EL  !((
5IIOO"))UKJ"	L4;;==)//BSBS!(}}S}9
  7*247):d1gYO  #1g 5;;==)//BSBS #'"4"4U";J&*;z+B&CO/14Jc/6JJ4!0!3J+J7!	  8Z* #< ""$!!!!,((**OO$LMq)%%''OO$IJ*&&((OO$JK ) ' !;; !4;;==! !& ""$!!!!,((**OO$LMq)%%''OO$IJ*&&((OO$JK ) sQ   H"T?1P7 1P  =P7 ?P7 A%P7 ;CT)P4,P7 3P44P7 7C	T  Tmarkdown_listc                 2    SnU H  nUSUS   -   -  nM     U$ )z
Concatenate Markdown content from multiple pages into a single document.

Args:
    markdown_list (list): A list containing Markdown data for each page.

Returns:
    tuple: A tuple containing the processed Markdown text.
r   z

markdown_texts )r^   rY  r[  r>  s       rd   concatenate_markdown_pages/_PaddleOCRVLPipeline.concatenate_markdown_pages  s0      Cfs+;'<<<N ! rf   res_listmerge_tabletitle_levelmerge_pagesc                 R    [         R                  " S5        U R                  XX45      $ )a  Concatenate layout parsing results from multiple pages.

Args:
    res_list: List of page parsing results
    merge_talble: Whether to merge tables across pages
    title_level: Whether to assign title levels
    merge_pages: Whether to concatenate pages using the new consolidate_pages() logic

Returns:
    PaddleOCRVLResult: Combined OCR-VL result after merge_table or title_level policy
zDeprecationWarning: `concatenate_pages()` is deprecated as of v3.3.14 and will be removed in v3.4.0. Please use `restructure_pages()` instead. It provides better support for table merging and title restructuring.)r   rS  restructure_pages)r^   r_  r`  ra  rb  s        rd   concatenate_pages&_PaddleOCRVLPipeline.concatenate_pages  s+    $ 	 c	
 %%h[VVrf   merge_tablesrelevel_titlesre  c              #     ^#    [        U5      S:X  a  / $ S mU4S jnSn/ nU H  n[        U[        5      (       d7  US   n/ US'   UR                  S/ 5      n	UR                  S0 5      n
U" X5      n	OUS   n	UR                  S0 5      n
/ nU	 H%  nXll        Xll        US-  nUR                  U5        M'     XS'   UR                  U5        M     UnU Vs/ s H  oS   PM	     nnU(       a  [        U5      nU(       a  [        U5      n/ nU(       a  US   nU VVs/ s H  nU  H  nUPM     M     snnUS'   S	US
'   [        U5      US'   W
S   (       a  U Vs/ s H  oS   PM	     snUS'   U
S   (       a  U Vs/ s H  oS   PM	     snUS'   UR                  [        U5      5        O7[        U5       H(  u  nnUU   US'   UR                  [        U5      5        M*     U S	h  vN   g	s  snf s  snnf s  snf s  snf  N7f)a{  Restructure layout parsing results from multiple pages.
Args:
    res_list: List of page parsing results
    merge_tables: Whether to merge tables across pages
    relevel_titles: Whether to relevel titles
    concatenate_pages: Whether to concatenate pages to a single document

Returns:
    PaddleOCRVLResult: Combined OCR-VL result after merge_tables or relevel_titles policy
r   c                     U R                  SS 5      (       a  U S   $ U S   S;   d   U S   S:X  a.  UR                  SS5      (       d  [        U S   U S   5      nUS S.$ g )	Nr(   r   )r(   rz   r{   rC   Fr   r   )rU   r   )r   r
  r   s      rd   _get_img_obj<_PaddleOCRVLPipeline.restructure_pages.<locals>._get_img_obj  sp    yy$''W~%]#'88m$/&**+BEJJ)%*>l@ST $T22rf   c                   > / nU  Hu  n[        US   US   UR                  SS 5      [        R                  " SSUS   5      UR                  SS 5      S9nT" X15      =n(       a  XTl        UR                  U5        Mw     U$ )	Nr   r   block_polygon_pointsz^#+\sr   r   r   )r   r   r   r   r   )r   rU   resubr(   r   )r   r
  r>  r   objr   rk  s         rd   _conver_blocks_to_objE_PaddleOCRVLPipeline.restructure_pages.<locals>._conver_blocks_to_obj  s    C&.|,#(99-CT#JFF8R1GH"YYz48 'u==3= #I

3   Jrf   r>  r   r   r
  r   Nr   r  r:   r   r6   r  )r   
isinstancer   rU   global_block_idglobal_group_idr   r   r   r   r   )r^   r_  rg  rh  re  rr  ru  obj_res_listone_page_resr   r
  r   r   r>  blocks_by_pageconcatenate_resall_page_resblksblkpage_idxrk  s                       @rd   rd  &_PaddleOCRVLPipeline.restructure_pages  sN    $ x=AI			 $LlJ77+E2.0]+%))*<bA!-!1!12BB!G.vF%&89!-!1!12BB!G!(7%(7%1$ ''.	   0@+,-% %&  =EFXc01XF6~FN9.IN#A;L .0 .$3$0L+, *.L&),XL&455=25=c()X2-. 45;C8;CC./8834 ""#4\#BC*3H*=&,3A(3K/0&&'8'FG +> #""; G028 	#sC   CHG.$9HG33(HG9)H=G>AH(H)H)r[   r]   rW   rF   r\   rY   rM   rL   rC   r6   r:   rG   rK   rD   rZ   )NNFNTrI  )FFFNTauto)FFNNNNNNNNr  NNNNNNNNNNNN)TTF)!__name__
__module____qualname____firstlineno____doc__r   r   strr   boolr   r   r   rT   ri   r   ro   rp   ru   r   npndarrayfloatr   intr   r   tupler]  re  rd  __static_attributes____classcell__)rc   s   @rd   r,   r,   6   sW   '
 !%59AE"&ii i 12	i
 i U4S>9#<=>i  i 
i iV& 7;=
&+D$J&7=
 !t,=
 $D$J/	=

  %T4Z0=
 $D$J/=
 "'tTz!2=
 $D$J/=
 #4:.=
 !)c 3=
 
=
~t  0 $" %  B
N ;@/4263726599=%)QU26+1%)3726.2'+!%$($((,.26:)-3RLS$s)RZZbjj1AABRL ',D$J&7RL !t,	RL
 $D$J/RL  %T4Z0RL $D$J/RL "'tTz!2RL #5#56RL TNRL &eE53F,L&MNRL #+3-RL $C=RL TNRL uS$Y/0RL  $D$J/!RL" %UO#RL$ e_%RL& 'RL( SM)RL* SM+RL, !-RL. &d^/RL0 !)c 31RL2 !3RL6 
7RLh  ( ! !WW W 	W
 W4 "#"'c#c# c# 	c#
  c# c#rf   r,   c                   *    \ rS rSr\S 5       rS rSrg)_BasePaddleOCRVLPipelinei!  c                     [         $ rI  )r,   rj   s    rd   _pipeline_cls&_BasePaddleOCRVLPipeline._pipeline_cls"  s    ##rf   c                 &    UR                  SS5      $ )NrH   r   )rU   )r^   r.   s     rd   _get_batch_size(_BasePaddleOCRVLPipeline._get_batch_size&  s    zz,**rf   r\  N)r  r  r  r  propertyr  r  r  r\  rf   rd   r  r  !  s    $ $+rf   r  r   c                       \ rS rSrSrSrg)PaddleOCRVLPipelinei*  zPaddleOCR-VLr\  Nr  r  r  r  entitiesr  r\  rf   rd   r  r  *  s    Hrf   r  c                       \ rS rSrSrSrg)PaddleOCRVL15Pipelinei/  zPaddleOCR-VL-1.5r\  Nr  r\  rf   rd   r  r  /  s    !Hrf   r  )?r+  ro  rK  r5  	itertoolsr   typingr   r   r   r   r   numpyr  PILr	   utilsr   
utils.depsr   common.batch_samplerr   common.readerr   utils.benchmarkr   	utils.hpir   utils.pp_optionr   	_parallelr   baser   
componentsr   layout_parsing.merge_tabler   layout_parsing.title_levelr   layout_parsing.utilsr   r   r   r   r   r   uiltsr   r    r!   r"   r#   r$   r%   r&   r'   r   time_methodsr,   r  r  r  r\  rf   rd   <module>r     s     	    4 4    2 5 & ( " 4 @  $ B E B C C
 
 
 9 g#< g# g#T+G + 2    "4 "  "rf   