
    {-j8                    j   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZ d dlZd dlmZ ddlmZ dd	lmZ d
dlmZ d
dlmZ d
dlmZmZ d
dl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z< g dZ=e!j>         G d de%                      Z? G d de#          Z@ ed           G d d e@                      ZA ed           G d! d"e@                      ZB ed           G d# d$e@                      ZCdS )%    )annotationsN)ThreadPoolExecutor)chain)AnyDictListOptionalTupleUnion)Image   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	HPIConfigPaddlePredictorOption)	benchmark   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)CropByBoxes)merge_tables_across_pages)assign_levels_to_parsing_res)construct_img_pathgather_imgs   )
BaseResultPaddleOCRVLBlockPaddleOCRVLResult)	convert_otsl_to_htmlcrop_marginfilter_overlap_boxesmerge_blockspost_process_for_spottingpre_process_for_spottingtokenize_figure_of_tabletruncate_repetitive_contentuntokenize_figure_of_table)imageheader_imagefooter_imagec                  v    e Zd ZdZdddddddddc fdZd Z	 ddded$Zdfd&Z ej	        d'(          d)             Z
 ej	        d*(          d+             Z ej	        d,(          d-             Zd. Z ej	        d/(          d0             Zd1 Zd2 Z ej	        d3(          d4             Z ej	        d5(          d6             Z ej	        d7(          d8             Z ej	        d9(          d:             Z	 	 	 	 	 	 dgd<Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dhdidUZdjdYZ	 	 	 dkdld^Z	 	 	 dkdmdbZ xZS )n_PaddleOCRVLPipelinez_PaddleOCRVLPipeline PipelineNFT)deviceengineengine_config	pp_optionuse_hpip
hpi_configinitial_predictorconfigr   r0   Optional[str]r1   r2   Optional[Dict[str, Any]]r3   Optional[PaddlePredictorOption]r4   boolr5   *Optional[Union[Dict[str, Any], HPIConfig]]r6   returnNonec          
         t                      j        d(||||||d|	 |r"|                    dd          | _        | j        rF|                    di                               dddi          }
|                     |
          | _        |                    dd          | _        | j        r|                    d	i                               d
ddi          }|                    dd          }||dv s
J d            i }|                    dd          x}||d<   |                    dd          x}	 ||d<   |                    dd          x}	 ||d<   |                    dd          x}	 ||d<    | j        |fi || _        |                    dd          | _	        |                    dd          | _
        |                    d	i                               dddi          }|                     |          | _        |                    dd          | _        |                    dd          | _        t          |                    dd                    | _        t!          d           | _        t%                      | _        |                    d!d          | _        |                    d"d          | _        |                    d#g d$          | _        |                    d%d&          }	 t/          d&t1          |                    | _        dS # t4          t6          f$ r}t7          d'          |d}~ww xY wdS ))a  Initializes the PaddleOCR-VL pipeline.

        Args:
            config (Dict): Configuration dictionary containing various settings.
            device (Optional[str], optional): The device to use for prediction. Defaults to `None`.
            engine (Optional[str], optional): Inference engine. Defaults to `None`.
            engine_config (Optional[Dict[str, Any]], optional): Engine-specific config. Defaults to `None`.
            pp_option (Optional[PaddlePredictorOption], optional): Paddle predictor options.
                Defaults to `None`.
            use_hpip (bool, optional): Whether to use HPIP. Defaults to `False`.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                HPIP configuration. Defaults to `None`.
            initial_predictor (bool, optional): Whether to initialize predictors.
                Defaults to `True`.
        )r0   r1   r2   r3   r4   r5   use_doc_preprocessorTSubPipelinesDocPreprocessorpipeline_config_errorz+config error for doc_preprocessor_pipeline!use_layout_detection
SubModulesLayoutDetectionmodel_config_errorz"config error for layout_det_model!
model_nameN)zPP-DocLayoutV2zPP-DocLayoutV3z3model_name must be PP-DocLayoutV2 or PP-DocLayoutV3	threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modeuse_chart_recognitionFuse_seal_recognitionVLRecognitionzconfig error for vl_rec_model!format_block_contentuse_ocr_for_image_block
batch_sizer   )rR   BGR)format
use_queuesmerge_layout_blocksmarkdown_ignore_labels)numberfootnoteheaderr,   footerr-   
aside_textlayout_prep_cpu_workersr   zf`layout_prep_cpu_workers` must be a non-negative integer (0 disables parallel CPU layout preparation). )super__init__getr@   create_pipelinedoc_preprocessor_pipelinerD   create_modellayout_det_modelrM   rN   vl_rec_modelrP   rQ   r   batch_samplerr   
img_readerr   crop_by_boxesrU   rV   rW   maxintr]   	TypeError
ValueError)selfr7   r0   r1   r2   r3   r4   r5   r6   kwargsdoc_preprocessor_configlayout_det_configrH   layout_kwargsrI   rJ   rK   rL   vl_rec_configlpexc	__class__s                        q/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddlex/inference/pipelines/paddleocr_vl/pipeline.pyr`   z_PaddleOCRVLPipeline.__init__;   s   8 	 	
'!	
 	
 	
 	
 	
  W	(.

3I4(P(PD%( 	*0**^R*H*H*L*L%/1^+ +' 261E1E+2 2. )/

3I4(P(PD%( $*JJ|R$@$@$D$D%)+OP% %! /22<FF
!-* A 3 3 3 I3 3  !#!2!6!6{D!I!IIIV1:M+."3"7"7d"K"KKJ 3=M,/+<+@+@-t, , ' 	
 <OM"780A0E0E2D1 1 , 	
 AYM"<=(9(9%) ))6) )% *04KU)S)SD&(.

3I5(Q(QD%"JJ|R88<<%'GH M
 !% 1 1- @ @D(.

3I5(Q(QD%+1::6OQV+W+WD(!2!::lA66" " "D (u555DO!,D$jju==DO'-zz2G'N'ND$*0**(  + +D' 5q99B/21c"gg,,,z*    D  gW	 W	s   :"L M/L??Mc                \    t          | d          r| j                                         d S d S )Nrf   )hasattrrf   closern   s    rw   rz   z_PaddleOCRVLPipeline.close   s9    4(( 	&##%%%%%	& 	&    use_doc_orientation_classifyUnion[bool, None]use_doc_unwarpingrD   rM   rN   rQ   rP   rV   rW   Optional[List[str]]dictc
           
         |
|| j         }
n|du s|du rd}
nd}
|| j        }|| j        }|| j        }|| j        }|| j        }|| j        }|	| j        }	t          |
|||||||	          S )a  
        Get the model settings based on the provided parameters or default values.

        Args:
            use_doc_orientation_classify (Union[bool, None]): Enables document orientation classification if True. Defaults to system setting if None.
            use_doc_unwarping (Union[bool, None]): Enables document unwarping if True. Defaults to system setting if None.

        Returns:
            dict: A dictionary containing the model settings.

        NTF)r@   rD   rM   rN   rQ   rP   rV   rW   )	r@   rD   rM   rN   rQ   rP   rV   rW   r   )rn   r}   r   rD   rM   rN   rQ   rP   rV   rW   r@   s              rw   get_model_settingsz'_PaddleOCRVLPipeline.get_model_settings   s    . (/4E4M#'#<  +t337HD7P7P'+$$',$'#'#<  ($($>!'#'#< "*&*&B#'#'#< &"&":!)%)%@"!5!5"7!5$;!5 3#9	
 	
 	
 		
r|   input_paramsc                P    |d         r| j         st          j        d           dS dS )a4  
        Check if the input parameters are valid based on the initialized models.

        Args:
            input_params (Dict): A dictionary containing input parameters.

        Returns:
            bool: True if all required models are initialized according to input parameters, False otherwise.
        r@   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.FT)r@   r   error)rn   r   s     rw   check_model_settings_validz/_PaddleOCRVLPipeline.check_model_settings_valid   s?     ./ 	8Q 	Md   5tr|   !paddleocr_vl_filter_overlap_boxes)namec                "    t          ||          S N)r$   )rn   layout_det_reslayout_shape_modes      rw   "_paddleocr_vl_filter_overlap_boxesz7_PaddleOCRVLPipeline._paddleocr_vl_filter_overlap_boxes  s    #N4EFFFr|    paddleocr_vl_crop_layout_regionsc                0    |                      |||          S r   )ri   )rn   r+   boxesr   s       rw   !_paddleocr_vl_crop_layout_regionsz6_PaddleOCRVLPipeline._paddleocr_vl_crop_layout_regions  s    !!%0ABBBr|   "paddleocr_vl_merge_adjacent_blocksc                L    |d         s|S t          ||d         dgz             S )NrV   image_labelstablenon_merge_labels)r%   )rn   blocks_for_imglayout_prep_cfgs      rw   #_paddleocr_vl_merge_adjacent_blocksz8_PaddleOCRVLPipeline._paddleocr_vl_merge_adjacent_blocks  s?    45 	"!!,^<yH
 
 
 	
r|   c           	        g }d}t                      }t          |          D ]D\  }}	|	d         }
|	d         }||d         vr#|
 i }d}|d         }|d         }g }|dk    r.d	}t          |
|	d
         |          \  }
}}|d         }|d         }n|dk    r|d         rd}|d         }|d         }nd|v rA|dk    r;d}t          |
          }|j        \  }}}|dk    r|dk    r|}
|d         }|d         }n>|dk    rd}d}d}d}t          |
          }
n |dk    r|d         rd}|d          }|d!         }|                    |||
|||f|f           |                    |           F|||fS )"NFimglabelr   zOCR:ocr_min_pixelsocr_max_pixelsr   zTable Recognition:boxtable_min_pixelstable_max_pixelschartrM   zChart Recognition:chart_min_pixelschart_max_pixelsformulaformula_numberzFormula Recognition:r   formula_min_pixelsformula_max_pixelsspottingz	Spotting:T  i  sealrN   zSeal Recognition:seal_min_pixelsseal_max_pixels)set	enumerater(   r#   shaper'   appendupdate)rn   page_idxr   imgs_in_doc_for_imgr   page_vlm_entriespage_has_spottingpage_drop_figuresjblock	block_imgblock_labelfigure_token_maptext_promptblk_min_pixelsblk_max_pixelsdrop_figurescrop_imgwh_s                        rw   +_paddleocr_vl_collect_page_vlm_entries_corez@_PaddleOCRVLPipeline._paddleocr_vl_collect_page_vlm_entries_core!  s"    !EE!.11 8	7 8	7HAueI.K?>#BBB)#% $!01A!B!01A!B!'))"6K0%uU|5H  >I/
 &55G%HN%45G%HNN7**?V/W*"6K%45G%HN%45G%HNN+--+AQ2Q2Q"8K*955H&nGAq!1uuQ$,	%45I%JN%45I%JNN J.."-K(,%%+N%,N 8 C CII F**?U/V*"5K%45F%GN%45F%GN '' !#'8(	 	 	 "((666!24EEEr|   %paddleocr_vl_collect_block_vlm_inputsc                2    |                      ||||          S r   )r   )rn   r   r   r   r   s        rw   &_paddleocr_vl_collect_block_vlm_inputsz;_PaddleOCRVLPipeline._paddleocr_vl_collect_block_vlm_inputsh  s(     ??	
 
 	
r|   c                   |\  }}}}}t          ||d                   }|d         }|                     |||d                   }|d         rt          ||d         dgz             }|                     ||||          \  }	}
}|||	|
|fS )uX   Filter → crop → merge → build VLM inputs (safe for thread pool; no nested timers).r   r   rV   r   r   r   )r$   ri   r%   r   rn   payloadir+   r   r   r   r   r   r   r   r   s               rw   _paddleocr_vl_prepare_page_corez4_PaddleOCRVLPipeline._paddleocr_vl_prepare_page_corew  s    IPF5."5-O,?@
 
 w'++5/*=>
 
 01 	)!0!@G9!L  N <<~2O
 
		

 
 	
r|   c                   |\  }}}}}|                      ||d                   }|d         }|                     |||d                   }|                     ||          }|                     ||||          \  }	}
}|||	|
|fS )Nr   r   )r   r   r   r   r   s               rw   -_paddleocr_vl_prepare_page_serial_benchmarkedzB_PaddleOCRVLPipeline._paddleocr_vl_prepare_page_serial_benchmarked  s     	
@@O,?@
 
 w'??5/*=>
 
 AAO
 
 77	
 
		
 
 	
r|   !paddleocr_vl_layout_prep_parallelc                    t          |          5 }t          |                    | j        |                    cd d d            S # 1 swxY w Y   d S )N)max_workers)r   listmapr   )rn   page_payloadsr   cpu_pools       rw   (_paddleocr_vl_layout_prep_parallel_pagesz=_PaddleOCRVLPipeline._paddleocr_vl_layout_prep_parallel_pages  s    K888 	HTA=QQ 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   (AA
A
"paddleocr_vl_aggregate_vlm_batchesc                   g }d}t                      }i }i }|D ]\  }}}	}
}|                    |           |
rd}|                    |           |	D ]\  }}}}}}||vrg g g g dd||<   ||         d                             |           ||         d                             |           ||         d                             |           ||         d                             ||f           ||||f<   |||||fS )	NFTr   )imagesqueriesfigure_token_mapsvlm_block_idscurr_vlm_block_idxr   r   r   r   )r   r   r   )rn   page_resultsblockshas_spottingdrop_figures_setbatch_dict_by_pixelid2pixel_key_mapr   r   r   r   r   r   r   r   r   	pixel_keyr   s                     rw   #_paddleocr_vl_aggregate_vlm_batchesz8_PaddleOCRVLPipeline._paddleocr_vl_aggregate_vlm_batches  s   55  !	5 !	5 
MM.)))  $###$5666 "5 5  $777"$#%-/)+./6 6'	2 $I.x8??	JJJ#I.y9@@MMM#I./BCJJ$   $I.?FF1vNNN+4 !Q((-52 
 	
r|   'paddleocr_vl_run_vl_recognition_batchesc           
     8   |i }n|                     dd           d|d<   |D ]v}|\  }}d||d|}||         d         }||         d         }	t           | j        j        d t	          ||	          D             fd|rd	ndi|          }
~~	|
||         d
<   wd S )Nmax_new_tokensi   T)	use_cache
min_pixels
max_pixelsr   r   c                    g | ]
\  }}||d S ))r+   queryr^   ).0r+   r   s      rw   
<listcomp>zQ_PaddleOCRVLPipeline._paddleocr_vl_run_vl_recognition_batches.<locals>.<listcomp>  s;       
 )E5 &+%*   r|   skip_special_tokensFvlm_results)ra   r   rf   predictzip)rn   r   r   
vlm_kwargsr   min_pxmax_pxro   	pv_imagesr   batch_resultss              rw   (_paddleocr_vl_run_vl_recognition_batchesz=_PaddleOCRVLPipeline._paddleocr_vl_run_vl_recognition_batches  s    JJ^^,d33;+/J'(, 	J 	JI&NFF!$$  	F ,I6x@I))4Y?G )!) 
 -0	7,C,C  
 
 2>(G4
 
 
 M 7<I	*=991	J 	Jr|   %paddleocr_vl_assemble_parsing_resultsc                   g }g }g }i }	g }
t          |          D ]P\  }}g }g }i }t          |          D ]\  }}|d         }|d         }|d         }d}i }||f|v r|||f         }||         }|d         }|t          |d                   k     r|d         |         ||fk    sJ |d         |         }|d         |         }|d	         |         }|d
z  }||d<   ||d<   |                    dd          }|d}|dk    rdnd}t          ||          }d|v rd|v sd|v rd|v r|                    dd          }|                    dd                              dd                              dd                              dd                              dd                              dd          }|dk    r|                    dd          }|dk    rt          |          }|dk    r|}|dk    r&|j        d d         \  }} t          || |          \  }}|}t          ||||                    dd           |                    dd                      }!|dk    r|
	                    ||!d!           ||v re|ct          |d         |d                   }"|!|	|"<   |"|vr<d"d l}#|#                    ||#j                  }|"t          j        |          d#|!_        n|	                    |!           ~!~|
D ]-}$|$d$         }|$d%         }t#          |j        ||	          |_        .|	                    |           |	                    |           |	                    |           ~~~R|||fS )&Nr   r   r    r   r   r   r   r   r   r+   resultr   i  2   )	min_countz\(z\)z\[z\]$z $ z $z\[\[z\]\]z $$ r   r   r   group_idpolygon_points)r   bboxcontentr  r  )r   r   r   pathr   r   r   )r   lenra   r)   replacer"   r   r&   r    r   r   cv2cvtColorCOLOR_BGR2RGBr   	fromarrayr+   r*   r  )%rn   r   r   r   r   vis_image_labelsparsing_res_liststable_res_listsspotting_res_listimage_path_to_obj_maptable_blocksr   r   parsing_res_listtable_res_listspotting_resr   r   r   
block_bboxr   block_contentr   r   
pixel_infor   vl_rec_resultblock_img4vl
result_strr  html_strr   r   
block_infoimg_pathr  blk_infos%                                        rw   &_paddleocr_vl_assemble_parsing_resultsz;_PaddleOCRVLPipeline._paddleocr_vl_assemble_parsing_results  s     "!*6!2!2 a	? a	?A~!NL%n55 S* S*5!%L	"5\
#Gn "#% q6--- 0!Q 8I!4Y!?J)34H)I&-"?31 1  $_56HIaQRVSSST %/}$=>P$QM#-h#78J#KL'12E'F*($ '!+&7IJ34-9M'*!.!2!28R!@!@J!)%'
(3w(>(>BI!<"i" " "J ++0C0C++0C0C%/%7%7R%@%@
 '..ue<<$WUD11$WXu55$WXu55$WUF33$WUF33 # '*:::)3););C)D)DJ"g--#7
#C#C#r>>)1J"j00(rr213L&14 40
L %/M-%#)"YYz488#(99-=t#D#D  
 ')) ''0@%/    "222y7L1%.%,OOH6@)(3'777"


$'LLC<M$N$N	$,#(?9#=#=, ,
((
 ! ''
333		(   )#+,>#?  :M#35J! ! $$%5666"">222$$\222 .,, /3DDDr|   autoc
                   & |i }|                     dd           }
|
|
nd}|                     dd           }||nd}t          dgz   }|rg nt                                          }|s|dgz  }|dgz  }|s|dgz  }|                     d|          }|                     d|          }|                     d	|          }|                     d
|          }|                     d|          }|                     d|          }|                     d|          }|                     d|          }|                     d|          }|                     d|          }|	||||||||||||||d&t                    }&fdt	          |          D             }|dk    rt           j        |          nd}|dk    r|dk    r                     ||          }n1|dk    r fd|D             }n                     |d                   g} 	                    |          \  }}} }!}"~ 
                    |!||                                ||!|"| |          \  }#}$}%|#|$|%fS )Nr   r   r   i P r   r   r   r   r   r   r   r   r   r   r   r   )r   rV   r   rM   rN   r   r   r   r   r   r   r   r   r   r   c                B    g | ]}||         |         |         fS r^   r^   )r   r   r   imgs_in_doclayout_det_resultsr   s     rw   r   zC_PaddleOCRVLPipeline.get_layout_parsing_results.<locals>.<listcomp>  sI     	
 	
 	
  q	"1%A	
 	
 	
r|   r   r   c                :    g | ]}                     |          S r^   )r   )r   prn   s     rw   r   zC_PaddleOCRVLPipeline.get_layout_parsing_results.<locals>.<listcomp>  s7        BB1EE  r|   )popIMAGE_LABELScopyr
  rangeminr]   r   r   r   r   r#  )'rn   r   r(  r'  rM   rN   rQ   r   rV   r   r   default_min_pixelsr   default_max_pixelsr  r   r   r   r   r   r   r   r   r   r   r   	num_pagesr   r   r   r   r   r   r   r   r  r  r  r   s'   ````                                  @rw   get_layout_parsing_resultsz/_PaddleOCRVLPipeline.get_layout_parsing_results  sX    J^^L$77
+5+AZZv^^L$77
+5+AZZw'6(24Mrr,:K:K:M:M$ 	*WI%L	)# 	%VH$L#(8:LMM#(8:LMM%>>*<>PQQ%>>*<>PQQ%>>*<>PQQ%>>*<>PQQ'^^,@BTUU'^^,@BTUU$..):<NOO$..):<NOO "3#6(%:$8,, 0 0 0 0"4"4..
 
$ KK		
 	
 	
 	
 	
 	
 	
 9%%	
 	
 	
 =FMMC,i888q 	 q==[1__HH{ LL ]]   &  LL BB=QRCSTTL 44\BB	
 &55z	
 	
 	
 77
 
		
 	
 	
r|   input3Union[str, List[str], np.ndarray, List[np.ndarray]]layout_thresholdOptional[Union[float, dict]]rJ   Optional[bool]rK   1Optional[Union[float, Tuple[float, float], dict]]rL   r   rU   prompt_labelOptional[Union[str, None]]repetition_penaltyOptional[float]temperaturetop_pr   Optional[int]r   r   vlm_extra_argsOptional[dict]r!   c              +  N
   	
&'()*+,-./K                         |||||||	  	        ,dk    rdnd,d<                        ,          sddiV  | j        }i ,d         sernd	                                d
k    rd,d<   n                                dk    rd,d<                                   dv sJ d d            d%	
, f
d	&, f
d'|r9d}t	          j        |          .t	          j        |          -t	          j         j        j        |z            /t          j	                    *t          j	                    )t          j	                    (t          j	                    +)*. fd}&()*,-. fd}'(*+-/ fd}t          j
        ||fd          }|                                 t          j
        |d          }|                                 t          j
        |d          } |                                  	 |rʉ+                                r/                                s	 /                    d          }!n+# t          j        $ r +                                rY ndY gw xY w|!n[|!d         s!t!          d|!d          d|!d                     |!d         V  +                                /                                nx                     |          D ]b}"t#           &|"                    }#t%          |#          dk    sJ t%          |#                      |#d         }$ '|$          D ]}%|%V  ~%~$~#~"c|rЉ*                                 |                    d!           |                                rt-          j        d"           |                    d!           |                                rt-          j        d#           |                     d!           |                                 rt-          j        d$           dS dS dS # |rω*                                 |                    d!           |                                rt-          j        d"           |                    d!           |                                rt-          j        d#           |                     d!           |                                 rt-          j        d$           w w w xY w)&a`  
        Predicts the layout parsing result for the given input.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
                                                                        numpy array of an image, or list of numpy arrays.
            use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
            use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
            use_layout_detection (Optional[bool]): Whether to use layout detection. Default is None.
            use_chart_recognition (Optional[bool]): Whether to use chart recognition. Default is None.
            use_seal_recognition (Optional[bool]): Whether to use seal recognition. Default is None.
            layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
            layout_nms (Optional[bool], optional): Whether to use layout-aware NMS. Defaults to `False`.
            layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
                Defaults to `None`.
                If it's a single number, then both width and height are used.
                If it's a tuple of two numbers, then they are used separately for width and height respectively.
                If it's None, then no unclipping will be performed.
            layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to `None`.
            layout_shape_mode (Optional[str], optional): The mode for layout shape. Defaults to "auto", [ "rect", "quad","poly", "auto"] are supported.
            use_queues (Optional[bool], optional): Whether to use queues. Defaults to `None`.
            prompt_label (Optional[Union[str, None]], optional): The label of the prompt in ['ocr', 'formula', 'table', 'chart']. Defaults to `None`.
            format_block_content (Optional[bool]): Whether to format the block content. Default is None.
            repetition_penalty (Optional[float]): The repetition penalty parameter used for VL model sampling. Default is None.
            temperature (Optional[float]): Temperature parameter used for VL model sampling. Default is None.
            top_p (Optional[float]): Top-p parameter used for VL model sampling. Default is None.
            min_pixels (Optional[int]): The minimum number of pixels allowed when the VL model preprocesses images. Default is None.
            max_pixels (Optional[int]): The maximum number of pixels allowed when the VL model preprocesses images. Default is None.
            max_new_tokens (Optional[int]): The maximum number of new tokens. Default is None.
            merge_layout_blocks (Optional[bool]): Whether to merge layout blocks. Default is None.
            markdown_ignore_labels (Optional[list[str]]): The list of ignored markdown labels. Default is None.
            **kwargs (Any): Additional settings to extend functionality.

        Returns:
            PaddleOCRVLResult: The predicted layout parsing result.
        rectFTreturn_layout_polygon_pointsr   z0the input params for model settings are invalid!NrD   ocrr   rM   r   rN   )rF  r   r   r   r   r   zLayout detection is disabled (use_layout_detection=False). 'prompt_label' must be one of ['ocr', 'formula', 'table', 'chart'], but got 'z'.c              3  $  
K   |st          |           }t          dt          |           |          D ][}| j        |||z            }| j        |||z            }| j        |||z            }| j        |||z            }                    |          }d         r&t                              |                    }nd |D             }d |D             }	d         rDt          	                    |	d                    }
d	 t          |	|
          D             }n^g }
|	D ]M}|
                    d d d                                d
dd|j        d
         |j        d         gdgd           Nd |
D             }||||	||
|fV  ]d S )Nr   r@   )r}   r   c                    g | ]}d |iS 
output_imgr^   )r   arrs     rw   r   zE_PaddleOCRVLPipeline.predict.<locals>._process_cv.<locals>.<listcomp>{  s*     0 0 003s+0 0 0r|   c                    g | ]
}|d          S rI  r^   )r   items     rw   r   zE_PaddleOCRVLPipeline.predict.<locals>._process_cv.<locals>.<listcomp>  s*     + + ++/D&+ + +r|   rD   F)rI   rJ   rK   rL   r   r$   c                @    g | ]\  }}t          ||d                    S )r   )r   )r   
doc_pp_imgr   s      rw   r   zE_PaddleOCRVLPipeline.predict.<locals>._process_cv.<locals>.<listcomp>  s;     # # #6J $Jw0GHH# # #r|   r   )cls_idr   score
coordinate)
input_path
page_indexr   c                    g | ]}g S r^   r^   )r   r   s     rw   r   zE_PaddleOCRVLPipeline.predict.<locals>._process_cv.<locals>.<listcomp>  s    "B"B"B!2"B"B"Br|   )r
  r.  	instancesinput_pathspage_indexespage_countsrh   r   rc   re   r   r   lowerr   )
batch_datanew_batch_sizeidxrV  rW  rX  rY  image_arraysdoc_preprocessor_resultsdoc_preprocessor_imagesr(  r'  doc_preprocessor_imagerL   rJ   r   r6  rK   model_settingsr:  rn   r}   r   s                rw   _process_cvz1_PaddleOCRVLPipeline.predict.<locals>._process_cvf  s     ! 1!$ZQJ@@ CQ CQ&0s^7K1KL	(4S3;O5OP)6sS>=Q7QR(4S3;O5OP#y99!"89 /366(9U.? 7  0 0,,0 07C0 0 0,+ +3K+ + +' ""89 )C)---3&6'10C5M.?16 .  
* 
*&# #:=35G; ;# # #KK *,&2I  .*11.2.2 341=1C1C1E1E12,-,-,B,H,K,B,H,K	7*	
%& 
%&*"    & #C"B/A"B"B"BK!<>UWo  rD  FQ  Q  Q  Q  Q  QGCQ CQr|   c              3  `  
K   | \  }}}}}}}                     |||d         d         d         dd         	  	        \  }}	}
}t          |||||||	||
|
  
        D ]C\
  }}}}}}}}}}||||j        d         |j        d         ||||||d	}t          |          V  Dd S )
NrM   rN   rQ   )r<  r>  r?  r   r   r   rV   )	r   r(  r'  rM   rN   rQ   r   rV   r   r   r   )rS  rT  
page_countwidthheightdoc_preprocessor_resr   r  r  r  r'  rb  )r3  r   r   r!   ) 
results_cvrW  rX  rY  r`  r_  r(  r'  r  r  r  rS  rT  re  ra  rh  r   r  r  r  r   single_img_resr   r   r   r   rb  r<  rn   r>  r?  rA  s                          rw   _process_vlmz2_PaddleOCRVLPipeline.predict.<locals>._process_vlm  sj      '(" //.#5'&45L&M%34J%K(67P(Q*<#."",",&4  % %33H$I"3# 0  !!@ '("!! %8 %8 &$ # #-",",39!<4:1=,@&4&4(8$0#6&4" " (777777K%8 %8r|   @   )maxsizec                p                        |           }                                sw	 t          |          }                    d|f           ~n:# t          $ r Y nBt
          $ r"}                    dd|f           Y d }~nd }~ww xY w                                w                                 d S )NTFr4  )rg   is_setnextputStopIteration	Exceptionr   )input_all_batch_datar[  eevent_data_loading_doneevent_shutdownqueue_inputrn   s       rw   _worker_inputz3_PaddleOCRVLPipeline.predict.<locals>._worker_input  s    !%!3!3F!;!;(//11 
'	'%).%9%9
 $z(:;;;&J )   $   #(;<<< )//11 
' (++-----s   A 
B 	B)BBc                 j                                    s	 	                    d          } nU# t          j        $ rC                                  r,                                                     d            Y d S Y ~w xY w| d         s                    |            d S 	  | d         d         r
j        j        j        nd           D ]}                    d|f           ~~ n0# t          $ r#}                    dd|f           Y d }~d S d }~ww xY w                                 d S d S )	N      ?timeoutr   r   rD   TFcv)
ro  ra   queueEmptyr   rq  re   rg   rR   rs  )rM  ri  rv  rc  event_cv_processing_donerw  rx  rb  queue_cvry  rn   s      rw   
_worker_cvz0_PaddleOCRVLPipeline.predict.<locals>._worker_cv  s   (//11 !*s;; ; ! ! !299;; "488:::$LL...!EE !  7  T****5+ G $22H#I!* 5 C N N%)+ + 	+ 	+J %LL$
);<<< *
 D$    eT1%56661 )//11     s)   / AB B$AC, ,
D6DDc                 V   d} j         j        j        }d}                                s}g }t	          j                    }d}d}	 | t	          j                    |z
  z
  }|dk    rn	                     |          }n# t          j        $ r Y nvw xY w|d}nm|d         s                    |           d}nM|	                    |d                    ~|d         d         D ]}	|t          |	d	                   z  }||k    rn|rd S |sC|s                                r+                                                     d            d S 7d
 t          | D             }
~	  |
          D ]}                    d|f           ~~
n0# t          $ r#}                    dd|f           Y d }~d S d }~ww xY w|r	 	                                 }n# t          j        $ r Y nw xY w||d         sns|d         g}d t          | D             }	  |          D ]}                    d|f           n/# t          $ r"}                    dd|f           Y d }~n	d }~ww xY w                                                     d            d S                                 {d S d S )Nr|  Fr   Tr}  r      r   c                P    g | ]#}t          t          j        |                    $S r^   r   r   from_iterabler   listss     rw   r   zE_PaddleOCRVLPipeline.predict.<locals>._worker_vlm.<locals>.<listcomp>]  s;     ) ) )! U07788) ) )r|   vlmc                P    g | ]#}t          t          j        |                    $S r^   r  r  s     rw   r   zE_PaddleOCRVLPipeline.predict.<locals>._worker_vlm.<locals>.<listcomp>w  s;     & & &$) !%U%8%?%? @ @& & &r|   )rf   rg   rR   ro  timera   r  r  rq  r   r
  r   r   rs  
get_nowait)MAX_QUEUE_DELAY_SECSMAX_NUM_BOXEScv_doneresults_cv_list
start_timeshould_break	num_boxesremaining_timerM  resmerged_results_cv
result_vlmrv  results_cv_list_finalmergedrk  r  rx  event_vlm_processing_doner  	queue_vlmrn   s                  rw   _worker_vlmz1_PaddleOCRVLPipeline.predict.<locals>._worker_vlm2  s   '*$ $ 1 ? J(//11 L&(O!%J#(L !I")= IKK*4* *Q..!"#+<<<#G#GDD${ " " "!E"<&*G!#Aw "%MM$///+/L!'..tAw777 #22#6q#9 ; ;C%S\):)::II$55!/"0 $ * !" "&>&E&E&G&G "599;;;%MM$///!E ) )%(/%:) ) )% (*6,7H*I*I + +J%MM4*<=== *
--$   !ueQ&7888
  &&'/':':'<'<#(; & & & %&#|47| %59!WI1& &-02G-H& & &F&2>,v2F2F !F !FJ$-MM42D$E$E$E$E!F#, & & & )ueQ.? @ @ @ %&&$ 255777!d+++Y )//11 L L L L LsT   'A> >BB#'F 
F8F33F8?G G&%G&%H7 7
I#II#)targetargsdaemon)r  r  r|  r}  r   zException from the 'r   z
' worker: r   r  z&Input worker did not terminate in timez#CV worker did not terminate in timez$VLM worker did not terminate in timer   )r   r   rU   rZ  r  Queuerg   rR   	threadingEventThreadstartro  emptyra   r  RuntimeErrorr   r
  r   joinis_aliver   warning)0rn   r4  r}   r   rD   rM   rN   rQ   r6  rJ   rK   rL   r   rU   r:  rP   r<  r>  r?  r   r   r   rV   rW   rA  ro   max_num_batches_in_processrz  r  r  thread_input	thread_cv
thread_vlmrM  r[  r  ri  r  rc  rk  r  rw  rx  r  rb  r  ry  r  s0   ` ``    ````` ` ``````  `             @@@@@@@@@@rw   r   z_PaddleOCRVLPipeline.predict  sN     @ 00( ! # "

 

 '&00EEd 	56 ..~>> 	PNOOOOJ!N45 	k+7B<<UL!!##w..:>677##%%//9=56%%'' ,    k  [g  k  k  k  G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	Q G	QRI	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8 I	8V  R	)+&+.HIIIK{+EFFFH*58RR  I '_..N&/o&7&7#'0'8'8$(1(9(9%. . . . . . . .           :Q Q Q Q Q Q Q Q Q Q Qf %+$E8E  L    !(
5IIIIOO")UKKKJ%	L E4;;== &)//BSBS &!(}}S}99 ; ! ! !4;;== "!E ! |7 &*O47OOd1gOO   #1g 5;;== &)//BSBS &" #'"4"4U";"; E EJ&*;;z+B+B&C&CO//1444c/6J6J444!0!3J+|J77 " "!				Z** 
L""$$$!!!!,,,((** NO$LMMMq)))%%'' KO$IJJJ***&&(( LO$JKKKKK
L 
LL L  
L""$$$!!!!,,,((** NO$LMMMq)))%%'' KO$IJJJ***&&(( LO$JKKKK
LLs2   6*Q !I8 7Q 8#J Q J  CQ CT$markdown_listr   tuplec                0    d}|D ]}|d|d         z   z  }|S )a  
        Concatenate Markdown content from multiple pages into a single document.

        Args:
            markdown_list (list): A list containing Markdown data for each page.

        Returns:
            tuple: A tuple containing the processed Markdown text.
        r   z

markdown_textsr^   )rn   r  r  r  s       rw   concatenate_markdown_pagesz/_PaddleOCRVLPipeline.concatenate_markdown_pages  s5       	= 	=Cfs+;'<<<NNr|   res_listmerge_tabletitle_levelmerge_pagesc                Z    t          j        d           |                     ||||          S )a  Concatenate layout parsing results from multiple pages.

        Args:
            res_list: List of page parsing results
            merge_talble: Whether to merge tables across pages
            title_level: Whether to assign title levels
            merge_pages: Whether to concatenate pages using the new consolidate_pages() logic

        Returns:
            PaddleOCRVLResult: Combined OCR-VL result after merge_table or title_level policy
        zDeprecationWarning: `concatenate_pages()` is deprecated as of v3.3.14 and will be removed in v3.4.0. Please use `restructure_pages()` instead. It provides better support for table merging and title restructuring.)r   r  restructure_pages)rn   r  r  r  r  s        rw   concatenate_pagesz&_PaddleOCRVLPipeline.concatenate_pages  s:    $ 	 d	
 	
 	
 %%h[+VVVr|   merge_tablesrelevel_titlesr  c              #    K   t          |          dk    rg S d fd}d}g }|D ]}t          |t                    sF|d         }g |d<   |                    dg           }	|                    di           }
 ||	|
          }	n|d         }	|                    di           }
g }|	D ]*}||_        ||_        |dz  }|                    |           +||d<   |                    |           |}d	 |D             }|rt          |          }|rt          |          }g }|rg }|D ]+}|	                    |                    dg                      ,||d         d<   |d         }g }t          |          D ]&\  }}|D ]}||_        |                    |           '||d<   d
|d<   t          |          |d<   |
d         rd |D             |d<   |
d         rd |D             |d<   |                    t          |                     nBt          |          D ]2\  }}||         |d<   |                    t          |                     3|E d
{V  d
S )a  Restructure layout parsing results from multiple pages.
        Args:
            res_list: List of page parsing results
            merge_tables: Whether to merge tables across pages
            relevel_titles: Whether to relevel titles
            concatenate_pages: Whether to concatenate pages to a single document

        Returns:
            PaddleOCRVLResult: Combined OCR-VL result after merge_tables or relevel_titles policy
        r   c                    |                      dd           r| d         S | d         dv s"| d         dk    r7|                     dd          s!t          | d         | d                   }|d dS d S )	Nr+   r   )r+   r   r   rM   Fr  r  )ra   r   )r   rb  r	  s      rw   _get_img_objz<_PaddleOCRVLPipeline.restructure_pages.<locals>._get_img_obj  s    yy$'' &W~%]#'888m$//&**+BEJJ 0 *%*>l@STT $T2224r|   c                .   g }| D ]}t          |d         |d         |                    dd           t          j        dd|d                   |                    dd                     } ||          x}r||_        |                    |           |S )	Nr   r  block_polygon_pointsz^#+\sr   r  r  )r   r  r  r  r  )r    ra   resubr+   r   )r   rb  r  r   objr   r  s         rw   _conver_blocks_to_objzE_PaddleOCRVLPipeline.restructure_pages.<locals>._conver_blocks_to_obj  s    C 
  
 &.|,#(99-CT#J#JF8R1GHH"YYz488   ',un===3 $ #CI

3Jr|   r  r'  r  rb  r   c                    g | ]
}|d          S )r  r^   r   r  s     rw   r   z:_PaddleOCRVLPipeline.restructure_pages.<locals>.<listcomp>#  s    FFFc#01FFFr|   NrT  re  rD   c                    g | ]
}|d          S )r   r^   r  s     rw   r   z:_PaddleOCRVLPipeline.restructure_pages.<locals>.<listcomp>:  s+     2 2 2.1C()2 2 2r|   r   r@   c                    g | ]
}|d          S )rh  r^   r  s     rw   r   z:_PaddleOCRVLPipeline.restructure_pages.<locals>.<listcomp>>  s+     8 8 847C./8 8 8r|   rh  )r
  
isinstancer   ra   global_block_idglobal_group_idr   r   r   extendr   rT  r!   )rn   r  r  r  r  r  r  obj_res_listone_page_resr   rb  r  r   blocks_by_pageconcatenate_resall_imgs_in_docr  all_page_res
all_blocksr   blksblkr  s                         @rw   r  z&_PaddleOCRVLPipeline.restructure_pages  sc     $ x==AI		 		 			 	 	 	 	 $ 	. 	.LlJ77 H+E2.0]+%))*<bAA!-!1!12BB!G!G..v~FF%&89!-!1!12BB!G!G! / /(7%(7%1$ ''..../?L+,----FFXFFF 	G6~FFN 	J9.IIN 	H O C C&&sww}b'A'ABBBB)8HQK&#A;LJ"+N";"; + +$ + +C%-CN%%c****+ 0:L+,)-L&),XL&45 2 25=2 2 2-. 45 8 8;C8 8 834 ""#4\#B#BCCCC*3H*=*= H H&,3A(3K/0&&'8'F'FGGGG""""""""""r|   )r7   r   r0   r8   r1   r8   r2   r9   r3   r:   r4   r;   r5   r<   r6   r;   r=   r>   r   )r}   r~   r   r~   rD   r~   rM   r~   rN   r~   rQ   r~   rP   r~   rV   r~   rW   r   r=   r   )r   r   r=   r;   )FFFNTr$  )FFNNNNNNNNr$  NNNNNNNNNNNN)2r4  r5  r}   r~   r   r~   rD   r~   rM   r~   rN   r~   rQ   r~   r6  r7  rJ   r8  rK   r9  rL   r8   r   r8   rU   r8  r:  r;  rP   r~   r<  r=  r>  r=  r?  r=  r   r@  r   r@  r   r@  rV   r8  rW   r   rA  rB  r=   r!   )r  r   r=   r  )TTF)r  r   r  r;   r  r;   r  r;   )r  r   r  r;   r  r;   r  r;   )__name__
__module____qualname____doc__r`   rz   r   r   r   timeit_with_optionsr   r   r   r   r   r   r   r   r   r   r#  r3  r   r  r  r  __classcell__)rv   s   @rw   r/   r/   7   si       '' !% $2659AE"&} } } } } } } }~& & & 7;=
 =
 =
 =
 =
~   & #Y"(KLLLG G MLG #Y"(JKKKC C LKC #Y"(LMMM
 
 NM
EF EF EFN #Y"(OPPP
 
 QP

 
 
<"
 "
 "
H #Y"(KLLL  ML #Y"(LMMM/
 /
 NM/
b #Y"(QRRR J  J SR JD #Y"(OPPPpE pE QPpEn $" %  t
 t
 t
 t
r ;@/4263726599=%)QU26+1%)3726.2'+!%$($((,.26:)-3uL uL uL uL uLn   ( ! !W W W W W4 "#"'j# j# j# j# j# j# j# j# j#r|   r/   c                  *    e Zd Zed             Zd ZdS )_BasePaddleOCRVLPipelinec                    t           S r   )r/   r{   s    rw   _pipeline_clsz&_BasePaddleOCRVLPipeline._pipeline_clsK  s    ##r|   c                .    |                     dd          S )NrR   r   )ra   )rn   r7   s     rw   _get_batch_sizez(_BasePaddleOCRVLPipeline._get_batch_sizeO  s    zz,***r|   N)r  r  r  propertyr  r  r^   r|   rw   r  r  J  s<        $ $ X$+ + + + +r|   r  rF  c                      e Zd ZdZdS )PaddleOCRVLPipelinezPaddleOCR-VLNr  r  r  entitiesr^   r|   rw   r  r  S  s        HHHr|   r  c                      e Zd ZdZdS )PaddleOCRVL15PipelinezPaddleOCR-VL-1.5Nr  r^   r|   rw   r  r  X          !HHHr|   r  c                      e Zd ZdZdS )PaddleOCRVL16PipelinezPaddleOCR-VL-1.6Nr  r^   r|   rw   r  r  ]  r  r|   r  )D
__future__r   r  r  r  r  concurrent.futuresr   	itertoolsr   typingr   r   r   r	   r
   r   numpynpPILr   utilsr   
utils.depsr   common.batch_samplerr   common.readerr   modelsr   r   utils.benchmarkr   	_parallelr   baser   
componentsr   layout_parsing.merge_tabler   layout_parsing.title_levelr   layout_parsing.utilsr   r   r   r   r    r!   uiltsr"   r#   r$   r%   r&   r'   r(   r)   r*   r,  time_methodsr/   r  r  r  r  r^   r|   rw   <module>r     sp   # " " " " "  				      1 1 1 1 1 1       : : : : : : : : : : : : : : : :                 2 2 2 2 2 2 5 5 5 5 5 5 & & & & & & 6 6 6 6 6 6 6 6 ( ( ( ( ( ( @ @ @ @ @ @       $ $ $ $ $ $ B B B B B B E E E E E E B B B B B B B B C C C C C C C C C C
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 988 O# O# O# O# O#< O# O# O#d(+ + + + +G + + +     2     " " " " "4 " "  " " " " " "4 " "  " " "r|   