
    }-jw                     j    d dl mZmZ ddlmZmZ ddlmZ  G d de          Z G d de          Z	d	S )
   )get_subcommand_argsstr2bool   )PaddleXPipelineWrapperPipelineCLISubcommandExecutor)create_config_from_structurec                   z    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Zed             ZddZddZd Zd Z	ddddddddddddddddddddddd	Z
ddddddddddddddddddddddd
ZddddddZdddZdddddddddddddddddddZed             Zd Z xZS )PPChatOCRv4DocNc.                     t                                                      }/|/                    d           |/                    d           |/| _         t	                      j        di |. d S )Nselfkwargs )localscopypop_paramssuper__init__)1r   layout_detection_model_namelayout_detection_model_dir#doc_orientation_classify_model_name"doc_orientation_classify_model_dirdoc_unwarping_model_namedoc_unwarping_model_dirtext_detection_model_nametext_detection_model_dirtextline_orientation_model_nametextline_orientation_model_dirtextline_orientation_batch_sizetext_recognition_model_nametext_recognition_model_dirtext_recognition_batch_size&table_structure_recognition_model_name%table_structure_recognition_model_dirseal_text_detection_model_nameseal_text_detection_model_dir seal_text_recognition_model_nameseal_text_recognition_model_dir seal_text_recognition_batch_sizeuse_doc_orientation_classifyuse_doc_unwarpinguse_textline_orientationuse_seal_recognitionuse_table_recognitionlayout_threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modetext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotext_rec_score_threshseal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratioseal_rec_score_threshretriever_configmllm_chat_bot_configchat_bot_configr   params	__class__s1                                                   e/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddleocr/_pipelines/pp_chatocrv4_doc.pyr   zPPChatOCRv4Doc.__init__   se    b 

6

8""6"""""    c                     dS )NzPP-ChatOCRv4-docr   r   s    rD   _paddlex_pipeline_namez%PPChatOCRv4Doc._paddlex_pipeline_nameP       !!rE   c                 <    | j                             |||          S )N)vector_info	save_pathr?   )paddlex_pipelinesave_vector)r   rK   rL   r?   s       rD   rN   zPPChatOCRv4Doc.save_vectorT   s+    $00#- 1 
 
 	
rE   c                 :    | j                             ||          S )N)	data_pathr?   )rM   load_vector)r   rP   r?   s      rD   rQ   zPPChatOCRv4Doc.load_vector[   s(    $002B 1 
 
 	
rE   c                 8    | j                             |          S )N)rP   )rM   load_visual_info_list)r   rP   s     rD   rS   z$PPChatOCRv4Doc.load_visual_info_list`   s    $::Y:OOOrE   c                 :    | j                             ||          S )N)visual_inforL   )rM   save_visual_info_list)r   rU   rL   s      rD   rV   z$PPChatOCRv4Doc.save_visual_info_listc   s'    $::#y ; 
 
 	
rE   )r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   c                     | j         j        |fi d|d|d|d|d|d|d|d|	d	|
d
|d|d|d|d|d|d|d|d|d|d|d||S Nr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   )rM   visual_predictr   inputr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r   s                           rD   visual_predict_iterz"PPChatOCRv4Doc.visual_predict_iterh   s:   6 4t$3
 
 
)E)E
 0/
 &>%=	

 "6!5
 #8"7
 .-
 "z
 !4 3
 &>%=
 %<$;
 !4 3
 ,O
 !4 3
 #8"7
  #8"7!
" %<$;#
$ !4 3%
& ,O'
( !4 3)
* #8"7+
, #8"7/
 
 	
rE   c                    t           | j        |fi d|d|d|d|d|d|d|d|	d	|
d
|d|d|d|d|d|d|d|d|d|d|d||          S rX   )listr\   rZ   s                           rD   rY   zPPChatOCRv4Doc.visual_predict   sH   6 $D$  -I-I #4"3 *B)A	
 &:%9 '<&; "2!1 &: %8$7 *B)A )@(? %8$7 !0 %8$7 '<&;  '<&;!" )@(?#$ %8$7%& !0'( %8$7)* '<&;+, '<&;/ 
 
 	
rE   i  i,  Fmin_characters
block_sizeflag_save_bytes_vectorr?   c                @    | j                             |||||          S )Nr_   )rM   build_vector)r   rU   r`   ra   rb   r?   s         rD   rd   zPPChatOCRv4Doc.build_vector   s3     $11)!#9- 2 
 
 	
rE   r@   c                <    | j                             |||          S )Nre   )rM   	mllm_pred)r   r[   key_listr@   s       rD   rg   zPPChatOCRv4Doc.mllm_pred   s+    $..!5 / 
 
 	
rE   Tintegrationuse_vector_retrievalrK   r`   text_task_descriptiontext_output_formattext_rules_strtext_few_shot_demo_text_content!text_few_shot_demo_key_value_listtable_task_descriptiontable_output_formattable_rules_str table_few_shot_demo_text_content"table_few_shot_demo_key_value_listmllm_predict_infomllm_integration_strategyrA   r?   c                \    | j                             |||||||||	|
|||||||||          S )Nrj   )rM   chat)r   rh   rU   rk   rK   r`   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rA   r?   s                       rD   ry   zPPChatOCRv4Doc.chat   s]    . $))!5#)"71),K.O#9 3+-M/Q/&?+-' * 
 
 	
rE   c                     t                      S N)#PPChatOCRv4DocCLISubcommandExecutor)clss    rD   get_cli_subcommand_executorz*PPChatOCRv4Doc.get_cli_subcommand_executor  s    2444rE   c                     i d| j         d         d| j         d         d| j         d         d| j         d         d	| j         d
         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d         d| j         d          d!| j         d"         i d#| j         d$         d%| j         d&         d'| j         d(         d)| j         d*         d+| j         d,         d-| j         d.         d/| j         d0         d1| j         d,         p| j         d.         d2| j         d3         d4| j         d5         d6| j         d7         d8| j         d9         d:| j         d;         d<| j         d=         d>| j         d?         d@| j         dA         dB| j         dC         | j         dD         | j         dE         | j         dF         | j         d?         | j         dG         | j         dH         | j         dI         | j         dJ         | j         dK         | j         dL         | j         dM         | j         dN         dO}t          |          S )PNz?SubPipelines.LayoutParser.SubModules.LayoutDetection.model_namer   z>SubPipelines.LayoutParser.SubModules.LayoutDetection.model_dirr   zcSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_namer   zbSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_dirr   zYSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_namer   zXSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_dirr   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.model_namer   zTSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.model_dirr   z[SubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_namer   zZSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_dirr   z[SubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.batch_sizer   zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_namer    zVSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_dirr!   zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.batch_sizer"   zgSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableStructureRecognition.model_namer#   zfSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableStructureRecognition.model_dirr$   zoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.model_namer%   znSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.model_dirr&   zqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.model_namer'   zpSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.model_dirr(   zqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.batch_sizer)   zSSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_orientation_classifyr*   zHSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_unwarpingr+   zJSubPipelines.LayoutParser.SubPipelines.GeneralOCR.use_textline_orientationr,   z.SubPipelines.LayoutParser.use_doc_preprocessorz.SubPipelines.LayoutParser.use_seal_recognitionr-   z/SubPipelines.LayoutParser.use_table_recognitionr.   z>SubPipelines.LayoutParser.SubModules.LayoutDetection.thresholdr/   z8SubPipelines.LayoutParser.SubModules.LayoutDetection.nmsr0   zASubPipelines.LayoutParser.SubModules.LayoutDetection.unclip_ratior1   zFSubPipelines.LayoutParser.SubModules.LayoutDetection.merge_bboxes_moder2   zYSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_side_lenr3   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_typer4   zQSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.threshr5   r6   r7   r8   r:   r;   r<   r=   r>   r?   r@   rA   )zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.box_threshzWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.unclip_ratiozYSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.score_threshzsSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.limit_side_lenzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.limit_typezkSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.threshzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.box_threshzqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.unclip_ratiozsSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.score_threshzSubModules.LLM_RetrieverzSubModules.MLLM_ChatzSubModules.LLM_Chat)r   r   )r   	STRUCTUREs     rD   _get_paddlex_config_overridesz,PPChatOCRv4Doc._get_paddlex_config_overrides  s   F
Mt|-PF
 Mdl,O	F
 rsws5tF
 qrvr~4sF
 himiu*jF
  ghlht)i!F
& deieq+f'F
, cdhdp*e-F
2 jkokw1l3F
8 ijnjv0k9F
> jkokw1l?F
D fgkgs-hEF
J efjfr,gKF
P fgkgs-hQF
V vw{  xD8xWF
\ uvz  wC7w]F
b ~  @D  @L0@cF
 F
h }  C  K/iF
n @  BF  BN2BoF
t   AE  AM1AuF
z @  BF  BN2B{F
@ bcgco.dAF
F WX\Xd#YGF
L YZ^Zf*[MF
R =dl.? ?1 |/0YF
Z =dl&?[F
` >t|'@aF
f Mdl"OgF
l GImF
r PQUQ]%RsF
x UVZVb*WyF
~ himiu)jF
D deieq%fEF
J `aeam!bKF
 F
P fjeq%f hlgs'h jniu'j DH  DP)D @D  @L%@ |@  |H!| @D  @L%@ BF  BN'B DH  DP'D )-5G(H$(L1G$H#'<0A#BKF
 F
 F
	N ,I666rE   )-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNr{   )__name__
__module____qualname__r   propertyrH   rN   rQ   rS   rV   r\   rY   rd   rg   ry   classmethodr~   r   __classcell__)rC   s   @rD   r
   r
      s        %)#',0+/!% $"&!%(,'+(,$(#'$(/3.2'+&*)-(,)-%)!%!" !% $  "" $  ""!]6# 6# 6# 6# 6# 6#p " " X"
 
 
 

 
 
 

P P P
 
 
 &*!%!" !% $  "" $  ""13
 3
 3
 3
 3
r &*!%!" !% $  "" $  ""15
 5
 5
 5
 5
v $
 
 
 
 
" BF 
 
 
 
 
 ""(,*.# )-+/"/++
 +
 +
 +
 +
Z 5 5 [5H7 H7 H7 H7 H7 H7 H7rE   r
   c                   0    e Zd Zed             Zd Zd ZdS )r|   c                     dS )Npp_chatocrv4_docr   rG   s    rD   subparser_namez2PPChatOCRv4DocCLISubcommandExecutor.subparser_name  rI   rE   c           	      
   |                     ddt          dd           |                     ddt          ddd	d
           |                     dt          d           |                     dt          dd           |                     dt          d           |                     dt          d           |                     dt          d           |                     dt          d           |                     dt          d           |                     dt          d           |                     dt          d            |                     d!t          d"           |                     d#t          d$           |                     d%t          d&           |                     d't          d(           |                     d)t          d*           |                     d+t          d,           |                     d-t          d.           |                     d/t          d0           |                     d1t          d2           |                     d3t          d4           |                     d5t          d6           |                     d7t          d8           |                     d9t          d:           |                     d;t          d<           |                     d=t          d>           |                     d?t          d@           |                     dAt          dB           |                     dCt          dD           |                     dEt          dF           |                     dGt          dH           |                     dIt          dJ           |                     dKt          dL           |                     dMt          dN           |                     dOt          dP           |                     dQt          dR           |                     dSt          dT           |                     dUt          dV           |                     dWt          dX           |                     dYt          dZ           |                     d[t          d\           |                     d]t          d^           |                     d_t          d`           |                     dat          db           |                     dct          dd           |                     det          df           |                     dgt          dh           |                     dit          dj           d S )kNz-iz--inputTzInput path or URL.)typerequiredhelpz-kz--keys+KEYz$Keys use for information extraction.)r   nargsr   metavarr   z--save_pathzPath to the output directory.)r   r   z--invoke_mllmFz6Whether to invoke the multimodal large language model.)r   defaultr   z--layout_detection_model_namez#Name of the layout detection model.z--layout_detection_model_dirz-Path to the layout detection model directory.z%--doc_orientation_classify_model_namez<Name of the document image orientation classification model.z$--doc_orientation_classify_model_dirzFPath to the document image orientation classification model directory.z--doc_unwarping_model_namez'Name of the text image unwarping model.z--doc_unwarping_model_dirz,Path to the image unwarping model directory.z--text_detection_model_namez!Name of the text detection model.z--text_detection_model_dirz+Path to the text detection model directory.z!--textline_orientation_model_namez7Name of the text line orientation classification model.z --textline_orientation_model_dirzAPath to the text line orientation classification model directory.z!--textline_orientation_batch_sizez>Batch size for the text line orientation classification model.z--text_recognition_model_namez#Name of the text recognition model.z--text_recognition_model_dirz-Path to the text recognition model directory.z--text_recognition_batch_sizez*Batch size for the text recognition model.z(--table_structure_recognition_model_namez.Name of the table structure recognition model.z'--table_structure_recognition_model_dirz8Path to the table structure recognition model directory.z --seal_text_detection_model_namez&Name of the seal text detection model.z--seal_text_detection_model_dirz0Path to the seal text detection model directory.z"--seal_text_recognition_model_namez(Name of the seal text recognition model.z!--seal_text_recognition_model_dirz2Path to the seal text recognition model directory.z"--seal_text_recognition_batch_sizez/Batch size for the seal text recognition model.z--use_doc_orientation_classifyz9Whether to use document image orientation classification.z--use_doc_unwarpingz$Whether to use text image unwarping.z--use_textline_orientationz4Whether to use text line orientation classification.z--use_seal_recognitionz Whether to use seal recognition.z--use_table_recognitionz!Whether to use table recognition.z--layout_thresholdz/Score threshold for the layout detection model.z--layout_nmsz'Whether to use NMS in layout detection.z--layout_unclip_ratioz+Expansion coefficient for layout detection.z--layout_merge_bboxes_modez!Overlapping box filtering method.z--text_det_limit_side_lenzUThis sets a limit on the side length of the input image for the text detection model.z--text_det_limit_typezxThis determines how the side length limit is applied to the input image before feeding it into the text deteciton model.z--text_det_threshzDetection pixel threshold for the text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.z--text_det_box_threshzDetection box threshold for the text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.z--text_det_unclip_ratiozText detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.z--text_rec_score_threshzrText recognition threshold used in general OCR. Text results with scores greater than this threshold are retained.z--seal_det_limit_side_lenzZThis sets a limit on the side length of the input image for the seal text detection model.z--seal_det_limit_typez}This determines how the side length limit is applied to the input image before feeding it into the seal text deteciton model.z--seal_det_threshzDetection pixel threshold for the seal text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.z--seal_det_box_threshzDetection box threshold for the seal text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.z--seal_det_unclip_ratiozSeal text detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.z--seal_rec_score_threshzcSeal text recognition threshold. Text results with scores greater than this threshold are retained.z--qianfan_api_keyz&Configuration for the embedding model.z--pp_docbee_base_urlz6Configuration for the multimodal large language model.)add_argumentstrr   intfloat)r   	subparsers     rD   _update_subparserz5PPChatOCRv4DocCLISubcommandExecutor._update_subparser  s   % 	 	
 	
 	
 	7 	 	
 	
 	
 	0 	 	
 	
 	
 	I	 	 	
 	
 	
 	+6 	 	
 	
 	

 	*@ 	 	
 	
 	

 	3O 	 	
 	
 	

 	2Y 	 	
 	
 	

 	(: 	 	
 	
 	

 	'? 	 	
 	
 	

 	)4 	 	
 	
 	

 	(> 	 	
 	
 	

 	/J 	 	
 	
 	

 	.T 	 	
 	
 	

 	/Q 	 	
 	
 	

 	+6 	 	
 	
 	

 	*@ 	 	
 	
 	

 	+= 	 	
 	
 	

 	6A 	 	
 	
 	

 	5K 	 	
 	
 	

 	.9 	 	
 	
 	

 	-C 	 	
 	
 	

 	0; 	 	
 	
 	

 	/E 	 	
 	
 	

 	0B 	 	
 	
 	

 	,L 	 	
 	
 	

 	!7 	 	
 	
 	

 	(G 	 	
 	
 	

 	$3 	 	
 	
 	

 	%4 	 	
 	
 	
 	 B 	 	
 	
 	

 	: 	 	
 	
 	

 	#> 	 	
 	
 	

 	(4 	 	
 	
 	

 	'h 	 	
 	
 	

 	# L 	 	
 	
 	

 	 t 	 	
 	
 	

 	# [ 	 	
 	
 	

 	% _ 	 	
 	
 	

 	% F 	 	
 	
 	

 	'm 	 	
 	
 	

 	# Q 	 	
 	
 	

 	 y 	 	
 	
 	

 	# ` 	 	
 	
 	

 	% d 	 	
 	
 	

 	%v 	 	
 	
 	
 	9 	 	
 	
 	

 	"I 	 	
 	
 	
 	
 	
rE   c                 4   t          |          }|                    d          }|                    d          }|                    d          }|                    d          }|                    d          }|dddd	|d
|d<   dddd|d
|d<   |                    d          }|dd|ddd
|d<   t          di |}	|	                    |          }
g }|
D ]:}|                    |d                    |r|d                             |           ;|	                    |          }|r|	                    ||          }|d         }nd }|	                    ||||          }|d         	                                D ]\  }}t          | d|            d S )Nr[   keysrL   invoke_mllmqianfan_api_key	retrieverzembedding-v1zhttps://qianfan.baidubce.com/v2qianfan)module_name
model_namebase_urlapi_typeapi_keyr?   chat_botzernie-3.5-8kopenairA   pp_docbee_base_urlz	PP-DocBeefake_keyr@   rU   layout_parsing_resultmllm_res)rK   rv   chat_res r   )r   r   r
   r\   appendsave_allrd   rg   ry   itemsprint)r   argsrB   r[   r   rL   r   r   r   chatocrresult_visualvisual_info_listresrK   result_mllmrv   result_chatkvs                      rD   execute_with_argsz5PPChatOCRv4DocCLISubcommandExecutor.execute_with_args  s   $T**

7##zz&!!JJ{++	jj// **%677&*,=%** *F%&  *,=$*) )F$% $ZZ(<==))) /$%. .F)* !**6**33E::  	A 	AC##C$6777 A+,55i@@@**+;<< 	%!++E488K +J 7 $ll#/	 # 
 
  
+1133 	 	DAqQ****	 	rE   N)r   r   r   r   r   r   r   r   rE   rD   r|   r|     sO        " " X"}
 }
 }
~= = = = =rE   r|   N)

_utils.clir   r   baser   r   utilsr   r
   r|   r   rE   rD   <module>r      s           H G G G G G G G / / / / / /N7 N7 N7 N7 N7+ N7 N7 N7bA A A A A*G A A A A ArE   