
    {-jDV              
      f   d dl mZ d dlZd dlZd dlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZmZmZmZmZmZ dd
lmZ ddlmZmZmZmZmZmZm Z m!Z! ddl"m#Z#m$Z$ g dZ% ed          rd dl&Z& G d de'          Z( ed           G d deeeee                      Z) G d de)          Z*dS )    )annotationsN)Image	ImageDraw   )logging)class_requires_depsis_dep_available)SIMFANG_FONT   )BaseCVResult
BaseResult	HtmlMixin	JsonMixinMarkdownMixin	WordMixin	XlsxMixin)MarkdownConverter)build_handle_funcs_dictformat_centered_by_htmlformat_chart2html_tableformat_image_plainformat_image_scaled_by_htmlformat_table_centerformat_text_plainsimplify_table   )draw_box_txt_fineget_minarea_rect)figure_titlevision_footnoteimagecharttableheaderheader_imagefooterfooter_imagefootnote
aside_textzopencv-contrib-pythonc                  6    e Zd ZdZ	 	 	 	 	 d
ddZddZdd	ZdS )PaddleOCRVLBlockzPaddleOCRVL Block Class NreturnNonec                    || _         t          t          t          |                    | _        || _        d| _        || _        || _        || _	        || _
        dS )a  
        Initialize a PaddleOCRVLBlock object.

        Args:
            label (str): Label assigned to the block.
            bbox (list): Bounding box coordinates of the block.
            content (str, optional): Content of the block. Defaults to an empty string.
        N)labellistmapintbboxcontentr!   polygon_pointsgroup_idglobal_block_idglobal_group_id)selfr0   r4   r5   r7   r6   r8   r9   s           o/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddlex/inference/pipelines/paddleocr_vl/result.py__init__zPaddleOCRVLBlock.__init__D   sU    $ 
S$((	
, ..    strc                <    d| j          d| j         d| j         d}|S z>
        Return a string representation of the block.
        z

#################
label:	z
bbox:	z

content:	z
#################r0   r4   r5   r:   _strs     r;   __str__zPaddleOCRVLBlock.__str___   2     ~~di~~]a]i~~~r=   c                <    d| j          d| j         d| j         d}|S r@   rA   rB   s     r;   __repr__zPaddleOCRVLBlock.__repr__f   rE   r=   )r,   NNNNr-   r.   )r-   r>   )__name__
__module____qualname____doc__r<   rD   rG    r=   r;   r+   r+   A   sl        !! / / / / /6        r=   r+   c                  n     e Zd ZdZd fdZddZddZdd
ZddZddZ	ddZ
ddZdddZddZ xZS )PaddleOCRVLResultzT
    PaddleOCRVLResult class for holding and formatting OCR/VL parsing results.
    r-   r.   c                   t                                          |           t          j        |            t          j        |            t	          j        |            t          j        |            t          j        |            | d                             dg           }d t          	                                |z   D             | _
        dS )z
        Initializes a new instance of the class with the specified data.

        Args:
            data: The input data for the parsing result.
        model_settingsmarkdown_ignore_labelsc                    g | ]}|S rM   rM   ).0r0   s     r;   
<listcomp>z.PaddleOCRVLResult.__init__.<locals>.<listcomp>   s%     "
 "
 "
E"
 "
 "
r=   N)superr<   r   r   r   r   r   getSKIP_ORDER_LABELScopyskip_order_labels)r:   datarR   	__class__s      r;   r<   zPaddleOCRVLResult.__init__t   s     	4   4   t$$$4   4   !%&6!7!;!;$b"
 "
"
 "
05577:PP"
 "
 "
r=   r3   c                P    | d         }t          |t                    r|d         n|S )z:Return the page image width, unwrapping list if necessary.widthr   )
isinstancer1   )r:   ws     r;   _page_image_widthz#PaddleOCRVLResult._page_image_width   s(    M!!T**1qtt1r=   dict[str, np.ndarray]c           
     ~
   i }| d         }|d         rt          | d         t                    r*| d         j                                        D ]
\  }}|||<   t          | d         t                    rYt          | d                   D ]C\  }}t          |t                    r)|j                                        D ]\  }}||| d| <   D| d         d         rt          | d         t                    r| d         j        d         |d<   t          | d         t                    rCt          | d                   D ]-\  }}t          |t                    r|j        d         |d| <   .|                     d	          rt          | d	         t                    s|                     d          r| d	         d
         }| d	         d         }	| d         d         dddddddf         }
|
j        dd         \  }}t          j	        |
          }t          j        ||dft          j                  dz  }t          j        d           t          j        |          }t"          }t          t%          ||	                    D ]'\  }\  }}	 t          j        dd          t          j        dd          t          j        dd          f}t          j        |          }t+          |          dk    rd |                                D             }|                    ||d|           t1          |          }t3          dt5          |dddf                   t7          |dddf                   z
  z            }t          j        |dddf                   |dddf<   t          j        |dddf                   t7          d|          z   |dddf<   n5d |                                D             }|                    ||           t          |t:                    r|d         }t=          ||f|||j                  }t          j        |t          j                   !                    d          }tE          j#        ||gd|d           tE          j$        ||          }!#  Y &xY wt          j%        t          j	        |
          |d          }t          j&        d|dz  |fd           }|'                    |dd||f           |'                    t          j	        |          |d|dz  |f           ||d!<   |S )"z
        Convert the parsing result to a dictionary of images.

        Returns:
            dict: Keys are names, values are numpy arrays (images).
        rQ   use_doc_preprocessordoc_preprocessor_res_use_layout_detectionlayout_det_resreslayout_det_res_spotting_res	rec_polys	rec_texts
output_imgNr   r   r   )dtype   r   c                    g | ]	\  }}||f
S rM   rM   rT   xys      r;   rU   z-PaddleOCRVLResult._to_img.<locals>.<listcomp>   s     ???$!Q1v???r=      )outliner^   fillg      ?      c                P    g | ]#\  }}t          |          t          |          f$S rM   )r3   rs   s      r;   rU   z-PaddleOCRVLResult._to_img.<locals>.<listcomp>   s-    "M"M"M1CFFCFF#3"M"M"Mr=   )rx   )ro   ry   r   TRGB)rq   rq   rq   spotting_res_img)(r_   r   imgitemsr1   	enumeraterW   shaper   	fromarraynponesuint8randomseedr   Drawr
   ziprandintarraylentolistpolygonr   r3   maxminmeantupler   pathint32reshapecv2	polylinesbitwise_andblendnewpaste)r:   res_img_dictrQ   keyvalueidxre   
layout_resboxestxtsr!   hr`   img_left	img_right	draw_leftvis_fontboxtxtcolorptsheightbox_ptsimg_right_textimg_shows                            r;   _to_imgzPaddleOCRVLResult._to_img   sY    ./01 
	A$56
CC ."&'=">"B"H"H"J"J . .JC(-L%%$56== A1:/02 2 A A-C- ""6
CC A*>*B*H*H*J*J A AJC;@LC#88 !"89 	V$/0*== S156F1G1KE1R-.$/0$77 V'06F1G'H'H V VOC!*j99 V@Ju@U%<s%<%<= HH^$$.	8tN3T::.	8 /00.	8
 (5E'4D/0>qqq!!!TTrTzJE;qs#DAqu--HAq	:::S@IKNNN!x00I#H#,S-=-=#>#>  Zc3q#..q#..q#..E
 (3--C3xx!||??#**,,???!))#uAE)RRR.s33!$SCAAAqD	NNSQQQT^^,K%L!M!M%'WSAY%7%7BQBE
%'WSAY%7%7#b&//%IABBE

"M"M

"M"M"M!))')>>>!#u-- %!!f%61vsC%W%WN(31199*EECM.3%uaHHH #	> J JIIH{5?5#9#98SIIHyQ
ODDHNN8aAq\222NN5?9551a!eQ7GHHH/7L+,s   HR$$R)dict[str, str]c                   i }|                      d          rit          | d                   dk    rPt          t          | d                             D ]-}| d         |         }|d         }d| }|j        d         ||<   .|S )z
        Converts the prediction to its corresponding HTML representation.

        Returns:
            dict: The str type HTML representation result.
        table_res_listr   table_region_idtable_pred)rW   r   rangehtml)r:   res_html_dictsno	table_resr   r   s         r;   _to_htmlzPaddleOCRVLResult._to_html        88$%% 	<#d3C.D*E*E*I*IS&6!78899 < < !1237	"+,=">000%.^F%;c""r=   c                   i }|                      d          rit          | d                   dk    rPt          t          | d                             D ]-}| d         |         }|d         }d| }|j        d         ||<   .|S )z
        Converts the prediction HTML to an XLSX file path.

        Returns:
            dict: The str type XLSX representation result.
        r   r   r   r   r   )rW   r   r   xlsx)r:   res_xlsx_dictr   r   r   r   s         r;   _to_xlsxzPaddleOCRVLResult._to_xlsx   r   r=   c                   i }| d         |d<   | d         |d<   | d         |d<   | d         |d<   | d         |d<   | d         }||d<   | d         d         r=t          | d         t                    r| d         j        d	         |d<   n| d         |d<   | d         d
         r=t          | d         t                    r| d         j        d	         |d<   n| d         |d<   | d         }d |D             }||d<   t          j        |g|R i |S )aw  
        Converts the instance's attributes to a dictionary and then to a string.

        Args:
            *args: Additional positional arguments passed to the base class method.
            **kwargs: Additional keyword arguments passed to the base class method.

        Returns:
            dict: A dictionary with the instance's attributes converted to strings.
        
input_path
page_index
page_countr^   r   rQ   rd   re   ri   rg   rh   parsing_res_listc                8    g | ]}|j         |j        |j        d S ))block_labelblock_content
block_bbox)r0   r5   r4   )rT   parsing_ress     r;   rU   z-PaddleOCRVLResult._to_str.<locals>.<listcomp>  sC     
 
 
 	  +0!,!4). 
 
 
r=   )r_   r   r>   r   _to_str)r:   argskwargsr[   rQ   r   s         r;   r   zPaddleOCRVLResult._to_str   sx    !,/\!,/\!,/\WWhX./!/ !"89 	L$56
CC L/34J/K/OPU/V+,,/34J/K+, !"89 	@$/0*== @)-.>)?)CE)J%&&)-.>)?%& 23
 
  0
 
 
 $4  7777777r=   Tc                  
 | d                              dd          | d                              dd          |                                 
|rd }
fd}
fd}nd }fd	}fd
}| d                              dd          rt          n|}| d                              dd          s|}|rd }nd }d }t          ||||||          }| d                              dg           D ]}	|                    |	d           |S )z6Build label-to-handler mapping for content formatting.rQ   use_ocr_for_image_blockFuse_seal_recognitionc                :    t          t          |                     S N)r   r   blocks    r;   <lambda>z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>.  s    -D!%((. . r=   c                H    t          t          |                      S N)original_image_widthshow_ocr_content)collapse_newlinesr   r   )r   r   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>1  s:    .E+)=%<  
 '>"=/ / / r=   c                H    t          t          |                      S r   r   )r   r   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>9  s:    -D+)=%9  
 ';":. . . r=   c                    | j         S r   r5   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>B  s    U] r=   c                &    t          |           S N)r   r   )r   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>C  s    .@(?/ / / r=   c                &    t          |           S r   r   )r   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>F  s    -?(<. . . r=   use_chart_recognitionrg   c                &    dt          |           z   S N
)r   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>T  s    d5H5O5O.O r=   c                0    t          d| j        z             S r   )r   r5   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>V  s    nTEM=Q.R.R r=   c                    | j         S r   r   r   s    r;   r   z<PaddleOCRVLResult._build_handle_funcs_dict.<locals>.<lambda>X  s    EM r=   )	text_func
image_func
chart_func
table_funcformula_func	seal_funcrR   N)rW   ra   r   r   pop)r:   prettyformat_text_funcformat_image_funcformat_seal_funcformat_chart_funcformat_table_funcformat_formula_funchandle_funcs_dictr0   r   r   r   s             @@@r;   _build_handle_funcs_dictz*PaddleOCRVLResult._build_handle_funcs_dict%  s   "&'7"8"<"<%u#
 #
  $$4599:PRWXX#5577 	   ! ! ! ! !           ;:! ! ! !        $%))*A5II###" 	 $%))*@%HH 	0/ 	S O O R R993&(((,&
 
 
 *+//0H"MM 	/ 	/E!!%....  r=   c                t   |                     dd          }i }| d         |d<   | d         |d<   | d         |d<   | d         |d<   | d         |d<   | d         }||d<   | d                             d	d          r|                     d
          }| d         }g }d}	t          |          D ]\  }
}|j        }|| j        vr|	}|	dz  }	nd}|j        |j        |j        |
||j        |j        n|
d}t          |d          r|j
        
|j
        |d<   t          |d          r|j        
|j        |d<   |j        
|j        |d<   |r|j        
|j        |d<   | d                             d	d          r>|                    |j                  r ||j                 |          |d<   n
|j        |d<   |                    |           ||d<   |                     d          r<t          | d         t                     rd | d         D             |d<   n| d         |d<   | d         d         rt          | d         t"                    r| d         j        d         |d<   nt          | d         t                     r^g }| d         D ]M}t          |t"                    r!|                    |j        d                    8|                    |           N||d<   n| d         |d<   | d         d         rt          | d         t"                    r| d         j        d         |d<   nt          | d         t                     r^g }| d         D ]M}t          |t"                    r!|                    |j        d                    8|                    |           N||d<   n| d         |d<   t'          j        |g|R i |S )aQ  
        Converts the object's data to a JSON dictionary.

        Args:
            *args: Positional arguments passed to the JsonMixin._to_json method.
            **kwargs: Keyword arguments passed to the JsonMixin._to_json method.

        Returns:
            dict: A dictionary containing the object's data in JSON format.
        keep_imgFr   r   r   r^   r   rQ   format_block_contentTr   r   ry   N)r   r   r   block_idblock_orderr7   r8   r9   block_polygon_pointsr!   r   rk   c                    g | ]}|S rM   rM   )rT   ri   s     r;   rU   z.PaddleOCRVLResult._to_json.<locals>.<listcomp>  s    'L'L'L'L'L'Lr=   rd   re   ri   rg   rh   )r   rW   r   r   r0   rZ   r5   r4   r7   hasattrr8   r9   r6   r!   appendr_   r1   r   jsonr   _to_json)r:   r   r   	_keep_imgr[   rQ   r   r   parsing_res_list_jsonorder_indexr   r   r0   orderres_dictre   ri   rh   s                     r;   r  zPaddleOCRVLResult._to_jsonf  s    JJz511	!,/\!,/\!,/\WWhX./!/ !%%&<eDD 	K $ = =T = J J 23 " )*: ; ; )	3 )	3C%ED222#q *0!,!4).$,7,@,LK((RU	 	H %677J/;.9.I*+%677J/;.9.I*+)53>3M/0 6[.:$/$5!$%))*@%HH D$(():;; D0T0A+BS0T#1 1H_-- 1<0CH_-!((2222#8 88N## 	<$~.55 <'L'LtN7K'L'L'L^$$'+N';^$ !"89 	L$56
CC L/34J/K/PQV/W+,,D!78$?? 	L')$ 67 9 9C!#z22 9,33CHUODDDD,33C8888/C+,,/34J/K+, !"89 	@$/0*== @)-.>)?)DU)K%&&D!12D99 	@!# 01 3 3C!#z22 3&--chuo>>>>&--c2222)7%&&)-.>)?%&!$8888888r=   Fdictc                    |                      |          }t          j        | d         ||| d                   }| d         |d<   | d         |d<   |S )a>  
        Save the parsing result to a Markdown file.

        Args:
            pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
            show_formula_number (bool): whether to show formula numbers.

        Returns:
            dict: Markdown information with text and images.
        r   r   imgs_in_doc)r   show_formula_numberr  r   r   )r   r   convert)r:   r   r  r   results        r;   _to_markdownzPaddleOCRVLResult._to_markdown  sn     !999HH"*#$/ 3]+	
 
 
  $L1|#L1|r=   c                   ddl m} ddlm} d|j        ddd|j        ddd|j        d	d|j        d	d|j        d	d|j        d	d|j        d	d|j        d	d
|j        d	d	}|                                 }|                     dd          }t          |t                    r|d         nt          |pd          } || d         ||                     dg                     \  }}|||| d         |dS )a  Convert the parsing result to a Word-compatible dict.

        Returns:
            dict: {
                "word_blocks": List[Dict],       # Simplified list of content blocks
                "original_image_width": int,   # Pixel width of the source page
                "input_path": str,             # Original input file path
                "images": List[Dict]           # List of {"path": str, "img": PIL.Image}
            }
        r   )WD_ALIGN_PARAGRAPHr   )build_word_blocks   T)sizealignindent
   )r  r  	   )	ocrvertical_textr)   spottinginline_formuladisplay_formulareference_contentr5   r(   r   r   r  )extra_style_mapr  r   )word_blocksr   original_image_heightr   images)docx.enum.textr  common.result.converterr  JUSTIFYLEFTCENTERra   rW   r_   r1   r3   )	r:   r  r  r!  r   
height_valr#  r"  r$  s	            r;   _to_wordzPaddleOCRVLResult._to_word  s    	655555@@@@@@
 +3  +3 
 $&0B0GHH!#.@.EFF')4F4KLL(*5G5NOO+3" " !#-?-DEE!"-?-DEE)
 
.  $5577XXh**
'
D99SJqMMs:?QR?S?S 	 0/#$+33
 
 
V '$8%:|,
 
 	
r=   rH   )r-   r3   )r-   rb   )r-   r   )T)TF)r-   r  )rI   rJ   rK   rL   r<   ra   r   r   r   r   r   r  r  r+  __classcell__)r\   s   @r;   rO   rO   n   s        
 
 
 
 
 
(2 2 2 2
M M M M^        '8 '8 '8 '8R?! ?! ?! ?!Bf9 f9 f9 f9P    0:
 :
 :
 :
 :
 :
 :
 :
r=   rO   c                  &    e Zd Zd Zd Zd Zd ZdS )PaddleOCRVLPagesResultc                .    t          j        d           d S )Nz@The result of multi-pages don't support to save as image format!r   warningr:   r   r   s      r;   save_to_imgz"PaddleOCRVLPagesResult.save_to_img$  s     O	
 	
 	
 tr=   c                .    t          j        d           d S )Nz?The result of multi-pages don't support to save as html format!r0  r2  s      r;   save_to_htmlz#PaddleOCRVLPagesResult.save_to_html*       N	
 	
 	
 tr=   c                .    t          j        d           d S )Nz?The result of multi-pages don't support to save as xlsx format!r0  r2  s      r;   save_to_xlsxz#PaddleOCRVLPagesResult.save_to_xlsx0  r6  r=   c                .    t          j        d           d S )Nz?The result of multi-pages don't support to save as word format!r0  r2  s      r;   save_to_wordz#PaddleOCRVLPagesResult.save_to_word6  r6  r=   N)rI   rJ   rK   r3  r5  r8  r:  rM   r=   r;   r.  r.  #  sP                  r=   r.  )+
__future__r   r   numpyr   PILr   r   utilsr   
utils.depsr   r	   utils.fontsr
   common.resultr   r   r   r   r   r   r   r&  r   -common.result.converter.markdown_format_funcsr   r   r   r   r   r   r   r   
ocr.resultr   r   rX   r   objectr+   rO   r.  rM   r=   r;   <module>rE     sS   # " " " " "                            @ @ @ @ @ @ @ @ ( ( ( ( ( (                  9 8 8 8 8 8	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 = < < < < < < <    +,, JJJ* * * * *v * * *Z ,--q
 q
 q
 q
 q
iM9 q
 q
 .-q
h    .     r=   