
    {-jR0                     8    d dl Z d Zd Zd Zd Zd Zd Zd ZdS )	    Nc                 B   | |         dk    rdS |dk    r| |dz
           nd}|dz   t          |           k     r| |dz            nd}|                                s|                                rdS |                                s|                                rdS |dv rdS dS )zH
    Check if the given character is a sentence ending punctuation.
    .Fr       )r    	
"'u   ”u   ’)u   】u   」u   》T)lenisdigitisalpha)textiprevnexts       s/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddlex/inference/pipelines/pp_doctranslation/utils.py_is_sentence_dotr      s    
 Aw#~~ua%%4A;;RDa%#d))++4A;;D||~~  u||~~  uVVVt5    c                    t          |           dz  }g d}t          |t          |                     D ]}| |         |v ry|dz   }|t          |           k     r,| |         dv r"|dz  }|t          |           k     r
| |         dv "|t          |           k     r!t          | d|                   |k    r||fc S | |         dk    rt          | |          rx|dz   }|t          |           k     r,| |         dv r"|dz  }|t          |           k     r
| |         dv "|t          |           k     r!t          | d|                   |k    r||fc S t          |dd          D ]}| |         |v rf|dz   }|t          |           k     r,| |         dv r"|dz  }|t          |           k     r
| |         dv "t          | d|                   |k    r||fc S r| |         dk    rut          | |          re|dz   }|t          |           k     r,| |         dv r"|dz  }|t          |           k     r
| |         dv "t          | d|                   |k    r||fc S t          |t          |                     t          |t          |                     fS )	z
    Find the position to split the text into two chunks.

    Args:
        text (str): The original text to be split.
        chunk_size (int): The maximum size of each chunk.

    Returns:
        int: The index where the text should be split.
       )r	   u   。;u   ；!u   ！?u   ？r   z 	
Nr   r   )r   ranger   min)r   
chunk_sizecentersplit_charsr   js         r   _find_split_posr#   )   s    YY!^FCCCK 63t99%%  7k!!AAc$ii--DGw$6$6Q c$ii--DGw$6$63t99}}T"1"X*!<!<!t!W^^ 0q 9 9^AAc$ii--DGw$6$6Q c$ii--DGw$6$63t99}}T"1"X*!<!<!t 61b!!  7k!!AAc$ii--DGw$6$6Q c$ii--DGw$6$648}}
**!t +!W^^ 0q 9 9^AAc$ii--DGw$6$6Q c$ii--DGw$6$648}}
**!t z3t99%%s:s4yy'A'AAAr   c                 $   |                                  } t          |           |k    r ||           S t          | |          \  }}| d|         }| |d         }| ||         }|rt          |||          }|rt          |||          }	||z   |	z   S )af  
    Split the text recursively and translate each chunk.

    Args:
        text (str): The original text to be split.
        chunk_size (int): The maximum size of each chunk.
        translate_func (callable): A function that translates a single chunk of text.
        results (list): A list to store the translated chunks.

    Returns:
        None
    N)stripr   r#   split_text_recursive)
r   r   translate_func	split_posend_whitespaceleftright
whitespace	left_text
right_texts
             r   r&   r&   Z   s     ::<<D
4yyJ~d###$3D*$E$E!	>JYJ^__%)N23
 	O,T:~NNI 	Q-eZPPJ:%
22r   c                    |                                                      d          }|d                             d          s|d                             d          r|d         }|d                             d          s|d                             d          r|d         nd}|rd                    |dd                   nd                    |dd                   }nd}d}| }t	          |||          }d	 |                    d          D             }	d                    |	          }
|r
| d|
 d| n|
}|                    |           dS )
a{  
    Translate a code block and append the result to the results list.

    Args:
        code_block (str): The code block to be translated.
        chunk_size (int): The maximum size of each chunk.
        translate_func (callable): A function that translates a single chunk of text.
        results (list): A list to store the translated chunks.

    Returns:
        None
    r	   r   ```~~~r   r   r   Nc                     g | ]R}|                                                     d           )|                                                     d          P|SS )r0   r1   )r%   
startswith).0lines     r   
<listcomp>z(translate_code_block.<locals>.<listcomp>   sd       

''.. 37**,,2I2I%2P2P  r   )r%   splitr3   joinr&   append)
code_blockr   r'   resultslinesheaderfootercode_contenttranslated_code_linesfiltered_code_linestranslated_coderesults               r   translate_code_blockrD   x   s    $$T**EQx5!! "U1X%8%8%?%? "q b	$$U++/4Ry/C/CE/J/JE"II 	
 28Qtyyqt---TYYuQRRy=Q=Q!0j. 
 )//55  
 ii 344O9?T55/55V555_FNN6r   c                 b
   ddl }ddlm} |                     d          dk     rz|                     d          dk     ra|                     d          |                     d          k    r5t	          |           |k     r" ||           }|                    |           dS  || d          }t                      }g }	g }
|                    dd	          D ]}|                    d
dg          }|rt          |          |vrt|
                                                                }|r*|	                    |           |
                    |           |                    t          |                     |}d}|t	          |	          k     rjg }g }d}|t	          |	          k     r|t	          |
|                   z   |k    r|                    |	|                    |                    |
|                    |t	          |
|                   z  }|dz  }|t	          |	          k     r|t	          |
|                   z   |k    d}|                    |          } ||          }|                    |          }t          ||          D ]W\  }}|                                  ||d          }|j        D ]*}|                    |                    |                     +X|t	          |	          k     jg }|                    dd	          D ]B}|                    d
dg          s)|                                r|                    |           Cd}t	          |          }||k     rg }g }t	          d          }||k     r||                                         }t	          |          |k    r2t'          |||          } ||                             |            |dz  }ed| d}!|t	          |!          z   |k    rnM|                    ||                    |                    |!           |t	          |!          z  }|dz  }||k     |s2||dz
                                           }d| d}!||dz
           g}|!g}|rdd                    |          z   dz   } ||          } ||d          }"|"                    d          }#t          ||#          D ],\  }$}%|$                    |%
                                           -||k     |                    t+          |                     dS )a{  
    Translate a HTML block and append the result to the results list.

    Args:
        html_block (str): The HTML block to be translated.
        chunk_size (int): The maximum size of each chunk.
        translate_func (callable): A function that translates a single chunk of text.
        results (list): A list to store the translated chunks.

    Returns:
        None
    r   NBeautifulSoup<   >html.parserT)string	recursivetdthr   __TD__z	<ol></ol>z<li>z</li>z<ol>r   z</ol>li)copybs4rG   countr   r9   setfind_allfind_parentiddecode_contentsr%   addr8   r7   zipclearcontentsdeepcopyr&   replace_withstr)&
html_blockr   r'   r;   rR   rG   
translatedsouptd_seentd_batch_nodestd_batch_textsnode	parent_tdtd_text
batch_sizer   batch_nodesbatch_textscurrent_lengthplaceholder
batch_texttranslated_batchtranslated_linestd_noder5   fragchild
text_nodesidxtotalli_texts	node_texttranslated_textli_str
trans_souptranslated_lis	orig_nodeli_tags&                                         r   translate_html_blockr      s    KKK!!!!!! 	!!S!!A%%S!!Z%5%5c%:%:::
OOj((#^J//
z"""=]33DeeGNN TT:: ' '$$dD\22	 	'Ig55//117799G /%%i000%%g...KK9&&& J	A
c.!!
!
!N####^A%6!7!77:EE~a0111~a0111c."3444NFA N####^A%6!7!77:EE  %%k22
)>*55+11+>> .>?? 	5 	5MGTMMOOO =}55D 5 5t}}U3344445/ c.!!
!
!4 JTT:: $ $t-- 	$$**,, 	$d###
C
OOE
++[))Ekk"3--//I9~~
**"6z># # 3,,_===q,I,,,FF+j88z#///OOF###c&kk)N1HC Ekk   	 "37+1133I,I,,,F%cAg./KxH 	A"''("3"33g=J'
33J&z=AAJ'0066N%(n%E%E A A!	6&&v'='='?'?@@@@C ++F NN3t99r   c                    ddl m}  || d          }g }g }d}d}d}|j        D ]}t          |d          r`|j        Yt          |          }	|                    |          }
|                    |	           |                    |
           ||
z  }|dz  }r|t          |          z  }g }t          ||          }d}t          |          D ]r\  }}|\  }}|t          |          k     rU||         |v rK|                    ||         ||                   }|dz  }d	|f||<   |t          |          k     r
||         |v Ks|S )
z.
    Split the original text into chunks.
    r   rF   rK   z<<HTML_BLOCK_{}>>r   nameNr   html)rS   rG   r]   hasattrr   r`   formatr9   split_and_append_text	enumerater   replace)r   rG   rc   html_blockshtml_placeholdersplaceholder_fmttext_after_placeholderindexelemhtml_strrn   splited_blockcurrent_indexrv   block_contents                    r   split_original_textsr     s    "!!!!! =}--DK)OE 	0 	04   	0TY%:4yyH)0077Kx((($$[111"k1"QJEE"c$ii/"" M)-9OPPM M.. 
3 
3
U
7C 12222!-0G;;oo!-0+m2L G QM"('!2M# C 12222!-0G;; r   c                    |                                 rut          j        dt          j                  }d}|                    |          D ]}|                                |k    rs|||                                         }t          j        d|          }|D ]?}|                                 r)|                     d|                                 f           @|                     d|                                f           |	                                }|t          |          k     ra||d         }t          j        d|          }|D ]?}|                                 r)|                     d|                                 f           @| S )a  
    Split the text and append the result to the result list.

    Args:
        result (list): The current result list.
        text_content (str): The text content to be processed.

    Returns:
        list: The updated result list after processing the text content.
    z(```.*?\n.*?```|~~~.*?\n.*?~~~)r   z\n{2,}r   codeN)r%   recompileDOTALLfinditerstartr7   r9   groupendr   )rC   text_contentcode_patternlast_posmnon_code
paragraphsps           r   r   r   H  sl     7z"DbiPP&&|44 
	 
	Awwyy8##'17799(<=Xi::
# ; ;Awwyy ;vqwwyy&9:::MM617799-...uuwwHHc,''''#HII.H)X66J 7 77799 7MM617799"5666Mr   )r   r   r#   r&   rD   r   r   r    r   r   <module>r      s    
			  ..B .B .Bb3 3 3<( ( (Vt t tn+ + +\! ! ! ! !r   