
    Αi%                         S r SSKrSSKrSSKrSSKJr  / rSrSr	Sr
SrSrS	rS
rSrSrSrSrS rS rS r SS jr\" SSSSS9S 5       r\" SSSSS9S 5       r\" SSSSS9S 5       r\" SSSSS9S 5       rg)a^  
Conll05 dataset.
Paddle semantic role labeling Book and demo use this dataset as an example.
Because Conll05 is not free in public, the default downloaded URL is test set
of Conll05 (which is public). Users can change URL and MD5 to their Conll
dataset. And a pre-trained word vector model based on Wikipedia corpus is used
to initialize SRL model.
    N)
deprecatedzBhttp://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz 387719152ae52d60422c016e92a742fcz:http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt ea7fb7d4c75cc6254716f0177a506baaz:http://paddlemodels.bj.bcebos.com/conll05st%2FverbDict.txt 0d2977293bbb6cbefab5b0f97db1e77cz<http://paddlemodels.bj.bcebos.com/conll05st%2FtargetDict.txt d8c7f03ceb5fc2e5a0fa7503a4353751z1http://paddlemodels.bj.bcebos.com/conll05st%2Femb bf436eb0faa1f6f9103017f8be57cdb7c                 Z   0 n[        5       n[        U S5       n[        U5       HA  u  pEUR                  5       nUR	                  S5      (       d  M-  UR                  USS  5        MC     SnU H  nXaSU-   '   US-  nXaSU-   '   US-  nM     XaS'   S S S 5        U$ ! , (       d  f       U$ = f)	Nr)B-I-   r   r      r   O)setopen	enumeratestrip
startswithadd)filenamedtag_dictfilineindextags           V/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/dataset/conll05.pyload_label_dictr   -   s    
AuH	h	 |GA::<D|,,T!"X& $ C!dSjMQJE!dSjMQJE	 
 # 
 H 
	 Hs   5B?B
B*c                     0 n[        U S5       n[        U5       H  u  p4X1UR                  5       '   M     S S S 5        U$ ! , (       d  f       U$ = f)Nr
   )r   r   r   )r   r   r   r   r   s        r   	load_dictr!   ?   sL    
A	h	 |GAdjjlO $ 
 H 
	 Hs   'A  
Ac                    ^ ^^ U UU4S jnU$ )z
Read one corpus. It returns an iterator. Each element of
this iterator is a tuple including sentence and labels. The sentence is
consist of a list of word IDs. The labels include a list of label IDs.
:return: a iterator of data.
:rtype: iterator
c            
   3   r  >#    [         R                  " T5      n U R                  T5      nU R                  T5      n[        R                  " US9 n[        R                  " US9 n/ n/ n/ n[        X45       GHS  u  pUR                  5       R                  5       nU	R                  5       R                  5       R                  5       n	[        U	5      S:X  Ga  [        [        US   5      5       H(  n
U Vs/ s H  oU
   PM	     nnUR                  U5        M*     [        U5      S:  Gau  / nUS    H  nUS:w  d  M  UR                  U5        M     [        USS  5       GH;  u  pSnSn/ nSnU GH  nUS:X  a  U(       d  UR                  S5        M$  US:X  a  U(       a  UR                  S	U-   5        MG  US
:X  a  UR                  S	U-   5        SnMe  UR                  S5      S:w  aA  UR                  S5      S:w  a,  USUR                  S5       nUR                  SU-   5        SnM  UR                  S5      S:w  aB  UR                  S5      S:X  a-  USUR                  S5       nUR                  SU-   5        SnGM  [        SU 35      e   X]U
   U4v   GM>     / n/ n/ nGM1  UR                  U5        UR                  U	5        GMV     S S S 5        S S S 5        UR                  5         UR                  5         U R                  5         g s  snf ! , (       d  f       NL= f! , (       d  f       NU= f7f)N)fileobjr   r   -r   F *r   z*)()r   TzUnexpected label: )tarfiler   extractfilegzipGzipFilezipr   decodesplitlenrangeappendr   findRuntimeErrorclose)tfwfpf
words_file
props_file	sentenceslabelsone_segwordlabelr   xa_kind_label	verb_listlblcur_tagis_in_bracketlbl_seq	verb_wordl	data_path
props_name
words_names                       r   readercorpus_reader.<locals>.readerP   s    \\)$^^J'^^J'MM"%MM"%IFG"::zz|**,,,.446u:?"3wqz?36='>g!g'>l3 4 6{a'$&	!'A Cx ) 0 0 3 "+ '0qr
&;FA&)G,1M&(G(*I%(#$8M$+NN3$7%&#X-$+NN4'>$B%&$Y$+NN4'>$B49M%&VVC[B%6166#;";L./AFF3K.@G$+NN4'>$B49M%&VVC[B%6166#;";L./AFF3K.@G$+NN4'>$B48M*69KA37O*P$P# &)& #,q\7"BB1 '<4 !#IF G$$T*NN5)]  ; & &l 	




Y (? &% &%sP   AL7L&%BL5L5L<FLL&9L7L
L#	L&&
L40L7 )rK   rM   rL   rN   s   ``` r   corpus_readerrQ   G   s    =~ M    c                     ^ ^^^ U UUU4S jnU$ )Nc            
   3   l  >#    T" 5        GH  u  pn[        U 5      nUR                  S5      nS/[        U5      -  nUS:  a  SXTS-
  '   XS-
     nOSnUS:  a  SXTS-
  '   XS-
     nOSnSXT'   X   nU[        U5      S-
  :  a  SXTS-   '   XS-      n	OSn	U[        U5      S-
  :  a  SXTS-   '   XS-      n
OSn
U  Vs/ s H  nTR                  U[        5      PM     nnTR                  U[        5      /U-  nTR                  U[        5      /U-  nTR                  U[        5      /U-  nTR                  U	[        5      /U-  nTR                  U
[        5      /U-  nTR                  U5      /U-  nU Vs/ s H  nTR                  U5      PM     nnUUUUUUUUU4	v   GM     g s  snf s  snf 7f)NzB-Vr   r   bosr   eos)r2   r   getUNK_IDX)sentence	predicater>   sen_len
verb_indexmarkctx_n1ctx_n2ctx_0ctx_p1ctx_p2wword_idx
ctx_n2_idx
ctx_n1_idx	ctx_0_idx
ctx_p1_idx
ctx_p2_idxpred_idx	label_idxrQ   
label_dictpredicate_dict	word_dicts                       r   rN   reader_creator.<locals>.reader   s    +8?'H(mGe,J3V$DA~'(!^$!q.1A~'(!^$!q.1 D(ECK!O+'(!^$!q.1CK!O+'(!^$!q.1;CD8a	a18HD#--89GCJ#--89GCJ"ug67'AI#--89GCJ#--89GCJ&**956@H4:;Fq*FI; 
 
W ,;@ E <s   B:F4= F*BF4:F/F4rP   )rQ   rn   rm   rl   rN   s   ```` r   reader_creatorrp      s    6 6p MrR   z2.0.0zpaddle.text.datasets.Conll05str   z>Please use new dataset API which supports paddle.io.DataLoader)since	update_tolevelreasonc                  r   [        [        R                  R                  R	                  [
        S[        5      5      n [        [        R                  R                  R	                  [        S[        5      5      n[        [        R                  R                  R	                  [        S[        5      5      nXU4$ )z>
Get the word, verb and label dictionary of Wikipedia corpus.
	conll05st)r!   paddledatasetcommondownloadWORDDICT_URLWORDDICT_MD5VERBDICT_URLVERBDICT_MD5r   TRGDICT_URLTRGDICT_MD5)rn   	verb_dictrl   s      r   get_dictr      s     &&|[,OI &&|[,OI !&&{KMJ ++rR   c                  h    [         R                  R                  R                  [        S[
        5      $ )z8
Get the trained word vector based on Wikipedia corpus.
rv   )rw   rx   ry   rz   EMB_URLEMB_MD5rP   rR   r   get_embeddingr      s#     >>  ))';HHrR   c                      [        5       u  pn[        [        R                  R                  R                  [        S[        5      SSS9n[        X0X5      $ )aP  
Conll05 test set creator.

Because the training dataset is not free, the test dataset is used for
training. It returns a reader creator, each sample in the reader is nine
features, including sentence sequence, predicate, predicate context,
predicate context flag and tagged sequence.

:return: Training reader creator
:rtype: callable
rv   z2conll05st-release/test.wsj/words/test.wsj.words.gzz2conll05st-release/test.wsj/props/test.wsj.props.gz)rM   rL   )	r   rQ   rw   rx   ry   rz   DATA_URLDATA_MD5rp   )rn   r   rl   rN   s       r   testr      sK    $ (0z$I*&&xhGGGF
 &YCCrR   c                     [         R                  R                  R                  [        S[
        5        [         R                  R                  R                  [        S[        5        [         R                  R                  R                  [        S[        5        [         R                  R                  R                  [        S[        5        [         R                  R                  R                  [        S[        5        g )Nrv   )rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   rP   rR   r   fetchr     s     NN""<lK
NN""<lK
NN"";[I
NN""7KA
NN""8[(CrR   )NNN)__doc__r-   r+   paddle.dataset.commonrw   paddle.utilsr   __all__r   r   r{   r|   r}   r~   r   r   r   r   rX   r   r!   rQ   rp   r   r   r   r   rP   rR   r   <module>r      s      #
O-K1K1L0
=
,
$HX DH;| 
.
K	,,  
.
K	II 
.
K	DD* 
.
K	DDrR   