
    ёi#                        S SK Jr  S SKrS SKrS SKJrJrJr  S SKr	S SK
Jr  S SKJr  \(       a  S SKJr  \S   r/ r/ SQrSrS	r " S
 S5      r " S S5      r " S S\5      rg)    )annotationsN)TYPE_CHECKINGAnyLiteral)_check_exists_and_download)Datasettraintest)         #   -   2   8   z3https://dataset.bj.bcebos.com/movielens%2Fml-1m.zip c4d9eecfca2ab87c1945afe126590906c                  \    \ rS rSr% SrS\S'   S\S'   S\S'   SS	 jrS
 rSS jrSS jr	Sr
g)	MovieInfo%   zE
Movie id, title and categories information are stored in MovieInfo.
intindex	list[str]
categoriesstrtitlec                <    [        U5      U l        X l        X0l        g N)r   r   r   r   )selfr   r   r   s       ^/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/text/datasets/movielens.py__init__MovieInfo.__init__.   s    Z
$
    c                    U R                   /U R                   Vs/ s H  o1U   PM	     snU R                  R                  5        Vs/ s H  oBUR	                  5          PM     sn/$ s  snf s  snf )z
Get information from a movie.
)r   r   r   splitlower)r   categories_dictmovie_title_dictcws        r    valueMovieInfo.value3   s`    
 ZZL)-9AQ926**2B2B2DE2DQaggi(2DE
 	
9Es   A'A,c                V    SU R                    SU R                   SU R                   S3$ )Nz<MovieInfo id(z	), title(z), categories()>)r   r   r   r   s    r    __str__MovieInfo.__str__=   s+    

|9TZZLtN__abbr#   c                "    U R                  5       $ r   )r0   r/   s    r    __repr__MovieInfo.__repr__@   s    ||~r#   )r   r   r   N)r   r   r   r   r   r   returnNoner5   r   __name__
__module____qualname____firstlineno____doc____annotations__r!   r+   r0   r3   __static_attributes__ r#   r    r   r   %   s.     JJ

cr#   r   c                  f    \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   SS	 jrS
 rSS jrSS jr	Sr
g)UserInfoD   zC
User id, gender, age, and job information are stored in UserInfo.
r   r   boolis_maleagejob_idc                    [        U5      U l        US:H  U l        [        R                  [        U5      5      U l        [        U5      U l        g )NM)r   r   rE   	age_tablerF   rG   )r   r   genderrF   rG   s        r    r!   UserInfo.__init__N   s7    Z
}??3s8,&kr#   c                x    U R                   /U R                  (       a  SOS/U R                  /U R                  //$ )z
Get information from a user.
r   r   )r   rE   rF   rG   r/   s    r    r+   UserInfo.valueT   s7    
 ZZL,,QA&XXJ[[M	
 	
r#   c           	         U R                   (       a  SOSnSU R                   SU S[        U R                      SU R                   S3	$ )NrI   Fz<UserInfo id(z
), gender(z), age(z), job(r.   )rE   r   rJ   rF   rG   )r   rK   s     r    r0   UserInfo.__str___   sH    #tzzl*VHGIdhhDWCXX_`d`k`k_llnoor#   c                    [        U 5      $ r   )r   r/   s    r    r3   UserInfo.__repr__c   s    4yr#   )rF   r   rE   rG   N)
r   r   rK   r   rF   r   rG   r   r5   r6   r7   r8   r@   r#   r    rB   rB   D   s2     JM	HK"	
pr#   rB   c                      \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   S\S'        S           SS jjrSS jrSS jrSS jr	SS jr
Srg) 	Movielensg   a  
Implementation of `Movielens 1-M <https://grouplens.org/datasets/movielens/1m/>`_ dataset.

Args:
    data_file(str|None): path to data tar file, can be set None if
        :attr:`download` is True. Default None.
    mode(str): 'train' or 'test' mode. Default 'train'.
    test_ratio(float): split ratio for test sample. Default 0.1.
    rand_seed(int): random seed. Default 0.
    download(bool): whether to download dataset automatically if
        :attr:`data_file` is not set. Default True.

Returns:
    Dataset: instance of Movielens 1-M dataset.

Examples:

    .. code-block:: pycon

        >>> # doctest: +TIMEOUT(75)
        >>> import paddle
        >>> from paddle.text.datasets import Movielens

        >>> class SimpleNet(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...
        ...     def forward(self, category, title, rating):
        ...         return paddle.sum(category), paddle.sum(title), paddle.sum(rating)


        >>> movielens = Movielens(mode='train')

        >>> for i in range(10):
        ...     category, title, rating = movielens[i][-3:]
        ...     category = paddle.to_tensor(category)
        ...     title = paddle.to_tensor(title)
        ...     rating = paddle.to_tensor(rating)
        ...
        ...     model = SimpleNet()
        ...     category, title, rating = model(category, title, rating)
        ...     print(category.shape, title.shape, rating.shape)
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
        paddle.Size([]) paddle.Size([]) paddle.Size([])
_MovieLensDataSetModemode
str | None	data_filefloat
test_ratior   	rand_seedzdict[int, MovieInfo]
movie_infozdict[str, int]r(   r'   zdict[int, UserInfo]	user_infozlist[list[float]]dataNc                z   UR                  5       S;   d
   SU 35       eUR                  5       U l        Xl        U R                  c*  U(       d   S5       e[        U[        [
        SU5      U l        X0l        X@l        [        R                  R                  U5        U R                  5         U R                  5         g )Nr	   z(mode should be 'train', 'test', but got z>data_file is not set and downloading automatically is disabled	sentiment)r&   rX   rZ   r   URLMD5r\   r]   nprandomseed_load_meta_info
_load_data)r   rZ   rX   r\   r]   downloads         r    r!   Movielens.__init__   s     zz|  
 
 	= 6dV<	= 
 JJL	">>! P8 83[(DN %"
		y!r#   c                :   [         R                  " S5      n0 U l        0 U l        0 U l        0 U l        [        R                  " U R                  5       nUR                  5        GH  n[        U[        R                  5      (       d   e[        5       n[        5       nUR                  S5       n[        U5       H  u  pxUR                  SS9nUR!                  5       R#                  S5      u  pnUR#                  S5      nU H  nUR%                  U5        M     UR'                  U
5      R)                  S5      n
[+        XU
S9U R                  [-        U	5      '   U
R#                  5        H"  nUR%                  UR/                  5       5        M$     M     S S S 5        [        U5       H  u  p}XpR                  U'   M     [        U5       H  u  p|XpR                  U'   M     UR                  S	5       nU HY  nUR                  SS9nUR!                  5       R#                  S5      u  nnnnn[1        UUUUS
9U R
                  [-        U5      '   M[     S S S 5        GM     S S S 5        g ! , (       d  f       N= f! , (       d  f       GM%  = f! , (       d  f       g = f)Nz^(.*)\((\d+)\)$zml-1m/movies.datlatinencoding::|r   )r   r   r   zml-1m/users.dat)r   rK   rF   rG   )recompiler^   r(   r'   r_   zipfileZipFilerZ   infolist
isinstanceZipInfosetopen	enumeratedecodestripr%   addmatchgroupr   r   r&   rB   )r   patternpackageinfotitle_word_setcategories_set
movie_fileilinemovie_idr   r   r)   r*   	user_fileuidrK   rF   job_s                       r    rh   Movielens._load_meta_info   s%   **/0 "!__T^^,((*!$8888!$!$\\"45#,Z#8#{{G{<6:jjl6H6H6N3%/%5%5c%:
!+A*..q1 ", 'e 4 : :1 =9B"*:H6 "'A*..qwwy9 "/ $9 6 &n5DA/0))!, 6 &n5DA./((+ 6 \\"34	 )#{{G{<37::<3E3Ed3K0VS#q3;"%f#c4s3x0 !* 541 + -,
 65( 543 -,s@   AJ.C'I(AJ2A I9J(
I62J9
J		J
Jc           
        / U l         U R                  S:H  n[        R                  " U R                  5       nUR                  S5       nU GH   nUR                  SS9n[        R                  R                  5       U R                  :  U:X  d  MD  UR                  5       R                  S5      u  pVp7[        U5      n[        U5      n[        U5      S-  S-
  nU R                  U   nU R                  U   n	U R                   R!                  U	R#                  5       UR#                  U R$                  U R&                  5      -   U//-   5        GM     S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nr   zml-1m/ratings.datrm   rn   rp      g      @)r`   rX   rt   ru   rZ   rz   r|   re   rf   r\   r}   r%   r   r[   r^   r_   appendr+   r'   r(   )
r   is_testr   ratingr   r   mov_idr   movusrs
             r    ri   Movielens._load_data   s0   	))v%OODNN+wLL,-{{G{4II$$&8WD-1ZZ\-?-?-E*Cc(C [F"6]Q.4F//&1C..-CII$$		))D$8$8$:O:OPQ"8*%  . ,+-- ,+s+   E3	AE"CE"E3"
E0	,E33
Fc                    U R                   U   n[        U Vs/ s H  n[        R                  " U5      PM     sn5      $ s  snf r   )r`   tuplere   array)r   idxr`   ds       r    __getitem__Movielens.__getitem__  s4    yy~404abhhqk40110s    A c                ,    [        U R                  5      $ r   )lenr`   r/   s    r    __len__Movielens.__len__  s    499~r#   )	r'   r`   rZ   rX   r^   r(   r]   r\   r_   )Nr
   g?r   T)rZ   rY   rX   rW   r\   r[   r]   r   rj   rD   r5   r6   )r5   r6   )r   r   r5   ztuple[npt.NDArray[Any], ...])r5   r   )r9   r:   r;   r<   r=   r>   r!   rh   ri   r   r   r?   r@   r#   r    rU   rU   g   s    4l  N$$$$##""
 !%&- $ 	
   
<%N.2r#   rU   )
__future__r   rr   rt   typingr   r   r   numpyre   paddle.dataset.commonr   	paddle.ior   numpy.typingnptrW   __all__rJ   rc   rd   r   rB   rU   r@   r#   r    <module>r      sg    # 	  . .  < #O4
'	;( >   Fb br#   