
    Ƒi3                     6   S SK r S SKJrJr  S SKJrJr  S SKJrJ	r	J
r
JrJr  S SKJr  S SKJrJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJrJrJrJ r   S SK!J"r"  S SK#J$r$  \"" 5       r% " S S\5      r& " S S\&5      r' " S S\&5      r( " S S\&5      r)g)    N)ABCabstractmethod)OptionalUnion)DatasetDatasetBuilderDatasetDictIterableDatasetIterableDatasetDict)load_dataset)HubApiModelScopeConfig)OssAuthConfig)DatasetContextConfig)DataFilesManager)ExternalDataset)DataMetaManager)DatasetFormationsDatasetPathNameDownloadModeVirgoDatasetConfig)
get_logger)	valid_urlc                   v    \ rS rSrSrS\4S jr\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       rS
rg)BaseDownloader   z%Base dataset downloader to load data.dataset_context_configc                     Xl         g Nr   )selfr   s     m/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/modelscope/msdatasets/data_loader/data_loader.py__init__BaseDownloader.__init__   s    &<#    c                 <    [        S[        R                   S35      e)z6The entity processing pipeline for fetching the data. 'No default implementation provided for z	.process.NotImplementedErrorr   __name__r!   s    r"   processBaseDownloader.process!   s$     "5n6M6M5NiX
 	
r%   c                 <    [        S[        R                   S35      e)Nr'   z._authorize.r(   r+   s    r"   
_authorizeBaseDownloader._authorize(   s"    !5n6M6M5Nl[
 	
r%   c                 <    [        S[        R                   S35      e)Nr'   z._build.r(   r+   s    r"   _buildBaseDownloader._build.   s"    !5n6M6M5NhW
 	
r%   c                 <    [        S[        R                   S35      e)Nr'   z._prepare_and_download.r(   r+   s    r"   _prepare_and_download$BaseDownloader._prepare_and_download4   s#    !5n6M6M5NNef
 	
r%   c                 <    [        S[        R                   S35      e)Nr'   z._post_process.r(   r+   s    r"   _post_processBaseDownloader._post_process:   s"    !5n6M6M5No^
 	
r%   r    N)r*   
__module____qualname____firstlineno____doc__r   r#   r   r,   r/   r2   r5   r8   __static_attributes__ r%   r"   r   r      sv    /=/C = 
 
 
 

 
 

 
 

 
 
r%   r   c                   b   ^  \ rS rSrS\4U 4S jjrS
S jrS
S jrS
S jrS
S jr	S
S jr
S	rU =r$ )OssDownloaderA   r   c                 \   > [         TU ]  U5        S U l        S U l        S U l        S U l        g r   )superr#   data_files_builderdatasetbuilderdata_files_managerr!   r   	__class__s     r"   r#   OssDownloader.__init__C   s6    /0>B :> 	 26>Br%   c                     U R                  5         U R                  5         U R                  5         U R                  5         g)zSequential data fetching process: authorize -> build -> prepare_and_download -> post_process,
to keep dataset_context_config updated. Nr/   r2   r5   r8   r+   s    r"   r,   OssDownloader.processM   /     	""$r%   c                 j   [        5       R                  U R                  R                  S9n[        R
                  " 5       n[        R                  " 5       nU R                  R                  (       d  [        XUS9nO(U R                  R                  nXl	        X$l
        X4l        X@R                  l        g)zjAuthorization of target dataset.
Get credentials from cache and send to the modelscope-hub in the future. access_tokencookies	git_token	user_infoN)r   get_cookiesr   tokenr   	get_tokenget_user_infoauth_configr   rT   rU   rV   )r!   rT   rU   rV   r[   s        r"   r/   OssDownloader._authorizeV   s     (&&44:: ' <$..0	$224	**66'	KK 55AAK")$-!$-!2=##/r%   c                     [        U R                  5      nUR                  5         UR                  5         UR                  U l        [	        U R                  S9U l        U R
                  R                  5       U l        g)ziSequential data files building process: build_meta -> build_data_files , to keep context_config updated. r    N)r   r   fetch_meta_filesparse_dataset_structurer   rH   get_data_files_builderrG   )r!   meta_managers     r"   r2   OssDownloader._buildi   si     't'B'BC%%',,.&2&I&I# #3#'#>#>#@..EEGr%   c                    U R                   R                  R                  nU R                   R                  R                  nU R                   R                  nU R                   R
                  nU R                   R                  nU R                   R                  nU R                   R                  nU R                   R                  nU R                   R                  n	U R                   R                  n
U R                   R                  nU R                   R                  nU R                  c  U(       d  SU S3eU(       aY  U[        R                   :X  aE  U(       a  ["        R%                  SU S35        ['        U4UUUUUU	U
R(                  US.UD6U l        gU R,                  R/                  U R                  5      U l        g)z.Fetch data-files from modelscope dataset-hub. Nzmeta-file: z$.py not found on the modelscope hub.z3Use trust_remote_code=True. Will invoke codes from z9. Please make sure that you can trust the external codes.)namerevisionsplitdata_dir
data_files	cache_dirdownload_modetrust_remote_code)r   data_meta_configdataset_py_scriptdataset_formationdataset_namesubset_nameversionrf   rg   rh   cache_root_dirrj   config_kwargsrk   rG   r   hf_compatibleloggerwarninghf_load_datasetvaluerF   rH   fetch_data_files)r!   rm   rn   ro   rp   rq   rf   rg   rh   ri   rj   input_kwargsrk   s                r"   r5   #OssDownloader._prepare_and_downloadv   s    77HHZZ 77HHZZ22??11==--55++11..7700;;
//>>	33AA22@@ 77II<<(9~-QRR!26G6U6U!U I, XB BC +!
   !%#+11"3
  
 DL  22CCDLr%   c                     [        U R                  [        5      (       a0  U R                  R                  R
                  U R                  l        g g r   )
isinstancerF   r   r   rl   meta_type_map
custom_mapr+   s    r"   r8   OssDownloader._post_process   s6    dllO44&*&A&A&R&R&`&`DLL# 5r%   )rG   rE   rH   rF   r   )returnN)r*   r:   r;   r<   r   r#   r,   r/   r2   r5   r8   r>   __classcell__rJ   s   @r"   rA   rA   A   s4    C/C C>&H%Na ar%   rA   c                   R   ^  \ rS rSrSrS\4U 4S jjrS rS rS r	S r
S	 rS
rU =r$ )VirgoDownloader   z&Data downloader for Virgo data source.r   c                 2   > [         TU ]  U5        S U l        g r   rD   r#   rF   rI   s     r"   r#   VirgoDownloader.__init__       /0r%   c                     U R                  5         U R                  5         U R                  5         U R                  5         g)zl
Sequential data fetching virgo dataset process: authorize -> build -> prepare_and_download -> post_process
NrM   r+   s    r"   r,   VirgoDownloader.process   rO   r%   c                 J   SSK Jn  [        5       R                  U R                  R
                  S9n[        R                  " 5       nU R                  R                  (       d	  U" USUS9nO)U R                  R                  nX$l	        SUl
        X4l        X@R                  l        g)zAuthorization of virgo dataset.r   )VirgoAuthConfigrQ    rS   N)&modelscope.msdatasets.auth.auth_configr   r   rW   r   rX   r   rZ   r[   rT   rU   rV   )r!   r   rT   rV   r[   s        r"   r/   VirgoDownloader._authorize   s    J(&&44:: ' <$224	**66)2DK 55AAK")$&K!$-!2=##/r%   c                    SSK Jn  SSKn[        U R                  5      nUR                  5         UR                  U l        U" S
0 U R                  R                  D6U l        [        R                  R                  U R                  R                  U R                  R                  U R                  R                  U R                  R                  5      n[        R                  " [        R                  R                  U[         R"                  5      SS9  [        R                  R                  U[         R"                  S5      n[%        U R                  R&                  UR(                  5      (       a_  U R                  R&                  nUR+                  USS9  XPR                  l        X@R                  l        [0        R3                  S	U 35        gg)z+
Fetch virgo meta and build virgo dataset.
r   )VirgoDatasetNTexist_okzmeta_content.csvF)indexzVirgo meta content saved to r?   ))modelscope.msdatasets.dataset_cls.datasetr   pandasr   r   fetch_virgo_metars   rF   ospathjoinrr   	namespacero   rq   makedirsr   	META_NAMEr}   meta	DataFrameto_csvmeta_content_cache_filevirgo_cache_dirru   info)r!   r   pdra   r   r   meta_content_dfs          r"   r2   VirgoDownloader._build   sd    	K&t'B'BC%%'&2&I&I## 9))779 '',,''66''11''44''//	1
 	GGLL/*C*CD	 #%'',,/>/H/H/A#C dll''66"ll//O""#:%"H3JLL0+:LL(KK./F.GHJ 7r%   c                   ^^^^^	^
 U R                   R                  R                  SS5      nU R                  R                  S:X  Ga  U(       Ga  SSKm	SSKmSSKnSSKJ	m
  SSK
Jm  UU	U
4S jmSU R                  l        U R                   R                  n[        R                  R!                  U R                  R"                  [$        R&                  5      mU[(        R*                  :X  a  UR-                  TSS	9  SS
KJn  UR3                  SS9  U R                  R4                  R7                  UUU4S jSS9U R                  R4                  [8        R:                  '   ggg)z;
Fetch data-files from oss-urls in the virgo meta content.
download_virgo_filesr   r   N)urlparse)partialc                   > / n/ n TR                  U 5      n U R                  SS5      nU(       a  UR                  U5        OGU R                  SS5      nU H/  nUR                  SS5      nU(       d  M  UR                  U5        M1     U H  n[        U5      nU(       a2  T" U5      n	[        R
                  R                  U	R
                  5      n
O[        SU 35      e[        R
                  R                  X5      nUR                  X{45        M     U H  u  pU(       d  M  [        R
                  R                  U5      (       a  M4  [        R                  SU 35        [        R                  " USS9  [        US	5       nUR!                  TR                  U5      R"                  5        S S S 5        M     U$ ! [         a$  n[        R                  SU 35        / n S nANS nAff = f! , (       d  f       M  = f)
Nurlr   	inner_urlzUnsupported url: zparse virgo meta info error: zDownloading file to Tr   wb)loadsgetappendr   r   r   basename
ValueErrorr   	Exceptionru   errorexistsr   r   openwritecontent)meta_info_valrg   file_url_listfile_path_listfile_urltmp_inner_member_listitemone_file_urlis_urlurl_parse_res	file_name	file_pathefile_url_itemfile_path_itemfjsonrequestsr   s                   r"   download_file<VirgoDownloader._prepare_and_download.<locals>.download_file   s    "!#($(JJ}$=M,00;H%,,X60=0A0A'1--$9D'+xxr':H'x - 4 4X > %:
 )6!*<!8!,4X,>M(*(8(89K9K(LI",/@
-K"LL$&GGLL$E	&--|.GH )6 6D1M%~bggnn^.L.L&:>:J$KLHt<!.$71GGHLL$?$G$GH 87	 6D &% ! (LL#@!DE%'N( 87s+   A*F> 5B!F> +G/>
G,G''G,/
G>	T)ignore_errors)tqdmzapply download_file)descc                 2   > T" TTS9" U R                   5      $ )N)rg   )	meta_info)rowdata_files_dirr   r   s    r"   <lambda>7VirgoDownloader._prepare_and_download.<locals>.<lambda>*  s    %!@@C!Or%      )axis)r   rs   poprF   	data_typer   r   shutilurllib.parser   	functoolsr   r   rj   r   r   r   r   r   DATA_FILES_NAMEr   FORCE_REDOWNLOADrmtree	tqdm.autor   r   r   progress_applyr   col_cache_file)r!   r   r   rj   r   r   r   r   r   r   r   s        @@@@@@r"   r5   %VirgoDownloader._prepare_and_download   s   
  $::HHLL"B ( <<!!Q&+?-)&&P 15DLL- 77EEMWW\\$,,*F*F*9*I*IKN  = ==nDA&KK2K3 #',,"3"3"B"BO #C # LL" s ,@&r%   c                     g r   r?   r+   s    r"   r8   VirgoDownloader._post_process.      r%   )rF   r   r*   r:   r;   r<   r=   r   r#   r,   r/   r2   r5   r8   r>   r   r   s   @r"   r   r      s5    0/C >&JBFP r%   r   c                   R   ^  \ rS rSrSrS\4U 4S jjrS rS rS r	S r
S	 rS
rU =r$ )MaxComputeDownloaderi2  z+Data downloader for MaxCompute data source.r   c                 2   > [         TU ]  U5        S U l        g r   r   rI   s     r"   r#   MaxComputeDownloader.__init__6  r   r%   c                     g r   r?   r+   s    r"   r,   MaxComputeDownloader.process:  r   r%   c                     g r   r?   r+   s    r"   r/   MaxComputeDownloader._authorize=  r   r%   c                     g r   r?   r+   s    r"   r2   MaxComputeDownloader._build@  r   r%   c                     g r   r?   r+   s    r"   r5   *MaxComputeDownloader._prepare_and_downloadC  r   r%   c                     g r   r?   r+   s    r"   r8   "MaxComputeDownloader._post_processF  r   r%   )rF   r   r   s   @r"   r   r   2  s1    5/C  r%   r   )*r   abcr   r   typingr   r   datasetsr   r   r	   r
   r   r   rw   modelscope.hub.apir   r   r   r   4modelscope.msdatasets.context.dataset_context_configr   3modelscope.msdatasets.data_files.data_files_managerr   !modelscope.msdatasets.dataset_clsr   ,modelscope.msdatasets.meta.data_meta_managerr   modelscope.utils.constantr   r   r   r   modelscope.utils.loggerr   modelscope.utils.url_utilsr   ru   r   rA   r   r   r?   r%   r"   <module>r     s    
 # "+ + 4 7 @ = HI I . 0	#
S #
L^aN ^aBMn M`> r%   