
    j@*                        d Z ddlZddlmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZmZ dd
lmZ ddlm Z   e! ee                    Z" ee          Z# ej$        dd e#D             e%          Z&ee%dz   ej'        dd(                    e"           d ee"                    f         Z) ed          Z*e*+                    dg d          dddddeddddddfdee%dz   ej,        d          f         dedededee&dz   ej'        d          f         d ed!e)d"ee- ej'        d#d$d%          f         d&ee- ej'        d'd(          f         d)ee- ej'        d*d+d,          f         d-ed.ed/dfd0            Z.e*+                    d1g d2          	 	 dRd4ee% ej,        d5          f         d ed.ed/dfd6            Z/e*+                    d7d8d9g          	 	 	 dSd4ee% ej,        d:          f         d-ed!e)d.ed/df
d;            Z0e*+                    d<g d=          	 	 	 dSd4ee% ej,        d:          f         d>ee%dz   ej'        d?d@          f         dAee%dz   ej'        dB          f         d.ed/df
dC            Z1e*+                    dDdEdFg          	 dTdDee% ej,        dG          f         d.ed/dfdH            Z2e*+                    dIg dJ          	 	 	 dUd4ee% ej,        d:          f         dKee- ej'        dLdM          f         dNee- ej'        dOdP          f         d.ed/df
dQ            Z3dS )Va  Contains commands to interact with datasets on the Hugging Face Hub.

Usage:
    # list datasets on the Hub
    hf datasets ls

    # list datasets with a search query
    hf datasets ls --search "code"

    # get info about a dataset
    hf datasets info HuggingFaceFW/fineweb
    N)	Annotatedget_args)execute_raw_sql_query)CLIErrorRepositoryNotFoundErrorRevisionNotFoundError)DatasetSort_TExpandDatasetProperty_T)DatasetCard   )REPO_LIST_DEFAULT_LIMIT	AuthorOpt	FilterOptLimitOptRevisionOpt	SearchOptTokenOptapi_object_to_dict
get_hf_apimake_expand_properties_parsertyper_factory)list_repo_files_cmd)outDatasetSortEnumc                     i | ]}||S  r   ).0ss     _/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/huggingface_hub/cli/datasets.py
<dictcomp>r    8   s    /L/L/L1/L/L/L    )typezComma-separated properties to return. When used, only the listed properties (and id) are returned. Example: '--expand=downloads,likes,tags'. Valid: z, .)helpcallbackz"Interact with datasets on the Hub.)r$   z	list | ls)zhf datasets lsz*hf datasets ls --sort downloads --limit 10zhf datasets ls --search "code"z*hf datasets ls --filter benchmark:officialz$hf datasets ls HuggingFaceFW/finewebz'hf datasets ls HuggingFaceFW/fineweb -Rz.hf datasets ls HuggingFaceFW/fineweb --tree -h)examplesFrepo_idzVDataset ID (e.g. `username/repo-name`) to list files from. If omitted, lists datasets.searchauthorfiltersortzSort results.limitexpandhuman_readablez--human-readablez-hz=Show sizes in human readable format (only for listing files).as_treez--treez3List files in tree format (only for listing files).	recursivez--recursivez-Rz0List files recursively (only for listing files).revisiontokenreturnc           	         | |t          j        d          |t          j        d          |t          j        d          |t          j        d          |t          k    rt          j        d          |t          j        d          t          | d|||	|
|	          S |rt          j        d
          |	rt          j        d          |rt          j        d          |
t          j        d          t	          |          }|r|j        nd}d |                    ||||||          D             }t          j        |           dS )zList datasets on the Hub, or files in a dataset repo.

    When called with no argument, lists datasets on the Hub.
    When called with a dataset ID, lists files in that dataset repo.
    Nz'Cannot use --search when listing files.z'Cannot use --author when listing files.z'Cannot use --filter when listing files.z%Cannot use --sort when listing files.z&Cannot use --limit when listing files.z'Cannot use --expand when listing files.dataset)r'   	repo_typer.   r/   r0   r1   r2   z(Cannot use --tree when listing datasets.z-Cannot use --recursive when listing datasets.z2Cannot use --human-readable when listing datasets.z,Cannot use --revision when listing datasets.r2   c                 ,    g | ]}t          |          S r   r   )r   dataset_infos     r   
<listcomp>zdatasets_ls.<locals>.<listcomp>   s0     
 
 
 	<((
 
 
r!   )r*   r)   r(   r+   r,   r-   )	typerBadParameterr   r   r   valuelist_datasetsr   table)r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   apisort_keyresultss                  r   datasets_lsrD   G   s   \ $%NOOO$%NOOO$%NOOO$%LMMM+++$%MNNN$%NOOO")
 
 
 	
  M !KLLL R !PQQQ W !UVVV !OPPP
5
!
!
!C!+tzztH
 
-- . 
 

 
 
G Igr!   leaderboard)z4hf datasets leaderboard SWE-bench/SWE-bench_VerifiedzLhf datasets leaderboard SWE-bench/SWE-bench_Verified --limit 5 --format jsonzIhf datasets ls --filter benchmark:official  # list available leaderboards   
dataset_idz?The benchmark dataset ID (e.g. `SWE-bench/SWE-bench_Verified`).c                 *   t          |          }|                    |           }d |d|         D             }t          j        |g ddddd	           t          j        d
           |r%t          j        d|d         j         d           dS dS )zList model scores from a dataset leaderboard. This command helps find the best models for a task or compare models by benchmark scores. Use 'hf datasets ls --filter benchmark:official' to list available leaderboards.r7   )r'   c                 ,    g | ]}t          |          S r   r9   r   entrys     r   r;   z(datasets_leaderboard.<locals>.<listcomp>   s!    JJJU!%((JJJr!   N)rankmodel_idr>   sourcerM   right)rL   r>   )headersid_key
alignmentszPUse 'hf datasets ls --filter benchmark:official' to list available leaderboards.zUse 'hf models info r   z' to get details about a model.)r   get_dataset_leaderboardr   r@   hintrM   )rG   r,   r2   rA   rE   rC   s         r   datasets_leaderboardrU      s     5
!
!
!C--j-AAKJJk&5&6IJJJGI777#g66	    H_``` b`A(?```aaaaab br!   infoz&hf datasets info HuggingFaceFW/finewebz9hf datasets info my-dataset --expand downloads,likes,tagsz+The dataset ID (e.g. `username/repo-name`).c                    t          |          }	 |                    | ||          }nJ# t          $ r}t          d|  d          |d}~wt          $ r}t          d| d|  d          |d}~ww xY wt          j        |           dS )	z$Get info about a dataset on the Hub.r7   )r'   r1   r-   z	Dataset 'z' not found.Nz
Revision 'z' not found on 'z'.)r   r:   r   r   r   r   dict)rG   r1   r-   r2   rA   rV   es          r   datasets_inforZ      s     5
!
!
!CU
XfUU" D D D;:;;;<<!C  U U ULHLLjLLLMMSTTUHTNNNNNs    + 
A2A		A2A--A2parquet)z(hf datasets parquet cfahlgren1/hub-statsz8hf datasets parquet cfahlgren1/hub-stats --subset modelsz6hf datasets parquet cfahlgren1/hub-stats --split trainz6hf datasets parquet cfahlgren1/hub-stats --format jsonsubsetz--subsetz(Filter parquet entries by subset/config.splitz Filter parquet entries by split.c                     t          |          }|                    | |          }fd|D             }d |D             }t          j        |g dd           dS )	z/List parquet file URLs available for a dataset.r7   )r'   configc                 .    g | ]}|j         k    |S N)r]   )r   rK   r]   s     r   r;   z$datasets_parquet.<locals>.<listcomp>   s)    TTT%emu{e?S?S?S?S?Sr!   c                 D    g | ]}|j         |j        |j        |j        d S )r\   r]   urlsize)r_   r]   rd   re   rJ   s     r   r;   z$datasets_parquet.<locals>.<listcomp>   s;       af5<%+eiQVQ[\\  r!   rc   rd   )rP   rQ   N)r   list_dataset_parquet_filesr   r@   )rG   r\   r]   r2   rA   entriesfilteredrC   s     `     r   datasets_parquetri      s      5
!
!
!C,,Z,OOGTTTT7TTTH jr  G IgAAA%PPPPPPr!   sqlzhf datasets sql "SELECT COUNT(*) AS rows FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet')"zhf datasets sql "SELECT * FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet') LIMIT 5" --format jsonzRaw SQL query to execute.c                     	 t          | |          }n/# t          $ r"}t          t          |                    |d}~ww xY wt	          j        |           dS )zAExecute a raw SQL query with DuckDB against dataset parquet URLs.)	sql_queryr2   N)r   ImportErrorr   strr   r@   )rj   r2   resultrY   s       r   datasets_sqlrp      sf    &&EBBB & & &s1vvA%&Ifs    
A ;A card)z&hf datasets card HuggingFaceFW/finewebz1hf datasets card HuggingFaceFW/fineweb --metadataz?hf datasets card HuggingFaceFW/fineweb --metadata --format jsonz-hf datasets card HuggingFaceFW/fineweb --textmetadataz
--metadataz'Output only the metadata from the card.textz--textz(Output only the text body (no metadata).c                 R   |r|rt          d          t          j        | |          }|r-t          j        |j                                                   dS |rt          j        |j                   dS t          j        |j                   t          j	        d|  d           dS )z7Get the dataset card (README) for a dataset on the Hub.z---metadata and --text are mutually exclusive.r7   zUse `hf datasets card z/ --metadata` to extract only the card metadata.N)
r   r   loadr   rX   datato_dictrs   contentrT   )rG   rr   rs   r2   rq   s        r   datasets_cardry     s       HD HFGGGJe444D g""$$%%%%%	 ge*eeefffffr!   )rF   N)NNNra   )FFN)4__doc__enumtypingr   r   r<   huggingface_hub._dataset_viewerr   huggingface_hub.errorsr   r   r   huggingface_hub.hf_apir	   r
   huggingface_hub.repocardr   
_cli_utilsr   r   r   r   r   r   r   r   r   r   r   _file_listingr   _outputr   sorted_EXPAND_PROPERTIES_SORT_OPTIONSEnumrn   r   Optionjoin	ExpandOptdatasets_clicommandArgumentboolrD   rU   rZ   ri   rp   ry   r   r!   r   <module>r      s     & & & & & & & &  A A A A A A [ [ [ [ [ [ [ [ [ [ I I I I I I I I 0 0 0 0 0 0                          / . . . . .       VHH%<==>> ''$)-/L/Lm/L/L/LSVWWW $JEL E  ei  en  en  oA  eB  eB  E  E  E../ABB  	 }"FGGG        	 	- 	 	 	 7O Od
tuuu	wO
 O O O $/***	,O O O '4sttt	vO$ X$YZZZ	\%O, ]D/abbb	d-O4 5O6 7O8 
9O O O Od       b b#~u~3tuuuuvbb b 
	b b b b( 
0C    !	 #~u~3`aaaab  	
 
   "       ptZ^	Q Q#~u~3`aaaabQcDj,%,z@j"k"k"kklQ S4Z3U!V!V!VVWQ 	Q
 
Q Q Q Q  	 	c 	j    	 		3,GHHHH	I		 
	 	 	 	 
      mrej	g g#~u~3`aaaabglel<>ghhhhig D,%,x6`aaaa
bg 	g
 
g g g g g gr!   