
    j<                     D   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ ddlmZmZmZ  ej        e          Zh d	Zh d
Ze G d d                      Zdej        ez  deeef         fdZdeej        z  dee eeez  e!z  f                  ddfdZ"deej        z  deej        z  ddfdZ#dej$        dedeej        z  e!z  ddfdZ%deez  e!z  de!fdZ&dedefdZ'dedee         ddfdZ(dej$        dej)        de*fd Z+dS )!    N)	GeneratorIterable)contextmanager)	dataclassfield)Path)Any   )DDUFCorruptedFileErrorDDUFExportErrorDDUFInvalidEntryNameError>   .txt.json.model.safetensors>   config.jsonscheduler_config.jsontokenizer_config.jsonpreprocessor_config.jsonc                       e Zd ZU dZeed<   eed<   eed<    ed          Ze	ed<   e
deed	d	f         fd
            ZddedefdZd	S )	DDUFEntrya  Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
    filenamelengthoffsetF)repr	dduf_pathreturnNc              #   B  K   | j                             d          5 }t          j        |                                dt          j                  5 }|| j        | j        | j        z            V  ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )a-  Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        rbr   )r   accessN)r   openmmapfilenoACCESS_READr   r   )selffmms      f/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/huggingface_hub/serialization/_dduf.pyas_mmapzDDUFEntry.as_mmap:   s7      ^  && 	B!188::a8HIII BRt{T['@@AAAAB B B B B B B B B B B B B B B	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	Bs5   4BA<0B<B 	 BB 	BBButf-8encodingc                     | j                             d          5 }|                    | j                   |                    | j                                      |          cddd           S # 1 swxY w Y   dS )zRead the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        r   )r+   N)r   r!   seekr   readr   decode)r%   r+   r&   s      r(   	read_textzDDUFEntry.read_textK   s     ^  && 	A!FF4;66$+&&--x-@@	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	As   AA00A47A4)r*   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r   r   bytesr)   r0        r(   r   r   #   s           MMMKKKKKKe'''It'''B5$#45 B B B ^B A A# AC A A A A A Ar:   r   r   r   c                    i }t          |           } t                              d|             t          j        t          |           d          5 }|                                D ]}t                              d|j                    |j	        t          j
        k    rt          d          	 t          |j                   n*# t          $ r}t          d|j                   |d}~ww xY wt          ||          }t          |j        ||j        |           ||j        <   	 ddd           n# 1 swxY w Y   d|vrt          d	          t#          j        |d                                                   }t)          ||                                           t                              d
|  dt-          |           d           |S )a  
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler5   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr0   _validate_dduf_structurekeyslen)r   entrieszfr@   er   indexs          r(   read_dduf_filerS   [   s   N GYI
KK0Y00111	Y	-	- KKMM 	 	DLL9$-99:::!W%777,-XYYYi)$-8888, i i i,-`QUQ^-`-`aaghhi &b$//F%.vdnXa& & &GDM""	              $ (($%^___Jw12<<>>??EUGLLNN333
KKS)SSS\\SSSTTTNs7   AD1/CD1
C+C&&C++9D11D58D5rO   c                    t                               d|  d           t                      }d}t          j        t          |           dt          j                  5 }|D ]\  }}||v rt          d|           |                    |           |dk    r\	 t          j
        t          |                                                    }n'# t          j        $ r}t          d          |d}~ww xY w	 t          |          }n%# t          $ r}t          d|           |d}~ww xY wt                               d	| d
           t#          |||           	 ddd           n# 1 swxY w Y   |t          d          	 t%          ||           n"# t&          $ r}t          d          |d}~ww xY wt                               d|             dS )a  Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[tuple[str, bytes], None, None]:
        ...     # Build a generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
    zExporting DDUF file ''NwzCan't add duplicate entry: r=   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filer>   zInvalid DDUF file structure.zDone writing DDUF file )r?   r@   setrA   rB   r5   rF   r   addrJ   rK   _load_contentr/   JSONDecodeErrorrG   r   rD   _dump_content_in_archiverL   r   )r   rO   	filenamesrR   archiver   contentrQ   s           r(   export_entries_as_ddufr_      sy   p KK4	444555IE	Yg.@	A	A AW!( 	A 	AHg9$$%&NH&N&NOOOMM(###---X J}W'='='D'D'F'FGGEE+ X X X)*OPPVWWXP4X>>, P P P%&GX&G&GHHaOPLLB(BBBCCC$Wh@@@@!	AA A A A A A A A A A A A A A A( }WXXXE 	2222! E E E<==1DE KK5)5566666sr   9E$3C
	E$
C.C))C..E$2DE$
D$DD$$3E$$E(+E(F 
F1F,,F1folder_pathc                     t                    dt          t          t          t           f                  ffd}t	          |  |                       dS )a  
    Export a folder as a DDUF file.

    AUses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
    r   c               3     K   t                                        d          D ]} |                                 s| j        t          vrt
                              d|  d           D|                               }t          |j	                  dk    rt
                              d|  d           |
                                | fV  d S )Nz**/*zSkipping file 'z' (file type not allowed)   z"' (nested directories not allowed))r   globis_filesuffixDDUF_ALLOWED_ENTRIESr?   rD   relative_torN   partsas_posix)pathpath_in_archiver`   s     r(   _iterate_over_folderz3export_folder_as_dduf.<locals>._iterate_over_folder  s      %%**622 
	3 
	3D<<>> {"666NtNNNOOO"..{;;O?())Q..WtWWWXXX!**,,d22222
	3 
	3r:   N)r   r   tupler5   r_   )r   r`   rm   s    ` r(   export_folder_as_ddufro      sg    $ {##K3(5d+;"< 3 3 3 3 3 3 9&:&:&<&<=====r:   r]   r   r^   c                    |                      |dd          5 }t          |t          t          f          rSt          |          }|                     d          5 }t	          j        ||d           d d d            n# 1 swxY w Y   n>t          |t                    r|                    |           nt          d| d          d d d            d S # 1 swxY w Y   d S )NrV   T)force_zip64r   i   zInvalid content type for z. Must be str, Path or bytes.)	r!   
isinstancer5   r   shutilcopyfileobjr8   writer   )r]   r   r^   
archive_fhcontent_path
content_fhs         r(   r[   r[     s   	h	6	6 g*gT{++ 	g==L""4(( LJ":z?KKKL L L L L L L L L L L L L L L'' 	gW%%%%!"eh"e"e"efffg g g g g g g g g g g g g g g g g gs7   ACA=1C=B	CB	ACCCc                     t          | t          t          f          r!t          |                                           S t          | t                    r| S t          dt          |            d          )zoLoad the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    z6Invalid content type. Must be str, Path or bytes. Got .)rr   r5   r   
read_bytesr8   r   type)r^   s    r(   rY   rY   )  so    
 'C;'' iG}}'')))	GU	#	# igW[\cWdWdggghhhr:   
entry_namec                 $   d|                      d          d         z   t          vrt          d|            d| v rt          d|  d          |                     d          } |                     d          dk    rt          d|  d          | S )	Nrz   zFile type not allowed: \z0Entry names must use UNIX separators ('/'). Got /   z-DDUF only supports 1 level of directory. Got )splitrg   r   stripcount)r}   s    r(   rG   rG   6  s    
Zc""2&&.BBB'(N*(N(NOOOz'(h[e(h(h(hiii!!#&&Jq  '(eXb(e(e(efffr:   rR   entry_namesc                 <   t          | t                    s t          dt          |            d          d D             }|D ]U| vrt          d d          t	          fdt
          D                       st          d dt
           d          Vd	S )
a  
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least a config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
    z>Invalid 'model_index.json' content. Must be a dictionary. Got rz   c                 L    h | ]!}d |v |                     d           d         "S )r   r   )r   ).0entrys     r(   	<setcomp>z+_validate_dduf_structure.<locals>.<setcomp>V  s.    QQQEC5LLEKK$$Q'LLLr:   zMissing required entry 'z' in 'model_index.json'.c              3   *   K   | ]} d | v V  dS )r   Nr9   )r   required_entryr   folders     r(   	<genexpr>z+_validate_dduf_structure.<locals>.<genexpr>Z  s6      rr>f//~//;>rrrrrrr:   z!Missing required file in folder 'z!'. Must contains at least one of N)rr   dictr   r|   anyDDUF_FOLDER_REQUIRED_ENTRIES)rR   r   dduf_foldersr   s    ` @r(   rL   rL   A  s    $ eT"" v$%tfjkpfqfq%t%t%tuuuQQ[QQQL  ()dF)d)d)deeerrrrrUqrrrrr 	(|F||]y|||  	 r:   rP   r@   c                    | j         t          d          |j        }| j                             |           | j                             d          }t          |          dk     rt          d          t                              |dd         d          }t                              |dd         d          }|dz   |z   |z   }|S )a1  
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
    Nz+ZipFile object must be opened in read mode.   zIncomplete local file header.      little)fpr   header_offsetr-   r.   rN   r7   
from_bytes)rP   r@   r   local_file_headerfilename_lenextra_field_lendata_offsets          r(   rH   rH   `  s     
u}$%RSSS &M EJJ}

2
""$%DEEE >>"3BrE":HEELnn%6r"u%=xHHO  "$|3oEKr:   ),rJ   loggingr"   osrs   rA   collections.abcr   r   
contextlibr   dataclassesr   r   pathlibr   typingr	   errorsr   r   r   	getLoggerr1   r?   rg   r   r   PathLiker5   r   rS   rn   r8   r_   ro   rB   r[   rY   rG   rL   ZipInfor7   rH   r9   r:   r(   <module>r      s      				   / / / / / / / / % % % % % % ( ( ( ( ( ( ( (             W W W W W W W W W W 
	8	$	$          4A 4A 4A 4A 4A 4A 4A 4AnBbkC/ BDi4H B B B BJV7cBK&7 V7(5QTVY\`V`chVhQhKiBj V7os V7 V7 V7 V7r!>S2;%6 !>S2;EV !>[_ !> !> !> !>H	ggo 	g 	gsUWU`O`chOh 	gmq 	g 	g 	g 	g
i3:- 
i% 
i 
i 
i 
i# #    C hsm     >" " "C " " " " " "r:   