
    |-js                     6   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	Z
d dlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZmZmZmZmZ ddlmZmZ ddlmZm Z m!Z!m"Z"  e!            rddlm#Z#  e             rd dl$m%Z&  e"j'        e(          Z)ddgZ* G d de          Z+ddhdhddhdhddhfZ,d Z-	 	 	 	 d&de.ee.         z  dz  de.dz  de/de/de0e1e.f         f
d Z2	 	 	 	 	 d'de.ee.         z  e0e1e.f         z  ez  dz  de.dz  de/de/de0f
d!Z3d"e4d#e5de4fd$Z6d% Z7dS )(    N)Iterable)deepcopy)partial)Any)validate_typed_dict   )BatchFeatureImageProcessingMixin)center_crop	normalizerescale)ChannelDimension
ImageInputSizeDictget_image_sizemake_flat_list_of_imagesvalidate_preprocess_arguments)ImagesKwargsUnpack)auto_docstringis_torchvision_availableis_vision_availablelogging)PILImageResampling)
functionalprocessor_classimage_processor_typec                        e Zd ZdZeZdZdZdgZde	e         f fdZ
d Zdede	e         d	efd
Zd Zd Z	 d.deded	efdZdddedede	e         d	ee         fdZdede	e         d	efdZ	 	 	 	 	 	 d/deee         z  eeef         z  ez  dz  deee         z  eeef         z  ez  dz  deee         z  eeef         z  ez  dz  dedz  deee         z  dz  deee         z  dz  d	efdZeZ	 	 	 	 	 	 	 	 	 	 d0dedz  dedz  dedz  deee         z  dz  deee         z  dz  dedz  dedz  dedz  dedz  d d!fd"Z e!dede	e         d	efd#            Z"d	eeef         f fd$Z#	 	 d1d%e$j%        d&ed'ee&z  dz  d(ee&z  dz  d	e$j%        f
d)Z'	 	 d1d%e$j%        d*eee         z  d+eee         z  d'ee&z  dz  d(ee&z  dz  d	e$j%        fd,Z(	 	 d1d%e$j%        deeef         d'ee&z  dz  d(ee&z  dz  d	e$j%        f
d-Z) xZ*S )2BaseImageProcessorue  
    Base class for image processors with an inheritance-based backend architecture.

    This class defines the preprocessing pipeline: kwargs validation, input preparation, and dispatching to the
    backend's `_preprocess` method. Backend subclasses (`TorchvisionBackend`, `PilBackend`) inherit from this class
    and implement the actual image operations (resize, crop, rescale, normalize, etc.). Model-specific image
    processors then inherit from the appropriate backend class.

    Architecture Overview
    ---------------------

    The class hierarchy is:

        BaseImageProcessor (this class)
        ├── TorchvisionBackend    (GPU-accelerated, torch.Tensor)
        │   └── ModelImageProcessor (e.g. LlavaNextImageProcessor)
        └── PilBackend            (portable CPU, np.ndarray)
            └── ModelImageProcessorPil (e.g. CLIPImageProcessorPil)

    The preprocessing flow is:

        __call__() → preprocess() → _preprocess_image_like_inputs() → _prepare_image_like_inputs()
                                                                       (calls process_image per image)
                                                                     → _preprocess()
                                                                       (batch operations: resize, crop, etc.)

    - `process_image`: Implemented by backends. Converts a single raw input (PIL, NumPy, or Tensor) to the
      backend's working format (torch.Tensor or np.ndarray), handles RGB conversion and channel reordering.
    - `_preprocess`: Implemented by backends. Performs the actual batch processing (resize, center crop, rescale,
      normalize, pad) and returns a `BatchFeature`.

    Basic Implementation
    --------------------

    For processors that only need standard operations (resize, center crop, rescale, normalize), inherit from
    a backend and define class attributes:

        from transformers.image_processing_backends import PilBackend

        class MyImageProcessorPil(PilBackend):
            resample = PILImageResampling.BILINEAR
            image_mean = IMAGENET_DEFAULT_MEAN
            image_std = IMAGENET_DEFAULT_STD
            size = {"height": 224, "width": 224}
            do_resize = True
            do_rescale = True
            do_normalize = True

    The backend's `_preprocess` method handles the standard pipeline automatically.

    Custom Processing
    -----------------

    For processors that need custom logic (e.g., patch-based processing, multiple input types), override
    `_preprocess` in your model-specific processor. The `_preprocess` method receives already-prepared images
    (converted to the backend format with channels-first ordering) and performs the actual processing:

        class MyImageProcessor(TorchvisionBackend):
            def _preprocess(self, images, do_resize, size, do_normalize, image_mean, image_std, **kwargs):
                # Group images by shape for efficient batched operations
                grouped_images, grouped_images_index = group_images_by_shape(images)
                processed_groups = {}
                for shape, stacked_images in grouped_images.items():
                    if do_resize:
                        stacked_images = self.resize(stacked_images, size=size)
                    if do_normalize:
                        stacked_images = self.normalize(stacked_images, mean=image_mean, std=image_std)
                    processed_groups[shape] = stacked_images
                processed_images = reorder_images(processed_groups, grouped_images_index)
                return BatchFeature(data={"pixel_values": processed_images})

    For processors handling multiple input types (e.g., images + segmentation maps), override
    `_preprocess_image_like_inputs`:

        def _preprocess_image_like_inputs(
            self,
            images: ImageInput,
            segmentation_maps: ImageInput | None = None,
            **kwargs,
        ) -> BatchFeature:
            images = self._prepare_image_like_inputs(images, **kwargs)
            batch_feature = self._preprocess(images, **kwargs)

            if segmentation_maps is not None:
                maps = self._prepare_image_like_inputs(segmentation_maps, **kwargs)
                batch_feature["labels"] = self._preprocess(maps, **kwargs).pixel_values

            return batch_feature

    Extending Backend Behavior
    --------------------------

    To customize operations for a specific backend, subclass the backend and override its methods:

        from transformers.image_processing_backends import TorchvisionBackend, PilBackend

        class MyTorchvisionProcessor(TorchvisionBackend):
            def resize(self, image, size, **kwargs):
                # Custom resize logic for torchvision
                return super().resize(image, size, **kwargs)

        class MyPilProcessor(PilBackend):
            def resize(self, image, size, **kwargs):
                # Custom resize logic for PIL
                return super().resize(image, size, **kwargs)

    Custom Parameters
    -----------------

    To add parameters beyond `ImagesKwargs`, create a custom kwargs class and set it as `valid_kwargs`:

        class MyImageProcessorKwargs(ImagesKwargs):
            custom_param: int | None = None

        class MyImageProcessor(TorchvisionBackend):
            valid_kwargs = MyImageProcessorKwargs
            custom_param = 10  # default value

    Key Notes
    ---------

    - Backend selection is done at the class level: inherit from `TorchvisionBackend` or `PilBackend`
    - Backends receive images as `torch.Tensor` (Torchvision) or `np.ndarray` (PIL), always channels-first
    - All images have channel dimension first during processing, regardless of backend
    - Arguments not provided by users default to class attribute values
    - Backend classes encapsulate backend-specific logic (resize, normalize, etc.) and can be overridden
    Tgp?pixel_valueskwargsc                 :     t                      j        di | d S )N )super__init__)selfr!   	__class__s     c/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/image_processing_utils.pyr%   zBaseImageProcessor.__init__   s&    ""6"""""    c                 v   i }| j         j        D ]A}|                    |d          }||||<    t          t	          | |d                    ||<   B | j        di |}|                                D ]\  }}t          | ||           t          | j         j        	                                          | _
        dS )z^Resolve and set instance attributes from kwargs and class-level defaults for all valid kwargs.Nr#   )valid_kwargs__annotations__popr   getattr_standardize_kwargsitemssetattrlistkeys_valid_kwargs_names)r&   r!   
attributeskeykwargvalues         r(   _set_attributesz"BaseImageProcessor._set_attributes   s    
$4 	E 	ECJJsD))E "'
3"*74d+C+C"D"D
3-T-;;
;;
$**,, 	& 	&JCD#u%%%%#'(9(I(N(N(P(P#Q#Q   r)   imagesreturnc                 $     | j         |g|R i |S )z)Preprocess an image or a batch of images.)
preprocessr&   r:   argsr!   s       r(   __call__zBaseImageProcessor.__call__   s$    tv7777777r)   c                     t           )a  
        Process a single raw image into the backend's working format.

        Implemented by backend subclasses (`TorchvisionBackend`, `PilBackend`). Converts a raw input
        (PIL Image, NumPy array, or torch Tensor) to the backend's internal format (`torch.Tensor` for
        Torchvision, `np.ndarray` for PIL), handles RGB conversion and ensures channels-first ordering.
        NotImplementedErrorr&   r?   r!   s      r(   process_imagez BaseImageProcessor.process_image   s
     "!r)   c                     t           )a-  
        Perform the actual batch image preprocessing (resize, center crop, rescale, normalize, pad).

        Implemented by backend subclasses (`TorchvisionBackend`, `PilBackend`). Receives a list of
        already-prepared images (in the backend's format, channels-first) and applies the configured
        preprocessing operations. Returns a `BatchFeature` with the processed pixel values.

        Model-specific processors can override this method to implement custom preprocessing logic
        (e.g., patch-based processing in LLaVA-NeXT).
        rB   rD   s      r(   _preprocesszBaseImageProcessor._preprocess   s
     "!r)      expected_ndimsc                 N    |                      |          }t          ||          S )z
        Prepare the images structure for processing.

        Args:
            images (`ImageInput`):
                The input images to process.

        Returns:
            `ImageInput`: The images with a valid nesting.
        rI   )fetch_imagesr   )r&   r:   rI   s      r(   _prepare_images_structurez,BaseImageProcessor._prepare_images_structure   s*     ""6**'~NNNNr)   rK   c                
   |                      ||          }t          | j        g|R i |t          |          dk    o"t	          |d         t
          t          z            }|rfd|D             }nfd|D             }|S )a  
        Prepare image-like inputs for processing by converting each image via `process_image`.

        Flattens the input structure and applies `process_image` (implemented by the backend) to each
        individual image, converting raw inputs (PIL, NumPy, Tensor) into the backend's working format
        with channels-first ordering.

        Args:
            images (`ImageInput`):
                The image-like inputs to process.
            expected_ndims (`int`, *optional*, defaults to 3):
                The expected number of dimensions for the images.

        Returns:
            `list[torch.Tensor]` or `list[np.ndarray]`: The prepared images in the backend's format,
            with channels-first ordering.
        rK   r   c                 ,    g | ]}fd |D             S )c                 &    g | ]} |          S r#   r#   .0imgprocess_image_partials     r(   
<listcomp>zLBaseImageProcessor._prepare_image_like_inputs.<locals>.<listcomp>.<listcomp>#  s%     S S S!6!6s!;!; S S Sr)   r#   )rR   nested_listrT   s     r(   rU   zABaseImageProcessor._prepare_image_like_inputs.<locals>.<listcomp>#  s/    nnnXc S S S S{ S S Snnnr)   c                 &    g | ]} |          S r#   r#   rQ   s     r(   rU   zABaseImageProcessor._prepare_image_like_inputs.<locals>.<listcomp>%  s%    MMMs 5 5c : :MMMr)   )rM   r   rE   len
isinstancer2   tuple)r&   r:   rI   r?   r!   has_nested_structureprocessed_imagesrT   s          @r(   _prepare_image_like_inputsz-BaseImageProcessor._prepare_image_like_inputs  s    0 //~/VV '(: LT L L LV L L"6{{QV:fQiPU3V3V 	NnnnngmnnnMMMMfMMMr)   c                 @     | j         |fi |} | j        |g|R i |S )a  
        Preprocess image-like inputs by preparing them and dispatching to `_preprocess`.

        This method first calls `_prepare_image_like_inputs` to convert raw inputs into the backend's
        format, then calls `_preprocess` for the actual batch processing. Override this method in
        model-specific processors that need to handle multiple image-like input types (e.g., images
        and segmentation maps) or need custom orchestration of the preprocessing pipeline.
        )r]   rG   r>   s       r(   _preprocess_image_like_inputsz0BaseImageProcessor._preprocess_image_like_inputs)  sA     10BB6BBt8888888r)   Nsize	crop_sizepad_sizedefault_to_square
image_mean	image_stdc                    |i }|0t          |t                    st          d
i t          ||          }|0t          |t                    st          d
i t          |d          }|0t          |t                    st          d
i t          |d          }t          |t                    rt	          |          }t          |t                    rt	          |          }||d<   ||d<   ||d<   ||d<   ||d	<   |S )z
        Standardize kwargs to canonical format before validation.
        Can be overridden by subclasses to customize the processing of kwargs.
        N)r`   rc   ra   )
param_namerb   )r`   rg   r`   rd   re   r#   )rY   r   get_size_dictr2   rZ   )r&   r`   ra   rb   rc   rd   re   r!   s           r(   r/   z&BaseImageProcessor._standardize_kwargs:  s"    >FJtX$>$>\\mIZ[[[\\D Ix)H)H  TT={#S#S#STTI
8X(F(FVV-X*"U"U"UVVHj$'' 	+z**Ji&& 	)i((Iv'{%z)|'{r)   
do_rescalerescale_factordo_normalize	do_resizedo_center_cropresamplez7PILImageResampling | tvF.InterpolationMode | int | Nonec                 8    t          |||||||	|||

  
         dS )z@
        Validate the kwargs for the preprocess method.
        )
ri   rj   rk   rd   re   rm   ra   rl   r`   rn   N)r   )r&   ri   rj   rk   rd   re   rl   r`   rm   ra   rn   r!   s               r(   _validate_preprocess_kwargsz.BaseImageProcessor._validate_preprocess_kwargs`  sC    " 	&!)%!)	
 	
 	
 	
 	
 	
r)   c           	          t          | j        |           | j        D ]'}|                    |t	          | |d                     ( | j        di |} | j        di |  | j        |g|R i |S )z;
        Preprocess an image or a batch of images.
        Nr#   )r   r+   r4   
setdefaultr.   r/   rp   r_   )r&   r:   r?   r!   
kwarg_names        r(   r=   zBaseImageProcessor.preprocess~  s     	D-v666 2 	K 	KJj'$
D*I*IJJJJ *)33F33 	)(2262221t1&J4JJJ6JJJr)   c                    t                                                      }i }|                                D ]\\  }}t          |t                    rt          |          }|,t          t          |           |d          }|dk    r||||<   W|||<   ]|                    dd            |                    dd            |S )N	NOT_FOUND_valid_processor_keysr4   )	r$   to_dictr0   rY   r   dictr.   typer-   )r&   processor_dictfiltered_dictr6   r8   class_defaultr'   s         r(   rw   zBaseImageProcessor.to_dict  s    ** (..00 		+ 		+JC%** $U} 'T

C E E K//M4M).M#&%*c""14888/666r)   imagescaledata_formatinput_data_formatc                 $    t          |f|||d|S )a  
        Rescale an image by a scale factor. image = image * scale.

        Args:
            image (`np.ndarray`):
                Image to rescale.
            scale (`float`):
                The scaling factor to rescale pixel values by.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.

        Returns:
            `np.ndarray`: The rescaled image.
        )r~   r   r   )r   )r&   r}   r~   r   r   r!   s         r(   r   zBaseImageProcessor.rescale  s%    < urE{Vgrrkqrrrr)   meanstdc                 &    t          |f||||d|S )aZ  
        Normalize an image. image = (image - image_mean) / image_std.

        Args:
            image (`np.ndarray`):
                Image to normalize.
            mean (`float` or `Iterable[float]`):
                Image mean to use for normalization.
            std (`float` or `Iterable[float]`):
                Image standard deviation to use for normalization.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.

        Returns:
            `np.ndarray`: The normalized image.
        )r   r   r   r   )r   )r&   r}   r   r   r   r   r!   s          r(   r   zBaseImageProcessor.normalize  s6    B 
#;Rc
 
gm
 
 	
r)   c                     t          |          }d|vsd|vr$t          d|                                           t          |f|d         |d         f||d|S )a	  
        Center crop an image to `(size["height"], size["width"])`. If the input size is smaller than `crop_size` along
        any edge, the image is padded with 0's and then center cropped.

        Args:
            image (`np.ndarray`):
                Image to center crop.
            size (`dict[str, int]`):
                Size of the output image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        heightwidthz=The size dictionary must have keys 'height' and 'width'. Got )r`   r   r   )rh   
ValueErrorr3   r   )r&   r}   r`   r   r   r!   s         r(   r   zBaseImageProcessor.center_crop  s    8 T""47$#6#6j]a]f]f]h]hjjkkk
x.$w-0#/	
 

 
 
 	
r)   )rH   )NNNNNN)
NNNNNNNNNN)NN)+__name__
__module____qualname____doc__r   r+   rc   rj   model_input_namesr   r%   r9   r   r	   r@   rE   rG   intrM   r2   r   r]   r_   r   rx   strr   boolfloatr/   _further_process_kwargsrZ   rp   r   r=   rw   npndarrayr   r   r   r   __classcell__)r'   s   @r(   r   r   <   sJ       ~ ~@  LN'(#!5 # # # # # #
R R R8z 8F<<P 8Ua 8 8 8 8" " "" " "   O OO O 
	O O O O,  	#  #  # #  	# 
 &#  
c#  #  #  # J99 &	9
 
9 9 9 9& HLLPKO)-1504! !HSM!DcN2X=D! #&c3h7(BTI! %S#X6ADH	!
  $;! DK'$.! 4;&-! 
! ! ! !H 2 #''+$(2615!% $&*%)NR
 
4K
 
 Tk	

 E%L(4/
 5<'$.
 $;
 o
 t
 d?
 L
 
 
 
< K Kf\>R KWc K K K ^K&c3h      0 6:;?s szs s ++d2	s
 !11D8s 
s s s sL 6:;?#
 #
z#
 huo%#
 Xe_$	#

 ++d2#
 !11D8#
 
#
 #
 #
 #
R 6:;?%
 %
z%
 38n%
 ++d2	%

 !11D8%
 
%
 %
 %
 %
 %
 %
 %
 %
r)   r   r   r   shortest_edgelongest_edge
max_height	max_widthc                     t          | t                    sdS t          |                                           }t          D ]}||k    r dS dS )NFT)rY   rx   setr3   VALID_SIZE_DICT_KEYS)	size_dictsize_dict_keysallowed_keyss      r(   is_valid_size_dictr     s[    i&& u))**N,  \))44 *5r)   Tr`   max_sizerc   height_width_orderr;   c                    t          | t                    r|r|t          d          | | dS t          | t                    r|sd| i}|||d<   |S t          | t          t          f          r|r| d         | d         dS t          | t          t          f          r|s| d         | d         dS | ||rt          d          d|iS t          d|            )	NzLCannot specify both size as an int, with default_to_square=True and max_size)r   r   r   r   r   r   z7Cannot specify both default_to_square=True and max_sizez+Could not convert size input to size dict: )rY   r   r   rZ   r2   )r`   r   rc   r   r   s        r(   convert_to_size_dictr   (  s)    $ *!2 *klll... 
D#		 *'8 *$d+	(0In%	D5$-	(	( *-? *q'DG444	D5$-	(	( *1C *q'DG444	(. 	XVWWW))
I4II
J
JJr)   c           
         t          | t          t          z            s>t          | |||          }t                              | dt           d|  d| d           n't          | t                    rt          |           }n| }t          |          s.t          | dt           d|	                                           |S )a  
    Converts the old size parameter in the config into the new dict expected in the config. This is to ensure backwards
    compatibility with the old image processor configs and removes ambiguity over whether the tuple is in (height,
    width) or (width, height) format.

    - If `size` is tuple, it is converted to `{"height": size[0], "width": size[1]}` or `{"height": size[1], "width":
    size[0]}` if `height_width_order` is `False`.
    - If `size` is an int, and `default_to_square` is `True`, it is converted to `{"height": size, "width": size}`.
    - If `size` is an int and `default_to_square` is False, it is converted to `{"shortest_edge": size}`. If `max_size`
      is set, it is added to the dict as `{"longest_edge": max_size}`.
    - If `size` is `None` and `default_to_square` is False, the result is `{"longest_edge": max_size}` (requires
      `max_size` to be set). Tuple/list/SizeDict/dict `size` values do not use `max_size`.

    Args:
        size (`int | Iterable[int] | dict[str, int] | SizeDict`, *optional*):
            The `size` parameter to be cast into a size dictionary.
        max_size (`int | None`, *optional*):
            With `default_to_square=False`, sets `longest_edge` when `size` is an int or `None`; unused for dict,
            `SizeDict`, or tuple/list `size`. Raises if set with `default_to_square=True` when `size` is an int or `None`.
        height_width_order (`bool`, *optional*, defaults to `True`):
            If `size` is a tuple, whether it's in (height, width) or (width, height) order.
        default_to_square (`bool`, *optional*, defaults to `True`):
            If `size` is an int, whether to default to a square image or not.
    z@ should be a dictionary with one of the following sets of keys: z, got z. Converted to .z- must have one of the following set of keys: )
rY   rx   r   r   loggerinfor   r   r   r3   )r`   r   r   rc   rg   r   s         r(   rh   rh   G  s    > dD8O,, 
(x9JL^__	 * *[o * *w{ * *&* * *	
 	
 	
 	

 
D(	#	# JJ			i(( 
vvH\vvdmdrdrdtdtvv
 
 	
 r)   original_sizepossible_resolutionsc                 *   | \  }}d}d}t          d          }|D ]w\  }}t          ||z  ||z            }	t          ||	z            t          ||	z            }}
t          |
|z  ||z            }||z  |z
  }||k    s||k    r||k     r|}|}||f}x|S )a  
    Selects the best resolution from a list of possible resolutions based on the original size.

    This is done by calculating the effective and wasted resolution for each possible resolution.

    The best fit resolution is the one that maximizes the effective resolution and minimizes the wasted resolution.

    Args:
        original_size (tuple):
            The original size of the image in the format (height, width).
        possible_resolutions (list):
            A list of possible resolutions in the format [(height1, width1), (height2, width2), ...].

    Returns:
        tuple: The best fit resolution in the format (height, width).
    Nr   inf)r   minr   )r   r   original_heightoriginal_widthbest_fitmax_effective_resolutionmin_wasted_resolutionr   r   r~   downscaled_widthdownscaled_heighteffective_resolutionwasted_resolutions                 r(   select_best_resolutionr   y  s    " '4#O^H !%LL- ' 'EN*F_,DEE.1.52H.I.I3afOfKgKg+"#36G#GZiIijj"V^/CC"::: $<<<ARUjAjAj';$$5!HOr)   c                     t          | |          \  }}|\  }}||z  }||z  }||k     r(|}	t          t          j        ||z            |          }
n'|}
t          t          j        ||z            |          }	|
|	fS )zu
    Given an image and a target resolution, calculate the output size of the image after cropping to the target
    )channel_dim)r   r   mathceil)r}   target_resolutionr   r   r   target_heighttarget_widthscale_wscale_h	new_width
new_heights              r(   get_patch_output_sizer     s     '5UHY&Z&Z&Z#O^"3M<^+Go-G 	?W#<==}MM

"
	.7":;;\JJ	y  r)   )NNTT)NNTTr`   )8r   collections.abcr   copyr   	functoolsr   typingr   numpyr   huggingface_hub.dataclassesr   image_processing_baser	   r
   image_transformsr   r   r   image_utilsr   r   r   r   r   r   processing_utilsr   r   utilsr   r   r   r   r   torchvision.transforms.v2r   tvF
get_loggerr   r   INIT_SERVICE_KWARGSr   r   r   r   r   rx   r   r   rh   rZ   r2   r   r   r#   r)   r(   <module>r      sn    $ $ $ $ $ $                       ; ; ; ; ; ; E E E E E E E E = = = = = = = = = =                3 2 2 2 2 2 2 2             0//////  <;;;;;; 
	H	%	%  U
 U
 U
 U
 U
- U
 U
 U
r wn%;    (,"#	K K


$KDjK K 	K
 
#s(^K K K K@ DH#"/ /

S#X
.
9D
@/Dj/ / 	/ 
/ / / /d#% #t #PU # # # #L! ! ! ! !r)   