
    j                     Z    d dl Z d dlmZ d dlmZ d dlmZ d dlmZ  G d de          Z	dS )    N)	LetterBox)BasePredictor)Results)opsc                       e Zd ZdZd Zd ZdS )RTDETRPredictoraH  RT-DETR (Real-Time Detection Transformer) Predictor extending the BasePredictor class for making predictions.

    This class leverages Vision Transformers to provide real-time object detection while maintaining high accuracy. It
    supports key features like efficient hybrid encoding and IoU-aware query selection.

    Attributes:
        imgsz (int): Image size for inference (must be square and scale-filled).
        args (dict): Argument overrides for the predictor.
        model (torch.nn.Module): The loaded RT-DETR model.
        batch (list): Current batch of processed inputs.

    Methods:
        postprocess: Postprocess raw model predictions to generate bounding boxes and confidence scores.
        pre_transform: Pre-transform input images before feeding them into the model for inference.

    Examples:
        >>> from ultralytics.utils import ASSETS
        >>> from ultralytics.models.rtdetr import RTDETRPredictor
        >>> args = dict(model="rtdetr-l.pt", source=ASSETS)
        >>> predictor = RTDETRPredictor(overrides=args)
        >>> predictor.predict_cli()
    c           	      |   t          |t          t          f          r|d         }|                    dd          \  }}}t          |t                    st	          j        |          ddddf         }g }t          ||||| j        d                   D ]&\  }}	}
}}t	          j        |          }|		                    d          | j
        j        k    }| j
        j        ?|
t          j        | j
        j        |
j                  k                        d          |z  }t          j        ||	|
gd          |         d| j
        j                 }|j        dd	         \  }}|ddd	gfxx         |z  cc<   |ddd
gfxx         |z  cc<   |                    t+          ||| j        j        |                     (|S )au  Postprocess the raw predictions from the model to generate bounding boxes and confidence scores.

        The method filters detections based on confidence and class if specified in `self.args`. It converts model
        predictions (already top-k selected by the decoder head) to Results objects containing properly scaled bounding
        boxes.

        Args:
            preds (list | tuple): List of [predictions, extra] from the model, where predictions have shape (bs,
                num_queries, 6) with format [cx, cy, w, h, score, class].
            img (torch.Tensor): Processed input images with shape (N, 3, H, W).
            orig_imgs (list | torch.Tensor): Original, unprocessed images.

        Returns:
            (list[Results]): A list of Results objects containing the post-processed bounding boxes, confidence scores,
                and class labels.
        r   )      r   )dim.N)devicer         )pathnamesboxes)
isinstancelisttuplesplitr   convert_torch2numpy_batchzipbatch	xywh2xyxysqueezeargsconfclassestorchtensorr   anycatmax_detshapeappendr   modelr   )selfpredsimg	orig_imgsbboxesscoreslabelsresultsbboxscorelabelorig_imgimg_pathidxpredohows                    d/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/ultralytics/models/rtdetr/predict.pypostprocesszRTDETRPredictor.postprocess#   s   " edE]++ 	!HE!&YB!?!?)T** 	L5i@@dddKI69&&&R[]a]ghi]j6k6k 		a 		a2D%(=&&D--##din4Cy ,TY->u| T T TTYYZ[\\_bb9dE51r:::3?@S$)BS@STD^BQB'FBq!f#q!f#NN78($*BRZ^___````    c                 N    t          | j        dd          fd|D             S )aj  Pre-transform input images before feeding them into the model for inference.

        The input images are letterboxed to ensure a square aspect ratio and scale-filled.

        Args:
            im (list[np.ndarray]): Input images of shape [(H, W, 3) x N].

        Returns:
            (list): List of pre-transformed images ready for model inference.
        FT)auto
scale_fillc                 (    g | ]} |           S ))image ).0x	letterboxs     r9   
<listcomp>z1RTDETRPredictor.pre_transform.<locals>.<listcomp>S   s&    ///q		"""///r;   )r   imgsz)r(   imrD   s     @r9   pre_transformzRTDETRPredictor.pre_transformG   s5     djuFFF	////B////r;   N)__name__
__module____qualname____doc__r:   rH   rA   r;   r9   r   r      s=         ." " "H0 0 0 0 0r;   r   )
r    ultralytics.data.augmentr   ultralytics.engine.predictorr   ultralytics.engine.resultsr   ultralytics.utilsr   r   rA   r;   r9   <module>rQ      s     . . . . . . 6 6 6 6 6 6 . . . . . . ! ! ! ! ! !H0 H0 H0 H0 H0m H0 H0 H0 H0 H0r;   