
    |-jC                        d dl Z d dlmZ d dlmZmZ d dlmZ d dlZddl	m
Z
mZ ddlmZ  e
            rd dlZdZ ej        d	          Zej        rN ej                    Ze                     ej        d
                     e                    e           de_        deej        eeef         fdZ G d de          Ze G d d                      Ze G d d                      Z G d d          ZdS )    N)deepcopy)	dataclassfield)IntEnum   )is_psutil_availableis_torch_xpu_available)loggingContinuousBatchingLoggerz4%(asctime)s - %(name)s - %(levelname)s - %(message)sFreturnc                  L   t           j                                        rt          j        d          } t           j                                         t           j                                         t           j                            |           \  }}t           j                            |           }||z
  }nt                      rt          j        d          } t           j	                                         t           j	                                         t           j	        
                    |           j        }t           j	                            |           }t           j	                            |           }n't           j        j                                        r}t           j        j                                        rZt          j        d          } t           j                                        }| t#          t           j        d                      z
  }d}nt          j        d          } t%                      rEt'          j                    j        }t'          j                                                    j        }|}n t2                              d           d}d}d}| |||fS )Ncudaxpumpsrecommended_max_memoryr   cpuzCannot get memory breakdown on CPU without psutil: returning 0 for all memory values. Please install psutil to get an actual memory breakdown.)torchr   is_availabledeviceempty_cachesynchronizemem_get_infomemory_reservedr	   r   get_device_propertiestotal_memorymemory_allocatedbackendsr   is_builtdriver_allocated_memorygetattrr   psutilvirtual_memorytotalProcessmemory_inforssloggererror)r   free_memoryr   reserved_memoryallocated_memorys        t/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr.   +   s#   z   #!f%%
   
    %*J$;$;F$C$C!\*44V<<'+5		!	! !e$$		y66v>>K)33F;; 955f==			(	(	*	* !u~/A/J/J/L/L !e$$y88::'*V'%)=U*V*V*X*XXe$$   	!!0228L%~//;;==A.OOLL<   LO <2BBB    c                   &    e Zd ZdZdZdZdZdZdZdS )RequestStatusz5Status of a generation request through its lifecycle.r         r      N)	__name__
__module____qualname____doc__PENDING
PREFILLINGDECODINGFINISHEDFAILED r/   r-   r1   r1   T   s-        ??GJHHFFFr/   r1   c                   J   e Zd ZU dZeed<    ee          Zee	         ed<    ee          Z
ee	         ed<    ee          Zee         ed<   dZedz  ed<   ej        Zeed	<    eej                  Zeed
<   dZeeef         ed<   dZee         dz  ed<   defdZdS )GenerationOutputa  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
        lifespan (tuple[float, float]): The time the request was no longer pending and the time the request finished.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNr)   statuscreated_timer   r   lifespan
timestampsr   c                 ,    | j         t          j        k    S N)rG   r1   r<   selfs    r-   is_finishedzGenerationOutput.is_finishedw   s    {m444r/   )r5   r6   r7   r8   str__annotations__r   listrD   intrE   rF   floatr)   r1   r9   rG   timeperf_counterrH   rJ   tuplerK   boolrP   r>   r/   r-   r@   r@   ^   s#          OOO!E$777JS	777"'%"="="=d3i===!E$777Hd5k777E3:)1FM111%0ABBBL%BBB$,HeE5L!,,,%)JUd")))5T 5 5 5 5 5 5r/   r@   c                   h   e Zd ZU dZeed<   ee         ed<   dZe	ed<   dZ
e	ed<   dZedz  ed	<   dZeee         z  dz  ed
<   dZeed<    ee          Zeed<    ee          Zee         ed<    ee          Zee         ed<    ee          Zee         ed<   dZeed<   dZeed<   ej        Zeed<    ee          Zee         ed<    eej                  Zeed<   dZedz  ed<   dZ e!eef         ed<    ee          Z"ee         ed<   dZ#eed<   dZ$eed<    ee          Z%ee         ed<   dZ&e	ed<   d  Z'e(d!efd"            Z)e)j*        d#efd$            Z)e(d!ee         dz  fd%            Z+d& Z,d!efd'Z-d!efd(Z.d)ed*edz  d!e	fd+Z/d, Z0d- Z1d.ed!d fd/Z2d!efd0Z3d2d1Z4dS )3RequestStatea  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        initial_tokens (list[int]): The initial prompt tokens.
        num_children (int): The number of children requests
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The initial tokens IDs remaining to be processed.
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (int): The number of blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int | None): The maximum number of new tokens to generate.
        eos_token_id (None | int | list[int]): The ID(s) of the end-of-sequence tokens. Only used in post-init.
        _eos_token_ids (set[int]): The IDs of the end-of-sequence tokens, formatted as a set.
        streaming (bool): Whether to stream tokens as they're generated
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
    rA   initial_tokensF	streamingrecord_timestamps   Nmax_new_tokenseos_token_idr   num_childrenrB   logit_processor_kwargstokens_to_processrE   rF   position_offsetallocated_blocks_status_eos_token_idsrH   r)   rI   rJ   _timestamps_true_initial_tokens_new_tokens_limitremaining_prefill_tokensis_cpu_offloadedc                 N   | j         dn| j         | _        | j        d d          | _        | j        d S t          | j        t                    r.| j        dk    r!| j                            | j                   d S d S | j        D ]"}|dk    r| j                            |           #d S )Nrk   r   )	r`   rl   r\   rm   ra   
isinstancerT   rh   add)rO   token_ids     r-   __post_init__zRequestState.__post_init__   s    /3/B/JPTPc(,(;AAA(>%$D)3// 	6 A%%#''(9::::: &% !- 6 6q=='++H5556 6r/   r   c                     | j         S rM   )rg   rN   s    r-   rG   zRequestState.status   s
    |r/   valuec                    | j         t          j        k    rt          j                    df| _        nI|t          j        k    r9| j        d         t          j                    f| _        |                                  || _         d S )Nr   r   )rg   r1   r9   rV   rW   rJ   r<   log_end_of_request)rO   ru   s     r-   rG   zRequestState.status   sp    <=000!.00"5DMMm,,,!]1-t/@/B/BCDM##%%%r/   c                 "    | j         r| j        nd S rM   )r^   ri   rN   s    r-   rK   zRequestState.timestamps   s    #'#9CttCr/   c                    t          | j                  }|                                 }| j        d         | j        z
  }| j        d         | j        z
  }t
                              d| j         d|d|d|d|
           d S )Nr   r2   Request z finished: prefill_len = z decode_len = z start_time = z end_time = )lenr\   generated_lenrJ   rH   r(   inforA   )rO   prefill_len
decode_len
start_timeend_times        r-   rw   zRequestState.log_end_of_request   s    $-..''))
]1%(99
=#d&77qtqq;qqJqqT^qqdlqq	
 	
 	
 	
 	
r/   c                     | j         S )zCGet the current length of the sequence (prompt + generated tokens).)re   rN   s    r-   current_lenzRequestState.current_len   s    ##r/   c                 *    t          | j                  S )z*Get the number of tokens generated so far.)r{   rE   rN   s    r-   r|   zRequestState.generated_len   s    4()))r/   rr   logprobc                    | j         t          j        k    rdS | j        r+| j                            t          j                               || j        v }| 	                                }|s|| j
        k     rD| j                            |           |g| _        |dz  }|| j                            |           n%t                              d| j         d|            |s|| j
        k    rt          j        | _         dS dS )zUpdate the request with a newly generated token (and optional log probability of the token) and check for
        completion. Returns True if the request is now complete, False otherwise.Fr2   Nrz   z generated a useless token: T)rG   r1   r;   r^   ri   appendrV   rW   rh   r|   rl   rE   rd   rF   r(   warningrA   r<   )rO   rr   r   is_eosr   s        r-   update_and_check_completionz(RequestState.update_and_check_completion   s    ;-0005 ! 	9##D$5$7$7888 T00((**  	_kD$:::!((222&.ZD"1K"$$W---NN]do]]S[]]^^^ 	[D$:::'0DK4ur/   c                 X   d| j          d| j         d|                                  dt          | j                   dt          | j                   d| j         dt          | j                   d| j         d	| j	         d
| j
         g
}dd                    |          z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zlogit_processor_kwargs=zRequestState(
	z,
	z
))rA   rg   r|   r{   rd   rm   re   r\   rf   rE   rc   join)rO   msgs     r-   __repr__zRequestState.__repr__  s    +$/++$dl$$0$,,..009C 67799DD$A B BDD/-//<#d&9":":<<7 5777 577Cd&ACC
 #W\\#%6%66>>r/   c                    | j         r:| j        | j         d         | j        z   | _        | j        d| j                  | _        t          | j        | j        | j        | j        | j        | j        | j        | j	        | j
        	  	        S )z7Convert the request state to a GenerationOutput object.N)	rA   rD   rE   rF   r)   rG   rH   rJ   rK   )rj   r\   rE   r@   rA   rF   r)   rG   rH   rJ   rK   rN   s    r-   to_generation_outputz!RequestState.to_generation_output  s    $ 	S$($78Q8S8S$TW[Wl$lD!"&"56Q8Q6Q"RD*!2]*;*]

 

 

 
	
r/   new_request_idc                     t          |           }||_        t          j                    |_        |j        df|_        g |_        | j        dd         |_        |S )ziFork the request into a new request with the same state except for request_id, created_time and lifespan.r   N)r   rA   rV   rW   rH   rJ   ri   rm   )rO   r   new_requests      r-   forkzRequestState.fork,  sX    tnn!/#'#4#6#6  + 8"="$/3/LQQQ/O,r/   c                 j    | j         | j        | j        | j        | j        t          | j                  dS )zXGet all the fields necessary to create a request that would have the same configuration.)r]   r^   r`   ra   rb   rc   )r]   r^   r`   ra   rb   r   rc   rN   s    r-   get_request_configzRequestState.get_request_config8  s>     !%!7"1 - -&.t/J&K&K
 
 	
r/   c           
      h   |                                  }| j        | j        t          | j                  z
  |d<   t	          d| j        | j        | j        z   | j        dd         | j        t          | j                  z   d|}| j        r| j        |_        nt          | j                  |_        |S )aT  Creates an equivalent new request by removing the generated tokens and adding them to the initial prompt. The
        created request has THE SAME request_id. Notably, we can retrieve the original request from the created one with
        the _true_initial_tokens attribute. The logprobs of the generated tokens are kept in the new request.Nr`   )rA   r\   rF   rj   r>   )	r   r`   r{   rE   r[   rA   r\   rF   rj   )rO   request_config	new_states      r-   !create_equivalent_initial_requestz.RequestState.create_equivalent_initial_requestC  s    
 0022*/3/BSI^E_E_/_N+,  
.1FF]111%!%!:SAT=U=U!U	
 

 
 
	 $ 	F-1-FI** .11D-E-EI*r/   )r   r[   )5r5   r6   r7   r8   rQ   rR   rS   rT   r]   rY   r^   r`   ra   rb   r   dictrc   rd   rE   rF   rU   re   rf   r1   r9   rg   setrh   rV   rW   rH   r)   rJ   rX   ri   rj   rl   rm   rn   rs   propertyrG   setterrK   rw   r   r|   r   r   r   r   r   r   r>   r/   r-   r[   r[   {   s         . OOOI It#t### "$NC$J###+/L#S	/D(///L##(5#>#>#>D>>> $)5#>#>#>tCy>>>"'%"="="=d3i===!E$777Hd5k777OSc*2G]222$uS999NCH999  %0ABBBL%BBBE3:$,HeE5L!,,,$uT:::Ke::: !#!!! (s'''*/%*E*E*Ed3iEEE"d"""6 6 6$     X ]M    ] DDK$. D D D XD
 
 
$S $ $ $ $*s * * * *
C %$, SW    >? ? ?
 
 
"
3 
> 
 
 
 
	
D 	
 	
 	
 	
     r/   r[   c            
       2    e Zd ZdZdZdededededdf
d	ZdS )
FutureRequestStatezPTracks the current state of a request and the relevant information to update it.statehas_new_tokencomplete_blocksquery_lengthr   r   r   r   r   Nc                 >    || _         || _        || _        || _        d S rM   r   )rO   r   r   r   r   s        r-   __init__zFutureRequestState.__init__c  s'    
*.(r/   )	r5   r6   r7   r8   	__slots__r[   rY   rT   r   r>   r/   r-   r   r   ]  s\        ZZ NI)l )4 )RU )eh )mq ) ) ) ) ) )r/   r   ) rV   copyr   dataclassesr   r   enumr   r   utilsr   r	   utils.loggingr
   r"   TMP_TOKEN_ID	getLoggerr(   	propagateStreamHandlerhandlersetFormatter	Formatter
addHandlerrX   r   rT   r.   r1   r@   r[   r   r>   r/   r-   <module>r      s          ( ( ( ( ( ( ( (        @ @ @ @ @ @ @ @ $ $ $ $ $ $  MMM  
	5	6	6	 #g#%%G**+abbccc
gF&Cu|S#s/J)K &C &C &C &CR    G    5 5 5 5 5 5 5 58 ^ ^ ^ ^ ^ ^ ^ ^B
) 
) 
) 
) 
) 
) 
) 
) 
) 
)r/   