
    |-je                     v    d dl Z d dlmZ d dlZd dlmZ d dlmZ ddl	m
Z
  ed          Z G d d          ZdS )	    N)TypeVar)
DeviceMesh   )loggerTc                       e Zd ZdZdedz  ddfdZdefdZddZde	j
        de	j
        fd	Zdedefd
Zde	j
        de	j
        fdZdedefdZddZdedz  de	j        ddfdZdS )DistributedHelperzlA helper class to handle distributed-related operations. Notably, it does not crash when distributed is off.device_meshNreturnc                 |   || _         t          j                    ot          j                    | _        | j        rt          j                    nd| _        | j        rt          j                    nd| _        | j        r| j         | j         	                                | _
        | j                                         | _        t          j        | j        d          | _        | j                                         | _        t          j        | j                  }t          j        |d          | _        n#d| _
        d | _        d| _        d| _        d | _        |                                 | _        | j        | j
        z  | _        | j        | j
        z  | _        t1          j        dgt0          j        d          | _        d S )Nr   r   gloo)ranksbackendcpudtypedevice)r
   distis_availableis_initializeddist_onget_rankglobal_rankget_world_size
world_sizesizetp_size	get_grouptp_groupget_global_ranktp_root_global_rankget_local_ranktp_local_rankget_process_group_ranks	new_groupcpu_comm_groupinfer_if_tp_driveris_tp_driverdp_rankdp_sizetorchtensorint64_cpu_int_acc)selfr
   tp_rankss      w/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/transformers/generation/continuous_batching/distributed.py__init__zDistributedHelper.__init__   s}   &(**Dt/B/D/D /3lA4=???37<F$-///Q < 	'D,8+0022DL ,6688DM'+';DM1'M'MD$!%!1!@!@!B!BD3DMBBH"&.x"P"P"PDDL DM'(D$!"D"&D !3355 '4<7$,6 "L!EKNNN    c                     | j         dk    S )Nr   )r#   r/   s    r1   r'   z$DistributedHelper.infer_if_tp_driverA   s    !Q&&r3   c                 X    | j         "t          j        | j                    d| _         dS dS )zDestroys the CPU comm group.N)r&   r   destroy_process_groupr5   s    r1   destroy_cpu_comm_groupz(DistributedHelper.destroy_cpu_comm_groupD   s5    *&t':;;;"&D +*r3   valuec                 `    | j         dk    r"t          j        || j        d| j                   |S )zPInside each TP group, broadcasts the given value from rank 0 to all other ranks.r   Fsrcasync_opgroup)r   r   	broadcastr!   r   r/   r9   s     r1   tp_broadcast_from_rank_0z*DistributedHelper.tp_broadcast_from_rank_0J   s4    <!N5d&>VZVcddddr3   c                     | j         dk    rP|| j        d<   t          j        | j        | j        d| j                   | j        d                                         }|S )zUInside each TP group, broadcasts an integer from rank 0 over the gloo CPU comm group.r   r   Fr;   )r   r.   r   r?   r!   r&   itemr@   s     r1   tp_broadcast_intz"DistributedHelper.tp_broadcast_intP   s^    <!#(Da N4,$2JUZbfbuvvvv%a(--//Er3   c                 r    | j         dk    r+t          j        |t          j        j        | j                   |S )zQInside each TP group, all-reduces a tensor with the MIN op. No-op when TP is off.r   )opr>   )r   r   
all_reduceReduceOpMINr   r@   s     r1   tp_all_reduce_minz#DistributedHelper.tp_all_reduce_minX   s2    <!OEdm&7t}MMMMr3   objc                     | j         dk    r|S | j        r|gndg}t          j        || j        | j        t          j        d                     |d         S )aI  Inside each TP group, broadcasts an arbitrary picklable Python object from TP-rank 0 to all other ranks.
        Used to keep request ingress and cancellations consistent across TP workers without requiring all ranks to
        receive the same external request stream. Uses a dedicated CPU (gloo) `cpu_comm_group` for broadcast.r   Nr   )r<   r>   r   r   )r   r(   r   broadcast_object_listr!   r&   r+   r   )r/   rK   holders      r1   tp_broadcast_objectz%DistributedHelper.tp_broadcast_object^   sm     <1J+7#$"08KTYT`afTgTg	
 	
 	
 	
 ayr3   c                     | j         dk    }t          j                            d          dk    }|r|rt	          j        d           dS dS dS )a  Throws a warning if TP is on and NCCL's graph mixing support was supposed to be disabled but isn't. That can
        happen if the distributed group is created before graph mixing is disabled. Typically, if the model is
        initialized before the ContinuousBatchingConfig is created.r   NCCL_GRAPH_MIXING_SUPPORT0a  NCCL_GRAPH_MIXING_SUPPORT was not set to '0' before init_process_group: performance will be harmed. Construct your `ContinuousBatchingConfig(...)` BEFORE calling `from_pretrained(tp_plan='auto')`, or set NCCL_GRAPH_MIXING_SUPPORT=0 in the launch environment.N)r   osenvirongetr   warning)r/   tp_ongraph_mixing_not_disableds      r1   maybe_warn_nccl_graph_mixingz.DistributedHelper.maybe_warn_nccl_graph_mixingj   ss     q $&JNN3N$O$OSV$V! 	. 	NM    	 	 	 	r3   seedmodel_devicec                 h   |$t          j        dddt           j        |          }n!t          j        |t           j        |          }|                     |          }|                                }| j        dk    r|t          j        d| d           t          j	        || j
        z              d S )Nr   l    )r   r   zHFound no user-specified seed in the config. Setting the config seed to: .)r+   randintr-   r,   rA   rC   r   r   infomanual_seedr)   )r/   rZ   r[   tp_seed_tensortp_seeds        r1   set_tp_seedzDistributedHelper.set_tp_seedw   s    <"]1iU[YefffNN"\$ek,WWWN66~FF %%''q  T\Kmcjmmmnnn'DL011111r3   )r   N)__name__
__module____qualname____doc__r   r2   boolr'   r8   r+   TensorrA   intrD   rJ   r   rO   rY   r   rc    r3   r1   r	   r	      sJ       vv!OJ$5 !O$ !O !O !O !OF'D ' ' ' '' ' ' 'el u|    c c    u|     
q 
Q 
 
 
 
   2d
 2%, 24 2 2 2 2 2 2r3   r	   )rS   typingr   r+   torch.distributeddistributedr   $torch.distributed.tensor.device_meshr   requestsr   r   r	   rk   r3   r1   <module>rq      s    
			                    ; ; ; ; ; ;       GCLLh2 h2 h2 h2 h2 h2 h2 h2 h2 h2r3   