
    u-jO:                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
mZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ defdZdedefdZd	 Zd0dededdfdZdeddfdZdeddfdZd1dedee         ddfdZ d1deeef         fdZ!d Z"d Z#d Z$d Z%d Z&d1dZ'd1dZ(d Z)d Z*d Z+ e j,                    d             Z-d Z.d Z/d1dZ0d ej1        j2        defd!Z3d"e	d#ej4        f         d$ed%edej4        fd&Z5d'e6e         d(ed)ed*e7e         de8e7e         ej4        f         f
d+Z9	 d2d,ej1        j:        d-ej;        d.edej1        j:        fd/Z<dS )3    N)CallableListOptionalTuple)version)distributedreturnc                      t          j         t           j        t           j                  } |                     d           |                                 d         }|                                  |S )N) r      )socketAF_INETSOCK_STREAMbindgetsocknameclose)sockports     \/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/modelscope/utils/torch_utils.py_find_free_portr      sR    =);<<DIIga DJJLLLK    r   c                 D    t          j        t          j                              d         }|                    d           t          j         t           j        t           j                  5 t           fd|D                       cd d d            S # 1 swxY w Y   d S )N	localhostc              3   N   K   | ]}                     |f          d k    V   dS )r   N)
connect_ex).0ipr   ss     r   	<genexpr>z _is_free_port.<locals>.<genexpr>#   s8      ??R1<<T
++q0??????r   )r   gethostbyname_exgethostnameappendr   r   all)r   ipsr   s   ` @r   _is_free_portr&      s    

!&"4"6"6
7
7
;CJJ{	v~v'9	:	: @a?????3?????@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @s   ,BBBc                 
   t          | d          r | j        di |} ndt          j        t          j                  t          j        d          k    rt	          j        | fi |} nt          dt          j         d           | S )Ncompilez	2.0.0.devzDCompiling model needs torch version > 2.0.0, your torch version is: z , origin model will be returned. )hasattrr(   r   parsetorch__version__print)modelcompile_optionss     r   compile_modelr1   &   s    ui   
0000	u(	)	)W];-G-G	G	Ge7777Z&+&7Z Z Z	
 	
 	
 Lr   nccllauncherbackendc                     t          j        d          t          j        d           | dk    rt          |fi | d S | dk    rt	          |fi | d S | dk    rt          |fi | d S t          d|            )NT)
allow_nonespawnpytorchmpislurmzInvalid launcher type: )mpget_start_methodset_start_method_init_dist_pytorch_init_dist_mpi_init_dist_slurm
ValueError)r3   r4   kwargss      r   	init_distrC   4   s    	d+++3
G$$$97--f-----	U		w))&)))))	W		++F+++++=8==>>>r   c                     t          t          j        d                   }t          j                            |           t          j        dd| i| d S )N
LOCAL_RANKr4   r)   )intosenvironr,   cuda
set_devicedistinit_process_groupr4   rB   
local_ranks      r   r>   r>   A   sN    RZ-..J	J*%%%66G6v66666r   c                    t          t          j        d                   }t          j                            |           dt          j        vrdt          j        d<   dt          j        vrt          d          t          j        d         t          j        d<   t          j        d         t          j        d	<   t          j        dd
| i| d S )NOMPI_COMM_WORLD_LOCAL_RANKMASTER_PORT29500MASTER_ADDRz/The environment variable MASTER_ADDR is not setOMPI_COMM_WORLD_SIZE
WORLD_SIZEOMPI_COMM_WORLD_RANKRANKr4   r)   )	rF   rG   rH   r,   rI   rJ   KeyErrorrK   rL   rM   s      r   r?   r?   H   s    RZ <=>>J	J*%%%BJ&&$+
=!BJ&&HIII!z*@ABJ|$:;BJv66G6v66666r   c                 R   t          t          j        d                   }t          t          j        d                   }t          j        d         }t          j                                        }t          j                            ||z             t          j        d| d          }|t          |          t          j        d<   nVdt          j        v rnGt          d          rd	t          j        d<   n(t          t                                t          j        d<   d
t          j        vr|t          j        d
<   t          |          t          j        d<   t          ||z            t          j        d<   t          |          t          j        d<   t          j        |            dS )a  Initialize slurm distributed training environment.

    If argument ``port`` is not specified, then the master port will be system
    environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
    environment variable, then a default port ``29500`` will be used.

    Args:
        backend (str): Backend of torch.distributed.
        port (int, optional): Master port. Defaults to None.
    SLURM_PROCIDSLURM_NTASKSSLURM_NODELISTzscontrol show hostname z | head -n1NrQ   i<s  rR   rS   rU   rE   rW   r4   )rF   rG   rH   r,   rI   device_countrJ   
subprocess	getoutputstrr&   r   rK   rL   )r4   r   proc_idntasks	node_listnum_gpusaddrs          r   r@   r@   U   s_    "*^,--GN+,,F
+,Iz&&((H	J'H,---8)888: :D $'II
=!!	"*	$	$  	?(/BJ}%%(+O,=,=(>(>BJ}%BJ&&$(
=!"6{{BJ|"7X#566BJ|WBJvG,,,,,,r   c                     t                      rUddlm} | $ |            rddlm} |                                } t          j        |           }t          j        |           }nd}d}||fS )zGet dist info of a specified group

    Args:
        group: The parallel group, default None, for the global group

    Returns:
        A tuple of the current rank and world_size of the group
    r   )is_megatron_initializedN)mpur   )	is_distmodelscope.utils.megatron_utilsrh   megatron_utilri   get_data_parallel_grouprK   get_rankget_world_size)grouprh   ri   rank
world_sizes        r   get_dist_infors   |   s     yy 	KKKKKK=4466=))))))//11E}U##(//


r   c                  \    t          t          j                            dd                    S )NrE   r   )rF   rG   rH   getr)   r   r   get_local_rankrv      s     rz~~lA..///r   c                  |    t          j                    sdS t          j                    sdS t          j                    S )Nr   )rK   is_availableis_initializedrn   r)   r   r   rn   rn      s:     q   q=??r   c                  |    t          j                    sdS t          j                    sdS t          j                    S )Nr   )rK   rx   ry   ro   r)   r   r   ro   ro      s=     q   q   r   c                      t          j                    sdS t          j                    sdS t          j                    } | dk    rdS t          j                     dS )zj
    Helper function to synchronize (barrier)
    among all processes when using distributed training
    Nr   )rK   rx   ry   ro   barrier)rr   s    r   synchronizer}      s[    
     $&&JQLNNNNNr   c                  N    t          j                    ot          j                    S N)rK   rx   ry   r)   r   r   rj   rj      s    84#6#8#88r   c                 R    t                      rt          j        |           dk    ndS )Nr   T)rj   rK   rn   rp   s    r   	is_masterr      s%    (/		;4=1$$t;r   c                 0     dt           dt           f fd}|S )Nfuncr	   c                 J     t          j                    fd            }|S )Nc                  6    t                    r | i |S d S r   )r   )argsrB   r   rp   s     r   wrapperz.master_only.<locals>.decorate.<locals>.wrapper   s2     -tT,V,,,- -r   )	functoolswraps)r   r   rp   s   ` r   decoratezmaster_only.<locals>.decorate   s>    				- 	- 	- 	- 	- 
		- r   )r   )rp   r   s   ` r   master_onlyr      s6    x H       Or   c                      t                      st          j                    S d} t                      rt          j                    } t	          j                     t          | d          } | S )zRMake sure each rank has the same temporary directory on the distributed mode.
    Nr   )rj   tempfilemkdtempr   rK   r|   	broadcast)tmpdirs    r   make_tmp_dirr      s]     99 "!!!F{{ $!##LNNNvq!!FMr   c                    t          j                    }t          j        dgd          }||k    r[t          j        t	          t          j        |                     t          j        d          }t          j        |j        d          }t          j	                     t          j
        ||           ||k    r5t          j        |                                fdt          j        d          }t          j	                     t          j
        ||           t          j        |                                                                                                          S )z
    Broadcasts the inputs to all ranks.

    Arguments:
        inputs : Any objects that can be serialized by pickle.
        src (int): Source rank.
    Returns:
        Each rank returns the same value as src.
    r   rI   devicedtyper   )rK   rn   r,   tensor	bytearraypickledumpsuint8shaper|   r   fullitemloadscpunumpytobytes)inputssrcrq   shape_tensorinputs_tensors        r   r   r      s$    =??D<F333Ls{{fl6**++5;vO O O|M$7GGGLNNNN<%%%s{{
L$5$5$7$7#:#$).*02 2 2
 	LNNNN=#&&&<))++1133;;==>>>r   c                    | n| dk    rht          j        |            t          j                             |            t          j        |            t          j                            |            d S t          d|            )Nr   z0Random seed should be positive, current seed is )randomseednpr,   manual_seedrI   manual_seed_allrA   )r   s    r   set_random_seedr      s    DAIID
	t$
""4(((((EtEEG G 	Gr   c                  |    t          j                    dk    rt          j        d          S t           j        j        S )zj
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    r2   gloor]   )rK   get_backend	new_grouprp   WORLDr)   r   r   _get_global_gloo_groupr     s6     V##~f----zr   c                    t          j        |          }|dv sJ t          j        |dk    rdnd          }t	          j        |           }t          |          dk    rKt                              d	                    t                      t          |          dz  |                     t          j                            |          }t          j        |                              |          }|S )N)r   r2   r   r   rI   i   @z;Rank {} trying to all-gather {:.2f} GB of data on device {}r   )rK   r   r,   r   r   r   lenloggerwarningformatrn   ByteStoragefrom_buffer
ByteTensorto)datarp   r4   r   bufferstorager   s          r   _serialize_to_tensorr     s    u%%G&&&&&\7f#4#4%%&AAF\$F
6{{WIF8::v;;'*F4 4	5 	5 	5 ++F33Gg&&)))88FMr   c                     t          j        |          }|dk    s
J d            t          j                                         gt          j         j                  } fdt          |          D             }t          j        |||           d |D             }t          |          }||k    rBt          j
        ||z
  ft          j         j                  }t          j         |fd           | fS )	zz
    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    r   r   zBcomm.gather/all_gather must be called from ranks within the group!r   c                 ^    g | ])}t          j        d gt           j        j                  *S )r   r   )r,   zerosint64r   )r   _r   s     r   
<listcomp>z*_pad_to_largest_tensor.<locals>.<listcomp>0  s@        	QCu{6=AAA  r   c                 P    g | ]#}t          |                                          $S r)   )rF   r   )r   sizes     r   r   z*_pad_to_largest_tensor.<locals>.<listcomp>5  s(    888dTYY[[!!888r   r   dim)rK   ro   r,   r   numelr   r   range
all_gathermaxr   r   cat)r   rp   rr   
local_size	size_listmax_sizepaddings   `      r   _pad_to_largest_tensorr   #  s     $5111JaK 	v||~~.$)K%+]4 4 4J   z""  I 	OIz777788i888I9~~H X+x*47$)K%+]4 4 4 FG,!444fr   c                 8   t                      dk    r| gS |t                      }t          j         |          dk    r| gS t          | |          t	          |          \  }t          |          fd|D             }t          j        ||           g }t          ||          D ]l\  }                                	                                
                                d|         }|                    t          j        |                     m|S )a;  
    Run all_gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: list of data gathered from each rank
    r   Nc                 ^    g | ])}t          j        ft           j        j                   *S )r   )r,   emptyr   r   )r   r   r   r   s     r   r   zall_gather.<locals>.<listcomp>Z  s@        	XLFMJJJ  r   r   )ro   r   rK   r   r   r   r   zipr   r   r   r#   r   r   )	r   rp   r   tensor_list	data_listr   r   r   r   s	          @@r   r   r   C  s6    1v}&((5!!Q&&v!$..F.vu==Iv9~~H      K 	OKu5555II{33 / /f##%%--//6f--....r   r/   c                     t          d |                                 D                       dhz
  }t          |          dk    S )Nc              3   >   K   | ]}t          |j                  V  d S r   )ra   r   )r   ps     r   r    z$is_on_same_device.<locals>.<genexpr>i  s*      ??qS]]??????r   r   r   )set
parametersr   )r/   
device_sets     r   is_on_same_devicer   h  sB    ??E,<,<,>,>?????5'IJz??ar   
forward_fn.
chunk_size	chunk_dimc                    	 t          |          dk    sJ | d            t          t          j                   j                  }|t          |          k    r#t	          d| dt          |           d          |dk    r|d         j                 }|D ]3}|j                 |k    r t	          d| d|j                            4|d         j                 |z  dk    r&t	          d|d         j                  d	|           |d         j                 |z  	t          	fd
|D                       }t           fdt          | D                       }t          j	        |          S   | S )Nr   z" has to be a tuple/list of tensorszforward_chunk_fn expects z arguments, but only z input tensors are givenz/All input tenors have to be of the same shape: z, found shape zThe dimension to be chunked z( has to be a multiple of the chunk size c              3   F   K   | ]}|                                V  dS )r   N)chunk)r   input_tensorr   
num_chunkss     r   r    z,apply_chunking_to_forward.<locals>.<genexpr>  sJ       %/ %/ zy99%/ %/ %/ %/ %/ %/r   c              3   "   K   | ]	} | V  
d S r   r)   )r   input_tensors_chunkr   s     r   r    z,apply_chunking_to_forward.<locals>.<genexpr>  sG       C C# J+,C C C C C Cr   r   )
r   inspect	signaturer   rA   r   tupler   r,   r   )
r   r   r   input_tensorsnum_args_in_forward_chunk_fntensor_shaper   input_tensors_chunksoutput_chunksr   s
   ` `      @r   apply_chunking_to_forwardr   m  sZ    }    &JJJ   $'*%%0$2 $2 #s='9'999 (D    [^_l[m[m      ! ! 	! A~~$Q'-i8) 	D 	DL!),<< Cl C C#/#5i#@C CD D D =
 !),z9Q>>%}Q/?/Ei/P % %"% %& & & #1%+I6*D
  % %/ %/ %/ %/ %/ -%/ %/ %/  /  /  C C C C'*,@'AC C C C C yI6666:}%%r   headsn_heads	head_sizealready_pruned_headsc                    t          j        ||          }t          |           |z
  } | D ]%t          fd|D                       z
  d|<   &|                    d                                                              d          }t          j        t          |                    |         	                                }| |fS )Nc              3   ,   K   | ]}|k     rd ndV  dS )r   r   Nr)   )r   hheads     r   r    z3find_pruneable_heads_and_indices.<locals>.<genexpr>  s/      MM1q4xx!!QMMMMMMr   r   r   r   )
r,   onesr   sumview
contiguouseqaranger   long)r   r   r   r   maskindexr   s         @r    find_pruneable_heads_and_indicesr    s     :gy))D E   cMMMM8LMMMMMMT

99R==##%%((++D#l3t9955d;@@BBE%<r   layerr  r   c                    |                     | j        j                  }| j                            ||                                                                          }| j        c|dk    r,| j                                                                        }n1| j        |                                                                         }t          | j                                                  }t          |          ||<   t          j                            |d         |d         | j        d u                               | j        j                  }d|j        _        |j                            |                                           d|j        _        | j        Dd|j        _        |j                            |                                           d|j        _        |S )Nr   r   )biasFT)r   weightr   index_selectdetachcloner
  listr   r   r,   nnLinearrequires_gradcopy_r  )r  r  r   Wbnew_size	new_layers          r   prune_linear_layerr    s    HHU\())E!!#u--4466<<>>Az!88
!!##))++AA
5!((**0022AEL%%''((HJJHSMXa[uz(    R+,,  &+I"1<<>>***%)I"z',	$Q\\^^,,,'+	$r   )r2   r   )r   )=r   r   rG   r   r   r   r_   r   typingr   r   r   r   r   r   r,   torch.multiprocessingmultiprocessingr;   	packagingr   r   rK   ra   r   rF   boolr&   r1   rC   r>   r?   r@   rs   rv   rn   ro   r}   rj   r   r   r   r   r   	lru_cacher   r   r   r   r  Moduler   Tensorr   r  r   r   r  r  
LongTensorr  r)   r   r   <module>r"     s        				         2 2 2 2 2 2 2 2 2 2 2 2      " " " " " "       % % % % % %    @ @ @ @ @ @  
? 
? 
?c 
? 
? 
? 
? 
?7 7$ 7 7 7 7
7C 
7d 
7 
7 
7 
7$- $-c $-# $-$ $- $- $- $-N sCx    ,0 0 0  ! ! !  9 9 9< < < <      ? ? ?BG G G           @" " " "J UX_          
,&el*+,&,& ,&
 \,& ,& ,& ,&^Cy#&36!#h+0S5<1G+H   & #$ eho #.(-     r   