
    Αi                        S SK r S SKrS SKrS SKrS SKrS SKJrJr  S SKrS SK	J
r
  S SKJr  S SKJr  S SKJr  S SKJr  SS	KJr  \ " S
 S\5      5       r\ " S S\5      5       r " S S5      r " S S5      r " S S5      r " S S5      r " S S5      r " S S5      r " S S5      r\" \R<                  5      rSS jr g)    N)IntEnumunique)get_all_custom_device_type)Node)KVClient)KVServer)SingleNodeTopology   )
get_loggerc                   ,    \ rS rSrSrSrSrSrSrSr	Sr
g	)

DeviceType    r         r
          N)__name__
__module____qualname____firstlineno__UNKNOWNCPUGPUXPUDCUNIC__static_attributes__r       o/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/distributed/auto_parallel/static/cluster.pyr   r       s     G
C
C
C
C
Cr   r   c                   8    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrg)LinkType*   r   r   r   r
      r   r         r   N)r   r   r   r   r   LOCSYSPHBPIXPIBNVLNVBNETr   r   r   r    r"   r"   *   s/    G
C
C
C
C
C
C
C
Cr   r"   c                   
   \ rS rSrS r\S 5       r\S 5       r\S 5       r\R                  S 5       r\S 5       r
\
R                  S 5       r
\S	 5       r\S
 5       r\R                  S 5       rS rS rS rS rS rS rSrg)Mesh7   c                 T    Xl         X l        S U l        S U l        0 U l        0 U l        g N)_id_name_type
_full_type	_machines_links)selfidnames      r    __init__Mesh.__init__8   s(    

r   c                     U R                   $ r3   r4   r:   s    r    r;   Mesh.id@       xxr   c                     U R                   $ r3   )r5   rA   s    r    r<   	Mesh.nameD       zzr   c                     U R                   $ r3   r6   rA   s    r    type	Mesh.typeH   rF   r   c                     Xl         g r3   rH   r:   values     r    rI   rJ   L       
r   c                     U R                   $ r3   r7   rA   s    r    	full_typeMesh.full_typeP       r   c                     Xl         g r3   rP   rL   s     r    rQ   rR   T       r   c                     U R                   $ r3   r8   rA   s    r    machinesMesh.machinesX       ~~r   c                     U R                   $ r3   r9   rA   s    r    links
Mesh.links\       {{r   c                     Xl         g r3   rW   rL   s     r    rX   rY   `       r   c                 4    XR                   UR                  '   g r3   )r8   r;   )r:   machines     r    add_machineMesh.add_machined   s    %,wzz"r   c                 :    U R                   R                  US 5      $ r3   )r8   getr:   r;   s     r    get_machineMesh.get_machineg   s    ~~!!"d++r   c                 ,    [        U R                  5      $ r3   )lenr8   rA   s    r    get_num_machinesMesh.get_num_machinesj   s    4>>""r   c                 L    XR                   UR                  UR                  4'   g r3   r9   sourcetargetr:   links     r    add_linkMesh.add_linkm       26T[[$++./r   c                 <    U R                   R                  X4S 5      $ r3   r9   rg   r:   rq   rr   s      r    get_linkMesh.get_linkp       {{/66r   c                 $   U R                   U R                  U R                  R                  5        Vs/ s H  oR	                  5       PM     snU R
                  R                  5        Vs/ s H  oR	                  5       PM     snS.$ s  snf s  snf )N)r;   r<   rX   r]   )r;   r<   rX   valuesto_jsonr]   r:   xs     r    r   Mesh.to_jsons   sm    ''II.2mm.B.B.DE.D.DE+/::+<+<+>?+>aiik+>?	
 	
 F?s   B*B
)r7   r4   r9   r8   r5   r6   N)r   r   r   r   r=   propertyr;   r<   rI   setterrQ   rX   r]   rd   ri   rm   ru   r{   r   r   r   r   r    r0   r0   7   s           
[[            __ -,#77
r   r0   c                   ^    \ rS rSrS r\S 5       r\S 5       rS rS r	S r
S rS	 rS
 rSrg)	MeshGroup|   c                 .    0 U l         0 U l        SU l        g Nr   )_meshesr9   _global_device_numrA   s    r    r=   MeshGroup.__init__}   s    "#r   c                     U R                   $ r3   )r   rA   s    r    meshesMeshGroup.meshes       ||r   c                     U R                   $ r3   r\   rA   s    r    r]   MeshGroup.links   r_   r   c                 4    XR                   UR                  '   g r3   )r   r;   )r:   meshs     r    add_meshMeshGroup.add_mesh   s     $TWWr   c                 :    U R                   R                  US 5      $ r3   )r   rg   rh   s     r    get_meshMeshGroup.get_mesh   s    ||D))r   c                 L    XR                   UR                  UR                  4'   g r3   rp   rs   s     r    ru   MeshGroup.add_link   rw   r   c                 <    U R                   R                  X4S 5      $ r3   ry   rz   s      r    r{   MeshGroup.get_link   r}   r   c                 H    U R                   nU =R                   S-  sl         U$ Nr   )r   )r:   curr_device_ids     r    generate_global_device_id#MeshGroup.generate_global_device_id   s$    001$r   c                     U R                   R                  5        Vs/ s H  oR                  5       PM     snU R                  R                  5        Vs/ s H  oR                  5       PM     snS.$ s  snf s  snf )N)r   r]   )r   r   r   r]   r   s     r    r   MeshGroup.to_json   s\    ,0KK,>,>,@A,@qyy{,@A+/::+<+<+>?+>aiik+>?
 	
A?s   A2A7)r   r9   r   N)r   r   r   r   r=   r   r   r]   r   r   ru   r{   r   r   r   r   r   r    r   r   |   sM    $
    %*77

r   r   c                   P   \ rS rSr\R
                  \R                  \R                  /rSS jr	\
S 5       r\R                  S 5       r\
S 5       r\R                  S 5       r\
S 5       r\R                  S	 5       r\
S
 5       r\R                  S 5       r\
S 5       r\R                  S 5       r\
S 5       r\R                  S 5       r\
S 5       r\R                  S 5       r\
S 5       r\R                  S 5       r\
S 5       r\R                  S 5       rS rS rS rS rSrg)Device   Nc                     Xl         X l        X0l        X@l        S U l        S U l        S U l        S U l        S U l        S U l	        0 U l
        g r3   )
_global_id	_local_id_machine_meshr6   _model
_dp_gflops
_sp_gflops
_hp_gflops_memoryr9   )r:   	global_idlocal_idrc   r   s        r    r=   Device.__init__   sI    #!

 r   c                     U R                   $ r3   r   rA   s    r    r   Device.global_id   rS   r   c                     Xl         g r3   r   rL   s     r    r   r      rU   r   c                     U R                   $ r3   r   rA   s    r    r   Device.local_id   rZ   r   c                     Xl         g r3   r   rL   s     r    r   r      ra   r   c                     U R                   $ r3   r   rA   s    r    rc   Device.machine       }}r   c                     Xl         g r3   r   rL   s     r    rc   r          r   c                     U R                   $ r3   rH   rA   s    r    rI   Device.type   rF   r   c                     Xl         g r3   rH   rL   s     r    rI   r      rN   r   c                     U R                   $ r3   r   rA   s    r    modelDevice.model   r_   r   c                     Xl         g r3   r   rL   s     r    r   r      s    r   c                     U R                   $ r3   r   rA   s    r    	dp_gflopsDevice.dp_gflops   rS   r   c                     Xl         g r3   r   rL   s     r    r   r      rU   r   c                     U R                   $ r3   r   rA   s    r    	sp_gflopsDevice.sp_gflops   rS   r   c                     Xl         g r3   r   rL   s     r    r   r      rU   r   c                     U R                   $ r3   r   rA   s    r    	hp_gflopsDevice.hp_gflops   rS   r   c                     Xl         g r3   r   rL   s     r    r   r      rU   r   c                     U R                   $ r3   r   rA   s    r    memoryDevice.memory   r   r   c                     Xl         g r3   r   rL   s     r    r   r          r   c                 L    XR                   UR                  UR                  4'   g r3   rp   rs   s     r    ru   Device.add_link   rw   r   c                     U R                   U R                  U R                  U R                  U R                  U R
                  U R                  S.$ )Nr   r   rI   r   r   r   r   r   rA   s    r    r   Device.to_json  s=    IIZZkk
 	
r   c                 &   SnUSU R                    SU R                   SU R                  R                   SU R                  R
                   SU R                   SU R                   SU R                   S	U R                   S
U R                   3-  nU$ )N zglobal_id: z, local_id: z, machine_id: , type: z	, model: z, dp_flops: z, sp_flops: z, hp_flops: z
, memory: )r   r   rc   r;   rI   r<   r   r   r   r   r   )r:   strs     r    __str__Device.__str__  s
   T^^,L~VZVbVbVeVeUffnosoxoxo}o}n~  H  IM  IS  IS  HT  T`  ae  ao  ao  `p  p|  }A  }K  }K  |L  LX  Y]  Yg  Yg  Xh  hr  sw  s~  s~  r  @  	@
r   c                 "    U R                  5       $ r3   r   rA   s    r    __repr__Device.__repr__      ||~r   )r   r   r   r9   r   r   r   r   r   r   r6   r3   )r   r   r   r   r   r   r   r   NON_ACCELERATOR_TYPEr=   r   r   r   r   rc   rI   r   r   r   r   r   ru   r   r   r   r   r   r   r    r   r      s   &NNJNNJ<N<NO&         __    ^^    
[[    \\                      ]] 7	

r   r   c                      \ rS rSrSrSrSS jr\S 5       r\R                  S 5       r\S 5       r
\
R                  S 5       r
\S	 5       r\R                  S
 5       r\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r\R                  S 5       rS rS rS rSrg)Linki  r      c                 n    Xl         X l        S U l        S U l        S U l        S U l        S U l        X0l        g r3   )_src_tgtr6   
_bandwidth_latency_link_level_hop_topo)r:   rq   rr   topos       r    r=   Link.__init__  s5    		
	
r   c                     U R                   $ r3   )r   rA   s    r    rq   Link.source(      yyr   c                     Xl         g r3   )_sourcerL   s     r    rq   r   ,  r   r   c                     U R                   $ r3   )r   rA   s    r    rr   Link.target0  r  r   c                     Xl         g r3   )_targetrL   s     r    rr   r  4  r   r   c                     U R                   $ r3   rH   rA   s    r    rI   	Link.type8  rF   r   c                     Xl         g r3   rH   rL   s     r    rI   r	  <  rN   r   c                     U R                   $ r3   r   rA   s    r    	bandwidthLink.bandwidth@  rS   r   c                     Xl         g r3   r  rL   s     r    r  r  D  rU   r   c                     U R                   $ r3   r   rA   s    r    latencyLink.latencyH  r   r   c                     Xl         g r3   r  rL   s     r    r  r  L  r   r   c                     U R                   $ r3   r   rA   s    r    hopLink.hopP  r  r   c                     Xl         g r3   r  rL   s     r    r  r  T  s    	r   c                     U R                   $ r3   r   rA   s    r    
link_levelLink.link_levelX      r   c                     Xl         g r3   r  rL   s     r    r  r  \       r   c                 v    U R                   U R                  U R                  U R                  U R                  S.$ )N)	source_id	target_idrI   r  r  )rq   rr   rI   r  r  rA   s    r    r   Link.to_json`  s/    II||
 	
r   c                 :   SnU R                   (       a  U R                  OU R                  R                  nU R                   (       a  U R                  OU R                  R                  nUSU SU SU R                   SU R
                   SU R                   3
-  nU$ )Nr   zsource_global_id: z, target_global_id: r   z, bandwidth: z, latency: )r   rq   r   rr   rI   r  r  )r:   r   r"  r#  s       r    r   Link.__str__i  s    #'::DKK4;;3H3H	#'::DKK4;;3H3H	#I;.B9+XVZV_V_U``mnrn|n|m}  ~I  JN  JV  JV  IW  X  	X
r   c                 "    U R                  5       $ r3   r   rA   s    r    r   Link.__repr__p  r   r   )
r   r   r   r   r  r   r  r   r   r6   N)F)r   r   r   r   default_hopdefault_nic_bandwidthr=   r   rq   r   rr   rI   r  r  r  r  r   r   r   r   r   r   r    r   r     sY   K   ]]    ]]    
[[          ^^    	ZZ      ! !
r   r   c                      \ rS rSrS#S jr\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r	\	R                  S	 5       r	\S
 5       r
\
R                  S 5       r
\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r\R                  S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\R                  S 5       rS rS rS rS rS rS  rS! rS"rg)$Machineit  Nc                     Xl         S U l        S U l        S U l        S U l        S U l        S U l        S U l        S U l        0 U l	        0 U l
        0 U l        SU l        0 U l        X l        X0l        g r   )r4   	_hostname_addrr   r   r   r   r   _port_devicesr9   _accelerators!_non_accelerator_cumulative_count_topo_linksr   r   )r:   r;   r   r   s       r    r=   Machine.__init__u  so    

12.

r   c                     U R                   $ r3   r@   rA   s    r    r;   
Machine.id  rC   r   c                     Xl         g r3   r@   rL   s     r    r;   r7    s    r   c                     U R                   $ r3   r.  rA   s    r    hostnameMachine.hostname  rZ   r   c                     Xl         g r3   r:  rL   s     r    r;  r<    ra   r   c                     U R                   $ r3   r/  rA   s    r    addrMachine.addr  rF   r   c                     Xl         g r3   r?  rL   s     r    r@  rA    rN   r   c                     U R                   $ r3   r   rA   s    r    r   Machine.sp_gflops  rS   r   c                     Xl         g r3   r   rL   s     r    r   rD    rU   r   c                     U R                   $ r3   r   rA   s    r    r   Machine.dp_gflops  rS   r   c                     Xl         g r3   r   rL   s     r    r   rG    rU   r   c                     U R                   $ r3   r   rA   s    r    r   Machine.memory  r   r   c                     Xl         g r3   r   rL   s     r    r   rJ    r   r   c                     U R                   $ r3   r  rA   s    r    r  Machine.bandwidth  rS   r   c                     Xl         g r3   r  rL   s     r    r  rM    rU   r   c                     U R                   $ r3   r  rA   s    r    r  Machine.latency  r   r   c                     Xl         g r3   r  rL   s     r    r  rP    r   r   c                     U R                   $ r3   r0  rA   s    r    portMachine.port  rF   r   c                     Xl         g r3   rS  rL   s     r    rT  rU    rN   r   c                     U R                   $ r3   )r1  rA   s    r    devicesMachine.devices  r   r   c                 T    U R                   (       a  U R                  $ U R                  $ r3   )r   r4  r9   rA   s    r    r]   Machine.links  s    ::###{{r   c                     U R                   $ r3   )r2  rA   s    r    acceleratorsMachine.accelerators  s    !!!r   c                     U R                   $ r3   r   rA   s    r    r   Machine.mesh  rF   r   c                     Xl         g r3   r`  rL   s     r    r   ra    rN   r   c                     XR                   UR                  '   UR                  [        R                  ;  a  XR
                  UR                  '   g g r3   )r1  r   rI   r   r   r2  r:   devices     r    
add_deviceMachine.add_device  s?    *0f&&';;f99939v//0 :r   c                 :    U R                   R                  US 5      $ r3   )r1  rg   rh   s     r    
get_deviceMachine.get_device  s    }}  T**r   c                     U R                   (       a%  XR                  UR                  UR                  4'   g XR                  UR                  R
                  UR                  R
                  4'   g r3   )r   r4  rq   rr   r9   r   rs   s     r    ru   Machine.add_link  sI    ::;?dkk4;;78JNKK..0E0EFGr   c                     U R                   (       a  U R                  R                  X4S 5      $ U R                  R                  X4S 5      $ r3   )r   r4  rg   r9   )r:   source_global_idtarget_global_ids      r    r{   Machine.get_link  sE    ::##''!4d  {{ 0CTJJr   c                    U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  R                  5        Vs/ s H  oR                  5       PM     snU R                  R                  5        Vs/ s H  oR                  5       PM     snS.
$ s  snf s  snf )N)
r;   r;  r@  r   r   r   r  r  rX  r]   )r;   r;  r@  r   r   r   r  r  rX  r   r   r]   r   s     r    r   Machine.to_json  s    ''IIkk||-1\\-@-@-BC-B		-BC+/::+<+<+>?+>aiik+>?
 	
 D?s   5C
,Cc                     SnU R                   R                  5        H  nUSU 3-  nM     U R                  R                  5        H  nUSU 3-  nM     U$ )Nr   z
, device: z, link: )rX  r   r]   )r:   r   re  rt   s       r    r   Machine.__str__  s\    ll))+FZx((C ,JJ%%'DXdV$$C (
r   c                 "    U R                  5       $ r3   r   rA   s    r    r   Machine.__repr__  r   r   )r2  r/  r   r1  r   r.  r4   r   r9   r   r   r3  r0  r   r   r4  )NF)r   r   r   r   r=   r   r;   r   r;  r@  r   r   r   r  r  rT  rX  r]   r]  r   rf  ri  ru   r{   r   r   r   r   r   r   r    r,  r,  t  s&   *   YY    __    
[[                ]]          ^^    
[[     
 " "   
[[ :+OK
r   r,  c                       \ rS rSrS r\S 5       r\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       rS
rg)AlphaLatencyi  c                 t   [        U[        5      (       d   eUR                  SS 5      U l        UR                  SS 5      U l        UR                  SS 5      U l        UR                  SS 5      U l        U R                  b   [        U R                  5      U l        U R                  b  U R                  R                  SS 5      OS U l	        U R                  b  U R                  R                  SS 5      OS U l
        U R                  b  U R                  R                  SS 5      OS U l        U R                  b   [        U R                  5      U l	        U R                  b   [        U R                  5      U l
        U R                  R                  SS 5      U l        U R                  R                  SS 5      U l        U R
                  R                  SS 5      U l        U R
                  R                  SS 5      U l        U R                  be  [        U R                  [         5      (       a+  U R                  S	;   d   e["        U R                     U l        O [        U R                  5      U l        U R                  be  [        U R                  [         5      (       a+  U R                  S	;   d   e["        U R                     U l        O [        U R                  5      U l        U R                  be  [        U R                  [         5      (       a+  U R                  S;   d   e["        U R                     U l        O [        U R                  5      U l        U R                  bf  [        U R                  [         5      (       a+  U R                  S;   d   e["        U R                     U l        g  [        U R                  5      U l        g g !   [        S5      e= f!   [        S5      e= f!   [        S5      e= f!   [        S
5      e= f!   [        S5      e= f!   [        S5      e= f!   [        S5      e= f)Nbaseinterintraswitchz The switch latency must be floatringtreez$The base ring latency must be float.)r.   z%The inter ring latency must be float.z%The inter tree latency must be float.)r,   r)   z%The intra ring latency must be float.z%The intra tree latency must be float.)
isinstancedictrg   _base_inter_intra_switchfloat	TypeError
_base_ring
_base_tree_base_inter_inter_ring_inter_tree_intra_ring_intra_treer   r"   )r:   alpha_latencys     r    r=   AlphaLatency.__init__  s   -...."&&vt4
#''6#''6$((48<<#D$T\\2 -1JJ,BDJJNN64( 	 -1JJ,BDJJNN64( 	 .2ZZ-CDJJNN7D) 	 ??&H"'"8 ??&H"'"8  ;;??648;;??648;;??648;;??648'$**C00''7222#+D,<,<#= M',T-=-='>D$ '$**C00''7222#+D,<,<#= M',T-=-='>D$ '$**C00''>999#+D,<,<#= M',T-=-='>D$ '$**C00''>999#+D,<,<#= M',T-=-='>D$ (sD BCCH FGGH FGGM#$KLLM#$KLLM#$KLLM#$KLLsT   O
 :O "O* O: 
P
 <P .P* 
OO'*O7:P
PP'*P7c                     U R                   $ r3   )r  rA   s    r    	base_ringAlphaLatency.base_ringf  rS   r   c                     U R                   $ r3   )r  rA   s    r    	base_treeAlphaLatency.base_treej  rS   r   c                     U R                   $ r3   )r  rA   s    r    r}  AlphaLatency.switchn  r   r   c                     U R                   $ r3   )r  rA   s    r    
inter_ringAlphaLatency.inter_ringr  r  r   c                     U R                   $ r3   )r  rA   s    r    
inter_treeAlphaLatency.inter_treev  r  r   c                     U R                   $ r3   )r  rA   s    r    
intra_ringAlphaLatency.intra_ringz  r  r   c                     U R                   $ r3   )r  rA   s    r    
intra_treeAlphaLatency.intra_tree~  r  r   )r  r  r  r  r  r  r  r  r  r  r  N)r   r   r   r   r=   r   r  r  r}  r  r  r  r  r   r   r   r    rx  rx    s    JMX                      r   rx  c                   b   \ rS rSrSrS r\S 5       r\R                  S 5       r             S#S jr	\S 5       r
\S 5       r\S	 5       r\R                  S
 5       r\S 5       rS r\S 5       rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r S r!S  r"S!r#g")$Clusteri  z
The cluster is an abstract of the hardware resource for training, which contains the cluster topology and
related hardware information. It will serve the task mapping, cost model and auto searching.
c                     SU l         SU l        0 U l        S U l        S U l        0 U l        0 U l        S U l        S U l        SU l	        S U l
        SU l        SU l        g )Nr   F)_num_meshes_num_machinesr8   	_topology_alpha_latency_rank_to_device_id_device_id_to_rank_num_devices_per_machine
_gpu_model_initialized_mesh_groupr   _heterorA   s    r    r=   Cluster.__init__  sg    ""$"$ )-%!
r   c                     U R                   $ r3   r  rA   s    r    initializedCluster.initialized  s       r   c                     Xl         g r3   r  rL   s     r    r  r    s    !r   c                   ^.^/^0 / SQm/S/m0S/m.T/T0-   T.-   nX@l         Xl        U.U/U04S jnS nS n0 n/ US'   Sn0 n0 n[        U5       GHg  n0 nS	[        U5      -   US
'   SUS'   SUS'   / US'   / nSn[        U5       Hw  n0 nUS:X  a  US:X  a  UOUS-   nUS-  nU" U5      nU" X5      nUnUUS'   UUS'   UUS'   UUS'   UUS'   U
US'   U	US'   UUS'   SUS'   UUU'   UUU'   UR	                  U5        My     0 nU" U5      u  n n!nUn"Un#US-  nSnUnSnU US'   U!US'   UUS'   U"US'   U#US'   UUS'   UUS'   UUS'   UUS'   UUU'   UUU'   UR	                  U5        0 n$US-  nSnSn%Sn&SnUU$S'   UU$S'   UU$S'   UUU'   UUU'   UR	                  U$5        UUS'   US   R	                  U5        GMj     [        SUS-   5       H  n[        SUS-   5       Hj  nUU:X  a  M  UU   n'UU   n(UU   n)UU   n*0 n+Un,Un-U,U+S '   U-U+S!'   U'U(:X  a  U)U*:X  a  S"U+S'   UU+S#'   O
S$U+S'   UU+S#'   US   U'   S   R	                  U+5        Ml     M     U R                  U5        g%)&z#Generate cluster by default config.)V100A100H100A2A10A16A30A40r   r   c                 P   > S nU T;   a  SnOU T;   a  SnOU T;   a  SnOSnUc   eU$ )Nr   r   r   r   )	gpu_modelrI   
dcu_models
gpu_models
xpu_modelss     r    _convert_to_type<Cluster.gen_default_config_cluster.<locals>._convert_to_type  sE    DJ&j(j(###Kr   c                     S nU S:X  a  S[        U5      -   S-   nOAU S:X  a  S[        U5      -   S-   nO)U S:X  a  S[        U5      -   S-   nOU [        U5      -   S-   nUc   eU$ )Nr  zTesla V100-SXM2-GBr  zTesla A100-SXM-r  zTesla A30-SXM-)r   )r  
gpu_memoryr   s      r    _convert_to_model=Cluster.gen_default_config_cluster.<locals>._convert_to_model  s|    EF"*S_<tCf$)C
O;dBe#(3z?:TA!C
O3d:$$$Lr   c                 d    Su  pnU S:X  a  SnSnSnOU S:X  a  SnSnSnUc   eUc   eUc   eXU4$ )N)NNN6271Cx86_64GenuineIntelz'Intel(R) Xeon(R) Gold 6271C CPU @ 2.60G6148z&Intel(R) Xeon(R) Gold 6148 CPU @ 2.40Gr   )	cpu_modelarchvendorr   s       r    _convert_to_cpu_info@Cluster.gen_default_config_cluster.<locals>._convert_to_cpu_info  si    "2D%G#'Af$'@###%%%$$$&&r   rX   r   host_r;  z	127.0.0.1r@  ii  rT  r]   r   r   r   rI   r   r   r   r   r   r   r   r  r  r   g      )@rX  rn  ro  r,   r  r)   N)r  r  ranger   append_build_from_dict)1r:   r  r  
node_countdevice_countr  
cpu_memoryinter_bandwidthintra_bandwidthgpu_dp_gflopsgpu_sp_gflopsgpu_hp_gflopscpu_dp_gflopscpu_sp_gflopsall_gpu_modelsr  r  r  cluster_infor   global_id_to_device_typeglobal_id_to_nodeirc   rX  r   jre  rI   r   r   
cpu_devicer  r  r   r   
nic_devicewidthip	node_id_i	node_id_jdevice_type_idevice_type_jrt   rn  ro  r  r  r  s1                                                 @@@r    gen_default_config_cluster"Cluster.gen_default_config_cluster  s   " P
W
W
#j0:=(4%#			'  #%Z 	#% z"AG")CF"2GJ)GFO#GFO!GGGH<()*aAFI	A	A'	2))@#&/{#%-z"!%v"'w#)x &3{#&3{#&3{#!&v6:(3/0!),v&+ )0 J"6y"AD&%%I%INIHFD!%Jv#)Jx "'Jw&/J{#&/J{#&/J{#%-Jz"#)Jx !%Jv+,i(26$Y/NN:&JNI DEBH!%Jv%)Jz"&/J{#26$Y/+,i(NN:&!(GI$++G4S #X q)a-(A1i!m,6-a0	-a0	 8 ; 8 ;#$ #$ +;'(+;'(	)m}.L#(DL(7D%#(DL(7D%Z(3G<CCDI' - ), 	l+r   c                     U R                   $ r3   )r  rA   s    r    rank_to_device_idCluster.rank_to_device_idT      &&&r   c                     U R                   $ r3   )r  rA   s    r    device_id_to_rankCluster.device_id_to_rankX  r  r   c                     U R                   $ r3   r  rA   s    r    
mesh_groupCluster.mesh_group\  r  r   c                     Xl         g r3   r  rL   s     r    r  r   `  r   r   c                     U R                   $ r3   rW   rA   s    r    rX   Cluster.machinesd  rZ   r   c                 b   [        U[        5      (       d   eXR                  UR                  '   UR                  S:w  a  U R                  UR                  S-
     nUR                  nUR
                   HP  nUR
                  U   R                  [        R                  ;  d  M0  XC-
  nX@R                  U'   XPR                  U'   MR     [        UR
                  5      [        UR                  5      -
  UR                  -   Ul        g UR
                   Hj  nUR
                  U   R                  [        R                  ;  d  M0  UnX@R                  U'   XPR                  U'   UR
                  U   UR                  U'   Ml     [        UR
                  5      [        UR                  5      -
  Ul        g )Nr   r   )r  r,  r8   r;   r3  rX  rI   r   r   r  r  rl   r]  )r:   rc   prev_machineoffsetr   rank_ids         r    rd   Cluster.add_machineh  st   '7++++%,wzz" ::?>>'**q.9L!CCF$__	OOI.33!667 (0G7@++G49@++I6 - GOO$g**+,@@A 5 %__	OOI.33!667 (G7@++G49@++I66=ooi6PG((3 - 9<9G(()9*G5r   c                     U R                   $ r3   )r  rA   s    r    r  Cluster.alpha_latency  s    """r   c                 h    [        U[        5      (       d   eUR                  R                  U5        g r3   )r  r   rc   rf  rd  s     r    rf  Cluster.add_device  s'    &&))))!!&)r   c                 |    [        U[        5      (       d   eUR                  R                  R	                  U5        g r3   )r  r   rq   rc   ru   rs   s     r    ru   Cluster.add_link  s-    $%%%%$$T*r   c                 |   S nU R                   (       aW  / nU R                  R                  R                  5        H,  nUR	                  UR
                  R                  5       5        M.     OU R
                  R                  5       nU H1  nXR                  R                  5       ;   d  M"  UR                  U   nM3     U$ r3   )r   r  r   r   extendrX   rX  keys)r:   device_global_idre  target_machinesr   rc   s         r    ri  Cluster.get_device  s    :: O..557&&t}}';';'=> 8 #mm224O&G??#7#7#99 )9: ' r   c                    US   nU GH  nU R                  5       n[        U5      nUR                  S5      Ul        UR                  S5      Ul        UR                  S5      Ul        UR                  S/ 5      nU GH  nUR                  S5      nUR                  S5      n	[        XU5      n
UR                  SS 5      nUb
  [        U   nO[        R                  nXl	        UR                  S	S 5      U
l
        [        UR                  S
S5      5      U
l        [        UR                  SS5      5      U
l        [        UR                  SS5      5      U
l        [        UR                  SS5      5      U
l        U R!                  U
5        GM     U R#                  U5        GM     U GHk  nUR                  S/ 5      nU GHN  nUR                  S5      nUR                  S5      nU R%                  U5      nU R%                  U5      n['        UU5      nUR                  SS 5      nUb
  [(        U   nO[(        R                  nUUl	        [        UR                  SS5      5      Ul        [        UR                  SS5      5      Ul        UR                  SS 5      Ul        UR.                  cO  UR0                  nUR0                  nUR2                  UR2                  :X  a  SUl        O[&        R4                  Ul        U R7                  U5        GMQ     GMn     SU;   a   [9        UR                  S5      5      U l        g S U l        g )NrX   r;  r@  rT  rX  r   r   rI   r   r   r   r   r   r   r]   rn  ro  r  r  r  r  )_generate_machine_idr,  rg   r;  r@  rT  r   r   r   rI   r   r  r   r   r   r   rf  rd   ri  r   r"   r  r  r  rc   r;   r)  ru   rx  r  )r:   r  machines_infomachine_info
machine_idrc   devices_infodevice_infor  device_local_idre  device_type
links_info	link_inforn  ro  rq   rr   rt   	link_typesource_machinetarget_machines                         r    r  Cluster._build_from_dict  s   $Z0)L224Jj)G+//
;G'++F3GL'++F3GL'++Ir:L+#.??;#? "-//*"= 07K)oofd;*",["9K","4"4K)*w=#(a)H#I #(a)H#I #(a)H#I  %kooh&B C'  ,  W%/ *0 *L%))'26J'	#,==1C#D #,==1C#D )9:)9:FF+%MM&$7	( ( 3I ( 0 0I%	!&y}}[!'D!E$Y]]9a%@A$==588#%+^^N%+^^N%((N,=,==#$#'#3#3d#/ ( *6 l*".  1#D #'Dr   c           	      <	   [        5       U l        UR                  5        GH  u  p4U R                  5       n[	        XS5      nUR                  S5      nUS   Ul        SR                  USS  5      Ul        [        [        [        U5      5      5      n[        [        U5      5       GH  n	XI   n
[        XSS9nU
R                  S5      Ul        U
R                  S5      Ul        [!        U
R                  S5      5      Ul        [!        U
R                  S	5      5      Ul        [!        U
R                  S
5      5      Ul        [!        U
R                  S5      5      Ul        [!        U
R                  S5      5      Ul        [        U
R                  S5      5      U l        U
R                  S5       H  n[/        UR                  S5      UR                  S5      UU5      nUR                  S5      Ul        UR                  S5      Ul        [!        UR                  S5      5      Ul        [!        UR                  S	5      5      Ul        [!        UR                  S
5      5      Ul        UR3                  U5        M     U
R                  S5       H  nUR                  S5      nUR                  S5      n[5        UUSS9nUR                  S5      Ul        [!        UR                  S5      5      Ul        [!        UR                  S5      5      Ul        SUl        UR                  SS 5      Ul        UR8                  b  M  UU:X  a  SUl        O[4        R:                  Ul        UR=                  U5        M     UR?                  U5        GM     UR@                   HV  nUR@                   HC  nUU:X  a  M  [5        UUSS9nSUl        SUl        SUl        SUl        UR=                  U5        ME     MX     U R                  RC                  U5        GM     U R                  RD                   Hj  nU R                  RD                   HM  nUU:X  a  M  [5        UUSS9nSUl        SUl        SUl        SUl        U R                  R=                  U5        MO     Ml     SU l#        g )N-r   r   T)r;   r   r   r;  r@  r   r   r   r  r  rX  r   r   rI   r   r]   rn  ro  )rq   rr   r   re  r  )r   r.      g      ?rc   r   )$r   r  items_generate_mesh_idr0   splitrI   joinrQ   listr  rl   r,  rg   r;  r@  intr   r   r   r  r  r  r   r   rf  r   r  r  r)  ru   rd   rX   r   r   r   )r:   	topo_info
local_sizemesh_keymesh_valmesh_idr   mesh_fieldsmachine_idsr  machine_valrc   
device_valre  link_valsource_device_idtarget_device_iddevice_linkr  r  machine_link	mesh_links                         r    _build_from_topoCluster._build_from_topo  s   #+"+//"3H,,.G*D"..-K#ADI XXk!"o6DN uS]34K#CM2
&2!ZF#.??:#> *v6$'(D$E!$'(D$E!!$[__X%>!?$'(D$E!"%kooi&@"A 14KOOI4N0O-"-//)"<J#"{3"z2	F #-.."8FK#->>'#:FL'*:>>++F'GF$'*:>>++F'GF$$'
x(@$AFM&&v. #= !, 8H'/||4F'G$'/||4F'G$"&//!#K
 (0||F';K$,/[0I,JK)*-hll9.E*FK'-5K*&.ll5$&?KO".+/??./KO.2.>.>KO((5' !9(   )_ 3` ]]AAv #'14#8L(-L%-/L*+.L(.7L+MM,/ ' # OO$$T*K #4L ''A__++6 AD1	!&	&(	#$'	!'-	$((3 , ( 
r   c                     [        U5       n[        R                  " U5      nS S S 5        U R                  W5        g ! , (       d  f       N = fr3   )openjsonloadr  )r:   json_file_path	json_filer  s       r    build_from_fileCluster.build_from_file6  s6    .!Y99Y/L "l+ "!s	   =
Ac                 H    U R                   nU =R                   S-  sl         U$ r   )r  )r:   cur_mesh_ids     r    r(  Cluster._generate_mesh_id;  s$    &&Ar   c                 H    U R                   nU =R                   S-  sl         U$ r   )r  )r:   cur_machine_ids     r    r  Cluster._generate_machine_id@  s$    ++ar   c                    / nU R                   (       aM  / nU R                  R                  5        H,  nUR                  UR                  R                  5       5        M.     OU R                  R                  5       nU HN  nUR                  R                  5        H-  nUR                  [        U   :X  d  M  UR                  U5        M/     MP     U$ r3   )
r   r  r   r  rX   r   rX  rI   r   r  )r:   r  rX  r  r   rc   re  s          r    get_all_devicesCluster.get_all_devicesE  s    :: O..0&&t}}';';'=> 1 #mm224O&G!//002;;*["99NN6* 3 ' r   c                    S nSnU R                  U5      nU R                  U5      nUR                  nUR                  nUR                  n	UR                  n
U	R                  U
R                  :w  a2  U R                  R                  U	R                  U
R                  5      nU$ UR                  UR                  :w  aM  U R                  R                  U	R                  5      nUR                  UR                  UR                  5      nU$ U R                  R                  U	R                  5      nUR                  UR                  5      nUR                  X5      nU$ )N  )ri  rc   r   r;   r  r{   r   ri   )r:   r7  r8  betaconvert_base
src_device
tgt_devicesrc_machinetgt_machinesrc_meshtgt_meshrt   r   rc   s                 r    get_beta_topoCluster.get_beta_topoT  s   __%56
__%56
 (( ((####;;(++%??++HKKED  ^^{~~-??++HKK8D==@D
  ??++HKK8D&&{~~6G##$4GDr   c                 "   U R                   (       a  U R                  X5      nO.U R                  U5      nUR                  nUR	                  X5      nS nSnS nUc  [
        R                  nOUR                  nUS:X  a  SnU$ SXS-  S-  -  -  nU$ )NrP  g        r   r   r
   i@B )r   rY  ri  rc   r{   r   r*  r  )	r:   r7  r8  rt   re  rc   rQ  rR  r  s	            r    get_betaCluster.get_betak  s    ::%%&6ID__%56FnnG##$4GD	<22IID  	1_u%<=>Dr   c                     S nS nU R                  U5      nUR                  nUR                  X5      nUb  UR                  nU$ [        R
                  nU$ r3   )ri  rc   r{   r  r   r)  )r:   r7  r8  rQ  r  re  rc   rt   s           r    get_hopCluster.get_hop  s]    !12.. 0C((C 
 ""C
r   c                    [        5       n[        5       nU H  nU R                  U5      nUR                  R                  nUR	                  U5        U R
                  (       d  MN  UR                  R                  R                  nUR	                  U5        M     U R
                  (       a   [        U5      S:X  a  [        U5      S:X  a  gg[        U5      S:X  a  gg)Nr   FT)setri  rc   r;   addr   r   rl   )r:   
device_idsr3  mesh_ids	device_idre  r  r1  s           r    cross_machineCluster.cross_machine  s    e5#I__Y/F**JOOJ'zzz ..--00W% $ ::8}!c+&6!&;"r   c                 ~    U R                   (       a  U$ / nU H!  nUR                  U R                  U   5        M#     U$ r3   )r   r  r  )r:   group_ranksrd  ranks       r    convert_rank_to_device_id!Cluster.convert_rank_to_device_id  s?     ::
Dd44T:;  r   c                     [        5       nU H;  nU R                  U5      nUR                  R                  nUR	                  U5        M=     [        U5      nUS:  d   eU$ r   )rb  ri  rc   r;   rc  rl   )r:   rd  r3  rf  re  r  counts          r    get_involved_machine_count"Cluster.get_involved_machine_count  sY    e#I__Y/F**JOOJ' $ K qyyr   c                     U R                   (       aA  SnU R                  R                  R                  5        H  nXR	                  5       -  nM     U$ [        U R                  5      $ r   )r   r  r   r   rm   rl   r8   )r:   nr   s      r    rm   Cluster.get_num_machines  sP    ::A..557**,, 8Ht~~&&r   c                 @    U R                   (       d   eU R                   $ r3   )r  rA   s    r    get_num_devices_per_machine#Cluster.get_num_devices_per_machine  s     ,,,,,,,r   c                 ^    SnU R                   R                  5        H  nUSU S3-  nM     U$ )Nr   z	machine: 
)rX   r   )r:   r   rc   s      r    r   Cluster.__str__  s6    }}++-GYwir**C .
r   c                 "    U R                  5       $ r3   r   rA   s    r    r   Cluster.__repr__  r   r   )r  r  r  r  r  r8   r  r  r  r  r  r   r  r  N)r  r  r   r   r   i  r      x  T=  iz  K      )$r   r   r   r   __doc__r=   r   r  r   r  r  r  r  rX   rd   r  rf  ru   ri  r  r<  rD  r(  r  rM  rY  r\  r_  rg  rl  rp  rm   rv  r   r   r   r   r   r    r  r    sP   
* ! ! " "
 l,\ ' ' ' '     ! !  !*F # #*+
:'xRh,


.0
*	'-r   r  c                 J   S n[        5       nU (       aQ  U" U 5      (       aD  SU S   ;   a  UR                  U S   S   5        U$ U S   S   nU S   S   nU S   S   nU S   S   nGOU(       Ga  [        R                  " S5      n[	        5       n	U	R                  5         [        [        R                  " S	5      5      n
[        R                  " S
5      n[        [        R                  " S5      5      n[        [        R                  " S5      5      n[        [        R                  " S5      5      n[        X-
  U-  5      nU
S:  GaV  UGbR  [        5       nUR                  S5      u  nnSnU SU 3nUS:X  a9  UU;   a3  [        U5      nUR                  5         [        R                  SU 35        [        U5      nU	R                  S   nUS:X  a2  SnU(       d)  UR                  SU SU 3U	R                   S9nU(       d  M)  SnU(       Ga   UR#                  SS9nU(       a  [%        U5      U
:X  a  0 nUR'                  5        H`  u  nnUR                  S5      u      nnnUU;  a  / UU'   [%        UU   5      n [(        R*                  " U5      n!UU   R-                  U!5        Mb     UR/                  UU5        SnOFU(       a  [%        U5      OSn"[        R                  SU" SU
 S35        [0        R2                  " S5        U(       a  GM   SnU(       d<  UR                  SU 3SS9nU(       d  [        R                  S U S!35        U(       d  M<  US:X  a  Sn[        [        R                  " S"5      5      n"U(       a  UR#                  S#S9nU(       a7  [%        U5      U":X  a(  WR5                  5         Sn[        R                  S$5        O+[        R                  S%5        [0        R2                  " S5        U(       a  M  [        R                  S&[(        R6                  " UR8                  R;                  5       S'S(9 35        S n#UR8                  R<                  R?                  5        H-  n$U#c  U$R@                  n#M  U#U$R@                  :w  d  M&  SUl!        M/     U$ U	R                  S   SU	R                  00nUR/                  UU5        SUl!        [        R                  S&[(        R6                  " UR8                  R;                  5       S'S(9 35        U$ [        R                  " S5      nUc  SnO[        U5      n[        R                  " S"5      n%U%c  SnO$[        U%5      n%U%U-  S:X  d   e[        U%5      U-  n[        R                  " S)S 5      S*:X  ac  [E        5       n&U&(       a  [G        U&S   5      OS n'U'n[        [H        RJ                  RL                  RN                  RQ                  U'5      5      S+-  nOp[H        RR                  RT                  RW                  5       n(U((       d   S,5       eU(R@                  n' [X        R                  " S-U'5      n)U)S   n[        U)S.   S S/ 5      n[        R                  S0R]                  UUUU[H        R^                  Ra                  5       [        R                  " S
S 5      5      5        S1S2S3S4.S5S6S7S4.S8.n*US9:X  a  U*S9   OU*S:   n+URc                  UUUUU+S;   U+S<   U+S=   S>9  U$ !   [        U(RZ                  5      S+-  nU'n N= f)?Nc                     U (       d  gSU ;  a  gSU S   ;  a)  SU S   ;  a  gSU S   ;  a  gSU S   ;  a  gSU S   ;  a  ggg)	NFclusterpath	num_nodesnum_gpusr  r  Tr   )json_configs    r    is_by_json_config.get_default_cluster.<locals>.is_by_json_config  sf    K'[33k)&<< [%;; k)&<< {9'== r   r  r  r  r  r  r  PADDLE_MASTERPADDLE_NNODESPADDLE_CURRENT_ENDPOINTPADDLE_GLOBAL_RANKPADDLE_LOCAL_RANKPADDLE_LOCAL_SIZEr   :i:0  zserver start at: device_type_fullFz/topo/data//)keyrM   Tz
/topo/data)r  z%get global_topo failed, actual size: z, expected size: z, retry later!r   z/topo/status/okzput ok status for rank z failed, retry later!PADDLE_GLOBAL_SIZEz/topo/statuszserver stopped successz"server stopped failed! retry laterzcluster_topo_info: r
   )indentPADDLE_DISTRI_BACKENDxccli ʚ;z#Auto parallel just runs on gpu now.z[ , -]zeNode Count: {}, Local Device Size: {}, GPU Model: {}, GPU Memory: {}GB, World size: {}, EndPoint: {}.r~  r  iH )dpsphpi%  i,L  i	 )r  r  r  r  r  r  r  )r  r  r  r  r  r  r  )2r  rD  osgetenvr	   detectr,  r   r)  r   startloggerinfor   rc   putjson_object
get_prefixrl   r'  r@  loadsr  r<  timesleepstopdumpsr  r   r   r   r<   r  r   r   paddlerz  core	libpaddle_get_device_total_memoryre  cudaget_device_propertiesretotal_memoryformatdistributedget_world_sizer  ),r  auto_configr  r  r  local_device_countr  r   master_endpoint
local_toponnodescurr_endpointglobal_rank
local_rankr.  node_idnode	master_ip_	free_portserver_endpointserverclientr  respretryglobal_topo	topo_dictr  rM   	mesh_typeidxmesh_idxglobal_topo_valueglobal_sizer<   r   global_device_countcustom_device_typesgpu_namegpu_info	re_resultgflops_infodefault_gflopss,                                               r    get_default_clusterr    s   & iG(55[++##K	$:6$BCN$Y/<J!,Y!7
!C#I.{;I +L9F	))O4')
RYY/0		";<"))$89:#678
#678
{/:=>A:/56D*005LIqI!*1YK8OQ9#=!),//@ABo.F$,,-?@KQ!::)+ayA(44 & D $ E$//L/A3{#3v#= "I&1&7&7&9
U25))C./1aC$I535Ii0#&y';#<,0JJu,=)!),334EF ': ,,Y
C!E6A#k"2qKKK?}L]^d]eest JJqM% %( Dzzk]&C4zPKK1+>ST d a!")),@"AB!,,,@DD	[ 8 %$<=$HI

1 e KK%djj1C1C1K1K1MVW&X%YZ D**1188:<99Dtyy(*. ; N ""#56z))9I
 $$Y
;#GOKK%djj1C1C1K1K1MVW&X%YZ N  YY':;%!"!$%7!8 ii(<=&J"%&9":&);;q@@@015GGJ99,d3v="<">/B'*+  !I  **CCHMF }}))??AHBBB8}}H%HHY9	%aL	Yr]3B/0
 KKovv--/II/6	
	 5757K
  )F2FF8K  &&'$T*$T*$T* '  NA%X223@$	s    -\ \")NN)!r@  loggingr  r  r  enumr   r   r  paddle.base.corer   &paddle.distributed.launch.context.noder   )paddle.distributed.launch.utils.kv_clientr   )paddle.distributed.launch.utils.kv_serverr   (paddle.distributed.launch.utils.topologyr	   utils.log_utilsr   r   r"   r0   r   r   r   r,  rx  r  INFOr  r  r   r   r    <module>r     s      	 	     7 7 > > G )    	w 	 	B
 B
J#
 #
Lr rjZ Zzb bJg  g TL	 L	^ 
GLL	!Er   