
    ёib(                         S SK rS SKrS SKJr  S SKJr  S SKJ	r	J
r
  SSSS.r\R                  S\R                  S\R                  S0rSqS rS	 rS
 r " S S5      r " S S5      rg)    N)_current_expected_place_)async_offload_with_offsetcreate_async_load   )gpunpuxpu      c                     U c  g [         R                  " 5       (       aF  [         R                  " 5       (       d,  U R                  5       R	                  5       R                  5       $ g N)paddleis_compiled_with_cudais_compiled_with_rocmvalue
get_tensor_share_cuda)tensors    ]/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/optimizer/fusion_utils.py_share_tensor_ipc_metar   *   sH    ~##%%f.J.J.L.L||~((*6688    c                  L   [         c  [        R                  " 5       (       a  Sn O8[        R                  " 5       (       a  Sn O[	        5       n UR                  5       n U [        R                  5       ;   d    S[        R                  5        SU  S35       eU q [         $ !   Sn  NF= f)Nr   r	   unknownz&tensor fusion helper now only support z, but got device z	 instead.)__current_device_type__r   r   is_compiled_with_xpur   get_device_type	alignmentkeys)device_typecurrent_devices     r   get_current_device_typer!   2   s    &''))K((**K57N(,<<> inn.. 	
4Y^^5E4FFWXcWddmn	
. #.""('s   B B#c                     [         R                  " U R                  5      [        U R                     -  nU[
        [        5          -  nUS:X  a  SO[
        [        5          U-
  nU[        U R                     -  nU$ Nr   )npprodshapealigndtyper   r!   )tsize	remainingalialign_s        r   	get_alignr.   F   sq    77177eAGGn,Dy!8!:;;I > 	
.01I= 
 E!''N"FMr   c                       \ rS rSrS\R
                  4S jr\R                  " 5       S 5       r	\R                  " 5       S 5       r
\R                  " 5       S 5       rS r\S 5       rS	rg)
FusionStorageR   Nc                 h   [        U[        5      (       d   S5       e[        U[        5      (       d   S5       e[        U[        5      (       d
  Ub   S5       eXl        X l        X0l        0 U l        0 U l        0 U l        X@l        S U l	        SU l
        U R                  5         U R                  5         g )Nzaccumulators must be a dictzmaster_weights must be a dictz*merged_model_params must be a dict or Noner   )
isinstancedictaccumulatorsmaster_weightsmerged_model_paramsaccumulators_metamaster_weights_metamerged_model_params_metar(   bufferoffsetbuild_buffermapping_tensor)selfr5   r6   r7   r(   s        r   __init__FusionStorage.__init__S   s     ,--L/LL-.$//P1PP/*D115H5P	87	8P(,#6 !##% (*%
r   c                    SU l         U R                  R                  5        H  u  pXR                  ;  a  0 U R                  U'   UR                  5        H  u  p4UR                  U R                  :X  d   eUR                  5       [        U5      -   nU R                   U R                   U-   UR                  UR                  S.U R                  U   U'   U =R                   U-  sl         M     M     U R                  R                  5        H  u  pUR                  U R                  :X  d   eUR                  5       [        U5      -   nU R                   U R                   U-   UR                  UR                  S.U R                  U'   U =R                   U-  sl         M     U R                  b  U R                  R                  5        H  u  pUR                  U R                  :X  d   eUR                  5       [        U5      -   nU R                   U R                   U-   UR                  UR                  S.U R                  U'   U =R                   U-  sl         M     [        R                  " U R                   4U R                  S9U l        g )Nr   )startendnamer&   )r(   )r<   r5   itemsr8   r(   _numelr.   rE   r&   r6   r9   r7   r:   r   zerosr;   )r?   kv	para_namevar_tmpsrc_lens         r   r=   FusionStorage.build_bufferk   s   %%++-DA...,.&&q)&'ggi"	}}

222!..*Yw-??![[;;0#LL$]]	8&&q))4 w& '0 . ''--/DA77djj(((hhj9Q</G{{W,	+D$$Q' KK7"K 0 ##/00668ww$**,,,((*y|3![[;;0FFWW	4--a0 w& 9 llDKK>Dr   c                    U R                   R                  5        HE  u  pUR                  5        H,  u  p4U R                  U R                  U   U   US   US   S9  M.     MG     U R                  R                  5        H)  u  pU R                  U R
                  U   US   US   S9  M+     U R                  R                  5        H)  u  pU R                  U R                  U   US   US   S9  M+     g )NrC   rD   )srcrC   rD   )r8   rF   mapping_tensor_implr5   r9   r6   r:   r7   )r?   rI   rJ   rK   metas        r   r>   FusionStorage.mapping_tensor   s    **002DA#$779	(())!,Y7w-U )  $- 3 ,,224DA$$''*!G*!E( %  5
 11779DA$$,,Q/jeH %  :r   c                 `   UR                   nUR                  nSUl        UR                  5         [        R                  " UU R
                  R                  X#5      5        UR                  5       R                  U5        XQl        U R
                  R                  X#5      R                  U5        g )NT)
r&   stop_gradientflatten_r   assignr;   _slicer   	_set_dims_share_buffer_to)r?   rP   rC   rD   tensor_shaperU   s         r   rQ   !FusionStorage.mapping_tensor_impl   s    yy)) KKu*	
 	""<0)5&77<r   c                 ,    [        U R                  5      $ r   )r   r;   r?   s    r   _refresh_buffer_ipc_meta&FusionStorage._refresh_buffer_ipc_meta   s    %dkk22r   c                 "    U R                  5       $ r   )r_   r^   s    r   buffer_ipc_metaFusionStorage.buffer_ipc_meta   s    ,,..r   )	r5   r8   r;   r(   r6   r9   r7   r:   r<   )__name__
__module____qualname____firstlineno__r   float32r@   imperative_baseno_gradr=   r>   rQ   r_   propertyrb   __static_attributes__ r   r   r0   r0   R   s    
 !nn0 (E (ET  * = =3 / /r   r0   c                       \ rS rSrS r\R                  " 5       S 5       rS r\R                  " 5       S 5       r	S r
S r\R                  " 5       S 5       rS	rg
)FusionStorageHelper   c                     [        5       U l        S U l        S U l        S U l        S U l        S U l        S U l        / U l        U R                  UUUU5        g r   )
r   async_loaderr8   r9   r:   r;   
cpu_bufferbuffer_lengthtasks
reset_meta)r?   r8   r9   r:   rb   s        r   r@   FusionStorageHelper.__init__   s\     ./!%#' (,%!
$		
r   c                 R   [        U[        5      (       d   S5       eXl        [        U[        5      (       d   S5       eX l        [        U[        5      (       d
  Ub   S5       eX0l        [        U[
        5      (       d   S5       e[        U5      S;   d   S5       e[        R                  R                  R                  R                  U5      n[        R                  " U5      U l        U R                  R                  5       U l        U R                  R!                  5       U l        g )Nz accumulators_meta must be a dictz"master_weights_meta must be a dictz/merged_model_params_meta must be a dict or Nonezbuffer_ipc_meta must be a tuple)      zbuffer_ipc_meta must be a tuple with length 5 when FLAGS_use_virtual_memory_auto_growth is True or 7 when FLAGS_use_virtual_memory_auto_growth is False.)r3   r4   r8   r9   r:   tuplelenr   basecoreDenseTensor_new_shared_cuda	to_tensorr;   
pin_memoryrs   rG   rt   )r?   r8   r9   r:   rb   
new_tensors         r   rv   FusionStorageHelper.reset_meta   s"    +T22 	
.	
2 "3-t44 	
0	
4 $7 /66'/	= =	=0 )A%/511 	
-	
1 ?#v- 	
 g	
- [[%%11BB

 &&z2++002![[//1r   c                 <    U R                  SU R                  5        g r#   )sync_partial_paramrt   r^   s    r   
sync_paramFusionStorageHelper.sync_param   s    4#5#56r   c           	      L   [        U[        5      (       d   S5       e[        U[        5      (       d   S5       eUS:  d   S5       eX R                  ::  d   S5       e[        U R                  U R
                  UUX!-
  U R                  S9nU R                  R                  U5        g )Nzstart must be an integerzend must be an integerr   zstart must be non-negativez9end must be less than or equal to the total buffer length)
src_tensor
dst_tensor
src_offset
dst_offsetoffload_sizerr   )	r3   intrt   r   r;   rs   rr   ru   append)r?   rC   rD   tasks       r   r   &FusionStorageHelper.sync_partial_param   s    %%%A'AA%#s##=%==#z777z((( 	
G	
( ){{+**
 	

$r   c                 L   [        U R                  5      S:X  a  g U R                  R                  S5      n[        U R                  5      S:  aF  U R                  R                  S5      nUR                  5         [        U R                  5      S:  a  MF  UR	                  5         g )Nr   )r|   ru   pop	cuda_waitcpu_wait)r?   	last_taskr   s      r   wait_allFusionStorageHelper.wait_all  ss    tzz?aJJNN2&	$**o!::>>!$DNN $**o! 	r   c                    S0 0nU R                   R                  5        H=  u  p#UR                  5        H$  u  pEU R                  U5      nXaUR                  '   M&     M?     U R                  R                  5        H  u  p#U R                  U5      nXaS   U'   M     U R
                  (       a@  0 US'   U R
                  R                  5        H  u  p#U R                  U5      nXaS   U'   M     U$ )Nr6   r7   )r8   rF   restore_tensor_from_metarE   r9   r:   )r?   
state_dictrI   rJ   rK   tensor_metarL   s          r   r   FusionStorageHelper.state_dict  s    &+
**002DA*+'')&	77D+27<<( +4 3 ,,224DA33A6G.5'(+ 5 ((02J,-55;;=77:7>01!4 > r   c                     US   nUS   nUS   nUS   nU R                   R                  XE5      nUR                  5       R                  U5        X6l        U$ )Nr&   rE   rC   rD   )rs   rX   r   rY   rE   )r?   r   r&   rE   rC   rD   r   s          r   r   ,FusionStorageHelper.restore_tensor_from_meta+  s_    G$6"G$% ''3%%e,r   )r8   rr   r;   rt   rs   r9   r:   ru   N)rd   re   rf   rg   r@   ri   rj   rv   r   r   r   r   r   rl   rm   r   r   ro   ro      sn    
, !2 !2F7    "   r   ro   )numpyr$   r   paddle.autogradautogradri   paddle.frameworkr   #paddle.incubate.tensor.manipulationr   r   r   float16bfloat16rh   r'   r   r   r!   r.   r0   ro   rm   r   r   <module>r      s      ) 	 NNA
OOQ
NNA	  #(	m/ m/`r rr   