
    x-jw                        d dl mZ d dlZd dlmZ d dlmZmZ 	 d dlmZ n# e	$ r dZY nw xY wddl
mZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddlmZmZ g Zd Z G d de          ZdS )    )annotationsN)	framework)P2POpbatch_isend_irecv)deep_ep   )logger   )FakeMicroDatasetHybridParallelOptimizerPipelineDatasetPreprocessorPipelineParallel)BatchCommHelper)ScheduleChunk)
EventStoreWeightGradStorec                <    |                                  }d|_        |S )NF)detachstop_gradient)xos     p/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/fleet/meta_parallel/dualpipev.pydetach_and_requires_gradr   /   s    	

AAOH    c                  "    e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
d	 Zd-d.dZd Zd-dZd-dZd/d0dZ	 	 	 d1d2dZ	 d3d4dZd4dZd4dZd.dZd.dZd.dZd.d Z	 	 	 d5d6d$Z	 	 	 d7d8d%Z	 	 	 	 	 	 d9d:d'Zd( Zd) Zd* Z	 	 d;d+Z	 	 d<d,Z  xZ!S )=DualPipeVParallelz
    An implementation of the DualPipeV, based on
    https://github.com/deepseek-ai/DualPipe/blob/main/dualpipe/dualpipe.py.
    c                X   t                                          |||           t          t          | j                  d          | _        t          j        d| j                    | j        | _	        | j
        j        | _        | j
        j        | j        dz
  | j
        j        z           | _        | j
        j        | j        dz   | j
        j        z           | _        |                                 | _        |                                  | _        t)          | j                  | _        d S )N)layershcgstrategyoverlapped_forward_backwardz:Using DualPipeVParallel with overlapping forward backward=r
   )super__init__hasattrtype_layersr!   r	   info
num_stages	num_rankspp_grouprank
group_rankranks
world_size	prev_rank	next_rankis_pipeline_first_stageneed_broadcast_metaneed_recv_metar   _using_cache_p2p_helper)selfr   r   r    	__class__s       r   r#   zDualPipeVParallel.__init__;   s   C(CCC+2 =,
 ,
( 	kIikk	
 	
 	
 -,,_q DM$<<
 ,_q DM$<<
 $(#?#?#A#A "&">">"@"@@*4+<==r   c                    | j         dk    S Nr   )r,   r6   s    r   r1   z)DualPipeVParallel.is_pipeline_first_stageS   s    !##r   c                (    | j         | j        dz
  k    S Nr
   )r,   r)   r:   s    r   is_pipeline_last_stagez(DualPipeVParallel.is_pipeline_last_stageV   s    $.1"444r   c                   g g f| _         g g f| _        g g f| _        g g f| _        g | _        g g f| _        g | _        ddg| _        ddg| _        ddg| _	        ddg| _
        ddg| _        ddg| _        g | _        g | _        g | _        d S r9   )input_tensorsoutput_tensorsinput_grad_tensorsoutput_grad_tensorsloss_tensorsschedule_chunksloss_fn_chunkscurrent_f_acc_idcurrent_b_acc_idcurrent_send_f_acc_idcurrent_send_b_acc_idcurrent_recv_f_acc_idcurrent_recv_b_acc_idcomm_forward_opscomm_backward_opsto_freer:   s    r   _reset_stateszDualPipeVParallel._reset_statesY   s     "X!2h#%r($&8 13 "Bx  "#A!"A&'V"&'V"&'V"&'V"-/.0,.r   c                   |                                  o|dk    }|r=|J | j        |                             t          ||                   d                    | j        rd | j        |         |<   | j        |         |         S r9   )r1   r?   appendnextforward_only)r6   micro_datasetsphaseacc_idis_first_stages        r   _get_forward_inputsz%DualPipeVParallel._get_forward_inputsm   s    5577FEQJ 	M!---u%,,T.2G-H-H-KLLL 	504Du%f-!%(00r   c                    |                                  o|dk    }|r=| j        r6|J t          ||                   d         }|                     |           |S d S r<   )r1   _compute_lossrR   _check_micro_batch_data_valid)r6   rT   rU   rV   is_last_stagelabelss         r   _get_forward_labelsz%DualPipeVParallel._get_forward_labelsv   so    4466E5A: 	T/ 	!---./003F..v666M4r   c                   |                      |||          }d }| j        sq | j        j        d         ||          }t          j                            d          5  | j        dk    r| j        s
|| j        z  }d d d            n# 1 swxY w Y   n^| j        j        d         	                                }||_
        | j        dk    r| j        s| j        |_        |                    |          }|                     |||           d S )Nr   Fenabler
   )r^   r!   r&   _loss_fnpaddleamp	auto_castaccumulate_steps_delay_scale_lossbuild_schedule_noder]   scale_loss_factorforward_store_forward_loss)r6   rT   rU   rV   logitsr]   loss_fn_nodeloss_tensors           r   _loss_computezDualPipeVParallel._loss_compute   s^   )).%HH/ 
	72$,/266BBK%%U%33 F F(1,,T5K,"-0E"EKF F F F F F F F F F F F F F F  <03GGIIL"(L$q((1G(151F.&..v66K  \BBBBBs   BB
B
c                J   | j         |                             |           |                                 r0|dk    r*| j        d                             d |D                        |                                 o|dk    }|s"| j        |                             |           d S d S )Nr   r
   c                ,    g | ]}t          |          S  )r   ).0outputs     r   
<listcomp>z<DualPipeVParallel._store_forward_tensors.<locals>.<listcomp>   s!    HHHf)&11HHHr   )rD   rQ   r=   r?   r1   r@   )r6   rU   outputsschedule_chunkr\   s        r   _store_forward_tensorsz(DualPipeVParallel._store_forward_tensors   s    U#**>:::&&(( 	UaZZq!((HHHHH   4466E5A: 	7&--g66666	7 	7r   NrU   intreturnNonec                   | j         |         }| j         |xx         dz  cc<   |                     |||          }| j        r1| j                            |          }|                    |          }nd }| j                            ||          }t          |t          j                  r|gn|}| 	                                o|dk    }|r| j
        r|                     ||||           |                     |||           d S )Nr
   chunk_id)rF   rX   r!   r&   get_schedule_chunkrj   
isinstancerc   Tensorr1   rZ   ro   rx   )r6   rU   rT   rV   inputsrw   rv   r\   s           r   _forward_computez"DualPipeVParallel._forward_compute   s   &u-e$$$)$$$)).%HH+ 	C!\<<e<LLN$,,V44GG!Nl**6E*BBG)'6=AAN7))w4466E5A: 	GT/ 	G~ufgFFF##E7NCCCCCr   c                    | j         |         |         }d | j         |         |<   | j        |         |         }d | j        |         |<   d t          ||          D             }t          t          |           \  }}||fS )Nc                     g | ]\  }}|||fS Nrr   )rs   tgs      r   ru   z:DualPipeVParallel._get_backward_inputs.<locals>.<listcomp>   s(     
 
 
q!Q]QF]]]r   )r@   rB   ziplist)r6   rU   rV   rv   output_grads	non_emptys         r   _get_backward_inputsz&DualPipeVParallel._get_backward_inputs   s    %e,V4-1E"6*/6v>26 '/
 
"7L99
 
 
	 !%S)_ 5 5$$r   c                Z   || j         |         |         }d |D             }d | j         |         |<   t          |t          j                  r|f}|                                 r(|dk    r"| j        d                             |           d S | j        |                             |           d S )Nc                .    g | ]}||j         |j        S r   )r   gradrs   r   s     r   ru   z=DualPipeVParallel._store_backward_tensors.<locals>.<listcomp>   s-       M!/M !MMr   r
   r   )r?   r   rc   r   r=   rB   rQ   rA   )r6   rU   rV   input_gradsr   s        r   _store_backward_tensorsz)DualPipeVParallel._store_backward_tensors   s    '.v6F   K
 -15!&)k6=11 	)&.K&&(( 	?UaZZ$Q'..{;;;;;#E*11+>>>>>r   c                v   |                                  o|dk    }|r| j        rt          |t          t          f          rt          |          dk    sJ |d         }t          |t          j                  s
J d            | j        	                    |           | j
        	                    |           d S d S d S )Nr
   r   z4Currently, loss_fn should obtain Paddle.Tensor dtype)r1   rZ   r   tupler   lenrc   r   rC   rQ   rE   )r6   rU   rn   rm   r\   s        r   rk   z%DualPipeVParallel._store_forward_loss   s    4466E5A: 		5T/ 		5+t}55 -;''1,,,,)!nk6=99  F 9 $$[111&&|44444		5 		5 		5 		5r   F	enable_zbboolc                   | j         rd S | j        |         }| j        |xx         dz  cc<   |                                 o|dk    }|t          _        d }t
          j                            d          5  |r| j        |         }| j	        ru| j
        |         }| j        |         |         }| j        	                    t          g           d d ||d | j        d d 	  	        \  }	}	}d | j
        |<   d | j        |         |<   n| j        r8t
          j                            | j                            |                     nt
          j                            |           n|                     ||          \  }
}| j	        rX| j        |         |         }| j        	                    t          g           d d |d |d d d 	  	        \  }	}	}d | j        |         |<   n@t'          |
          dk    r-d |
D             }
t
          j                            |
|           d d d            n# 1 swxY w Y   dt          _        |rt          j                     |                     |||           d S )	Nr
   Fr`   combine_bw_event_to_wait	pp_streamr   c                     g | ]}|j         	|S rr   )r   r   s     r   ru   z7DualPipeVParallel._backward_compute.<locals>.<listcomp>  s    "M"M"MQ_"M1"M"M"Mr   )tensorsgrad_tensorsr   )rS   rG   r1   r   enabledrc   rd   re   rC   r!   rE   rD   r&   r   scalerautogradbackwardscaler   r   flushr   )r6   rU   r   rV   r\   r   lossrm   backward_chunk_rv   r   s               r   _backward_computez#DualPipeVParallel._backward_compute   s    	F&u-e$$$)$$$4466E5A:"+Z!!!// 2	 2	 1(03 7#'#6v#>L%)%9%%@%HN@@)"--  *(  K59&* A 
 
 &Aq+ 37D'/:>D(/77{ 7001B1B41H1HIIII006666(,(A(A%(P(P%3 %)%9%%@%HN@@)"--  * ( 59&* A 
 
 &Aq+ ;?D(/777||a''"M"Mg"M"M"M00$+)5 1   _2	 2	 2	 2	 2	 2	 2	 2	 2	 2	 2	 2	 2	 2	 2	f #( 	$!###$$UF$LLLLLs   4F#H##H'*H'forward_phasebackward_phasec                   | j         r|                     ||           d S | j        s-|                     ||           |                     |           d S | j        |         }| j        |xx         dz  cc<   |                     |||          }|                     |||          }|J| j        j        d         	                                }	||	_
        | j        dk    r| j        s| j        |	_        nd }	| j        |         }
| j        |xx         dz  cc<   |                                 o|dk    }|r| j        |
         }d }nd }|                     ||
          \  }}| j                            |          }| j        |         |
         }| j                            |||	|||| j        ||r;| j        j                            t0          j                                                  nd 	  	        \  }}}d | j        |         |
<   |                     |||           |                     |||	           |                     ||
|           d S )Nr
   r   r}   r   r   )rS   r   r!   r   rF   rX   r^   r&   rb   rh   r]   rf   rg   ri   rG   r1   rE   r   r   rD   r   r*   process_group
get_streamrc   r   _current_expected_place_rx   rk   r   )r6   r   r   rT   combine_backward_event_to_waitpass_pp_streamforward_acc_idforward_inputsforward_labelsforward_loss_fn_nodebackward_acc_idis_last_stage1backward_loss_fn_nodebackward_gradsr   forward_chunkr   forward_outputsforward_lossbackward_input_gradss                       r   _forward_backward_computez+DualPipeVParallel._forward_backward_compute  s     	!!-@@@F/ 	!!-@@@"">222F .}=m,,,1,,,11M>
 
 11M>
 
 %#'<#8$!!## ! +9 '$q((1G(9=9N$6#'  /?n---2---5577ONa<O 	$($7$H!!NN$(! $ 9 9! !A~ 777OO-n=oNL44$%)G
 &DM/::(AACC    5   	<';& AE^,_= 	##?M	
 	
 	
 	  <)=	
 	
 	

 	$$O9M 	% 	
 	
 	
 	
 	
r   c                   | j         t          | j                   nd}| j        t          | j                  nd}|dk    ra|dk    r[t          j        t          j        }d t          _        |S t          j        t          j        	                                j
                  S |o| j        ot
          d u}| j        j                            t          j                                                  }|r| j        j                            d           |dk    r/t%          | j                   }|s|D ]}	|	                                 |r| j        j                            d           |rt          j        |          }
d}t          j        t          j                            t          j                            |                    5  t          j                                         d d d            n# 1 swxY w Y   t          j        d            | j        j                            d           d}|dk    r/t%          | j                  }|s|D ]}	|	                                 |r| j        j                            d           |r)|
                                 t          j        |          }n5t          j        t          j        	                                j
                  }g | _         g | _        |                                  |S )Nr   TF)stream_base)rL   r   rM   r   eventr   get_event_from_custom_streamrc   devicecurrent_streamr   _overlap_p2p_commr*   r   r   r   r   set_outer_waitr   waitstream_guardStreamcurrent_stream_waitset_free_tensors)r6   p2p_overlapuse_outer_event_waitcommon_forward_ops_numcommon_backward_ops_nume_tuse_stream_wait_eventpp_raw_streamfwd_reqsreqforward_event_to_waitbackward_outer_event_waitbwd_reqsr   s                 r   _commit_and_wait_commz'DualPipeVParallel._commit_and_wait_commt  s^   
 $0 %&&& 	 %1 &''' 	 
 "Q&&+Ba+G+G+ &#'
 
7,,..:  
 JD2Jwd7J 	 3>>5577
 
   	=M'66t<<<!A%%()>??H( #  CHHJJJJ 	>M'66u===  	$+$H% %! %*!'++$$$??  7 7  446667 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 N4   M'66t<<<(,%"Q&&()?@@H( #  CHHJJJJ$ 	>M'66u===  		!55777'.'K( ($$ (/'K,,..:( ($ !#!#''s   G22G69G6c                f    | j         rd S |                                  t          j                     d S r   )rS   r   r   popr:   s    r   _weight_passzDualPipeVParallel._weight_pass  s;     	F""$$$ 	r   c                H    |                      | j                   g | _        d S r   )_release_outputrN   r:   s    r   r   zDualPipeVParallel._free_tensors  s#    T\***r   c                Z   |                                  r|dk    s|                                 r|dk    rd S | j        |xx         dz  cc<   | j                            | j        |dk    r| j        n| j        | j        | j	                  }| j
        |                             |           d S )Nr   r
   alloc_on_comm_stream)r1   r=   rJ   r5   append_irecvrL   r/   r0   r*   r   r?   rQ   r6   rU   r   s      r   _recv_forwardzDualPipeVParallel._recv_forward  s    ((** 	uzz'')) 0:.3qjjF"5)))Q.)))"//!#qjjDNNdnM!%!7	 0 
 
 	5!((11111r   c                   |                                  r|dk    s|                                 r|dk    rd S | j        |         }| j        |xx         dz  cc<   | j        |         |         }| j                            | j        ||dk    r| j        n| j        | j	        | j
                   d| _
        | j                            |           d S )Nr
   r   F)r1   r=   rH   r@   r5   append_isendrL   r0   r/   r*   r2   rN   extendr6   rU   rV   r   s       r   _send_forwardzDualPipeVParallel._send_forward  s    ((** 	uzz'')) 0:.3qjjF+E2"5)))Q.)))%e,V4%%!#qjjDNNdnM$	
 	
 	
 $) G$$$$$r   c                l   | j         rd S |                                 r|dk    s|                                 r|dk    rd S | j        |xx         dz  cc<   | j                            | j        |dk    r| j        n| j        | j	        | j
                  }| j        |                             |           d S )Nr
   r   r   )rS   r1   r=   rK   r5   r   rM   r0   r/   r*   r   rB   rQ   r   s      r   _recv_backwardz DualPipeVParallel._recv_backward  s     	F((** 	uzz'')) 0:.3qjjF"5)))Q.)))"//"#qjjDNNdnM!%!7	 0 
 
 	 '..w77777r   c                   | j         rd S |                                 r|dk    s|                                 r|dk    rd S | j        |         }| j        |xx         dz  cc<   | j        |         |         }d | j        |         |<   | j                            | j        ||dk    r| j        n| j	        | j
                   d S )Nr   r
   )rS   r1   r=   rI   rA   r5   r   rM   r/   r0   r*   r   s       r   _send_backwardz DualPipeVParallel._send_backward  s     	F((** 	uzz'')) 0:.3qjjF+E2"5)))Q.))))%0815&v.%%"#qjjDNNdnM		
 	
 	
 	
 	
r   Trecvsendc                    |r|                      |           |                                  |                     ||           |r|                     |           d S d S r   )r   r   r   r   )r6   rU   rT   r   r   s        r   _forward_passzDualPipeVParallel._forward_pass$  st      	&u%%%""$$$e^444 	&u%%%%%	& 	&r   c                    |r|                      |           |                                  |                     ||           |r|                     |           d S d S r   )r   r   r   r   )r6   rU   r   r   r   s        r   _backward_passz DualPipeVParallel._backward_pass4  st      	'&&&""$$$ui000 	'&&&&&	' 	'r   recv0c	                R   |r|                      |           |                     |           |                                 o|dk     p|                                 o|dk    }	|                                 o|dk     p|                                 o|dk    }
|o| o| j        ot
          d uo|	o|
}|o| o| j        ot
          d uo|	o|
o| }|                     | |          }|                     |||||           |                     |           | 	                    |           d S )Nr
   r   )r   r   )
r   r   r1   r=   r   r   r   r   r   r   )r6   r   r   rT   r   first_chunk
last_chunk
main_stagelast_stage_and_first_chunkneed_send_forwardneed_send_backwardr   r   combine_bw_wait_events                 r   _forward_backward_passz(DualPipeVParallel._forward_backward_passD  s     	.}---N+++ ((**A}/A
 D))++B0B 	 ((**B~/B
 E))++C!0C 	
  ;;&; t#; #9'9 	  11&1 t#1 #9'9	1
 0/ 	 !% : :N0!
 !
 	&&+@) 	' 	
 	
 	
 	=)))N+++++r   c                6   t          |t                    r
 |            }t          |t                    st          |t                    s|S t	          ||                                 o|dk    |                                 o|dk    | j        | j                  }|S )zo
        for backward compatibility, wrap data to Fake FakeMicroDataset if it is of type list or tuple
        r   r
   )r   r   r   r   r   r1   rf   micro_batch_size)r6   datarU   micro_datasets       r   
_wrap_datazDualPipeVParallel._wrap_dataz  s     d788 	466D4'' 	*T42H2H 	K(((**9uz((**9uz!!
 
 r   c                F   t          |t                    s
J d            t          j                    j        s
J d            |                                 r|
J d            nd }|| _        || _        | j        	                                 | 
                    |           |S )Nz5optimizer should be HybridParallelOptimizer subclass.z*Please enable the generation of gradients.z7For the first and the last stage, the data must be set.)r   r   r   _dygraph_tracer	_has_gradr1   	optimizerlr_schedulerr&   train#register_sharding_comm_overlap_hook)r6   r  r  r  s       r   _prepare_trainingz#DualPipeVParallel._prepare_training  s    )%<== 	
 	
C	
 	
= (**4 	
 	
8	
 	
4 '')) 	##I $### D"(00;;;r   c                v   t          j        dgd          }|                                 rgt          | j                  dk    s
J d            | j        D ],}||                                                    d          z  }-| j        r
|| j        z  }t           j	        
                    || j        d           |S )Nr
   float32r   z4train_batch() in last stage should obtain valid lossT)groupsync_op)rc   zerosr1   r   rC   r   astyperg   rf   distributed
all_reducer*   )r6   loss_sum_tensorr   s      r   _broadcast_final_lossz'DualPipeVParallel._broadcast_final_loss  s     ,sI66'')) 	9t())A---F .-- ) C C4;;==#7#7	#B#BB% 94#88%%4=$ 	& 	
 	
 	
 r   c           	     h
   || _         | j        }| j        }| j        dk    r| j        |dz  k    sJ d| j        d|            || _        |                                  | j                            | j        | j	                   d| _	        | 
                    |d          }| 
                    |d          }||g}	||z
  dz
  dz  }
t          |
          D ]}|                     d|	           |dz   }|                     d           t          |          D ]y}|                     d|	dd           |                     d           |                     d|	|                                  p||dz
  k                |                     d           z||z
  dz
  }t          |          D ]Z}|                     dd	
           |                     d           |                                  |                     d|	d           [| j        |dz  z
  |z   dz   }||z
  dz
  dk    }t          |          D ]-}|dz   |k    }|dk    r|                                 r|                     d|	dd           |                     d           |                     dd           |                     d           |                     d           |                     dd|	d	|d	           |                     dd|	dd	d	           |                     dd|	|d	           |                     dd|	d	|                                            |                     dd|	|d	           /||z
  dz
  }t          |          D ].}|                     d           |                     dd|	           /|dz   }d}t          |          D ]X}||dz  k    r|dz  dk    rd	}|                     d|
           ||dz  k    r|dz  dk    rd	}|                     d|
           Y||z
  dz
  }t          |          D ]-}|                                  |                     dd	
           .|dz   }t          |          D ]}|                                  t(          j                                        sJ |                                  | j                                         t4          j                            d          5  |                                 }d d d            n# 1 swxY w Y   |                                  |S )Nr   r   zself.accumulate_steps=z, num_ranks=Fr
   )r   r   )r   T)r   )r   )r   r   r   )r   r   r   )r   r   )r   r   r`   )r   r,   r)   rf   rS   rO   r5   recv_meta_from_headr*   r3   r  ranger   r   r=   r   r   r   r   r   r   funcs_queueemptyr   r&   !allreduce_shared_weight_gradientsrc   rd   re   r  )r6   r  r   rS   compute_lossr+   r)   micro_dataset_phase0micro_dataset_phase1rT   step_1istep_2step_3step_4
have_step5is_last_chunkstep_5step_6r   step_7step_8
train_losss                          r   forward_backward_pipelinez+DualPipeVParallel.forward_backward_pipeline  s    N	!A%%$*?9q=*P*P*P4d#44	44 +Q*PP(
 	,,T]D<OPPP##tQ77#tQ77.0DE d"Q&!+v 	2 	2Aq.1111 1v 	" 	"Aq.u5IIIq!!!55777LQ!^    
 q!!!! T!A%v 	> 	>AT222q!!!q.u==== &Q6=A%)A-
 v 4	 4	AEVOMAvv..00 "&&>E '    &&q)))'''666&&q)))''***//&$(#0#' 0     //&#$(#' 0    //&#0#' 0     ++"#/3/J/J/L/L ,    ++",# ,     T!A%v 	> 	>A"""''1n==== 	v 	8 	8AFaKD1HMM 	Y777FaKD1HMM 	Y7777 T!A%v 	3 	3AT2222 v 	  	 A*0022222""$$$66888Z!!!// 	6 	63355J	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	s   2TTTc                    |                      |||          }|                     ||          }t          j                            d          5  |                                  d d d            n# 1 swxY w Y   |S )NFr`   )r  r+  rc   rd   re   _optimizer_step)r6   r  r  r  r   r*  s         r   train_batchzDualPipeVParallel.train_batchL  s     %%dI|DD33D&AA
 Z!!!// 	# 	#  """	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# s   A//A36A3r   )rU   ry   rz   r{   )F)rU   ry   r   r   rz   r{   )NNF)r   ry   r   ry   rz   r{   )FF)rz   r{   )NTT)rU   ry   r   r   r   r   rz   r{   )FTT)
rU   ry   r   r   r   r   r   r   rz   r{   )NTFFFF)r   ry   r   ry   r   r   rz   r{   )FT)NN)"__name__
__module____qualname____doc__r#   r1   r=   rO   rX   r^   ro   rx   r   r   r   rk   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r+  r.  __classcell__)r7   s   @r   r   r   5   s        
> > > > >0$ $ $5 5 5/ / /(1 1 1  C C C 7 7 7D D D D D&	% 	% 	%? ? ? ?"5 5 5 5BM BM BM BM BMP '+X
 X
 X
 X
 X
v 7<U( U( U( U( U(n      2 2 2 2 % % % %*8 8 8 8$
 
 
 
0 & & & & &&  ' ' ' ' '( #(4, 4, 4, 4, 4,l  &  .  ( V V V Vx        r   r   )
__future__r   rc   r   2paddle.distributed.communication.batch_isend_irecvr   r    paddle.distributed.communicationr   ImportErrorutils.log_utilr	   pipeline_parallelr   r   r   r   pp_utils.batch_comm_helperr   'pp_utils.forward_backward_overlap_utilsr   zero_bubble_utilsr   r   __all__r   r   rr   r   r   <module>r>     s{  & # " " " " "              
8888888   GGG $ # # # # #            8 7 7 7 7 7 B B B B B B : : : : : : : :
  f f f f f( f f f f fs   ! ++