
    x-jf                     |    d dl Z d dlZd dlmZ d dlmc mZ d Z	d Z
d Zd Z G d dej        j                  ZdS )    Nc                 ,   | j         }||v sJ d| d|             | j        |                    |          | j        }fd |          }fdt	          t                              D             }fd|D             }g }t          j        | D ]z}g }	t	                             D ]K}
t          |          }|	                    |
           |}|D ]
}||         }|	
                    |           L|
                    |	           {|S )Nzdim 'z' not in mesh.dim_names c                      s d         S t          t                     d         z             fdt          d                   D             S )Nr   c                 Z    g | ]'} |z  |d z   z           d d                   (S )   N ).0iflatnestshapesteps     j/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/paddle/distributed/auto_parallel/ring_conv.py
<listcomp>z8_get_comm_group_by_dim.<locals>.nest.<locals>.<listcomp>"   sS     
 
 
 Da$h!a%4/0%)<<
 
 
    )intlenrange)r
   r   r   r   s   ``@r   r   z$_get_comm_group_by_dim.<locals>.nest   sw     	7N3t99a())
 
 
 
 
 
 
58__
 
 
 	
r   c                      g | ]
}|k    |S r   r   )r   r	   dim_idxs     r   r   z*_get_comm_group_by_dim.<locals>.<listcomp>)   s    ???!w,,!,,,r   c                 :    g | ]}t          |                   S r   )r   )r   r	   r   s     r   r   z*_get_comm_group_by_dim.<locals>.<listcomp>*   s#    888E%(OO888r   )	dim_namesr   indexprocess_idsr   r   	itertoolsproductlistinsertappend)meshdimr   idsmesh_nd
other_axesother_rangescomm_groupsr   groupr	   idxvaljr   r   r   s                 @@@r   _get_comm_group_by_dimr*      sn   I)MSMM)MMJEooc""G

C
 
 
 
 
 d3G????U3u::..???J8888Z888LK"L1 	" 	"uW~&& 	 	Au++CJJw"""C  !fLL5!!!!r   c                 T   |dk    rd}nd}d }t          |          D ],\  }}|t          j        |          k    r| j        |         } n-|st	          d| d| d          t          | |          }t          j                    }|D ]}	||	v r||	fc S t          d| d|  d          )	NNCHW      zInput tensor placements z" do not contain a Shard on W axis:.zRank z1 not found in any tensor parallel group for mesh )	enumeratedistShardr   
ValueErrorr*   get_rankRuntimeError)
x_meshx_placementsdata_format
shard_axis	axis_namer	   	placement	tp_groupsrankr&   s
             r   _get_conv_tp_groupr>   ;   s   f


I!,//  9
:....(+IE /  
d|ddWaddd
 
 	
 'vy99I=??D $ $5==e####  PPPvPPP  r   c                    t          |          dk    r| S t          | j                  dk    s%t          dt          | j                   d          |dk    rd}n|dk    rd}nt          d	| d
          t          d           gdz  }	t          | d           |	|<   | t	          |	                                                   }
t          d           gdz  }t          d |          ||<   | t	          |                                                   }t          j        |          }t          j        |
          }t          j	        t          j
        |
|          }t          j	        t          j
        ||          }t          j	        t          j        ||          }t          j	        t          j        ||          }t          j        ||||g          }|D ]}|                                 ||d         k    rt          j        | |g|          }n>||d         k    rt          j        || g|          }nt          j        || |g|          }|                                S )Nr      zAInput tensor is expected to be 4D for NCHW/NHWC formats, but got zD.r,   r-   NHWCr.   Unsupported data_format: . Must be 'NCHW' or 'NHWC'.r   )axis)r   r   r3   slicetuple
contiguouspaddle
zeros_liker1   P2POpisendirecvbatch_isend_irecvwaitconcat)local_input_tensorhalo_width_to_receive_from_left halo_width_to_receive_from_rightleft_neighbor_rankright_neighbor_rankcurrent_rankconv_tp_groupr8   width_dim_idxslices_for_send_rightsegment_to_send_rightslices_for_send_leftsegment_to_send_leftbuffer_for_halo_from_rightbuffer_for_halo_from_leftop_isend_to_rightop_isend_to_leftop_irecv_from_rightop_irecv_from_leftp2p_requestsreqreconstructed_tensors                         r   _ring_conv_halo_exchangerf   X   s    =Q!! 	$%%**9-3449 9 9
 
 	

 f			PPPP
 
 	

 #4[[MA-+0	(($, ,-( /#$$jll 
 "$KK=1,*/.+ +' ."##jll  "(!23G!H!H & 12G H H

)+>  z
(*<  *
.0C  
-/A  )		
 L   



 }Q'''%}!;<= 
  
  
 
r*	*	*%}&(:;- 
  
  

  &})"*
  
  
  
  **,,,r   c                    t          |          dk    r| S |dk    rd}n|dk    rd}nt          d| d          t          d           gdz  }	t          | d           |	|<   | t          |	                                                   }
t          d           gdz  }t          d |          ||<   | t          |                                                   }t          j        |
          }t          j        |          }t          j        t          j	        |
|          }t          j        t          j	        ||          }t          j        t          j
        ||          }t          j        t          j
        ||          }t          j        ||||g          }|D ]}|                                 | }||d	         k    rt          d           gdz  }t          d |           ||<   |t          |                   }t          d           gdz  }t          | d           ||<   |t          |                   }|                    |           nu||d
         k    rt          d           gdz  }t          |d           ||<   |t          |                   }t          d           gdz  }t          d |          ||<   |t          |                   }|                    |           nt          d           gdz  }t          ||           ||<   |t          |                   }t          d           gdz  }t          | d           ||<   |t          |                   }|                    |           t          d           gdz  }t          d |          ||<   |t          |                   }|                    |           |                                S )Nr   r,   r-   rA   r.   rB   rC   r@   r   rE   )r   r3   rF   rG   rH   rI   rJ   r1   rK   rL   rM   rN   rO   add_)local_gradient_tensorhalo_width_send_lefthalo_width_send_rightrT   rU   current_process_rankrW   r8   rX   rY   rZ   r[   r\   buffer_for_gradient_from_leftbuffer_for_gradient_from_rightr_   r`   ra   rb   rc   rd   processed_gradient_tensorcrop_slices
agg_slicestarget_sliceagg_slices_right_edgetarget_slice_rightagg_slices_left_edgetarget_slice_lefts                                r   _ring_conv_halo_aggregaterw      sR    =Q$$f			PPPP
 
 	

 #4[[MA-+0	, ,-( 2#$$jll  "$KK=1,*/"+ +' 1"##jll 
 %+$56K$L$L!%+%67K%L%L"

)+>  z
(*<  *
24G  
13E  )		
 L   



 5}Q///T{{ma'%*42G1G%H%HM"$=+%
! Dkk]Q&
$)+?*?$F$F
=!0z1B1BC89999	r!2	2	2T{{ma'%*+?%F%FM"$=+%
!
 Dkk]Q&
$)$0E$F$F
=!0z1B1BC78888 T{{ma'%* #8"8&
 &
M" %>+%
!
 "'t 1/4!!40
 0
m, 7'((
 	 >??? !&d}q0.3D:O.P.P]+5&''
 	<===$//111r   c                   f    e Zd Ze	 dd            Ze	 	 	 	 	 	 	 	 d	d            Zed             ZdS )

RingConv2dr,   c           	          d}d}|dk    rd}d}n|dk    rd}d}nt          d| d          |d         }|d         }	|d         }
| |         }||         }|dk    rt          d	| d
| d          |	dk    rB||
z  dk    rt          d| d|
 d| d          |
|k    rt          d|
 d| d| d          nD|
dk    rt          d|	 d|
 d
| d          |dz  |k    rt          d|dz   d| d| d          dS )NrE   r,   r-   rA   r.   zUnsupported data_format 'z'. Expected 'NCHW' or 'NHWC'.r   z`Only dilation=1 on the W-dimension is supported for tensor-parallel convolution. Got dilation_w=z (data_format='z').r   zWhen padding_w=0, input_W=z must be divisible by stride_W=z/ for tensor-parallel convolution (data_format='zWhen padding_w=0, stride_W=z must equal kernel_W=z& to avoid halo exchange (data_format='zWhen padding_w=zC, stride_W must be 1 for tensor-parallel convolution. Got stride_W=zHalf of kernel_W (z) must not exceed input_W=z* to ensure halo region fits (data_format='T)r3   r5   )
input_sizekernel_sizestridepaddingdilationr8   idx_w_inputidx_w_kernel
dilation_w	padding_wstride_winput_wkernel_ws                r   _is_supportedzRingConv2d._is_supportedA  sV    &   KLLF"" KLLVKVVV   a[
AJ	!9[)|,?? N",N N=HN N N  
 >> !Q&&"V V VYa V VEPV V V   8##"M( M MQY M M<GM M M   $ 1}}"Ni N N$,N N=HN N N   1}w&&"QQ Q QRY Q Q@KQ Q Q  
 tr   Nr   r   c           
         t          j                    }t                              |j        |j        ||||	          sJ |                                s
J d            |                                sVd t          t          |j                            D             }t           j	        j
                            ||j        |          }|j|                                sVd t          t          |j                            D             }t           j	        j
                            ||j        |          }|                     |||           |j        }|j        }t           j	        j
                            |||          }t           j	        j
                            ||j        |j                  }|0t           j	        j
                            ||j        |j                  }||||||	f| _        t!          |||	          \  }}|d         dk    st          |          dk    r(t"          j                            ||||||||	          }n|	dk    rd}d}n|	dk    rd}d	}nt)          d
|	 d          |j        |         }|dz
  }|d	z  }||z
  }||z   |k    sJ || _        |                    |          }||dz   t          |          z           }||dz
  t          |          z           }t/          ||||||||	          }t"          j                            ||||||||	          }|d         }|j        |         }|	dk    r]||d         k    r|d d d d d d d ||z
  f         }n||d         k    r|d d d d d d |d f         }nt|d d d d d d |||z
  f         }n\||d         k    r|d d d d d ||z
  d d f         }n8||d         k    r|d d d d |d d d f         }n|d d d d |||z
  d d f         }|| _        || _        || _        || _        t           j	        j
                            |||          }|                                S )Nz.Input tensor `x` must be a distributed tensor.c                 4    g | ]}t          j                    S r   r1   	Replicater   _s     r   r   z&RingConv2d.forward.<locals>.<listcomp>  s-     ! ! !%&  ! ! !r   c                 4    g | ]}t          j                    S r   r   r   s     r   r   z&RingConv2d.forward.<locals>.<listcomp>  s-       %&    r   r   r   r,   r-   rA   r.   rB   rC   rE   )r1   r4   ry   r   r   is_distr   r   
placementsauto_parallelapidtensor_from_localprocess_meshsave_for_backwarddtensor_to_localattrsr>   rI   _C_opsconv2dr3   mesh_axis_namer   rf   left_halo_widthright_halo_widthoutput_halo_trim_widthoutput_width_dim_idxrH   )ctxxweightbiasr}   r~   padding_algorithmr   groupsr8   channel_dimr=   weight_placementsbias_placementsr6   r7   r   rW   final_local_resultskernel_width_dim_idxr   kernel_widthkernel_total_halo_spanr   r   rank_idx	next_rank	prev_ranklocal_results_with_halor   width_before_trimmings                                  r   forwardzRingConv2d.forward  s    }''GV\67Hk
 
 	
 	
 
 yy{{LLLLL{~~ 	! !*/AL0A0A*B*B! ! ! '+>>(9 F DLLNN */AL0A0A*B*B  O %)<<ano D 	a...|"33Av|LL#'88F'):
 
 %)::d' D
 
	 );L+)
 )
% 1:??c-00A55"(-"6"6!	# 	# f$$'($'($$&&'($'($$ XXXX   "<(<=L%1A%5"49O5G"%559OOOOO!/C$**400H%x!|s=7I7I&IJI%x!|s=7I7I&IJI ) 	 	A '-m&:&:!	' 	'# &-QZ"$;$A$%! f$$=+++*AH/2HHHJ+'' ]2...*A111aaa!7!8!88+'' +B.1F021 12+'' =+++*AH/2HHH+'' ]2...*A111455qqq8+'' +B.1F021 1	+' #2C#3C )?C&';C$"04GG
 
 #--///r   c           
         t          j                    }|                                 \  }}}|j        }|j        }||j        nd}|j        }	|j        }
t           j        j                            ||	|
          }|j        }|j        }t           j        j                            |||          }t           j        j                            ||j        |j                  }|4|j        }|j        }t           j        j                            |||          }| j	        }|d         }|d         }d }d }d }t          |	|
|          \  }}|d         dk    st          |          dk    rt          j        j        |||g|R  \  }}n+|                    |          }||dz   t          |          z           }||dz
  t          |          z           }| j        }| j        }| j        }| j        }t)          ||||||||          }|d         }|dk    r'||d         k    rd|g} nB||d         k    r|dg} n1||g} n,||d         k    rd|ddg} n||d         k    r|dddg} n||ddg} t+          j        || dd|          }!t          j        j        |||!g|R  \  }"}|st/          |"|||||||          }|?|dk    rg d	ng d
}#t          j        ||#d          }|                    |j                  }|&t           j        j                            ||	|
          }t          |	|
|          \  }$}t9          |j                  D ]Y\  }%}&|&|$k    rNt          j        t           j        j                   ||%<   |&t          j        t           j        j                   ||%<   Zt           j        j                            |||          }t          j!        ||d tE          t          |                    D                       }|`t           j        j                            |||          }t          j!        ||d tE          t          |                    D                       }|rd }|rd }|rd }||||fS ||fS )NTrE   r   r   r,   constantg        )modevaluer8   )r   r.   r-   )r   r   r.   )rD   keepdimc                 4    g | ]}t          j                    S r   r   r   s     r   r   z'RingConv2d.backward.<locals>.<listcomp>  s     EEE!T^EEEr   c                 4    g | ]}t          j                    S r   r   r   s     r   r   z'RingConv2d.backward.<locals>.<listcomp>  s     GGGa!!GGGr   )#r1   r4   saved_tensorstop_gradientr   r   r   r   r   r   r>   r   rI   r   conv2d_gradr   r   r   r   r   rf   Fpadrw   sumreshaper   r   r0   r   Partial
ReduceTypekRedSumreshardr   )'r   grad_outrV   r   r   r   x_stop_gradientweight_stop_gradientbias_stop_gradientr6   r7   weight_meshr   	bias_meshr   
conv_attrsr8   r~   grad_xgrad_weight	grad_biasr   rW   r   r   r   r   r   r   r   in_tensor_augmentedr   padding_listgrad_out_paddedgrad_x_augmentedsum_axestp_axis_namer'   r:   s'                                          r   backwardzRingConv2d.backward?  sV   }**,,64/%3373CT//|"33Av|LL)"-#'88K!2
 
 %)::h+X-@
 
 )I"oO%)::i D Y
 nQ-	-flKPP=1:??c-00A55"(-";68#&0# # #FKK %**<88H%x!|s=7I7I&IJI%x!|s=7I7I&IJI!1O"3%(%?"#&#;  #; 	# 	#  
If$$=#333$%y>LL!]2%666$-q>LL$-y#9LL=#333$%y!Q#7LL!]2%666$-q!Q#7LL$-y!Q#?Le'  O -3M,E#V_-?I- - -)k
 # 
2$#$ !	 	 $/6$9$9yyyyyyyH
8(DIIII!))$*55I'+>> F -V\;OOa'(=>> 	Q 	QNCL(()-do6M)N)N!#&#+/<8O+P+POC((,??&7
 
 lEEuS1B-C-C'D'DEEE
 
 *.AA9o I GG5_1E1E+F+FGGG I  	F 	K 	I;	11{""r   )r,   )Nr   r   Nr   r   r,   r   )__name__
__module____qualname__staticmethodr   r   r   r   r   r   ry   ry   @  s        HND D D \DL 
 t0 t0 t0 \t0l d# d# \d# d# d#r   ry   )r   rI   paddle.distributeddistributedr1   paddle.nn.functionalnn
functionalr   r*   r>   rf   rw   autogradPyLayerry   r   r   r   <module>r      s         ! ! ! ! ! !                  " " "J  :b- b- b-J@2 @2 @2Fd# d# d# d# d#( d# d# d# d# d#r   