
    ёi?                       S SK r S SKrS SKJr  S SKJrJr  S SKJr  SSK	J
r
JrJrJr  \R                  \R                  \R                   \R"                  \R$                  \R&                  \R(                  \R*                  \R,                  \R.                  \R0                  \R2                  \R4                  \R6                  \R8                  /r/ SQr " S S	\5      rS
 r  " S S5      r!S r"S r#S r$S%S jr%S%S jr&S&S jr'S&S jr(S r) " S S5      r* " S S5      r+ " S S5      r, " S S5      r- " S S5      r.\R^                  SS S!S"S#S4S$ jr0g)'    N)Enum)TracerEventTypeTracerMemEventType)flops   )intersection_rangesmerge_rangesmerge_self_ranges
sum_ranges)	allreduce	broadcastrpcc                   8    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrg)
SortedKeys1   aJ  
SortedKeys is used to specify how to sort items when printing ``paddle.profiler.Profiler.summary`` table.

The meaning of each SortedKeys is as following

- **SortedKeys.CPUTotal** : Sorted by CPU total time.

- **SortedKeys.CPUAvg**  : Sorted by CPU average time.

- **SortedKeys.CPUMax**  : Sorted by CPU max time.

- **SortedKeys.CPUMin**  : Sorted by CPU min time.

- **SortedKeys.GPUTotal**  : Sorted by GPU total time.

- **SortedKeys.GPUAvg**  : Sorted by GPU average time.

- **SortedKeys.GPUMax**  : Sorted by GPU max time.

- **SortedKeys.GPUMin**  : Sorted by GPU min time.
r   r                      N)__name__
__module____qualname____firstlineno____doc__CPUTotalCPUAvgCPUMaxCPUMinGPUTotalGPUAvgGPUMaxGPUMin__static_attributes__r       b/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/profiler/profiler_statistic.pyr   r   1   s1    , HFFFHFFFr'   r   c                 r    U R                  SS5      nUR                  SS5      nUR                  SS5      nU$ )z0
convert static host node name to operator name
z compute z dygraphz pybind_imperative_func)replace)nameop_names     r(   _nodename2opnamer.   R   s:     ll:r*Gooj"-Goo7<GNr'   c                   P    \ rS rSrSrS rS rS r\S 5       r	\S 5       r
S rS	rg
)HostStatisticNode\   z7
Wrap original node for calculating statistic metrics.
c                     Xl         / U l        / U l        SU l        SU l        SU l        SU l        SU l        SU l        SU l	        g Nr   )
hostnodechildren_noderuntime_nodecpu_timeself_cpu_timegpu_timeself_gpu_timegeneral_gpu_timeself_general_gpu_timer   )selfr4   s     r(   __init__HostStatisticNode.__init__a   sL      !%&"
r'   c                 @   U R                   R                  [        R                  :X  av  [	        U R                   S5      (       aZ  [        U R                   R                  5      n[        UU R                   R                  U R                   R                  5      U l        g g g )Ninput_shapes)
r4   typer   Operatorhasattrr.   r,   r   rA   
attributes)r=   r-   s     r(   	cal_flopsHostStatisticNode.cal_flopsm   sr    ==!9!99t}}n55*4==+=+=>"MM..MM,,
 6 :r'   c                    U R                   R                  U R                   R                  -
  U l        U R                  U l        U R                  5         U R                   H  nUR                  5         UR                  5         U =R                  UR                  -  sl        U =R                  UR                  -  sl	        U =R                  UR                  UR                  -
  -  sl        U =R                  UR                  -  sl
        M     U R                   H  nUR                  5         U =R                  UR                  UR                  -
  -  sl        U =R                  UR                  -  sl        U =R                  UR                  -  sl        U =R                  UR                  -  sl	        U =R                  UR                  -  sl        M     U R                   R                   H  nUR                  [         R"                  :X  aX  U =R                  UR                  UR                  -
  -  sl        U =R                  UR                  UR                  -
  -  sl        U =R                  UR                  UR                  -
  -  sl	        U =R                  UR                  UR                  -
  -  sl        M     g N)r4   end_nsstart_nsr7   r8   rF   r5   cal_statisticr9   r;   r   r6   r:   r<   device_noderB   r   Kernel)r=   childrtdevices       r(   rL   HostStatisticNode.cal_statisticw   s   ,,t}}/E/EE!]]''EOO!MMU^^+M!!U%;%;;!%,,"??JJ%++%J ( ##B"))bkk"99MMR[[(M"++-!!R%8%88!&&"*=*==& $ mm//F{{o444!@@""fmmfoo&EE"!!V]]V__%DD!&&&--&//*II& 0r'   c                 .    U R                   R                  $ rI   )r4   rJ   r=   s    r(   rJ   HostStatisticNode.end_ns   s    }}###r'   c                 .    U R                   R                  $ rI   )r4   rK   rT   s    r(   rK   HostStatisticNode.start_ns   s    }}%%%r'   c                 .    [        U R                  U5      $ rI   )getattrr4   r=   r,   s     r(   __getattr__HostStatisticNode.__getattr__   s    t}}d++r'   )
r5   r7   r   r;   r9   r4   r6   r8   r<   r:   N)r   r   r   r   r   r>   rF   rL   propertyrJ   rK   r[   r&   r   r'   r(   r0   r0   \   sD    
J6 $ $ & &,r'   r0   c                 H   [         R                  " [        5      nU R                  5        Hs  u  p#/ nUR	                  U5        X   nU(       d  M%  UR                  5       nUR	                  U5        UR                   H  nUR	                  U5        M     U(       a  MN  Mu     U$ rI   )collectionsdefaultdictlistitemsappendpopr5   )	nodetreesresults	thread_idrootnodestack
threadlistcurrent_node	childnodes           r(   traverse_treerm      s    %%d+G(0	X'
e 99;Ll+)77	Y' 8 e	  1 Nr'   c                 &   / n/ nUR                  U 5        U(       at  UR                  5       nUR                   H  nUR                  U5        M     UR                   H'  nUR                   H  nUR                  U5        M     M)     U(       a  Mt  U$ )z<
Get all device nodes called in the time range of hostnode.
)rc   rd   r5   r6   rM   )r4   ri   device_nodesrk   rl   runtimenode
devicenodes          r(   get_device_nodesrr      s     EL	LL
yy{%33ILL# 4'44K)55
##J/ 6 5	 % r'   c                    ^ SU4S jjm/ nU R                  5        H   u  p#T" U5      u  pBUR                  U5        M"     U$ )Nc                 .  > SU R                   ;   a  / S4$ U R                  [        R                  [        R                  4;   a  / S4$ U R                  [        R
                  :X  a(  [        U 5      nUR                  5         X"R                  4$ / nSnU R                   H.  nT" XQS-   5      u  pgU(       d  M  XG-  nUR                  U5        M0     U R                  [        R                  :X  a&  [        U 5      nUR                  5         XBl        X#/U4$ X44$ )NGradNoder   r   )r,   rB   r   BackwardOptimizationrC   r0   rL   r   r5   rc   Forward)	nodedepth	stat_nodelayernflopsclfbuild_layers	           r(   r   +_build_layer_from_tree.<locals>.build_layer   s   "q5L99$$((
 
 q5L99000)$/I##%oo--##Aq!),DAqQ	 $ 99///)$/I##%$O%v--}r'   r   )rb   rc   )re   ret_rh   r|   r   s        @r(   _build_layer_from_treer      sA    > C (x(

5 ) Jr'   c                     U S-  S:  a  [        U S-  U5       S3$ U S-  S:  a  [        U S-  U5       S3$ U S-  S:  a  [        U S-  U5       S3$ U S-  S:  a  [        U S-  U5       S	3$ [        X5       $ )
Ng   mBr   z T    eAz G    .Az M     @@z Kroundn	precisions     r(   _format_large_numberr      s    Dy1}D),-R00Cx!|C+,B//Cx!|C+,B//Cx!|C+,B//A!"#r'   c                     U S-  S:  a  [        U S-  U5       S3$ U S-  S:  a  [        U S-  U5       S3$ U S-  S:  a  [        U S-  U5       S3$ [        X5       S3$ )	Nr   r   z sr   z msr   z usz nsr   r   s     r(   _format_timer      s    Cx!|C+,B//Cx!|C+,C00Cx!|C+,C00A!"#&&r'   c                 z   ^^^^^ / m/ m/ mSUUUUU4S jjmU SS   H  nT" U5        M     SR                  T5      $ )Nc                   > [        U [        5      (       a  U  H  nT
" X!S-   5        M     g U R                  [        R                  [        R
                  4;   Ga  [        T	5      S:X  a  T	R                  U5        [        U R                  5      nUT	S   :X  a9  [        T5      S:  a*  TS   R                  U5      (       a  TR                  S5        [        T5      T:  a  SR                  T5      $ SUT	S   -
  -  n[        U R                  5      n[        U R                  5      n[        U R                  S-  U R                  -  5      nTR                  U U SU SU S	U S
3	5        g g )Nr   r   r*    r   z
 latency: z	, FLOPs: z	, FLOPS: 
)
isinstancera   rB   r   rx   rC   lenrc   r.   r,   
startswithjoinr   r7   r   r   )ry   rz   r   r,   aligntmflops_nflops_sloopoffsetprint_layer_treerepeatr   s           r(   r   *_gen_layer_flops.<locals>.print_layer_tree   s:   dD!! AI.  YY?22O4L4LMM6{ae$#DII.D #C1Q9J9J49P9PA4yF"wws|#56":-.Edmm,B*4::6G*4::+;dmm+KLGJJ'$z"Ywiy	QST% Nr'   r   r*   r   )r   )ry   r   r   r   r   r   r   s    ` @@@@r(   _gen_layer_flopsr      sF    
CFD 6 !"X  773<r'   c                 .    [        U 5      n[        X!5      $ )zN
gen_layer_flops generate flops/runtime information depend on layer/operator.
)r   r   )re   r   
layer_trees      r(   gen_layer_flopsr   !  s     (	2JJ//r'   c                 F   0 n[         R                  " [        5      n[         R                  " [        5      nU R                  5        GH+  u  pE/ nUR	                  U5        [        U5      n/ nUR	                  U5        XqU'   X$   n	X4   n
U(       d  ML  UR                  5       nU	R	                  U5        UR                  5       nU
R	                  U5        UR                   HK  nUR	                  U5        [        U5      nUR                  R	                  U5        UR	                  U5        MM     UR                   H)  n[        U5      nUR                  R	                  U5        M+     U(       a  M  GM.     UR                  5        H  u  pGUR                  5         M     X4$ )zf
Using HostStatisticNode to wrap original profiler result tree, and calculate node statistic metrics.
)
r_   r`   ra   rb   rc   r0   rd   r5   r6   rL   )re   node_statistic_treerf   
newresultsrg   rh   ri   root_statistic_nodenewstackrj   newthreadlistrk   current_statistic_noderl   child_statistic_noderp   runtime_statistic_nodes                    r(   	wrap_treer   )  su    %%d+G((.J(0	X/9+,)<I&'
"-e 99;Ll+%-\\^"  !78)77	Y''8'C$&44;;(  45 8  ,88):;)G&&33::*  9 e  16 +>*C*C*E&	))+ +F **r'   c                   6    \ rS rSrSrS rS rS rS rS r	Sr
g	)
TimeRangeSummaryiQ  zG
Analyse time ranges for each TracerEventType, and summarize the time.
c                 .   [         R                  " [        5      U l        [         R                  " S 5      U l        [         R                  " [
        5      U l        [         R                  " S 5      U l        [         R                  " [
        5      U l        g )Nc                  6    [         R                  " [        5      $ rI   r_   r`   ra   r   r'   r(   <lambda>+TimeRangeSummary.__init__.<locals>.<lambda>Y  s    K++D1r'   c                  6    [         R                  " [        5      $ rI   )r_   r`   intr   r'   r(   r   r   ]  s    K++C0r'   )	r_   r`   ra   CPUTimeRangeGPUTimeRanger   CPUTimeRangeSumGPUTimeRangeSum
call_timesrT   s    r(   r>   TimeRangeSummary.__init__V  sj    '33D9'331
  +66s;*660 
 &11#6r'   c           
         [        U5      nUR                  5        GHI  u  p4[        R                  " [        5      n[        R                  " S 5      nUSS  GH@  nXWR
                     R                  UR                  UR                  45        U R                  UR
                  ==   S-  ss'   UR                   H  nXXR
                     R                  UR                  UR                  45        U R                  UR
                  ==   S-  ss'   UR                   Hq  n	XiR                     U	R
                     U	R                     R                  U	R                  U	R                  45        U R                  U	R
                  ==   S-  ss'   Ms     M     GMC     UR                  5        H4  u  p[        USS9n[        U R                   U
   USS9U R                   U
'   M6     UR                  5        Hl  u  pUR                  5        HS  u  pUR                  5        H:  u  p[        USS9n[        U R"                  U   U
   USS9U R"                  U   U
'   M<     MU     Mn     GML     U R                   R                  5        H  u  p[%        U5      U R&                  U
'   M     U R"                  R                  5        H9  u  pUR                  5        H   u  p[%        U5      U R(                  U   U
'   M"     M;     g)z]
Analysis node trees in profiler result, and get time range for different tracer event type.
c                  0    [         R                  " S 5      $ )Nc                  6    [         R                  " [        5      $ rI   r   r   r'   r(   r   :TimeRangeSummary.parse.<locals>.<lambda>.<locals>.<lambda>j  s    K33D9r'   )r_   r`   r   r'   r(   r   (TimeRangeSummary.parse.<locals>.<lambda>i  s    //9r'   r   NF	is_sortedT)rm   rb   r_   r`   ra   rB   rc   rK   rJ   r   r6   rM   	device_id	stream_idr
   r	   r   r   r   r   r   )r=   re   thread2hostnodesthreadid	hostnodesr   r   r4   rp   rq   
event_typetime_rangesr   device_time_rangesevent_time_rangesr   s                   r(   parseTimeRangeSummary.parsea  s    )3#3#9#9#;H&2248L&22L
 &abM]]+22&&8 .!3.#+#8#8K !1!1299$--{/A/AB OOK$4$45:5&1&=&=
$%9%9::??K&00 &*"5"5z7H7H!IJ
8A=8	 '> $9 *  ,8+=+=+?'
/uM0<%%j1;$1!!*- ,@
 2>1C1C1E-	5G5M5M5O1J2C2I2I2K.	&7'5' DP --i8D'&*D)))4Z@	 3L 6P 2F9 $<P (,'8'8'>'>'@#J/9+/FD  , (A-1->->-D-D-F)I+=+C+C+E'
>H?$$Y/
; ,F .Gr'   c                 6    U R                   R                  5       $ rI   )r   keysrT   s    r(   get_gpu_devices TimeRangeSummary.get_gpu_devices  s      %%''r'   c                 &    U R                   U   U   $ rI   )r   )r=   r   r   s      r(   get_gpu_range_sum"TimeRangeSummary.get_gpu_range_sum  s    ##I.z::r'   c                      U R                   U   $ rI   )r   )r=   r   s     r(   get_cpu_range_sum"TimeRangeSummary.get_cpu_range_sum  s    ##J//r'   )r   r   r   r   r   N)r   r   r   r   r   r>   r   r   r   r   r&   r   r'   r(   r   r   Q  s!    	73j(;0r'   r   c                   $    \ rS rSrSrS rS rSrg)DistributedSummaryi  z
Analysis communication and computation time range, and their overlap.
The computation time is all kernel except kernels for communication like nccl.
c                 f    / U l         / U l        / U l        / U l        / U l        SU l        SU l        g r3   )cpu_communication_rangegpu_communication_rangecommunication_rangecomputation_rangeoverlap_range	cpu_calls	gpu_callsrT   s    r(   r>   DistributedSummary.__init__  s8    ')$')$#% !#r'   c                   ^
 [        U5      nUR                  5        GHo  u  p4USS  GH_  m
T
R                  [        R                  :X  a  U R
                  R                  T
R                  T
R                  45        [        T
5      nU HT  nUR                  [        R                  :X  d  M#  U R                  R                  UR                  UR                  45        MV     M  T
R                  [        R                  :X  a  [        U
4S j[         5       5      (       a  U R
                  R                  T
R                  T
R                  45        [        T
5      nU HT  nUR                  [        R                  :X  d  M#  U R                  R                  UR                  UR                  45        MV     GM  T
R                   H  nUR                    H  nUR                  [        R                  :X  d  M#  UR"                  R%                  5       n	SU	;   d  SU	;   a3  U R                  R                  UR                  UR                  45        M|  U R&                  R                  UR                  UR                  45        M     M     GMb     GMr     [)        [+        U R
                  5      5      U l        [)        [+        U R                  5      5      U l        [1        U R
                  SS9U l        [1        U R                  SS9U l        [3        U R
                  U R                  SS9U l        [1        U R&                  SS9U l        [7        U R4                  U R&                  SS9U l        g)	z8
Collect all communication and computation time ranges.
r   Nc              3   ^   >#    U  H"  nUTR                   R                  5       ;   v   M$     g 7frI   )r,   lower).0r,   r4   s     r(   	<genexpr>+DistributedSummary.parse.<locals>.<genexpr>  s+      G 4 HMM//11 4s   *-ncclxcclFr   T)rm   rb   rB   r   Communicationr   rc   rK   rJ   rr   rN   r   rC   any_CommunicationOpNamer6   rM   r,   r   r   r   setr   r   r
   r	   r   r   r   )r=   re   r   r   r   ro   rM   rp   rq   kernel_namer4   s             @r(   r   DistributedSummary.parse  s    )3#3#9#9#;H%abM==O$A$AA0077!**HOO< $4H#=L'3&++/E/EE 88??!,!5!5{7I7I J (4 ]]o&>&>>3 G 4G D D 0077!**HOO< $4H#=L'3&++/E/EE 88??!,!5!5{7I7I J (4 (0'<'<*5*A*AJ)/2H2HH.8oo.C.C.E$*k$9'-'<$($@$@$G$G)3)<)<j>O>O(P%& %)$:$:$A$A)3)<)<j>O>O(P%& +B (== * $<\ S!=!=>?S!=!=>?'8((E(
$ (9((E(
$ $0(((($
 
 "3""e"
 1$$d&<&<
r'   )r   r   r   r   r   r   r   N)r   r   r   r   r   r>   r   r&   r   r'   r(   r   r     s    
E
r'   r   c                       \ rS rSrSr " S S5      r " S S\5      r " S S\5      r " S	 S
\5      r " S S\5      r	S r
S rS rS rS rS rS rS rSrg)EventSummaryi  zL
Analyse operator event in profiling data, correlate with its device event.
c                   ~    \ rS rSrS r\S 5       r\S 5       r\S 5       r\S 5       r	S r
S rS	 rS
 rS rS rSrg)EventSummary.ItemBasei  c                     Xl         SU l        SU l        SU l        SU l        [        S5      U l        SU l        [        S5      U l        0 U l	        0 U l
        SU l        [        S5      U l        SU l        SU l        g )Nr   inf)r,   callr7   r9   max_cpu_timefloatmin_cpu_timemax_gpu_timemin_gpu_timedevicesoperator_innersr;   min_general_gpu_timemax_general_gpu_time_flopsrZ   s     r(   r>   EventSummary.ItemBase.__init__  sv    IDIDMDM !D %eD !D %eDDL#%D $%D!(-eD%()D%DKr'   c                     U R                   $ rI   r  rT   s    r(   r   EventSummary.ItemBase.flops  s    ;;r'   c                 4    U R                   U R                  -  $ rI   )r7   r   rT   s    r(   avg_cpu_time"EventSummary.ItemBase.avg_cpu_time      ==499,,r'   c                 4    U R                   U R                  -  $ rI   )r9   r   rT   s    r(   avg_gpu_time"EventSummary.ItemBase.avg_gpu_time  r  r'   c                 4    U R                   U R                  -  $ rI   )r;   r   rT   s    r(   avg_general_gpu_time*EventSummary.ItemBase.avg_general_gpu_time  s    ((49944r'   c                     XR                   :  a  Xl         XR                  :  a  Xl        U =R                  U-  sl        g rI   )r   r   r7   r=   times     r(   add_cpu_time"EventSummary.ItemBase.add_cpu_time  4    '''$(!'''$(!MMT!Mr'   c                     XR                   :  a  Xl         XR                  :  a  Xl        U =R                  U-  sl        g rI   )r  r  r9   r  s     r(   add_gpu_time"EventSummary.ItemBase.add_gpu_time$  r  r'   c                     XR                   :  a  Xl         XR                  :  a  Xl        U =R                  U-  sl        g rI   )r  r  r;   r  s     r(   add_general_gpu_time*EventSummary.ItemBase.add_general_gpu_time+  s7    ///,0)///,0)!!T)!r'   c                 .    U =R                   S-  sl         g Nr   )r   rT   s    r(   add_callEventSummary.ItemBase.add_call2  s    IINIr'   c                 .    U =R                   U-  sl         g rI   r
  )r=   r   s     r(   	add_flopsEventSummary.ItemBase.add_flops5  s    KK5 Kr'   c                     [         erI   )NotImplementedErrorr=   ry   s     r(   add_itemEventSummary.ItemBase.add_item8  s    %%r'   )r  r   r7   r  r;   r9   r   r  r  r   r  r  r,   r  N)r   r   r   r   r>   r]   r   r  r  r  r  r  r   r$  r'  r,  r&   r   r'   r(   ItemBaser     su    	  
	 
	 
	- 
	- 
	- 
	- 
	5 
	5	"	"	*		!	&r'   r.  c                       \ rS rSrS rSrg)EventSummary.DeviceItemi;  c                 ~    U =R                   S-  sl         U R                  UR                  UR                  -
  5        g r#  )r   r  rJ   rK   r+  s     r(   r,   EventSummary.DeviceItem.add_item<  s*    IINIdkkDMM9:r'   r   Nr   r   r   r   r,  r&   r   r'   r(   
DeviceItemr0  ;  s    	;r'   r4  c                       \ rS rSrS rSrg)EventSummary.OperatorItemi@  c                 T   U R                  5         U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        UR                   H  nUR                  [        R                  :w  d  M#  UR                  U R                  ;  a6  [        R!                  UR                  5      U R                  UR                  '   U R                  UR                     R#                  U5        M     UR$                   Hq  nUR&                   H^  nUR                  nXPR(                  ;  a"  [        R+                  U5      U R(                  U'   U R(                  U   R#                  U5        M`     Ms     g rI   )r$  r  r7   r  r9   r   r;   r'  r   r5   rB   r   rC   r,   r  r   OperatorItemr,  r6   rM   r  r4  )r=   ry   rO   rp   rq   r,   s         r(   r,  "EventSummary.OperatorItem.add_itemA  s1   MMOdmm,dmm,%%d&;&;<NN4::&++::!9!99zz)=)==(55ejjA ,,UZZ8 ((4==eD ,  $00"-"9"9J%??D<</-9-D-DT-JT*LL&//
;	 #:  1r'   r   Nr3  r   r'   r(   r8  r6  @  s    	<r'   r8  c                       \ rS rSrS rSrg)EventSummary.ForwardItemiV  c                 R   U R                  5         U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        UR                   H  nUR                  [        R                  :w  d  M#  UR                  U R                  ;  a6  [        R!                  UR                  5      U R                  UR                  '   U R                  UR                     R#                  U5        M     g rI   )r$  r  r7   r  r9   r   r;   r'  r   r5   rB   r   rC   r,   r  r   r8  r,  )r=   ry   rO   s      r(   r,  !EventSummary.ForwardItem.add_itemW  s    MMOdmm,dmm,%%d&;&;<NN4::&++::!9!99zz)=)==(55ejjA ,,UZZ8 ((4==eD ,r'   r   Nr3  r   r'   r(   ForwardItemr;  V  s    	Er'   r>  c                       \ rS rSrS rSrg)EventSummary.GeneralItemie  c                     U R                  5         U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        g rI   )r$  r  r7   r  r9   r   r;   r+  s     r(   r,  !EventSummary.GeneralItem.add_itemf  sC    MMOdmm,dmm,%%d&;&;<r'   r   Nr3  r   r'   r(   GeneralItemr@  e  s    	=r'   rC  c                     0 U l         [        R                  " [        5      U l        0 U l        [        R                  " [        5      U l        0 U l        0 U l        0 U l	        g rI   )
rb   r_   r`   dictthread_itemsuserdefined_itemsuserdefined_thread_itemsmodel_perspective_itemsmemory_manipulation_itemskernel_itemsrT   s    r(   r>   EventSummary.__init__l  sY    
'33
 "$(3(?(?)
% (*$)+&r'   c                 2   [        U5      u  p#UR                  5        GH4  u  nnUSS  GH  nUR                  [        R                  :X  a  U R                  U5        UR                  [        R                  :X  d   UR                  [        R                  :X  d  Mq  SUR                  R                  5       ;   d<  SUR                  R                  5       ;   d  SUR                  R                  5       ;   a  U R                  U5        M  UR                  [        R                  :X  d  M  U R                  U5        GM     U R                  US   5        GM7     UR                  5        GH,  u  pG[        R                  " 5       nUR                  U5        U(       d  M5  UR!                  5       n	U	R"                   H  n
U
R                  [        R$                  :X  dZ  U
R                  [        R&                  :X  d<  U
R                  [        R(                  :X  d  U
R                  [        R*                  :X  a  U R-                  U
5        M  U
R                  [        R.                  :X  a  U R-                  U
5        UR                  U
5        M     U(       a  M  GM/     g)z+
Analysis operator event in the nodetress.
r   Nmemcpy
memorycopymemsetr   )r   rb   rB   r   rC   add_operator_itemUserDefinedPythonUserDefinedr,   r   add_memory_manipulation_itemadd_userdefined_itemadd_kernel_itemr_   dequerc   popleftr5   rx   
Dataloaderrv   rw   add_model_perspective_itemProfileStep)r=   re   node_statistic_treesthread2host_statistic_nodesr   host_statistic_nodeshost_statistic_noder   rW  rk   rO   s              r(   r   EventSummary.parsey  s    =Fi<P9 )..0
 ';(# '++/G/GG**+>?',,0K0KK*//&889 !$7$<$<$B$B$DD'+>+C+C+I+I+KK#':'?'?'E'E'GG99:MN 044.@@A !556IJ+(,   !5a!89/ 12 .B-G-G-I)H%%'ELL,-%$}})77E

o&=&== ::)C)CC ::)A)AA ::)E)EE77! !::)D)DD ;;EBU+ 8 % .Jr'   c                     g rI   r   r=   operator_nodes     r(   add_forward_itemEventSummary.add_forward_item  s    r'   c                 2   UR                   U R                  ;  a6  [        R                  UR                   5      U R                  UR                   '   U R                  UR                      R	                  U5        UR                   U R
                  UR                     ;  aC  [        R                  UR                   5      U R
                  UR                     UR                   '   U R
                  UR                     UR                      R	                  U5        g rI   )r,   rb   r   r8  r,  rF  rg   rb  s     r(   rQ  EventSummary.add_operator_item  s    TZZ/-9-F-F"".DJJ}))* 	

=%%&//>T%6%6}7N7N%OO))-*<*<= m556}7I7IJ 	-112=3E3EFOO	
r'   c                 2   UR                   U R                  ;  a6  [        R                  UR                   5      U R                  UR                   '   U R                  UR                      R	                  U5        UR                   U R
                  UR                     ;  aC  [        R                  UR                   5      U R
                  UR                     UR                   '   U R
                  UR                     UR                      R	                  U5        g rI   )r,   rG  r   rC  r,  rH  rg   )r=   userdefined_nodes     r(   rU  !EventSummary.add_userdefined_item  s      (>(>>(()9)>)>? ""#3#8#89 	/445>>?OP !!001A1K1KLM
 (()9)>)>? ))*:*D*DE %% 	%%&6&@&@A!!	

(#
$r'   c                     UR                   U R                  ;  a6  [        R                  UR                   5      U R                  UR                   '   U R                  UR                      R	                  U5        g rI   )r,   rJ  r   rC  r,  )r=   memory_manipulation_nodes     r(   rT  )EventSummary.add_memory_manipulation_item  sg    #((0N0NN(()A)F)FG **+C+H+HI 	&&'?'D'DENN$	
r'   c                    UR                   [        R                  :X  a  SnOUR                   [        R                  :X  a  SnOdUR                   [        R                  :X  a  SnOCUR                   [        R
                  :X  a  SnO"UR                   [        R                  :X  a  SnOg X R                  ;  a"  [        R                  U5      U R                  U'   U R                  U   R                  U5        g )Nrx   rv   rw   rY  r[  )rB   r   rx   rv   rw   rY  r[  rI  r   rC  r,  )r=   model_perspective_noder,   s      r(   rZ  'EventSummary.add_model_perspective_item  s    !&&/*A*AAD#((O,D,DDD#((O,H,HH!D#((O,F,FFD#((O,G,GG D3331=1I1I$1OD((.$$T*334JKr'   c                 "   [        U5      nU H~  nUR                  [        R                  :X  d  M#  UR                  nX@R
                  ;  a"  [        R                  U5      U R
                  U'   U R
                  U   R                  U5        M     g rI   )	rr   rB   r   rN   r,   rK  r   r4  r,  )r=   	root_nodero   rM   r,   s        r(   rV  EventSummary.add_kernel_item  su    '	2'K?#9#99"''000.:.E.Ed.KD%%d+!!$'00= (r'   )rb   rK  rJ  rI  rF  rG  rH  N)r   r   r   r   r   r.  r4  r8  r>  rC  r>   r   rd  rQ  rU  rT  rZ  rV  r&   r   r'   r(   r   r     sn    =& =&~;X ;
<x <,Eh E=h =3,j
 %&
L">r'   r   c                   >    \ rS rSrSr " S S5      rS rS rS rSr	g	)
MemorySummaryi  z*
Analyse memory events in profiling data.
c                   $    \ rS rSrSS jrS rSrg)MemorySummary.MemoryItemi  c                 n    Xl         X l        SU l        SU l        SU l        SU l        SU l        X0l        g r3   )
event_nameplaceallocation_count
free_countallocation_size	free_sizeincrease_sizememory_type)r=   ry  rz  r  s       r(   r>   !MemorySummary.MemoryItem.__init__  s8    (OJ$%D!DO#$D DN!"D*r'   c                    U[         R                  :X  d  U[         R                  :X  a+  U =R                  S-  sl        U =R                  U-  sl        O^U[         R
                  :X  d  U[         R                  :X  a+  U =R                  S-  sl        U =R                  U-  sl        O[        S5        U R                  U R                  -
  U l
        g )Nr   zNo corresponding type.)r   AllocateReservedAllocater{  r}  FreeReservedFreer|  r~  printr  )r=   sizeallocation_types      r(   add_memory_record*MemorySummary.MemoryItem.add_memory_record  s    #5#>#>>"&8&I&II%%*%$$,$  #5#:#::"&8&E&EE1$$& ./!%!5!5!FDr'   )r{  r}  ry  r|  r~  r  r  rz  N)	Allocated)r   r   r   r   r>   r  r&   r   r'   r(   
MemoryItemrw    s    	+	Gr'   r  c                     [         R                  " [        5      U l        [         R                  " [        5      U l        [         R                  " [
        5      U l        [         R                  " [
        5      U l        g rI   )r_   r`   rE  allocated_itemsreserved_itemsr   peak_allocation_valuespeak_reserved_valuesrT   s    r(   r>   MemorySummary.__init__  sX    *66 
 *55
 '2&=&=c&B#$/$;$;C$@!r'   c                    UR                    GH1  nUR                  [        R                  :X  d  UR                  [        R                  :X  a  XR
                  UR                     ;  a:  [        R                  XR                  S5      U R
                  UR                     U'   U R
                  UR                     U   R                  UR                  UR                  5        OUR                  [        R                  :X  d  UR                  [        R                  :X  a  XR                  UR                     ;  a:  [        R                  XR                  S5      U R                  UR                     U'   U R                  UR                     U   R                  UR                  UR                  5        [        U R                  UR                     UR                   5      U R                  UR                  '   [        U R"                  UR                     UR$                  5      U R"                  UR                  '   GM4     g )Nr  Reserved)mem_noderB   r   r  r  r  rz  ru  r  r  increase_bytesr  r  r  maxr  peak_allocatedr  peak_reserved)r=   ry  ry   memnodes       r(   _analyse_node_memory"MemorySummary._analyse_node_memory%  s   }}G 2 ; ;;<<#5#:#::%9%9'--%HH%00&{ ((7
C
 $$W]]3##G$:$:GLLI 2 C CC<<#5#B#BB%8%8%GG%00&z ''6zB
 ##GMM2##G$:$:GLLI9<++GMM:&&:D''6 8;))'--8':O:O8D%%gmm4? %r'   c                 v   [        U5      nUR                  5        H  u  p4USS  H  nUR                  [        R                  :X  a  M#  UR                  [        R
                  :X  a/  UR                   H  nU R                  UR                  U5        M!     U R                  UR                  U5        M     M     g)z(
Analyse memory event in the nodetress.
r   N)	rm   rb   rB   r   OperatorInnerrC   r5   r  r,   )r=   re   r   r   
host_nodes	host_noderO   s          r(   r   MemorySummary.parseI  s     )3$4$:$:$< H'^	>>_%B%BB>>_%=%==!*!8!811)..%H "9)))..)D , %=r'   )r  r  r  r  N)
r   r   r   r   r   r  r>   r  r   r&   r   r'   r(   ru  ru    s%    G G<A"HEr'   ru  c                       \ rS rSrSrS rSrg)StatisticDataiX  z
Hold all analysed results.
c                 l   Xl         X l        [        5       U l        [	        5       U l        [        5       U l        [        5       U l	        U R                  R                  U5        U R
                  R                  U5        U R                  R                  U5        U R                  R                  U5        g rI   )
node_trees
extra_infor   time_range_summaryr   event_summaryr   distributed_summaryru  memory_summaryr   )r=   r  r  s      r(   r>   StatisticData.__init__]  s    $$"2"4)^#5#7 +o%%j1  ,  &&z2!!*-r'   )r  r  r  r  r  r  N)r   r   r   r   r   r>   r&   r   r'   r(   r  r  X  s    
.r'   r  TFmsd   K   c                 zF  ^M^N^O^P^Q SSK Jn   SmMS/mQS/mNTM* /mOShUMUNUOUQ4S jjn	S n
/ mPUP4S jnSiS	 jnSjS
 jnU R                  R                  [        R
                  5      nUb  UR                  U;   Ga  SS/nSnSnU	" U5        USS   H  nU	" U5        M     TQS   nTNS   nTOS   nU" U
" US5      5        U" U5        U" UR                  " U6 5        U" U5        SU" [        U R                  S   5      5      /nU" UR                  " U6 5        SU" [        U R                  S   5      5      /nU" UR                  " U6 5        U R                  R                  5        H]  n[        U R                  R                  U[        R                  5      5      nUU-  nSU 3U" U5      /nU" UR                  " U6 5        M_     U" U5        U" S5        U" SU-  5        U" S5        U" S5        US:X  a  SR                  TP5      $ Ub  UR                  U;   Gaw  / SQnS/mQS/mNTM* /mOSnU H  nU	" U5        M     TQS   nTNS   nTOS   nU" U
" US5      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        [        R                   " ["        5      n[        R                   " ["        5      n[        R                   " ["        5      n[        R                   " ["        5      nUR%                  U R                  R&                  5        UR%                  U R                  R&                  5        U R                  R(                  R+                  5        H!  u  nnU[        R,                  :w  d  M  UUU'   M#     U R.                  R0                  (       aW  [3        U R.                  R0                  5      U[        R,                  '   U R.                  R4                  U[        R,                  '   [        R6                  [        R8                  [        R:                  [        R<                  4 H  n[?        U5      RA                  S5      S   n UU;   d  M(  U U RB                  RD                  ;   d  MD  U RB                  RD                  U    RF                  UU'   U RB                  RD                  U    RH                  UU'   M     [        R                   " [J        5      n!U R                  RL                  R+                  5        H1  u  n"n#U#R+                  5        H  u  nn$[O        U!U   U$SS9U!U'   M     M3     U!R+                  5        H  u  nn$[3        U$5      UU'   M     U R.                  RP                  (       aW  [3        U R.                  RP                  5      U[        R,                  '   U R.                  RR                  U[        R,                  '   [U        UR+                  5       S SS9n%U%S   u  nn&S R                  [?        U5      RA                  S5      S   5      UU   U" U&US!9U" [        U&5      U-  5      /nU" UR                  " U6 5        U%SS   He  u  nn&S"R                  [?        U5      RA                  S5      S   5      UU   U" U&US!9U" [        U&5      U-  5      /nU" UR                  " U6 5        Mg     U" U5        / S#QnU" UR                  " U6 5        U" U5        UR+                  5        He  u  nn&S"R                  [?        U5      RA                  S5      S   5      UU   U" U&US!9U" [        U&5      U-  5      /nU" UR                  " U6 5        Mg     U" U5        U" S$5        U" SU-  5        U" S5        U" S5        Ub  URV                  U;   Ga[  U RB                  RD                  n'[Y        U'5      S:  Ga5  / n(Sn)Sn*U RB                  RD                  S%   RZ                  n+S& GH?  n,U,U';   d  M  U'U,   n-U+S:X  a  Sn.O[        U-RZ                  5      U+-  n.S%U,;   a  U, OS'U, 3n,U, U-RF                  U" U-RH                  US!9 S(U" U-R\                  US!9 S(U" U-R^                  US!9 S(U" U-R`                  US!9 S(U" [        U-RH                  5      U-  5       3	U" U-RZ                  US!9 S(U" U-Rb                  US!9 S(U" U-Rd                  US!9 S(U" U-Rf                  US!9 S(U" U.5       3	/nU(Ri                  U5        S%U,;  d  GM!  U)U-RH                  -  n)U*U-RZ                  -  n*GMB     UU)-
  n/U+U*-
  n0U+S:X  a  Sn.O[        U05      U+-  n.S)SU" U/US!9 S*U" [        U/5      U-  5       3U" U0US!9 S*U" U.5       3/nU(Ri                  U5        S+n1S,n2S,n3U( H  n[k        US   ["        5      (       a2  [Y        [?        US   5      5      U1:  a  [Y        [?        US   5      5      n1[Y        US   5      U2:  a  [Y        US   5      n2[Y        US-   5      U3:  d  M  [Y        US-   5      n3M     / S.QnS/mQS/mNTM* /mOS/nU	" U5        U	" U15        U	" U25        U	" U35        TQS   nTNS   nTOS   nU" U
" US05      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        U( H  nU" UR                  " U6 5        M     U" U5        U" S15        U" SU-  5        U" S5        U" S5        Ub  URl                  U;   Ga  U R.                  Rn                  (       Ga  / S2QnS/mQS/mNTM* /mOSnU H  nU	" U5        M     TQS   nTNS   nTOS   nU" U
" US35      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        [3        U R.                  Rn                  5      n4[3        U R.                  Rp                  5      n5[3        U R.                  Rr                  5      n6S%U" XS!9U" [        U5      U-  5      /nU" UR                  " U6 5        S4U" U4US!9U" [        U45      U-  5      /nU" UR                  " U6 5        S5U" U5US!9U" [        U55      U-  5      /nU" UR                  " U6 5        S6U" U6US!9U" [        U65      U-  5      /nU" UR                  " U6 5        U" U5        U" S75        U" SU-  5        U" S5        U" S5        Ub  URt                  U;   GaL  U RB                  R*                  (       Ga0  / n(S8nU(       a  U RB                  Rv                  n7OS9U RB                  R*                  0n7U7R+                  5        GHM  u  n8n9U(Ri                  S:U8 35        U[x        Rz                  :X  a  [U        U9R+                  5       S; SS9n%GOGU[x        R|                  :X  a  [U        U9R+                  5       S< SS9n%GOU[x        R~                  :X  a  [U        U9R+                  5       S= SS9n%OU[x        R                  :X  a  [U        U9R+                  5       S> S?9n%OU[x        R                  :X  a  [U        U9R+                  5       S@ SS9n%OU[x        R                  :X  a  [U        U9R+                  5       SA SS9n%O\U[x        R                  :X  a  [U        U9R+                  5       SB SS9n%O-U[x        R                  :X  a  [U        U9R+                  5       SC S?9n%Sn:Sn;W% H$  u  n,n-U:U-RH                  -  n:U;U-R                  -  n;M&     U% GH  u  n,n-U:S:X  a  Sn<O[        U-RH                  5      U:-  n<U;S:X  a  Sn.O[        U-R                  5      U;-  n.U,U-RF                  U" U-RH                  US!9 S(U" U-R\                  US!9 S(U" U-R^                  US!9 S(U" U-R`                  US!9 S(U" U<5       3	SDR                  U" U-R                  US!9U" U-R                  US!9U" U-R                  US!9U" U-R                  US!9U" U.5      5      U-R                  /nU(Ri                  U5        U(       d  GM$  U-R                  R+                  5        GHQ  u  n=n>U-RH                  S:X  a  Sn<O"[        U>RH                  5      U-RH                  -  n<U-R                  S:X  a  Sn.O"[        U>R                  5      U-R                  -  n.[Y        U=5      S-   U:  a  U=S USE-
   n=U=SF-  n=S'U= 3U>RF                  SDR                  U" U>RH                  US!9U" U>R\                  US!9U" U>R^                  US!9U" U>R`                  US!9U" U<5      5      SDR                  U" U>R                  US!9U" U>R                  US!9U" U>R                  US!9U" U>R                  US!9U" U.5      5      S/nU(Ri                  U5        U>R                  R+                  5        H  u  n?n@U>R                  S:X  a  Sn.O"[        W@RZ                  5      U>R                  -  n.[Y        U?5      SG-   U:  a  U?S USH-
   n?U?SF-  n?SIU? 3W@RF                  SJSDR                  U" U@RZ                  US!9U" U@Rb                  US!9U" U@Rd                  US!9U" U@Rf                  US!9U" U.5      5      S/nU(Ri                  U5        M     GMT     U-R                  R+                  5        H  u  n?n@U-R                  S:X  a  Sn.O"[        W@RZ                  5      U-R                  -  n.[Y        U?5      S-   U:  a  U?S USE-
   n?U?SF-  n?S'U? 3W@RF                  SJSDR                  U" U@RZ                  US!9U" U@Rb                  US!9U" U@Rd                  US!9U" U@Rf                  US!9U" U.5      5      S/nU(Ri                  U5        M     GM     GMP     S+n1S,n2S,n3SKnAU( H  n[k        U[>        5      (       a  M  [k        US   ["        5      (       a2  [Y        [?        US   5      5      U1:  a  [Y        [?        US   5      5      n1[Y        US   5      U2:  a  [Y        US   5      n2[Y        US-   5      U3:  d  M  [Y        US-   5      n3M     / SLQnS/mQS/mNTM* /mOU	" U5        U	" U15        U	" U25        U	" U35        U	" WA5        TQS   nTNS   nTOS   nU" U
" USM5      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        U( H>  n[k        U[>        5      (       a  U" U
" UU5      5        M)  U" UR                  " U6 5        M@     U" U5        U" S5        U" S5        Ub  UR                  U;   Ga6  U RB                  R                  (       Ga  / n(U RB                  R                  nBU[x        R                  :X  a  [U        WBR+                  5       SN SS9n%OwU[x        R                  :X  a  [U        WBR+                  5       SO SS9n%OHU[x        R                  :X  a  [U        WBR+                  5       SP S?9n%O[U        WBR+                  5       SQ SS9n%SnCU% H  u  n,n-WCU-RZ                  -  nCM     U% H  u  n,n-WCS:X  a  Sn.O[        U-RZ                  5      WC-  n.U,U-RF                  U" U-RZ                  US!9 S(U" U-Rb                  US!9 S(U" U-Rd                  US!9 S(U" U-Rf                  US!9 S(U" U.5       3	/nU(Ri                  U5        M     / SRQnSSnS+n1S,n3U( Ho  n[k        US   ["        5      (       a2  [Y        [?        US   5      5      U1:  a  [Y        [?        US   5      5      n1[Y        US   5      U3:  d  Ma  [Y        US   5      n3Mq     S/mQS/mNTM* /mOU	" U5        U	" U15        U	" U35        TQS   nTNS   nTOS   nU" U
" UST5      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        [        R                  " SU5      nDU( H  nWDR                  US   5      nEUE(       a$  WER                  S5      UER                  S5      -   n,OUS   n,[Y        U,5      U:  a  U,S US--
   SF-   US'   OU,US'   U" UR                  " U6 5        M     U" U5        U" S5        U" S5        Ub  UR                  U;   Ga  U RB                  R                  (       Ga  / n(U RB                  R                  nFU RB                  RD                  S%   R                  n+UFR+                  5        H  u  n,n-U+S:X  a  Sn.O[        U-R                  5      U+-  n.U,U-RF                  U" U-RH                  US!9 S(U" U-R\                  US!9 S(U" U-R^                  US!9 S(U" U-R`                  US!9 S(U" [        U-RH                  5      U-  5       3	U" U-R                  US!9 S(U" U-R                  US!9 S(U" U-R                  US!9 S(U" U-R                  US!9 S(U" U.5       3	/nU(Ri                  U5        GM     / S.QnSnS+n1S,n2S,n3U( H  n[Y        US   5      U:  a  [Y        US   5      n[k        US   ["        5      (       a2  [Y        [?        US   5      5      U1:  a  [Y        [?        US   5      5      n1[Y        US   5      U2:  a  [Y        US   5      n2[Y        US-   5      U3:  d  M  [Y        US-   5      n3M     S/mQS/mNTM* /mOU	" U5        U	" U15        U	" U25        U	" U35        TQS   nTNS   nTOS   nU" U
" USV5      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        U( H  nU" UR                  " U6 5        M     U" U5        U" S5        U" S5        Ub  UR                  U;   Ga  U RB                  R                  (       Ga  / n(U RB                  RD                  S%   R                  n+U(       a  U RB                  R                  nGOS9U RB                  R                  0nGWGR+                  5        GH  u  n8n9U(Ri                  S:U8 35        U[x        Rz                  :X  a  [U        U9R+                  5       SW SS9n%GOGU[x        R|                  :X  a  [U        U9R+                  5       SX SS9n%GOU[x        R~                  :X  a  [U        U9R+                  5       SY SS9n%OU[x        R                  :X  a  [U        U9R+                  5       SZ S?9n%OU[x        R                  :X  a  [U        U9R+                  5       S[ SS9n%OU[x        R                  :X  a  [U        U9R+                  5       S\ SS9n%O\U[x        R                  :X  a  [U        U9R+                  5       S] SS9n%O-U[x        R                  :X  a  [U        U9R+                  5       S^ S?9n%W% GH  u  n,n-U+S:X  a  Sn.O[        U-R                  5      U+-  n.U,U-RF                  U" U-RH                  US!9 S(U" U-R\                  US!9 S(U" U-R^                  US!9 S(U" U-R`                  US!9 S(U" [        U-RH                  5      U-  5       3	SDR                  U" U-R                  US!9U" U-R                  US!9U" U-R                  US!9U" U-R                  US!9U" U.5      5      /nU(Ri                  U5        GM     GM     SnS+n1S,n2S,n3U( H  n[k        U[>        5      (       a  M  [Y        US   5      U:  a  [Y        US   5      n[k        US   ["        5      (       a2  [Y        [?        US   5      5      U1:  a  [Y        [?        US   5      5      n1[Y        US   5      U2:  a  [Y        US   5      n2[Y        US-   5      U3:  d  M  [Y        US-   5      n3M     / S.QnS/mQS/mNTM* /mOU	" U5        U	" U15        U	" U25        U	" U35        TQS   nTNS   nTOS   nU" U
" US_5      5        U" SU 35        U" U5        U" UR                  " U6 5        U" U5        U( H>  n[k        U[>        5      (       a  U" U
" UU5      5        M)  U" UR                  " U6 5        M@     U" S5        U" S5        Ub  UR                  U;   Ga  U R                  R                  (       d  U R                  R                  (       Ga  U R                  R                  R+                  5        GHX  u  nHnI/ n([U        UIR+                  5       S` SS9n%U% H\  u  nJn-UJU-R                  U-R                  U-R                  U-R                  U-R                  U-R                  /nU(Ri                  U5        M^     [U        U R                  R                  WH   R+                  5       Sa SS9nKUK H\  u  nJn-UJU-R                  U-R                  U-R                  U-R                  U-R                  U-R                  /nU(Ri                  U5        M^     / SbQnS/mQS/mNTM* /mOScnS/nLU	" U5        U	" Sd5        U	" UL5        U	" UL5        U	" UL5        U	" UL5        U	" UL5        TQS   nTNS   nTOS   nU" U
" USeWH 35      5        U" SfR                  U R                  R                  UH   5      5        U" SgR                  U R                  R                  UH   5      5        U" U5        U" UR                  " U6 5        U" U5        U( H>  n[k        U[>        5      (       a  U" U
" UU5      5        M)  U" UR                  " U6 5        M@     U" S5        U" S5        GM[     SR                  TP5      $ )kNr   )SummaryViewr   r*   c                    > TS==   SU-   [        U 5      -   S-   ST-  -   -  ss'   TS==   SU -  ST-  -   -  ss'   TS==   U T-   -  ss'   g )Nr   z{: }r   -)str)paddingtext_dirSPACING_SIZEheader_sep_listline_length_listrow_format_lists     r(   
add_column _build_table.<locals>.add_column}  sh    Hs7|+c1S<5GH	
 	cGms\/ABBw55r'   c                 H    U [        U5      -
  nUS-  nSU-  U-   SX#-
  -  -   $ )Nr   r  )r   )r  textleft_lengthhalfs       r(   	add_title_build_table.<locals>.add_title  s6    D	)aTzD 3+*<#===r'   c                 J   > TR                  U 5        TR                  S5        g )Nr   )rc   )sresults    r(   rc   _build_table.<locals>.append  s    adr'   r   c                     U [        S5      :X  a  g[        U 5      nUS:X  a  US-  nOUS:X  a  US-  nOUS:X  a  US-  nS	R                  S
U-  U5      $ )z'
Transform time in ns to time in unit.
r   r  r  r   r  r   usr   {}{:.2f}r   )r   format)r  unitindentr  s       r(   format_time!_build_table.<locals>.format_time  sc     5<4[Fs{###$$S6\6::r'   c                 2    SR                  SU-  U S-  5      $ )z;
Transform ratio within [0, 1] to percentage presentation.
r  r   r  )r  )ratior  s     r(   format_ratio"_build_table.<locals>.format_ratio  s       vus{;;r'   DevicezUtilization (%)      zDevice SummaryzCPU(Process)zProcess Cpu UtilizationzCPU(System)zSystem Cpu UtilizationGPUa?  Note:
CPU(Process) Utilization = Current process CPU time over all cpu cores / elapsed time, so max utilization can be reached 100% * number of cpu cores.
CPU(System) Utilization = All processes CPU time over all cpu cores(busy time) / (busy time + idle time).
GPU Utilization = Current process GPU time / elapsed time.r  )z
Event TypeCallszCPU Time	Ratio (%)   zOverview SummaryzTime unit: .Tr   c                     U S   $ r#  r   xs    r(   r   _build_table.<locals>.<lambda>6  s    1r'   )keyreversez{})r  z  {})r*   r  zGPU Timer  a@  Note:
In this table, We sum up all collected events in terms of event type.
The time of events collected on host are presented as CPU Time, and as GPU Time if on device.
Events with different types may overlap or inclusion, e.g. Operator includes OperatorInner, so the sum of ratios is not 100%.
The time of events in the same type with overlap will not calculate twice, and all time is summed after merged.
Example:
Thread 1:
  Operator: |___________|     |__________|
Thread 2:
  Operator:   |____________|     |___|
After merged:
  Result:   |______________|  |__________|
r[  )r[  rY  rx   rv   rw   z  z / z  Othersz / - / - / - / r   (   r   )Namer  &CPU Total / Avg / Max / Min / Ratio(%)&GPU Total / Avg / Max / Min / Ratio(%)   zModel SummaryzNote:
In this table, GPU time is the sum of all device(GPU) events called in the phase.
Unlike overview summary, if two device(GPU) events execute on different streams with overlap time, we sum them directly here.
)r  z
Total Timer  zDistribution Summaryz  Communicationz  Computationz	  Overlapa  Note:
Communication time: Communication Event time, Communication Op time and its kernel time on gpu.
Computation time: Kernel time, except kernels belong to communication(nccl kernels).
Overlap time: Communication time intersects with computation time.
Example:
Communication:
  CPU:              |_________________|
  GPU:                                  |______________|
  Total:            |_________________| |______________|
Computation time(Kernel):
  GPU:         |________________|
Overlap time:       |___________|
4   zAll threads mergedzThread: c                      U S   R                   $ r#  r7   r  s    r(   r   r  +      QqT]]r'   c                      U S   R                   $ r#  r  r  s    r(   r   r  0      ad&7&7r'   c                      U S   R                   $ r#  r   r  s    r(   r   r  6  r  r'   c                      U S   R                   $ r#  r   r  s    r(   r   r  ;      QqT5F5Fr'   )r  c                      U S   R                   $ r#  r;   r  s    r(   r   r  @      ad&;&;r'   c                      U S   R                   $ r#  r  r  s    r(   r   r  F      ad&?&?r'   c                      U S   R                   $ r#  r  r  s    r(   r   r  L  r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r  Q      QqT5N5Nr'   z{} / {} / {} / {} / {}r   z...r   r   z    z- / - / - / - / -
   )r  r  r  r  FLOPszOperator Summaryc                      U S   R                   $ r#  )r  r  s    r(   r   r  I      !A$"3"3r'   c                      U S   R                   $ r#  )r  r  s    r(   r   r  O  r  r'   c                      U S   R                   $ r#  )r  r  s    r(   r   r  T  s    !8I8Ir'   c                      U S   R                   $ r#  )r9   r  s    r(   r   r  Y  s    !A$--r'   )r  r  r  Z   zKernel Summaryz(.+?)(<.*>)(\(.*\))zMemory Manipulation Summaryc                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   c                      U S   R                   $ r#  r  r  s    r(   r   r  #  r  r'   zUserDefined Summaryc                      U S   R                   $ r#  r  r  s    r(   r   r        !A$"4"4r'   c                      U S   R                   $ r#  r  r  s    r(   r   r    r  r'   )r  TypezAllocation Countz
Free CountzAllocation Sizez	Free SizezIncreased Size2      zMemory Summary - zPeak Allocated Memory: {}zPeak Reserved Memory: {})<)r  r   r   )cprofilerr  r  r   r   r[  
DeviceViewr  r   r  r   r   rN   r   OverViewr_   r`   r   updater   r   rb   r   r  r   r   r   rY  rx   rv   rw   r  splitr  rI  r   r7   ra   r   r	   r   r   sorted	ModelViewr   r9   r  r   r   r  r  r  rc   r   DistributedViewr   r   r   OperatorViewrF  r   r   r   r    r!   r"   r#   r$   r%   r;   r  r  r  r   r  r  
KernelViewrK  recompilematchgroupMemoryManipulationViewrJ  UDFViewrG  rH  
MemoryViewr  r  r  r  r{  r|  r}  r~  r  r  r  )Rstatistic_data	sorted_by	op_detail
thread_sep	time_unit	row_limitmax_src_column_widthviewsr  r  r  rc   r  r  
total_timeheadersname_column_widthDEFAULT_COLUMN_WIDTHr   
row_format
header_sepline_length
row_valuesgpu_namer9   utilizationcpu_type_timegpu_type_timecpu_call_timesgpu_call_timesr   valueevent_type_namegpu_time_ranger   r   
time_rangesorted_itemsr  rI  all_row_valuesaccumulation_timegpu_accumulation_timegpu_total_timer,   item	gpu_ratio
other_timeother_gpu_timecalltime_widthcpu_data_description_widthgpu_data_description_widthcommunication_timecomputation_timeoverlap_timerF  rg   rb   total_op_cpu_timetotal_op_gpu_time	cpu_ratioinnerop_nameinnerop_nodedevice_node_namerM   flops_widthrK  total_kernel_gpu_timekernel_name_patternr"  rJ  rH  device_typememory_eventsry  sorted_reserved_itemsnumber_column_widthr  r  r  r  r  sR                                                                                @@@@@r(   _build_tabler]  j  sH(    &%LdOdO%6 6>
 F; <  22DD##J }..%7./!$%A+,  %Q'
$Q'
&q) 	y&678zz  '*+zn//0IJK

 	z  *-.n//0HIJ

 	z  *-.&99IIKH11CCo44H
 #Z/Kz*L,EFJ:$$j12 L 	zI	

 	s[ !r
r
?776?"},,5B$$)M?!A+,  %Q'
$Q'
&q) 	y&89:YK()zz  '*+z#//4#//4$005$005n??JJKn??JJK
 ..>>DDF
_:::,1j) G --EE;E22JJ<M/778 22<< ?889
 &&##$$((	
J "*o33C8;On,#!//GGH #00HH'd z* #00HH'h j)#
. %006 ..;;AAC
*<*B*B*D&
J-9":.
d.z* +E D
 '5&:&:&<"J
(2:(>M*% '=--EE;E22JJ<M/778 22<< ?889 !~t
 (?
DKKJ--c2156:&9-tz12	

 	z  *-. ,QR 0Jc*o33C8;<z*Dy1U4[:56	J :$$j12 !1 	z8z  '*+z - 3 3 5Jc*o33C8;<z*Dy1U4[:56	J :$$j12 !6 	z
;	
 	s[ !r
r
}--6 ((@@ 	  &'!+N !$%!,,DD!( 
 22248D%*$%	$)$--$8>$I	(5(=dVRv;D&		&t}}9EFc+VZVgVgnwJxIyy|  ~I  JN  J[  J[  bk  ~l  }m  mp  q|  }A  }N  }N  U^  q_  p`  `c  dp  qv  w{  wD  wD  qE  HR  qR  dS  cT  U&t}}9EFc+VZVgVgnwJxIyy|  ~I  JN  J[  J[  bk  ~l  }m  mp  q|  }A  }N  }N  U^  q_  p`  `c  dp  qz  d{  c|  }	"J #))*5$D0)T]]:)->-/2 $&77J+.CCN"	!.1NB	z	:;?<X]^hXilvXvKwJxy~I>?|\eOfNgh	J !!*-N)+&)+&,
z!}c22C
1./.@%(Z]);%<Nz!}%(BB14Z]1C.z!}%(BB14Z]1C. -G  "dO!dO!- "()~&1212(+J(+J*1-K 9[/:;[,-::$$g./:,
z((*56 -:R 3$%2J2J}33u<--AAAG
  "dO!dO!-#% /0  )+J(+J*1-K 9[*@AB[,-::$$g./:!+22FF"  *22DD  &22@@L J7U:.;<J
 :$$j12!.Y?U#56CDJ
 :$$j12  ,9=U#34zABJ
 :$$j12 Ly9U<0:=>J
 :$$j12:
6 3$%2J2J}00E9''---N "-;;HH ).*F*F*L*L  %1$6$6$8 	5%%&<=
 3 33#)+BD$L *"3"33#)7 $$L
 *"3"33#)7 $$L
 *"3"33#)+F$L *"5"55#); $$L
 *"3"33#)? $$L
 *"3"33#)? $$L
 *"3"33#)+N$L %&!$%!".JD$%6%%)>)>>% #/ #/JD$(A-$%	$)$--$8;L$L	(A-$%	 "$"7"78;LL " 		&t}}9EFc+VZVgVgnwJxIyy|  ~I  JN  J[  J[  bk  ~l  }m  mp  q|  }A  }N  }N  U^  q_  p`  `c  dp  qz  d{  c|  }077'(=(=IN' $ 9 9	 ( $ 9 9	 ( $ 9 9	 )3 

#"J& #))*5 y "11779((#}}1,-	 %*,*?*?$@4==$P !*  $449,-	 %*,*G*G$H&*&;&;%< !*  #<0147HH/;$;&7!&;0" !- 5"$\N 3 , 1 1 8 ? ?$/(4(=(=I%& %0(4(A(A-6%& %0(4(A(A-6%& %0(4(A(A-6%& %1$;!!"$ !9 ? ?$/(4(E(E-6%& %0(4(I(I-6%& %0(4(I(I-6%& %0(4(I(I-6%& %1$;#!"& !$Q)*JT +11*= ".!5!5!;!;!=! 0 +#/#@#@A#E01I ).k.B.B(C*6*G*G)H %.
 %((8$9A$=&7%8 8H(?*;a*?8&$4 %5$=$4&*+;*<$=$/$4$4$7$<$C$C(3,7,@,@y)* )4,7,D,D1:)* )4,7,D,D1:)* )4,7,D,D1:)* )5Y(?!%&$ %(-."
0 !/ 5 5j AQ ">C :Z "\\//1,'#449,-	 %*+*>*>$?&*&;&;%< !*  ##34q8;LL3C$;&7!&;4" 0 !1E 9 0"$%5$6 7 + 0 0 3 8 ? ?$/(3(<(<9%& %0(3(@(@y%& %0(3(@(@y%& %0(3(@(@y%& %1$;!" !$'*J* +11*=E 2a #/g %9N N)+&)+&K,
j#..z!}c22C
1./.@%(Z]);%<Nz!}%(BB14Z]1C.z!}%(BB14Z]1C. -G  "dO!dO!-()~&1212{#(+J(+J*1-K 9[*<=>[,-::$$g./:,
j#..9[*=>:,,j9:	 -
 :2J2J}..%7''444N)77DDLJ---% &&(3  
 j///% &&(3  
 j///% &&(.I   & &&(/   %&!*
d%6% +*
d(A- !I %dmm 47L LIII"4==yAB#kRVRcRcjsFtEuux  zE  FJ  FW  FW  ^g  zh  yi  il  mx  y}  yJ  yJ  QZ  m[  l\  \_  `l  mv  `w  _x  y

 %%j1 +G !#N)+&,
z!}c22C
1./.@%(Z]);%<Nz!}%(BB14Z]1C. -  "dO!dO!-()~&12(+J(+J*1-K 9[*:;<[,-::$$g./:"$**-C"D,
+11*Q-@ ;;q>EKKN:D%a=Dt900$()@+<q+@$AE$IJqM$(JqMz((*56 - :2J2J}::eC''AAAN,,FF & ,,DD!"" 
 8==?
d!Q& !I %d&;&; <~ MIII"4==yAB#kRVRcRcjsFtEuux  zE  FJ  FW  FW  ^g  zh  yi  il  mx  y}  yJ  yJ  QZ  m[  l\  \_  `l  mr  sw  s@  s@  mA  DN  mN  `O  _P  Q"4#8#8yIJ#kZ^ZsZs  {D  OE  NF  FI  JU  VZ  Vo  Vo  v  J@  IA  AD  EP  QU  Qj  Qj  qz  E{  D|  |  @L  MV  @W  X  Y	
 %%j1 @G !"N)+&)+&,
z!}%(99(+JqM(:%z!}c22C
1./.@%(Z]);%<Nz!}%(BB14Z]1C.z!}%(BB14Z]1C. -  "dO!dO!-()~&1212(+J(+J*1-K 9[*GHI[,-::$$g./:,
z((*56 -:2J2J}++u4''999N,,DD!"" 
 "00II )
 ).*F*F*X*X,( %=$B$B$D 	5%%&<=
 3 33#)+BD$L *"3"33#)7 $$L
 *"3"33#)7 $$L
 *"3"33#)+F$L *"5"55#); $$L
 *"3"33#)? $$L
 *"3"33#)? $$L
 *"3"33#)+N$L #/JD$%*$%	 "$"7"78>I " 		&t}}9EFc+VZVgVgnwJxIyy|  ~I  JN  J[  J[  bk  ~l  }m  mp  q|  }A  }N  }N  U^  q_  p`  `c  dp  qv  w{  wD  wD  qE  HR  qR  dS  cT  U077'(=(=IN' $ 9 9	 ( $ 9 9	 ( $ 9 9	 )3	"J$ #))*53 #/[ %ER !"N)+&)+&,
j#..z!}%(99(+JqM(:%z!}c22C
1./.@%(Z]);%<Nz!}%(BB14Z]1C.z!}%(BB14Z]1C. -G  "dO!dO!-()~&1212(+J(+J*1-K 9[*?@A[,-::$$g./:,
j#..9[*=>:,,j9:	 -
 2J2J}..%7 ))99,,;;;
  ..>>DDF!#%!'')4   )5$J"((--,,**"J #))*5 )5 )/"11@@#eg4 )% )>$J"((--,,**"J #))*5 )> $&$#%$%1M? $&!&(#,-2./././././,Q/
,Q/
.q1 k->{m+LM /66&55LL' .55&55JJ' z"z(('23z""0J!*c22yjABz00*=>	 #1
 r
r
{ G~ 776?r'   )r   )r   )1r_   r   enumr   paddle.base.corer   r   paddle.utils.flopsr   statistic_helperr   r	   r
   r   rC   rY  r[  CudaRuntimerN   MemcpyMemsetrR  r  rx   rv   rw   r   PythonOprS  _AllTracerEventTyper   r   r.   r0   rm   rr   r   r   r   r   r   r   r   r   r   ru  r  r   r]  r   r'   r(   <module>rg     sd    	  @ $  !!  !!%% $ 9  B?, ?,D"%P	$'#L0%+PL0 L0^T
 T
n~> ~>B]E ]E@. .( !!
tr'   