
    <Цiy6                     j   S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
  S SKJr  S SKJr  \
(       a  S SKJr  S S	4S
\4S jjr\ R&                  " \S SS9r\ R&                  " \S S	S9r\ " S S5      5       r\ " S S5      5       r " S S5      r " S S5      rSS jrS S S4S jrS rS rg)    N)deque)	dataclass)DictListTYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                     U R                   $ N)childrenxs    T/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torch/profiler/_utils.py<lambda>r      s    1::    Freversec              #      #    U(       a  [         OS n[        U" U 5      5      nU(       a<  U" U5      nUv   U" U" U5      5       H  nUR                  U5        M     U(       a  M;  g g 7f)Nc                     U $ r    r   s    r   r   _traverse.<locals>.<lambda>   s    qr   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser"      sX     H[EeDk"I
Y'
 Z!89K[) : )s   A"A(&A(c                 "    U R                  5       $ r   )popr   s    r   r   r      s
    aeegr   T)r   r   c                 "    U R                  5       $ r   )popleftr   s    r   r   r      s
    r   c                   ^    \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   \
S 5       rSrg	)
EventMetrics!   r   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 V    U R                   S:X  a  gU R                  U R                   -  $ )Nr   g        )r*   r,   selfs    r   fraction_idle_timeEventMetrics.fraction_idle_time(   s*      A%  4#8#888r   r   N)__name__
__module____qualname____firstlineno__r*   int__annotations__r+   r,   r-   propertyr1   __static_attributes__r   r   r   r(   r(   !   s=    cL#L#K9 9r   r(   c                   8    \ rS rSr% \\S'   \\S'   Sr\\S'   Srg)Interval/   startendr   r-   r   N)r3   r4   r5   r6   r7   r8   r-   r:   r   r   r   r<   r<   /   s    J	HKr   r<   c                   @    \ rS rSrS rS rS rS rS\\	   4S jr
Srg	)
EventKey6   c                     Xl         g r   event)r0   rE   s     r   __init__EventKey.__init__7   s    
r   c                 @    [        U R                  R                  5      $ r   )hashrE   idr/   s    r   __hash__EventKey.__hash__:   s    DJJMM""r   c                 \    U R                   R                  UR                   R                  :H  $ r   )rE   rJ   )r0   others     r   __eq__EventKey.__eq__=   s    zz}}..r   c                 0    U R                   R                   $ r   )rE   namer/   s    r   __repr__EventKey.__repr__@   s    **//"#r   	intervalsc                    Sn[        US S9nU(       af  [        U R                  R                  US   R                  5      n[        U R                  R                  US   R                  5      nX4:  a  X$U-
  -  nSu  pVU[        U5      :  a  X   nX   nUS-  nUR                  UR                  :  a4  UR                  UR                  :  a  US-  nMW  UR                  Ul        Un[        U R                  R                  UR                  5      n[        U R                  R                  UR                  5      nX4:  a  X$U-
  -  nU[        U5      :  a  M  U$ )Nr   c                     U R                   $ r   r>   r   s    r   r   ,EventKey.intervals_overlap.<locals>.<lambda>E   s    AGGr   key)r      r\   )	sortedmaxrE   start_time_nsr>   minend_time_nsr?   len)	r0   rU   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapEventKey.intervals_overlapC   s<   9*;<	

 8 8)A,:L:LMMdjj44il6F6FGK*m ;;#i. %LM%LMFA  =#6#66 $$}'8'88FA*7*;*;M'A

 8 8-:M:MNMdjj44m6G6GHK*m ;;! #i. $ r   rD   N)r3   r4   r5   r6   rF   rK   rO   rS   r   r<   rj   r:   r   r   r   rA   rA   6   s&    #/$4> r   rA   c                   P    \ rS rSrS\4S jrS rS rS rS r	SS\
S	\4S
 jjrSrg)BasicEvaluationd   profc                 T   Xl         0 U l        U R                  5         [        S U R                  R	                  5        5       S S9U l        U R
                   Vs/ s H  o"R                  PM     snU l        / U l        U R                  5       U l
        U R                  5         g s  snf )Nc              3   $   #    U  H  ov   M     g 7fr   r   ).0es     r   	<genexpr>+BasicEvaluation.__init__.<locals>.<genexpr>j   s     ,+1Q+s   c                 .    U R                   R                  $ r   )rE   r_   r   s    r   r   *BasicEvaluation.__init__.<locals>.<lambda>j   s    AGG<Q<Qr   rZ   )r	   metricscompute_self_timer]   keys
event_keysrE   eventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r0   ro   rs   s      r   rF   BasicEvaluation.__init__e   s    57  ,))+,2Q
 )-81ww8/1 $ 8 8 :  9s   B%c                 N   U R                   R                  c   e[        U R                   R                  R                  5       5      nU(       a  UR	                  5       nUR
                  nUR                   H"  nX4R
                  -  nUR                  U5        M$     [        U5      U R                  ;  d!   SUR                   SUR                   35       e[        US9U R                  [        U5      '   UR
                  U R                  [        U5         l        U(       a  M  gg)z=
Computes event's self time(total time - time in child ops).
NzDuplicate id: z, )r+   )r	   kineto_resultsr   experimental_event_treer$   r*   r   r   rA   rx   rJ   rR   r(   )r0   stackr    	self_timer!   s        r   ry   !BasicEvaluation.compute_self_timeq   s     ||**666dll11IIKL J"33I)22999	[)  3 $DLL8C
b0ABC81=91UDLL*-. ",!<!< LL$ er   c                 
  ^^^ U R                   R                  c   eU R                   R                  R                  5       nS mS m[        U4S jU 5       S S9n[        U4S jU 5       S S9n[        X#-   S S9U l        0 nS	nU H  m[        UU4S
 jUS9nXdT'   Ub  UOUnM     S	nSnX#-   U R                  -   n	S n
/ nU	R                  U
S9  U	 GH  n[        US5      (       aE  UR                  5       S-  nUR                  5       UR                  5       -   S-  nX;   a	  XL   b  XL   n[        US5      (       a@  UR                  5       nUR                  5       UR                  5       -   nX;   a	  XL   b  XL   nO)[        US5      (       a  UR                  nUR                  nU[        U5      :  aB  X7   R                  5       W::  a,  US-  nU[        U5      :  a  X7   R                  5       U::  a  M,  X-
  S-   n[        US	5      n[        US5      (       d  [        US5      (       a  UR!                  [#        WWU5      5        GMy  [        US5      (       d  GM  XR$                  ['        U5         l        GM     U$ )z
Computes queue_depth at each event. This will calculate the queue depth data for
All the events in the tree.
This will return a list of Interval of queue depth data of cuda launch and kernels.
c                      U R                   S:H  $ )NcudaLaunchKernel)rR   rs   s    r   is_cuda_launch_kernelBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   s    66///r   c                     U R                  5       [        R                  :H  =(       a    SU R                  R	                  5       ;  $ )Nmem)device_typer
   CUDArR   lowerr   s    r   is_cuda_kernel;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   s+    ==?joo5U%qvv||~:UUr   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )rr   rs   r   s     r   rt   6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s     D1+@+CQQ   !	!c                 "    U R                  5       $ r   start_nsr   s    r   r   5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   
    !**,r   rZ   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )rr   rs   r   s     r   rt   r      s     =1>!+<QQr   c                 "    U R                  5       $ r   r   r   s    r   r   r      r   r   c                 "    U R                  5       $ r   r   r   s    r   r   r      s
    1::<r   r   c                 F   > U R                  5       TR                  5       :H  $ r   )linked_correlation_id)r   cuda_launch_events    r   r   r      s    !113$::<=r   rX   c                     [        U S5      (       a  U R                  5       S-  $ [        U S5      (       a  U R                  5       $ [        U S5      (       a  U R                  $ [	        S5      e)Nstart_us  r   r_   zUnknown Event Type)hasattrr   r   r_   	ExceptionrD   s    r   new_old_event_comparatorEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   s`    uj))~~'$..uj))~~''uo..***011r   r   r   r   r_   r\   )r	   r   r|   r]   r}   index_of_first_matchsortr   r   duration_usr   duration_nsr_   ra   rb   r^   r   r<   rx   rA   r-   )r0   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   r   rE   
start_timeend_timecurrent_queue_depthr   r   r   s                   @@@r   r~   #BasicEvaluation.compute_queue_depth   s    ||**666,,55<<>	0	V $DD&
 $==&

 "39O
 35!3("=(	E 16,-*/*;AS "4  !!'<t{{J
	2 ,.45Euj))"^^-4
!NN,u/@/@/BBdJ*~/D/P+9+@(uj))"^^-
 >>+e.?.?.AA*~/D/P+9+@(00"00
 ,, %s+='>>'=FFH %)$	 %s+='>>'=FFH #7"MPQ"Q"%&91"=uj))WUJ-G-G ''Z3FG 00<OXe_-9A  D  r   c                     SnSn/ nU R                   (       a  U R                  (       aw  U[        U R                  S   R                  U R                   S   R                  5      [        U R                   S   R
                  U R                  S   R                  5      /-  nU R                    Hi  nUR                  S:X  a  U(       d  UR
                  nSnUR                  S:  d  M:  U(       d  MC  UR                  [        X$R                  5      5        SnMk     U R                  R                  5        Vs/ s H  oUR                  PM     nnU H8  n[        U5      R                  U5      U R                  [        U5         l        M:     gs  snf )z$
Computes idle time of the profile.
Fr   r   TN)r   r|   r<   r_   r>   r?   ra   r-   r   rx   rz   rE   rA   rj   r,   )r0   idle
idle_startidle_intervals
data_pointrs   
event_listrE   s           r   r   !BasicEvaluation.compute_idle_time   sD   
 
)+  T[[Q55t7L7LQ7O7U7UV..r266B8S8ST N
 //J%%*4'^^
%%)dd%%hz;K;K&LM 0 (,||'8'8':;':!gg':
;E9A:/ LL%)6   <s   5Fc                   ^ SSK n[        [        U R                  5      5      nU Vs/ s H  oDR                  PM     nnSmSn/ nSnU[        U5      :  a  XX   T:  a  US-  nM  [        US-   [        U5      5       He  n	[        UU4S jU	S9n
[        XYU
S9nUc  M!  X[   U:  d  M+  UR                  [        X;   R                  X8   R                  5      5        U
b  U
OUn  O   US-  nU[        U5      :  a  M  U R                  R                  5        Vs/ s H  nUR                  U5      (       d  M  UPM     nnU(       Ga  UR                  U Vs/ s H  oR                  U   R                   PM     snUR"                  S9nUR                  U Vs/ s H  oR                  U   R$                  PM     snUR"                  S9nXR'                  U5      -
  UR)                  U5      -  nXR'                  U5      -
  UR)                  U5      -  nUS	U-  -   n[+        [-        UU5      [.        R0                  " S5      S
S9 VVs/ s H  u  nnUPM
     nnnUSU nU$ s  snf s  snf s  snf s  snf s  snnf )z
Filter and Rank the events based on some heuristics:
1) Events that are in the falling phase of the queue depth.
2) Events that have a high idle_time, self_time difference.

Parameters:
    length: The number of events to return.
r   N   r\   c                    > U T:*  $ r   r   )r   bottom_threasholds    r   r   -BasicEvaluation.rank_events.<locals>.<lambda>  s    .?)?r   rX   )r>   r?   )dtypeg333333?T)r[   r   )torchlistr   r   r-   rb   ranger   argmaxr   r<   r>   rx   rz   rj   tensorr+   float32r1   meanstdr]   zipoperator
itemgetter)r0   lengthr   r   rs   	qd_valuestop_threasholddecrease_intervalrf   rg   next_minimum_idxpeak_idxrE   r   r   	idle_timenormalized_gainnormalized_selfheuristic_score_list_r   s                       @r   rank_eventsBasicEvaluation.rank_events  s    	)>)> ?@,<=,<q]],<	=#i. |//Q1q5#i.1 $8?q$  "):JK 'I,?>,Q%,, ,6<<>N>Q>W>W
 -=,H(aA! 2" FA+ #i. 2 **,
,&&'89 , 	 

 ?IJzee$11zJmm % I EOPZEe$77ZPmm % I  )::i+@@EIIiDXXO(::i+@@EIIiDXXO#2S?5J#J 
 !',j9 ++A. !!HAu !   $GV,Js >:
 K Qs#   II$6I$"I)"I.I3r   print_enablec                 @   U R                  U5      nU(       d  U$ U(       a  SOSnUSR                  U Vs/ s HA  nS SU S[        UR                  5       SU R                  U   R
                  S-  S	 S
S 3	PMC     sn5      -  nU(       a  [        U5        U$ s  snf )NzOptimizable events:
zNo events to optimize

zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: rn   z.2fz%
)r   joinsource_code_locationrE   rx   r1   print)r0   r   r   r   outputrE   s         r   get_optimizable_events&BasicEvaluation.get_optimizable_eventsJ  s    %%f-
,6(<U$)) ( (E H g +EKK89 :||E*==CCH I	
 (	
 		
 &Ms   AB
)r}   r{   r|   rx   r	   r   N)r\   T)r3   r4   r5   r6   r	   rF   ry   r~   r   r   r7   boolr   r:   r   r   r   rm   rm   d   s>    
!W 
!=,\ |08GRS D  r   rm   c                     Ub  U[        U 5      :  a  [        U 5      n[        X#5       H  nU" X   5      (       d  M  Us  $    g r   )rb   r   )seq	predicater>   r?   rf   s        r   r   r   _  s@    
{cSXo#h5SVH  r   c                     U $ r   r   r   s    r   r   r   h  s    ar   c                 `    XU n [        U 5      S:X  a  g U R                  [        XS95      U-   $ )Nr   rZ   )rb   r   r^   )r   r[   r>   r?   s       r   r   r   h  s2    
C.C
3x1}99S&'%//r   c                     U b>  [         R                  " SU R                  5      nUc  U R                  n M5  U R                  $ g)Nz
\.py\(.*\)zNo source code location found)researchrR   parent)rE   matchs     r   r   r   o  s:    

		-4=LLEzz*r   c                  T    SSK Jn   U " 5           S S S 5        g ! , (       d  f       g = f)Nr   r   )torch.autograd.profilerr	   r   s    r   _init_for_cuda_graphsr   }  s    /	 
s   
')r   N)	functoolsr   r   collectionsr   dataclassesr   typingr   r   r   r   r	   torch.profilerr
   torch.autogradr   r   r"   partialtraverse_dfstraverse_bfsr(   r<   rA   rm   r   r   r   r   r   r   r   <module>r     s      	  ! , , + % + *>u * *   4EtT  ,e
 
9 
9 
9   + +\x xv  qd 0+r   