
    IЦi                     x   S SK r S SKrS SKrS SKJrJr  S SKJr  S SKJ	r	J
r
JrJrJr  S SKJr  S SKrS SKJr  / SQr " S S	\5      rS
 rS rS rS r " S S5      r " S S5      r\" S/ SQ5      r " S S\5      r " S S\5      r " S S\5      r " S S5      r S r!Sr"Sr#S r$S"S  jr%         S#S! jr&g)$    N)defaultdict
namedtuple)
attrgetter)AnyDictListOptionalTuple)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                      ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
\S	 5       r       SS
 jrS rS rS\S\4S jrSS jrS rSrU =r$ )r      z'A list of Events (for pretty printing).c                    > UR                  SS 5      nUR                  SS5      nUR                  SS5      n[        TU ]  " U0 UD6  X0l        X@l        SU l        XPl        g )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__s         [/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torch/autograd/profiler_util.pyr   EventList.__init__   s`    ZZd3
$4e<ZZe4
$)&)%- %    c                 r    U R                  5         U R                  5         U R                  5         SU l        g )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr    r"   s    r&   _build_treeEventList._build_tree'   s.    ##% &&(r(   c                 "    U R                  5       $ N)tabler-   s    r&   __str__EventList.__str__-   s    zz|r(   c                     [        5       n[        [        U 5      5       H  nX   R                  c  M  X   R                  R                  X   R                  :X  d  M>  [        X   R                  R
                  5      S:X  d  Me  X   R
                  X   R                  l        X   R                  X   R                  l        X   R
                   H  nX   R                  Ul        M     UR                  U5        M     [        U5      S:X  a  g [        U 5       VVs/ s H  u  pEXA;  d  M  UPM     nnnU R                  5         U R                  U5        GMX  s  snnf )N   r   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r"   	to_deleteidxchindevnew_evtss          r&   r+   EventList._remove_dup_nodes0   s   ISY'I((4	,,11TY^^CDI00==>!C8<	8N8NDI((53793D3DDI((0"i44(,	(<(< 5MM#& ( 9~"*3D/R/wsS=Q/HRJJLKK!#  Ss   E-Ec                    U  Vs/ s H8  nUR                   (       a  M  UR                  [        R                  :X  d  M6  UPM:     nn[	        U[        S5      S9n[        R                  " US S9nU GH  u  pV[	        US S9n/ nSn	U H  n
[        U5      S:  a  US   nU
R                  R                  UR                  R                  :  d.  U
R                  R                  UR                  R                  :  a  UR                  5         ODUR                  U
5        U
R                  b   SU
R                   35       eU
R!                  U5        O[        U5      S:  a  M  UR#                  U
5        M     GM     gs  snf )	a  Populate child events into each underlying FunctionEvent object.

One event is a child of another if [s1, e1) is inside [s2, e2). Where
s1 and e1 would be start and end of the child event's interval. And
s2 and e2 start and end of the parent event's interval

Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
be a parent of two other intervals.

If for any reason two intervals intersect only partially, this function
will not record a parent child relationship between then.
thread)keyc                 2    U R                   U R                  4$ r1   )rJ   node_idevents    r&   <lambda>2EventList._populate_cpu_children.<locals>.<lambda>a   s    u||U]]&Cr(   c                 \    U R                   R                  U R                   R                  * /$ r1   )
time_rangestartendrN   s    r&   rP   rQ   s   s$    5#3#3#9#9E<L<L<P<P;P"Qr(   r   Nz(There is already a CPU parent event for )is_asyncdevice_typer   CPUsortedr   	itertoolsgroupbyr9   rS   rT   rU   r   append_cpu_childr:   rK   set_cpu_parentappend)r"   evtsync_eventseventsthreads	thread_idthread_eventsthread_events_current_eventscur_endrO   parents               r&   r*    EventList._populate_cpu_childrenD   su   $ 
<< $'OOz~~$E  	 

 8$
 ##C
  )0$I#QN 35NG'.)A-+B/F((..&2C2C2G2GG ++//&2C2C2G2GG '**,//6!,,4REeii[QR4,,V4 .)A-  %%e,# ( )09
s   E<E<E<c                 ^  ^ U4S jm0 nU  HJ  nT" U5      b  M  UR                   c  M  UR                  UR                  4nX1;  d  M<  UR                   X'   ML     U  HN  nT" U5      nUc  M  UR                  c   eUR                  UR                  4nX1;   a  X   Ul         MG  / Ul         MP     g )Nc                 T   > U c  g U R                   S:X  a  U $ T" U R                  5      $ Nr6   )scoper:   )r`   	bw_parents    r&   ro   6EventList._set_backward_stacktraces.<locals>.bw_parent   s*    {a
 00r(   )stacksequence_nrrJ   
fwd_thread)r"   
fwd_stacksr`   tpro   s        @r&   r,   #EventList._set_backward_stacktraces   s    	1 
C~%#))*?__cjj1&$'IIJM	  C#A}||///]]ALL1? *CI "CI r(   c                 &    [        S U  5       5      $ )Nc              3   8   #    U  H  oR                   v   M     g 7fr1   )self_cpu_time_total.0rO   s     r&   	<genexpr>0EventList.self_cpu_time_total.<locals>.<genexpr>   s     ?$,,$   )sumr-   s    r&   rz   EventList.self_cpu_time_total   s    ?$???r(   c                 N    [        U UUUUUUU R                  U R                  US9
$ )a  Print an EventList as a nicely formatted table.

Args:
    sort_by (str, optional): Attribute used to sort entries. By default
        they are printed in the same order as they were registered.
        Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
        ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
        ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
        ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
        ``self_xpu_memory_usage``, ``count``.
    top_level_events_only(bool, optional): Boolean flag to determine the
        selection of events to display. If true, the profiler will only
        display events at top level like top-level invocation of python
        `lstm`, python `add` or other functions, nested events like low-level
        cpu/cuda/xpu ops events are omitted for profiler result readability.

Returns:
    A string containing the table.
)	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r!   )r"   r   r   r   r   r   r   r   s           r&   r2   EventList.table   s;    : !5"7$;//''"7
 	
r(   c                    SSK nU R                  (       d  SOU R                  n[        US5       n/ nSnUR                  S5        U  GH  nUR                  c  M  UR                  SR                  UR                  UR                  R                  UR                  R                  5       UR                  (       d  UR                  OSUR                   SUR                   S	35      5        UR                   HQ  nUR                  S
UR                   SUR                  R                   SUR                   SU SU S35        US-  nMS     GM     [        U 5      S:  a=  UR                  UR                  5       S-
  UR                   5        UR#                  5         UR                  S5        SSS5        g! , (       d  f       g= f)zExport an EventList as a Chrome tracing tools file.

The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

Args:
    path (str): Path where the trace will be written.
r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r6      ])osr   openwrite
trace_nameformatrS   rT   
elapsed_us	is_remoterJ   rM   r=   r9   seektellSEEK_SETtruncate)	r"   pathr   device_namefchrome_eventsnext_idr`   ks	            r&   export_chrome_traceEventList.export_chrome_trace   s    	$($4$4f$:J:J$_MG GGCL>>)' (.v,,113"}} 

)#++l3::,bQ(  A GG%cnn%5 6!!$!5!5 6 7""%** .!!(	 ***5 7((	 qLG %' D 4y1}qvvx!|R[[1

GGCLY __s   E?F88
Gc                 
    / SQ$ )N)rz   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_total r-   s    r&   supported_export_stacks_metrics)EventList.supported_export_stacks_metrics  s    
 	
r(   r   metricc           	         X R                  5       ;  a%  [        S[        U R                  5       5      -   5      e[        R                  SS5      n[	        US5       nU  H  nUR
                  (       d  M  [        UR
                  5      S:  d  M1  [        UUR                  SS5      R                  SS5      R                  S	S5      5      n[        U5      S:  d  M~  S
n[        UR
                  5       H  nXxR                  U5      -  nUS-  nM     US S S-   [        [        U5      5      -   nUR                  US-   5        M     S S S 5        g ! , (       d  f       g = f)Nzmetric should be one of: z ;	
____r   r   r   devicexpuprivateuse1 ;rV    
)r   
ValueErrorstr	maketransr   rq   r9   getattrreplaceintreversed	translater   )	r"   r   r   translate_tabler   r`   metric_value	stack_strentrys	            r&   export_stacksEventList.export_stacks  s'   ==??+d::<=>  --&9$_999SYY!!3#*vx8 1 9	$L <(1,$&	%-cii%8E%)III%,I &9 %.crNS$83s<?P;Q$Q		D 01  __s    E4EA	EA+E
Ec                    U R                   (       d   e[        [        5      nS[        [        S4   4S jnU  H  nX4" XQU5         R                  U5        M     [        UR                  5       U R                  U R                  U R                  S9nU H'  nUR                  SU Ul        U(       a  M   SUl        M)     U$ )a  Averages all function events over their keys.

Args:
    group_by_input_shapes: group entries by
        (event name, input shapes) rather than just event name.
        This is useful to see which input shapes contribute to the runtime
        the most and may help with size-specific optimizations or
        choosing the best candidates for quantization (aka fitting a roof line)

    group_by_stack_n: group by top n stack trace entries

Returns:
    An EventList containing FunctionEventAvg objects.
return.c                 h   [        U R                  5      [        U R                  5      [        U R                  5      [        U R                  5      [        U R
                  5      /nU(       a$  UR                  [        U R                  5      5        US:  a  X0R                  S U -  n[        U5      $ Nr   )
r   rK   rM   rX   	is_legacyis_user_annotationr_   input_shapesrq   tuple)rO   group_by_input_shapesgroup_by_stack_nrK   s       r&   get_key'EventList.key_averages.<locals>.get_key=  s    EIIEMM"E%%&EOO$E,,-C %

3u1123!#{{#4$455:r(   r   r   r   Nr   )r    r   r   r
   r   r>   r   valuesr   r   r!   rq   r   )r"   r   r   statsr   r`   avg_lists          r&   key_averagesEventList.key_averages+  s     9DEU9V	uSRUX 	 C'#6FGHLLSQ  LLN''//''	
 C		"3#34CI((#%   r(   c                 P    [        5       nU  H  nX-  nSUl        M     SUl        U$ )z>Averages all events.

Returns:
    A FunctionEventAvg object.
NTotal)r   rK   )r"   
total_statr`   s      r&   total_averageEventList.total_averageZ  s4     &'
CJ!JN  !
r(   )r   r    r   r!   )Nd   K   7   P   NF)Fr   )__name__
__module____qualname____firstlineno____doc__r   r.   r3   r+   r*   r,   propertyrz   r2   r   r   r   r   r   r   __static_attributes____classcell__)r%   s   @r&   r   r      s    1& "(D-L#4 @ @
   "#(
T7r
2# 2s 20-^ r(   r   c                 J    SnSnX:  a  X-  S S3$ X:  a  X-  S S3$ U S S3$ )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr   )time_usUS_IN_SECONDUS_IN_MSs      r&   _format_timer   h  sM    "LH(-Q//$S),,c]"r(   c                 H    US:X  a  U S:X  d
   SU  35       egU S-  U-  S S3$ )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r   )r   total_time_uss     r&   _format_time_sharer   s  s?    !|G=gYGG|o-c2!44r(   c                     SnSU-  nSU-  n[        U 5      U:  a  U S-  U-  S S3$ [        U 5      U:  a  U S-  U-  S S3$ [        U 5      U:  a  U S-  U-  S S3$ [        U 5      S-   $ )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBs       r&   _format_memoryr   {  s    	B	B	B
6{b3,#C(,,	V	3,#C(,,	V	3,#C(,,6{T!!r(   c                 "   ^  [        U 4S j5      $ )Nc                 .   > [        [        U T5      5      $ r1   )r   r   )r"   r;   s    r&   rP   !_attr_formatter.<locals>.<lambda>  s    gdD.A!Br(   )r   r;   s   `r&   _attr_formatterr    s    BCCr(   c                       \ rS rSrSr\" S5      r\" S5      r\" S5      r\" S5      r	\" S5      r
\" S5      r\S	 5       r\S
 5       r\\" S\S9S 5       5       rSrg)r   i  zsHelpers for FunctionEvent and FunctionEventAvg.

The subclass should define `*_time_total` and `count` attributes.
cpu_timedevice_timecpu_time_totaldevice_time_totalrz   self_device_time_totalc                 ^    U R                   S:X  a  S$ SU R                  -  U R                   -  $ Nr   g        r   )countr  r-   s    r&   r  FormattedTimesMixin.cpu_time  s+    jjAosQ31D1D+Dtzz+QQr(   c                 ^    U R                   S:X  a  S$ SU R                  -  U R                   -  $ r
  )r  r  r-   s    r&   r  FormattedTimesMixin.device_time  s+    jjAosT31G1G+G$**+TTr(   z<`cuda_time` is deprecated, please use `device_time` instead.categoryc                     U R                   $ r1   )r  r-   s    r&   	cuda_timeFormattedTimesMixin.cuda_time  s     r(   r   N)r   r   r   r   r   r  cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r  r  r   FutureWarningr  r   r   r(   r&   r   r     s    
 #:.L%m4O()9:+,?@-.CD!01I!JR R U U F 	 
 r(   r   c                        \ rS rSrS rS rSrg)r   i  c                     Xl         X l        g r1   )rT   rU   )r"   rT   rU   s      r&   r   Interval.__init__  s    
r(   c                 4    U R                   U R                  -
  $ )z$
Returns the length of the interval
rU   rT   r-   s    r&   r   Interval.elapsed_us  s     xx$**$$r(   r  N)r   r   r   r   r   r   r   r   r(   r&   r   r     s    %r(   r   r   )r;   r   durationc                   R   \ rS rSrSrSSSSSSSSSSS\R                  SSSSSSSS4S jrS rS	 r	S
 r
\S 5       r\S 5       r\\" S\S9S 5       5       r\S 5       r\S 5       r\S 5       r\\" S\S9S 5       5       r\S 5       r\\" S\S9S 5       5       r\S 5       rS rSrg)r   i  z.Profiling information about a single function.Nr   FrV   c                    Xl         UU l        X l        UU l        [	        XE5      U l        X0l        X`l        / U l        SU l	        / U l
        S U l        Xpl        UU l        UU l        Xl        Xl        Xl        Xl        Xl        Xl        Xl        Xl        UU l        UU l        Uc  UOUU l        UU l        UU l        UU l        SU l        SU l        SU l        g )Nr6   rV   ) idrM   r;   r   r   rS   rJ   rs   r=   r  r<   r:   r   concrete_inputskwinputsrq   rn   r   cpu_memory_usagedevice_memory_usagerW   r   rr   rX   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r"   r$  r;   rJ   start_usend_usrs   r   rq   rn   r   r'  r(  rW   r   rr   rM   rX   r)  r*  r   r+  r   r%  r&  r   s                             r&   r   FunctionEvent.__init__  s    8 #	)$,X$>!)3%'
1337-9*9(0 

)3%5(; &( +'2!-(0F6H 	  )$)
2D "!#$&!r(   c                     U R                   [        R                  :X  d   eU R                  R	                  [        XU5      5        g r1   )rX   r   rY   r=   r_   r   )r"   r;   r   r!  s       r&   append_kernelFunctionEvent.append_kernel  s3    :>>111F4:;r(   c                     U R                   [        R                  :X  d   e[        U[        5      (       d   eUR                   [        R                  :X  d   eU R
                  R                  U5        g)zAppend a CPU child of type FunctionEvent.

One is supposed to append only direct children to the event to have
correct self cpu time being reported.
N)rX   r   rY   
isinstancer   r<   r_   )r"   childs     r&   r]   FunctionEvent.append_cpu_child   s[     :>>111%////  JNN222  'r(   c                     U R                   [        R                  :X  d   e[        U[        5      (       d   eUR                   [        R                  :X  d   eXl        g)a  Set the immediate CPU parent of type FunctionEvent.

One profiling FunctionEvent should have only one CPU parent such that
the child's range interval is completely inside the parent's. We use
this connection to determine the event is from top-level op or not.
N)rX   r   rY   r6  r   r:   )r"   ri   s     r&   r^   FunctionEvent.set_cpu_parent  sL     :>>111&-0000!!Z^^333 r(   c                     U R                   (       d  U R                  [        R                  :w  a  gU R                  [        S U R                   5       5      -
  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr1   )r'  r|   r7  s     r&   r}   6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>  s      +
0Au""0Ar   )rW   rX   r   rY   r'  r   r<   r-   s    r&   self_cpu_memory_usage#FunctionEvent.self_cpu_memory_usage  sJ    ==D,,
>$$s +
040A0A+
 (
 
 	
r(   c                     U R                   (       d  U R                  [        R                  :w  a  gU R                  [        S U R                   5       5      -
  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr1   )r(  r=  s     r&   r}   9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>%  s      .
3D%%%3Dr   )rW   rX   r   rY   r(  r   r<   r-   s    r&   self_device_memory_usage&FunctionEvent.self_device_memory_usage!  sJ    ==D,,
>''# .
373D3D.
 +
 
 	
r(   zO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r  c                     U R                   $ r1   rD  r-   s    r&   self_cuda_memory_usage$FunctionEvent.self_cuda_memory_usage)  s     ,,,r(   c                 t    U R                   [        R                  :X  a  U R                  R	                  5       $ gr   )rX   r   rY   rS   r   r-   s    r&   r  FunctionEvent.cpu_time_total1  s*    z~~-??--//r(   c                     U R                   (       d  U R                  [        R                  :w  a  gU R                  [        S U R                   5       5      -
  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr1   )r  r=  s     r&   r}   4FunctionEvent.self_cpu_time_total.<locals>.<genexpr><  s      )
.?U  .?r   )rW   rX   r   rY   r  r   r<   r-   s    r&   rz   !FunctionEvent.self_cpu_time_total8  sJ    ==D,,
>""S )
.2.?.?)
 &
 
 	
r(   c                    U R                   (       d  U R                  (       d  gU R                  [        R                  :X  af  U R
                  (       d9  [        S U R                   5       5      [        S U R                   5       5      -   $ [        S U R                   5       5      $ U R                  [        R                  [        R                  [        R                  4;   d   eU R                  R                  5       $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr1   r!  r|   kinfos     r&   r}   2FunctionEvent.device_time_total.<locals>.<genexpr>G       D|e>>|r   c              3   8   #    U  H  oR                   v   M     g 7fr1   r  )r|   rD   s     r&   r}   rU  G  s      K3DR((3Dr   c              3   8   #    U  H  oR                   v   M     g 7fr1   rR  rS  s     r&   r}   rU  L  rV  r   )rW   r   rX   r   rY   r   r   r=   r<   CUDAPrivateUse1MTIArS   r   r-   s    r&   r  FunctionEvent.device_time_total@  s    ==z~~->>Dt||DDs K373D3DK H  
 Dt||DDD##&&(   
 ??--//r(   zA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                     U R                   $ r1   rX  r-   s    r&   cuda_time_totalFunctionEvent.cuda_time_totalU  s     %%%r(   c                 l   U R                   (       d  U R                  (       d  gU R                  [        R                  :X  a)  U R
                  [        S U R                   5       5      -
  $ U R                  [        R                  [        R                  [        R                  4;   d   eU R
                  $ )Nr   c              3   8   #    U  H  oR                   v   M     g 7fr1   rX  r=  s     r&   r}   7FunctionEvent.self_device_time_total.<locals>.<genexpr>b  s      05FE''5Fr   )rW   r   rX   r   rY   r  r   r<   rZ  r[  r\  r-   s    r&   r  $FunctionEvent.self_device_time_total]  s    ==z~~-))C 0595F5F0 -   ##&&(   
 )))r(   zK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                     U R                   $ r1   r  r-   s    r&   r   "FunctionEvent.self_cuda_time_totalm  s     ***r(   c                     U R                   $ r1   r  r-   s    r&   rK   FunctionEvent.keyu  s    yyr(   c           	         U R                   nU R                  nU R                  nSR                  / SPU R                   PSPU R
                   PSPU R                   PSPU R                   PSPU R                   PSPU R                  R                   PSPU R                  R                   PS	P[        U R                   Vs/ s H  oDR                  PM     sn5       PS
PU PSPU PSPU R
                   PSPU R                   PSP[        U R                  5       PSPU R                    PS
PU PSPU PSPU R"                   PSPU R$                   PSPU R&                   PSPU R(                   PSP5      $ s  snf )Nr   z<FunctionEvent id=z name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r  r(  joinr$  r;   rX   rM   r  rS   rT   rU   r   r<   rJ   r   r'  rW   r   rr   r   )r"   r   r  r(  r7  s        r&   __repr__FunctionEvent.__repr__y  s   oo**"66y y  y	 y y		{ y- yHXHXGY yYb ycgcocobp y q y))*y*4y59__5J5J4KyKSyTXTcTcTgTgShyiyt7H7H I7He7H IJKyKLyMXMyY_y`k_lymy II;y 'y (,{{my 4By CFdFWFWBXAYyZ y !% 5 56	y 78	y 9D}	y ES	y TgRg	yh	y
 y
 '2y
 37..1Ay
 BJy
 KOJZJZI[y
 \gy
 hlgugufvy
 wxy	
 !Js   E<)r%  r  r<   r'  r:   r)  r(  r*  rX   r+  rs   r$  r   rW   r   r   r   r=   r&  r;   rM   rn   r,  rr   rq   rJ   rS   r-  r.  r   r   )r   r   r   r   r   r   rY   r   r3  r]   r^   r   r?  rD  r   r  rH  r  rz   r  r_  r  r   rK   rr  r   r   r(   r&   r   r     sb   8 NN 5<'|<	(
! 
 
 
 
 Y-	 
-   
 
 0 0( K&	 
& * * U+	 
+  
r(   r   c                   4    \ rS rSrSrS	S jrS rS rS rSr	g)
r   i  z:Used to average stats over multiple FunctionEvent objects.Nc                 T   S U l         SU l        SU l        SU l        SU l        S U l        SU l        SU l        SU l        SU l	        S U l
        S U l        S U l        SU l        SU l        SU l        SU l        S U l        S U l        [&        R(                  U l        SU l        SU l        g )Nr   F)rK   r  rM   rW   r   r   r  r  rz   r  r   rq   rn   r'  r(  r?  rD  r<   r:   r   rY   rX   r   r+  r-   s    r&   r   FunctionEventAvg.__init__  s    "&
#$)-#$&'() +,#7;%)
$(
%&() *+"-.%;?37'1~~$
r(   c                    U R                   c  UR                   U l         UR                  U l        UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l        UR                  U l	        UR                  U l
        UR                  U l        UR                  U l        [        U[        [        45      (       d   eUR                   U R                   :X  d   eU =R                   UR                   -  sl        U =R"                  UR"                  -  sl        U =R$                  UR$                  -  sl        U =R&                  UR&                  -  sl        U =R(                  UR(                  -  sl        U =R*                  UR*                  -  sl        U =R,                  UR,                  -  sl        U =R.                  UR.                  -  sl        U =R0                  UR0                  -  sl        U R2                  c  UR2                  U l        U $ UR2                  b  U =R2                  UR2                  -  sl        U $ r1   )rK   rM   rW   r   r:   r<   r   rq   rn   rX   r   r   r   r6  r   r   r  r  rz   r  r'  r(  r?  rD  r  r+  r"   others     r&   r>   FunctionEventAvg.add  s   88 yyDH ==DL!NNDM"__DN#..DO % 2 2D % 2 2DDJDJ$00D"__DN#..DO&+&>&>D#%-1A!BCCCCyyDHH$$$u333%"9"99  E$=$== ##u'C'CC#!7!77  E$=$== ""e&A&AA"%%)G)GG%

ekk!
::DJ  [[$JJ%++%Jr(   c                 $    U R                  U5      $ r1   )r>   rx  s     r&   __iadd__FunctionEventAvg.__iadd__  s    xxr(   c                 F   U R                   (       d  SOU R                   nU R                  nU R                  nU R                  nSU R                   SU R
                   SU R                   SU SU SU SU S[        U R                  5       S	U R                   SU S
U S3$ )Nr   z<FunctionEventAvg key=z self_cpu_time=rk  z  self_rl  r   rm  rn  ro  rp  )
r   r  r  r(  rK   r  r  r   r   r'  )r"   r   self_device_timer  device_memorys        r&   rr  FunctionEventAvg.__repr__  s    $(OOf::**00$TXXJod>Z>Z=[[efjfwfwex y M(8'9;-vk]Zhilmqm~m~i  iA A  $ 5 56a}NS`Raabd	
r(   )r  r<   r'  r:   r  r(  r  rX   r+  r   rW   r   r   r   rK   rM   rn   r?  rz   rD  r  rq   r   )r   N)
r   r   r   r   r   r   r>   r|  rr  r   r   r(   r&   r   r     s    D0"H	
r(   r   c                       \ rS rSrS rSrg)r   i  c                 n    [        U5      S:  a  [        R                  R                  U5      OUX'   X   $ rm   )r9   torch_C	_demangle)r"   rK   s     r&   __missing__StringTable.__missing__  s.     033x!|EHH&&s+	yr(   r   N)r   r   r   r   r  r   r   r(   r&   r   r     s    r(   r   c                   $    \ rS rSrSrS rS rSrg)r   i  z=Acceleration structure for accessing mem_records in interval.c                     Xl         / U l        / U l        [        U5      S:  aR  [	        [        U5       VVs/ s H  u  p#US   R                  5       U4PM     snn5      n[        U6 u  U l        U l        g g s  snnf r   )_mem_records_start_nses_indicesr9   rZ   r?   start_nszip)r"   mem_recordsirtmps        r&   r   MemRecordsAcc.__init__  sp    '&(#%{a9[;QR;Q411Q4==?A.;QRSC.13i+Ddm  Rs   !A7
c              #      #    [         R                  " U R                  US-  5      n[         R                  " U R                  US-  5      n[	        X45       H!  nU R
                  U R                  U      v   M#     g7f)zk
Return all records in the given interval
To maintain backward compatibility, convert us to ns in function
i  N)bisectbisect_leftr  bisect_rightr8   r  r  )r"   r/  r0  	start_idxend_idxr  s         r&   in_intervalMemRecordsAcc.in_interval  sh     
 &&t'7'7DI	%%d&6&6Fy*A##DMM!$455 +s   A:A<)r  r  r  N)r   r   r   r   r   r   r  r   r   r(   r&   r   r     s    G86r(   r   c                 6   ^  / SQn[        U 4S jU 5       5      $ )N))autograd/__init___make_grads)r  backward)ztorch/tensorr  )_internal/common_utilsprof_callable)r  prof_func_call)r  prof_meth_callc              3   Z   >#    U  H   oS    T;   =(       a    US   T;   (       + v   M"     g7f)r   r6   Nr   )r|   r   r   s     r&   r}   &_filter_stack_entry.<locals>.<genexpr>  s,     O>NaDEM3adem44>Ns   (+)all)r   filtered_entriess   ` r&   _filter_stack_entryr    s     O>NOOOr(   z[memory]z[OutOfMemory]c                 .    [         [        SSSSSS/nX;   $ )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r;   filtered_out_namess     r&   _filter_namer    s/     	 *.)	 %%r(   c                 `    [        5       nX    n U(       a  U R                  S5      (       a  Sn U $ )NzProfilerStep#zProfilerStep*)r   
startswith)r;   with_wildcardstring_tables      r&   _rewrite_namer    s-    =LD???++"DKr(   c
                   ^^,^-^.^/^0 [        U 5      S:X  a  g[        S U  5       5      n
[        S U  5       5      nU S   R                  nU(       d  U
(       a  [        S5      e[        S U  5       5      nTb  [	        [        U U4S jS	S
9UUUS9n [        S U  5       5      S-   nUb  [        X5      n[        S U  5       5      S-   nUb  [        X5      nSnUnSnU  Vs/ s H9  nUR                  c  M  [        UR                  5      S:  d  M-  UR                  PM;     nn[        U5      S:  nU(       a$  [        S U 5       5      S-   nUb  [        UU5      n/ SQnUb  UR                  5       OSnU
(       a"  UR                  SU 3SU S3U S3U S3/5        U(       a;  UR                  SS/5        U(       a!  U(       a  UR                  U S3SU S3/5        UR                  S5        [        S U  5       5      nU(       a  UR                  S5        Sm,S/m0S/m-T,* /m.S1U,U-U.U04S jjnS nU" U5        US S  H  nU" U5        M     U(       a  UR                  S!5        U" U5        U(       a  UR                  S"5        U" US#S$9  U(       ap  U  Vs/ s H!  nUR                  S:  d  M  UR                  PM#     nn[        U5      S:w  a1  U" [        U5      5      u  nnUR                  S%U 35        U" U5        OS&nT0S   nT-S   n T.S   n!Sn/ m/U/4S' jn"Sn#Sn$U  H  nU#UR                  -  n#UR                  [        R                   :X  a"  UR"                  (       a  U$UR$                  -  n$MR  UR                  [        R&                  [        R(                  [        R*                  4;   d  M  UR,                  (       a  M  U$UR$                  -  n$M     Ub  U"" S(U!-  5        U"" U5        U	(       a  U"" S(U!-  5        U"" S)5        U"" U 5        U"" UR.                  " U6 5        U"" U 5        S* n%Sn&U  GHG  nU&U:X  a    GO?U	(       a  UR0                  b  M#  U&S -  n&UR2                  n'Ub  [        U'5      US+-
  :  a  U'SUS+-
   S,-   n'[5        UR                  U#5      Ul        UR8                  (       d  [5        UR:                  U#5      OSUl        U'UR6                  UR>                  UR<                  UR@                  URB                  /n(U
(       aX  [5        UR$                  U$5      Ul"        U(R                  URF                  URD                  URH                  URJ                  /5        U(       a  U(R                  [M        URN                  5      [M        URP                  5      /5        U(       a@  U(       a9  U(R                  [M        URR                  5      [M        URT                  5      /5        U(R                  URV                  5        U(       a  U(R                  URX                  5        U(       a'  U(R                  [[        UR\                  5      SU 5        U(       aB  UR                  S::  a  U(R                  S-5        O U(R                  UR                  W-  S. 5        U(       aB  Sn)[        UR                  5      S:  a  U%" UR                  S   U5      n)U(R                  U)5        U"" UR.                  " U(6 5        U(       d  GM  S/[        U5      S -
  -  n*UR                  S S  H#  n+U"" UR.                  " U*U%" U+U5      /-   6 5        M%     U*R                  S5        U"" UR.                  " U*6 5        GMJ     U"" U 5        U"" S/[_        U#5       35        U
(       a*  U"" SUb  UR                  5       OS S0[_        U$5       35        SRa                  T/5      $ s  snf s  snf )2zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   >   #    U  H  oR                   S :  v   M     g7fr   Nrf  r{   s     r&   r}   _build_table.<locals>.<genexpr>0  s     Ou66:   c              3   >   #    U  H  oR                   S :  v   M     g7fr  rG  r{   s     r&   r}   r  1  s     P77!;r  z9use_device is None, but there is device performance data.c              3   |   #    U  H2  nUR                   S L=(       a    [        UR                   5      S:  v   M4     g 7fr   )r   r9   r{   s     r&   r}   r  9  s:      E 
		4	'	GC0B0B,Ca,G	Gs   :<Nc                 |   > [        U TR                  SS5      R                  SS5      R                  SS5      5      $ )Nr   r   r   r   )r   r   )r`   r   s    r&   rP   _build_table.<locals>.<lambda>B  s3    OOFH5WUH-W]H5	!r(   T)rK   reverser   c              3   L   #    U  H  n[        UR                  5      v   M     g 7fr1   )r9   rK   r|   r`   s     r&   r}   r  O  s     ;FSCLLFs   "$   c              3   ^   #    U  H#  n[        [        UR                  5      5      v   M%     g 7fr1   )r9   r   r   r  s     r&   r}   r  S  s#     KFSc#c&6&6"788Fs   +-   c              3   F   #    U  H  n[        S  U 5       5      v   M     g7f)c              3   8   #    U  H  n[        U5      v   M     g 7fr1   r9   )r|   r   s     r&   r}   )_build_table.<locals>.<genexpr>.<genexpr>a  s     2E5CJJEr   N)max)r|   rq   s     r&   r}   r  a  s     Gu2E222s   !)Namez
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc              3   >   #    U  H  oR                   S :g  v   M     g7f)rV   N)rM   r  s     r&   r}   r    s     =fs*fr  zNode IDr   c                    > TS==   SU-   [        U 5      -   S-   ST-  -   -  ss'   TS==   SU -  ST-  -   -  ss'   TS==   U T-   -  ss'   g )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r&   
add_column _build_table.<locals>.add_column  sh    qHs7|+c1S<5GH	
 	qS7]cL.@AAg44r(   c                 .   / SQnU S:  d   e[        S[        [        R                  " U 5      S-  [	        [        U5      S-
  5      5      5      nUS:  a  U[        U5      :  d   e[        S[        R                  " U5      S-  5      U[        U5         4$ )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r6   
   g      )	r  minmathlog10floatr9   powfloorr   )r+  flop_headers	log_flopss      r&   auto_scale_flops&_build_table.<locals>.auto_scale_flops  s    
 qyy3tzz%014eC<MPQ<Q6RST	A~)c,.?"???BI.57c)n9UVVr(   r6   zInput ShapeszSource Location<)r  zTotal Fc                 J   > TR                  U 5        TR                  S5        g )Nr   )r_   )r   results    r&   r_   _build_table.<locals>.append  s    adr(   =z1This report only display top-level ops statisticsc                 v    [        U 5      U:  a)  [        U 5      U-
  nXS  n [        U 5      S:  a  SU SS  -   n U $ )Nr  ...r  )r   src_column_widthoffsets      r&   	trim_path_build_table.<locals>.trim_path  sG    t9''Y!11F=D4y1}tABx'r(   r  r  z--z8.3fzSelf CPU time total: z time total: )rp  )1r9   anyr   RuntimeErrorr   rZ   r  r  rq   upperrA   r_   r+  rz   rX   r   rY   r   r  rZ  r[  r\  r   r   r:   rK   r   r,  rW   r  r-  r  r  r  r.  r  r  r  r   r'  r?  r(  rD  r  rM   r   r   r   rq  )1rb   r   r   r   r   r   r   r   r   r   has_device_timehas_device_memr   has_input_shapesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  r`   stacks	has_stackheadersr   append_node_idr  r  _	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthr_   sum_self_cpu_time_totalsum_self_device_time_totalr  event_limitr;   
row_values	src_fieldempty_headersr   r  r  r  r  r  s1    `                                          @@@@@r&   r   r      s    6{aOOOOPPPN%%J /VWW  
  	 ")!
  ;F;;a?( 1IKFKKaO*!"5O-##csyy	SYYRSAS			V   FaIGGG!K 	  +"#35IJG )3(>*""$FK}%}B'-v&-y)		
 	
 .NN"m4(K=- NN< =f==Ny! LTNTN$}oO5 5W  !QR['(  ~&&'()#c2*0B&3CIIMYSYY&	By>Q*:3y>*J'[,NNVL>23)*J"J"J!!$KJ F  !"3#:#::??jnn,&#*D*DD&OO&& *** '#*D*DD& $ s[ !vs[ !BC
:
:g&'
: K)# S^^%?1Kww ,T>SVW>W1W50146>D1##%< 

 << s113JK 	   ''!!""	

 '9**,F(C$ 22,,--''  #3#7#78"3#<#<=	 n!! 's'>'>?&s'C'CD	 	II	
 ckk*c#"2"234H5HIJyyA~!!$'!!SYY%<T$BDI399~!%ciil4DE	i(z  *-.9DCL1$45M12%%'9U<L+M*NN '   $:$$m45q t :
"<0G#H"IJK**@J$$&fM N'(BCDF	
 776?ot Cs   !_#6_#_#_(3_()F)	NNr   r   r   r   FFF)'r  r[   r  collectionsr   r   operatorr   typingr   r   r   r	   r
   typing_extensionsr   r  torch.autogradr   __all__listr   r   r   r   r  r   r   r   r   r   r   r   r  r  r  r  r  r   r   r(   r&   <module>r     s       /  3 3 (  %	K K\
5"D   <	% 	% 
H<	=I
' I
XK
* K
\+ 6 6,	P  * && rr(   