
    BjH                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlZd dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZmZ d dlmZ d dlmZm Z  g d	Z!d
e"de#e"e"f         fdZ$dee%         dej&        de'e"ef         fdZ(dej&        de'e"ef         dej        j)        fdZ*d,dej        j)        dej        j)        fdZ+dej)        dej)        fdZ,dej)        de-ej&                 de-ej&                 de-ej&                 fdZ.ej/        ej0        ej1        ej2        ej3        ej4        ej5        ej6        ej7        ej8        ej6        ej9        ej:        gZ;ej<        ej=        gZ>ej/        ej?        ej0        ej@        ej1        d iZAde-ej&                 de'e"ej)        f         fdZBde-ej&                 de'e"ej)        f         de'ej)        ej)        f         fdZC G d  d!          ZDd-d$ZEd%eDdeFfd&ZG G d' d(          ZHdejI        fdej        j)        d)e'e"ef         dz  d*e%ejI                 dej        j)        fd+ZJdS ).    N)defaultdict)Iterable)Enum)Anycast)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 P    |                      dd          ^ }}|r|d         nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      e/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/torch/fx/experimental/optimization.py_parent_namer#   %   s3    
 MM#q))MVT&6!99B,,    patternnodemodulesc                 j   t          |j                  dk    rdS |j        d         |f}t          | |          D ]x\  }}t          |t          j                  s dS |j        dk    r dS t          |j        t                    s dS |j        |vr dS t          ||j                           |ur dS ydS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r%   r&   r'   nodesexpected_typecurrent_nodes         r"   r   r   /   s     49~~u"&)A,!5E'*7E':': 
 
#|,00 	55?m++55,-s33 	55g--55+,--]BB55 C4r$   
new_modulec                     t          | j        t                    s$t          dt	          | j                             t          | j                  \  }}||| j        <   t          ||         ||           d S )NExpected str target, got )r-   r   r1   AssertionErrorr2   r#   setattr)r&   r'   r6   parent_namer!   s        r"   r   r   C   sw     dk3'' NLdk9J9JLLMMM$T[11K%GDKGK $
33333r$   Fmodelc                    t           j        t           j        ft           j        t           j        ft           j        t           j        ft           j        t           j        fg}|st          j	        |           } |rt          | t          j        j                  st          j        |           }n| }t          |                                          }t          j	        |j                  }|D ]}|j        D ]}t'          |||          rt)          |j        d         j                  dk    r8||j        d         j                 }	||j                 }
|
j        se|d         t           j        t           j        t           j        fv rt3          |	|
          }nt5          |	|
          }t7          |j        d         ||           |                    |j        d                    |                    |           t          j        ||          S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr-   torchr.   GraphModulesymbolic_tracedictnamed_modulesgraphr3   r   r*   r+   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r<   inplaceno_tracepatternsfx_modelr'   	new_graphr%   r&   first_layerbnfused_layers               r"   r   r   M   s    
BN#	BN#	BN#	BN#	H  %e$$ :eUX-ABB $U++8))++,,Ghn--I + +O 	+ 	+D%gtW== +ty|)**Q..%dil&9:T[)- 1:")RY	!BBB"3K"D"DKK"5k2"F"FK#DIaL';GGG**49Q<888$$T***	+  >(I...r$   c                     t          j        |           } G d dt          j         j                  } ||                                          S )z5
    Removes all dropout layers from the module.
    c                   P     e Zd Zdedeedf         deeef         def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r+   .kwargsr   c                    t          | j        |         t          j                  r:t	          |          dk    rt          dt	          |                     |d         S t                                          |||          S )Nr   z Expected 1 arg for Dropout, got r   )r-   
submodulesr>   Dropoutr*   r9   superr)   )selfr   r+   r]   	__class__s       r"   r)   z2remove_dropout.<locals>.DropoutRemover.call_module|   ss     $/&12:>> At99>>()WCPTII)W)WXXXAwww**64@@@r$   )__name__
__module____qualname__r	   tupler   rK   r1   r   r)   __classcell__)rc   s   @r"   DropoutRemoverr\   {   s}        	A 	A(-hm(<	AFJ3PS8n	A	A 	A 	A 	A 	A 	A 	A 	A 	A 	Ar$   ri   )r.   rJ   rH   Transformer	transform)r<   rU   ri   s      r"   r   r   u   si      ''H	A 	A 	A 	A 	A- 	A 	A 	A >(##--///r$   orig_moduler3   inputsoutputsc                 P   t          j                    }i |D ]!}|                    |j                  }||<   "|D ] }|                    |fd          }||<   !|                    fd|D                        |                                 t          j        | |          S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |          S N )xenvs    r"   <lambda>z"extract_subgraph.<locals>.<lambda>   s    s1v r$   c                      g | ]
}|         S rr   rr   ).0outputrt   s     r"   
<listcomp>z$extract_subgraph.<locals>.<listcomp>   s    888fc&k888r$   )r.   Graphplaceholderr!   	node_copyrx   lintrI   )	rl   r3   rm   rn   rV   inputnew_noder&   rt   s	           @r"   r   r      s     

I"$C  ((44E

  &&t-=-=-=-=>>D		8888888999NN>+y111r$   c                 *    t          j        |           S rq   )	th_mkldnnMkldnnBatchNorm)a_s     r"   ru   ru      s    !:1!=!= r$   c                    i }| D ]}|j         dk    rt          |j        t                    s$t	          dt          |j                             ||j                 }t          |          t          v rt          t          |                   |t          j                  }t          |t          j
                  st	          dt          |                     t          j        |          ||<   t          |||           |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r)   r8   zExpected nn.Module, got )r0   r-   r   r1   r9   r2   
mkldnn_maprH   floatr>   ModulerF   rG   r   )r3   r'   old_modulesr&   
cur_moduler6   s         r"   r   r      s     /1K ? ?7m##dk3// V$%TdkARAR%T%TUUU -JJ:--'Z(8(89*ekRR
!*bi88 X()VDDTDT)V)VWWW*.-
*C*CJ'#D':>>>r$   r   c                     | D ]s}|j         dk    rft          |j        t                    s$t	          dt          |j                             ||j                 }||v rt          ||||                    tdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r)   r8   N)r0   r-   r   r1   r9   r2   r   )r3   r'   r   r&   r   s        r"   r   r      s      L L7m##dk3// V$%TdkARAR%T%TUUU -J[((#D';z3JKKKL Lr$   c                   $    e Zd Zdej        fdZdS )r   fx_graphc                 >    || _         g | _        g | _        g | _        d S rq   )r   r3   start_nodes	end_nodes)rb   r   s     r"   __init__zMklSubgraph.__init__   s#     $&
*,(*r$   N)rd   re   rf   r.   rz   r   rr   r$   r"   r   r      s1        + + + + + + +r$   r   
   r   c                 H     dddt           dt          f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrM   r   c                    | j         }	:| j        j        	| j        j        t	          	                                         d |D             t          t          t          j	                 d | j
        D                       }t          	| j        ||          
fd} |fd          }t          j        j        t                                                                |fd          }||k     S )Nc                 @    g | ]}t          j        |j                  S rr   )rH   randnshaperw   r&   s     r"   ry   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s$    IIITTZ00IIIr$   c                 (    g | ]}|j         d          S )r   )r+   r   s     r"   ry   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s    *T*T*TD49Q<*T*T*Tr$   c                     t                    D ]} |              t          j                    }t                    D ]} |              t          j                    |z
  S rq   )rangetime)fr   beginiterswarmups      r"   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark  sc    6]]  IKKE5\\  9;;&&r$   c                  6    d  d  D              D             S )Nc                 6    g | ]}|                                 S rr   )to_denserw   is     r"   ry   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>
  s-       !"

  r$   c                 6    g | ]}|                                 S rr   )	to_mkldnnr   s     r"   ry   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>  s     1W1W1WA!++--1W1W1Wr$   rr   sample_inputs	submodules   r"   ru   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>
  s7      &/i1W1W1W1W1W&X   r$   c                         S rq   rr   r   s   r"   ru   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s    		=(A r$   )r   r   owning_moduler   r
   	propagater   listr.   r/   r   r   r3   r   rM   rK   rL   )rM   input_nodesoutput_argsr   mkl_timeno_mkl_timer   r   example_inputsrU   r   r   r   s         @@r"   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s:   '~3H.4Kh)).999II[III4=*T*TEO*T*T*TUU$Xu{KUU		' 	' 	' 	' 	' 	' 9    
 
 	O!((**++		
 	
 	
  i A A A A ABB+%%r$   )r   bool)r   r   r   r   rU   r   s   ``` @@r"   r   r      s\     HK &  &  &  &  &  &  &  &  &  &  &  &D r$   rM   c                 2    t          | j                  dk    S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r*   r3   )rM   s    r"   r   r     s     u{ar$   c                   @    e Zd Zd ZdefdZdedefdZdedefdZd	S )
r   c                 2    d g|z  | _         dg|z  | _        d S )Nr   r    size)rb   ns     r"   r   zUnionFind.__init__%  s     )-
 !sQw			r$   vc                 .    || j         |<   d| j        |<   d S )Nr   r   )rb   r   s     r"   make_setzUnionFind.make_set)  s    A	!r$   r   c                     | j         |         }||k    r|S |t          d          |                     |          | j         |<   t          t          | j         |                   S )NzParent is None)r    r9   findr   int)rb   r   pars      r"   r   zUnionFind.find-  sZ    k!n88H; !12223ACQ(((r$   r   bc                     |                      |          |                      |          }}||k    r|S | j        |         | j        |         k     r||}}|| j        |<   | j        |xx         | j        |         z  cc<   d S rq   )r   r   r    )rb   r   r   s      r"   joinzUnionFind.join6  sz    yy||TYYq\\166H9Q<$)A,&&aqAA	!	!$r$   N)rd   re   rf   r   r   r   r   r   rr   r$   r"   r   r   $  s        ' ' '#    )c )c ) ) ) )%c %c % % % % % %r$   r   pass_configtracerc                    dddt           id}|i }|                    |           |d         rt          |           } |d         rt          |           } |d         du r| S t	          |d         t
                    st          d	          d|d         vrt          d
          |d         d         } |            }|                    t          j	        |                     t          j        |j                   t          |                                           } G d dt                    }t          j                  D ]}|j        }	|j        dk    r||j                 }
t)          |
          t*          v r{|j        }	t/          |
                                d          }|P|j        t4          j        k    rt9          d          |j        t5          j        d          k    rt9          d          n6|j        dk    r+|j        t*          v r|j        }	n|j        t<          v r|j        }	|	|j        k    r|	|j        k    r tA          d |j!        D                       s!"                    |          5  t          j#        |j!        fd          }ddd           n# 1 swxY w Y   tI          tJ          t          j&        j'                 |          |_!        (                    |          5  )                    dd|f          }|*                    |           |f|_!        ddd           n# 1 swxY w Y   tW          t          j                  |          }|_,        j        D ]}|j        dk    r|j        dk    r|j!        d         }t          |j-                  }|D ]B}|j        dk    r5|j        dk    r*|*                    |           .                    |           Ct_          |j-                  dk    r.                    |           t_          j                  }ta          |          fdtc          j                  D ]$\  }}|j        dk    r(|j        dk    r||_2        3                    |           9|j        dk    rL|j        dk    rA |j!        d                   t9          d           |j!        d                   |_4        fd|j5        D             }t_          |          dk    rtA          d |D                       rt9          d          tm          |          }|d         |_7        |dd         D ]}8                    |d         |           &ts          fd          }j        D ]}tu          |d          r8|;                    |j7                           j        <                    |           tu          |d           r8|;                    |j2                           j=        <                    |           tu          |d!          r8|;                    |j4                           j>        <                    |           |?                                D ]l} ||          s_|j=        |j>        z   D ]9}|j!        d         }|*                    |           .                    |           :t          |j        ||           md}j        D ]}|j        dk    s|j        dk    r|dz  }t          jB        t                    D                    d"|           E                                 t          j        |           }|S )#a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZdS )*optimize_for_inference.<locals>.MklSupportr   r      N)rd   re   rf   NOYESUNKNOWNrr   r$   r"   
MklSupportr   l  s        r$   r   r)   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   ,   K   | ]}|j         d k    V  dS )r   N)r   )rw   args     r"   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>  s)      II3:3IIIIIIr$   c                 2                         d| f          S )Nr   )call_method)r   r   s    r"   ru   z(optimize_for_inference.<locals>.<lambda>  s    )=)=kA4)P)P r$   r   r   r   r   c                     t          | d          r                    | j                  S t          | d          r                    | j                  S d S )Ncolorstart_color)hasattrr   r   r   )r   ufs    r"   	get_colorz)optimize_for_inference.<locals>.get_color  sT    1g 	$7717###1m$$ 	*771=)))tr$   z!Expected color for to_dense inputc                 p    g | ]2}t          |t          j                  r |          ' |          3S rq   )r-   r.   r/   )rw   r   r   s     r"   ry   z*optimize_for_inference.<locals>.<listcomp>  sQ       a)) 9Q<<+ 	! ,++r$   c              3      K   | ]}|d u V  	d S rq   rr   r   s     r"   r   z)optimize_for_inference.<locals>.<genexpr>  s&      1119111111r$   zFound None in cur_colorsr   c                  "    t                     S rq   )r   )r   s   r"   ru   z(optimize_for_inference.<locals>.<lambda>  s    H@U@U r$   r   r   	end_colorzmkldnn conversions: %s)Fr   updater   r   r-   rK   RuntimeErrortracerF   rG   r.   rI   rootrL   r   r   r3   r   r0   r   r2   mkldnn_supportedr   next
parametersdtyperH   r   r9   devicemkldnn_supported_unknownr   anyr+   inserting_beforemap_argr   rg   r&   r   inserting_aftercreate_noderP   r   r   rN   rQ   r*   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerrd   infor}   )r<   r   r   default_pass_configr   
cur_tracerr'   r   r&   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderN   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrM   prvmkldnn_conversionsresultr   r   r   s                              @@@r"   r   r   @  sk   & #."? 
 {+++>* U+, &u%%34==)*BCTJJ JHIII-.FGGGQRRR+,DEkRJe 4 455HN:?H---$()<)<)>)>$?$?G    T    X^$$ "' "'$-7m## -JJ#333",.#'
(=(=(?(?#F#F #/'-<<,G   (.%,u2E2EEE,-PQQQW''{...",. 888","4jm++*"444IItyIIIII **400   jIPPPP               
 U27#34kBBDI))$// ' '"..}j4'RR**7333 $w' ' ' ' ' ' ' ' ' ' ' ' ' ' ' $D$8$8'BBK&H  	* 	*7m##z(A(Ay|H$$E . .7m++{0J0J..x888''---4:!####D)))HN##I	9		B    $ #8>22 4 47m##{(B(B&DKK    W%%$+*C*Cy1&&.$%HIII&Yty|44DNN   -  J :!##11j11111 A$%?@@@
++J#ADJ)!""~ 4 4
1{33334 -88U8U8U8U,V,VM J J4!! 	B"''$*--.4;;DAAA4'' 	N"''$"2334@GGMMM4%% 	J"''$.112<CCDIII %%'' = =  '' 	=)EO; * *il**3///##D))))%+w<<< $ $;+%%
)B)B!#h$$%=?QRRRMMOOO^E8,,FMs$   >J((J,	/J,	76L99L=	 L=	)FF)r   r   )KrF   r   operatorr   collectionsr   collections.abcr   enumr   typingr   r   rH   torch.fxr.   torch.nnr>   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnr   torch.fx.noder   r	   torch.fx.passes.shape_propr
   torch.nn.utils.fusionr   r   __all__r1   rg   r#   r2   r/   rK   r   r   r   r   r   r   r   rA   rE   rB   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr   rr   r$   r"   <module>r,     sv       # # # # # # $ $ $ $ $ $                                     & & & & & & & & & * * * * * * * * 0 0 0 0 0 0 H H H H H H H H   - -sCx - - - -d^#%759#s(^   (4
'4 cN48=4 4 4 4%/ %/ %/58? %/ %/ %/ %/P0") 0	 0 0 0 0(22=2 M2 "']	2 2 2 2. IINGLL	J	O	MFL & %L(,7 Iy%Iy%N==
T"'] T#ry.5I    ,L=L#ry.!L bi*+L L L L$+ + + + + + + +. . . .b +  $        % % % % % % % %< *. ir r8?rc3h$&r Or X_	r r r r r rr$   