
    GjM              #          U d Z ddlZddlmZ ddlmZ ddlmZ ddlZddl	m
Z
  ed          Z ed          Zi Zeej        j        ef         ed	<    eh d
          Zdededeeef         deeeef         geeef         f         fdZ	 d9dddddej        dej        dej        dej        dz  dedededej        fdZ edde          	 d9dddddej        dej        dej        dej        dz  dedededej        fd            Zdedz  dedefdZd ej        d!ededej        fd"Zd#ej        d$ej        d%ed&ededz  d'edej        fd(Zd%ed&eddfd)Zd#ej        d$ej        d%ed&ededz  dej        fd*Z 	 	 	 d:dddddd+dd,d#ej        d$ej        d-ej        d.ej        dz  d/ej        dz  d0ej        dz  d1ed2ed3ed'ededz  d4ed5edz  de!ej        ej        ej        ej        f         fd6Z" ed7de"          	 	 	 d:dddddd+dd,d#ej        d$ej        d-ej        d.ej        dz  d/ej        dz  d0ej        dz  d1ed2ed3ed'ededz  d4ed5edz  de!ej        ej        ej        ej        f         fd8            Z#dS );zImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)TypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                      dt           t          t          f         dt           t          t          f         f fd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    d }t          j                            d d| d          |           }| t          t	          t	          t           j        j                  |          <   |                               |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr	   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   s      W/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/torch/onnx/ops/_impl.py	decoratorz_onnx_op.<locals>.decorator'   s    *=**=**)W))x)) + 
 

   	wwuy~w'G'GRRS 	y)))    )r   r   r   )r   r   r   r#   s   ``` r"   _onnx_opr%   "   sU    
	R( 	Xb"f-= 	 	 	 	 	 	 	 	 r$   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr&   r'   r(   c                *    |                                  S )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r)   r*   r+   r,   r&   r'   r(   s          r"   _rotary_embedding_23_fake_implr/   5   s     7799r$   RotaryEmbedding   c                h   | j         t                    }d         d         t          j                                        dk    fd           t          j        j         d         k    fd           t          j        j         d         k    fd           t          j                                        dk    o                                dk    fd	           nGt          j                                        d
k    o                                d
k    fd           |dk    rt          j        | d          } nJ|d
k    rDt          j        |dk    fd           d         }||z  }	||	g}
t          j        | |
          } t          j        t          | j                   dk    d            | j         d
         }	|dk    r|	}| ddddddd|f         }| dddddd|df         }|dz                    nt          j        j         d         k    oj         d         k    fd           t          j        j         d         k    oj         d         k    fd           t          j        j         d         k    fd           t          j        j         d         k    fd           t          j        d          t          j        d          |r+|dddddddddf         }|dddddddddf         }nt          j        |dd          \  }}|z  |z  z
  }|z  |z  z   }|r]t          j        |d          }t          j        |d          }t          j	        ||fd          }t          j        ||j                   }nt          j	        ||fd          }t          j	        ||fd          }|d
k    rt          j        |          S t          j        |d          S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                      d j          S )Nz6position_ids must be 2D when provided. Received shape shape)r,   s   r"   <lambda>z%rotary_embedding_23.<locals>.<lambda>Z   s    a\M_aa r$   c                  *    d  dj         d          S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r6   )
batch_sizer,   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>^   s"    |Z||eqewxyez|| r$   r
   c                  *    d d j         d          S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r:   r
   r6   )r,   sequence_lengths   r"   r8   z%rotary_embedding_23.<locals>.<lambda>b   s>      GRa  G  Go{  pB  CD  pE  G  G r$   c                  (    d j          dj          S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r6   r*   r+   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>f   -     ](1] ]KT?] ] r$      c                  (    d j          dj          S )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r?   r6   r@   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>l   rA   r$      )r   r4   r
   rB   c                      d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r"   r8   z%rotary_embedding_23.<locals>.<lambda>y   s    obmoo r$   c                      dS )Nzx should be a 4D tensor by nowr   r   r$   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   s    ,L r$   c                  &    dj          d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r6   )r;   cosr=   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   &    jjjjjSbjjj r$   c                  &    dj          d  d dS )Nzsin has shape rI   rJ   rK   r6   )r;   r=   sins   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   rM   r$   c                  ,    d j         d          d dS )NzLast dimension of cos cache (rP   ') should match rotary_embedding_dim/2 ().r6   )rL   rotary_embedding_dim_halfs   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   4      D	"  D  Df  D  D  D r$   c                  ,    dj         d          d  dS )NzLast dimension of sin cache (rP   rR   rS   r6   )rT   rO   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   rU   r$   dim)
r7   lenr   _checkrX   permutereshape	unsqueezechunkcat)r)   r*   r+   r,   r&   r'   r(   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr;   rL   rF   rT   r=   rO   s    ```               @@@@@@r"   rotary_embedding_23rl   C   sv    'K[!!JQJ!"oO !#aaaa	
 	
 	
 	q!Z/|||||	
 	
 	
 	q!_4 G  G  G  G  G	
 	
 	
 	MMOOq 9Y]]__%9] ] ] ] ]	
 	
 	
 	
 	MMOOq 9Y]]__%9] ] ] ] ]	
 	
 	
 Q M!\**	qNoooo	
 	
 	
 "!n9,	)YG	M!Y''	LQW"$L$LMMM
I q  (AAAqqq////0HQQQ11123334L 4 9 
 
 	L	!
"Fsy|'Fjjjjjj   
L	!
"Fsy|'Fjjjjjj   
L	"22 	D  	D  	D  	D  	D   
L	"22 	D  	D  	D  	D  	D   /Q C /Q C
  2aaaAAAqt!tm$aaaAAAqt!tm$Xqb111B 8cBhD8cBhD  3 tR((tR(()T4Lb999=(.AA9dD\r222Y,/R888FQ}V[111 =...r$   scalerb   c                 8    | | ndt          j        |          z  S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)rm   rb   s     r"   _get_scale_factorrq      s     %55C$)I2F2F,FGr$   tensorr;   c                     | j         d         | j         d         }}||z  }|                     ||||                              dd                                          S )z1Reshape 3D tensor to 4D for multi-head attention.r
   r4   )r7   view	transpose
contiguous)rr   r;   r'   r=   ra   rb   s         r"   _reshape_3d_to_4drw      sS     $*<?FLO[Oy(IJIFF	1a	r$   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          |dk    rt          | ||||          S t          j        t          j        | |                    dd                              S )z1Get QK output tensor based on the specified mode.r   r3   rP   )_compute_qk_output_for_mode_0r   
zeros_likematmulru   )rx   ry   rz   r{   rm   r|   s         r"   _get_qk_output_for_aten_spdar      s[     !!,q%';U
 
 	

 QB0C0C D DEEEr$   c                 J     t          j         z  dk     fd           dS )z-Validate Group Query Attention configuration.r   c                      d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )r{   rz   s   r"   r8   z-_validate_gqa_configuration.<locals>.<lambda>   s    y 3yyZnyyy r$   N)r   rZ   )rz   r{   s   ``r"   _validate_gqa_configurationr      sA     
L22a7yyyyy    r$   c                    |}||k    r||z  }|                     |d          }t          || j        d                   }t          j        |          }| |z  }	||z  }
t          j        |	|
                    dd                    S )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r
   rW   rB   r3   rP   )repeat_interleaverq   r7   ro   rp   r   r   ru   )rx   ry   rz   r{   rm   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r"   r~   r~      s     H222+/CC&&}!&<<$UAGAJ77L<((J:~H*$H<("4"4R"<"<===r$           )	is_causalkv_num_headsq_num_headsr|   rm   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                ^   | j         d         }t          | j                   dk    rr| j         d         }| j         }|.|||j         d         |j         d         z   |j         d         |z  f}n|||j         d         |j         d         |z  f}|}||||d         f}n| j         d         }| j         }|A|j         d         |j         d         |j         d         |j         d         z   |j         d         f}n|j         }|}| j         d         | j         d         | j         d         |d         f}t          j        || j        | j                  }t          j        ||j        |j                  }t          j        ||j        |j                  }t          j        || j        | j                  }||||fS )z@Fake implementation for Attention-23 for torch.compile purposes.r   rB   r
   Nr4   dtypedevice)r7   rY   r   emptyr   r   )rx   ry   r   r   r   r   r   r   r   r|   rm   r   r   r;   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shaperk   present_keypresent_value	qk_outputs                          r"   _attention_23_fake_implr     s   " J 17||qGAJw q!AGAJ.
l*	! 

l*	! 0 a 	
 GAJw 

q!AGAJ.
	! !"/ GAJGAJGAJa 	
 [QWQXFFFF+/qwqxPPPKK 31718TTTMO1718LLLI;y88r$   	Attentionc                	   d\  }}}t          | j                  }| j        d         }t          | j                  dk    r`t          j        |dk    o|dk    d            | j        d         }t	          | ||          } t	          |||          }t	          |||          }t          j        t          | j                  dk    o/t          |j                  dk    ot          |j                  dk    d            | j        |         }t          |
|          }
|t          j        ||g|	          n|                                }|t          j        ||g|	          n|                                }||}}| j        |         }|j        |         }| j        |         }|j        |         }|d
k    o"|	dk    o|du o|du p|j        t          j	        k    }t          ||           |rSt          j        j                            | |||d
||
t          ||k                        }t          | ||||
|	          }nY||k    r3||z  }|                    ||	          }|                    ||	          }t          j        ||| j        | j                  }|rut          j        |du d            t          j        t          j        ||t          j	        | j                            }|                    | t+          d                    }|?|j        t          j	        k    r%|                    | t+          d                    }n||z   }t          |
| j        d                   }t-          j        |          } | | z  }!|| z  }"t          j        |!|"                    dd                    }#|#}|#|z   }$|	dk    r|$}|d
k    r|t          j        |$|z            z  }$|	dk    r|$}|x|t6          v rX|$j        }%|$                    t:          j        |                   }$t          j        |$d	          }&|&                    |%          }&n-t          j        |$d	          }&nt          j        |$d	          }&|	dk    r|&}t          j        |&|          }|dk    r=|                    dd                                           !                    ||d          }||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r
   r4   rB   r   rB   c                      dS )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r$   r"   r8   zattention_23.<locals>.<lambda>  s    Q r$   r
   rD   c                      dS )Nz'Q, K, and V should be 4D tensors by nowr   r   r$   r"   r8   zattention_23.<locals>.<lambda>  s    9 r$   NrW   r   )r   	dropout_pr   rm   
enable_gqar   c                      dS )Nz'Cannot use both is_causal and attn_maskr   r   r$   r"   r8   zattention_23.<locals>.<lambda>  s    +T r$   z-infr3   rP   r4   )"rY   r7   r   rZ   rw   rq   r_   r.   r   boolr   nn
functionalscaled_dot_product_attentionr   r   zerosr   trilonesmasked_fillfloatro   rp   r   ru   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrv   rt   )'rx   ry   r   r   r   r   r   r   r   r|   rm   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr;   r   q_head_sizer   r   rz   r{   kv_sequence_lengthcan_use_sdpark   r   r   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs'                                          r"   attention_23r   c  s   & ,3(L, !'llOJ 17||q12!2QQ	
 	
 	
 GAJa[99a\::a\::	LAGEc!'lla/ECLLA4E99   '(#Ke[11E
  		8Q-\2222WWYY  ! 		:q/|4444WWYY  qA ',/7<0-. 	3 	A!Q&	A%	A $?)/UZ"?	    35IJJJ k-$AA#';;  B 
 
 1 !
 
		 "666/3GGM##M|#DDA##M|#DDA K1
 
 
	
  	KLT!#T#T    *
%&*8	   K "--{lE&MMJJI  %*,,%119*eFmmLL		 &	1	 )
;; Y|,,
z>z> !<(2D2DR2L2LMM %	 ()3 A%%$I S=="UZw0F%G%GGL A%%$I ( $QQQ!-!3+#=>OP    #]<R@@@
']]>::

"]<R@@@

|<<<J A%%"I j!,, ! Q""--//44ZARTVWW 	 ;y88r$   )N)NNN)$__doc__ro   collections.abcr   typingr   typing_extensionsr   r   torch.onnx.opsr   r   r   r	   dict_ops
OpOverload__annotations__	frozensetr   strintr%   Tensorr   r/   rl   r   rq   rw   r   r   r~   tupler   r   r   r$   r"   <module>r      s/      $ $ $ $ $ $       ' ' ' ' ' '  * * * * * * Yt__WT]] AC UZ2H<= B B B09	  1 1 -!$19"b&1AxB (2r6"223   . )-	  !  || | ,%	    \    

R!?@@
 )-	D/  !D/ D/ D/|D/|D/ |D/ ,%	D/ D/ D/ D/ \D/ D/ D/ A@D/NHUT\ Hc He H H H H

L
&)
69

\
 
 
 
F|F|F F 	F
 4<F F \F F F F$47	   >|>|> > 	>
 4<> \> > > >4 &*$(&*Q9 !"$(Q9 Q9 Q9|Q9|Q9 |Q9 |d"	Q9
 lT!Q9 t#Q9 Q9 Q9 Q9 Q9 4<Q9 Q9 TzQ9 5<u|U\ABQ9 Q9 Q9 Q9h 
+r233
 &*$(&*9 !"$(9 9 9|9|9 |9 |d"	9
 lT!9 t#9 9 9 9 9 4<9 9 Tz9 5<u|U\AB9 9 9 439 9 9r$   