
    Bj             !       s   U d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dl mZmZ d dlmZmZ d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dlmc mZ d dlmZm Z m!Z! d d	l"m#Z# d d
l$m%Z% d dlm&Z&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ej5        j6        Z6g Z7e8e9         e:d<   ej;        j<        j=        Z= G d de          Z>	 	 ddedej?        de@de@fdZA eeAej?        jB        d          ZC eeAej?        jB                  ZD eeAej?        jB        d          ZE eeAej?        jF                  ZGde!deHde!fdZI e#e=jJ                   e/d           eDd!e!d"e!fd#                                    ZJ e#e=jK                   e/d           eDd!e!d"e!fd$                                    ZK e#e=jL                   e/d           eDd!e!de!d%eMd&eMfd'                                    ZL e#e=jN                   e/d           eDd(e!d)eMd*eMd+eMd,e@d-e!fd.                                    ZN e#e=jO        jP        g          d/             ZQ e#e=jO        j!        g          d0e!fd1            ZR e#e=jS                   e/            eDd2e!de!fd3                                    ZS e#e=jT                   e/d           eDd(e!d2e!fd4                                    ZT e#e=jU                   e/d           d(e!d2e!d5eMd6eMfd7                        ZU e#e=jV                   e/            eDd2e!de!fd8                                    ZV e#e=jW                   e/            eDd(e!d2e!de!fd9                                    ZW e#e=jX                   e/d           d(e!d2e!d&eMfd:                        ZX e#e=jY                   e/d           eDd(e!d2e!d;eMd<e@fd=                                    ZY e#e=jZ                   e/d           eDdd?e!d2e!d@e9fdA                                    ZZ e#e=j[                  eDd(e!dBe!fdC                        Z[ e#e=j\                   e/            eDd2e!de!fdD                                    Z\ e#e=j]                   e/d           eDd(e!d2e!de!fdE                                    Z] e#e=j^                  d2e!dFe!de!fdG            Z^ e#e=j_                  d(e!d2e!dFe!de`e!e!f         fdH            Z_ e#e=ja                   e/            eDd(e!d2e!dIe!dJeMdKeMdLe@d<e@de!fdM                                    Za e#e=jb                   e/d           eDd(e!d2e!dNe!de!fdO                                    Zb e#e=jc                   e/            d2e!dPe!de!fdQ                        ZcdRe!dSeHfdTZddUeje        fdVZf e#e=jg                   e/            eDe>jh        ji        fd2e!dWe!dSeHde!fdX                                    Zg e#e=jj                   e/d           eDd(e!dBe!dWe!dSeHfdY                                    Zj e#e=jk                  ddZ            Zl e#e=jm                   e/            eDe>jh        ji        d[fd2e!dWe!dSeHd%eMfd\                                    Zm e#e=jn        jo                  eDd(e!d2e!dWe!dSeHd%eMf
d]                        Zn e#e=jn        jp                  eDd(e!d2e!dWe!dSeHd%eMd e!fd^                        Zq e#e=jr        jo                  eDd(e!d2e!dWe!dSeHd_eMf
d`                        Zr e#e=jr        js                  eDd(e!d2e!dWe!dSeHd_eMd e!fda                        Ztd(e!d2e!dWe!dFe!dz  dSeHdbeHdce!de!fddZu e#e=jv                   e/d           eDd(e!d2e!deHde!fde                                    Zv e#e=jw                   e/d           d(e!d2e!dWe!dFe!dz  dSeHdbeHdce!de!fdf                        Zw e#e=jx                   e/d           d(e!d2e!dWe!dFe!dz  dSeHdbeHdce!de!fdg                        Zx e#e=jy                   e/            eDde>jh        ji        fd2e!dWe!dFe!dz  dSeHde!f
dh                                    Zy e#e=jz                   e/d           eDde>jh        ji        fd(e!d2e!dWe!dFe!dz  dSeHde!fdi                                    Zz e#e=j{                   e/            eDe>jh        ji        fdBe!dWe!dSeHde!fdj                                    Z{ e#e=j|                   e/d           eDe>jh        ji        fd(e!d2e!dWe!dSeHde!f
dk                                    Z| e#e=j}                   e/            ddBe!dPe!dmeMfdn                        Z} e#e=j~                   e/            doe!dpe!de!fdq                        Z~ e#e=j                   e/            d(e!dre8eH         deHdseHdteHdueHfdv                        Z e#e=j        j!                  	 	 	 	 dd2e!deHdseHdz  dteHdz  dueHf
dx            Zde!deHdseHdz  dteHdz  de`eHeHf         f
dyZ e#e=j                   e/            	 	 	 	 ddBe!dze!deHdseHdz  dteHdz  dueHfd{                        Z e#e=j                   e/            d(e!dre8eH         deHd|eHfd}                        Z e#e=j                   e/            d(e!dre8eH         d~eHdeHdeHf
d                        Zd(e!d e!deje        fdZ e#e=j                   e/d           eCd(e!de!deHdeje        fd                                    Z e#e=j                   e/            eCd(e!de!deHdeje        fd                                    Zd Z e#e=j                   e/            dBe!de8eH         de8eH         de8eH         de8eH         de!fd                        Z e#e=j                   e/            eDdBe!de8eH         de8eH         de8eH         de8eH         de8eH         de!fd                                    Z e#e=j                   e/            d(e!de!d*eMfd                        Z e#e=j                   e/            d?e!de8eH         deHdeHdueHde!fd                        Z e#e=j        jo                  eD	 dd(e!d2e!deMdz  de!fd                        Z e#e=j                  e=j        jo                            e6j                  e=j        jo                            e6j                  dBe!dmeMde@dz  fd                                    Z e#e=j                   e/dd          dBe!dmeMde@dz  fd                        Z e#e=j                   e/            de!deHde@fd                        Z e#e=j                   e/d          de!deHde@fd                        Z e#e=j                   e/            	 	 	 ddFe!de!deHde@de@de!fd                        Z e#e=j                   e/            d(e!de!deHdeHde@f
d                        Zde8eH         fdZde8e!         deHdeHde8e!         fdZde8e!         fdZde8e!         deHfdZde8e!         deHdeHfdZ e#e=j        jo        e=j        js        g          	 dde8e!         deHdeHde!dz  de!f
d            Z e#e=j        jo        e=j        js        g          	 	 dd2e!de8eH         deHde8e!         dz  de8e!         dz  f
d            Z e#e=j        j!                  ddBe!deHdeHde`e!df         fd            Z e#e=j        jo                  	 ddBe!de8eH         deHde`e!df         fd            Z e#e=j        j!                  dd2e!deHdeHde`e!df         fd            Ze=j        j                            e6j                  	 dd2e!de!deHde`e!df         fd            Z e#e=j                   e/d          eDdd2e!de!de!d%eHd)eHf
d                                    Z e#e=j                   e/            eD	 	 	 dd2e!de!de!d%eHd)eHde@fd                                    Z e#e=j                   e/d          eDdd2e!de!de!d%eHd)eHf
d                                    Z e#e=j        jo                  eDd(e!dBe!de!de!de!dz  deHdeHdeHdeHde8e@         de`e!dz  e!dz  e!dz  f         fdɄ                        Z e#e=j        js                  d(e!dBe!de!de!de!dz  deHdeHdeHdeHde8e@         dej!        dej!        dej!        de`e!dz  e!dz  e!dz  f         fd˄            Zde!dz  de!dz  fd̄Z e#e=j        jo                  de!dBe!de8eH         de!de!dFe!dz  de!dz  de8e@         de`e!dz  e!dz  e!dz  f         fdЄ            Z e#e=j        js                  de!dBe!de8eH         de!de!dFe!dz  de!dz  de8e@         dej!        dej!        dej!        de`e!dz  e!dz  e!dz  f         fdф            Z e#e=j        jo                  dBe!de8eH         dFe!dz  deMdz  de`e!e!f         f
d҄            Z e#e=j        jo                  de!dBe!de8eH         de!dFe!dz  de8e@         de`e!dz  e!dz  f         fdӄ            ZdBe!dFe!dz  de!dz  de!dz  de!dz  dLe@deMdeMde@de`e!e!e!e!dz  e!dz  f         fd؄Z e#e=j                   e/dddڦ          dBe!dFe!dz  de!dz  de!dz  de!dz  dLe@deMdeMde`e!e!e!f         fdۄ                        Ze=j        jo                            e6j                  e=j        jo                            e6j                  dBe!dFe!dz  de!dz  de!dz  de!dz  dLe@deMdeMde`e!e!e!f         fd܄                        Ze=j        jo                            e6j                  dde8e!         fd݄            Z e#e=j        jo                  dBe!dFe!dz  de!dz  de!de!deMdeMde`e!e!e!f         fdބ            Z e#e=j        jo                  dBe!dFe!dz  de!dz  de!de!dLe@deMdeMde`e!e!e!f         fd߄            Z e#e=j        j                  dBe!dFe!dz  de!dz  dLe@deMdeMde`e!e!e!f         fd            Z e#e=j        jo                  dBe!dFe!dz  de!dz  de!de!dLe@deMdeMde`e!e!e!e!e!f         fd            ZdBe!dFe!dz  de!dz  de!de!deMdLe@de!fdZ e#e=j        jo                  dBe!dFe!dz  de!dz  de!de!deMdeMde`e!e!e!e!f         fd            Z e#e=j        jo                  dBe!dFe!dz  de!dz  de!de!deMdeMde`e!e!e!e!e!e!f         fd            Z e#e=j        jo                  dBe!dFe!dz  de!dz  de!de!deMdeMde`e!e!e!e!f         fd            Z e#e=j                   e/dd          eDdd                                    Z e#e=j                   e/            dddddddde!e'z  dUeje        dz  dej        dz  de@de@dej        dz  fd                        Z e#e=j        e=j        e=j        g           e/            d                         Ze=j        jo                            e6j                   e#e=j                   e/dddd          dBe!dFe!de!dz  de!dz  de!dz  dLe@deMdeMfd                                    Zd Z e#e=j        jo                  de!dBe!dFe!dz  de!dz  de!dz  de!dz  de!dz  de@deMde8e@         de!de`e!e!dz  e!dz  f         fd            Z e#e=j        jo                  de!dBe!dFe!dz  de!dz  de!dz  de!dz  de!dz  de@deMde8e@         de`e!e!dz  e!dz  f         fd            Z e#e=j        js                  de!dBe!dFe!dz  de!dz  de!dz  de!dz  de!dz  de@deMde8e@         dej!        dej!        dej!        de`e!e!dz  e!dz  f         fd            Z e#e=j                   e/dddʦ          dBe!d(e!dFe!de!dz  de!dz  de!dz  de!dz  deMfd                        Z e#e=j                   e/dddʦ          dBe!d(e!dFe!de!dz  de!dz  de!dz  de!dz  deMde!fd                        Z e#e=j                   e/            eDdBe!de`eHeHf         fd                                    Zd2e)de)de8eH         deHfdZ e#e=j                   e/            d2e)de)de8eH         fd                        Z e#e=j                   e/            dBe)de)de8eH         de8eH         de8eH         f
d                        Z e#e=j                  dwdde)deHd|e)d e)d)e'f
d            Z e#e=j                   e/            dwdde)deHd|e)d e)d)e'f
d                        Zdwdde)deHd|e)d e)de@d)e'fdZ e#e=j        jo                  e=j        jo                            e6j                  dd                        Z e#e=j                  de)deHd|e)d e)fd            Z e#e=j                   e/            de)deHd|e)d e)fd                        Zde)deHd|e)d e)de@f
d	Z e#e=j                   e/ddN          eDd2e!de`e!e!f         fd
                                    Z e#e=j                   e/            	 	 	 dde!de@eHz  eMz  de@eHz  eMz  dej        dz  fd                        Z e#e=j                  dd            Zܐd Zݐd Z e#e=j        j                   e#e=j        j                   e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  dBe!de8eH         dz  de8eM         dz  de!fd                                                                                                            Z e#e=j        j                   e#e=j        j                   e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  dBe!de8eH         dz  de8eM         dz  de!fd                                                                                                            ZddZ e#e=j        jo        e=j        js        g          e=j        jo                            e6j                  e=j        jo                            e6j                   e/dd          	 ddBe!de8eH         deMdz  de!fd                                                Z e#e=j        jo        e=j        js        g          e=j        jo                            e6j                  e=j        jo                            e6j                   e/dd          	 ddBe!de8eH         deMdz  de!fd                                                Z e#e=j        jo        e=j        js        g          e=j        jo                            e6j                  e=j        jo                            e6j                   e/dd          	 	 ddBe!de8eH         deMdz  deMdz  de!f
d                                                Z e#e=j        jo        e=j        js        g          e=j        jo                            e6j                  e=j        jo                            e6j                   e/dd          	 	 ddBe!de8eH         deMdz  deMdz  de!f
d                                                Z e#e=j        jo        e=j        js        g          e=j        jo                            e6j                  e=j        jo                            e6j                   e/dd          	 	 	 ddBe!de8eH         deMdz  deMdz  deMdz  de!fd                                                Z e#e=j        jo        e=j        js        g          e=j        jo                            e6j                  e=j        jo                            e6j                   e/dd          	 	 	 ddBe!de8eH         deMdz  deMdz  deMdz  de!fd                                                 ZeD	 ddBe!de8eH         de8eMdz           d!e@de!f
d"            Zd# Zd$ Zd% Zd& Z	 dd'Zd( Zd) Zdd*Zdd+Zd, Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d-                                     Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d.                                     Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d/                                     Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d0                                     Zd1 Zdd2Zdd3Zd4 Z  e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d5                                     Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d6                                     Zd7 Zd8 Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d9                                     Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d:                                     Z e#e=j	        j                  e=j	        j                            e6j                  e=j	        j                            e6j                  d;                                     Z
 e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d<                                     Z e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  d=                                     Z e#e=j        j                   e#e=j        j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  e=j        j                            e6j                  d>                                                                                                 Z e#e=j        jo        e=j        js        g           e/            	 ddBe!de8eH         d?e@deMdz  de!f
d@                        Z e#e=j        jo        e=j        js        g          e=j        jo                            e6j                   e/            	 	 ddBe!de8eH         d?e@deMdz  deMdz  de!fdA                                    Z e#e=j        jo        e=j        js        g           e/            	 	 	 ddBe!de8eH         d?e@deMdz  deMdz  deMdz  de!fdB                        ZddCZdD Zdzee!         dEee!         dFe!de!fdGZdEe*de!fdHZeDdBe!de8eH         d?e@de8eMdz           de!f
dI            Z e#e=j        jo                  dJe!dKe!de@fdL            Z e#e=j        e=j        g           e/            dM                         Z e#e=j        g          dN             Z e#e=j        g          ddO            Z e#e=j        g          dP             Z e#e=j        g          dQ             Zd2e!dWe!dFe!dz  dSeHdbeHde`e!e!f         fdRZ e#e=j                    e/ddc          d2e!dWe!dFe!dz  dSeHdbeHde`e!e!f         fdS                        Z  e#e=j!                   e/ddc          d2e!dWe!dFe!dz  dSeHdbeHde`e!e!f         fdT                        Z!de!dUeMde!fdVZ"de!dUeMde!fdWZ#dXe!de*fdYZ$dZe*d[e!de!fd\Z%d[ee!         de!fd]Z&d^eHd?e@dUeje        dej        fd_Z'd`e!daeHdbeHd?e@fdcZ(d`e!ddeHdaeHdbeHd?e@f
deZ)d`e!de8eH         d?e@fdfZ*d`e!de8eH         d?e@fdgZ+ e#e=j,                   e/            eDd`e!de8eH         d?e@fdh                                    Z,	 	 	 	 ddJe!die!djeHdkeHd?e@dle@de!fdmZ- e#e=j.                   e/            eD	 	 	 ddJe!die!djeHdkeHd?e@de!fdn                                    Z. e#e=j/                   e/d          eDdo                                     Z/ e#e=j0                   e/            dde>jh        ji        fdp                        Z0dqej!        drej!        dse@de@fdtZ1e=j2        jo                            e6j                  e=j2        js                            e6j                   e/du          ddvdw                                    Z2 e#e=j3        jo        e=j3        js        g          e=j3        jo                            e6j                   e/            eD	 	 ddBe!de`eHeHf         d?e@dxeMdz  dyeMdz  de!fdz                                                Z4 e#e=j3        j                  e=j3        j                            e6j                  e=j3        j                            e6j                   e/            eD	 ddJe!de`eHeHf         dz  d?e@de`eMeMf         dz  de!f
d{                                                            Z5 e#e=j6                   e#e=j7                   e#e=j8                  eD e/            dJe!de`eHdf         de!fd|                                                            Z9 e#e=j:                   e#e=j;                   e#e=j<                  eD e/            dJe!de`eHdf         de!fd}                                                            Z=dJe!de`eHdf         d~eeHeHeHge!f         de!fdZ> e#e=j?                   e#e=j@                   e#e=jA                   e/d           d                                                 ZB e#e=jC                   e/dd          dddd                        ZC e#e=jD                   e/            dddd                        ZD e#e=jE        jo        e=jE        js        g           e/            dejF        ddddte'dUeje        dz  dejG        dej        dz  de@f
d                        ZH e#e=jE        jI        g          dejF        ddddse'dte'dUeje        dz  dejG        dej        dz  de@fd            ZJ e#e%          d             ZK e#e=jL                  e=jL        jo                            e6j                   e/            dwdwde>jh        ji        fdBe!dWe!dme'de'dFe!dz  dSeHde!fd                                    ZL e#e=jM                  e=jM        jo                            e6j                   e/dd          dBe!dWe!dSeHde`e!e!f         fd                                    ZM e#e=jN        jo                  	 	 ddddde!de!d0e!deMde@de!dz  d*eMdz  de`e!e!f         fd            ZOd ZP e#e=jQ        g           e/d          eDdd                                    ZQ e#e=jR                   e/            d                         ZR e#e=jS                  d             ZS e#e=jT        jo        e=jT        js        g          dddd2e!dUeje        dz  de!dz  de!fd            ZU e#e=jV        jo        e=jV        jW        g          dd2e!deHdz  fd            ZX e#ej<        j=        jY                  dd            ZY e#e=jZ                   e/            dddd                        ZZ e#e=j[        jo                  ddd2ej!        dej        dz  dej!        fd            Z[dddZ\ddddZ] e#e=j^                   e/            d                         Z^ e#e=j_                  dd            Z_ e#e=j`                  d(e!d2e!de@de!fd            Z` e#e=ja        jo        e=ja        js        g           e/            ddddddeHdUeje        dz  dejG        dz  dej        dz  de@dz  de!fd                        Za e#e=ja        jb        e=ja        jc        g           e/            	 dddddddeHde@dUeje        dz  dejG        dz  dej        dz  de@dz  de!fd                        Zd ePe=je        e=jf                    ePe=jg        e=j                    ePe=jh        e=j                    ePe=ji        e=jQ                    ePe=jj        e=jO                    ePe=jk        e=jl                    ePe=jm        e=jV                    ePe=jn        e=jo                    ePe=jp        e=jS                    ePe=jq        e=jr                    ePe=js        e=jt                    ePe=ju        e=jv                    ePe=jw        e=jx                    ePe=jy        e=jz                    ePe=j{        e=j|                    ePe=j}        e=j~                    ePe=j        e=jc                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j                    ePe=j        e=j\                   e=j        jo                            e6j                  dd2e!deHde!fd            ZdS (      N)CallableIterable)nullcontext)Enum)partialreduce)chainproduct)Anycast)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                       e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM     _/var/www/html/Carbon-Document/venv/lib/python3.11/site-packages/torch/_decomp/decompositions.pyr    r    1   s        DD
CCCr*   r    Fftype_promotioncompute_dtype_onlyinclude_non_tensor_argsc                 R     t          j                    fd            }|S )Nc                  4   rt           t          j        j        fnt           ffdt	          j        | i |D             }t          j        |di\  fd}fd} 
t          ||           i t          ||          }	r|S t          ||          S )Nc                 4    g | ]}t          |          |S r)   )
isinstance).0xallowed_typess     r+   
<listcomp>z-type_casts.<locals>.inner.<locals>.<listcomp>E   s9     
 
 
!]++

 
 
r*   type_promotion_kindc                 \    t          | t                    r|                               S | S Nr3   r   to)r5   computation_dtypes    r+   increase_precz0type_casts.<locals>.inner.<locals>.increase_precO   s-    !V$$ tt-...r*   c                 \    t          | t                    r|                               S | S r:   r;   )r5   result_dtypes    r+   decrease_precz0type_casts.<locals>.inner.<locals>.decrease_precU   s,    !V$$ ttL)))r*   )	r   torchtypes_Numberpytreearg_tree_leavesutilselementwise_dtypesr   )argskwargs	flat_argsr>   rA   rr6   r=   r@   r.   r,   r/   r-   s         @@@r+   innerztype_casts.<locals>.inner@   s     .ESVU[())6) 	
 
 
 
+T<V<<
 
 
	
 +0*B+
,:+
 +
'<
	 	 	 	 		 	 	 	 	 Axt,,P0O0OPP 	.HM1---r*   )	functoolswraps)r,   r-   r.   r/   rM   s   ```` r+   
type_castsrP   :   sM     _Q. . . . . . . .@ Lr*   T)r-   r.   )r-   )r-   r/   r5   dimreturnc                 ~    t          ||                                 z
            D ]}|                     d          } | S N)rangerQ   	unsqueeze)r5   rQ   _s      r+   _unsqueeze_to_dimrY   w   s;    3=!!  KKOOHr*   
grad_inputout_gradyc                 <    | d||z  z
                                   z  S Nr!   conj_physicalr[   r\   s     r+   tanh_backwardrb   }   s#     q1q5y//1111r*   c                 <    | |d|z
  z                                   z  S r^   r_   ra   s     r+   sigmoid_backwardrd      s#     qAE{113333r*   beta	thresholdc                 |    ||z                                   }t          j        ||z  |k    | | |z  |dz   z            S N      ?)exprB   where)r[   r5   re   rf   zs        r+   softplus_backwardrm      sA     
TA;DI-xAS9QRRRr*   grad_outputalphascaleinput_scale	is_resultself_or_resultc                     ||z  }|}|}|r&t          j        |dk    | |z  ||z   z  | |z            S t          j        |dk    | |z  |z  t          j        ||z            z  | |z            S Nr   )rB   rk   rj   )	rn   ro   rp   rq   rr   rs   negcoefposcoef
negiptcoefs	            r+   elu_backwardry      s     emGGJ 
{a*$(@A'!
 
 	
 {a*$w.>J;V1W1WW'!
 
 	
r*   c                 ,    t          j        | |          S r:   )rB   	full_likeselfvalues     r+   fill_scalarr      s    ?4'''r*   r~   c                     t          j                                        dk    fd           t                              |           S )Nr   c                  4    d                                   dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrQ   )r~   s   r+   <lambda>zfill_tensor.<locals>.<lambda>   s     kSXS\S\S^S^kkk r*   )rB   _checkrQ   atencopyr|   s    `r+   fill_tensorr      sI    	L		qkkkk   99T5!!!r*   r}   c                 b    t          j        t          j        | dz   d          d          dz  S N   r   min   maxrB   clampr}   s    r+   hardsigmoidr      s1     ;u{4!8333;;;a??r*   c                 J    t          j        |dk    |dk     z  | dz  d          S )Ng      g      @gUUUUUU?        rB   rk   rn   r}   s     r+   hardsigmoid_backwardr      s3     ;	$y!  r*   min_valmax_valc                 D    t          j        ||k    ||k    z  d|           S )Nr   r   )rn   r}   r   r   s       r+   hardtanh_backwardr      s%    
 ;DGO<c;OOOr*   c                 h    | t          j        t          j        | dz   d          d          z  dz  S r   r   r   s    r+   	hardswishr      s6     %+ek$(:::BBBBQFFr*   c           
      x    t          j        |dk    dt          j        |dk     | |dz  dz   z  |                     S )Nr   r         ?r   r   s     r+   hardswish_backwardr      sE     ;
D1HkdQh#-=>LL  r*   c                 6    t          j        ||k    d|           S ru   r   )rn   r}   rf   s      r+   threshold_backwardr      s     ;ty(![999r*   negative_slopeself_is_resultc                 <    t          j        |dk    | | |z            S ru   r   )rn   r}   r   r   s       r+   leaky_relu_backwardr      s!     ;taxkN.JKKKr*   nonegradapproximatec                    d}d}d}|dk    rh||z  dz  }d}||z  }||z  }	||||	z  z   z  }
t          j        |
          }d|z  }d|z   }d|z  }d||z  z
  }|dd|z  |z  z   z  }||z  |z  }| ||z   z  S |}||z  dz  }ddt          j        ||z            z   z  }|t          j        ||z  d	z            z  }| |||z  z   z  S )
Ng;f?g;f?gmBP?tanhr   gHm?r!   r         )rB   r   erfrj   )r   r}   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberM   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfs                        r+   gelu_backwardr      s-    %G&I'Jf*$s*d{/0Z&&
TzJ+j:55 AF
T(9$9:/14DD)99::Y&,Q4&=1112eitd 2333sTCZ'((r*   inputc                     t          j        t          j        |                    }t          j        |          }||z  d||z  z
  z  }| ||z   z  S r^   )rB   r   Fsoftplussigmoid)rn   r   input_tanh_softplusinput_sigmoidouts        r+   mish_backwardr     sY      *QZ%6%677M%((M
-
1':=P'P#P
QC-344r*   c                 0    | t          j        |           z  S r:   )rB   r   r   s    r+   silur   "  s     %-%%%%r*   c                 Z    ddt          j        |           z   z  }| |z  d|d|z
  z  z   z  S r^   )rB   rj   )rn   r}   r   s      r+   silu_backwardr   )  s=     1uy$'''(G AG(<$<==r*   weightc                 <    t          j        | dk    | || z            S ru   r   )r}   r   s     r+   _prelu_kernelr   1  s    ;taxv}555r*   c                 ~    t          j        |dk    | || z            }t          j        |dk    d|| z            }||fS )Nr   r   r   )rn   r}   r   
input_gradweight_grads        r+   _prelu_kernel_backwardr   6  sG     TAX{F[4HIIJ+dQhTK-?@@K$$r*   noiseloweruppertrainingc                 z    |r|                      |          S ||z   dz  }t                              | |||          S Nr"   )mulr   r   )rn   r}   r   r   r   r   r   r   s           r+   rrelu_with_noise_backwardr   A  sM      
u%%%%-1,''~~
 
 	
r*   bufferc                 
   |dk     }t          j        |dd          }t          j        |dd          }|                                dk    r|n&t          j        t          j        |                     }| |||d|z   z  z  z
  z  S )Nr   r!   rU   )rB   rk   numelrj   abs)rn   r}   r   in_negative	max_derivsignrl   s          r+   log_sigmoid_backwardr   V  s     (KKA..I;{Ar**D ,,..1$$%)UYt__4D*E*EA)da1q5k&::;;r*   otherc                     t          j        | j                  st          j        | j                  rt          j        n| j        }|                     dd|          }| t	          j        ||          z  S )Nr)          @dtype)rG   is_integer_dtyper   is_boolean_dtyperB   float32new_fullpow)r}   r   	two_dtype
two_tensors       r+   ldexpr   c  sn    
 !$*--	161G
1S1S	Z 
 r3i88J%)J....r*   loss	reductionc                     |t           j        j        k    rt          j        |           S |t           j        j        k    rt          j        |           S | S r:   )r    r'   r~   rB   meanr(   sum)r   r   s     r+   apply_loss_reductionr   o  sE    IN(((z$	im)	)	)yr*   r   c                     | t           j        k    rt           j        S | t           j        k    rt           j        S | t           j        k    rt           j        S d S r:   )rB   	complex32float16	complex64r   
complex128float64r   s    r+   to_real_dtyper   x  sJ    }	%/	!	!}	%"	"	"} 
#	"r*   targetc                 2    | |z
  dz  }t          ||          S r   )r   )r}   r   r   r   s       r+   mse_lossr    s"     6MaDi000r*   c                 t    |t           j        j        k    rd|                                z  nd}|||z
  z  | z  S )Nr   )r    r'   r~   r   )rn   r   r   r   norms        r+   mse_loss_backwardr    s>     #,y~/C"C"C3D56>"[00r*   c                     t          j        | ||          }|                     t          d                    }t          j        ||d          }t          j        |          }t          j        |||          S )N)rQ   r   z-infTrQ   keepdim)rB   softmaxeqfloatall
zeros_likerk   )r}   rQ   r   r   maskedmasked_rowszeross          r+   safe_softmaxr    si    
-#U
3
3
3CWWU6]]##F)FT:::KS!!E;{E3///r*   ri   c                     | |z
                                   }t          j        ||k     d|dz  z  |z  |d|z  z
            }t          ||          S )Nr   r"   )r   rB   rk   r   )r}   r   r   re   r   s        r+   smooth_l1_lossr    sV     6M  D;td{C$'MD$8$t:KLLDi000r*   c                    |t           j        j        k    rd|                                z  nd}||z
  }t	          j        |          }|| z  }t	          j        ||k     ||z  |z  |t	          j        |          z            S rh   )r    r'   r~   r   rB   r   rk   r   )	rn   r}   r   r   re   r  r5   abs_x	norm_grads	            r+   smooth_l1_loss_backwardr    s    
 "+in.B!B!B3DvAIaLLE{"I;AEJqMM!  r*   c                 v    t          | ||||          }t          ||j                   t          ||d          S NT	copy_fromcopy_toexact_dtype)r  r   shaper   )rn   r}   r   r   re   rZ   results          r+   smooth_l1_loss_backward_outr     s@     %[$	4PPFj&,///FJDQQQQr*   deltac           
          |t           j        j        k    rd|                                z  nd}||z
  }t	          j        || k     | | z  |z  t	          j        ||k    || z  |z  ||z  | z                      S rh   )r    r'   r~   r   rB   rk   )rn   r}   r   r   r!  r  r5   s          r+   huber_loss_backwardr#    s    
 "+in.B!B!B3DvA;	UF
	e#AItk1E94!8k;QRR  r*   c                 v    t          | ||||          }t          ||j                   t          ||d          S r  )r#  r   r  r   )rn   r}   r   r   r!  rZ   r  s          r+   huber_loss_backward_outr%    s@     !dFIuMMFj&,///FJDQQQQr*   ignore_indextotal_weightc                    |                                 dk     rdnd}|t          j        j        k    r| |z  } |                                 dk    r |                                 dk    r|d         }|                    |          }t          j        ||k    |d          }t          j        |          }	t          j        |	||d          }	|	                                 |                                  cxk    rdk    rn n|                     |          } |Ud t          |                                           D             }
|j
        d         |
|<   |                    |
          }| |z  } t          j        ||k    | d          } |	| z  S )Nr"   r   r!   g      c                     g | ]}d S r!   r)   r4   rX   s     r+   r7   z&_nll_loss_backward.<locals>.<listcomp>  s    2221Q222r*   )rQ   r    r'   r~   rW   rB   rk   r  scatterrV   r  reshape)rn   r}   r   r   r   r&  r'  channel_dimsafe_targetrZ   	new_shapes              r+   _nll_loss_backwardr1    su    xxzzA~~!!1KIN(((!L0
 xxzzQ6::<<!++k**F+f4fa@@K!$''Jz;TJJJ~~+//++////a/////!++K8822dhhjj 1 1222	!'a	+	**!F*+f4k1EEK##r*   c                    |                                 dk    rt          d          t          j        |                                 |          }|                    |          }|dz  dk    rt          d| d|           |dz  }|                    |d|          }|                    |||          }t          j        |          }d|z
  |z  |z  | z  }	|| z  }t          j        ||	g|          S )Nr   z*glu does not support 0-dimensional tensorsr"   z.Halving dimension must be even, but dimension z	 is size ri   r   )	rQ   AssertionErrorrG   canonicalize_dimsizenarrowrB   r   cat)
rn   r}   rQ   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfs
             r+   glu_backwardr?    s    xxzzQIJJJ%dhhjj#66H
))H

C
Qw!||UXUUPSUU
 
 	
 qIHa33IXy)<<Jz22	!	!%77)CkQ  ,k99(*=>HMMMMr*   c           	         d|                                 cxk    rdk    s'n t          d|                                  d          |                                 dk    r%t          d|                                  d          |                                 dk    o|                                 dk    }|s<|j        d         |j        d         k    s t          d|j         d|j         d	          |                                dk    r-t          d
|j         d|                                 d          |2|                                |j        d         k    rt          d          |t          j        j        k    r|                                 dk    ru|                                  dk    r| j        d         |j        d         k    s@t          d|j        d          d|                                   d| j        d                    nG|                                  dk    r|                                 dk    st          d| j                   t          | ||||||          S )Nr   r"   %input tensor should be 1D or 2D, got Dr!   A0D or 1D target tensor expected, multi-target not supported, got size mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rU   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rQ   r3  r  r   r    r&   r~   r1  )rn   r}   r   r   r   r&  r'  no_batch_dims           r+   nll_loss_backwardrI  -  s        q    RTXXZZRRRSSSzz||a_PVPZPZP\P\___
 
 	
 88::?8vzz||q'8L 
TZ]fl1o==NNNv|NNN
 
 	
 q  F!F F%1%7%7%9%9F F F
 
 	

 fllnn
2>>J
 
 	
 IN(((TXXZZ1__!!Q&&;+<Q+?4:a=+P+P c$*UV- c c"-//"3"3c cLWL]^_L`c c   ,Q !!Q&&;+<+<+>+>!+C+C ]+J[]]   T669lL  r*   c           	         |                                 dk    r$t          d|                                            |                                 dk    r$t          d|                                            |j        d         |j        d         k    r8|j        d         |j        d         k    r|j        d         |j        d         k    st          d|j         d	|j                   |                                dk    r-t          d
|j         d|                                 d          t	          | ||||||          S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r"   r!   rD  rE  rG  z ( z, elements))rQ   r3  r  r   r1  )rn   r}   r   r   r   r&  r'  s          r+   nll_loss2d_backwardrL  `  su    xxzzQnbfbjbjblblnn
 
 	
 zz||qrdjdndndpdprr
 
 	

 	
1a((JqMV\!_,,JqMV\!_,,MMMv|MM
 
 	
 q  M &M M+7+=+=+?+?M M M
 
 	

 T669lL  r*   c           	      2   |dz
  t          j        t          j        |            |                     dd                    z  |t          j        t          j        |           |                     dd                    z  z
  }|||z  }t          ||          S )Nr!   r)   i)rB   maximumlog1pr   logr   )r}   r   r   r   r   s        r+   binary_cross_entropyrQ    s     QJ%-TEDMM"d33  uyb$0G0GHHHID f}i000r*   c                     d}| ||z
  z  t          j        |d|z
  z  |          z  }|||z  }|t          j        j        k    r||                                z  }|S )Ng-q=r!   r   )rB   r   r    r'   r~   r   )rn   r}   r   r   r   EPSILONr  s          r+   binary_cross_entropy_backwardrT    sk     GD6M*U[T9JPW-X-X-XXF&IN((($**,,&Mr*   c                 v    t          j        t          j        |  |z                      }t          ||          S r:   )rB   rO  rj   r   )r   r   r   r   s       r+   soft_margin_lossrV    s3     ;uy%&1122Di000r*   c                     || z  t          j        ||z            dz
  z  }|t          j        j        k    r||                                z  }|S r^   )rB   r   r    r'   r~   r   )rn   r}   r   r   rZ   s        r+   soft_margin_loss_backwardrX    sN     +%v})E)E)IJJIN((($**,,.
r*   r"   pc                 @    t                               | |z
  |          S )N)rY  )r   r  )r   r   rY  s      r+   distr[    s     99UU]a9(((r*   x1x2c                 .   |                      d                              dd          }t          j        |t          j                  }|                     d                              dd          }t          j        |t          j                  }t          j        |                     d          ||gd          }t          j        |||gd          }|                    |j                  }|	                    d          
                                S )Nr"   rU   Tmemory_formatr   )r   r   rB   	ones_likecontiguous_formatr7  r   matmulmT	clamp_minsqrt)	r\  r]  x1_normx1_padx2_normx2_padx1_x2_r  s	            r+   _euclidean_distrn    s     ffQiimmB%%G_WE4KLLLFffQiimmB%%G_WE4KLLLF
)RVVBZZ&12
6
6C
)R)2
.
.CZZFA##%%%r*   input_sizesstartendstepc                 ^    |                      |          }t          j        || ||||          S r:   )	new_zerosrB   slice_scatter)rn   ro  rQ   rp  rq  rr  rZ   s          r+   slice_backwardrv    s2     &&{33Jz;UCNNNr*   r!   c                 d   ddl m} |                                 }|dk    rt          d          t	          j        |                                 |          }t          |                                           }t          |                                           }|dk    rt          d          ||nd}	||nt          j
        }
|	dk     r|	||         z  }	|
dk     r|
||         z  }
|	dk     rd}	n|	||         k    r||         }	 ||
t          j
        k              r	||         }
n|
|	k     r|	}
n|
||         k    r||         }
|                                 |	||         z  z   }|
|	z
  }||z   dz
  |z  ||<   ||xx         |z  cc<   | j        rt          d          |                     |||          S )Nr   statically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver!   z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesry  rQ   RuntimeErrorrG   r4  listr5  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)r}   rQ   rp  rq  rr  ry  ndimsizesstrides	start_valend_valr  lens                r+   slice_forwardr    s    LKKKKK88::DqyyIJJJ

 S
1
1CE4;;==!!Gqyy8999*I_cc#+G1}}U3Z	{{5:1}}			U3Z		#J	W344 *	9			5:		*((**Y-EEN
I
C*q.T)E#JCLLLDLLL ?!J
 
 	
 ug~>>>r*   c                 x    | j         |         dt          ffd} ||dd          } |||          }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rR   c                 ^    | |S | dk     r| z   } t          t          | |          |          S ru   r   r   )valr   r   defaultdim_sizes       r+   
clamp_wrapz(_normalize_start_end.<locals>.clamp_wrap.  s7    ;N77.C3sE??E***r*   r   )r  int)r5   rQ   rp  rq  r  r  s        @r+   _normalize_start_endr  %  sk     ws|H+# + + + + + + Jua1--E
*S%8
4
4C#:r*   srcc           	         t          j        | j        |          }| j        |         }t	          | |||          \  }}t          | j                  }||z
  |dz
  z   |z  ||<   |                    |          }|dk    r ||k    r|dk    r|                                S d g|                                 z  }t          j
        || j                  }	|	|z
  |z  ||<   t          j        || j        t          j                  }
|dk    rt          j        |
|	|k              }
||k    rt          j        |
|	|k               }
|dk    rt          j        |
|	|z
  |z  dk              }
dg|                                 z  }d||<   |
                    |          }
t                               |
t                               ||
|d          |           S )Nr!   r   devicer  r   rU   )rG   r4  r  r  r  r|  expandclonerQ   rB   aranger  onesboollogical_andviewr   rk   _unsafe_masked_index)r   r  rQ   rp  rq  rr  r  src_sizeindicesidxmask
mask_shapes               r+   ru  ru  <  s    
 S
1
1C{3H%eS%==JE3EK  H5[D1H-$6HSM
**X

CzzcXoo$!))yy{{$(6EIIKK#7G
,x
5
5
5C%KD(GCL:hu|5:FFFDzz se|44
h sSy11qyy et';q'@AAuyy{{"JJsO99Z  D::dD55c4!LLeTTTr*   indexc                 Z    |                      |          }t          j        || ||          S r:   )rt  rB   select_scatter)rn   ro  rQ   r  rZ   s        r+   select_backwardr  e  s.     &&{33J
KeDDDr*   offsetdim1dim2c                 \    |                      |          }t          j        || |||          S r:   )rt  rB   diagonal_scatter)rn   ro  r  r  r  rZ   s         r+   diagonal_backwardr  l  s0    
 &&{33J!*k64NNNr*   input_dtypec                 F    | j         |k    r|                    |          }|S r:   )r   r<   )rn   rZ   r  s      r+   _cast_grad_to_input_dtyper  u  s)     K'']];//
r*   outputc                     | |z  }||t          j        ||d          z  z
  }t          | ||                                          S NTr  )rB   r   r  
contiguous)rn   r  rQ   r  new_grad_outputrZ   s         r+   _softmax_backward_datar  }  sY     "F*O 6EIS$- - - $ J %[*kJJUUWWWr*   c                     | t          j        |          t          j        | |d          z  z
  }t          | ||          S r  )rB   rj   r   r  )rn   r  rQ   r  rZ   s        r+   _log_softmax_backward_datar    sM     uy0059d4 4 4   J %[*kJJJr*   c                     | |dz  z   ||dz
  z  z
  }t          t          j        t          j        |          } |d||                              d          } |d||z  |                              d          }	||	z   S )z/Utility function to implement im2col and col2imr"   r!   r   r  r   rU   )r   rB   r  int64rW   )
input_dkernel_d
dilation_d	padding_dstride_dr  blocks_d	arange_kwblocks_d_indiceskernel_grids
             r+    _im2col_col2im_indices_along_dimr    s     Q&x!|)DDHEKGGGI !yHh77AA!DD )Ax*4jAAKKBOOK k))r*   kernel_sizedilationpaddingr}  c           
         t          j        t                    dk    d            t          j        t                    dk    d            t          j        t                    dk    d            t          j        t                    dk    d            dd} |d            |d	            |d
d            |d           | j        t                    }t          j        |dv o t	          d dd          D                       fd           t          d t          dd                    D                       t          j        t	          d D                       fd           |dk    }|s|                     d          } | j        \  }}	}
}\  }}\  }}\  }}\  }}t          |
||||| j	                  }t          |||||| j	                  }t          j        | ||||f          }|                    d                              d          }|d d d d ||f         }|                    dddddd          }|                    d          }|                    d          }|                    ||	|z  |z  ||z            }|s|                    d          }|S )Nr"   c                      dS )Nz"im2col(): only 2D kernel supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  s    0T r*   c                      dS )Nz$im2col(): only 2D dilation supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  s    -S r*   c                      dS )Nz#im2col(): only 2D padding supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  s    ,Q r*   c                      dS )Nz"im2col(): only 2D stride supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  s    +O r*   Tc                      |rt          d  D                       nt          d  D                       }t          j        | fd           d S )Nc              3   "   K   | ]
}|d k    V  dS r   Nr)   r4   rY  s     r+   	<genexpr>z1im2col.<locals>.check_positive.<locals>.<genexpr>  &      ((Q1q5((((((r*   c              3   "   K   | ]
}|d k    V  dS r  r)   r  s     r+   r  z1im2col.<locals>.check_positive.<locals>.<genexpr>  &      ;R;RqAF;R;R;R;R;R;Rr*   c                       d  S Nz& should be greater than zero, but got r)   param
param_names   r+   r   z0im2col.<locals>.check_positive.<locals>.<lambda>      ZVVuVV r*   r  rB   r   r  r  strictconds   ``  r+   check_positivezim2col.<locals>.check_positive  q    ,2Rs((%((((((;R;RE;R;R;R8R8RVVVVV	
 	
 	
 	
 	
r*   r  r  r  Fr  r}  r   rK  c              3   "   K   | ]
}|d k    V  dS r  r)   r4   ds     r+   r  zim2col.<locals>.<genexpr>  &      ::!qAv::::::r*   r   c                  (    dt                      S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler  s   r+   r   zim2col.<locals>.<lambda>       <-25\\< < r*   c              3   V   K   | ]$\  }}}}}d |d|z  z   ||d z
  z  z
  d z
  |z  z   V  %dS )r!   r"   Nr)   r4   r   paddilkersts         r+   r  zim2col.<locals>.<genexpr>  sd        "Cc3 	
S1s7]SC!G_,q0R77     r*   ra  c              3   "   K   | ]
}|d k    V  dS r  r)   )r4   cs     r+   r  zim2col.<locals>.<genexpr>  s&      ''aAE''''''r*   c                  X    dt          dd                     d d  d d d dS )	Nz!Given an input with spatial size ra  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  )r  r  output_sizer  r  r}  s   r+   r   zim2col.<locals>.<lambda>  s|     FE%*4E4E F F"F F/7F FF F%+F F 	F F F r*   rK  r   rU   r!   r      T)rB   r   r  r  r  r  ziprW   r  r  r   r  permuter5  r-  squeeze)r   r  r  r  r}  r  r  batched_input	batch_dimr.  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr  num_blocks_rownum_blocks_colr  r  s    ````                     @@r+   im2colr    s    
L[!!Q&(T(TUUU	LX!#%S%STTT	LW"$Q$QRRR	LV!#O#OPPP
 
 
 
 N;...N8Z(((N8Yu5555N68$$$KEu::D	L:3::uRSSz:::::	< 	< 	< 	<  
   &)"##J;'
 '
    K 
L'';'''''	F 	F 	F 	F 	F 	F 	F 	F 	F   AIM #""/4{,I{GWHh"Iy%J
$Hh9:y(EL  ::y(EL  5Iy) LMML+55b99CCBGG!!!QQQ 24FFGF^^Aq!Q1--F',,Q//N',,Q//N^^;)H4n~6U F  #""Mr*   r  c           
         t          j        t                    dk    d            t          j        t                    dk    d            t          j        t                    dk    d            t          j        t                    dk    d            t          j        t                    dk    d            d d} |d	            |d
            |dd            |d            |d           | j        t                    }t          j        |dv o t	          d dd          D                       fd           d         d         z  }t          j        d         |z  dk    fd           d t                    D             }	|	d         |	d         z  t          j        d         k    fd           t          j        dk    fd           |dk    }
|
s|                     d          } | j        \  }}\  }}\  }}\  }}\  }}|                     d         d         |z  gz   |	z             } |                     dddddd          } t          |||||| j
                  }t          |d          }t          |||||| j
                  }d t                    D             }|                     d         d         t                    z  g|z             }d d ||f}t                              ||| d          }t!          j        || | | | f          }|
s|                    d          }|S )!Nr"   c                      dS )Nzonly 2D output_size supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  s    0O r*   c                      dS )Nzonly 2D kernel supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  s    0J r*   c                      dS )Nzonly 2D dilation supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  s    -I r*   c                      dS )Nzonly 2D padding supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  s    ,G r*   c                      dS )Nzonly 2D stride supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  s    +E r*   Tc                      |rt          d  D                       nt          d  D                       }t          j        | fd           d S )Nc              3   "   K   | ]
}|d k    V  dS r  r)   r  s     r+   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r*   c              3   "   K   | ]
}|d k    V  dS r  r)   r  s     r+   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r*   c                       d  S r  r)   r  s   r+   r   z0col2im.<locals>.check_positive.<locals>.<lambda>  r  r*   r  r  s   ``  r+   r  zcol2im.<locals>.check_positive  r  r*   r  r  r  Fr  r}  r  )r"   r   c              3   "   K   | ]
}|d k    V  dS r  r)   r  s     r+   r  zcol2im.<locals>.<genexpr>   r  r*   ra  c                  (    dt                      S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r  s   r+   r   zcol2im.<locals>.<lambda>!  r  r*   r   r!   c                       dd          d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = ra  z and kernel_size=r)   )r  r  s   r+   r   zcol2im.<locals>.<lambda>'  s(     %=B2Y% %"% % r*   c                 N    g | ]"\  }}}}}d |d|z  z   ||d z
  z  z
  d z
  |z  z   #S r!   r"   r)   r  s         r+   r7   zcol2im.<locals>.<listcomp>+  sV       "Cc3 	
S1s7]SC!G_,q0R77  r*   rU   c                  @    d d d d d d  dd          d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rU   .r)   Lr  r  r  r  r  r}  s   r+   r   zcol2im.<locals>.<lambda>4  |     B[ B B B BB B(/B B:@B B)*B B5:2YB B B r*   c                  @    d d d d d d  dd          d	S r$  r)   r'  s   r+   r   zcol2im.<locals>.<lambda>:  r)  r*   r   rK  r  c                 $    g | ]\  }}|d |z  z   S r"   r)   )r4   orY  s      r+   r7   zcol2im.<locals>.<listcomp>V  s$    JJJ1!a!e)JJJr*   
accumulater  )rB   r   r  r  r  r  rW   r-  r   r  r  rY   rt  prodr   _unsafe_index_putr   r  r  )r   r  r  r  r  r}  r  r  prod_kernel_sizecolr  out_hout_wr  r  r  r	  r
  r  r  r  indices_rowindices_coloutput_padded_sizer  r  r(  r  s    `````                    @@r+   col2imr9     s    
L[!!Q&(O(OPPP	L[!!Q&(J(JKKK	LX!#%I%IJJJ	LW"$G$GHHH	LV!#E#EFFF
 
 
 
 N;...N8Z(((N7Ie4444N68$$$N;...KEu::D	L:3::uRSSz:::::	< 	< 	< 	<  
 #1~A6	Lb	$$)	% 	% 	% 	% 	%   &)(K'
 '
  C 	AQA	Lb	Q	B 	B 	B 	B 	B 	B 	B 	B 	B 	B   
L	A	B 	B 	B 	B 	B 	B 	B 	B 	B 	B   AIM #""KELE5Hh"Iy%J
$Hh MM58U1X1A%AB[PSVVWWEMM!Q1a++E2xY%, K $K33K2xY%, K KJK0I0IJJJ__	q58tK00014FF F {K
0C##FC4#HHFU6YJ
YJ
KLLF #""Mr*   r  c                     | |                     |           |z  z                      t          j        |                     }|S Nr_  )type_asr  rG   r   )rn   r  rp   rL   s       r+   native_dropout_backwardr=  c  sJ     
[11E9	:AA1+>> 	B 	 	A Hr*   
input_size	dimensionr5  c                 <   t          |          dk    rt          j        | d          S t          j        t          |          |          }t          j        ||         | j        t          j                  }|                    d||          	                                }| 
                    d|dz             	                    ||dz             } |                     |          }d|z  |fz   }t                              ||| d                                          S )Nr   r  rU   r!   r:   Tr.  )r  rB   squeeze_copyrG   r4  r  r  int32unfoldflattenmovedimrt  r   r1  r  )	r   r>  r?  r5  rr  rQ   r  rZ   r  s	            r+   unfold_backwardrF  r  s    
 :!!$***

 Z)
<
<C
,z#t{%+
N
N
NC
**Qd
#
#
+
+
-
-C<<C!G$$,,S#'::D 
++JcMSF"E!!*eTd!KKVVXXXr*   epsc           
      >   |A|}d|z
  }t          j        t          j        ||k    ||k              | |d|z
  z  z  d          S t          j        t          j        |dk    |dk              | |d|z
  z  z  |                    dt	          d                              S )Nri   r   r)   nan)rB   rk   r  r   r  )rn   r}   rG  lohis        r+   logit_backwardrL    s    
 2X{dbj$"*5543:./
 
 	
 {dck43;7743:./MM"eEll++
 
 	
r*   trainc                 ~    |r(|dk    r"t                               | ||          d         S |                                 S ru   )r   native_dropoutr  )r   rY  rM  s      r+   dropoutrP    s?      a""5!U33A66{{}}r*   out0out1c                 n   |r|dk    r|dk    r4t          j        |           t          j        | t           j                  fS | j        j        st          d          t          j        |           |k    }|| z  t          dd|z
  z            z  }||fS | t          j        | t           j                  fS )Nr   r!   r   z?result type Float can't be cast to the desired output type Longri   )	rB   r  r  r   is_floating_pointr{  	rand_liker  rb  )r   rY  rM  	bool_maskress        r+   rO  rO    s      Aa66$U++U-=e5:-V-V-VWW{, 	Q   OE**Q.	%%sQw"8"88YuuEJ???@@r*   half_to_floatc                 <   ddl m} |                                 } |r-| j        t          j        k    rt          d| j         d          t          j        | t          j	        j
                  \  }}|                     |          }  ||                                 dk              rt	          j        |           }n.t	          j        | |d          }t	          j        | |z
            }|t	          j        ||d          z  }|s|                    |          }|S Nr   guard_or_falsez%half_to_float is True but x.dtype is z, expected torch.halfr8   T)r  )rz  r\  r  r   rB   halfr3  rG   rH   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr<   r   rj   amaxr   )	r5   rQ   rX  r\  r=   r@   unnormalizedx_maxr  s	            r+   _softmaxrd    s,    EDDDDD 	
A 7ej   VVVV   ',&>	uDL' ' '#| 	
A~aggii1n%% ,y||
1c4000yU++EIlCFFFFF )<((Mr*   )r  c                 @   ddl m} |                                 } |r-| j        t          j        k    rt          d| j         d          t          j        | t          j	        j
                  \  }}|                     |          }  ||                                 dk              r| }nt	          j        | |d          }| |z
  }t	          j        t	          j        t	          j        |          |d                    }||z
  }	|s|	                    |          }	|	S rZ  )rz  r\  r  r   rB   r^  r3  rG   rH   r_  r`  r<   r   ra  rP  r   rj   )
r5   rQ   rX  r\  r=   r@   shiftedrc  shifted_logsumexpr  s
             r+   _log_softmaxrh    s5    EDDDDD 	
A 7ej   VVVV   ',&>	uDL' ' '#| 	
A~aggii1n%% 
1c4000e)	%)EIg,>,>T"R"R"RSS((F )<((Mr*   rU   r  padding_idxscale_grad_by_freqsparsec                    |                                  dk    r%t          d|                                   d          |j        dk    r8|                     d|          }|j        dk    r|                    d          }|S | |         S )Nr"   z'weight' must be 2-D, got z-Dr!   r   )rQ   r3  r  index_selectr  )r   r  ri  rj  rk  r   s         r+   	embeddingrn    s     zz||qJ&**,,JJJKKK|q!!!W--<1++a..C
gr*   num_weightsc                    t          j        | t           j        j                  \  }}|                     |          } t          |t          j                  }|ri|                    |f          }t          j	        |          }t                              ||g|d          }||         }	| |	                    d          z  } t          ||k    | j                  }
|                     |
d          }|                     |f| j        |j        d          z             }t                              ||g|d                              |          S )Nr]  Tr.  rU   r   )rG   rH   r_  r`  r<   r   rB   longrt  rb  r   r1  rW   rY   r  masked_fillr  )rn   r  ro  ri  rj  r=   r@   countsr  grad_weights_scaler  r   grad_weights                r+   embedding_dense_backwardrv    sN    ',&>)N)V' ' '#| ..!233K%guz::G E""K>22w''''	4D'QQ#G_!$6$@$@$D$DDW3[5EFFD""4++D''	*7<>>:: K !!+y$4!PPSS  r*   c                     d}| D ]}||z  }|S r^   r)   )r5   rL   is      r+   r0  r0  #  s&    	A  	QHr*   tensors
num_chunksc                    g }| D ]}|                                 }||         |z   dz
  |z  |z  }|||         k    r>dgdz  |j        |z
  dz
  z  d|||         z
  gz   }t                              ||d          }|d |         t	          j        |dg          z   }|                    |                    |                     |S )Nr!   r   r"   rU   )r5  r  r   constant_pad_ndrB   Sizeappendr-  )	ry  rQ   rz  padded_tensorstensortensor_sizepad_along_dimr  	view_sizes	            r+   
_pad_chunkr  *  s    
 N 9 9kkmm$S)J6:zIJVK,,,#'V[3.23C 007 C ))&#q99F%
J3C(D(DD	fnnY778888r*   c                 F    | d         j         }| D ]}|j         |k    r dS dS )Nr   FTr  )ry  r  r  s      r+   have_same_ndimsr  ?  s:    1:?D  ;$55 4r*   c                     | d                                          d |         }| D ]6}t          j        |                                 d |         |k    d            7d S )Nr   c                      dS )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr)   r)   r*   r+   r   z+leading_dimension_matches.<locals>.<lambda>L  s    ] r*   )r5  rB   r   )ry  rQ   leading_dim_sizesr  s       r+   leading_dimension_matchesr  G  sq    
))$3$/ 
 
KKMM$3$#44]]	
 	
 	
 	

 
r*   c                    t          j        |dk    d            t          j        t          |           dk    d            | d         j        }| d         j        }| D ]l}t          j        |                                dk    d            t          j        |j        |k    d            t          j        |j        |k    d            mt          |           r.t          j        | d         	                                |          }n>t          j        |dk    d            | D ]!}t          j        ||j
        k     d	            "t          | |           |S )
Nr!   c                      dS )Nz&_chunk_cat expects positive num_chunksr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>U  s    *R r*   r   c                      dS )Nz0_chunk_cat expects a non-empty input tensor listr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>W  s    "T r*   c                      dS )Nz#_chunk_cat expects non-empty tensorr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>\  s    1V r*   c                      dS )Nz8_chunk_cat expects all input tensors with the same dtyper)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>_      N r*   c                      dS )Nz8_chunk_cat expects all inputs tensors on the same devicer)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>c  r  r*   c                      dS )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>j  s    a r*   c                      dS )Nz3_chunk_cat expects dim < ndim for all input tensorsr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>o      M r*   )rB   r   r  r   r  r   r  rG   r4  rQ   r  r  )ry  rQ   rz  expected_dtypeexpected_devicer  s         r+   _preprocess_chunk_cat_inputsr  P  s   
 
Lq"R"RSSS	LGqTT   QZ%Naj'O 	
 	
V\\^^a')V)VWWWLN*NN	
 	
 	
 	M_,NN	
 	
 	
 	
 w $WQZ^^%5%5s;;1Haa	
 	
 	
  	 	FLfk!MM    gs+++Jr*   r   c                     t          | ||          }t          | ||          }|t          j        ||dz             S t          j        ||dz   |           |S )Nr!   )r   )r  r  rB   r7  )ry  rQ   rz  r   r  s        r+   
_chunk_catr  u  sa     'wZ
@
@Cj99N
{yq111	.#'s3333
r*   split_sizesc                     t                               | ||          }|d |D             S t          ||          D ],\  }}t          ||j                   t          ||d           -d S )Nr   c                 N    g | ]"}|                     t          j                   #S )r_  )r  rB   rc  r4   ss     r+   r7   z)split_with_sizes_copy.<locals>.<listcomp>  s)    OOO1e&=>>OOOr*   Tr  )r   split_with_sizesr  r   r  r   )r}   r  rQ   r   splitsr  splits          r+   split_with_sizes_copyr    s     ""4#">>F
{OOOOOO f-- 	N 	NMFEfek222UFMMMMMtr*   
split_size.c                 D    t           j                            | ||          S r:   )r   r  r   )r   r  rQ   s      r+   unsafe_splitr    s    :UJ444r*   c                 D    t           j                            | ||          S r:   )r   r  r  )r   r  rQ   s      r+   unsafe_split_with_sizesr    s      ((SAAAr*   c                 @   | j         }||         }dk    r.|dk    rt          d| d          |                                 fS |z   dz
  z  }ddlm}  ||          }fdt          |          D             }|z  |z
  z
  |d<   t          j        | ||          S )Nr   z split_size is 0 but dim_size is z, expected 0r!   )	guard_intc                     g | ]}S r)   r)   )r4   rx  r  s     r+   r7   zsplit.<locals>.<listcomp>  s    555!:555r*   rU   )r  r3  detachrz  r  rV   rB   r  )r}   r  rQ   ro  r  chunksr  r  s    `      r+   r  r    s    *K3HQq== I8III   #a'J6F @?????YvF5555uV}}555K J$7($BCKO;t[#...r*   tensor_indices_or_sectionsc                    |j         j        dk    rt          d|j                    |j        t          j        k    rt          d|j                   |                                t	          j        dk    pdk    fd           dk    rc|                                }t          |t                    s$t          dt          |          j                   |                     ||          S t          }t          j                                        x}r|j        x}r|j        } |            5  d |D             }d d d            n# 1 swxY w Y   |                     ||          S )	Ncpuz/tensor_indices_or_sections must be on CPU, got z.tensor_indices_or_sections must be int64, got r!   r   c                      d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr)   )	split_dims   r+   r   zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>  s      M<EM M M r*   z%Expected sections to be IntLike, got c                 6    g | ]}|                                 S r)   )item)r4   rx  s     r+   r7   zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>  s     DDDAqvvxxDDDr*   )r  typer3  r   rB   r  rQ   r   r  r3   r   r#   tensor_splitr   _guardsdetect_fake_mode	shape_envignore_fresh_unbacked_symbols)	r}   r  rQ   sectionsctx	fake_moder  r  r  s	           @r+   /tensor_split_tensor_indices_or_sections_py_implr    s    "(-66a>X>_aa
 
 	
 "'5;66_=W=]__
 
 	
 +..00I	LQ()q.	M 	M 	M 	M  
 A~~-2244(G,, 	 QX8OQQ     3///77999I 	:",,I	: 9C
 SUU 	E 	EDD)CDDDG	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E   #...s   5EEEmat1mat2c                     |                                  s2|                                 st          |          }t          |          }|t          j        ||          z  }|dk    r|S ||| z  z   S ru   )rT  
is_complexr  rB   mm)r}   r  r  re   ro   r   s         r+   addmmr    st     !!## DOO,=,= 4yyE


%(4&&
&Cqyy
 r*   use_geluc                     t          | ||||          }|r=| j        rt                              |d          S t                              |          S t                              |          S )Nr   )r   )r  is_cudar   gelurelu)r}   r  r  re   ro   r  r   s          r+   _addmm_activationr    sa     dD$
.
.C "< 	"99Sf955599S>>!99S>>r*   vecc                    |                                  s2|                                 st          |          }t          |          }|t          j        ||          z  }|dk    r|S |                                dk    r|| z  S ||| z  z   S ru   )rT  r  r  rB   mvr   )r}   r  r  re   ro   r   s         r+   addmvr    s     !!## DOO,=,= 4yyE


%(4%%
%Cqyy

yy{{ad{r*   r   rstdgammaNCHxWgroupoutput_maskc
           	      	   t          j        | ||d           t          j        || d           t          j        |d           t          j        |                                z  z  k    fd           t          j        j        fk    fd           t          j        d u p                                k    fd           z  }
t          j        |
z  k    fd           t          j        | |                                        	                    dg          }|                               	                    dg          }d }d }d }|	d	         rdd
|
z  z  }t          j        |
                    d	                                        |
          	                    d          }t          j        |
                    d	                                        |
          	                    d          }t          j        |
                    d                              d|
                    }n|                    |
          	                    d          }|                    |
          	                    d          }t          j        |
                    d          t          j        d|
f|j                            }|z  |z
  |z  |z  |z  |z  }| z  ||z  |z  z
  }|
                    d          }t          |d          }t          |d          }t          j        |                     |
          |          t          j        |                    |
          |          z   |z   }|                    |j                                      |j                  }|	d         r|                    |
          |                    |
          
                    d          z  z
  |
                    d          z  	                    d	g                                        }|	d         r|	                    d	g          }|||fS )NF)allow_cpu_scalar_tensorsc                      d z  z   dS )NzExpect input to have z	 elementsr)   )r  r  r  s   r+   r   z,native_group_norm_backward.<locals>.<lambda><  s    >A>>> r*   c                  $    d  d dj          S )NzExpect mean to have shape (, z
, but got r  )r  r  r   s   r+   r   z,native_group_norm_backward.<locals>.<lambda>@  s"    PaPP5PPDJPP r*   c                  @    d  d                                 nd S )NzExpect gamma to have z elements but got rU   )r   )r  r  s   r+   r   z,native_group_norm_backward.<locals>.<lambda>D  s*    iiieN_U[[]]]egii r*   c                      d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r)   )r  r  s   r+   r   z,native_group_norm_backward.<locals>.<lambda>J  s    cQcc\acc r*   r"   r   r   ri   rU   r!   r  rK  )rG   check_same_devicecheck_same_shaperB   r   r   r  r   r  r   rW   r-  r  r  rY   r<   r   )rn   r   r   r  r  r  r  r  r  r  cpgdsdbd_inputd_gammad_biasr  ds_valdb_valc1c2c3s     ` `````             r+   native_group_norm_backwardr  '  s    
UD$    
5+NNNN	4FFFF	LQ$>>>>>>   
L
q%j PPPPPP   
L+!+iiiii  
 u*C	L	S5[ccccc   
;	&	&	+	+Aq#	6	6	:	:s	:	C	CB			!Q	$	$	(	(aS	(	1	1B!G!G F1~ ?39Yr5??1#5#566>>q%MMQQRSTTFYr5??1#5#566>>q%MMQQRSTTFr""a,, BB
 ZZ5#..22155FZZ5#..22155Fr""
Auc?4;??? B tmf$,t3d:Q>S4Z&4-!++\\"r1%%r1%%Ik))!UC==rBBiaS992>>? 	
 //%+..11%+>>1~ 
 E3''"''!UC*@*@4>>RTCUCU*UU..$$% SaSS\\WQZZ 	 1~ !QCWf%%r*   out2c
                    t          | |||||||||	
  
        }|
||f}t          |          D ]:\  }}|3t          ||         |j                   t	          |||         d           ;|S r  )r  	enumerater   r  r   )rn   r   r   r  r  r  r  r  r  r  rQ  rR  r  r  rZ   rx  rL   s                    r+   native_group_norm_backward_outr    s    " (UD$q!S% F d#J&!! Q Q1=jmQW555Q
14PPPPr*   c                 4    | |                      |          S | S r:   r<   )r5   r   s     r+   _maybe_castr    s    }ttE{{Hr*   grad_outnormalized_shapebiasc                 F  " |j         }|                                }	t          j        |j                  ""fd| |||fD             \  }
}}}|
t          d          |	t          |          z
  }||d          }|d |         }g }g }t          |	          D ]3}||k    r|                    |           |                    |           4t          |          }t          |          }ddl
m}  ||dk              s ||dk              rl|d         r|                    |          nd |d         r|                    ||d                    nd |d         r|                    ||d                    nd fS t          ||                                          }t          ||                                          }|t          d          ||z
  |z  }||
|z  }n|
}||z  }t          j        ||d          }t          j        ||          }t          j        ||d          }t          j        ||          }||z
  |z
  }d }d } d }!|d         r||z  |z  }|d         r4|2t          |          dk    rt          j        |
|z  |d	          } n|
|z  } |d         r@|>t          |          dk    rt          j        |
|d	          }!n|
                                }!t%          ||j                  t%          | ||j        nd           t%          |!||j        nd           fS )
Nc              3   b   K   | ])}|!|                     t          j                  n|V  *d S r;  )r<   rB   rc  r4   r5   r=   s     r+   r  z-native_layer_norm_backward.<locals>.<genexpr>  sZ       9 9  = 	
e.EFFF9 9 9 9 9 9r*    grad_out_cast should not be Noner   rx  r!   r"   zinput_cast should not be NoneTF)r  rQ   rG   get_computation_dtyper   r3  r  rV   r~  r0  rz  ry  rt  rY   rB   r   r   r  r  )#r  r   r  r   r  r   r  r  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesrx  r  Mry  x_hat
grad_x_hatabr  r  r  rM   r  d_weightr  r=   s#                                     @r+   native_layer_norm_backwardr    s    +KJ3EK@@9 9 9 9 E640	9 9 95M:{I ?@@@,---DTUU#JUdU#J#%#%: ( (99$$Q''''$$Q''''ZAZAKKKKKKQ!V$$ 
(=(=a1f(E(E 
,7NDEOOK(((3>q>KEOOK.///t3>q>KEOOK.///t
 	

 T:>>#3#344DT:>>#3#344D<===$$&E"[0

"
QA	*/66A	:u	%	%B	2($	/	/B	5"		BEBJE!G"H F1~ %!8u$1~ -+1 !!A%%y!68I5QQHH$u,H1~ +)/ !!A%%Y}.?GGFF"((**F 	GU[))Hf.@flldKKF$*:DJJEE r*   c          
          t          | |||||||          }||	|
f}t          |          D ]:\  }}|3t          ||         |j                   t	          |||         d           ;|S r  )r  r  r   r  r   )r  r   r  r   r  r   r  r  rQ  rR  r  r  rZ   rx  rL   s                  r+   native_layer_norm_backward_outr    s     (%)4vt[ F d#J&!! Q Q1=jmQW555Q
14PPPPr*   c                    g }t          t          |                    D ]/}|                    |                                 |z
  dz
             0t	          j        | j                  }|                     |          }|b|t          j	        t          j
        fv r$t          j        t          j	                  j        }n&t          j        t          j                  j        }n|}t          j        t          j        j        j                            t          j        |d                              |d          |                    }	|                    |	          }
||
                    |          }
| j        p
|d uo|j        }t	          j        |           }|t          j        t          j        fv }|s*|s(|
                                }
|	                                }	|
                    |           }||	fS )Nr!   r"   Tr  )rV   r  r~  rQ   rG   r  r   r<   rB   r   r   finforG  r   rsqrtopsr   addScalarr   r   r   	is_nestedr   channels_lastchannels_last_3dr  r<  )r   r  r   rG  dims_to_reducerx  r=   upcasted_inputeps_valrqrst_inputupcasted_resultr  r`  is_channels_lastr  s                  r+   _fused_rms_normr    s    !#N3'(()) 3 3eiikkAo12222 3EK@@XX/00N { @@@k%-004GGk%-004GG+ 		!!Ina((--.$-OOQX	
 	
 K %((55O)--f55 LF$$6$K6;KI/66M$) 
  /- /)4466!,,.. $$U++F;r*   c                    |j         }|                                }t          j        |j                  }|                     |t          j                  }	|                    |t          j                  }
|!|                    |t          j                  nd }|	t          d          |t          |          z
  }||d          }|d |         }g }g }t          |          D ]3}||k    r|                    |           |                    |           4t          |          }t          |          }ddlm}  ||dk              s ||dk              rF|d         r|                    |          nd |d         r|                    ||d                    nd fS t!          ||
                                          }||	|z  }n|	}d }d }|
|z  }|d         r(t          j        ||z  |d          }|||z  |z  z
  |z  }|d         r4|2|	|z  }t          |          dk    rt          j        ||d          }n|}t%          ||j                  t%          ||j                  fS )	Nr_  r  r   r[  r!   Tr  F)r  rQ   rG   r  r   r<   rB   rc  r3  r  rV   r~  r0  rz  r\  rt  rY   r   r  )r  r   r  r  r   r  r  r  r=   r  r  r  r   r  r  r  r  rx  r  r  r\  r  r  r
  r  sum_vald_weight_full_shapes                              r+   _fused_rms_norm_backwardr!  L  s    +KJ3EK@@KK)@    M +5;RSSJ  			#53J	KKK 
 ?@@@,---DTUU#JUdU#J#%#%: ( (99$$Q''''$$Q''''ZAZADDDDDD~a1f 
Q!7!7 
,7NDEOOK(((3>q>KEOOK.///t
 	

 T:>>#3#344D"[0

"
!G"HE1~ >)EJ.4EtTTTg 55=1~ ++1+e3 !!A%%y#):E  HH +H 	GU[))Hek** r*   running_meanrunning_varmomentum
functionalc	                    dgt          t          d|                                                     z   }	t          j        | j                  }
|}|}|r$t          j        | j                  }
|                     |
          }t          j        ||	dd          \  }}t          j	        ||z             }| |z
  |z  }t          j
        ||	          }t          j
        ||	          }|%||z  d|z
  |z  z   }|s|                    |           |g|                                 | j        d         z  }t          j
        ||	          }|||dz
  z  z  }||z  d|z
  |z  z   }|s|                    |           n||t          d          |                    |
d          }|}|                    |
d          }|}|}dt          j        ||z             z  }| j        j        d	k    r|}|}n*|                     d
          }|                     d
          }t'          ||                                 dz
            }t'          ||                                 dz
            }| |z
  |z  }|>|                                }t'          ||                                 dz
            }||z  }|>|                                }t'          ||                                 dz
            }||z   }| j        j        d	k    r6|                    | j                  }|                    | j                  }|                    | j                  ||||fS )Nr   r"   r   T)rQ   
correctionr  r!   z:running_mean and running_var must not be None in eval mode)r   r   r  r   )r|  rV   rQ   rG   r  r   r<   rB   var_meanr  r  copy_r   r  r3  rg  r  r  rt  rY   rD  )r   r   r  r"  r#  r   r$  rG  r%  reduction_dimsr=   new_running_meannew_running_var	input_acc
biased_varr   r  r  	save_mean	save_rstdnsqueezed_varunbiased_varinvstds                           r+   native_batch_norm_helperr6    sa    S4a 5 5666N3EK@@#!O .)!7DDHH#4H55	 >>a
 
 

D {:+,,$,$&M$77	M$77	#')3q8||6SS 5""#3444"A.A !=^DDL'1A;7L&5X8TTO 3!!/222;#6 L   $->TJJ'!nn+<4nHH%ejs!2334<%%$III--I--I uyy{{Q77"6599;;?;;$,&(!!"6599;;?;;&||~~ uyy{{Q77$|E!!LLu{L33	LLu{L33				$$ r*   r0  save_invstdc                 F    t          | |||||||d	  	        \  }}	}
}}||	|
fS NFr6  r   r   r  r"  r#  r   r$  rG  r  r0  r1  rX   s               r+   native_batch_normr<    sB     *Bvt\;(CQV* *&FIy!Q 9i''r*   c           
         |!|t                               | |||||          S |t          d          |t          d          |r!t                               | |||||||          S t                               | ||||||          S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r   _native_batch_norm_legitr{  $_native_batch_norm_legit_no_training)r   r   r  r"  r#  r   r$  rG  s           r+   native_batch_norm_decompositionr@  	  s      3,,648S
 
 	
 <
 
 	
 <
 
 	
  
,,64{HhPS
 
 	
 8864{Hc
 
 	
r*   c                 `   |                      |          }||z   dz
  |z  dk    r]|dk    rWfdt          |          D             }|z  |z
  z
  ||dz
  <   t          j        j        j                            | ||          S t          j        j        j                            | |          S )Nr!   r   c                     g | ]}S r)   r)   )r4   rX   r  s     r+   r7   z(unsafe_chunk_py_impl.<locals>.<listcomp>4  s    999az999r*   )	r5  rV   rB   r  r   r  r  r  r   )r  r  rQ   r  r  r  s        @r+   unsafe_chunk_py_implrC  .  s    {{3HV#a'F2JQ8q==99995==999",
V0Ch0N"OFQJy~5==fkSVWWW9>&--fj#FFFr*   c           
      N    t           j                            | ||||d||          S r9  )r   r>  r  )r   r   r  r"  r#  r$  rG  s          r+   r?  r?  :  s7     (00	 	 	r*   c                 F    t          | |||||||d	  	        \  }}	}
}}||	|
fS r9  r:  r;  s               r+   r>  r>  P  sB     *Bvt\;(CQV* *&FIy!Q 9i''r*   c                 F    t          | ||d d |||d	  	        \  }}}}	}	|||fS r9  r:  )
r   r   r  r   r$  rG  r  r0  r1  rX   s
             r+   !_native_batch_norm_legit_no_statsrG  a  sA     *BvtT48S%* *&FIy!Q 9i''r*   c                     t          | |||||||d	  	        \  }}	}
}}|t          d          |t          d          ||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be None)r6  r3  )r   r   r  r"  r#  r   r$  rG  r  r0  r1  r,  r-  s                r+   #_native_batch_norm_legit_functionalrK  p  sw    " 	!vt\;(CQU	 	 BCCCABBB9i)9?JJr*   c           	         t           j                            | ||||d|          }d}|t           j        j        j        k    r t           j                            | |          }t          j        |t           j        | j        | j	                  S )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutr  )
rB   _C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8rM  r  )	r   r   r  r"  r#  rG  r   backendreserve_sizes	            r+   _get_batch_norm_reserve_tensorrW    s      h11vt\;c G L%(,222xHH8
 
 ;EKU\   r*   c                 t    t          | ||||d||d	  	        \  }}}	}
}
t          | |||||d          }|||	|fS )NTFr   r6  rW  r   r   r  r"  r#  r$  rG  r  r0  r1  rX   reserves               r+   _batch_norm_with_updater]    sp     *B
* 
*&FIy!Q -vt\;d  G 9i00r*   c                     t          | ||||d||d	  	        \  }}}	}
}t          | |||||d          }|
t          d          |t          d          |||	||
|fS )NTrY  rI  rJ  )r6  rW  r3  )r   r   r  r"  r#  r$  rG  r  r0  r1  new_rmnew_rvr\  s                r+   "_batch_norm_with_update_functionalra    s      	!vt\;hT	 	 -vt\;d  G ~BCCC~ABBBIy'66BBr*   c                 t    t          | ||||d||d	  	        \  }}}	}
}
t          | |||||d          }|||	|fS )NFrY  rZ  r[  s               r+   _batch_norm_no_updaterc    sp     *B
* 
*&FIy!Q -vt\;e  G 9i00r*   c                     |t          d|           t          j        |           |k                         t          j                  }|                    |           | z  d|z  z  }||fS )Nz=generator must be None for _fused_dropout decomposition, got r   ri   )r3  rB   rU  r<   rT  r<  )r   rY  	generatorr  rW  s        r+   _fused_dropout_decompositionrf    sy     WIWW
 
 	
 OE""Q&***==D
,,u


%q
1C;r*   )r   rM  r  
pin_memorynon_blockingr`  r  rg  rh  r`  c                &   |r"|t           j        k    rt          d|           |rt          d          t          | t           j        t
          t          t          t          f          s$t          dt          |           j
                   |4|2|0t          | t           j                  r|                                 S | S d}t          | t           j                  r| }nt          j        |           }|[||j        k    rP|-|j	        dk    r"t           j                            ||          }d}t           j                            |||          }|$|s"t           j                            ||          }d}|t          j        ||          S |S )Nz*layout must be None or torch.strided, got z:pin_memory=True is not supported in _to_copy decompositionz x must be Tensor or scalar, got Fr  Tr_  )rB   stridedr3  r3   r   r  r  r  complexr  r#   r  scalar_tensorr  _primsconvert_element_type
device_put)	r5   r   rM  r  rg  rh  r`  dtype_convertedx_tensors	            r+   _to_copyrr  	  s     T&EM))R&RRSSS 
H
 
 	
 a%,UD'BCC TRQ@PRRSSS~%-M,Aa&& 	7799HO!U\"" *&q))f77!5!5|885IIH"O<**8V\JJ<44XuEE {8=AAAAOr*   c                 6    t                               |           S r:   )r   alias)r5   s    r+   nop_decompositionru  ;	  s     ::a==r*   out3exponential_average_factorepsilonc           
      0   t                               | |||||||          \  }}	}
|r%||	|
|                     dt          j                  fS ||                    d          |                    d          |                     dt          j                  fS )Nr(  r   )r   r<  rt  rB   rT  )r   r   r  r"  r#  r   rw  rx  r  r	  r  s              r+   cudnn_batch_normrz  C	  s     $$"	 	GAq!  C1aU[AABB	EK00	 r*   c                     t          |          D ]<\  }}|dk    r1|| j        k     r| j        |         |k    s|                     |          } =| S r^   )r  r  r  rW   )r5   broadcast_maskr   r  s       r+   _broadcast_batch_norm_backwardr}  e	  sW    // " "
d199dQVmm0E0ED!!AHr*   r\  c                 2    t          | |||||||||	
  
        S r:   )native_batch_norm_backward)r  r   r   r"  r#  r0  r7  rM  rG  r  r\  s              r+   batch_norm_backwardr  l	  s4     &  r*   c
                   & |j         }
||j         }n|
}t          j        |j                   &&fd| ||||||fD             \  }}}}}}}|j        }|                                }|dk     rt          d|           d}t          t          |                    ||         z  }|}|}|r||t          d          n,||t          d          |}t          j	        ||z             }dg|z  }||         ||<   g }t          |          D ]}||k    r|                    |           t          ||          }d|z  }t          j        ||          }t          j        |||z
  z  |          }t          ||z  |          }t          t          j        ||z  ||z            |          } |t          ||          dz  }!nt          ||z  |          }!|r||z
  | z  }"||"z
  |z
  |!z  }#n||!z  }#|	d         r||z  }$nd }$|	d         r|}%nd }%|#                    |
          t!          |$|          t!          |%|          fS )Nc              3   J   K   | ]}||                               n|V  d S r:   r  r  s     r+   r  z-native_batch_norm_backward.<locals>.<genexpr>	  sM       	 	 $%=a	 	 	 	 	 	r*   r"   z*rank of the input must be at least 2, got r!   z1mean and invstd must not be None in training modezDrunning_mean_cast and running_var_cast must not be None in eval moderi   )r   rG   r  r  rQ   r3  r0  r|  rB   r  rV   r~  r}  r   r   r<   r  )'r  r   r   r"  r#  r0  r7  rM  rG  r  r  weight_dtyper  r  r  running_mean_castrunning_var_castsave_mean_castsave_invstd_castr  
input_rankr   num_featuresr   r5  r|  reduction_axesrx  r  grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrZ   ru  	grad_biasr=   s'                                         @r+   r  r  	  s    +K|"3EK@@	 	 	 	 
	 	 	 +KJA~~V*VVWWWD[))**[->>LDF 
5<6> !TUUU * $(8(@ V   !-344!"j 0N&t,N4 "N: % %99!!!$$$)$??DDi~>>OImzD'89>JJE./E~VVI/	%$,00 J
 3FNKKcQ

3[ .
 

  0T!Z/$t+y8JF

"Z/
1~ fn1~ #			 	k""K..I|,, r*   c
                    t          | |||||||||	
  
        }|
||f}t          |          D ]:\  }}|3t          ||         |j                   t	          |||         d           ;|S r  )r  r  r   r  r   )r  r   r   r"  r#  r0  r7  rM  rG  r  rQ  rR  r  r  rZ   rx  rL   s                    r+   native_batch_norm_backward_outr  	  s    " ( F d#J&!! Q Q1=jmQW555Q
14PPPPr*   save_varc                 L    t                               || |||||d|g d
  
        S NT)TTTr   r  )r   rn   r   r"  r#  r0  r  rx  s           r+   miopen_batch_norm_backwardr  
  s@     **  r*   reserveSpacec	                 L    t                               || |||||d|g d
  
        S r  r  )	r   rn   r   r"  r#  r0  r  rx  r  s	            r+   cudnn_batch_norm_backwardr  6
  s@     **  r*   c                 ^   | j         | j        t                    t          j        dv fd           | j        dd          D ]}t          j        |dk    fd           d         |d         z  dk    rd         |d         z  dk    rt          d t          dd          |          D                       }t          d t          dd          ||          D                       }t          j        j        	                    | ||          S d	 d
 fd} |d         |d                   \  }}}}	 |d         |d                   \  }
}}}| dt          |d          |
f         }|	s|st          j        |d          S d } |||||	d          \  }} |||||d          \  }}d }t          t          |j        d                   t          |j        d                             D ]'\  }}||d|d d |f         }||d|d d |f         z   }(|||z  z  S )Nr  c                      d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r)   r  s   r+   r   z%adaptive_avg_pool2d.<locals>.<lambda>[
  s    RDRR r*   ra  r   c                  *    dt                      dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r&  r  r  s   r+   r   z%adaptive_avg_pool2d.<locals>.<lambda>`
  s&     I9>uI I I r*   rU   c              3   &   K   | ]\  }}||z  V  d S r:   r)   )r4   rx  r-  s      r+   r  z&adaptive_avg_pool2d.<locals>.<genexpr>f
  s*      GG$!QqAvGGGGGGr*   c              3   4   K   | ]\  }}}||d z
  |z  z
  V  dS r!   Nr)   )r4   rx  r-  r  s       r+   r  z&adaptive_avg_pool2d.<locals>.<genexpr>g
  sE       
 
 '1aAQ!O
 
 
 
 
 
r*   c                 6    t          j        | |z  |d          S )Ntruncrounding_moderB   divr  r	  r  s      r+   start_indexz(adaptive_avg_pool2d.<locals>.start_indexl
  s    yQ9999r*   c                 H    t          j        | dz   |z  |z   dz
  |d          S )Nr!   r  r  r  r  s      r+   	end_indexz&adaptive_avg_pool2d.<locals>.end_indexo
  s*    y!a%1q1,awGGGGr*   c                    t          j        |t           j                  } |||           }| |z  dz   }| |z  }|dk    p||z  dk     }|r|dz  }n|dk    r|dz  }t          j        |t           j                  }|                    d          |z   }|rLt          j        | dz
  |j        |j                  }	t          j        ||	          } |||           }
|
|z
  }n|}||||fS )Nr  r!   r   rU   r  )rB   r  r  rW   rl  r   r  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxr  maxvali1lengthr  r  r  s               r+   compute_idxz(adaptive_avg_pool2d.<locals>.compute_idxr
  s1   hvU[III[733 x'!+	(#q(GH{,Ba,GH 	NIIANIL6MMM	ll2* 	 (!39SZ  F -V,,C 68W55B"WFFFFIx//r*   .rK  )r   rU   r   c                    t          |t                    r| |fS |dk    rt          d|           ||                    d          k    }|dk    rt	          |d          }t          j        | |d          } t	          ||           }| |fS )Nr   z)dim should be negative when masking, got rU   ra  rK  r   )r3   r   r3  rW   rY   rB   rr  )valsr  r  r  rQ   r  s         r+   
maybe_maskz'adaptive_avg_pool2d.<locals>.maybe_mask
  s    fg&& 	 < axx$%VQT%V%VWWW 0 0 4 44Dbyy(q11$T455D&vt44F<r*   )r  rQ   r   )r  r  r  rB   r   r  r  nnr%  
avg_pool2drY   r   r
   rV   )r   r  r  r}  kernelr  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr  r  retrx  jr  r  r  r  r  s                      @@@@@r+   adaptive_avg_pool2dr  Q
  s   
 \FKEu::D	LRRRR   [ 
 
FI I I I	
 	
 	
 	
 Ry;r?"a''E"IB,G1,L,LGG#eBCCj+*F*FGGGGG 
 
+.uRSSz;+O+O
 
 
 
 
 x"--eVVDDD: : :H H H0 0 0 0 0 0 0@ /:k%)[QS_.U.U+D(K.9k%)[QS_.U.U+D(K'a00$67D .j .z$H----        Zhjb  ND(  Zhjb  ND(
 Cdjn--uTZ^/D/DEE + +1;sAqqq!|$CCS!QQQ\**CC(X%&&r*   c                 ,   t          j        d| d           t          | j        d |                    t          |          z   }t	          d |D                       r|                     |          S t          t          j        | j        d |                    }t          t          j        |          }dg| j	        z  }| j        d |          |d | <   |t                              || j                                      |          |z  z                       d          }|                     |          }	t                              |	                    d          |g|                     d          d                              |	j                  S )	Nmax_unpoolingd_forward_outc              3   "   K   | ]
}|d k    V  dS r  r)   r  s     r+   r  z _max_unpoolnd.<locals>.<genexpr>
  s&      
(
(a16
(
(
(
(
(
(r*   r!   r  rU   Fr.  )rG   alert_not_deterministicr|  r  anyrt  r   operatorr   r  r   r  r  r  r-  r1  )
r}   r  r  rQ   output_shapenchwindices_nc_shapeindices_flatr  s
             r+   _max_unpoolndr  
  st    
!"D#"D"D"DEEE
5SD5)**T+->->>L

(
(<
(
(
((( ,~~l+++	dj3$/	0	0B	k	*	*BsTY"j3$/UsdU$++b+55::;KLLrQQgbkk  ^^L))F!!r\NDLL,<,< "  
d6<r*   c                     t          j        j        t           j        k    fd           t          j        t	                    dk    fd           t          j         j        dv  fd           t          j         j        j        k     fd           t          d j                  D ]2t          j                                       dk     fd	           3t           d          S )
Nc                      d j          S )Nz2elements in indices should be type int64 but got: r   )r  s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s    TW]TT r*   r"   c                  *    dt                      dS )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r  r  s   r+   r   zmax_unpool2d.<locals>.<lambda>
  '    4;''4 4 4 r*   r  c                      d j          dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r  r   s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s!    =%)Y= = = r*   c                  (    dj          d j          S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r  )r  r}   s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s1    BQUQ[ B B29-B B r*   r!   r   c                       dj          d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension  being empty.r  )rx  r}   s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s4    H:H H78H H H r*   )
rB   r   r   r  r  r  r  rV   r5  r  )r}   r  r  rx  s   ```@r+   max_unpool2dr  
  sT    
L$TTTT   
LKA	
 	
 	
 	
   
L	V	
 	
 	
 	
   
L
gm#	
 	
 	
 	
 	
   1di   
 
IIaLL1    	
 	
 	
 	
 wQ777r*   c                 
    t          j        j        t           j        k    d            t          j         j        dv  fd           t          j        t                    dk    fd           t          j        t                    dk    fd           t          j        t                    dk    fd           t          j         j        j        k     fd           t          d	 j                  D ]2t          j                                       d
k     fd           3t          j        d
         d
k    od	         d
k    od         d
k    fd           t           d          S )Nc                      dS )Nz(elements in indices should be type int64r)   r)   r*   r+   r   zmax_unpool3d.<locals>.<lambda>  s    .X r*   rK  r  c                      d j          dS )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with r  r  r   s   r+   r   zmax_unpool3d.<locals>.<lambda>  s    w_d_iwww r*   r   c                  *    dt                      dS )NzVThere should be exactly three elements (depth, height, width) in output_size, but got r  r  r  s   r+   r   zmax_unpool3d.<locals>.<lambda>  r  r*   c                  *    dt                      dS )NzRThere should be exactly three elements (depth, height, width) in stride, but got: r  r  r}  s   r+   r   zmax_unpool3d.<locals>.<lambda>  s    |ehioepep||| r*   c                  *    dt                      dS )NzSThere should be exactly three elements (depth, height, width) in padding, but got: r  r  )r  s   r+   r   zmax_unpool3d.<locals>.<lambda>"  s    ~fijqfrfr~~~ r*   c                  (    dj          d j          S r  r  )r  r   s   r+   r   zmax_unpool3d.<locals>.<lambda>&  s1    BQVQ\ B B29-B B r*   r!   r   c                       dj          d  dS )NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got r  r  r  )rx  r   s   r+   r   zmax_unpool3d.<locals>.<lambda>/  s4    I ;I I89I I I r*   r"   c                      d  S )Nz5strides should be greater than zero, but got stride: r)   r  s   r+   r   zmax_unpool3d.<locals>.<lambda>8  s    PPP r*   )
rB   r   r   r  r  r  r  rV   r5  r  )r   r  r  r}  r  rx  s   `````@r+   max_unpool3dr    s    
L$&X&X   
L
fwwww   
LKA	
 	
 	
 	
   
LFq||||   
LG~~~~   
Lw}$	
 	
 	
 	
 	
   1ej!! 
 
JJqMMA    	
 	
 	
 	
 
Lq	A9&)a-9F1IMPPPP  
 a888r*   )ro   r  c                ,    t          | |||d|          S )NTinplacero   
_index_addr5   rQ   r  r  ro   s        r+   
index_add_r  >  s     aeVTGGGGr*   c                ,    t          | |||d|          S )NFr  r  r  s        r+   	index_addr  J  s     aeVU%HHHHr*   r  c                   t          j        | j                  t          j        j        dk    fd           j        dk    r                    d          nd|j        dk    r|                              ndt          j        k    fd           dk    r`t          j        | j                  t          j        t          u p!t          j	        t                              fd           |z  }| j        dk    }|r|                     d          n| }dz  fz   }|rt          j        nt          j        }	 |	|||d          }
|r| S |r|
                    d          n|
                                S )	Nr!   c                      d j          dS Nz(Index should have dimension 1 or 0 (got rF  r  r  s   r+   r   z_index_add.<locals>.<lambda>c      H5:HHH r*   r   c                      d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r)   )rQ   
index_sizer  s   r+   r   z_index_add.<locals>.<lambda>i  s"    sjssYdssmpss r*   c                  0    dt                      d dS )Nzalpha argument of type z cannot be safely cast to type !)r  )ro   python_types   r+   r   z_index_add.<locals>.<lambda>p  s"    hd5kkhhZehhh r*   r:   Tr.  )rG   canonicalize_dimsr  rB   r   r5  dtype_to_typer   r  is_weakly_lesser_typer  rW   r   
index_put_	index_putr  r  )r5   rQ   r  r  r  ro   zero_dimr\  r  r  r   r  r  r  s    ``  `     @@@r+   r  r  W  s    
!!&#
.
.C	L
aHHHH   #(*//AqJ&,kAoo&++c"""1K	Lz!ssssss   zz)!'224 E*4;;DDhhhhh	
 	
 	

 %v{H#	*QB
C-5(
"C#*>I
)BV
5
5
5C @!)?s{{1~~~s~~/?/??r*   r   c           
         t          j        t          |           dk    d            t          |           }| d                                         }|dd          }t	          d | D                       }|r||f}n||f}||z   }| d                             ||          }dt          |          z  }	t          |          D ]}
| |
         }t                              ||	d||                    d          z
  fz   |          }|rt          	                    ||d|
          }bt          	                    ||d|
          }|S )Nr   c                      dS )Nz#received an empty list of sequencesr)   r)   r*   r+   r   zpad_sequence.<locals>.<lambda>  s    -R r*   r!   c              3   @   K   | ]}|                     d           V  dS r  r5  )r4   r5   s     r+   r  zpad_sequence.<locals>.<genexpr>  s,      //!&&))//////r*   )r   r   rQ   r  )
rB   r   r  r5  r   r   rV   r   r|  r  )	sequencesbatch_firstpadding_valuesequences_sizemax_sizetrailing_dimsmax_lenout_dimsr   dim_paddingsrx  currseqrows                r+   pad_sequencer    sd    
LY!#%R%RSSS^^N|  ""HQRRLM//Y/////G -"G,^,-'H
A,

-
8
8CC...L>"" @ @A,""\Q',,q//(A$BBM
 
  	@%%c3AQ%??CC%%c3AQ%??CCJr*   c                 *    t          | |||d          S )NTr  _index_copyr5   rQ   r  r  s       r+   index_copy_r#    s    q#ufd;;;;r*   c                 *    t          | |||d          S )NFr  r   r"  s       r+   
index_copyr%    s     q#ufe<<<<r*   c                   t          j        | j        |          }t          j        j        dk    fd           | j        dk    }|r|                     d          n| }j        dk    r                    d          nd|z  fz   }|rt          j        nt          j        } ||||          }	|r| S |r|		                    d          n|	
                                S )Nr!   c                      d j          dS r  r  r   s   r+   r   z_index_copy.<locals>.<lambda>  r  r*   r   r:   )rG   r  r  rB   r   rW   r   r
  r  r  r  )
r5   rQ   r  r  r  r  r\  r  r  r   s
     `       r+   r!  r!    s     
!!&#
.
.C	L
aHHHH  
 v{H#	*QB"'*//EOOAuE
C-5(
"C#*>I
)BV
$
$C @!)?s{{1~~~s~~/?/??r*   c                    t          j        |                     d          |           }t          j        t          j        |                      }| j        s| j        r|                     d          }n|}|t          j        |          z
  |fS )Nr)   r(  )rB   r  rt  rj   r   r  is_xpurO  )r}   r   rl   r   s       r+   log_sigmoid_forwardr*    s~     -r**D
1
1C	59T??"##A| t{ %%Q''r*   lowhighre  c           	          t          j        | j        |                                 t	          |          t	          |          | j        | j        |          S )N)r}  r+  r,  r   r  re  )prims_uniform_helperr  r}  r   r   r  )r5   r+  r,  re  s       r+   uniformr0    sM      	xxzzcNNt__gx   r*   c                 L    |                      t          | |||                    S r:   )r*  r0  )r}   r+  r,  re  s       r+   uniform_r2    s"    ::gdCy99:::r*   c                 p   t          |           dz
  }|At          j        |d u d            t          j        t          |          |k    d            |S |t          j        |d u d            t          j        t          |          |k    d            g }t          |          D ]u\  }}t	          |          |k    r/|                    | |dz            t	          |          z             G|                    t          | |dz            |z                       v|S t          j        dd            d S )Nr"   c                      dS Nz9Must specify exactly one of output_size and scale_factorsr)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>      O r*   c                      dS N r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  s    R r*   c                      dS r5  r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  r6  r*   c                      dS r8  r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  s    r r*   Fc                      dS r5  r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  s    R r*   )r  rB   r   r  r  r~  r   )r>  r  scale_factorsspatial_dimensionsrx  r  s         r+   upsample_compute_output_sizer?    sg   Z1,T!OO	
 	
 	
 	S%%);;ZZHHH 4OO	
 	
 	
 	S''+==zzJJJm,, 	C 	CDAq1vv{{"":a!e#4s1vv#=>>>>""7:a!e+<q+@#A#ABBBB	LRR    r*   c                     | d S | |         S r:   r)   )scalesr  s     r+   get_scale_valuerB    s    ~t#;r*   r=  c                     t          |                                 ||          }|r|nd gt          |          z  }t          | ||          S r:   r?  r5  r  _upsample_nearestr   r  r=  osizerA  s        r+   _upsample_nearest_vecrH    sO     ){MRRE&?TFSZZ,?  UE6222r*   c                     t          |                                 ||          }|r|nd gt          |          z  }t          | ||d          S NTexactrD  rF  s        r+   _upsample_nearest_exact_vecrM    sT     ){MRRE&?TFSZZ,?  UE6>>>>r*   c                    g }t          |          }|rdnd}t          |          D ]}||         }| j        | |z            }	||         ||         dk    r|	|	||         z  z  n|	|z  }
t          j        |t          j        | j                  }||z   |
z                      t          j                  }t          |dz
  |z
            D ]}|	                    d          }|
                    |           |S )Nr   r   r   r  r!   rU   )r  rV   r  rB   r  r   r  r<   r  rW   r~  )r   r  rA  rL  r  num_spatial_dimsr  r  rG  isizerp   output_indicesinput_indicesrX   s                 r+   !_compute_upsample_nearest_indicesrS  -  s#    G;''"SSsF#$$ & & A--12
 ay$Q UVAY&'' 	 e5=VVV(61U:>>u{KK'!+a/00 	8 	8A)33B77MM}%%%%Nr*   )preserve_memory_formatr  rA  c                 &    t          | ||g          S r:   rE  r   r  rA  s      r+   upsample_nearest1drX  S  s     UK&:::r*   c                 *    t          | ||gd          S rJ  rV  rW  s      r+   upsample_nearest_exact1drZ  _  s     UK&FFFFr*   scales_hscales_wc                 (    t          | |||g          S r:   rV  r   r  r[  r\  s       r+   upsample_nearest2dr_  m  s     UK(H1EFFFr*   c                 ,    t          | |||gd          S rJ  rV  r^  s       r+   _upsample_nearest_exact2dra  z  s      UK(H1ETRRRRr*   scales_dc                 *    t          | ||||g          S r:   rV  r   r  rb  r[  r\  s        r+   upsample_nearest3dre    s     UK(Hh1OPPPr*   c                 .    t          | ||||gd          S rJ  rV  rd  s        r+   _upsample_nearest_exact3drg    s+     {Xx:$   r*   rL  c                 8   t          | |||          }d d g|z   }t                              | |          }|j        dk    rYt	          j        |           }| j        d         }| j        j        dk    r|dk     rt          j
        }|                    |          }|S )NrK  rK  r!   cudar_  )rS  r   _unsafe_indexr  rG   r   r  r  r  rB   rc  r  )	r   r  rA  rL  spatial_indicesr  r  r`  
n_channelss	            r+   rE  rE    s     8{F%  O Tl_,Gw//F{a3E:: [^
<&&:>>!3M"""??Mr*   c                      |r|rdn|rdn|rdndt                     z  dk    r"t          dt                      d            fdt          dt                               D             S )	Nr  rK  r   r"   r   zlen(params)=z  is not divisible by group_size=c                 D    g | ]}t          ||z                      S r)   r  )r4   rx  
group_sizeparamss     r+   r7   z!gather_params.<locals>.<listcomp>  s<       ./fQZ'())  r*   )r  r3  rV   )rp  
has_biaseshas_projectionsro  s   `  @r+   gather_paramsrs    s     o 

	 

	 



6{{Z1$$T3v;;TT
TT
 
 	
    38CKK3T3T   r*   c                     |r3| d|z           |d|z           }}| d|z  dz            |d|z  dz            }}n| |         ||         }}d\  }}||||fS )Nr"   r!   NNr)   )rp  hiddensrx  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddens           r+   params_hiddensr|    sr     0!'AAJ
%+AEAI%6A	8Jl!'GAJJ
%/"lz<==r*   c                     ||k    rt          d| d| d          |                    |                     d|||z
                       |                     dd|          S )Nlast_batch_size (z) must be > batch_size (rF  r   )r3  r~  r6  )ry  last_batch_size
batch_sizerv  s       r+   update_hidden_for_packedr    sx    *$$VVVVVV
 
 	
 NN:$$Q
Oj4PQQRRRQ:...r*   c           	          ||k    r| S ||k    rt          d| d| d          t          j        | |                    d|||z
            f          S )Nr~  z) must be < batch_size (rF  r   )r3  rB   concatr6  )ry  r  r  
inp_hiddens       r+    update_hidden_for_packed_reverser    s     *$$*$$VVVVVV
 
 	
 <a*2NOO	
  r*   c           	         |d         }|d         }|r|d         nd }	|r|d         nd }
g }g }|r|d         n|d         }|                     dd|          }t          j        | t          |                    }|r|d d d         }|D ]d} | j        d         }||k    rn'|rt          ||||          }nt          ||||          } || |||	||
          }|}|                    |           e|r|                                 n)|                    |           |                                 t          j	        |d          }|st          j	        |d          n|}||fS )Nr   r!   r"   r   rU   )
r6  rB   r  r|  r  r  r  r~  reverser7  )inphiddenrp  rq  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputrv  r  ry  	split_inprx  r   
hidden_outs                      r+   one_layer_rnn_datar    s    q	Iq	I%/fQii4G%/fQii4GK"$G)0Dk"ook!nOq!_55JCk!2!233I $dddO	 ' 'IaLa 	9OQ JJ 2OQ J YsJ	7IwWW
:&&&& z"""
)K
#
#C.5E7A&&&:J
?r*   c                       fd}|S )Nc                 H     t          j        |||          | z             S r:   r   linearrx  ry  r  r  r  r  nonlinearitys         r+   rM   zrnn_cell.<locals>.inner+  s&    |AHZGDDqHIIIr*   r)   r  rM   s   ` r+   rnn_cellr  *  s(    J J J J J Lr*   c                       fd}|S )Nc                 t    t          j        | ||          }  t          j        |||          | z             S r:   r  r  s         r+   rM   zrnn_cell_data.<locals>.inner2  s9    HQ	7++|AHZGDDqHIIIr*   r)   r  s   ` r+   rnn_cell_datar  1  s(    J J J J J Lr*   c           	         |d         }|d         }|r|d         nd }|r|d         nd }	t          j        | ||          }
|r|
                    d          n|
}
|                    d          }g }|
D ]'} |||||||	          }|                    |           (|r|                                 t          j        |d          }||                    d          fS )Nr   r!   r"   r   )	r   r  fliprW   r~  r  rB   r7  r  )r  r  rp  rq  r  r  r  r  r  r  precomputed_inputry  r  rx  r   s                  r+   one_layer_rnnr  9  s   q	Iq	I%/fQii4G%/fQii4Gi995<S)..q111BS!!!$$JK ' 'Yq*i)WUU
:&&&& 
)K
#
#C
""1%%%%r*   c                    |d         }|d         }|r|d         }|d         }nLt          j        |                                          }t          j        |                                          }|d                             d          }	|d                             d          }
g }d}|	                    d          }d}d}d}d}|                                 } |	                                }	|
                                }
t           j        j        j                            | |||||	|
|||||||||          }|d         |d         |d         }}}||	                    d          |	                    d          ffS )Nr   r!   r"   r   F)
rB   r  r5  rW   r  r  r   mkldnn_rnn_layerr  r  )r  r  rp  rq  r  w0w1w2w3hxcxr  modehidden_size
num_layersrw  r  rM  outputsr\   hycys                         r+   mkldnn_one_layer_lstmr  O  sq   	B	B $AYAY[##[##			Q		B			Q		BKD''!**KJ MKE ..

C	B	Bin-55





! G$ 
GAJ
2rArzz!}}bjjmm,,,r*   c
                 8   |r|                      dd          n| } g }
t          |          D ]}t          ||||          \  }}}}|r||dz
  k     r|nd} |	| |||          \  }}|
                    |           |r( |	| |||d          \  }}|
                    |           |r-t	          j        ||g|                                dz
            } n|} |dk    r"|r ||dz
  k     rt	          j        | |d          } |r|                      dd          n| } | |
fS )Nr   r!   r   T)r  )rM  )	transposerV   r|  r~  rB   r7  rQ   rP  )r   r  rp  rq  r  rP  rM  rw  r  layer_fnfinal_hiddensrx  rx  ry  rz  r{  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddens                       r+   _rnn_helperr    sz    &1;EOOAq!!!eEM: > >=KFA}>
 >
:
Jl $D
QU(:(:''&huj*jQQZ((( 	-"*(|\:t# # #GZ   ,,, 	Iw0'++--!2CDDEEEa<<E<a*q.&8&8M%===E%0;EOOAq!!!eE-r*   c	                    |                     d          }	t          ||d          }t          | |	|||||||t          t          t          t          j                            
  
        \  }
}|
t          j        |d          fS Nr   Fr  )	unbindrs  r  r   r  r  rB   r   stackr   r  rp  rq  r  rP  rM  rw  r  r  r   r  s               r+   rnn_tanh_inputr         YYq\\F6:u55F$%*)=)=>>> C M1----r*   c	                    |                     d          }	t          ||d          }t          | |	|||||||t          t          t          t          j                            
  
        \  }
}|
t          j        |d          fS r  )	r  rs  r  r   r  r  rB   r  r  r  s               r+   rnn_relu_inputr    r  r*   c	                 
   |                     d          }	t          ||d          }t          | |	||||||dt          t          |t          t          j                            
  
        \  }
}|
t          j        |d          fS Nr   Fr  r  )	r  rs  r  r   r  r  rB   r  r  datar  r  rp  rq  r  rP  rM  rw  r  r   r  s               r+   rnn_relu_datar         YYq\\F6:u55F$##EJ//	
 	
 	
 C  M1----r*   c	                 
   |                     d          }	t          ||d          }t          | |	||||||dt          t          |t          t          j                            
  
        \  }
}|
t          j        |d          fS r  )	r  rs  r  r   r  r  rB   r   r  r  s               r+   rnn_tanh_datar    r  r*   c                    t          j        |||          | z   }|                    d|          }|d                                         }	|d                                         }
|d                                         }|d                                         }|
|z  |	|z  z   }||                                z  }||nt          j        ||d           }||fS )NrK  r   r!   r"   r   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gater  r  s                  r+   	lstm_cellr  .  s    HRG,,s2EKK9--MA&&((G"**,,Ka %%''IQ''))H	r	Wy0	1B	BGGII	B ahr9d&C&CBr6Mr*   c           
         |d         }|d         }|r|d         nd }|r|d         nd }t          |          dk    r|d         nt          |          dk    r|d         nd }	|d                             d          }
|d                             d          }t          j        | ||          }|r|                    d          n|}g }|D ]0} t          | |
||||	d          \  }
}|                    |
           1|r|                                 t          j	        |d          }||

                    d          |
                    d          ffS )Nr   r!   r"   r   r  rK  r  )r  rW   r   r  r  r  r~  r  rB   r7  r  )r  r  rp  rq  r  r  r  r  r  r  r  r  r  r  r   s                  r+   one_layer_lstmr  <  so   q	Iq	I%/fQii4G%/fQii4G[[A%%q		Fq8H8H6!99d  
		Q		B			Q		Bi995<S)..q111BSK   3B	7IQRSSSB2 
)K
#
#CA

1...r*   c           
      d   |d         }|d         }|r|d         nd }|r|d         nd }	t          |          dk    r|d         nt          |          dk    r|d         nd }
g }g }|r|d         n|d         }t          j        | t          |                    }|r|d d d         }|d         }|d         }|                    dd|          |                    dd|          }}|D ]5} | j        d         }t          j        | ||          } ||k     ru|                    |                    d|||z
            |                    d|||z
            f           |                    dd|          |                    dd|          }}||k    r^t          j	        ||                    d|||z
            fd          }t          j	        ||                    d|||z
            fd          }t          | ||||	|
d          \  }}|}|                    |           7|r|                                 ||f}na|                    ||f           |                                 t          | \  }}t          j        |d          t          j        |d          f}t          j        |d          }||fS )	Nr   r!   r"   r   r  rK  rU   r  )r  rB   r  r|  r6  r  r   r  r~  r  r  r  r  r7  )r  r  rp  rq  r  r  r  r  r  r  r  r  rv  r  r  orig_hxorig_cxr  r  rx  r  hidden0hidden1r   s                           r+   one_layer_lstm_datar  W  s   q	Iq	I%/fQii4G%/fQii4G[[A%%q		Fq8H8H6!99d  KG)0Dk"ook!nOCk!2!233I $dddO	QiGQiGq!_--q!_-- 	B
   IaLhsIw// NNIIaOa$788IIaOa$788   YYq!Q''1a););B W^^AO8KLLMq B W^^AO8KLLMq B 3B	7IQRSSSB2 B"X

Bx   =Yw**EIgq,A,AA

)K
#
#C
?r*   c                 >    d } || ||          rt           S t          S )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c                 4   t           j                                        sdS | gt          |          z   t          t	          j        |                    z   }d |D             }t          |          dk    rdS |                                }|t          j        d          k    rdS d |D             }|D ]}|t           j	        t           j
        fvr dS  | j        rdS |d                             d          |d                             d          k    }|rdS dS )	NFc                     h | ]	}|j         
S r)   r  r4   ts     r+   	<setcomp>zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>  s    ---18---r*   r!   r  c                     h | ]	}|j         
S r)   r   r  s     r+   r  zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>  s    +++a!'+++r*   r   r"   T)rB   rN  _get_mkldnn_enabledr|  r	   from_iterabler  popr  r  bfloat16requires_gradr5  )	r   r  rp  ry  devicesr  dtypesr   rr  s	            r+   
use_mkldnnz2select_one_layer_lstm_function.<locals>.use_mkldnn  s#   x++-- 	5'DHH$tE,?,G,G'H'HH--W---w<<15U\%((((5++7+++ 	 	EU[%.999uu :  	5Q%**Q--2a5::a==8 	5tr*   )r  r  )r   r  rp  r  s       r+   select_one_layer_lstm_functionr    s5      : z%V$$ $$r*   c	                 8   t          |          dk    rt          dt          |                     t          |||d                             d          |d                             d          k              }t	          t          |d         |d                             }	t          | ||          }
t          | |	||||||||

  
        \  }}t	          t          |           }|t          j	        |d         d          t          j	        |d         d          fS )Nr"   $lstm expects two hidden states, got r   r!   )
r  r3  rs  r5  r|  r  r  r  rB   r  )r   r  rp  rq  r  rP  rM  rw  r  r  r  r   r  s                r+   	lstm_implr    s    2ww!||MCGGMMNNN6:r!uzz!}}1

1/MNNF#beRU##$$F-eR@@H$ C m,--MM!,a00%+mA>NPQ2R2RRRr*   c	                 >   t          |          dk    rt          dt          |                     t          |||d                             d          |d                             d          k              }t	          t          |d         |d                             }	t          | |	||||||dt          t          |          
  
        \  }
}t	          t          |           }|
t          j
        |d         d          t          j
        |d         d          fS )Nr"   r  r   r!   F)r  )r  r3  rs  r5  r|  r  r  r   r  rB   r  r  s               r+   lstm_data_implr    s    2ww!||MCGGMMNNN6:r!uzz!}}1

1/MNNF#beRU##$$F$#=== C m,--MM!,a00%+mA>NPQ2R2RRRr*   c                 p   |                      dd          }t          j        |||                               dd          }|d         |d         z                                   }|d         |d         z                                   }	|d         |d         |z  z                                   }
||
z
  |	z  |
z   S )Nr   r!   r"   r   )r  r   r  r   r   r  ry  r  r  r  r  chunked_igateschunked_hgates
reset_gate
input_gatenew_gates              r+   gru_cellr    s    YYq!__NXj)W==CCAqIIN #nQ&77@@BBJ #nQ&77@@BBJq!^A%6%CDJJLLH!Z/(::r*   c                    t          j        | ||                              dd          }t          j        |||                              dd          }|d         |d         z                                   }|d         |d         z                                   }	|d         |d         |z  z                                   }
||
z
  |	z  |
z   S )Nr   r!   r   r"   r  r  s              r+   gru_cell_datar    s    Xc9g66<<QBBNXj)W==CCAqIIN #nQ&77@@BBJ #nQ&77@@BBJq!^A%6%CDJJLLH!Z/(::r*   c	                     t          ||d          }t          | |                    d          ||||||dt          t          |t
                    
  
        \  }	}
|	t          j        |
d          fS )NFr   r  )rs  r  r  r   r  r  rB   r  )r  r  r  rp  rq  r  rP  rM  rw  r   r  s              r+   gru_impl_datar     sz     6:u55F$
		!"}UUU C M1----r*   c	                     t          ||d          }t          | |                    d          |||||||t          t          t
                    
  
        \  }	}
|	t          j        |
d          fS )NFr   r  )rs  r  r  r   r  r  rB   r  )r   r  rp  rq  r  rP  rM  rw  r  r   r  s              r+   gru_implr  >  sw     6:u55F$
		!222 C M1----r*   c                     t          |                                 ||          }t          |d          }t          |d          }t          j        j                            | ||||          S Nr   r!   )r?  r5  rB  rB   r  r   _upsample_bilinear2d_aar   r  align_cornersr=  rG  scale_hscale_ws          r+   upsample_bilinear2d_aa_vecr  \  sb     ){MRREmQ//GmQ//G9>11umWg  r*   c                     t          |                                 ||          }t          |d          }t          |d          }t          j        j                            | ||||          S r  )r?  r5  rB  rB   r  r   _upsample_bicubic2d_aar	  s          r+   upsample_bicubic2d_aa_vecr  h  b     ){MRREmQ//GmQ//G9>00umWg  r*   c                     t          |                                 ||          }t          |d          }t          |d          }t          j        j                            | ||||          S r  )r?  r5  rB  rB   r  r   _upsample_lanczos2d_aar	  s          r+   upsample_lanczos2d_aa_vecr  t  r  r*   c                     t          |                                 ||          }|r|nd gt          |          z  }t          | |||          S r:   )r?  r5  r  _upsample_linear)r   r  r
  r=  rG  rA  s         r+   _upsample_linear_vecr    sL     ){MRRE+D]]$#e**1DFE5-@@@r*   r
  c                 (    t          | |||g          S r:   r  )r   r  r
  r\  s       r+   upsample_linear1dr    s     E;zJJJr*   c                 *    t          | ||||g          S r:   r  )r   r  r
  r[  r\  s        r+   upsample_bilinear2dr    s     E;(?STTTr*   c                 ,    t          | |||||g          S r:   r  )r   r  r
  rb  r[  r\  s         r+   upsample_trilinear3dr    s&     {MHh+I  r*   c                 P    |r|dk    r| dz
  |dz
  z  ndS ||dk    rd|z  n| |z  S )Nr!   ri   r   r)   )r  r  r
  rp   s       r+   _compute_scaler     sK     V5=\\#(S.11qH#/EAIIsU{{7XCUUr*   c                 &    |r| |z  S | |dz   z  dz
  S Nr   r)   )rp   	dst_indexr
  s      r+   _compute_source_indexr$    s)     /y  	C(3..r*   weightsweights_precisionc                     t          d t          | |          D                       d|dz
  z  z   }||z	  }t          j        |dd                              t          j                  S )Nc              3      K   | ]F\  }}|                     t          j                  |                     t          j                  z  V  Gd S r:   )r<   rB   rB  )r4   r  r  s      r+   r  z%_sum_tensors_uint8.<locals>.<genexpr>  sX        26!QU[ADD---     r*   r!   r      )_sum_tensorsr  rB   r   r<   rT  )r  r%  r&  r  s       r+   _sum_tensors_uint8r+    s}       :=c7:K:K    	
 1$	%'F ((F;vq#&&))%+666r*   c                     t          j        |                                           }d}t          j        ||j                  }d|d|dz   z  z  z   }|dk    }||                                z
  S )N   r  r   r!   i   )rB   r  r   r  r  r   )r%  
max_weightmax_weight_precision
precisionsvaluesr  s         r+   _compute_weight_precisionr2    sp    W%%))++J2:;LMMMJ:zA~!677FgD$((**,,r*   c           	      $     j         d         } j         dd          }t          |          t          j         t          j        j                  \  } fdfdt          t          |||                    D             }t          t          |           \  }g }	t          ddggz   D ]dd d gfdt                    D             z   }
t                               |
          }t          |          }|	                    |           et          t                              D ]h}||         |         z
                      dd	                                        fd
t          |	d d d         |	dd d                   D             }	it          |	          dk    rt%          dt          |	                     |	d         }t          j                   } j        j        dk    r|dk     rt,          j        }t1          |t,          j                  s$t%          dt+          |          j                   |                    |          }                                 s|                                }|S )Nr!   r"   r]  c                    t          | |	|          }t          j        |j                                      
          }t          ||	                              d          } |j        |j        d         gdg|z  R  }|                    t          j	                  }|dz                       | dz
            }|||fS )Nr  r   r   r   r   r!   r   )
r   rB   r  r  r<   r$  r   r-  r  r  )inp_sizer  rA  nsqueezescale_factorrx  x_f32r5   xp1r
  r   r   s            r+   
get_valuesz$_upsample_linear.<locals>.get_values  s    %h-PP L%,777:::GG%lA}EEKKPSKTTek!n@sh/?@@@HHU[!!1umm1m--a}r*   c           	      F    g | ]\  }\  }}} |||d z
  |z
            S r*  r)   )r4   rx  r5  r  rA  r:  n_dimss        r+   r7   z$_upsample_linear.<locals>.<listcomp>  sL       +A+(F 	
8XvvzA~>>  r*   r   c                 H    g | ]}|         d k    r|         n|         S r(  r)   )r4   kr  xp1sxss     r+   r7   z$_upsample_linear.<locals>.<listcomp>  s1    UUU!qtqyybeed1gUUUr*   r   ri   c                 L    g | ] \  }}|t          j        ||z
            z   !S r)   )rB   r   )r4   v1v2xscales      r+   r7   z$_upsample_linear.<locals>.<listcomp>	  sD     
 
 
 B 27F+++
 
 
r*   z+Expected vs to have exactly 1 element, got ri     z$Expected result to be a Tensor, got r_  )r  r  rG   rH   r_  INT_TO_FLOATr  r  r|  r
   rV   r   rj  r   r~  reversedr   r<   r3  r   r  r  rB   rc  r3   r   r#   r  rT  round)r   r  r
  rA  rl  	inp_sizesrX   r1  xs_f32vsr  vrx  r  r`  r  r   r:  r<  r?  r@  rD  s   ` `            @@@@@@@r+   r  r    s    QJABBI^^F'!AN  HAu
          /8	;//0
 0
  F CL))FB	B1vh'(  TlUUUUUUuV}}UUUUuc**#Au--
		!eFmm$$ 
 
)be#**34477>>
 
 
 
 b1gr!$Q$x00
 
 
 2ww!||T3r77TTUUUUF /66M |F""zB/fel++ 
J4<<3HJJ
 
 	
 ];;F""$$  Mr*   r  r	  c                 "    | j         |j         k    S r:   r  )r  r	  s     r+   is_same_sizerN  (  s    7agr*   c                 8    t                               | |          S r:   )r   r  )r5   r  rI   s      r+   _reshape_aliasrP  -  s     99Qr*   c                 8    t                               | |          S r:   )r   r  )r5   r  s     r+   rj  rj  3  s    ::a!!!r*   c                 <    t                               | |||          S r:   )r   r  )r5   r  r~   r/  s       r+   r1  r1  8  s    >>!WeZ888r*   c                    |D ]7}|3t          j        |j        t           j        t           j        fv d            8t          j        |j        t           j        k    d            ddlm}  ||                                 dk              r;t           j	        
                    | |          }|                     |j        |          S t          t          |                    D ]<}||         }|0|                    d|                     |          dz
            ||<   =t"                              | |                              | |          S )Nc                      dS Nz3tensors used as indices must be long or int tensorsr)   r)   r*   r+   r   z&_unsafe_masked_index.<locals>.<lambda>C  r  r*   c                      dS Nz*tensors used as masks must be bool tensorsr)   r)   r*   r+   r   z&_unsafe_masked_index.<locals>.<lambda>H      < r*   r   r[  r!   r  )rB   r   r   rq  r  r  rz  r\  r   _meta_registrationsmeta_index_Tensorr   r  rV   r  r   r5  r   rj  rr  )r5   r  r  fillr  r\  meta_resultrx  s           r+   r  r  =  sK     L
EI66MM  
 
L
ej <<  
 EDDDDD~aggii1n%% 3/AA!WMMzz++T2223w<<   ? ?
q		A>>GAJa))55teTBBBr*   c                 d   |D ]7}|3t          j        |j        t           j        t           j        fv d            8t          j        |j        t           j        k    d            |                                 dk    r|                                 S t          t          |                    D ]P}||         }|D|
                    |                     |           |                     |          dz
            ||<   Q|                    | d          }t                              | ||d          S )Nc                      dS rU  r)   r)   r*   r+   r   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>_  r  r*   c                      dS rW  r)   r)   r*   r+   r   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>d  rX  r*   r   r!   r  Tr.  )rB   r   r   rq  r  r  r   r  rV   r  r   r5  rr  r   r1  )r5   r  r  r1  r  rx  masked_values          r+   #_unsafe_masked_index_put_accumulatera  Y  s$     L
EI66MM  
 
L
ej <<  
 	wwyyA~~wwyy3w<<   H H
!&&))QGGGAJ%%teQ//L!!!Wlt!LLLr*   c                    |                                  }d}|dk     rd}|9|dk    r,dg|z  }|j        d         ||<   |                    |          }n|}| |z  } t          j        ||k    |d          }	|	                    |          }
t          j        | ||
                              |           }t          j        ||k    |d          }|t          j	        j
        k    r |dk    r|                     dd          }||fS |r|                    | j                  }t          j        |||
                              |          }t          j        ||k    |d          }|                                }n+||k                                                        |           }|t          j        j
        k    r|                                }n,|t          j        j
        k    r|                                |z  }||fS )Nr!   r"   r   r)   r   )rQ   r  r  rB   rk   rW   gatherr  r    r&   r~   r   r  r   r<   r(   r'   )r}   r   r   r   r&  r<  r.  r  wr/  safe_target_r  r'  wsums                 r+   _nll_loss_forwardrg  s  s    XXZZFKzzA::E "(aE+E""AAAax+f4fa@@K((55L l4l;;CCKPPPF[</;;FIN(((VaZZ}}R--|##HHTZ  |A{L99AA+NN{6\14;;xxzz,.335588>>IM'''	in*	*	*,<r*   c                                                       dk    r                                  dk    s%t          d                                   d                                           dk    r%t          d                                  d                                            dk    o                                 dk    }|s3t          j         j        d         j        d         k     fd            j        d         }|J|                                 dk    r|                                |k    st          d	| d
|j                   t           |||          S )Nr   r"   rA  rB  r!   rC  c                  *    d j          dj          dS )NrD  rE  rF  r  )r}   r   s   r+   r   z"nll_loss_forward.<locals>.<lambda>  s    VVVv|VVV r*   rU   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rQ   r3  rB   r   r  r   rg  )r}   r   r   r   r&  rH  	n_classess   ``     r+   nll_loss_forwardrk    sn    HHJJNNtxxzzQRTXXZZRRRSSSzz||a_PVPZPZP\P\___
 
 	
 88::?8vzz||q'8L 
JqMV\!_,VVVVV	
 	
 	

 
2I6::<<1#4#499T9T>i > >/5|> >
 
 	

 T669lKKKr*   c                 (    t          | ||||          S r:   )rg  )r}   r   r   r   r&  s        r+   nll_loss2d_forwardrm    s     T669lKKKr*   Ac                 0    |dz   | z  |dz   z
  | z  | z  dz   S )Nr"   r   r!   r)   r5   rn  s     r+   _upsample_cubic_convolution1rq    s(    UaK1q5!Q&*Q..r*   c                 <    || z  d|z  z
  | z  d|z  z   | z  d|z  z
  S )Nr     rK  r)   rp  s     r+   _upsample_cubic_convolution2rt    s0    UQU]a!a%'1,q1u44r*   r  c                    d}| j         t          j         d          k    rt          j        | d| z
  gd          }t          j        | dz   d| z
  gd          }t          ||          }t	          ||          }t          j        |d          \  }}t          j        |d          \  }}	|||	|fS t          | dz   |          t	          | |          t	          d| z
  |          t          d| z
  |          fS )Ng      r  ri   r   r   r   )r  rB   r  rt  rq  r  )
r  rn  tt1tt2w03w12r  r  r  r  s
             r+    _upsample_get_cubic_coefficientsrz    s   Ax5<&&&&k1cAg,A...k1s7C!G,!444*322*322cq)))Bcq)))B2r2~ )S!44(A..(q!44(q!44	
 	
r*   coeffstsc                 n    t          |          }t          d t          | |          D                       S )Nc              3   &   K   | ]\  }}||z  V  d S r:   r)   r4   r  r  s      r+   r  z+_upsample_cubic_interp1d.<locals>.<genexpr>  s*      EEHRREEEEEEr*   )rz  r*  r  )r{  r|  coeffs2s      r+   _upsample_cubic_interp1dr    s6    .r22GEEFG0D0DEEEEEEr*   c                 6    t          t          j        |           S r:   )r   rB   r  )r|  s    r+   r*  r*    s    %)R   r*   	num_stepsc                     | dk    rt          j        d||          S |s| dz
  | z  nd}t          j        | || ||          S )Nr!   r   r  )stepsr  r   )rB   r  linspace)r  r
  r   r  r  s        r+   _linspace_from_neg_oner    sY     A~~|AfE::::-:A)a-9	$	$A>1"ayuMMMMr*   thetahrd  c                    | j         }| j        }t          ||||                              d|d          }t          ||||                              |dd          }t	          j        d||          }t          j        j                            |ddd          }t          j        j                            |ddd          }t          j        j                            |d	dd          }||z   |z   S )
Nr!   )r!   r!   r!   r  )r   r"   constantr   r  r  r~   r!   r!   )r"   r   	r   r  r  r  rB   r  r  r%  r  )	r  r  rd  r
  r   r  grid_xgrid_ygrid_ones	            r+   _make_base_grid_4dr  	  s    KE\F $A}eVDDII!QPQRRF#A}eVDDII!QPQRRFz)5@@@H X $$VjPQ$RRFX $$VjPQ$RRFx"&&xV*TU&VVHF?X%%r*   r  c                    | j         }| j        }t          ||||                              dd|d          }t          ||||                              d|dd          }t          ||||                              |ddd          }	t	          j        d||          }
t          j        j                            |ddd          }t          j        j                            |ddd          }t          j        j                            |	d	dd          }	t          j        j                            |
d
dd          }
||z   |	z   |
z   S )Nr!   )r!   r!   r!   r!   r  )r   r   r  r   r  r"  r"   r!   )r   r   r  )r  r  r  rd  r
  r   r  r  r  grid_zr  s              r+   _make_base_grid_5dr    s?   KE\F#A}eVDDII!QPQSTUUF#A}eVDDII!QPQSTUUF#A}eVDDII!QPQSTUUFz,eFCCCH X $$VjPQ$RRFX $$VjPQ$RRFX $$VjPQ$RRFx"&&xV*TU&VVHF?V#h..r*   c                     |\  }}}}t          | |||          }|                    ddd          | j                            d          z                      d          }|                    |||d          S )Nr
  rU   r   r!   ra  r"   )r  r  re  rW   r   )	r  r5  r
  r2  rX   r  rd  	base_gridgrids	            r+   _affine_grid_generator_4dr  +  sw    JAq!Q"5!QmLLLI NN2q!$$ux'9'9!'<'<<AA"EED99Q1a   r*   c                     |\  }}}}}t          | ||||          }|                    ddd          | j                            d          z                      d          }	|	                    ||||d          S )Nr  rU   rK  r!   ra  r   )r  r  re  rW   r   )
r  r5  r
  r2  rX   r  r  rd  r  r  s
             r+   _affine_grid_generator_5dr  5  s}    MAq!Q"5!QOOOI NN2q!$$ux'9'9!'<'<<AA"EED99Q1a###r*   c                     t          j        t          |          dv d            t          |          dk    rt          | ||          S t	          | ||          S )Nr  c                      dS )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r)   r)   r*   r+   r   z'affine_grid_generator.<locals>.<lambda>E  s    U r*   rK  r  )rB   r   r  r  r  )r  r5  r
  s      r+   affine_grid_generatorr  ?  sg     
LD		VUU   4yyA~~(MRRRR(MRRRRr*   r  interpolation_modepadding_mode_expand_gridc           	          !"#$%&'()*+,- t          j        dv fd           t          j        dv fd           dt          dt          dt          ffd-dt          dt          d	t          dt          fd
+dt          dt          dt          f+fd dt          dt          dt          f -fd} j        \  $%|j        \  })*}|dk    rt          d|           r0|                    d)*|                              )*d          }dt          dt          dt          f$%fd&t          j         j	                                      ddd          t          j         j	                                      ddd          dt          dt          dt          dt          f&)*fddt          dt          dt          f fd"|d         }	|d         }
dk    r ||	%          } ||
$          }|                                |                                c'('dz   (}}'(dz   }}||}}||z
  ||z
  z  }||z
  ||z
  z  }||z
  ||z
  z  }|'z
  |(z
  z  }t          "fd'(|f|||f|||f|||ffD                       S dk    rM ||	%          } ||
$          }|                                }|                                } "||d          S  -|	%          } -|
$          }|                                '|                                (|'z
  ,|(z
  }s*,                    d          ,|                    d          }dt          dt          dt          f "$%fd#dt          dt          f#'(,fd!t          !fd t!          d!          D                       }t#          ||          S )"N)r   r!   r"   c                      d  S )NzInvalid interpolation mode r)   )r  s   r+   r   z"_grid_sampler_2d.<locals>.<lambda>^  s    B.@BB r*   c                      d  S )NzInvalid padding mode r)   )r  s   r+   r   z"_grid_sampler_2d.<locals>.<lambda>a  s    +Q<+Q+Q r*   coordsr5  rR   c                 B    r|dz  dz
  n|dz  }|dz  dz
  }| |z  |z   S r"  r)   )r  r5  r   ofsr
  s       r+   unnormalizez%_grid_sampler_2d.<locals>.unnormalized  s>     %2CtczCs
Sj3|c!!r*   	twice_low
twice_highc                 ^   ||k    rt          j        |           S |dz  }||z
  dz  }| |z
                                  }t          j        ||          }||z                                                      t           j                  }t          j        |dz  dk    ||z   ||z   |z
            S )Nr"   r   r!   r   )rB   r  r   fmodfloorr<   int8rk   )r  r  r  
coords_mincoords_spancoords2extraflipss           r+   reflect_coordinatesz-_grid_sampler_2d.<locals>.reflect_coordinateso  s    
""#F+++]
!I-2J&++--
7K00;&--//222DD{AINEJ.j0H50P
 
 	
r*   c                     dk    r| S dk    rt          j        | d|dz
            S r | dd|dz
  z            }n | dd|z  dz
            }t          j        |d|dz
            S )Nr   r!   r"   rU   r   )r  r5  coords_reflectedr
  r  r  s      r+   compute_coordinatesz-_grid_sampler_2d.<locals>.compute_coordinates{  s    1MQ;vq$(333 Q#6#6vq!tax.#Q#Q  #6#6vr1t8a<#P#P ;/D1H===r*   c                 4     | |          } ||          S r:   r)   )r  r5  	coords_unr  r  s      r+   compute_source_indexz._grid_sampler_2d.<locals>.compute_source_index  s'    K--	""9d333r*   r"   z4grid last dimension must be 2 (for x,y coords), got r!   r@  ysc                     t          j        d| k    t          j        | k     t          j        d|k    |k                                   S ru   rB   r  )r@  r  iHiWs     r+   in_bounds_condz(_grid_sampler_2d.<locals>.in_bounds_cond  sH     GU&rBw0A!r'2PR70S0STT
 
 	
r*   r  wsc                      | |          rndt          	
fd|                     t          j                  |                    t          j                  |fD                       S )Nr!   c              3   p   K   | ]0}t          j        |d                                         V  1dS r  )rB   rk   r  )r4   r  r  r  r  oHoWs     r+   r  z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>  sY       
 
 Ka##((Ar266
 
 
 
 
 
r*   r   )r  r<   rB   r  )r@  r  r  r  r  r  r  r  r  r  r  s      @@r+   clipz_grid_sampler_2d.<locals>.clip  s    ~b"%%
 $AA1 
 
 
 
 
 
 
 
ee%+e..EK0H0H"M
 
 
 
 
 	
r*   ixiyc                 D     	| ||          \  }}}||f         |z  S r:   r)   )
r  r  rd  idx_xidx_yw_C_idxN_idxr  r  s
         r+   get_summandz%_grid_sampler_2d.<locals>.get_summand  s4    4B??ubue+,r11r*   ).r   ).r!   r   c              3   :   K   | ]\  }}} |||          V  d S r:   r)   )r4   r  r  rd  r  s       r+   r  z#_grid_sampler_2d.<locals>.<genexpr>  sK       
 
R KB""
 
 
 
 
 
r*   c                 N     |           } |          } ||d          S r^   r)   )r  r  r5   r\   r  r  r  r  s       r+   get_value_boundedz+_grid_sampler_2d.<locals>.get_value_bounded  s;    ##B++A##B++A;q!Q'''r*   r  c                     | dz
  z   } dz
  |           |           dz   |           dz   |          f}t          |          S )Nr!   r"   )r  )r  iy_ofscsr  ix_nwiy_nwtxs      r+   	get_coeffz#_grid_sampler_2d.<locals>.get_coeff  sv    cAg&F!!%!)V44!!%00!!%!)V44!!%!)V44	B ,B333r*   c              3   .   K   | ]} |          V  d S r:   r)   )r4   r  r  s     r+   r  z#_grid_sampler_2d.<locals>.<genexpr>  s+      ::#yy~~::::::r*   rK  )rB   r   r   r  r  r3  r  r  r  r  r   r  r*  rH  rW   r  rV   r  ).r  r  r  r  r
  r  r  rX   twor5   r\   r  r  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyr{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s.   ` ````                     @@@@@@@@@@@@@@@@@@@r+   _grid_sampler_2dr  M  s    
Li'BBBB   
L	!#Q#Q#Q#Q  "F "# "& " " " " " "

F 

s 

 

PV 

 

 

 


>F 
># 
>& 
> 
> 
> 
> 
> 
> 
> 
>4V 43 46 4 4 4 4 4 4 4 7LAq"bZNAr2s
axxH3HH
 
 	
  D yyAr2s++221aRCC
6 
v 
& 
 
 
 
 
 
 

 L18,,,11!Q1==EL18,,,11!Q1==E

 

V 

 

4F 

 

 

 

 

 

 

 

 

 

 

2 2F 2& 2 2 2 2 2 2 2 2 2
 	VAVAQ!!!R((!!!R((xxzz288::uqy%ueaiueu
urz*U
urz*
rEz*U
rEz* 
 
 
 
 t$t$t$t$	 
 
 
 
 
 	
 
q	 	 !!!R((!!!R((XXZZ
XXZZ
{:z1555[B[B



%Z%Z 	!aBaB	(& 	(f 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(
	43 	46 	4 	4 	4 	4 	4 	4 	4 	4 	4 ::::q:::::'333r*   c                 *    t          | ||||          S )N)r  r  r  r
  )r  )r  r  r  r  r
  s        r+   grid_sampler_2dr    s*     	-!#   r*   c                 L    t          j                                         dk    o                                dk     fd           t          j                             d                              d          k     fd            z                      d          S )Nr"   r!   c                  \    d                                   d                                  S )Nzmatrix @ vector expected, got r  r   r}   r  s   r+   r   zmv.<locals>.<lambda>  s(    JJJswwyyJJ r*   r   c                      d                      d           d                      d           d                     d           dS )Nzsize mismatch, got input (r   r5   r!   z), vec (rF  r  r  s   r+   r   zmv.<locals>.<lambda>  sG    `TYYq\\``DIIaLL``RURZRZ[\R]R]``` r*   r   )rB   r   rQ   r5  r   r  s   ``r+   r  r  	  s     
L

a*CGGIINJJJJJ   
L		!#`````   3J"""r*   c                     |,|dz
  |z  dz   }d|z
  | z  |t          j        |           z  z
  }nd|z
  | z  t          j        |           z
  }|||z  }|                    |j                  }t	          ||          S r^   )r   
logsigmoidr<   r   r   )r}   r   r   
pos_weightr   
log_weightr   s          r+    binary_cross_entropy_with_logitsr    s    
  1n.2
F
d"j1<3E3E&EFF
d"Q\$%7%77f} 776<  Di000r*   tensor1tensor2is_outc                    	 | j         |j         k    r| |fn|| f\  }}ddlm	 |j         dk    r|j         dk    sdS |j        r|sdS | j         dk    rdS  	|                                dk              rdS |j        }|                                }dg}t          |dd                    D ] }|                    ||d         z             !t          	fd	t          |t          t          |                    |          D                       S )
Nr   r[  r   r"   FTr!   rU   c              3   \   K   | ]&\  }}} |d k              p ||k              V  'dS r  r)   )r4   r   r   r5  r\  s       r+   r  zshould_fold.<locals>.<genexpr>D  s^        D% 	tqy!!B^^DEM%B%B     r*   )r  rz  r\  r  r   r  r}  rG  r~  r  r  r|  )
r  r  r  t1t2t1_shape	t1_strideexpected_strider5  r\  s
            @r+   should_foldr  ,  sY    $+<7<#?#?gwgwEWFBDDDDDDGqLLRW\\u	  t|qu~bhhjjAo&& txH		I cO!""&& ; ;tob&99::::    !$tH_5566"
 "
     r*   )pass_is_out)r  c                
   ddl m}m} |                                 }|                                }|dk    s|dk    rt	          d| d|           |dk    r|dk    rt          j        | |          S |dk    r|dk    rt          j        | |          S |dk    rA|dk    r;t          j        t          j	        t          j
        | d          |          d          S |dk    r|dk    rt          j	        | |          S t          | ||          rN||k    }|r|j        n| }|s|n|dk    r|                                 n| }	|j        }
t          |
d d                   }t!          t"          j        |          }|	                                dk    }|r |                    |	j        d                    |                    ||
d                   }|rUt
          j        j                            |	                    |	          |          }|r|j                                        n|S t
          j        j                            |                    |	          |          S |dk    r|dk    r|dk    r|                     d          nd}|                     d          }| j        d d         }|dk    r|                    d          n|                    d          }|dk    r|                    d          nd}g }t5          |dz
            D ]*}|                    |                    |                     +|d	k    r|d	k    r ||d         |d         k              r~ ||d         dk              r*| j        r#t9          |                     d          |          S  ||d         dk              r*|j        r#t9          | |                    d                    S t          t          j        ||                    }|||gz   }t=          |          }|                     |                              |||          }|dk    }|rC||gz   }|                    |                              ||          
                    d          }n1|||gz   }|                    |                              |||          }|}|dk    r|                    |           |dk    r|                    |           |r;|                     |                              d          !                    |          S |                     |          !                    |          S t          j"        d
d            d S )Nr   )r\  guard_or_truez9matmul does not support 0-dimensional tensors, got dims: z and r!   r"   rU   ra  r   Fc                      dS )Nz/both arguments to matmul need to be at least 1Dr)   r)   r*   r+   r   zmatmul.<locals>.<lambda>  s    $U r*   )#rz  r\  r  rQ   r3  rB   dotr  r  r  rW   r  re  r  r  r|  r   r  r   r~  r-  r  r   _unsafe_viewr  r5  rV   r  rd  broadcast_shapesr0  r  bmmr  r   )r  r  r  r\  r  dim_tensor1dim_tensor2r  r  r  sizes_1r  folded_dim1t2_is_matrix	t1_foldedr  r2  m1batch_tensor1m2rY  batch_tensor2rx  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandeds                                 r+   rd  rd  L  s    TSSSSSSS++--K++--Ka;!++gggZegg
 
 	
 aK1,,y'***			kQ..x)))			kQ..}UXeogq&A&A7KKQOOO			kQ..x)))	Wgv	.	. hW  +-	$1WZZ'$XGG+:J:J799;;;PW 	 (GCRCL))X\<88 vvxx1} 	-,,, JJ{GBK88	 	O Y^00b1A1A<PPF-6B69'')))FB9>..y||B/?/?NNN			kQ.. !,aGLLQ\\"crc*!,qW\\"gll26F6F +aGLLQ#%{Q'' 	2 	2A  a1111
 1q  mA.-2BBCC ! ~mA.!344 ;9N ;gooa00':::~mA.!344 ;9N ;gwq'9'9:::  $"=-@@ 
  
 3aW<#$899 #>>*=>>FF !R
 
 !A%
 	"6""=233-r221  #7"a"@&~~.ABBJJ$b!    ,??"""??""" 	M#''(899AA"EEJJ<XXX#''(899>>|LLLUUUVVVVVr*   r  r  c                      j         \  }}t          |d         ||          }t          |d         ||          }t          j         t          j        j                  \  }}t          j        |d          j                  	                    |          }	t          j        |d          j                  	                    |          }
t          ||
|          }t          ||	|          }|                    d          }|                                }|                                }||z
                      dd          }||z
                      dd          }|	                    t          j                  }|	                    t          j                  }|dz
  ||dz   |d	z   f}|dz
  ||dz   |d	z   ft          |          t          |          }d
\   j        t          j        k    r:t%                    t%          |          fdD             fd|D             } fd fdt'          fd|D                       } j        t          j        k    r#t)          d          t+          ||          }n't-          d t/          ||          D                       }t          j                   }|                    |          }|S )Nr   r!   r]  r  r   rU   r   ri   r"   ru  c                     g | ]?}|d z  z  t          j        |          dz  z                       t           j                  @S r!   r   rB   r   r<   int16)r4   rd  weights_precision_xs     r+   r7   z.upsample_bicubic2d_default.<locals>.<listcomp>  V     
 
 
 !**+ejmmc.AAEEekRR
 
 
r*   c                     g | ]?}|d z  z  t          j        |          dz  z                       t           j                  @S r  r  )r4   rd  weights_precision_ys     r+   r7   z.upsample_bicubic2d_default.<locals>.<listcomp>  r!  r*   c                     t          j        | ddz
            }t          j        |ddz
            }t                              d d ||g          }|S r  )rB   r   r   rj  )r  r@  y_idxx_idxrL  in_hin_wr   s        r+   load_boundedz0upsample_bicubic2d_default.<locals>.load_bounded  sT    B4!8,,B4!8,,utT5%&@AAr*   c                      t           fdD                       }j        t          j        k    r"t	          d          t          |          S t          d t          |          D                       S )Nc              3   0   K   | ]} |          V  d S r:   r)   )r4   x_ofsr)  r\   s     r+   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s/      BBll1e,,BBBBBBr*   z4weights_precision_x must not be None for uint8 inputc              3   &   K   | ]\  }}||z  V  d S r:   r)   r  s      r+   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s*      JJRBGJJJJJJr*   )r  r   rB   rT  r3  r+  r*  r  )r\   src_xr   ixs_ofsr)  r   	weights_xs   ` r+   get_x_interpz0upsample_bicubic2d_default.<locals>.get_x_interp  s    BBBBB'BBBBB;%+%%"*$J   &eY8KLLLJJCy4I4IJJJJJJr*   c              3   .   K   | ]} |          V  d S r:   r)   )r4   y_ofsr1  s     r+   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s-      ;;%,,u%%;;;;;;r*   z4weights_precision_y must not be None for uint8 inputc              3   &   K   | ]\  }}||z  V  d S r:   r)   r  s      r+   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s*      LL(2rb2gLLLLLLr*   r_  )r  r   rG   rH   r_  rF  rB   r  r  r<   r$  rW   r  r   r  rz  r   rT  r2  r  r3  r+  r*  r  r   r  )r   r  r
  r  r  rX   h_scale_factorw_scale_factorr   rx  r  x_floaty_floatr5   r\   yscalerD  iys_ofs	weights_ysrc_yr  r`  r1  r'  r(  r/  r)  r   r#  r0  s   `                     @@@@@@@@r+   upsample_bicubic2d_defaultr=    sQ    {Aq$ $D+a.-QQN#D+a.-QQN'5#H#U  HAu 	[^EL999<<5<IIA[^EL999<<5<IIA#NA}EEG#NA}EEG##GAA k  c**Fk  c**F	U[A	U[A1uaQA&G1uaQA&G088I088I/9,,{ek!!7	BB7	BB
 
 
 

 
 
	
 
 
 

 
 
	
      K K K K K K K K K ;;;;7;;;;;E{ek!!& !WXXX#E96IJJLLc%6K6KLLLLL /66M];;FMr*   c                    t          j        t          |          t          |          z   dk    d            |j|t          d          t	          t
          t          t          f         t          d t          | j        dd          |          D                                 }|r|nd\  }}t          | ||||          S )Nr!   c                      dS )Nz:Must specify exactly one of output_size and scale_factors.r)   r)   r*   r+   r   z(upsample_bicubic2d_vec.<locals>.<lambda>3  s    L r*   z7scale_factors must not be None when output_size is Nonec              3   Z   K   | ]&\  }}t          t          |          |z            V  'd S r:   )r   r   )r4   rd  rp   s      r+   r  z)upsample_bicubic2d_vec.<locals>.<genexpr><  sM        Au 	!u,--     r*   r"   ru  )
rB   r   r  r3  r   r  r  r  r  r=  )r  r  r
  r=  r  r  s         r+   upsample_bicubic2d_vecrA  &  s     
L[D///14LL     I   #s(O   #AGABBK ? ?    
 
 )6G}}<GW%amWgVVVr*   c                 0      fd}t           ||          S )Nc                     t          j        |  ||z   j                  }|dz
  |dz
  |                                z
                                  z
  S )Nr  r!   )rB   r  r  r   r   middler   dim_idxr  s       r+   r  z_reflection_pad.<locals>.idxK  sM    ,ufunQXFFFzVaZ'++--7<<>>>>r*   _reflection_or_replication_padr  r  r  s   `  r+   _reflection_padrJ  E  s9    ? ? ? ? ? *	  r*   c                 0      fd}t           ||          S )Nc                 v    t          j        |  ||z   j                  }t          j        |d|dz
            S )Nr  r   r!   )rB   r  r  r   rD  s       r+   r  z_replication_pad.<locals>.idx\  s9    ,ufunQXFFF{7Avz222r*   rG  rI  s   `  r+   _replication_padrM  V  s9    3 3 3 3 3 *	  r*   idx_fnc                    t                    dz  t          j        |                                 dz   dz   fv fd           | j         d          }|                                 z
  }fdt                    D             }fdt                    D             }| }t                    D ]Z}d g|                                z  }	 |||         ||         ||                   |	||z   <   t                              ||	          }[t          j	        |          }
|
                    |
          }|S )Nr"   r!   c                  (    d  d dz    d dz    dS )Nreflection_padzd requires r!   zD or r"   zD inputr)   r   s   r+   r   z0_reflection_or_replication_pad.<locals>.<lambda>o  s+    OOOqOOsQwOOO r*   c                 2    g | ]}d dz
  |z
  z           S r  r)   r4   rx  rQ   r  s     r+   r7   z2_reflection_or_replication_pad.<locals>.<listcomp>t  +    CCC1GAq1-.CCCr*   c                 8    g | ]}d dz
  |z
  z  dz            S r  r)   rS  s     r+   r7   z2_reflection_or_replication_pad.<locals>.<listcomp>u  0    HHHWQ#'A+.23HHHr*   r_  )r  rB   r   rQ   r  rV   r   rj  rG   r   r  )r  r  rN  	inp_shapenc_dimpadding_leftpadding_rightr  rx  r  r`  rQ   s    `         @r+   rH  rH  g  sU   
 g,,!
C	L	C!GS1W%%OOOO   IUUWWs]FCCCCCc

CCCLHHHHHU3ZZHHHMF3ZZ 1 1&**,,. &a)A,a@PQQAJ##FC00 /77M];;FMr*   c                     t                    dz  d |j         d          D             fdt                    D             fdt                    D             g }t          |j                  D ]^}dg|j        z  }d||<   |                    t          j        |j        |         |j                                      |                     _|d           | d          d fd	t                    D             fd
t                    D             }fdt                    D             }fdt                    D             t          j
        t          j        fdt                    D                       }t                               |z   d          }	 fd}
t          j        d t                    D              D ]}|t!          dgz            k    rg }g }t                    D ]}||         dk    r|         }|         }n]||         dk    r||         }|         d|         f}n7||         dk    r+||         }|         |         |         z
  |         dz
  f}|                    |           |                    |            |
|	||          }	|	S )Nr"   c                     g | ]}|d z
  S r*  r)   )r4   r  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  s    
)
)
)Q1q5
)
)
)r*   c                 2    g | ]}d dz
  |z
  z           S r  r)   rS  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  rT  r*   c                 8    g | ]}d dz
  |z
  z  dz            S r  r)   rS  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  rV  r*   r!   rU   r  c                 H    | \  }}}t          j        ||k    ||k              S r:   r  )index_rangerx  lbubs       r+   index_range_conditionz7_reflection_pad_backward.<locals>.index_range_condition  s(    	2r b!r'222r*   c                 2    g | ]}|         |         z   S r)   r)   r4   rx  rY  xyzs     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  s&    ;;;1c!f|A&;;;r*   c                 2    g | ]}|         |         z
  S r)   r)   re  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  s&    AAALOc!f,AAAr*   c                 J    g | ]}d |         z  |         z   |         z
   S r,  r)   )r4   rx  dhwrY  rf  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  s3    OOOqQQZ,q/1CF:OOOr*   c                 V    g | ]%}|         d |         |         z   |         z   f&S r(  r)   )r4   rx  centerri  rY  rZ  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  sH       HIAs1vQ/-2BBC  r*   c                 2    g | ]} |                   S r)   r)   )r4   rx  rc  range_cs     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  s)    QQQ00<<QQQr*   r   c                 :   t                    D ];}||         d         ||         d         k     }t          |t                    r|r| c S <t          j        t
          j        
fd|D                       }t
                              	||z   d          }| |z   S )Nr"   r!   c                 &    g | ]} |          S r)   r)   )r4   r`  rc  s     r+   r7   z@_reflection_pad_backward.<locals>.accumulate.<locals>.<listcomp>  s%    PPPK"";//PPPr*   r   )rV   r3   r  rN   r   r   r  r  )r   r   index_rangesrx  upper_less_than_lowerr  gr	  rQ   rn   rc  s          r+   r/  z,_reflection_pad_backward.<locals>.accumulate  s     s 	 	A$0OA$6a9K$K!/66 ;P PPPP<PPP
 
 %%k4S#FFaxr*   c                     g | ]}g d S ))rU   r   r!   r)   r+  s     r+   r7   z,_reflection_pad_backward.<locals>.<listcomp>  s    #C#C#C1JJJ#C#C#Cr*   r   )r  r  rV   r  r~  rB   r  r  r  rN   r   r   r  r  	itertoolsr
   r  )rn   r5   r  r  rx  
view_shapeleft_reflectright_reflectr  r   r/  areaoutsrp  r   r`  r	  rk  ri  rQ   rc  rY  rZ  rm  rf  s   ` `             @@@@@@@@@r+   _reflection_pad_backwardrz    s}   
 g,,!
C
)
)!'3$%%.
)
)
)CCCCCCc

CCCLHHHHHU3ZZHHHMG16]] S SS16\

1u|AGAJqx@@@EEjQQRRRR#A
3$%%.C3 3 3 <;;;;c

;;;FAAAAAeCjjAAALOOOOOOE#JJOOOM      MRSVZZ  G QQQQQeCjjQQQ D $$[$F
CHHD        !#C#Cc

#C#C#CD 4 45!s####s 	- 	-AAw!||Qi%ajaB"1o"1vq,q/:aA#A&"1vs1va0@'@#a&1*MKK,,,,z$l33Kr*   r   r   r  c                f    t          j        | ||          }t          j        | ||          }||fS )Nr  )rB   aminra  )r}   rQ   r  r|  ra  s        r+   aminmaxr}    s:     :dW555D:dW555D:r*   r   c                    t                               t          j        t          j        |           d|           |||          S )Nr   r   )r   r   rB   rk   isnan)r}   rQ   r  r   s       r+   nansumr    s7     88EKD 1 11d;;S'QV8WWWr*   r   rM  r  rg  rM  c          	      N    t           j                            d| d||||          S )Nr   r!   r  r   r  
start_step)rq  r   rM  r  rg  s        r+   arange_defaultr    s2     ;!!	3vf "   r*   c          	      N    t           j                            | |d||||          S )Nr!   r  r  )rp  rq  r   rM  r  rg  s         r+   arange_startr    s2     ;!!sAU6&Z "   r*   c                      ddl m}  || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rI   rJ   r  s      r+   out_dtype_decompr    s)    AAAAAA?D+F+++r*   marginc                 N   	
 t          j                    t          j                   j        d         
 j        d         	t          j        |dk    p|dk    d            t          j         j        dk    o	dk     fd           t          j        j        dk    o                                
k    
fd           Nt          j                  t          j        j        dk    o                                	k    	fd                               d          t          j         d          }||z
   z   }|	                    d          }|dk    r|n||z  }|         z  }t          j
        	 j        	          }t          j        |k    |d          }|t          j        j        k    r|                                S |t          j        j        k    r"|                                |j        d         z  S |                    d
          S )Nr   r!   r"   c                      dS )Nz only p == 1 and p == 2 supportedr)   r)   r*   r+   r   z#multi_margin_loss.<locals>.<lambda>"  s    +M r*   c                      d j          S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  r  s   r+   r   z#multi_margin_loss.<locals>.<lambda>%  s    m`e`kmm r*   c                      d  dj          S )Nz#inconsistent target size, expected r%  r  )nframer   s   r+   r   z#multi_margin_loss.<locals>.<lambda>)  s    UfUUv|UU r*   c                      d  dj          S )Nz#inconsistent weight size, expected r%  r  )rQ   r   s   r+   r   z#multi_margin_loss.<locals>.<lambda>/  s    V#VVVV r*   r  r  r   )rB   
atleast_2d
atleast_1dr  r   r  r   rW   rc  rf  r  r  rk   r    r'   r~   r   r(   r   )r   r   rY  r  r   r   url   r  rQ   r  s   ``  `    @@r+   multi_margin_lossr    s"    U##Ef%%F[^F
+a.C	La!16#M#MNNN	L
a$C1Hmmmm   
Lq5V\\^^v5UUUUU   !&))K163!6VVVVV	
 	
 	
 a  FU000A
UA	AA!VVQAv
,s5<
0
0
0CC6M1a((AIN(((vvxx	im)	)	)uuww##vv!v}}r*   	is_targetc                    | j         |j         t          j        |           } t          j        |          }| j         d         }t          j        t	                    dk    o|dk    fd           t          j        t	                    dk    ok    fd           t          j        ||j                  }|dk    }t          j        t          j        |||          dd	          }||k     }t          j        ||d          }t          j	        | d|
          }	t          j        ||d          }
t          j
        ||
                    d          k    d          }d|	j                            d          z
  | z   }|                    d          }||z  }t          j        |d|          }|t          j        j        k    r)|                    d                                          }n@|t          j        j        k    r|                                }n|                    d          }|                    | j                                                }||fS )Nr!   r"   r   c                      d  S r  r)   )orig_input_shapes   r+   r   z0multilabel_margin_loss_forward.<locals>.<lambda>Q  s    r`prr r*   c                      d d  S )Nzinconsistent target size: z for input of size: r)   )r  orig_target_shapes   r+   r   z0multilabel_margin_loss_forward.<locals>.<lambda>U  s    f->ffTdff r*   r  rU   Tr  r  r   ri   )r   rU   )r  rB   r  r   r  r  r  r|  rk   rc  r  rW   Trf  r    r'   r~   r   r   r(   r<   r   r-  )r   r   r   rQ   r  is_endend_idxtarget_masktidx0r  tidx1r  rl   r  r  s                @@r+   multilabel_margin_loss_forwardr  B  sG    {U##Ef%%F
+a.C	L"/saxrrrr   
L!#M(9=M(Mfffff  
 ,s6=
1
1
1Cr\FjVS#66BMMMG-KKVQ//EU%000AKVR00E	#R!8!88a@@@Iacmmm###e+A	AA	CAIq!$$AIN(((EEgE##%%	im)	)	)EEGGEEgEU[))112CDDIi<r*   )	attn_maskrp   querykey	dropout_p	is_causalr  c                8    t          j        t          j                    fd           t          j                                         dk    o/                                dk    o                                dk     fd           t          j        dk    fd           t          j         j        d         j        d         k    oj        d         j        d         k    d            t
          j                             ||d |                     d                              d          k    		  	        \  }}|	                    d
ddd          
                    t           j                  	                    dd
dd          }||fS )Nc                      d j          S )Nz-query must be FP32, FP64, BF16, FP16 but got r   )r  s   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    MMM r*   rK  c                      d                                  d                                   d                                  S )Nz,q, k, v must be a 4 dimensional tensor, got r  r   )r  r  r~   s   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s=    huyy{{hhcggiihh[`[d[d[f[fhh r*   r   c                      d  S )Nz&dropout probability must be zero, got r)   )r  s   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    "V9"V"V r*   r   c                      dS )Nz&q, k, v should have the same head sizer)   r)   r*   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    8 r*   r!   )r  r  r  dropout_maskrp   
enable_gqar"   r   r_  )rB   r   rT  rQ   r  r   "_scaled_dot_product_attention_mathr  r5  r   r  rc  )	r  r  r~   r  r  r  rp   r  attns	   ````     r+   *scaled_dot_product_flash_attention_for_cpur    s    
L&&MMMM   
L		q@SWWYY!^@		q0@hhhhhh   
LSVVVV   
LA%+a.(KSYq\U[^-K88  
 :BB::a==CHHQKK/ C 
 
LFDF 	q!Q""	%"9	:	:	Aq!		 
 4<r*   c                 <    t          |           fd            }|S )Nc                  J     | i |}| d                              |          S ru   )r*  )rI   rJ   r   outplace_ops      r+   
inplace_opz$register_inplace.<locals>.inplace_op  s-    k4*6**Aw}}S!!!r*   r   )aten_opr  r  s    ` r+   register_inplacer    s7    G$$" " " " %$" r*   c                 f   |                                  s2|                                 st          |          }t          |          }t          j        ||          }t          |t          j                  r|dk    r||z  }|dk    r|S t          |t          j                  r|dk    r| |z  } | |z   S )Nr!   r   )rT  r  r  rB   r	  r3   numbersNumber)r}   batch1batch2re   ro   r  s         r+   baddbmmr    s     !!## DOO,=,= 4yyE

Yvv&&FeW^,,  

%qyydGN++ tqyyd{&=r*   c                 0    t          j        | |d          S )Nr  r  r  )r}   r   s     r+   floor_divider    s     9T58888r*   c                 L    t          j        t          j        | j        d          S r^   )rN   r   r  r   r  )r  s    r+   	sym_numelr    s    HL!'1555r*   r   r   c                    |"t           j                            | g |          S t           j                            | g ||          S )Nr   r  )r   r   dim_IntListIntList_out)r}   r   r   s      r+   sum_defaultr    sE     {x##D"E#:::x##D"Es#CCCr*   c           	         t          | t          j                  s| S |Lt          j                            | t          t          |                                                               S t          j                            | |g          S r:   )	r3   rB   r   r   r  dimsr|  rV   rQ   )r}   rQ   s     r+   squeeze_defaultr    sl     dEL)) 
{|  tE$((**,=,='>'>???|  u---r*   c                 4   t          fdt          t          | j                            D                       }|j        t
          j        k    rt
          j        nd }|                     d|d|          }| ||	                    |j                  z  z  |fS )Nc              3   (   K   | ]}|k    |V  d S r:   r)   )r4   rx  rQ   s     r+   r  z)_weight_norm_interface.<locals>.<genexpr>  s'      @@1qCxxQxxxx@@r*   r"   T)r  r   )
r  rV   r  r  r   rB   r  r  r  r<   )rL  rr  rQ   keep_dim
norm_dtyper  s     `   r+   _weight_norm_interfacer    s     @@@@c!'ll 3 3@@@@@H !5> 9 9tJ66!Xt:6>>DDGGAG$$$%t++r*   assume_uniqueinvertc                   t          | t          j                  st          j        | |j                  } t          |t          j                  s,|rt          j        | |          S t          j        | |          S ddlm}  ||	                                dt          | 	                                d          z  k               rt          | ||          S t          | |||          S )Nr  r   r[  g      $@g(\?r  r  )r3   rB   r   rl  r  ner
  rz  r\  r   r   isin_defaultisin_sorting)elementstest_elementsr  r  r\  s        r+   isinr    s     h-- N&x8LMMMmU\22 5 	58Hm4448Hm444DDDDDD~m))++dS9I9I55Q5Q.QQRR 
HmFCCCCm=
 
 
 	
r*   )re  c                *   |9t          j        |                                 t           j        | j                  }n9t          j        |                                 |t           j        | j                  }|| k                         | j                  }|S )Nr  )re  r   r  )rB   randr5  r   r  r<   r   )r}   re  raw_prY  s       r+   	bernoullir  $  s|     
499;;emDKPPP
IIKK-;	
 
 
 
$*%%AHr*   r  c                   |                                  dk    r t          j        | t          j                  S |j        dk    r| |k    }|r| n|S | j        d|j        z  z   }|                     |          }t          t          d|j         dz
  d                    }||k    	                    |          }|r| n|S )Nr   r   r*  rU   r!   r   )
r   rB   
empty_liker  r  r  r  r  rV   r  )r  r  r  rW  expanded_elem_shaper5   rQ   s          r+   r  r  7  s    ~~1
;;;;Q-'&tt3&".4-2D+DD)**A
b=--1266
7
7C
"
"s
"
+
+C"C44s"r*   c                &   |                                  }|                                 }|rt          j        ||g          }t          j        |d          \  }}|dd          |d d         k    }	t          j        |	ddgd          }	|r|	                                }	t          j        |	          }
|
                    d||	          }
|
d|                                          	                    | j
                  S t          j        |          \  }}t          j        ||          }t          j        ||                                k     |d          }||         |k    }|r|                                n|}|	                    | j
                  S )NT)stabler!   rU   r   F)rD  rB   r7  sortr|  logical_notr  r%  r   r-  r  searchsortedrk   )r  r  r  r  elements_flattest_elements_flatall_elementssorted_elementssorted_orderduplicate_maskr  sorted_test_elementsrX   r  test_idxcmps                   r+   r  r  D  s   $$&&M&..00 + y-1C!DEE(-
<(M(M(M%(,0DD.~1vuMM 	:+7799N//q,??A((()11(.AAA"'*-?"@"@a !5}EE;s%9%?%?%A%AA3JJ"8,=#)2coos{{8>***r*   c                 <    |                      d          }||         S rT   )r-  )r}   r  	flatteneds      r+   taker  a  s      R  IUr*   c                     |t           j        }|t           j        k    rt          |          }t                              | |j        |          S r;  )rB   rc  preserve_formatr   r   resizer  )r}   r   r`  s      r+   	resize_asr  h  sE    /----e44;;tU[;FFFr*   	ceil_modec                    t          j                    rt          S | j        j        dk    rt          S |                    d          }|                    d          }	|                     d          }
|                     d          }|                                dk    }|s?|                    d          }|                     d          } |                    d          }|                    d          }|                    d          }| j        t           j	        t           j
        fv }|rt           j        n| j        }t          j        ||z  ||	z  || j                  }|                     ||z  |
|z            }|                    ||z  |
|z            }|r|                    t           j                  }|                    d||          }|                    ||||	          }|r|                    | j                  }t!          j        |          }|                    |          }|s|                    d          }|S )	u  
    Decomposition of max_pool2d_with_indices_backward using scatter_add.

    This replaces the native implementation with a high-level decomposition
    that uses scatter_add for gradient accumulation. The scatter-based approach
    provides automatic optimization opportunities for Inductor and handles all
    pooling configurations without requiring specialized fallback paths.

    Algorithm:
        For each output gradient position, use the corresponding index from the
        forward pass to scatter the gradient to the input position. When multiple
        output positions select the same input position as max, scatter_add
        automatically accumulates their gradients.

    Complexity: O(B * C * H_out * W_out)
        Independent of kernel size, unlike traditional O(B * C * H_in * W_in * K²)
        approaches that iterate over input positions and kernel windows.

    Known Limitations:
        - FP16/BF16: Uses FP32 accumulation internally to preserve precision when
          many gradients accumulate to the same position (overlapping pooling windows).
          This adds slight overhead but ensures numerical stability.
        - Deterministic mode: Falls back to native implementation to ensure
          consistent results across runs

    Args:
        grad_output: Gradient w.r.t. pooling output [B, C, H_out, W_out]
        self: Original input tensor (for shape) [B, C, H_in, W_in]
        kernel_size: Pooling kernel size
        stride: Pooling stride
        padding: Pooling padding
        dilation: Pooling dilation
        ceil_mode: Whether to use ceil for output size calculation
        indices: Indices from forward pass (per-channel linear positions)

    Returns:
        Gradient w.r.t. input [B, C, H_in, W_in]
    mpsra  rU   rK  r   r!   r  r_  )rB   $are_deterministic_algorithms_enabledNotImplementedr  r  r5  rQ   rW   r   r   r  r   r  r-  r<   scatter_addrG   r   r  r  )rn   r}   r  r}  r  r  r  r  	in_heightin_width
out_height	out_width
is_batchedr  channelsuse_fp32_accumaccum_dtypegrad_input_flatgrad_output_flatr  rZ   r`  s                         r+    max_pool2d_with_indices_backwardr  q  sK   d 133  %'' 		"Iyy}}H!!"%%J  $$I qJ '~~a  !++A..##A&&1Jyy||H
 !&5=%.*IIN#1H%--{7HK kXH!	  O #**XzI5  ??:#8*y:PQQL  >+..u}== &11!\CSTTO !((Xy(SSJ  6]];#455
 /55M&&]&CCJ  +''**
r*   window_lengthc                L    t           j                            | d||||          S )a  hann_window(window_length, *, dtype=None, layout=None, device=None, pin_memory=False) -> Tensor

    Returns a Hann window of size :attr:`window_length` with ``periodic=True``.

    Equivalent to :func:`torch.hann_window` with ``periodic=True``.

    Args:
        window_length (int): the size of returned window.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): desired dtype. Default: global default.
        layout (:class:`torch.layout`, optional): desired layout. Default: ``torch.strided``.
        device (:class:`torch.device`, optional): desired device. Default: current device.
        pin_memory (bool, optional): if ``True``, pins the returned tensor. Default: ``False``.
    Tr  )r   hann_windowperiodic)r	  r   rM  r  rg  s        r+   r  r    s6    2 $$ %   r*   r  c                   ||nt          j                    }| dk    rt          j        d||||          S | dk    rt          j        d||||          S t	          j        |          }|r| dz   n| }t          j        |||||          }|dt           j        z  |dz
  z  z  }t          j        |          }|dz  d	z   }|r|	                    dd|           n|}	|	
                    |          S )
a@  hann_window(window_length, periodic=True, *, dtype=None, layout=None, device=None, pin_memory=False) -> Tensor

    Returns a Hann window of size :attr:`window_length`.

    .. math::
        w[n] = 0.5 - 0.5 \cos\!\left(\frac{2\pi n}{N-1}\right)

    where :math:`N` is ``window_length + 1`` when ``periodic=True`` (for spectral analysis),
    or ``window_length`` when ``periodic=False`` (symmetric window).

    Low-precision dtypes (``bfloat16``, ``float16``) are computed in ``float32`` then cast.

    Args:
        window_length (int): the size of returned window.
        periodic (bool, optional): if ``True``, returns a periodic window for use with STFT.
            Default: ``True``.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): desired dtype. Default: global default.
        layout (:class:`torch.layout`, optional): desired layout. Default: ``torch.strided``.
        device (:class:`torch.device`, optional): desired device. Default: current device.
        pin_memory (bool, optional): if ``True``, pins the returned tensor. Default: ``False``.
    Nr   r(  r  r!   r*  r   r   r   )rB   get_default_dtyperS  r  rG   r  r  picosr6  r<   )
r	  r  r   rM  r  rg  compute_dtyper2  r  windows
             r+   hann_window_periodicr    s-   D &EEE,C,E,EE{fV

 
 
 	
 zfV

 
 
 	
 /66M%8=A		 	 	A 	
S58^q1u%&A	!A	D3A.6=QXXaM***AF99Ur*   num_classesc                    |dk    r6t          |                                                                           dz   }t          j                            t          j        | dk              d           t          j                            t          j        | |k               d           |                     d          t          j	        || j
        | j                  k                        t          j                  S )NrU   r!   r   z+one_hot: Class values must be non-negative.z7one_hot: Class values must be smaller than num_classes.r  )r  r   r  r   _assert_asyncmsgrB   r  rW   r  r   r  r<   r  )r}   r  s     r+   one_hotr  b  s    b$((**//++,,q0	$!)5   		$$%%A  
 	r<4:dkJJJ	Kboor*   )FF)r   r:   r,  )r   NNr!   )rU   FFr  r(  r  )r!   r!   F)Fr   )r   ri   N)r   r!   N)Fru  )NNN)r   r   FT)r   r   Fr9  )r   Fr  )rU   (  rN   rt  r  r  r~  collections.abcr   r   
contextlibr   enumr   r   r   r	   r
   typingr   r   rB   torch._meta_registrationstorch._primsrm  r.  torch._prims_common_prims_commonrG   torch.nn.functionalr  r%  r   r   r   r   torch._decompr   r  r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   rE   torch.utils._pytreer   rN  DispatchKeyr   r|  str__annotations___opsr  r   r    r_  r  rP   r`  compute_only_pw_cast_for_opmathpw_cast_for_opmath"pw_cast_for_opmath_non_tensor_argsrF  pw_cast_for_int_to_realr  rY   rb   rd   rm   r  ry   r[  r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r  r'   r~   r  _safe_softmaxr  r  r  r  rZ   r   r#  r   r%  r1  r?  rI  rL  rQ  rT  rV  rX  r[  rn  rv  slicer  r  ru  r  r  r  r  r  r  r  r9  r=  rF  rL  rP  py_implCompositeImplicitAutogradAutogradrO  rd  rh  rn  rv  r0  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r6  r<  r@  unsafe_chunkrC  r?  r>  no_statsrG  rK  rW  r]  ra  rc  _fused_dropoutrf  rr  r  r`  r  lift
lift_freshru  rz  r}  r  r  r  r  r  _adaptive_avg_pool2dr  r  r  r  r  r  r  r  r#  r%  r!  r*  r0  	Generatorr2  r?  rB  rX  r  r_  re  rH  _upsample_nearest_exact1dra  rg  rM  rS  rZ  rE  rs  r|  r  r  r  r  r  r  r  r  rnn_tanhr   r  rnn_relur  r  r  r  r  r  r  r  lstmr  r  r  r  grur  r  r  r  r  r  r  r  r  r  r  r  r   r$  r+  r2  r  rN  rP  r  rj  r1  r  ra  rg  rk  rm  rq  rt  rz  r  r*  r  r  r  r  r  r  r  r  r  r  r  rd  upsample_bicubic2dr=  rA  reflection_pad1dreflection_pad2dreflection_pad3drJ  replication_pad1dreplication_pad2dreplication_pad3drM  rH  reflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardrz  r}  r  r  rj  rM  r  rp  r  r  r  r  +_scaled_dot_product_flash_attention_for_cpur  r  r  r  r  r   r  r  rQ   r  r  r  r  r  r  r  r  r  r  r  periodic_outr  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_r  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__r
  r  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__ldexp_leaky_relu_
leaky_relulogit_logitrelu_r  renorm_renormround_rH  scatter_r,  scatter_add_r  scatter_reduce_scatter_reducesilu_r  r)   r*   r+   <module>rp     s c              



 . . . . . . . . " " " " " "       % % % % % % % % $ $ $ $ $ $ $ $                        # # # # # #          , , , , , , , , , , 0 0 0 0 0 0 7 7 7 7 7 7                         * ) ) ) ) ) ( ( ( ( ( ( h" c   z~         %$)	' ''9' ' "	' ' ' 'T #*'8@# # # 
 WuDL    &-W8@ & & & "
 "'uDQ    c f     *++\2F 2v 2 2 2   ,+2 -..\4v 4& 4 4 4   /.4 .//\S S6 S S5 S S S   0/S
 )**\


 
 	

 
 
 
 
   +*
2 )*++( ( ,+( )*++"V " " " ,+" ())@f @ @ @ @   *)@ 122\f F      32 .//\PP%P05P@EP P P  0/P ''GF Gv G G G   ('G /00F & V      10 /00\:F :& :U : : :  10: 011\LL%L7<LNRL L L   21L *++\) ) )f )3 ) ) )   ,+)< *++5v 5f 5 5 5  ,+5 	""&v && & & &   #"& *++\>v >V > > > >   ,+>
 *++6 6 66 6 6 6 ,+6 344%%
% % 66>	% % % 54% 677



 
 	

 
 
 
 
 
 
   87
$ 122\<f <F <F <v < < <   32< 
##/ /v /& / / /  $#/v #         &&3<>3G1 1
1 1-011 1 1   '&1 .//\11 &1061CF1 1 1   0/1 *++0 0 0 ,+0 +,, ^)		1 	1
	1	1 	1 		1 	1 	1   -,	1 4<==%/5BEMR    >= 4?@@
R
R

R 
R 	
R
 
R 
R 
R 
R  A@
R 0899		%	/5	BE	NS	 	 	  :9	 0455
R
R

R 
R 	
R
 
R 
R 
R 
R  65
R#$#$
#$ #$ TM	#$
 #$ #$ #$ #$ #$ #$ #$L )**\Nf NF N N N N N   +*N( .//\..
. . TM	.
 . . . . . .  0/.b 011\$$
$ $ TM	$
 $ $ $ $ $ $  21$N 122 !^)	1 1
11 TM1 	1
 1 1 1   321& :;;\
 !^) 
  TM	
       <;  -.. ^)1 111 1 	1 1 1   /.1 677\
 ^)		 		
	 	 		
 	 	 	   87	 	"") ) )v )% ) ) )  #") ,--& &F &v & & &  .-& +,,	O	Oc	O 
	O 		O
 
	O 	O 	O 	O  -,	O 
)** 3? 3?
3? 
3? :	3?
 
t3? 3? 3? 3? +*3?l #d
14t
38_   . *++ $U $U$U	$U 
$U :	$U
 
t$U $U $U $U  ,+$UN ,--E Ed3i Ec ERU E E E  .-E
 .//OO&*3iO9<ODGOORO O O  0/O%+:?+    344\ XX!'X.1X@EX X X !   54X 788 KK!'K.1K@EK K K !   98K* * *& $$MMcM 3iM #Y	M
 IM M M M  %$M` $$]]c] c] 3i	]
 #Y] I] ] ] ]   %$]@ 455
 
v 
e 
 
 
  65
 ,--Y
Y"3iY47Y?BYJMYY Y Y  .-Y" +344;?
 

%
,1DL

 
 
  54
& %%kCDDk2336 e D4K    43 ED &% +,,VVA& AU A4$; A A A  -,A && S      '&4 )**F  T     +*6 '' $   	
      ('( 566  	
     76:DI    &\	  
&\	   *T&\    
tF| 
# 
 
 
 
"&\"	" " " " "J 0$/2EFGG
 	 &\	  
$	
    HG  	')C)GH  #	 
c 
 
f		
 
&\D     )0115 5 5C 5c 5%PSBT 5 5 5 215 4<==67B BB $S	B03B
63;B B B >=B 
)**/ / /C /c /%:L / / / +*/( -55)  // //
// &// 
// 63;	// // // //f 
##  f F # #      $#" .//
  

  	
       0/" 
##	 	 	f 	6 	 	 	 	 	   $#	 7?@@S&S&S& S& 	S&
 D=S& S& S& 
S& S& dS& 6D=&4-$67S& S& S&  A@S&n 7;<<  	
 D=   
  d , , , 6D=&4-$67   =<86D= FTM     7?@@QQQ 3iQ 	Q
 Q TMQ 4-Q dQ 6D=&4-$67Q Q Q A@Qj 7;<< 3i 	
  TM 4- d , , , 6D=&4-$67   =<4 ,455333i3 TM3 
	3
 66>3 3 3 653l 5=>>HHH 3iH 	H
 TMH dH 6D=&4-'(H H H ?>HVRRTMR 4-R 4-	R
 $R R R 
R R 6666D=&4-?@R R R Rj .//UK//((TM( 4-( 4-	(
 $( ( ( 
( 666!"( ( ( 0/ 0/(4 ''(<==''(MNN 
 
TM 
 4- 
 4-	 

 $ 
  
  
 
 
 666!" 
  
  
 ON >= 
F "";#HIIG G4< G G G JIG AIJJTM 4- 	
   
 666!"   KJ* 5=>>((TM( 4-( 	(
 ( ( ( 
( 666!"( ( ( ?>(  5>??((TM( 4-( 	(
 ( 
( 666!"( ( ( @?( @HIIKKTMK 4-K 	K
 K K K 
K 6666612K K K JIK4TM 4- 	
  
     : 4<==11TM1 4-1 	1
 1 1 
1 6666)*1 1 1 >=14 ?GHHCCTMC 4-C 	C
 C C 
C 6666669:C C C IHC8 2:;;11TM1 4-1 	1
 1 1 
1 6666)*1 1 1 <;14 +,,VV     -, && !%"&04+ + +
+ ;+
 L4+ + + &-+ + +  '&+b diABB   CB &&{';<<-..VVVV,, 4- 4-	
 $  !&    -, /. =<>   0899 TM 4-	
 $ } $  
 d  66D=&4-/0   :96 7?@@iii TMi 4-	i
 $i }i $i i 
i di 66D=&4-/0i i i A@iZ 7;<<""" TM" 4-	"
 $" }" $" " 
" d" ," ," ," 66D=&4-/0" " " =<"J 788VVV$$  4-	
 $ } tm    %$ 980 677VVV$$  4-	
 $ } tm     %$ 872 122d'v d'E#s(O d' d' d'   32d'N
)8<S	HK   6 )***8
*8*8 c*8 *8 *8  +**8Z )**494949 c49 I	49
 #Y49 49 49  +*49n (( H H HH	H H 	H H H H )(H '' I I II	I I 	I I I I  ('I$ %@ %@ %@%@	%@ %@ 	%@ %@ %@ %@ %@ %@P )122"";#HII   JI 322 ())<: <C <
 <J < < < *)< ((=* =3 =z =: = = =  )(=@@@$.@8B@PT@ @ @ @, 011Xx  (f (vv~)> ( ( (  !  21( %% ""(,	 	e	 *u
 %	    &%" &&; ; ; '&;
  8   /344/344/344$$[%JKK$$[%9::$$[%JKK$$[%9::$$[%JKK$$[%9::	3	3cT!	3 ;%	3 		3 	3 	3 ;: LK ;: LK ;: LK 54 54 54	3 6:;;6:;;6:;;#++K,QRR#++K,@AA#++K,QRR#++K,@AA#++K,QRR#++K,@AA	?	?cT!	? ;%	? 		? 	? 	? BA SR BA SR BA SR <; <; <;	?# # # #L 08$:Q:UVWW (()NOO (()=>>Dd;;;  ; ;;c; DL; 	; ; ; <; ?> PO XW; 	#+T-K-OP  '//0UVV'//0DEEDd;;;  G GGcG DLG 	G G G <; FE WV G 08$:Q:UVWW (()NOO (()=>>Dd;;; "!	G GGcG dlG dl	G
 G G G <; ?> PO XWG 	#+T-K-OP  '//0UVV'//0DEEDd;;; "!	S SScS dlS dl	S
 S S S <; FE WV S 08$:Q:UVWW (()NOO (()=>>Dd;;; "!!Q QQcQ dlQ dl	Q
 dlQ Q Q Q <; ?> PO XWQ 	#+T-K-OP  '//0UVV'//0DEEDd;;; "!!	 		c	 dl	 dl		
 dl	 	 	 	 <; FE WV 	 
 	 c  	
    4  &> > >/ / /  $ FK+ + + +\    & & & &,/- /- /- /-d&  &  & R +,,[BCC[122. . 32 DC -,.8 +,,[BCC[122. . 32 DC -,.8 *++KABBK011. . 21 CB ,+.@ *++KABBK011. . 21 CB ,+.@  / / / /6= = = =@/ / /d 	((>??-..S S /. @? )(S@ 	''=>>,--S S .- ?> ('S>; ; ;; ; ; &&{<=={+,,. . -, >= '&.6 ''=>>,--. . .- ?> ('.6 4899!))+*OPP!))+*>??  @? QP :9 3788 (()NOO (()=>>  ?> PO 98 3788 (()NOO (()=>>  ?> PO 98 04551566##K$IJJ##K$899%%k&KLL%%k&:;;&&{'LMM&&{';<<A A =< NM <; ML :9 KJ 76 65A /79O9STUU
 "	K KKcK K dl	K
 K K K  VUK 	%t'?'CD  !))+*>??
 "!U UUcU U dl	U
 dlU U U U  @? 
U 	&(A(EF  
 "!!
 

c
 
 dl	

 dl
 dl
 
 
 
  
V V V V/ / /7	&	7$,V$47IO77 7 7 7-'9 -f - - - - IIcI I 	I
 I I I IZ )122F v $    32 ,d.?@AA   BA +,--" " .-" /0119 9 9 219 2344C C 54C6 ABCCM M DCM23 
3 3  TM3  	3 
 3  66>3  3  3  3 l -..X~&&L
LL TML 	L
 L 66>L L L '& /.L@ /00X~&&L
LL TML 	L
 L 66>L L L '& 10L/F /u / / / / /5F 5u 5 5 5 5 5
 
3E 
 
 
 
(F%7 FV F F F F F!Xf% !& ! ! ! !NN#'N05NEJ\N N N N&f & & &T & & & &"/f / / / /T / / / /"!V !49 !T ! ! ! !$V $49 $T $ $ $ $ 233S StCy S S S S   43S  f4 f4f4
f4 f4 	f4
 f4 f4 f4 f4 f4 f4R ,--   
  	
       .-    	# 	#   ! 	# =>>$).:N1 1 1  ?>1$  d t    @ [BCC>??', yW yW yW yW  @? DCyWx 08$:Q:UVWW (()=>>
 ! T TTsCxT T T\	T
 T\T T T T   ?> XWTn /344$$[%JKK$$[%9::
 15	W WWsCx4'W W &-	W
 W W W   ;: LK 54
W4 -..-..-..	v 	c3h 	F 	 	 	   /. /. /.
	 .//.//.//	 	sCx 	V 	 	 	   0/ 0/ 0/
	38_ c3_f,- 	   8 677677677\T T  87 87 87Tn %%UEu      &% $$X4 X X X X  %$X ,dko>?? !% ="&
 
 
	
 ;
 L	

 L4
 
 
 
  @?
 *+,,
 !% ="&  	 ;	
 L L4    -, 	"", , #", .//''(<==  ^)) ))) ) 	)
 TM) ) ) ) )  >= 0/)X ;<<$,44[5IJJX{##--- - 66>	- - - $# KJ =<-x HPQQ
 A  $A A AA	A A 	A
 A }A 4<A 66>A A A RQAH   ''     (' )**9 9  +*9 ''6 6 ('6 )48<899 !%		D 	D 	D
	D ;	D 
$		D
 	D 	D 	D :9	D -t|/?@AA. .& .sTz . . . BA. 	=>>, , , ?>, 	""38 
 
 
 
  #"
( .// )-  
, % \	   0/$ 5: 
# 
# 
# 
# 
# <A + + + + +: 	""   #"
 ''G G G ('G =>>ss
s s s s s ?>sl )143C3GHII !%"&"&"   ; L4	
 L4 t     JIB )2D4D4QRSS 6 !%"&"&"6 6 666 ;	6
 L46 L46 t6 6 6 6  TS6r  t{ + + +  dj ) ) )  dj ) ) )   - - -  TY ' ' '  TY ' ' '  $. 1 1 1   / / /  "D$4 5 5 5   - - -  !4? 3 3 3  $. 1 1 1  #T%6 7 7 7  t{ + + +  !4? 3 3 3   - - -  dj ) ) )  !4? 3 3 3  dj ) ) )  TY ' ' '  t{ + + +  dj ) ) )   - - -  "D$4 5 5 5  %t': ; ; ;  TY ' ' ' kCDD & s F    ED  r*   