
    f,jY              #          d dl mZ d dlZd dlmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddgZ G d de          Zd	d
e de de de d	z   e_        	 	 	 	 	 	 	 d%dee         dee         dee         dee         dedz  dedz  dedz  dededz  dedededededededdf"d Zd! Zdee         dee         dee         dee         dedz  dedz  dededededededededdfd"Zdee         dee         dee         dee         dedz  dedz  dededededededededdfd#Zdee         dee         dee         dee         dedz  dedz  deez  dedededededededdfd$ZdS )&    )castN)Tensor   )_default_to_fused_or_foreach_device_dtype_check_for_fused_differentiable_doc_foreach_doc_get_scalar_dtype
_get_value_maximize_doc_params_doc
_to_scalar_use_grad_for_differentiable_view_as_real
DeviceDict	OptimizerParamsTAdagradadagradc                        e Zd Z	 	 	 	 	 	 ddddddedeez  d	ed
ededededz  dedededz  ddf fdZ fdZddZ	d Z
edd            Z xZS )r   {Gz?r   绽|=NF)maximizedifferentiablefusedparamslrlr_decayweight_decayinitial_accumulator_valueepsforeachr   r   r   returnc          
         t          |t                    r'|                                dk    rt          d          d|k    st          d|           d|k    st          d|           d|k    st          d|           d|k    st          d|           d|k    st          d|           ||||||||	|
d		}t	                                          ||           |
r0|	rt          d
          |rt          d          d| _        d| _        | j	        D ]}|d         D ]}| j
        |         }|d         r0t          j        dt          |d                   |j                  n!t          j        dt                                |d<   t          j        |          rt#          ||          n|}t          j        ||t          j                  |d<   d S )Nr   zTensor lr must be 1-element        zInvalid learning rate: zInvalid lr_decay value: zInvalid weight_decay value: z)Invalid initial_accumulator_value value: zInvalid epsilon value: )	r   r   r!   r   r    r"   r   r   r   z)`fused` does not support `differentiable`z0`fused` and `foreach` cannot be `True` together.Tr   r    is_fuseddtypedevicer*   stepmemory_formatsum)
isinstancer   numel
ValueErrorsuper__init__RuntimeError"_need_device_dtype_check_for_fused_step_supports_amp_scalingparam_groupsstatetorchzerosr
   r+   tensor
is_complexcomplex	full_likepreserve_format)selfr   r   r   r   r    r!   r"   r   r   r   defaultsgrouppr:   
init_value	__class__s                   S/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/torch/optim/adagrad.pyr5   zAdagrad.__init__   sk    b&!! 	<bhhjjAoo:;;;byy;r;;<<<hBBBCCCl""JLJJKKK///W<UWW   czz<s<<===  ()B ,

 

 	*** 	3 P"#NOOO W"#UVVV6:D3.2D+& 	 	E8_  
1 W~FEK/wHHH x    c1B1D1DEEE f '**3G57PQQQ2 
  %z1F     e!	 	    c                    t                                          |           d }| j        D ]}|                    dd            |                    dd           |                    dd           |                    dd           }|d         D ]}| j                            |g           }t          |          dk    rt          j        |d                   slt          |d                   }|d         r*t          j
        |t          |	          |j        
          n!t          j
        |t                                |d<   t          | j                                                  }t          |          dk    ot          j        |d         d                   }|s?|D ]>}	t          j
        t          |	d                   t          |	                    |	d<   =d S d S )Nr"   r   Fr   r   r   r   r-   r'   r)   r,   )r4   __setstate__r9   
setdefaultr:   getlenr;   	is_tensorfloatr=   r
   r+   listvalues)rB   r:   r   rD   rE   p_statestep_valstate_valuesstep_is_tensorsrG   s             rH   rK   zAdagrad.__setstate__d   s   U### & 	 	EY---Z///-u555$$Wd33E8_  *..B//w<<1$$U_WV_-M-M$$WV_55H !>O$"3U"C"C"C#$8    #\(:K:M:MNNN FO	 DJ--//00l++q0 
eoOF#7
 7
  	!  !L!F)$$,=u,M,M,M  &			 	 rI   c                 ~    | j         D ]4}|d         D ])}| j        |         }|d                                          *5dS )z6Calls tensor.share_memory_() on the state sum tensors.r   r0   N)r9   r:   share_memory_)rB   rD   rE   r:   s       rH   share_memoryzAdagrad.share_memory   sZ    & 	- 	-E8_ - -
1e**,,,,-	- 	-rI   c                    d\  }}|d         D ]}|j         |d         r't          | dd          rt          |           d| _        ||j         j        z  }|t          j        |          z  }|                    |           |                    |j                    | j        |         }	t          |	          dk    r|d         rt          |           |d         r0t          j
        dt          |d         	          |j        
          n!t          j        dt                                |	d<   | j        d         }
t          j        |          rt          |
|
          n|
}t          j        ||t
          j                  |	d<   |                    |	d                    |                    |	d                    ||fS )N)FFr   r   r7   TFr   r&   r'   r)   r%   r,   r-   r    r.   r0   )gradgetattrr   r7   	is_sparser;   r>   appendr:   rN   r<   r
   r+   r=   rC   r?   r@   rA   )rB   rD   params_with_gradgrads
state_sumsstate_stepshas_sparse_gradhas_complexrE   r:   r    rF   s               rH   _init_groupzAdagrad._init_group   s   '3$x (	2 (	2Av!> Dg8' ' D
 2!444>CD;16#33u/222 ''***QV$$$
1u::??W~ 95a888 !>J"3U7^"L"L"L#$8    #\#5F5H5HIII &M 1531-
 !+A..7 9;TUUU6 
 $)?:U5J$ $ $E%L !!%,///""5=111++rI   c                    d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}g }g }g }g }|                     |||||          \  }}	t	          |||||d         |d         |d         |d         ||d         |d         |d         |	|d	         t          | d
d          t          | dd                     |S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r!   r"   r   r   r   
grad_scale	found_inf)r   r   r   r!   rd   r"   r   r   re   r   rh   ri   )r;   enable_gradr9   rf   r   r]   )
rB   closurelossrD   r`   ra   rb   rc   rd   re   s
             rH   r-   zAdagrad.step   s`    "$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & 	 	E-/"$E')J(*K+/+;+;'
K, ,(O[  ;">2z*%L /i(z*$%56'Gn"4t<<!$T::!    & s   /33)r   r   r   r   r   N)r#   NN)__name__
__module____qualname__r   rP   r   boolr5   rK   rZ   rf   r   r-   __classcell__)rG   s   @rH   r   r      sW        "+,#E $!E E EE FNE 	E
 E $)E E E E E d{E 
E E E E E EN! ! ! ! !F- - - -,, ,, ,,\ "* * * "!* * * * *rI   a[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        z	
        a  
        fused (bool, optional): whether the fused implementation (CPU and CUDA only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementations does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    Fr   ra   rb   rc   r   rh   ri   rd   r"   r   re   r   r   r   r!   r   r#   c                4   t          d |D                       st          d          ||t          | |	d          \  }}|d}|d}|r-t          j                                        rt          d          |r-t          j                                        rt          d          |r&t          j                                        st          }n/|r&t          j                                        st          }nt          } || ||||||||||	|
||           dS )	ztFunctional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rm   )r1   r;   r   ).0ts     rH   	<genexpr>zadagrad.<locals>.<genexpr>6  s.      @@qz!U\**@@@@@@rI   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizersz4torch.jit.script not supported with fused optimizers
r   r   r   r!   rd   r   r   re   rh   ri   )	allr6   r   r;   jitis_scripting_fused_adagrad_multi_tensor_adagrad_single_tensor_adagrad)r   ra   rb   rc   r   rh   ri   rd   r"   r   re   r   r   r   r!   r   _funcs                     rH   r   r     sn   2 @@K@@@@@ 
^
 
 	
 }1Ne
 
 

7 } U59))++ USTTT S'')) SQRRR &UY++-- &	 &//11 &$%D!'%     rI   c                 V    |                                  }t          j        |||          S rm   )sizer;   sparse_coo_tensor)r\   grad_indicesrR   r   s       rH   _make_sparser   g  s$    99;;D"<>>>rI   c          
      >   ||t          d          t          j                                        st	          |          }t          | |||d          D ]F\  }}}}|dz  }t          |          }|s|n| }|dk    r-|j        rt          d          |	                    ||          }|d|dz
  |z  z   z  }|j        r|
                                }|                                }|                                }|                    t          |||                    d                               |                    |          }|                                                                                    |	          }|                    t          ||||z            |            Xt          j        |          }|r<t          j        |          }t          j        |          }t          j        |          }|                    ||d	           |r|                                |	z   }n'|                                                    |	          }|                    ||| 	           |r(t          j        |          }t          j        |          }Hd S )
N,Expected grad_scale and found_inf to be NoneT)strictr   r   z;weight_decay option is not compatible with sparse gradientsalpha   value)AssertionErrorr;   r{   r|   r   zipr   r^   r6   addcoalesce_indices_valuesadd_r   powsparse_masksqrt_r>   view_as_realaddcmul_sqrtaddcdiv_view_as_complex)r   ra   rb   rc   rh   ri   r   r   r   r!   rd   r   r   re   paramr\   	state_sumstep_tr-   clrr   grad_valuesstd
std_valuesr>   s                            rH   r   r   l  s   " !6KLLL9!!## ^^*-z;t+ + + *= *=&tY 	!&!!#.tt$1~ "Q   88E866DAX--.> 	===??D==??L,,..KNN<lKOOA<N<NOOPPP''--C,,..33C88JJJT<z1IJJSVRV      )%00J 2)$//!.y99	*511tT333 1nn&&,nn&&++C00NN4SDN111 =-e44!1)<<	U*= *=rI   c                P   |rt          d          ||t          d          t          |           dk    rd S t                    t          j        | |||g          }|                                D ]-\  \  }}}}}t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }|
ot          d |D                       }|rt          ||||||	d|||||           |rt          |||           |rt          j        |          }t          j                                        s9|d         j        r,t          j        |t          j        dd	          d
           nt          j        |d           |dk    r1|rt          j        |||
           nt          j        |||
          }fd|D             }t          j        |||d           t          j        |          }t          j        ||	           |dk    s|rt          j        ||           |}nt          j        ||          }t          j        |||           /d S )Nz#_foreach ops don't support autogradr   r   c              3   $   K   | ]}|j         V  d S rm   )r^   )ru   r\   s     rH   rw   z(_multi_tensor_adagrad.<locals>.<genexpr>  s5       9
 9
#DN9
 9
 9
 9
 9
 9
rI   Try   g      ?cpu)r+   r   r   c                 H    g | ]} d t          |          d z
  z  z   z  S )r   )r   )ru   r-   r   r   s     rH   
<listcomp>z)_multi_tensor_adagrad.<locals>.<listcomp>  sD     
 
 
>BRC1
4((1,889
 
 
rI   r   )r   rN   r   r   "_group_tensors_by_device_and_dtyperR   r   rQ   r   anyr   r   r;   _foreach_negcompileris_compilingis_cpu_foreach_add_r=   _foreach_add_foreach_addcmul__foreach_sqrt_foreach_mul__foreach_mul_foreach_addcdiv_)r   ra   rb   rc   rh   ri   r   r   r   r!   rd   r   r   re   grouped_tensorlistsdevice_params_device_grads_device_state_sums_device_state_steps_r   device_paramsdevice_gradsdevice_state_sumsdevice_state_stepsdevice_has_sparse_grad	minus_clrr   	numerators         ` `                   rH   r~   r~     sH   "  DBCCC!6KLLL 6{{a	BB#F	
K0   &&((M? M? 		 	T&\>::DL-88 f/ABB!$v,0CDD!0 "
S 9
 9
'39
 9
 9
 6
 6
 " 	"!")! $!-'%#       	J-7HIII 	< -l;;L ~**,, 	71CA1F1M 	7"ELU$C$C$C3      2A6661 #L-|TTTTT$1 -|     
 
 
 
 
FX
 
 
	 	 1<UVWWWW!"344C%%%1i888$II*<CCIy#>>>>[M? M?rI   c                >   | sd S |
s|rt          d          |rt          d          |	|j        |ini }|	|j        |ini }t          |t                    r!t	          |j                  dk    r	|j        |ind }t          j        | |||g          }|                                D ]t\  \  }}\  \  }}}}}t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }d\  }}|+|
                    ||                    |d                    }|+|
                    ||                    |d                    }|&||vr"|                    |d          ||<   ||         }t          j        |d           t          j        ||||||||	|||	           |&t          j        ||gt!          |          z             vd S )
Nz5`fused` does not support sparse grad or complex paramz<adagrad with fused=True does not support differentiable=Truer   )NNT)non_blocking)r+   r   r   )r   r   r   r!   r   rh   ri   )r6   r+   r1   r   strr   r   itemsr   rQ   rL   tor;   r   _fused_adagrad__foreach_sub_rN   )r   ra   rb   rc   rh   ri   r   r   r   r!   rd   r   r   re   grad_scale_dictfound_inf_dictlr_dictgrouped_tensorsr+   r   r   r   r   r   r   r   r   r   device_grad_scaledevice_found_infs                                 rH   r}   r}     s   "   T+ TRSSS 
J
 
 	

 ,6+A	J''r  *3)>	9%%B  &b&11Wc")nn6M6MBSW   B	
K0 O 
			 	 + + 	 	
	T&\>::DL-88 f/ABB!$v,0CDD.8++! / : :
f4@@! !  -88	V$??    6#8#8 ee6eEEGFOB.222%(&	
 	
 	
 	
 '"%5$6=O9P9P$P  S+ +rI   )NNNFNFF)typingr   r;   r   	optimizerr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   __all__r   __doc__rQ   rq   rP   r   r   r   r~   r}   r&   rI   rH   <module>r      s
                                              $ i
 K K K K Ki K K K^4	  
  
  
  5. p  $# " G GLG<G VG f	G
 $;G G }G G D[G G G 	G  !G" #G$ 
%G& 'G( 
)G G G GT? ? ?
A=LA=<A= VA= f	A=
 A= }A= 	A= A= A= 
A= A= A= A= A=  
!A= A= A= A=Hl?Ll?<l? Vl? f	l?
 l? }l? 	l? l? l? 
l? l? l? l? l?  
!l? l? l? l?^SLS<S VS f	S
 S }S 	S S S 
S S S S S  
!S S S S S SrI   