
    a,j&                     
   U d dl Z d dlZd dlZd dlZd dlmZmZmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZm Z  d dl!m"Z" d d	l#m$Z$m%Z%m&Z'm(Z(m)Z)m*Z*m+Z+m,Z, d d
l-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z6 d dl7m8Z8 g dZ9dZ:dZ;dZ<dZ=e>e?         Z@e1ez  ejA        z  eBz  eCz  e?z  ZDeDeEeD         z  eFeD         z  eGe?df         z  ZHeGe?eHf         ZIeEeI         ZJeGe?eIeJz  f         ZK e>            ZLe>e         eMd<   e jN        d             ZOe
 G d d                      ZPe
 G d deP                      ZQ	 	 	 dKdejR        de?de?d eSd!eSd"e@fd#ZT G d$ d%          ZUdLd&ZVddd'dejR        d(eFejW        jX        d)f         d*eSd+e>ejR                 dz  d,ePdz  d"eQfd-ZYd.eGe?eHf         d/eKd0eQd"dfd1ZZd2ejR        ejW        jX        z  d3e?d"efd4Z[d5eGe?ef         d0eQd"eGe?ef         fd6Z\ ej]                    dejR        d0eQd"eGe?eHf         fd7            Z^ ej]                    dejR        d5eGe?eHf         d0eQd"e3fd8            Z_d9ejW        jX        d"dfd:Z`d5eKd"eGe?eHf         fd;Zad9ejW        jX        d5eGe?eHf         d0eQd"eKfd<Zb ej]                    dejR        d=eFejW        jX        d)f         d0eQd"eKfd>            ZcdejR        d9ejW        jX        d/eKd0eQd"eKf
d?Zd ej]                    dejR        d=eFejW        jX        d)f         d5eKd0eQd"df
d@            Zeddd'dejR        d+e>ejR                 dz  d,ePdz  d"eGe?eHf         fdAZfddd'dejR        d=ejW        jX        eejW        jX                 z  d+e>ejR                 dz  d,ePdz  d"eKf
dBZgddd'dejR        d=ejW        jX        eejW        jX                 z  d+e>ejR                 dz  d,ePdz  d"eFeGe?eHf         eKf         f
dCZhdejR        d5eGejR        eGe?eHf         f         eGe?eHf         z  d"eGe?eHf         fdDZiddEdejR        d.eGe?eHf         d,ePdz  d"e3fdFZjddEdejR        d=ejW        jX        eejW        jX                 z  d/eKd,ePdz  d"df
dGZkddEdejR        d=ejW        jX        eejW        jX                 z  d.eGe?eHf         d/eKd,ePdz  d"e3fdHZleddEdejR        d,ePdz  d"dfdI            ZmeddEdejR        d=eFejW        jX        d)f         d,ePdz  d"dfdJ            ZndS )M    N)Callable	GeneratorIterable)asdict	dataclassfield)chain)Anycastno_type_check)ShardedTensor)_broadcast_state_dict_distribute_state_dict_flatten_state_dict_gather_state_dict_offload_state_dict_to_cpu_unflatten_state_dict)_CHECKPOINT_PREFIX)FullOptimStateDictConfigFullStateDictConfigFullyShardedDataParallelOptimStateDictConfigShardedOptimStateDictConfigShardedStateDictConfigStateDictConfigStateDictType)._get_module_fsdp_state_if_fully_sharded_moduleFSDP_WRAPPED_MODULE)DTensor)_IncompatibleKeys)DistributedDataParallel)tree_map_only)FQNS_TPrimitiveType	ValueTypeDictValueTypeListDictValueTypeOptimizerStateTypeStateDictOptionsget_model_state_dictget_optimizer_state_dictget_state_dictset_model_state_dictset_optimizer_state_dictset_state_dict_flat_paramparam_groupsparamsstater%   _patched_state_dictc               #      K   t          j                    } t          j                     	 d V  | rt          j                     d S d S # | rt          j                     w w xY wN)gc	isenableddisableenable)
is_enableds    g/var/www/html/banglarbhumi/venv/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py_gc_contextr=   Q   sh      JJLLL 	IKKKKK	 	: 	IKKKK	s   A Ac                       e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed	<   dZeed
<   dZeed<   dS )r)   ap  
    This dataclass specifies how get_state_dict/set_state_dict will work.

    - ``full_state_dict``: if this is set to True, all the tensors in the
      returned state_dict will be gathered. No ShardedTensor and DTensor
      will be in the returned state_dict.

    - ``cpu_offload``: offload all the tensors to cpu. To prevent CPU OOM, if
      ``full_state_dict`` is also true, then only the rank0 will get the
      state_dict and all other ranks will get empty state_dict.

    - ``ignore_frozen_params``: if the value is True, the returned state_dict
      won't contain any frozen parameters -- the ``requires_grad`` is False.
      The default value is False.

    - ``keep_submodule_prefixes`` (deprecated): when ``submodules`` is not None, this option
      indicates whether to keep the submodule prefixes from the state_dict keys.
      or example, if the submodule is ``module.pretrain`` and the full FQN of
      the parameter is ``pretrain.layer1.weight`` of the param. When this option
      is True, the parameter's key in the returned state_dict will be
      ``pretrain.layer1.weight``. If the options is False, the key will be
      ``layer1.weight``.
      Note that if ``keep_submodule_prefixes`` is False, there may be conflicted
      FQNs, hence there should be only one submodule in ``submodules``.

    - ``strict``: the ``strict`` option when ``set_state_dict`` calls
      model.load_state_dict().

    - ``broadcast_from_rank0``: when the option is True, rank0 should receive a
       full state_dict and will broadcast the tensors in the state_dict/
       optim_state_dict one by one to other ranks. Other ranks will receive
       the tensors and shard according to the local shards in the model and
       optimizer. ``full_state_dict`` must be set to True when using this option.
       This option currently only supports DTensor, not the legacy ShardedTensor.
    Ffull_state_dictcpu_offloadignore_frozen_paramsTkeep_submodule_prefixesstrictbroadcast_from_rank0flatten_optimizer_state_dict_fqn_modifiersdsd_fqn_modifiersN)__name__
__module____qualname____doc__r?   bool__annotations__r@   rA   rB   rC   rD   rE   rG   str     r<   r)   r)   \   s         " "H "OT!!!K!&$&&&$(T(((FD!&$&&&). $...-s-----rP   r)   c                   N   e Zd ZU  ee          Zeeej        z  e	ej        z  f         e
d<    ee          Zeeej        z  e	ej        z  f         e
d<    ee          Zee         e
d<   dZee
d<   dZee
d<   ej        Zee
d<    ee          Zeej                 e
d	<   d
S )_StateDictInfo)default_factoryfqn_param_mappingshared_params_mappingsubmodule_prefixesThandle_modelhandle_optimfsdp_contextfsdp_modulesN)rH   rI   rJ   r   dictrT   rN   torchTensorr#   rM   rU   setrV   rW   rL   rX   
contextlibnullcontextrY   r   listrZ   nnModulerO   rP   r<   rR   rR      s        
 	d### tel	 $ $ $ 	d### 4el	 $ $ $ $)5#=#=#=C===L$L$'3L(333$)E$$?$?$?L$ry/?????rP   rR   rF   TmodelnamerG   skip_ddp_prefixskip_compiler_prefixreturnc                 x   |                     t          d          }d|vr|hS |                    d          }g }| }t          |          D ]?\  }}	t	          |t
                    r8|	dk    rt          d|	 d          |j        }|s|                    |	           St	          |t                    r|t          |          dz
  k     rZ||dz            t          k    rFd                    |          t          |t                    }
r dfd|
j        D             c S t          |t                    }|	t          k    r%|                    |	           t          ||	          }t	          |t           j        j        j                  r9|	dk    rt          d	|	 d          |j        }|s|                    |	           |t+          ||          rM t          ||                                          |	          x}r t+          ||          rt          ||          }|                    |	           |	t.          j        j        j        k    r'|t          |          dz
  k    rt5          d
          /t          ||	          }Ad                    |                               t          d          hS )a  
    This API is used to convert the name of a parameter to the FQNs. For FSDP
    without `use_orig_params`, the name of FlatParameter can be mapped to
    multiple original parameters. As a result, the return type of this function
    is `set[str]`.

    Args:
        module (nn.Module): the root model.
        name (str): the name
        skip_ddp_prefix (bool): whether to skip DDP's `module` prefix

    Returns:
        The canonical FQNs based on the model traversal.
     .modulezExpected 'module', got ''   c                     h | ]} | 	S rO   rO   ).0fqnprefixs     r<   	<setcomp>z_get_fqns.<locals>.<setcomp>   s$    EEES6(3((EEErP   	_orig_modzExpected '_orig_mod', got 'z-Expect `_extra_state` to be the last obj name)replacer   split	enumerate
isinstanceDDPAssertionErrorrl   appendFSDPlen_FLAT_PARAMjoingetattr_fqnsr   r\   _dynamo
eval_frameOptimizedModulert   hasattrgetrb   modules_EXTRA_STATE_KEY_SUFFIXRuntimeError)rd   re   rG   rf   rg   	obj_namesfqn_obj_namescurr_objicurr_obj_name
flat_paramremoved_fqnrr   s               @r<   	_get_fqnsr      s   . <<*B//D
$v

3IMH%i00 &< &<=h$$ %	<(($%P%P%P%PQQQH" 4$$]333$'' 	<3y>>A%%%)AE*:k*I*I-00$X{;;
 * &\\\FEEEEJ4DEEEEEEx)<==H 333$$]333"8];;%-":"JKK 	<++$%S=%S%S%STTT)H' 4$$]333 x!233 B"F'(4E"F"F"H"H"L"L!# # ; B x55 B#*8[#A#A  ///
 1 IIII***&'VWWW + #8];;HH]##++,>CCDDrP   c                       e Zd ZdS )_EXTRA_STATEN)rH   rI   rJ   rO   rP   r<   r   r      s        DrP   r   c              #      K   t                      dt          j        dt          dt          ffd | d          E d {V  d S )Nrl   curr_fqnrh   c              3     K                        |            |r| dnd}|                                 D ]i\  }}|v r
t          |           r7| t          |                                                       v r|d d         }n| | } ||          E d {V  jt          |                     d          |                     d                    D ]\  }}|| j        v r| | }||fV  t          | j	        dt          j        j                  t          j        j        k    r.| t          j        j        j         }|t!                      fV  d S d S )Nrk   rj   F)recurseget_extra_state)addnamed_childrenr   r   valuesr	   named_buffersnamed_parameters_non_persistent_buffers_set	__class__rb   rc   r   r   rl   r   r   )	rl   r   re   	submodulenew_fqnobjrG   r   visited_moduless	         r<   r   z+_iterate_valid_model_state.<locals>.recurse   s     F###%-5h>>>>2%4466 	3 	3OD)O++  122.>GF,=>>@@GGIIII #3B3-%-t--wy'2222222222   //1H1HQV1H1W1W
 
 	 	ID# v999!)4))G3, F$&79RSSy() ) "N2:#4#LNNG<>>))))))	) )rP   rj   )r^   rb   rc   rN   r   )rd   rG   r   r   s    `@@r<   _iterate_valid_model_stater      s~      &)eeO *	  *S  *Y  *  *  *  *  *  *  *  *D wub!!!!!!!!!!!rP   )
submodulesoptionsoptims.
optim_onlyr   r   c                p   |rt          j        dt          d           |r|st          d          |pt	                      }i }i }t          |           D ]\  }}t          |t                    rt          | |          }	|	                    |          }
|
Et          t          t                   ||                                       |	           ||         ||<   n|	                                ||<   |	D ]}
t          |t                    s|||
<   t          |                                          D ]'\  }}|D ]}
t          t"          j        |          ||
<    (t                      }|rt          |          }|                                 D ][\  }}||vr
t          | |          }	t)          |	          dk    rt+          d          |                    d |	D                        \|j        r|j        st1          d	          t3          j        |           }|r|j        rJt7          |j        |j        
          }t;          |j        |j        p|j        
          }t<          j        }n6tA          |j                  }tC          |j                  }t<          j"        }tF          j$        d             }tK          j&        || |||          }ntF          j'        }tQ          di tS          |          ||||t          t          tT          j+                 |          | t)          |          dk    dS )zW
    Verify the model and options passed by the user and generates _StateDictInfo.
    zGetting submodules only model/optim state_dict is deprecated and will be removed in 2.5. This feature can be achieved by manually filtering out the state_dict returned from get_state_dict.   
stacklevelz;Optimizers are not passed in but optim_only is set to True.Nrn   z)Submodule FQN should only have 1 instancec              3       K   | ]	}| d V  
dS )rk   NrO   )rp   rq   s     r<   	<genexpr>z"_verify_options.<locals>.<genexpr>K  s(      %@%@Ciii%@%@%@%@%@%@rP   z?full_state_dict must be True when broadcast_from_rank0 is True.)offload_to_cpu
rank0_only)r   c              3     K   t          j                    5  t          j        ddt                     t	          j        | |||          5  d V  d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )NignorezFSDP.state_dict_type)messagecategoryrl   state_dict_typestate_dict_configoptim_state_dict_config)warningscatch_warningsfilterwarningsFutureWarningr|   r   r   s       r<   $fsdp_state_dict_type_without_warningz=_verify_options.<locals>.fsdp_state_dict_type_without_warningi  s#      (** 
 
'&<}    )!$3&7,C	     EEE              	
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s5   5A4AA4A 	 A4#A 	$A44A8;A8r   r   )rT   rU   rV   rY   rZ   rW   rX   rO   ),r   warnr   r   r)   r   rx   r   r   r   r   r^   rN   updatecopyra   itemsr\   r]   named_modulesr}   rz   rD   r?   
ValueErrorr|   rZ   r   r@   r   r   FULL_STATE_DICTr   r   SHARDED_STATE_DICTr_   contextmanager	functoolspartialr`   rR   r   rb   rc   )rd   r   r   r   r   rT   rU   re   paramfqnsrq   param_fqns_rV   rl   rZ   r   r   r   r   rY   s                        r<   _verify_optionsr     s     
I 	
 	
 	
 	
  
& 
I
 
 	
 +)++GKMOQ1%88 / /ee\** 	%%##E**?S,U344;;DAAA+<U+C!%(( (,yy{{e$ 	/ 	/Ce\22 /).!#&	/ 399;;<< D D 	D 	DC)-elF)C)C!#&&	D $'55 A__
!//11 	A 	ALD&Z''UD))D4yyA~~$%PQQQ%%%@%@4%@%@%@@@@@# 
G,C 
M
 
 	
 $U++L  /." 	? 3&2w?R! ! ! '?&2#/O73O' ' '# ,;OO 6&2! ! ! 'B&2' ' '# ,>O		"	 	 
#	"	$ !(0+/$;
 
 
 "- 	 	
//	+3-!$ry/<88#^&kkAo	 	 	 	 	rP   model_state_dictoptim_state_dictinfoc                    |j         D ]"}t          |          }|t          d          #|j        rP| sN|j        sG|j        s@|j        r|j        s2|j        r+|j	        s$t          dt          j                    d          |j        r)|s'|j        r|j        s|j	        st          d|           | D ]%}t          |v rt          | dt           d          &d S )Nz)Expected a fsdp_state with a fsdp module.z}The option indicates that model state_dict is required to save or load, but model state_dict is empty.rank = dist.get_rank()=rk   zgThe option indicates that model state_dict is required to save, or load but optim state_dict is empty. z
 contains z6. This can happen if the model is not the root module.)rZ   r   rz   rW   rV   rA   r@   r?   rC   rD   r   distget_rankrX   r~   )r   r   r   rl   
fsdp_statekeys         r<   _verify_state_dictr     s   
 # N NCFKK
 !LMMM  	
 
 '
 )	

 !

 '+&:
 K
 )
 *moo* * *
 
 	
  	 	%	*.*>	 .	
 M:JM M  
    # * *+ * * *    rP   r   apic                     t          | |          }|t          v r)t          j        t          | j        |          |           }|S )N)self)r   r4   r   r   r   )r   r   calls      r<   _state_dict_fnr     sC    3D""" !<!<3GGGKrP   
state_dictc                     |j         r@|j        rt          j                                        sdnd}t          | |j        |          S |j        rt          |           S | S )NrO   )r   )r@   
ranks_only)r?   r@   r\   distributedis_initializedr   r   )r   r   r   s      r<   _maybe_full_or_cpu_state_dictr     s       $,1,=,L,L,N,NBB 	
 "D$4
 
 
 	
 
	 )*555rP   c           	         |j         si S |                                5   t          | d                      }d d d            n# 1 swxY w Y   t          |                                          D ]}t          | |          }t          |          dk    r%t          d| dt          |           d|           t          t          |                    }||k    rDdt          fd} |||          st          d| d	|           |                    |          ||<   |j        r^i }|D ]W}|j        D ]M}|                    |          s|j        r||         ||<   +|t          |          d          }	||         ||	<   NX|}|j        rL|                                 D ]7\  }}
|
j        rt          | |          }|D ]}|                    |           8t'          ||          S )
Nr   rn   Expected 1 FQN for key '', got z: rh   c                 T   t          |          t          |           k    rdS |                    d          }|                     d          }d}t          |          D ]I\  }}|||         k    r1|dz  }|t          |          k    r|t          |          dz
  k    c S B|dv rG dS dS )NFrk   r   rn   )rl   rt   T)r}   rv   rw   )r   rq   	fqn_split	key_splitfqn_idxkey_idxkey_names          r<   verifyz%_get_model_state_dict.<locals>.verify  s    s88s3xx'' 5IIcNN	IIcNN	)29)=)= % %%GX9W#5551"c)nn44#*c)nnq.@#@@@@ 5!%<<< $uutrP   zAn unexpected key, z, exists. FQN is )rW   rY   r   ra   keysr   r}   rz   nextiterrL   r   poprV   
startswithrB   rA   r   requires_gradr   )rd   r   r   r   r   rq   r   new_state_dictrr   r   r   s              r<   _get_model_state_dictr     s     						 ; ;8^E<88::
; ; ; ; ; ; ; ; ; ; ; ; ; ; ; JOO%%&& 2 2$$t99>> J3JJs4yyJJDJJ   4::#::D    " 6#s## V"#T#T#Ts#T#TUUU(nnS11JsO $/1 	> 	>C1 > >~~f-- / >*4S/N3''!#f++--0G.8oN7++> $
  $0022 	$ 	$JC" UC((D $ $s####$ )T:::s   AA
Ac           	         |j         r	|s|j        st          i i           S i }t          | |j                  D ]\  }}t          | ||j                  }t          | ||j        dd          }t          ||          D ]f\  }}	|j        rt          j                    dk    r>||	k    r8|	                    |d           }
|
|j
        rt          d| d          n|
||	<   |||	<   gd}|j        s|j        rt                      }|                                D ]H}t          j        |          r2|                                dk    r|                    |j                   It          j        d          |v r)|                    t          j        d                     d}t+          |          dk    r2|                    t          j                                                   n"t+          |          dk    rt1          d	          |j        r1t3          |||	                                |j
        |j        
           n+|j        r$t7          |||	                                           |                    |           |                                5  t=          t           t?          | d          ||j
        |                    cd d d            S # 1 swxY w Y   d S )NF)rf   rg   r   zMissing key: rk   metaTrn   zMultiple devices found)devicerC   r@   r   load_state_dict)r   rC   assign) rW   rD   r    r   rG   r   zipr   r   r   rC   r   r?   r^   r   r\   	is_tensordimr   r   remover}   distributed_c10d_get_pg_default_devicer   r   r@   r   r   rY   r   r   )rd   r   r   local_state_dictr   valuer   fqns_with_prefixrq   fqn_with_prefix
load_valuer   devicess                r<   _load_model_state_dictr    ss     )Z )8Q ) R(((08NOO 6 6
UT%;<<$"!!&
 
 
 %(.>$?$? 
	6 
	6 C-=15A1E1E(('^^C66
%{ C*+A3+A+A+ABBBC 3=J/05_--
	6 F  ,D$8 ,%%%,,.. 	* 	*Eu%% *%))++//EL))) <7**NN5<//000Fw<<1KK-DDFFGGGG\\A5666$ 		W! {{}}{ ,     ! 	W":/?VVVV*+++					 
 
4N5"344%dk&  
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   4KKKoptimc                 B   | j         rdS | j        D ]}|t                   D ]}|j          dS | j        D ]2}|t                   D ]"}|j        rt          j        |          |_        #3g }| j        D ]Z}d|v rT|                    |d                    t          |d         t
          j	                  rt          j
        d          nd|d<   [|                     d           | j        D ]}d|v r|                    d          |d<   |                     d           dS )zH
    Initialize optim states by calling the step() with zero grads.
    Nlrg        )closurer   T)set_to_none)r3   r1   _PARAMSgradr   r\   
zeros_liker{   rx   r]   tensorstepr   	zero_grad)r  param_groupr   lrss       r<   _init_optim_stater  a  sx    {  )   ) 	 	Ez% &	 ) 5 5 ) 	5 	5E" 5"-e44
	5 C)  ;JJ{4())) k$/>>S!!! 
 
JJtJ ) + +; #

K	OOO%%%%%rP   c           
        	
 dt           t          t          f         dt          dt           t          t          f         f	
fd	d 
i }t	          t
          | t                                                             D ]U\  }}t           d| }|                     	t	          t           t          t          f         |          |                     Vt	          t          | t                             D ]k}|                    t                    }t	          t          t                   |          D ].}|                                D ]\  }}||t           d| d| <   /l|S )a3  
    This API flattens the optimizer state_dict to support optimizer resharding for
    MPMD, e.g., pipeline parallelism.

    Without the API, the original optimizer state_dict looks like:
    {
        "state": {
            "layer1.weight": {
                "step": 10, "exp_avg": SomeTensor, "exp_avg_sq": SomeTensor
            },
            "layer2.weight": {
                "step": 10, "exp_avg": SomeTensor, "exp_avg_sq": SomeTensor
            },
        },
        "param_groups": [
            {
                "lr": 0.0,
                "betas": (0.9, 0.95), ...,
                "params": ["layer1.weight", "layer2.weight"]
            }
        ]
    }

    With this API, the optimizer state_dict looks like:
    {
        "state.layer1.weight.step": 10,
        "state.layer2.weight.step": 10,
        "state.layer1.weight.exp_avg": SomeTensor,
        "state.layer2.weight.exp_avg": SomeTensor,
        "state.layer1.weight.exp_avg_sq": SomeTensor,
        "state.layer2.weight.exp_avg_sq": SomeTensor,
        "param_groups.layer1.weight.lr": 0.1,
        "param_groups.layer2.weight.lr": 0.1,
        "param_groups.layer1.weight.betas": (0.9, 0.95),
        "param_groups.layer2.weight.betas": (0.9, 0.95),
    }

    The "state" section supports arbitrary levels of nesting for optimizers like Shampoo.
    nested_dictrr   rh   c                     i }|                                  D ]d\  }}t          |          }|r| d| n|}t          |t                    r |                     ||                     T |           |||<   e|S )a  
        Recursively flatten a nested dictionary with dot-separated keys.

        Args:
            nested_dict: The dictionary to flatten
            prefix: The prefix to prepend to all keys

        Returns:
            Flattened dictionary with dot-separated keys
        rk   )r   rN   rx   r[   r   )	r  rr   	flattenedr   r  str_keyfull_key_flatten_state_nested_dict_raise_if_type_not_supporteds	          r<   r  z=_flatten_optim_state_dict.<locals>._flatten_state_nested_dict  s     +-	%++-- 	, 	,JC#hhG06C&,,7,,,GH%&& ,  !;!;E8!L!LMMMM -,U333&+	(##rP   c                     t          | t          j        t          t          t
          f          s t          dt          |            d          d S )Nz[Flattening optimizer state_dict only supports tensor, int, float, dict states now. Type is rk   )rx   r\   r]   intfloatr[   NotImplementedErrortype)vs    r<   r  z?_flatten_optim_state_dict.<locals>._raise_if_type_not_supported  sX    !elC=>> 	%&77& & &  	 	rP   rk   )r[   rN   r
   r%   r   r&   _STATEr   r   r'   _PGr   r  ra   )r   retrq   r3   state_prefixr  r   kr#  r  r  s            @@r<   _flatten_optim_state_dictr)    s   R#s(^-0	c9n	      <   !#C =*V*<==CCEE 
 

U ((3((

&&tDcNE'B'BLQQ	
 	
 	
 	

 -z#?? , ,w''S	4(( 	, 	,C#))++ , ,1*+s&&S&&1&&'',	, JrP   c                    dt           dt          t           t          f         dt          t           t          f         fd}i }g }t          |t          |i}| j        D ]}|                    t          g i           |t                   D ]'}|j        |         D ]}	|	|j	        v r)d}
|D ]#}|t          k    rt           d|	 d| }||v rd}
 nd}
|
s:|d         t                   }t          |t                    st          d	t          |                     |                    |	           |j        si ||	<   | j        |         D ]d}t           d|	 d| }||vr+ |||          }|t!          t"          ||	                   |<   @||         t!          t"          ||	                   |<   e)t!          t          t                    |d         t                             d
         }|D ]s}|t          k    r|t           d| d|          }||d         vr||d         |<   9|d         |         |k    r(t%          d| d| d| d|d         |          d	          t|S )z
    This API unflattens the state_dict generated by _flatten_optim_state_dict().
    Supports arbitrary levels of nesting in the state section through recursive reconstruction.

    See the docstring of _flatten_optim_state_dict() for more detail.
    flattened_keyflattened_dictrh   c                 f   |  d}i }|                                 D ]\  }}|                    |          s|t          |          d         }|                    d          }|}|dd         D ]5}	|	|vri ||	<   t	          ||	         t
                    st          ||	         }6|||d         <   |S )z
        Reconstructs a potentially nested value from flattened keys.
        For non-nested values, returns the value directly.
        For nested values, reconstructs the nested structure with string keys.
        rk   Nr   )r   r   r}   rv   rx   r[   rz   )
r+  r,  rr   r  r   r  remaining_keypartscurrentparts
             r<   _reconstruct_nested_dictz=_unflatten_optim_state_dict.<locals>._reconstruct_nested_dict  s     "$$$&( )..00 	' 	'JC >>&))   F.M "'',,E!G crc
 ( (w&&$&GDM!'$-66 )((!$- "'GE"I rP   Frk   Tr   Expected list, got r   zaAll the parameters in the same parameter group should have the same saved param_group value. But z is z while other(s) is )rN   r[   r%   r$  r%  r1   r{   r  rT   rU   rx   ra   rz   r"  r   r3   r   r&   r   )r  r   r   r2  r3   pg_state
return_osdr  r   rq   	in_paramsr(  flatten_keyr2   
state_nameflattened_state_keyreconstructed_valuefirst_param_fqnr  s                      r<   _unflatten_optim_state_dictr<    s5   00,0i,@0	c9n	0 0 0 0d E"$H&,eS(%CJ) A A"&&& ) 1	 1	E-e4 0 0 $444 %I(  <<$),&8&8s&8&8Q&8&8&*44(,I $I  !"g.!&$// O()MtF||)M)MNNNc""" *   c
"'+e"4  J-3*H*Hc*H*HJ*H*H'**<<.F.F// /+
 0 ]E#J77
CC GQ/G]E#J77
CC?0d tCy(2,w*?@@C 	 	AG||#====!==>E$$"'Q"aE))"G=LG GOPG GG G4<RLOG G G   *	 rP   
optimizersc                    |j         si S t          i t          g i}|D ]/}t          |            t	          |d                      }|j        r|                                5  t          j        | ||          }d d d            n# 1 swxY w Y   |svt          |t                   
                                          D ]H}d|v rB|t                                       |          |t                   |                    dd          <   I|t                   D ]#}d |t                   D             }||t          <   $nt          t          j        d |j        D                                 }t#          t%          |t'          t)          |                                        }	i |                                 D ]}\  }
}t-          | |
          }t)          |          dk    r"t/          d|
 d	t)          |                     t1          t3          |                    }||	vrk|	|         }||<   ||<   ~t          |t                   
                                          D ]8}
|
         }|t                                       |
          |t                   |<   9|t                   D ]#}fd
|t                   D             |t          <   $|st5          t6          |t                                                 |t                              t5          t:          |t                                                 |t                              1|j        r"t5          t@          tC          |                    }tE          ||          S )Nr   rt   
_orig_mod.rj   c                 :    g | ]}|                     d d          S )r?  rj   ru   rp   r(  s     r<   
<listcomp>z)_get_optim_state_dict.<locals>.<listcomp>  s&    JJJ!!))L"55JJJrP   c              3   0   K   | ]}|t                    V  d S r6   )r  )rp   gs     r<   r   z(_get_optim_state_dict.<locals>.<genexpr>  s&      -U-UQaj-U-U-U-U-U-UrP   rn   r   r   c                      g | ]
}|         S rO   rO   )rp   pidfqn_pid_mappings     r<   rC  z)_get_optim_state_dict.<locals>.<listcomp>  s    !Q!Q!Q3/#"6!Q!Q!QrP   )#rX   r$  r%  r  r   rZ   rY   r|   r   ra   r   r   ru   r  r	   from_iterabler1   r[   r   ranger}   r   r   rz   r   r   r   r&   r   r'   extendrE   r(   r)  r   )rd   r=  r   r   r  osdr(  rE  r2   param_pid_mappingr   r   r   rq   rG  grouprH  s                   @r<   _get_optim_state_dictrO  u  s     	,2BR+@ 4H 4H%   1nUL1133 +	R""$$ ? ?+E5#>>? ? ? ? ? ? ? ? ? ? ? ? ? ? ?  #f+**,,-- R R!##?B6{q?Q?QCK		, ; ;<X $ $JJqzJJJ#'

$ %--U-U%BT-U-U-UUUVVF $Ss6{{1C1C%D%D E E O#4466 + +
U ,,t99>>(J3JJs4yyJJ   4::&& 111'.'*$'*$$ CK,,..// 8 8%c* $'v;??3#7#7FC  S R R!Q!Q!Q!Q%.!Q!Q!Qg 	],V455<<S[III 0 566==c#hGGGG( 
 9:J K K
 
 ))94@@@s   "BB		B		c           
         i }g }t           |t          |i}i }t          d t          t          |t                              D                       r|S |j        D ]}|                    t          g i           |t                   D ]}	|j        |	         D ]}
|
|j	        v rWd}t          t          |t                             D ]3}|
t          t          t                   |t                             v rd} n4nd}|sh|d         t                   }t          |t                    st          dt          |                     |                    |
           |	j        rf|
t          t          |t                              v r*t          t          |t                              |
         ||
<   n|j        rt%          d|
 d          t          t          |t                             D ]\}|
t          t          t                   |t                             v r-t'          |t                             dz
  |t)          |          <   ]t'          |t                             d	k    rg }t          t          |t                             D ]S}t'          t          t          t                   |t                                       d	k    r|                    |           Tt'          |          dk    rt+          d
          t'          |t                             t'          |j                  k    rt+          d          t'          |t                             dz
  |t)          |          <   t          t          |t                             D ]]}|                    t)          |          d          }|dk    r,|                                D ]\  }}|t          k    r|||         |<   ^|S )a  
    Extract the corresponding optim state_dict from ``optim_state_dict`` for
    ``optim`` and return the result optim state_dict.

    Args:
        model (nn.Module): the root model.
        optim (torch.optim.Optimizer): the optimizer.
        optim_state_dict (Dict[str, ValueType]): the superset optim state_dict that
            contains the optim state_dict of ``optim``.
        info (_StateDictInfo): state dict information.

    Returns:
        The optim state_dict of ``optim``.
    c              3   @   K   | ]}t          |t                    V  d S r6   )rx   r  rB  s     r<   r   z*_split_optim_state_dict.<locals>.<genexpr>  s,      
U
U!:a
U
U
U
U
U
UrP   FTr   r3  z'Missing optimizer state for parameter 'z' in checkpoint. The parameter requires gradients but has no saved optimizer state. To load anyway, use StateDictOptions(strict=False).rn   r   zThere are param groups that have zero parameters. In such a case, DSD only support exactly one param group with zero parameters.But the loaded state_dict has zero or more than one param groups that have zero parameters.z`When there is a parameter group that has zero parameters, multiple optimizers are not supported.)r$  r%  allr   r&   r1   r{   r  rT   rU   r'   ra   rN   rx   rz   r"  r   rC   r   r}   idr   r   r   )rd   r  r   r   r3   r4  r5  
pg_mappingr  r   rq   r6  loaded_param_groupr2   r&  pg_idxr   r  s                     r<   _split_optim_state_dictrW    s	   * E"$H&,eS(%CJ!#J

U
UtM;KF;S'T'T
U
U
UUU  ) 7J 7J"&&& )  	V  	VE-e4 V V$444 %I.2)+;C+@/ / " "* $tCy2DW2M"N"NNN(,I!E O !%I  !"g.!&$// O()MtF||)M)MNNNc"""& d=2B62JKKKK%)-9I&9Q%R%RSV%Wc

 *Rc R R R  
 +/%'7'<+ + V V& d49.@.IJJJJ=@C=Q=QTU=U
2&8#9#9:	V7VB {7#$$))C&*+<>Ns>S&T&T 3 3"tDI'9''BCCDDIIJJ12223xx1}} 1   #C())S1C-D-DDD =   25Z_1E1E1IJr,--.-/?/DEE 	* 	*;44R<<%++-- 	* 	*JCg~~$)HVS!!		* rP   c           
         |j         sd S |D ]y}t          |           |rSt          |v rt          | |||          }n9t	          |t          t          t          t          f         |          |          }ni }|j	        r| 
                                D ][\  }}t          | |          }t          | |d          }	||	k    r/t          |          dk    r"t          d| dt          |                     |                                |	                                |t                   D ]M}
t          t          t          t           f         |
          }fd|t"                   D             }||t"          <   Nt          t$          |t                             }t'          |                                          D ]2}|v r,|                    |          ||                              <   3]|                                5  t/          j        | ||          }d d d            n# 1 swxY w Y   n9|j        r1d|_        t5          | |f|          }d|_        d fd}t7          t8          j        ||          }t          d	          t=          |          \  }}t=          |          \  }}|j        rtA          ||
           ntC          ||
           |D ]3}||vr-||vrt          d| d          ||         ||<   ||         ||<   4tE          ||          }|t                   D ]:}t"          |vr/g t          t          t          t          f         |          t"          <   ; tG          |d          |           {d S )NF)rg   rn   zExpected 1 FQN for 'r   c                 <    g | ]}|                               S rO   rA  )rp   r   rq   fqn_with_compilers     r<   rC  z*_load_optim_state_dict.<locals>.<listcomp>K  s5       @CC):;;  rP   Tc                     |                                  dk    r$| j        n| j        k    rt          d          | S )Nr   zDevice mismatch)r   r   r   )tr   s    r<   _devicez'_load_optim_state_dict.<locals>._device^  sC    5577Q;;~!"18++():;;;rP   zExpected device to be setr   zExpected key 'z' in osd_mappingr   )r   )$rX   r  r$  rW  r<  r   r[   rN   r%   rZ   r   r   r}   rz   r   r%  r
   r  r&   ra   r   ru   rY   r|   optim_state_dict_to_loadr?   rO  r"   r\   r]   r   rD   r   r   r   r   )rd   r=  r   r   r  r   original_fqn_r   fqns_with_compilerrE  valr2   	osd_stater(  r  r]  flatten_osdosd_mappingflatten_local_osdlocal_osd_mapping	optim_keypgr   rq   rZ  s                          @@@r<   _load_optim_state_dictrj  !  s      [N [N%    
	"###:5*d$ $   $?4S)^ 4jAA4$ $    " I	A $)#9#9#;#; X Xa 55%.<e& & &" ---t99>>(O|OOCIIOO   hhjj$6$:$:$<$<!)#. * *AtCH~q11C    GJ7|  F $*CLL 0@0HII	inn..// X XAaxxGP}}UVGWGW	!))C1B"C"CDX ""$$  #'#@5"2$ $                ! )	A#(D 4UUHdKK#'D F     elG5EFFA~$%@AAA':;K'L'L$K3FGW3X3X00( V%k3DVTTTTT&{4EfUUUU
 ) J J	$555 33,HYHHH   4?y3I%i03>y3I%i04!#4    's+ A A"$$>@Dc9n-r227;
 	1u/00<LMMMMMw[N [Ns   H((H,	/H,	c                    t                      5  t          | dd||          }t          | |          }t          |i |           |cddd           S # 1 swxY w Y   dS )aH  
    Return the model state_dict of ``model``.

    See ``get_state_dict`` for the detail usage.

    Args:
        model (nn.Module): the nn.Module to the model.
        submodules (deprecated): Optional[set[nn.Module]]: only return the model parameters
            that belong to the submodules.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be returned. See
            `StateDictOptions` for the details.

    Returns:
        The state_dict for ``model``.

    :rtype: typing.Dict[str, ValueType]
    rO   Fr   r   r   N)r=   r   r   r   )rd   r   r   r   r   s        r<   r*   r*     s    0 
 
  
 !
 
 
 1==+R666
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
 s   7AAAc                $   t                      5  t          |t          j        j                  r|fnt          |          }t          | |d||          }t          | ||          }t          i ||           |cddd           S # 1 swxY w Y   dS )a  
    Return the combined state_dict for optimizers.

    See ``get_state_dict`` for the detail usage.

    Args:
        model (nn.Module): the nn.Module to the model.
        optimizers (Union[None, Optimizer, Iterable[Optimizer]]):
            The optimizers that are used to optimize ``model``.
        submodules (deprecated): Optional[set[nn.Module]]: only return the model parameters
            that belong to the submodules.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be returned. See
            `StateDictOptions` for the details.

    Returns:
        The state_dict for ``optimizers``.

    :rtype: OptimizerStateType
    Trl  N)	r=   rx   r\   r  	Optimizertupler   rO  r   )rd   r=  r   r   r   r   s         r<   r+   r+     s    6 
     *ek&;<<#ZMMz"" 	
 !
 
 
 1
DII2/666                                   s   A)BB	B	c                H   t                      5  t          |t          j        j                  r|fnt          |          }t          | |d||          }t          | |          }t          | ||          }t          |||           ||fcddd           S # 1 swxY w Y   dS )a  
    Return the model state_dict and optimizers state_dict.

    ``get_state_dict`` can process any module that is parallelized by PyTorch
    FSDP/fully_shard, DDP/replicate, tensor_parallel/parallelize_module, and any
    combination of these parallelisms. The main functions of ``get_state_dict``
    are: 1.) returning a model and optimizer state_dict that can be resharded
    with a different number of trainers and/or different parallelisms.
    2.) hiding the parallelism-specific state_dict APIs. Users don't have to call
    these APIs.
    3.) sanity checking the result state_dict.

    The keys of the result state dictionary are the canonical FQNs (Fully
    Qualified Names).  A canonical FQN refers to the FQN based on a parameter's
    position in an nn.Module hierarchy. More specifically, a canonical FQN to a
    parameter is the FQN returned by ``module.named_parameters()`` or
    ``module.named_buffers()`` when the module is not distributed by any
    parallelisms. Since the optimizer internally uses parameter IDs to represent
    a parameter, there will be a conversion from the parameter IDs to the
    canonical FQNs when calling this API.

    ``get_state_dict`` can also process a module that is not parallelized. In
    such a case, ``get_state_dict`` only performs one function -- converting the
    optimizer parameter IDs to the canonical FQNs.

    Example:
        >>> # xdoctest: +SKIP
        >>> import torch
        >>> from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
        >>> from torch.nn.parallel import DistributedDataParallel as DDP
        >>> from torch.distributed.checkpoint.state_dict import get_state_dict

        >>> fsdp_model = FSDP(copy.deepcopy(model))
        >>> fsdp_optim = torch.optim.Adam(model.parameters(), lr=1e-3)
        >>> ddp_model = DDP(copy.deepcopy(model))
        >>> ddp_optim = torch.optim.Adam(model.parameters(), lr=1e-3)


        >>> ddp_state_dict, ddp_optim_state_dict = get_state_dict(ddp_model, ddp_optim)
        >>> fsdp_state_dict, fsdp_optim_state_dict = get_state_dict(
        ...     fsdp_model, fsdp_optim
        ... )

        >>> # if we simply call ddp_model.state_dict() and fsdp_model.state_dict(),
        >>> # the asserts will fail.
        >>> assert ddp_state_dict == fsdp_state_dict
        >>> assert ddp_optim_state == fsdp_optim_state_dict


    Args:
        model (nn.Module): the nn.Module to the model.
        optimizers (Union[None, Optimizer, Iterable[Optimizer]]):
            The optimizers that are used to optimize ``model``.
        submodules (deprecated): Optional[set[nn.Module]]: only return the model parameters
            that belong to the submodules.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be returned. See
            `StateDictOptions` for the details.

    Returns:
        ``Tuple`` that contain model state_dict and optimizer state_dict.

    :rtype: typing.Tuple[typing.Dict[str, ValueType], OptimizerStateType]
    Frl  N)
r=   rx   r\   r  rn  ro  r   r   rO  r   )rd   r=  r   r   r   r   r   s          r<   r,   r,     s   P 
 2 2 *ek&;<<#ZMMz"" 	
 !
 
 
 1==0
DII+-=tDDD!11!2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2s   A;BBBc                 "  	 |si S t          t          t          |                                                    t          j                  rt          j        dt          d           t          t          t          j        t          t          t          f         f         |          }i }|                                D ]\  }}|                                 D ]\  }}||k    rt          | |          }t!          |          dk    rt#          d          t          t          |                     d	|                    	fd|                                D                        |S t          t          t          t          f         |          S )NzPassing model_state_dict as a ``Dict[nn.Module, Dict[str, Any]]``is deprecated and will be removed in 2.5. If you need this feature, please preprocessing the model_state_dict to achieve the same functionality.r   r   rn   z/FQNs for a submodule should only have 1 elementrk   c                 "    i | ]\  }}|z   |S rO   rO   )rp   subfqnr  rr   s      r<   
<dictcomp>z/_unflatten_model_state_dict.<locals>.<dictcomp>T  s#    XXXVf_eXXXrP   )rx   r   r   r   rb   rc   r   r   r   r   r[   rN   r%   r   r   r   r}   rz   r   )
rd   r   cast_state_dictr   r   sub_state_dictre   mr   rr   s
            @r<   _unflatten_model_state_dictrx  6  s     	$tJOO--..//;; 6" 	
 	
 	
 	
 tBItCN/C$CDjQQ/1)8)>)>)@)@ 	 	%I~ ..00  a	>> --t99>>(I   !d,,///%%XXXXAUAUAWAWXXX    Di(*555rP   )r   c                    t          | |          }t                      5  t          | dd|          }t          |i |           t	          | ||          cddd           S # 1 swxY w Y   dS )a=  Load the model state_dict.

    The counterpart of ``get_model_state_dict`` to set the state_dict to the
    model. See ``set_state_dict`` for the detail usage.

    Args:
        model (nn.Module): the nn.Module to the model.
        model_state_dict: (Dict[str, ValueType]):
           the model state_dict to load. If the key of the ``model_state_dict``
           is nn.Module, the key is a submodule of ``model`` and the value should
           be the state_dict of the submodule. When loading the state_dict,
           the prefix of the submodule will be append to the state_dict.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be loaded. See
            `StateDictOptions` for the details.

    Returns:
        ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:
            * **missing_keys** is a list of str containing the missing keys
            * **unexpected_keys** is a list of str containing the unexpected keys

    :type model_state_dict: typing.Dict[str, ValueType]
    rO   Fr   r   N)rx  r=   r   r   r  )rd   r   r   r   s       r<   r-   r-   [  s    : .I. . 
 E EubUGLLL+R666%e-=tDD	E E E E E E E E E E E E E E E E E Es   5A!!A%(A%c                "   t                      5  t          |t          j        j                  r|fnt          |          }t          | |d|          }t          i ||           t          | |||           ddd           dS # 1 swxY w Y   dS )a  Load the optimizers state_dict.

    The counterpart of ``get_optimizer_state_dict`` to set the state_dict to the
    optimizers. See ``set_state_dict`` for the detail usage.

    WARN: ``set_optimizer_state_dict`` can only be called before ``backward()`` or after
        ``step()`` is called on the optimizers. Otherwise, the optimizer states won't be
        initialized correctly.

    Args:
        model (nn.Module): the nn.Module to the model.
        optimizers (Union[Optimizer, Iterable[Optimizer]]):
            The optimizers that are used to optimize ``model``.
        optim_state_dict: OptimizerStateType:
            the optimizer state_dict to load.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be loaded. See
            `StateDictOptions` for the details.

    Returns:
        None

    :type optim_state_dict: typing.OptimizerStateType
    Trz  N)	r=   rx   r\   r  rn  ro  r   r   rj  )rd   r=  r   r   r   s        r<   r.   r.     s    > 
 	J 	J *ek&;<<#ZMMz"" 	
 ujT7SSS2/666uj2BDIII	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	Js   A(BBBc                d   t          | |          }t                      5  t          |t          j        j                  r|fnt          |          }t          | || |          }t          |||           t          | |||           t          | ||          cddd           S # 1 swxY w Y   dS )a  Load the model state_dict and optimizers state_dict.

    The counterpart of ``get_state_dict`` to set the state_dict to the model and
    optimizers.  The given ``model_state_dict`` and ``optim_state_dict`` do not
    have to be returned by ``get_state_dict`` but must meet the following
    requirements: 1) all FQNs are canonical FQNs as defined in ``get_state_dict``,
    2) if a tensor is sharded, it must be either a ShardedTensor or DTensor,
    3) optimizer state_dict cannot contain the parameter IDs; the keys should be
    the canonical FQNs.

    WARN: ``set_state_dict`` can only be called before ``backward()`` or after ``step()``
        is called on the optimizers. Otherwise, the optimizer states won't be initialized
        correctly.

    Args:
        model (nn.Module): the nn.Module to the model.
        optimizers (Union[Optimizer, Iterable[Optimizer]]):
            The optimizers that are used to optimize ``model``.
        model_state_dict: (Union[Dict[nn.Module, Dict[str, ValueType]], Dict[str, ValueType]]):
           the model state_dict to load. If the key of the ``model_state_dict``
           is nn.Module, the key is a submodule of ``model`` and the value should
           be the state_dict of the submodule. When loading the state_dict,
           the prefix of the submodule will be append to the state_dict.
        optim_state_dict: OptimizerStateType:
            the optimizer state_dict to load.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be loaded. See
            `StateDictOptions` for the details.

    Returns:
        ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:
            * **missing_keys** is a list of str containing the missing keys of the model state_dict.
            * **unexpected_keys** is a list of str containing the unexpected keys of the model state_dict.

    :type model_state_dict: typing.Dict[str, ValueType]
    :type optim_state_dict: typing.OptimizerStateType
    rz  N)rx  r=   rx   r\   r  rn  ro  r   r   rj  r  )rd   r=  r   r   r   r   s         r<   r/   r/     s-   \ .I. . 
 E E *ek&;<<#ZMMz"" 	
 :.>*>
 
 
 	+-=tDDDuj2BDIII%e-=tDDE E E E E E E E E E E E E E E E E Es   A9B%%B),B)c                F   t          j        t          | |          fd}|| _        t          j        t          | |          dt
          t          t          f         ffd}|| _        t          
                    |           t          
                    |           dS )a  Patch the ``state_dict`` and ``load_state_dict`` attributes of ``model``.

    Patch the ``state_dict`` and ``load_state_dict`` attributes of ``model`` to
    be a partial function to call ``get_state_dict`` and ``set_state_dict``.

    Example:
        from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
        from torch.distributed.checkpoint.state_dict import patch_model_state_dict

        model = fsdp(model)
        patch_model_state_dict(model)

    Args:
        model (nn.Module): the nn.Module to the model.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be loaded. See
            `StateDictOptions` for the details.
    Returns:
        None
    )rd   r   c                                    S r6   rO   _state_dict_calls   r<   state_dict_callz0_patch_model_state_dict.<locals>.state_dict_call      !!!rP   r   c                       |            d S )N)r   rO   r   _load_state_dict_calls    r<   load_state_dict_callz5_patch_model_state_dict.<locals>.load_state_dict_call      z::::::rP   N)r   r   r*   r   r-   r[   rN   r
   r   r4   r   )rd   r   r  r  r  r  s       @@r<   _patch_model_state_dictr    s    6 !(  " " " " " 'E%-  ;c3h ; ; ; ; ; ; 1EO,,,011111rP   c                   t          j        t          | ||          fd}t          j        t          | ||          dt          t
          t          f         ffd}t                              |           t                              |           t          |t          j        j                  r|fnt          |          }|D ]}||_        ||_        dS )a  Patch the ``state_dict`` and ``load_state_dict`` attributes of ``optimizers``.

    Patch the ``state_dict`` and ``load_state_dict`` attributes of ``optimizers`` to
    be a partial function to call ``get_state_dict`` and ``set_state_dict``.

    Note that if there are multiple optimizers, all of the optimizers will be patched.
    So users only need to call one of the state_dict() to get the full result.

    Example:
        from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
        from torch.distributed.checkpoint.state_dict import patch_model_state_dict

        model = fsdp(model)
        patch_model_state_dict(model)

    Args:
        model (nn.Module): the nn.Module to the model.
        options (StateDictOptions): the options to control how
            model state_dict and optimizer state_dict should be loaded. See
            `StateDictOptions` for the details.
    Returns:
        None
    )rd   r=  r   c                                    S r6   rO   r  s   r<   r  z4_patch_optimizer_state_dict.<locals>.state_dict_callL  r  rP   r   c                       |            d S )N)r   rO   r  s    r<   r  z9_patch_optimizer_state_dict.<locals>.load_state_dict_callV  r  rP   N)r   r   r+   r.   r[   rN   r
   r4   r   rx   r\   r  rn  ro  r   r   )rd   r=  r   r  r  r  r  r  s         @@r<   _patch_optimizer_state_dictr  &  s   > !( 	  " " " " " &- 	  ;c3h ; ; ; ; ; ; O,,,0111 j%+"788	: 
  5 5* 45 5rP   )rF   TT)rF   )or_   r   r7   r   collections.abcr   r   r   dataclassesr   r   r   	itertoolsr	   typingr
   r   r   r\   torch.distributedr   r   torch.nnrb   'torch.distributed._shard.sharded_tensorr   #torch.distributed._state_dict_utilsr   r   r   r   r   r   ;torch.distributed.algorithms._checkpoint.checkpoint_wrapperr   torch.distributed.fsdpr   r   r   r|   r   r   r   r   r   $torch.distributed.fsdp._common_utilsr   r   torch.distributed.tensorr   torch.nn.modules.moduler    torch.nn.parallelr!   ry   torch.utils._pytreer"   __all__r~   r%  r  r$  r^   rN   r#   r]   r  r   r$   ra   ro  r[   r%   r&   r'   r(   r4   rM   r   r=   r)   rR   rc   rL   r   r   r   r  rn  r   r   r   r   no_gradr   r  r  r)  r<  rO  rW  rj  r*   r+   r,   rx  r-   r.   r/   r  r  rO   rP   r<   <module>r     s            				  9 9 9 9 9 9 9 9 9 9 0 0 0 0 0 0 0 0 0 0       + + + + + + + + + +                    A A A A A A                    	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	        - , , , , , 5 5 5 5 5 5 < < < < < < - - - - - -  " 
		S-'%,6<uDsJD''%*>>c;FVAWW 
 S)^$' #}/@@@A  &)SUU S] * * *    ,. ,. ,. ,. ,. ,. ,. ,.^ @ @ @ @ @% @ @ @& . !%FE FE9FE
FE FE 	FE
 FE FE FE FE FER	 	 	 	 	 	 	 	%" %" %" %"Z )-'+} } }9}%+',-} }
 BI%} $} } } } }@+3	>*+(+ + 
	+ + + +\	EK$99      S#X&4	#s(^   $ ?;9?;*?;	#y.?; ?; ?; ?;D A
9A
S)^$A
 A
 	A
 A
 A
 A
H'&U[2 '&t '& '& '& '&T_*< _c9nAU _ _ _ _DE; ES)^$E E 	E E E EP DA9DAek+S01DA DA 	DA DA DA DANa9a; a )a 	a
 a a a aH dN9dNek+S01dN #dN 	dN
 
dN dN dN dNT )-'+	"  "  " 9"  BI%"  $	" 
 
#y."  "  "  " R )-'+*  *  * 9* %1F(GG*  BI%	* 
 $*  *  *  *  * b )-'+X2 X2 X29X2%1F(GGX2 BI%	X2
 $X2 4Y!334X2 X2 X2 X2v"69"6RYS)^ 445S)^8LL"6 
#y."6 "6 "6 "6R (,	$E $E $E9$E3	>*$E $	$E
 $E $E $E $EX (,(J (J (J9(J%1F(GG(J )(J
 $(J 
(J (J (J (Jb (,=E =E =E9=E%1F(GG=E 3	>*	=E
 )=E $=E =E =E =E =ED  (,12 12 12912 $12 
	12 12 12 12l 
 (,	;5 ;5 ;59;5 ek+S01;5 $	;5
 
;5 ;5 ;5 ;5 ;5 ;5rP   