
    IЦisX                        S SK r S SKrS SKJrJr  S SKJrJrJrJ	r	J
r
JrJrJrJrJrJrJr  S SKJr  / SQrS\R*                  S\\R*                  /\\R*                     4   4S jrS\R*                  S\	\R*                  \	\\4   4   S	\S
\\R*                  /\\R*                     4   4S jrS\R*                  S\\\R*                        S\\R*                     S\	\\4   S\	\R*                  \	\\4   4   4
S jrS
\4S jr " S S\5      rS\R*                  S\S\S\\\R*                        S
\4
S jr " S S\5      r " S S\5      r S\R*                  S\S\S\S
\4
S jr!S\R*                  S\S\S\\\R*                        S
\4
S jr"S\R*                  S\\#   S\4S jr$S\R*                  S\S\S
\4S  jr%\" S!5      SS4S\R*                  S\S\S"\S#\\\\R*                           S$\\\\R*                           S
\4S% jjr&\RN                  \RP                  1\&l)        \RT                  1\&l+        \ RX                  S&\S'\S
\
S(   4S) j5       r-S\R*                  S*\S
\R*                  4S+ jr.S\R*                  S&\S
\R*                  4S, jr/ S4S\R*                  S-\S&\S\\R*                     S.\\R`                     S/\S0\S
\\R*                  \4   4S1 jjr1 " S2 S35      r2g)5    N)ABCabstractmethod)AnyCallablecastDict	GeneratorIterableOptionalSequenceSetTupleTypeUnion)always_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                    ^^^ U 1mS[         R                  S[        S[        [         R                     4UUU4S jjmT" U SS5        g)a9  
This applies ``fn`` to every module in the module tree of ``root_module``
following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
then this replaces the original module with the newly returned one in the
tree. Otherwise, ``fn`` should return ``None``, in which case the module is
not changed.
modulemodule_nameparent_modulec                 x  > U R                  5        H'  u  p4UT;  d  M  TR                  U5        T" XCU 5        M)     T" U 5      nUbs  [        U[        R                  5      (       d   SU SU  35       eU(       d
   SU  35       e[        U[        R                  5      (       d
   SU 35       e[        X!U5        g g )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModulesetattr)	r   r   r   child_module_namechild_moduleoptional_module_post_order_apply_innerr   visited_moduless	         Z/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torch/distributed/fsdp/wrap.pyr)   2_post_order_apply.<locals>._post_order_apply_inner9   s    
 06/D/D/F+?2##L1'P 0G V*&mRYY77 O /vh07  ,,285;   R??PQR  M@ '     N)r#   r$   strr   )r   r   r)   r*   s    `@@r+   _post_order_applyr0   +   sP     (3mOA		AA  		*A A0 KT2r-   target_module_to_kwargsfsdp_fnreturnc                 n   ^ ^^ S[         R                  S[        [         R                     4UU U4S jjnU$ )z
This constructs the "wrap" function to pass to :func:`_post_order_apply`
based on ``target_module_to_kwargs``, which should be constructed from the
wrapping policy.
r   r3   c                 8   > U T;   a  U TLa  TU    nT" U 40 UD6$ g N )r   kwargsr2   r   r1   s     r+   r   _construct_wrap_fn.<locals>.fn_   s2     ,,{1J,V4F6,V,,r-   )r#   r$   r   )r   r1   r2   r   s   ``` r+   _construct_wrap_fnr:   T   s/    299 "))!4   Ir-   module_classesignored_modulesroot_kwargsc                     [        [        U5      5      nU R                  5        H,  nXb;   a  M
  [        Xe5      (       d  M  Xd;  a  X4U'   S XF   S'   M.     U$ )Nmixed_precision)tuplesetmodulesr"   )r   r;   r<   r=   r1   module_classes_tupler   s          r+   $_run_mixed_precision_override_policyrD   j   s`     !^!45%%'$5542=/AE#+,=> ( #"r-   c                      g)z
A simple recursive wrap policy that always returns ``True``. This means
that every submodule is wrapped by the wrapper class in
:func:`_recursive_wrap`.
Tr7   )argsr8   s     r+   r   r   ~   s     r-   c                       \ rS rSrSr\S\R                  S\\R                     S\	\
\4   S\	\R                  \	\
\4   4   4S j5       rSrg	)
_Policy   z_
This defines an abstract base class that represents a policy for applying
a module-level API.
r   r<   r=   r3   c                     g)zp
This should return a dict ``target_module_to_kwargs`` that maps from
each target module to wrap to its kwargs.
Nr7   )selfr   r<   r=   s       r+   _run_policy_Policy._run_policy   s     	r-   r7   N)__name__
__module____qualname____firstlineno____doc__r   r#   r$   r   r   r/   r   rL   __static_attributes__r7   r-   r+   rH   rH      sk    
 
YY
 RYY
 #s(^	

 
biic3h'	(
 
r-   rH   r   recursenonwrapped_numelc                 <    U(       a  g[        U [        U5      5      $ )a  
This auto wrap policy wraps every module that is an instance of any type in
``module_classes`` as its own FSDP instance. The root module given by
``module`` is always wrapped as an FSDP instance regardless. Since the
wrapping proceeds bottom up, each FSDP instance manages the parameters in
its subtree excluding any already managed by a child FSDP instance.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.
    module_classes (Set[Type[nn.Module]]): Set of module classes that are
        wrapped as FSDP instances.

Returns:
    ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
    if ``recurse=False``.
Tr"   r@   )r   rT   rU   r;   s       r+   _module_wrap_policyrX      s    6 feN344r-   c                      ^  \ rS rSrSrS\\\R                        4S jr	S\R                  S\
\R                     S\\\4   S\\R                  \\\4   4   4S	 jrS
 rS\4U 4S jjrSrU =r$ )r      zo
This policy applies to every module of the specified module classes,
passing in the kwargs given to the root.
r;   c                 F    [        U5      nX l        [        U5      U l        g r6   )rA   _module_classesr/   _module_classes_str)rK   r;   module_classes_sets      r+   __init__ModuleWrapPolicy.__init__   s!     01#&'9#: r-   r   r<   r=   r3   c                     [        U R                  5      n0 nUR                  5        H4  nXb;   a  M
  [        Xd5      (       d  M  [        R                  " U5      XV'   M6     U$ r6   )r@   r\   rB   r"   copy)rK   r   r<   r=   r;   r1   r   s          r+   rL   ModuleWrapPolicy._run_policy   s[     t334CE!))+F(F3326))K2H'/ , '&r-   c                 ,    [        XSU R                  S9$ )N)rU   r;   )rX   r\   )rK   r   rT   rF   r8   s        r+   __call__ModuleWrapPolicy.__call__   s    "bAUAU
 	
r-   c                 B   > [         TU ]  5       SU R                   S3-   $ )N())super__repr__r]   )rK   	__class__s    r+   rl   ModuleWrapPolicy.__repr__   s&    w!a(@(@'A$CCCr-   )r\   r]   )rN   rO   rP   rQ   rR   r
   r   r#   r$   r_   r   r   r/   r   rL   rf   rl   rS   __classcell__)rm   s   @r+   r   r      s    
;xRYY'@ ;
'YY' RYY' #s(^	'
 
biic3h'	(' 
D# D Dr-   r   c                       \ rS rSrSrS\\R                  /\\	\
\\4   4   4   4S jrS\R                  S\\R                     S\
\\4   S\
\R                  \
\\4   4   4S	 jrS
rg)r      a  
This policy takes in a lambda function that maps a given ``nn.Module`` to
either ``False``, ``True``, or a kwarg dictionary.
- If the function returns ``False`` or an empty dictionary, then the module
  does not have the API applied.
- If the function returns ``True``, then the module has the API applied
  with the root's kwargs.
- If the function returns a non-empty dictionary, then the module has the
  API applied, and the dictionary overrides the root's kwargs.

Example::

    >>> # xdoctest: +SKIP("undefined variables")
    >>> model = init_transformer_model(...)
    >>> def lambda_fn(module: nn.Module):
    >>>     if module is model.lm_head:
    >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
    >>>     elif isinstance(module, TransformerBlock):
    >>>         return True
    >>>     return False
    >>> policy = CustomPolicy(lambda_fn)
    >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
	lambda_fnc                     Xl         g r6   
_lambda_fn)rK   rr   s     r+   r_   CustomPolicy.__init__   s    #r-   r   r<   r=   r3   c                 L   0 nUR                  5        H  nXR;   a  M
  U R                  U5      n[        U[        [        45      (       d  [        SU 35      eU(       d  MM  [        R                  " U5      n[        U[        5      (       a  UR                  U5        XtU'   M     U$ )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )rB   ru   r"   dictbool
ValueErrorrb   update)rK   r   r<   r=   r1   r   resr8   s           r+   rL   CustomPolicy._run_policy   s     DF!))+F(//&)CcD$<00 CCF%I  YY{+F#t$$ c".4F+! ," '&r-   rt   N)rN   rO   rP   rQ   rR   r   r#   r$   r   ry   r   r/   r   r_   r   rL   rS   r7   r-   r+   r   r      s    0$(BII;dDcN>R8S+S"T $'YY' RYY' #s(^	'
 
biic3h'	('r-   r   rr   c                 "    U(       a  gU" U 5      $ )a  
A convenient auto wrap policy to wrap submodules based on an arbitrary user
function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
a `wrapper_cls` unit.

Return if a module should be wrapped during auto wrapping.

The first three parameters are required by :func:`_recursive_wrap`.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.

    lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
        this module will be wrapped.
Tr7   )r   rT   rU   rr   s       r+   r   r     s    . Vr-   transformer_layer_clsc                     [        XX#5      $ )a  
See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
same as ``module_classes``. Note that shared parameters must be wrapped in
the same FSDP instance, so this auto wrap policy can help wrap shared
embeddings into the same FSDP instance for transformer models.
)rX   )r   rT   rU   r   s       r+   r   r   3  s     v0@XXr-   c                 <    U(       a  g[        U [        U5      5      $ )NTrW   )r   r;   rT   rF   r8   s        r+   _wrap_module_cls_individuallyr   B  s      &%"788r-   c                 6   ^ ^^ [        U UU4S jU 5       5      $ )zj
A policy that wraps ``module`` if any policy in the passed in iterable of
``policies`` returns ``True``.
c              3   4   >#    U  H  nU" TTTS 9v   M     g7f)r   rT   rU   Nr7   ).0policyr   rU   rT   s     r+   	<genexpr>_or_policy.<locals>.<genexpr>X  s"      F 	fg@PQs   )any)r   rT   rU   policiess   ``` r+   
_or_policyr   N  s        r-   g    חAmin_num_paramsforce_leaf_modulesexclude_wrap_modulesc                     Uc  [         R                  OUnUc  [         R                  OUnUnX&:  nU(       a#  U=(       a    [        U [	        U5      5      (       + $ U=(       a    [        U [	        U5      5      (       + $ )ag  
A size-based auto wrap policy.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.

    min_num_params (int): Customizable policy input that controls the size
        threshold over which a module is ready to be wrapped. This is in
        units of numel.
    force_leaf_modules (Set[Type[nn.Module]]): Set of module types to keep
        as leaves, i.e. their children will never be wrapped.
    exclude_wrap_modules (Set[Type[nn.Module]]): Set of module types to be
        excluded in wrapping.

Returns:
    Whether ``module`` should be wrapped.
)r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr"   r@   )r   rT   rU   r   r   r   min_nonwrapped_numelis_larges           r+   r   r   ^  s    B % 	$66   ' 	$88!  *7HM
659K3L MMM O
659M3N OOOr-   wrapper_clswrapper_kwargs)NNNc              +   j   #    SU 0UEn[        S0 UD6   Sv   SSS5        g! , (       d  f       g= f7f)a  
Context manager to wrap modules using a wrapper.

Useful for when you'd like to apply the same configuration arguments to all
child modules that you wrap. A particularly important use case is wrapping
large layers so that they get sharded (in-place) during initialization, to
avoid running out of system memory. Large layers can indicate that they
should be sharded via the ``wrap`` annotation and this context manager can
provide the exact configuration for these nested instances.

Usage::

    with enable_wrap(wrapper_cls, **params):
        # Wraps layer in FSDP by default if within context
        self.l1 = wrap(torch.nn.Linear(5, 5))

Args:
    wrapper_cls:
        Class that `wrap` annotation will `wrap` modules with, such as
        `FullyShardedDataParallel`.
    **wrapper_kwargs:
        Configuration settings that will be passed to all ``wrap``
        instances inside the context
r   Nr7   )_ConfigAutoWrap)r   r   r8   s      r+   r   r     s6     : 	{
F 
	"6	" 
#	"	"s   3"	3
03wrap_overridesc                     [         R                  (       aB  [         R                  c   e0 [         R                  EUEn[	        U [         R                  40 UD6$ U $ )a  
Annotate that a module should be wrapped. Annotated modules will only be
wrapped if inside of an :func:`enable_wrap` context manager. This allows
a module to be initialized both with and without a wrapper without code
change.

The class that this function wraps the passed in ``nn.Module`` with is the
passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
the ``wrapper_cls`` instance. In the case of duplicate kwargs in
``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
respected.

Usage::

    with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
        # Wraps layer in FSDP by default if within context
        self.l1 = wrap(torch.nn.Linear(5, 5))

Args:
    module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
    **wrap_overrides: configuration overrides that will take priority over
        the values provided by the :func:`enable_wrap` context
)r   in_autowrap_contextr   r8   _wrap)r   r   s     r+   r   r     s^    2 ****666EO22EnE''
 
 	

 Mr-   c                 r    Uc   e[        U S5      (       a  0 UEU R                  EnU" U 40 UD6$ U" U 40 UD6$ )N_wrap_overrides)hasattrr   )r   r   r8   	overridess       r+   r   r     sS    """v())
 9v8!7!78	6/Y//v(((r-   auto_wrap_policyignored_paramsonly_wrap_childrenr8   c           
         ^ Uc   S5       eUc   S5       eU R                  5        H.  u  pxX;   a  M   [        U[        [        U5      5      (       a   eM0     [        U4S jU R                  5        5       5      n	Uc   eU" U SU	S9(       an  Sn
U R                  5        H/  u  pX;   a  M  [        S	UUUUTS.UD6u  p[        XU5        X-  n
M1     X-
  nU(       d  U" U SUS9(       a  [        X40 UD6U	4$ X
4$ U S4$ ! [         a     M  f = f)
a  
Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
``True`` with ``wrapper_cls``.

Args:
    module (nn.Module): Module to recursively wrap.
    auto_wrap_policy (Callable): A callable representing a policy that
        determines which modules to recursively wrap with ``wrapper_cls``.
    ignored_modules (Set[torch.nn.Module]): Modules to ignore when
        wrapping.
    ignored_params (Set[torch.nn.Parameter]): Parameters to ignore when
        wrapping; these should be the parameters contained in the modules
        in ``ignored_modules``.
Returns:
    (nn.Module, int):
        ``module`` after wrapping and the numel recursively wrapped.
zMust specify auto_wrap_policy.zMust specify wrapper_clsc              3   R   >#    U  H  oT;  d  M
  UR                  5       v   M     g 7fr6   )numel)r   pr   s     r+   r   "_recursive_wrap.<locals>.<genexpr>  s$      .a>2I			.s   	''Tr   r   )r   r   r   r<   r   Fr7   )named_modulesr"   r   type	TypeErrorsum
parametersr    _recursive_wrapr%   r   )r   r   r   r<   r   r   r8   _childrU   total_wrapped_numelnamewrapped_childnum_wrapped_params	remainders       `          r+   r   r     sg   4 'I)II'">$>>"((*#	!%dK)@AAAAA	 +  !,,.  '''vtFVW!002KD'0? 1!1' /-1 1-M F-05 3  %:	!&659'
 779III..19K  		s   !C??
DDc                       \ rS rSr% SrSr\\S'   Sr\	\
   \S'   0 r\\\4   \S'   S\\\4   4S jr\S\S	S4S
 j5       r\SS j5       rSS jrS\S\S\S	S4S jrSrg)r   i9  z
Helper class to wrap modules based on default config args via a context manager.
See :func:`enable_wrap` for more information.
Fr   Nr   r8   c                     Xl         g r6   r8   )rK   r8   s     r+   r_   _ConfigAutoWrap.__init__C  s    r-   r3   c                     [         R                  (       a  [        S5      eS[         l        SU R                  5       ;   d   S5       e[	        [
        U S   5      [         l        U S	 U [         l        g )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Tr   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)r   r   NotImplementedErrorkeysr   r   r   r8   r   s    r+   enable_autowrap_context'_ConfigAutoWrap.enable_autowrap_contextF  sk    ..%o  /3+ V[[]*	GF	G*&*8VM5J&K#=!!'r-   c                  F    S[         l        S [         l        0 [         l        g )NF)r   r   r   r8   r7   r-   r+   disable_autowrap_context(_ConfigAutoWrap.disable_autowrap_contextV  s    .3+&*#!#r-   c                 :    U R                  U R                  5        g r6   )r   r8   )rK   s    r+   	__enter___ConfigAutoWrap.__enter__\  s    $$T[[1r-   exc_typeexc_valexc_tbc                 $    U R                  5         g r6   )r   )rK   r   r   r   s       r+   __exit___ConfigAutoWrap.__exit___  s    %%'r-   r   )r3   N)rN   rO   rP   rQ   rR   r   ry   __annotations__r   r   r   r8   r   r/   r   r_   staticmethodr   r   r   r   rS   r7   r-   r+   r   r   9  s    
 !&%&*K(#*FDcNc3h  ( ( ( ( $ $
2( (s (C (D (r-   r   )F)3
contextlibrb   abcr   r   typingr   r   r   r   r	   r
   r   r   r   r   r   r   torch.nnr#   __all__r$   r0   r/   r:   rD   ry   r   rH   intrX   r   r   r   r   r   r   r   r   
ModuleList
ModuleDictr   MultiheadAttentionr   contextmanagerr   r   r   	Parameterr   r   r7   r-   r+   <module>r      s*     #    	&3&3"))hryy112&3R!"))T#s(^";<  ryyk8BII../	,##T"))_-# ^# c3h	#
 ""))T#s(^";<#(4 c (5II55 5 RYY(	5
 
5@"Dw "DJ4'7 4'nII $8;HP	8YIIYY Y tBII/	Y
 
Y	9II	9'/~	9@D	9II 
 
* c(9=;?3PII3P3P 3P
 3P !T"))_!563P #3tBII#783P 
3Pn 57MM2==3Q  0242G2G1H  .   +.     F" "c "bii "J
)")) 
)( 
) 
)&  %GIIGG G ^	G
 %G G G 299c>GT'( '(r-   