
    /Цiq=                     @   S r SSKrSSKJr  SSKJrJr  SSKrSSK	J
r
  SSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJrJr  SSKJ r   \RB                  " \RD                  5      RF                  r$S r%SS jr&S r'S r( " S S\\5      r)g)z<
A Theil-Sen Estimator for Multiple Linear Regression Model
    N)combinations)IntegralReal)effective_n_jobs)linalg)get_lapack_funcs)binom)RegressorMixin_fit_context)ConvergenceWarning)LinearModel)check_random_state)Interval)Paralleldelayed)validate_datac                 .   X-
  n[         R                  " [         R                  " US-  SS95      nU[        :  n[	        UR                  5       U R
                  S   :  5      nX$   nX4   SS2[         R                  4   n[        R                  " [         R                  " X#-  SS95      nU[        :  a8  [         R                  " XSS24   U-  SS9[         R                  " SU-  SS9-  nOSnSn[        SSXV-  -
  5      U-  [        SXV-  5      U-  -   $ )u  Modified Weiszfeld step.

This function defines one iteration step in order to approximate the
spatial median (L1 median). It is a form of an iteratively re-weighted
least squares method.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training vector, where `n_samples` is the number of samples and
    `n_features` is the number of features.

x_old : ndarray of shape = (n_features,)
    Current start vector.

Returns
-------
x_new : ndarray of shape (n_features,)
    New iteration step.

References
----------
- On Computation of Spatial Median for Robust Data Mining, 2005
  T. Kärkkäinen and S. Äyrämö
  http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
      axisr   Ng      ?        )npsqrtsum_EPSILONintshapenewaxisr   normmaxmin)Xx_olddiff	diff_normmaskis_x_old_in_Xquotient_normnew_directions           ^/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/sklearn/linear_model/_theil_sen.py_modified_weiszfeld_stepr,      s   6 9DtQwQ/0I D
QWWQZ/0M:D2::.IKKt'7a @AMxqqzI5A>	MB
 
  	C}445E
c=0
1E
9	:    c                    U R                   S   S:X  a%  S[        R                  " U R                  5       SS94$ US-  n[        R                  " U SS9n[        U5       H3  n[        X5      n[        R                  " X5-
  S-  5      U:  a    XE4$ UnM5     [        R                  " SR                  US9[        5        WW4$ )	u  Spatial median (L1 median).

The spatial median is member of a class of so-called M-estimators which
are defined by an optimization problem. Given a number of p points in an
n-dimensional space, the point x minimizing the sum of all distances to the
p other points is called spatial median.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training vector, where `n_samples` is the number of samples and
    `n_features` is the number of features.

max_iter : int, default=300
    Maximum number of iterations.

tol : float, default=1.e-3
    Stop the algorithm if spatial_median has converged.

Returns
-------
spatial_median : ndarray of shape = (n_features,)
    Spatial median.

n_iter : int
    Number of iterations needed.

References
----------
- On Computation of Spatial Median for Robust Data Mining, 2005
  T. Kärkkäinen and S. Äyrämö
  http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
r   T)keepdimsr   r   r   zYMaximum number of iterations {max_iter} reached in spatial median for TheilSen regressor.)max_iter)r   r   medianravelmeanranger,   r   warningswarnformatr   )r#   r0   tolspatial_median_oldn_iterspatial_medians         r+   _spatial_medianr<   P   s    D 	wwqzQ"))AGGI555AIC+/1!H66%61<=C !! "0 " 	vxv(		
 >!!r-   c                 :    SSSU-  -  X-
  S-   -  U-   S-
  U -  -
  $ )zApproximation of the breakdown point.

Parameters
----------
n_samples : int
    Number of samples.

n_subsamples : int
    Number of subsamples to consider.

Returns
-------
breakdown_point : float
    Approximation of breakdown point.
r   g      ? )	n_samplesn_subsampless     r+   _breakdown_pointrA      sE    " 	
A$%)AA)EF 	r-   c                    [        U5      nU R                  S   U-   nUR                  S   n[        R                  " UR                  S   U45      n[        R                  " XT45      n[        R
                  " [        XT5      5      n[        SXx45      u  n	[        U5       H-  u  pXSS24   USS2US24'   X   USU& U	" Xx5      S   SU Xj'   M/     U$ )aQ  Least Squares Estimator for TheilSenRegressor class.

This function calculates the least squares method on a subset of rows of X
and y defined by the indices array. Optionally, an intercept column is
added if intercept is set to true.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Design matrix, where `n_samples` is the number of samples and
    `n_features` is the number of features.

y : ndarray of shape (n_samples,)
    Target vector, where `n_samples` is the number of samples.

indices : ndarray of shape (n_subpopulation, n_subsamples)
    Indices of all subsamples with respect to the chosen subpopulation.

fit_intercept : bool
    Fit intercept or not.

Returns
-------
weights : ndarray of shape (n_subpopulation, n_features + intercept)
    Solution matrix of n_subpopulation solved least square problems.
r   r   )gelssN)	r   r   r   emptyoneszerosr!   r   	enumerate)r#   yindicesfit_intercept
n_featuresr@   weightsX_subpopulationy_subpopulationlstsqindexsubsets               r+   _lstsqrR      s    6 &Mm+J==#Lhha(*56Ggg|89OhhL =?O
_,NOHU"7+-.qy\=>)*)*&@CKZP ,
 Nr-   c                       \ rS rSr% SrS/\" \SSSS9/S\/\" \SSSS9/\" \S	SSS9/S
/S\/S/S.r\	\
S'   SSSSSSSSS.S jrS r\" SS9S 5       rSrg)TheilSenRegressor   a  Theil-Sen Estimator: robust multivariate regression model.

The algorithm calculates least square solutions on subsets with size
n_subsamples of the samples in X. Any value of n_subsamples between the
number of features and samples leads to an estimator with a compromise
between robustness and efficiency. Since the number of least square
solutions is "n_samples choose n_subsamples", it can be extremely large
and can therefore be limited with max_subpopulation. If this limit is
reached, the subsets are chosen randomly. In a final step, the spatial
median (or L1 median) is calculated of all least square solutions.

Read more in the :ref:`User Guide <theil_sen_regression>`.

Parameters
----------
fit_intercept : bool, default=True
    Whether to calculate the intercept for this model. If set
    to false, no intercept will be used in calculations.

max_subpopulation : int, default=1e4
    Instead of computing with a set of cardinality 'n choose k', where n is
    the number of samples and k is the number of subsamples (at least
    number of features), consider only a stochastic subpopulation of a
    given maximal size if 'n choose k' is larger than max_subpopulation.
    For other than small problem sizes this parameter will determine
    memory usage and runtime if n_subsamples is not changed. Note that the
    data type should be int but floats such as 1e4 can be accepted too.

n_subsamples : int, default=None
    Number of samples to calculate the parameters. This is at least the
    number of features (plus 1 if fit_intercept=True) and the number of
    samples as a maximum. A lower number leads to a higher breakdown
    point and a low efficiency while a high number leads to a low
    breakdown point and a high efficiency. If None, take the
    minimum number of subsamples leading to maximal robustness.
    If n_subsamples is set to n_samples, Theil-Sen is identical to least
    squares.

max_iter : int, default=300
    Maximum number of iterations for the calculation of spatial median.

tol : float, default=1e-3
    Tolerance when calculating spatial median.

random_state : int, RandomState instance or None, default=None
    A random number generator instance to define the state of the random
    permutations generator. Pass an int for reproducible output across
    multiple function calls.
    See :term:`Glossary <random_state>`.

n_jobs : int, default=None
    Number of CPUs to use during the cross validation.
    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
    for more details.

verbose : bool, default=False
    Verbose mode when fitting the model.

Attributes
----------
coef_ : ndarray of shape (n_features,)
    Coefficients of the regression model (median of distribution).

intercept_ : float
    Estimated intercept of regression model.

breakdown_ : float
    Approximated breakdown point.

n_iter_ : int
    Number of iterations needed for the spatial median.

n_subpopulation_ : int
    Number of combinations taken into account from 'n choose k', where n is
    the number of samples and k is the number of subsamples.

n_features_in_ : int
    Number of features seen during :term:`fit`.

    .. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
    Names of features seen during :term:`fit`. Defined only when `X`
    has feature names that are all strings.

    .. versionadded:: 1.0

See Also
--------
HuberRegressor : Linear regression model that is robust to outliers.
RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.
SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

References
----------
- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009
  Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang
  http://home.olemiss.edu/~xdang/papers/MTSE.pdf

Examples
--------
>>> from sklearn.linear_model import TheilSenRegressor
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(
...     n_samples=200, n_features=2, noise=4.0, random_state=0)
>>> reg = TheilSenRegressor(random_state=0).fit(X, y)
>>> reg.score(X, y)
0.9884
>>> reg.predict(X[:1,])
array([-31.5871])
booleanr   Nleft)closedr   r   random_stateverboserJ   max_subpopulationr@   r0   r8   rY   n_jobsrZ   _parameter_constraintsTg     @,  MbP?Fc                d    Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        g Nr[   )	selfrJ   r\   r@   r0   r8   rY   r]   rZ   s	            r+   __init__TheilSenRegressor.__init__M  s0     +!2( (r-   c           	         U R                   nU R                  (       a  US-   nOUnUbz  X1:  a  [        SR                  X15      5      eX:  a6  XC:  a0  U R                  (       a  SOSn[        SR                  XTU5      5      eO+X1:w  a  [        SR                  X15      5      eO[	        XA5      n[        S[        R                  " [        X5      5      5      n[        [	        U R                  U5      5      nX74$ )Nr   z=Invalid parameter since n_subsamples > n_samples ({0} > {1}).z+1 zAInvalid parameter since n_features{0} > n_subsamples ({1} > {2}).z\Invalid parameter since n_subsamples != n_samples ({0} != {1}) while n_samples < n_features.)r@   rJ   
ValueErrorr7   r"   r!   r   rintr	   r   r\   )rc   r?   rK   r@   n_dimplus_1all_combinationsn_subpopulations           r+   _check_subparams"TheilSenRegressor._check_subparamsb  s    ((NEE#' --3VL-L  &'%)%7%7TRF$!6&>  (  ,$((.|(G  - u0Lq"''%	*H"IJc$"8"8:JKL,,r-   )prefer_skip_nested_validationc           	        ^ ^^^ [        T R                  5      n[        T TTSS9u  mmTR                  u  pET R	                  XE5      u  nT l        [        XF5      T l        T R                  (       a  [        SR                  T R                  5      5        [        SR                  U5      5        [        T R                  U-  5      n[        SR                  U5      5        [        SR                  T R
                  5      5        [        R                  " [        XF5      5      T R                  ::  a  [!        [#        [%        U5      U5      5      nO3[%        T R
                  5       V	s/ s H  n	UR'                  XFSS9PM     nn	[)        T R*                  5      n
[        R,                  " X5      m[/        U
T R                  S	9" UUU U4S
 j[%        U
5       5       5      n[        R0                  " U5      n[3        UT R4                  T R6                  S9u  T l        nT R:                  (       a  US   T l        USS T l        T $ ST l        UT l        T $ s  sn	f )zFit linear model.

Parameters
----------
X : ndarray of shape (n_samples, n_features)
    Training data.
y : ndarray of shape (n_samples,)
    Target values.

Returns
-------
self : returns an instance of self.
    Fitted `TheilSenRegressor` estimator.
T)	y_numericzBreakdown point: {0}zNumber of samples: {0}zTolerable outliers: {0}zNumber of subpopulations: {0}F)sizereplace)r]   rZ   c              3   n   >#    U  H*  n[        [        5      " TTTU   TR                  5      v   M,     g 7frb   )r   rR   rJ   ).0jobr#   
index_listrc   rH   s     r+   	<genexpr>(TheilSenRegressor.fit.<locals>.<genexpr>  s5      @
$ FOAq*S/43E3EFF$s   25)r0   r8   r   r   Nr   ) r   rY   r   r   rn   n_subpopulation_rA   
breakdown_rZ   printr7   r   r   ri   r	   r\   listr   r4   choicer   r]   array_splitr   vstackr<   r0   r8   n_iter_rJ   
intercept_coef_)rc   r#   rH   rY   r?   rK   r@   tol_outliersrI   _r]   rL   coefsrx   s   ```          @r+   fitTheilSenRegressor.fit  s     *$*;*;<T1a481 !	.2.C.C/
+d+ +9C<<(//@A*11)<=t:;L+22<@A1889N9NOP 77512d6L6LL<i(8,GHG t4455A ##I%#P5  
 "$++.^^G4
&$,,? @
V}@
 
 ))G$-dmm
e #AhDOqrDJ
  "DODJ/s   (I)r|   r   rJ   r   r0   r\   r   r]   r{   r@   rY   r8   rZ   )__name__
__module____qualname____firstlineno____doc__r   r   r   r^   dict__annotations__rd   rn   r   r   __static_attributes__r>   r-   r+   rT   rT      s    od $&tQVDEx(h4?@sD89'(";
$D 
 *#-J 59 69r-   rT   )r_   r`   )*r   r5   	itertoolsr   numbersr   r   numpyr   joblibr   scipyr   scipy.linalg.lapackr   scipy.specialr	   sklearn.baser
   r   sklearn.exceptionsr   sklearn.linear_model._baser   sklearn.utilsr   sklearn.utils._param_validationr   sklearn.utils.parallelr   r   sklearn.utils.validationr   finfodoubleepsr   r,   r<   rA   rR   rT   r>   r-   r+   <module>r      sx     " "  #  0  5 1 2 , 4 4 288BII""0f5"p6)Xr rr-   