Code snippets for page Node List

Download node_list.py. Browse the code snippet index.

# -*- coding: utf-8 -*-
# Generated by codesnippet sphinx extension on 2020-03-05

import mdp
import numpy as np
np.random.seed(0)
from mdp import numx

def identity(x): return x

def u3(x): return numx.absolute(x)**3  # A simple nonlinear transformation

def norm2(x):  # Computes the norm of each sample, returning an Nx1 array
    return ((x**2).sum(axis=1)**0.5).reshape((-1, 1))

x = numx.array([[-2., 2.], [0.2, 0.3], [0.6, 1.2]])
gen = mdp.nodes.GeneralExpansionNode(funcs=[identity, u3, norm2])
print(gen.execute(x))
# Expected:
## [[-2.          2.          8.          8.          2.82842712]
##  [ 0.2         0.3         0.008       0.027       0.36055513]
##  [ 0.6         1.2         0.216       1.728       1.34164079]]
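# Quick sanity check (not part of the original snippet): the node's output
# is simply the horizontal concatenation of each function applied to x.
manual = numx.hstack([identity(x), u3(x), norm2(x)])
print(numx.allclose(manual, gen.execute(x)))
# Expected:
## True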

import numpy as np
from sklearn.preprocessing import FunctionTransformer
transformer = FunctionTransformer(np.log1p)
X = np.array([[0, 1], [2, 3]])
transformer.transform(X)
# Expected:
## array([[0.       , 0.6931...],
##        [1.0986..., 1.3862...]])
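# Round-trip sketch (assumes np.expm1 as the inverse of np.log1p):
# FunctionTransformer also accepts an inverse_func for inverse_transform.
transformer = FunctionTransformer(np.log1p, inverse_func=np.expm1)
np.allclose(transformer.inverse_transform(transformer.transform(X)), X)
# Expected:
## True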

from sklearn.preprocessing import Binarizer
X = [[ 1., -1.,  2.],
     [ 2.,  0.,  0.],
     [ 0.,  1., -1.]]
transformer = Binarizer().fit(X)  # fit does nothing.
transformer
# Expected:
## Binarizer()
transformer.transform(X)
# Expected:
## array([[1., 0., 1.],
##        [1., 0., 0.],
##        [0., 1., 0.]])

from sklearn.preprocessing import KernelCenterer
from sklearn.metrics.pairwise import pairwise_kernels
X = [[ 1., -2.,  2.],
     [ -2.,  1.,  3.],
     [ 4.,  1., -2.]]
K = pairwise_kernels(X, metric='linear')
K
# Expected:
## array([[  9.,   2.,  -2.],
##        [  2.,  14., -13.],
##        [ -2., -13.,  21.]])
transformer = KernelCenterer().fit(K)
transformer
# Expected:
## KernelCenterer()
transformer.transform(K)
# Expected:
## array([[  5.,   0.,  -5.],
##        [  0.,  14., -14.],
##        [ -5., -14.,  19.]])

from sklearn.preprocessing import MinMaxScaler
data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
scaler = MinMaxScaler()
print(scaler.fit(data))
# Expected:
## MinMaxScaler()
print(scaler.data_max_)
# Expected:
## [ 1. 18.]
print(scaler.transform(data))
# Expected:
## [[0.   0.  ]
##  [0.25 0.25]
##  [0.5  0.5 ]
##  [1.   1.  ]]
print(scaler.transform([[2, 2]]))
# Expected:
## [[1.5 0. ]]
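# Round-trip sketch: inverse_transform maps scaled values back into the
# original feature range learned during fit.
print(scaler.inverse_transform([[1.5, 0.]]))
# Expected:
## [[2. 2.]]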

from sklearn.preprocessing import MaxAbsScaler
X = [[ 1., -1.,  2.],
     [ 2.,  0.,  0.],
     [ 0.,  1., -1.]]
transformer = MaxAbsScaler().fit(X)
transformer
# Expected:
## MaxAbsScaler()
transformer.transform(X)
# Expected:
## array([[ 0.5, -1. ,  1. ],
##        [ 1. ,  0. ,  0. ],
##        [ 0. ,  1. , -0.5]])

from sklearn.preprocessing import Normalizer
X = [[4, 1, 2, 2],
     [1, 3, 9, 3],
     [5, 7, 5, 1]]
transformer = Normalizer().fit(X)  # fit does nothing.
transformer
# Expected:
## Normalizer()
transformer.transform(X)
# Expected:
## array([[0.8, 0.2, 0.4, 0.4],
##        [0.1, 0.3, 0.9, 0.3],
##        [0.5, 0.7, 0.5, 0.1]])

from sklearn.preprocessing import RobustScaler
X = [[ 1., -2.,  2.],
     [ -2.,  1.,  3.],
     [ 4.,  1., -2.]]
transformer = RobustScaler().fit(X)
transformer
# Expected:
## RobustScaler()
transformer.transform(X)
# Expected:
## array([[ 0. , -2. ,  0. ],
##        [-1. ,  0. ,  0.4],
##        [ 1. ,  0. , -1.6]])

from sklearn.preprocessing import StandardScaler
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
scaler = StandardScaler()
print(scaler.fit(data))
# Expected:
## StandardScaler()
print(scaler.mean_)
# Expected:
## [0.5 0.5]
print(scaler.transform(data))
# Expected:
## [[-1. -1.]
##  [-1. -1.]
##  [ 1.  1.]
##  [ 1.  1.]]
print(scaler.transform([[2, 2]]))
# Expected:
## [[3. 3.]]
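# Round-trip sketch: inverse_transform undoes the standardization using the
# stored mean_ and scale_.
print(scaler.inverse_transform([[3., 3.]]))
# Expected:
## [[2. 2.]]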

import numpy as np
from sklearn.preprocessing import QuantileTransformer
rng = np.random.RandomState(0)
X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)
qt = QuantileTransformer(n_quantiles=10, random_state=0)
qt.fit_transform(X)
# Expected:
## array([...])

import numpy as np
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer()
data = [[1, 2], [3, 2], [4, 5]]
print(pt.fit(data))
# Expected:
## PowerTransformer()
print(pt.lambdas_)
# Expected:
## [ 1.386... -3.100...]
print(pt.transform(data))
# Expected:
## [[-1.316... -0.707...]
##  [ 0.209... -0.707...]
##  [ 1.106...  1.414...]]

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
X = np.arange(6).reshape(3, 2)
X
# Expected:
## array([[0, 1],
##        [2, 3],
##        [4, 5]])
poly = PolynomialFeatures(2)
poly.fit_transform(X)
# Expected:
## array([[ 1.,  0.,  1.,  0.,  0.,  1.],
##        [ 1.,  2.,  3.,  4.,  6.,  9.],
##        [ 1.,  4.,  5., 16., 20., 25.]])
poly = PolynomialFeatures(interaction_only=True)
poly.fit_transform(X)
# Expected:
## array([[ 1.,  0.,  1.,  0.],
##        [ 1.,  2.,  3.,  6.],
##        [ 1.,  4.,  5., 20.]])

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)
# Expected:
## OneHotEncoder(handle_unknown='ignore')
enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
enc.transform([['Female', 1], ['Male', 4]]).toarray()
# Expected:
## array([[1., 0., 1., 0., 0.],
##        [0., 1., 0., 0., 0.]])
enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])
# Expected:
## array([['Male', 1],
##        [None, 2]], dtype=object)
enc.get_feature_names(['gender', 'group'])
# Expected:
## array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],
##   dtype=object)
drop_enc = OneHotEncoder(drop='first').fit(X)
drop_enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()
# Expected:
## array([[0., 0., 0.],
##        [1., 1., 0.]])

from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)
# Expected:
## OrdinalEncoder()
enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
enc.transform([['Female', 3], ['Male', 1]])
# Expected:
## array([[0., 2.],
##        [1., 0.]])

enc.inverse_transform([[1, 0], [0, 1]])
# Expected:
## array([['Male', 1],
##        ['Female', 2]], dtype=object)

from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
lb.fit([1, 2, 6, 4, 2])
# Expected:
## LabelBinarizer()
lb.classes_
# Expected:
## array([1, 2, 4, 6])
lb.transform([1, 6])
# Expected:
## array([[1, 0, 0, 0],
##        [0, 0, 0, 1]])

lb = preprocessing.LabelBinarizer()
lb.fit_transform(['yes', 'no', 'no', 'yes'])
# Expected:
## array([[1],
##        [0],
##        [0],
##        [1]])

import numpy as np
lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
# Expected:
## LabelBinarizer()
lb.classes_
# Expected:
## array([0, 1, 2])
lb.transform([0, 1, 2, 1])
# Expected:
## array([[1, 0, 0],
##        [0, 1, 0],
##        [0, 0, 1],
##        [0, 1, 0]])

from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit([1, 2, 2, 6])
# Expected:
## LabelEncoder()
le.classes_
# Expected:
## array([1, 2, 6])
le.transform([1, 1, 2, 6])
# Expected:
## array([0, 0, 1, 2]...)
le.inverse_transform([0, 0, 1, 2])
# Expected:
## array([1, 1, 2, 6])

le = preprocessing.LabelEncoder()
le.fit(["paris", "paris", "tokyo", "amsterdam"])
# Expected:
## LabelEncoder()
list(le.classes_)
# Expected:
## ['amsterdam', 'paris', 'tokyo']
le.transform(["tokyo", "tokyo", "paris"])
# Expected:
## array([2, 2, 1]...)
list(le.inverse_transform([2, 2, 1]))
# Expected:
## ['tokyo', 'tokyo', 'paris']

from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
mlb.fit_transform([(1, 2), (3,)])
# Expected:
## array([[1, 1, 0],
##        [0, 0, 1]])
mlb.classes_
# Expected:
## array([1, 2, 3])

mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])
# Expected:
## array([[0, 1, 1],
##        [1, 0, 0]])
list(mlb.classes_)
# Expected:
## ['comedy', 'sci-fi', 'thriller']

mlb = MultiLabelBinarizer()
# Note: a bare list of strings is treated as a list of character iterables,
# so each individual character becomes its own class:
mlb.fit(['sci-fi', 'thriller', 'comedy'])
# Expected:
## MultiLabelBinarizer()
mlb.classes_
# Expected:
## array(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',
##     'y'], dtype=object)

mlb = MultiLabelBinarizer()
# Wrapping the strings in an outer list yields one sample with three labels:
mlb.fit([['sci-fi', 'thriller', 'comedy']])
# Expected:
## MultiLabelBinarizer()
mlb.classes_
# Expected:
## array(['comedy', 'sci-fi', 'thriller'], dtype=object)

from sklearn.preprocessing import KBinsDiscretizer
X = [[-2, 1, -4,   -1],
     [-1, 2, -3, -0.5],
     [ 0, 3, -2,  0.5],
     [ 1, 4, -1,    2]]
est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
est.fit(X)
# Expected:
## KBinsDiscretizer(...)
Xt = est.transform(X)
Xt
# Expected:
## array([[ 0., 0., 0., 0.],
##        [ 1., 1., 1., 0.],
##        [ 2., 2., 2., 1.],
##        [ 2., 2., 2., 2.]])

est.bin_edges_[0]
# Expected:
## array([-2., -1.,  0.,  1.])
est.inverse_transform(Xt)
# Expected:
## array([[-1.5,  1.5, -3.5, -0.5],
##        [-0.5,  2.5, -2.5, -0.5],
##        [ 0.5,  3.5, -1.5,  0.5],
##        [ 0.5,  3.5, -1.5,  1.5]])

from sklearn.datasets import make_regression
from sklearn.isotonic import IsotonicRegression
X, y = make_regression(n_samples=10, n_features=1, random_state=41)
iso_reg = IsotonicRegression().fit(X.flatten(), y)
iso_reg.predict([.1, .2])
# Expected:
## array([1.8628..., 3.7256...])

from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(iris.data, iris.target)
# Expected:
## GridSearchCV(estimator=SVC(),
##              param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})
sorted(clf.cv_results_.keys())
# Expected:
## ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
##  'param_C', 'param_kernel', 'params',...
##  'rank_test_score', 'split0_test_score',...
##  'split2_test_score', ...
##  'std_fit_time', 'std_score_time', 'std_test_score']
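# The winning parameter combination is stored on the fitted search object;
# the exact values depend on the CV splits, so they are elided here.
clf.best_params_
# Expected:
## {'C': ..., 'kernel': ...}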

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
iris = load_iris()
logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,
                              random_state=0)
distributions = dict(C=uniform(loc=0, scale=4),
                     penalty=['l2', 'l1'])
clf = RandomizedSearchCV(logistic, distributions, random_state=0)
search = clf.fit(iris.data, iris.target)
search.best_params_
# Expected:
## {'C': 2..., 'penalty': 'l1'}

import numpy as np
from sklearn.linear_model import LinearRegression
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, y)
reg.score(X, y)
# Expected:
## 1.0
reg.coef_
# Expected:
## array([1., 2.])
reg.intercept_
# Expected:
## 3.0000...
reg.predict(np.array([[3, 5]]))
# Expected:
## array([16.])

from sklearn import linear_model
clf = linear_model.BayesianRidge()
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## BayesianRidge()
clf.predict([[1, 1]])
# Expected:
## array([1.])

from sklearn import linear_model
clf = linear_model.ARDRegression()
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## ARDRegression()
clf.predict([[1, 1]])
# Expected:
## array([1.])

from sklearn import linear_model
reg = linear_model.Lars(n_nonzero_coefs=1)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
# Expected:
## Lars(n_nonzero_coefs=1)
print(reg.coef_)
# Expected:
## [ 0. -1.11...]

from sklearn import linear_model
reg = linear_model.LassoLars(alpha=0.01)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
# Expected:
## LassoLars(alpha=0.01)
print(reg.coef_)
# Expected:
## [ 0.         -0.963257...]

from sklearn.linear_model import LarsCV
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9996...
reg.alpha_
# Expected:
## 0.0254...
reg.predict(X[:1,])
# Expected:
## array([154.0842...])

from sklearn.linear_model import LassoLarsCV
from sklearn.datasets import make_regression
X, y = make_regression(noise=4.0, random_state=0)
reg = LassoLarsCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9992...
reg.alpha_
# Expected:
## 0.0484...
reg.predict(X[:1,])
# Expected:
## array([-77.8723...])

from sklearn import linear_model
reg = linear_model.LassoLarsIC(criterion='bic')
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
# Expected:
## LassoLarsIC(criterion='bic')
print(reg.coef_)
# Expected:
## [ 0.  -1.11...]

from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## Lasso(alpha=0.1)
print(clf.coef_)
# Expected:
## [0.85 0.  ]
print(clf.intercept_)
# Expected:
## 0.15...

from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression

X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNet(random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNet(random_state=0)
print(regr.coef_)
# Expected:
## [18.83816048 64.55968825]
print(regr.intercept_)
# Expected:
## 1.451...
print(regr.predict([[0, 0]]))
# Expected:
## [1.451...]

from sklearn.linear_model import LassoCV
from sklearn.datasets import make_regression
X, y = make_regression(noise=4, random_state=0)
reg = LassoCV(cv=5, random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9993...
reg.predict(X[:1,])
# Expected:
## array([-78.4951...])

from sklearn.linear_model import ElasticNetCV
from sklearn.datasets import make_regression

X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNetCV(cv=5, random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNetCV(cv=5, random_state=0)
print(regr.alpha_)
# Expected:
## 0.199...
print(regr.intercept_)
# Expected:
## 0.398...
print(regr.predict([[0, 0]]))
# Expected:
## [0.398...]

from sklearn import linear_model
clf = linear_model.MultiTaskLasso(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskLasso(alpha=0.1)
print(clf.coef_)
# Expected:
## [[0.89393398 0.        ]
##  [0.89393398 0.        ]]
print(clf.intercept_)
# Expected:
## [0.10606602 0.10606602]

from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskElasticNet(alpha=0.1)
print(clf.coef_)
# Expected:
## [[0.45663524 0.45612256]
##  [0.45663524 0.45612256]]
print(clf.intercept_)
# Expected:
## [0.0872422 0.0872422]

from sklearn import linear_model
clf = linear_model.MultiTaskElasticNetCV(cv=3)
clf.fit([[0,0], [1, 1], [2, 2]],
        [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskElasticNetCV(cv=3)
print(clf.coef_)
# Expected:
## [[0.52875032 0.46958558]
##  [0.52875032 0.46958558]]
print(clf.intercept_)
# Expected:
## [0.00166409 0.00166409]

from sklearn.linear_model import MultiTaskLassoCV
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score
X, y = make_regression(n_targets=2, noise=4, random_state=0)
reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)
r2_score(y, reg.predict(X))
# Expected:
## 0.9994...
reg.alpha_
# Expected:
## 0.5713...
reg.predict(X[:1,])
# Expected:
## array([[153.7971...,  94.9015...]])

import numpy as np
from sklearn.linear_model import HuberRegressor, LinearRegression
from sklearn.datasets import make_regression
rng = np.random.RandomState(0)
X, y, coef = make_regression(
    n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
X[:4] = rng.uniform(10, 20, (4, 2))
y[:4] = rng.uniform(10, 20, 4)
huber = HuberRegressor().fit(X, y)
huber.score(X, y)
# Expected:
## -7.284608623514573
huber.predict(X[:1,])
# Expected:
## array([806.7200...])
linear = LinearRegression().fit(X, y)
print("True coefficients:", coef)
# Expected:
## True coefficients: [20.4923...  34.1698...]
print("Huber coefficients:", huber.coef_)
# Expected:
## Huber coefficients: [17.7906... 31.0106...]
print("Linear Regression coefficients:", linear.coef_)
# Expected:
## Linear Regression coefficients: [-1.9221...  7.0226...]

import numpy as np
from sklearn import linear_model
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])
clf = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
clf.fit(X, Y)
# Expected:
## SGDClassifier()

print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
from sklearn import linear_model
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)
clf.fit(X, y)
# Expected:
## SGDRegressor()

from sklearn.linear_model import Ridge
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = Ridge(alpha=1.0)
clf.fit(X, y)
# Expected:
## Ridge()

from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
X, y = load_diabetes(return_X_y=True)
clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
clf.score(X, y)
# Expected:
## 0.5166...

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifier
X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifier().fit(X, y)
clf.score(X, y)
# Expected:
## 0.9595...

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifierCV
X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
clf.score(X, y)
# Expected:
## 0.9630...

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(random_state=0).fit(X, y)
clf.predict(X[:2, :])
# Expected:
## array([0, 0])
clf.predict_proba(X[:2, :])
# Expected:
## array([[9.8...e-01, 1.8...e-02, 1.4...e-08],
##        [9.7...e-01, 2.8...e-02, ...e-08]])
clf.score(X, y)
# Expected:
## 0.97...

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegressionCV
X, y = load_iris(return_X_y=True)
clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)
clf.predict(X[:2, :])
# Expected:
## array([0, 0])
clf.predict_proba(X[:2, :]).shape
# Expected:
## (2, 3)
clf.score(X, y)
# Expected:
## 0.98...

from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.datasets import make_regression
X, y = make_regression(noise=4, random_state=0)
reg = OrthogonalMatchingPursuit().fit(X, y)
reg.score(X, y)
# Expected:
## 0.9991...
reg.predict(X[:1,])
# Expected:
## array([-78.3854...])

from sklearn.linear_model import OrthogonalMatchingPursuitCV
from sklearn.datasets import make_regression
X, y = make_regression(n_features=100, n_informative=10,
                       noise=4, random_state=0)
reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9991...
reg.n_nonzero_coefs_
# Expected:
## 10
reg.predict(X[:1,])
# Expected:
## array([-78.3854...])

from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_features=4, random_state=0)
clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,
                                  tol=1e-3)
clf.fit(X, y)
# Expected:
## PassiveAggressiveClassifier(random_state=0)
print(clf.coef_)
# Expected:
## [[0.26642044 0.45070924 0.67251877 0.64185414]]
print(clf.intercept_)
# Expected:
## [1.84127814]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_features=4, random_state=0)
regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,
                                  tol=1e-3)
regr.fit(X, y)
# Expected:
## PassiveAggressiveRegressor(max_iter=100, random_state=0)
print(regr.coef_)
# Expected:
## [20.48736655 34.18818427 67.59122734 87.94731329]
print(regr.intercept_)
# Expected:
## [-0.02306214]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-0.02306214]

from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
X, y = load_digits(return_X_y=True)
clf = Perceptron(tol=1e-3, random_state=0)
clf.fit(X, y)
# Expected:
## Perceptron()
clf.score(X, y)
# Expected:
## 0.939...

from sklearn.linear_model import RANSACRegressor
from sklearn.datasets import make_regression
X, y = make_regression(
    n_samples=200, n_features=2, noise=4.0, random_state=0)
reg = RANSACRegressor(random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9885...
reg.predict(X[:1,])
# Expected:
## array([-31.9417...])

from sklearn.linear_model import TheilSenRegressor
from sklearn.datasets import make_regression
X, y = make_regression(
    n_samples=200, n_features=2, noise=4.0, random_state=0)
reg = TheilSenRegressor(random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9884...
reg.predict(X[:1,])
# Expected:
## array([-31.5871...])

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import SVC
clf = SVC(gamma='auto')
clf.fit(X, y)
# Expected:
## SVC(gamma='auto')
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import NuSVC
clf = NuSVC()
clf.fit(X, y)
# Expected:
## NuSVC()
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.svm import SVR
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = SVR(C=1.0, epsilon=0.2)
clf.fit(X, y)
# Expected:
## SVR(epsilon=0.2)

from sklearn.svm import NuSVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = NuSVR(C=1.0, nu=0.1)
clf.fit(X, y)
# Expected:
## NuSVR(nu=0.1)

from sklearn.svm import OneClassSVM
X = [[0], [0.44], [0.45], [0.46], [1]]
clf = OneClassSVM(gamma='auto').fit(X)
clf.predict(X)
# Expected:
## array([-1,  1,  1,  1, -1])
clf.score_samples(X)
# Expected:
## array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])

from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = LinearSVC(random_state=0, tol=1e-5)
clf.fit(X, y)
# Expected:
## LinearSVC(random_state=0, tol=1e-05)
print(clf.coef_)
# Expected:
## [[0.085... 0.394... 0.498... 0.375...]]
print(clf.intercept_)
# Expected:
## [0.284...]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.svm import LinearSVR
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, random_state=0)
regr = LinearSVR(random_state=0, tol=1e-5)
regr.fit(X, y)
# Expected:
## LinearSVR(random_state=0, tol=1e-05)
print(regr.coef_)
# Expected:
## [16.35... 26.91... 42.30... 60.47...]
print(regr.intercept_)
# Expected:
## [-4.29...]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-4.29...]

import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
W = model.fit_transform(X)
H = model.components_

import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
pca.fit(X)
# Expected:
## PCA(n_components=2)
print(pca.explained_variance_ratio_)
# Expected:
## [0.9924... 0.0075...]
print(pca.singular_values_)
# Expected:
## [6.30061... 0.54980...]

pca = PCA(n_components=2, svd_solver='full')
pca.fit(X)
# Expected:
## PCA(n_components=2, svd_solver='full')
print(pca.explained_variance_ratio_)
# Expected:
## [0.9924... 0.00755...]
print(pca.singular_values_)
# Expected:
## [6.30061... 0.54980...]

pca = PCA(n_components=1, svd_solver='arpack')
pca.fit(X)
# Expected:
## PCA(n_components=1, svd_solver='arpack')
print(pca.explained_variance_ratio_)
# Expected:
## [0.99244...]
print(pca.singular_values_)
# Expected:
## [6.30061...]
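# Projection sketch: the fitted estimator maps samples onto its
# n_components principal axes.
pca.transform(X).shape
# Expected:
## (6, 1)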

from sklearn.datasets import load_digits
from sklearn.decomposition import IncrementalPCA
from scipy import sparse
X, _ = load_digits(return_X_y=True)
transformer = IncrementalPCA(n_components=7, batch_size=200)
transformer.partial_fit(X[:100, :])
# Expected:
## IncrementalPCA(batch_size=200, n_components=7)
X_sparse = sparse.csr_matrix(X)
X_transformed = transformer.fit_transform(X_sparse)
X_transformed.shape
# Expected:
## (1797, 7)

from sklearn.datasets import load_digits
from sklearn.decomposition import KernelPCA
X, _ = load_digits(return_X_y=True)
transformer = KernelPCA(n_components=7, kernel='linear')
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.decomposition import SparsePCA
X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
transformer = SparsePCA(n_components=5, random_state=0)
transformer.fit(X)
# Expected:
## SparsePCA(...)
X_transformed = transformer.transform(X)
X_transformed.shape
# Expected:
## (200, 5)
np.mean(transformer.components_ == 0)
# Expected:
## 0.9666...

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.decomposition import MiniBatchSparsePCA
X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,
                                 random_state=0)
transformer.fit(X)
# Expected:
## MiniBatchSparsePCA(...)
X_transformed = transformer.transform(X)
X_transformed.shape
# Expected:
## (200, 5)
np.mean(transformer.components_ == 0)
# Expected:
## 0.94

from sklearn.decomposition import TruncatedSVD
from scipy.sparse import random as sparse_random
X = sparse_random(100, 100, density=0.01, format='csr',
                  random_state=42)
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
svd.fit(X)
# Expected:
## TruncatedSVD(n_components=5, n_iter=7, random_state=42)
print(svd.explained_variance_ratio_)
# Expected:
## [0.0646... 0.0633... 0.0639... 0.0535... 0.0406...]
print(svd.explained_variance_ratio_.sum())
# Expected:
## 0.286...
print(svd.singular_values_)
# Expected:
## [1.553... 1.512...  1.510... 1.370... 1.199...]

from sklearn.datasets import load_digits
from sklearn.decomposition import FastICA
X, _ = load_digits(return_X_y=True)
transformer = FastICA(n_components=7,
        random_state=0)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

from sklearn.datasets import load_digits
from sklearn.decomposition import FactorAnalysis
X, _ = load_digits(return_X_y=True)
transformer = FactorAnalysis(n_components=7, random_state=0)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.datasets import make_multilabel_classification
X, _ = make_multilabel_classification(random_state=0)
lda = LatentDirichletAllocation(n_components=5,
    random_state=0)
lda.fit(X)
# Expected:
## LatentDirichletAllocation(...)

lda.transform(X[-2:])
# Expected:
## array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],
##        [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586  ]])

from sklearn.manifold import Isomap
from sklearn.neighbors import KNeighborsTransformer
from sklearn.pipeline import make_pipeline
estimator = make_pipeline(
    KNeighborsTransformer(n_neighbors=5, mode='distance'),
    Isomap(neighbors_algorithm='precomputed'))

from sklearn.cluster import DBSCAN
from sklearn.neighbors import RadiusNeighborsTransformer
from sklearn.pipeline import make_pipeline
estimator = make_pipeline(
    RadiusNeighborsTransformer(radius=42.0, mode='distance'),
    DBSCAN(min_samples=30, metric='precomputed'))

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
# Expected:
## KNeighborsClassifier(...)
print(neigh.predict([[1.1]]))
# Expected:
## [0]
print(neigh.predict_proba([[0.9]]))
# Expected:
## [[0.66666667 0.33333333]]
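# Neighbor-inspection sketch: kneighbors returns the distances to, and the
# indices of, the n_neighbors closest training samples.
print(neigh.kneighbors([[1.1]]))
# Expected:
## (array([[0.1, 0.9, 1.1]]), array([[1, 2, 0]]))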

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsClassifier
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsClassifier(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0]
print(neigh.predict_proba([[1.0]]))
# Expected:
## [[0.66666667 0.33333333]]

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X, y)
# Expected:
## KNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0.5]

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsRegressor
neigh = RadiusNeighborsRegressor(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0.5]

from sklearn.neighbors import NearestCentroid
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = NearestCentroid()
clf.fit(X, y)
# Expected:
## NearestCentroid()
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
from sklearn.neighbors import LocalOutlierFactor
X = [[-1.1], [0.2], [101.1], [0.3]]
clf = LocalOutlierFactor(n_neighbors=2)
clf.fit_predict(X)
# Expected:
## array([ 1,  1, -1,  1])
clf.negative_outlier_factor_
# Expected:
## array([ -0.9821...,  -1.0370..., -73.3697...,  -0.9821...])

from sklearn.neighbors import NeighborhoodComponentsAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.7, random_state=42)
nca = NeighborhoodComponentsAnalysis(random_state=42)
nca.fit(X_train, y_train)
# Expected:
## NeighborhoodComponentsAnalysis(...)
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
# Expected:
## KNeighborsClassifier(...)
print(knn.score(X_test, y_test))
# Expected:
## 0.933333...
knn.fit(nca.transform(X_train), y_train)
# Expected:
## KNeighborsClassifier(...)
print(knn.score(nca.transform(X_test), y_test))
# Expected:
## 0.961904...

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=0)
iris = load_iris()
cross_val_score(clf, iris.data, iris.target, cv=10)
# Expected:
## array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
##         0.93...,  0.93...,  1.     ,  0.93...,  1.      ])

from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
X, y = load_boston(return_X_y=True)
regressor = DecisionTreeRegressor(random_state=0)
cross_val_score(regressor, X, y, cv=10)
# Expected:
## array([ 0.61..., 0.57..., -0.34..., 0.41..., 0.75...,
##         0.07..., 0.29..., 0.33..., -1.42..., -1.77...])

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import ExtraTreeRegressor
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0)
extra_tree = ExtraTreeRegressor(random_state=0)
reg = BaggingRegressor(extra_tree, random_state=0).fit(
    X_train, y_train)
reg.score(X_test, y_test)
# Expected:
## 0.7823...

from sklearn.datasets import load_digits
from sklearn.manifold import LocallyLinearEmbedding
X, _ = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
embedding = LocallyLinearEmbedding(n_components=2)
X_transformed = embedding.fit_transform(X[:100])
X_transformed.shape
# Expected:
## (100, 2)

from sklearn.datasets import load_digits
from sklearn.manifold import Isomap
X, _ = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
embedding = Isomap(n_components=2)
X_transformed = embedding.fit_transform(X[:100])
X_transformed.shape
# Expected:
## (100, 2)

from sklearn.cluster import MeanShift
import numpy as np
X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
clustering = MeanShift(bandwidth=2).fit(X)
clustering.labels_
# Expected:
## array([1, 1, 1, 0, 0, 0])
clustering.predict([[0, 0], [5, 5]])
# Expected:
## array([1, 0])
clustering
# Expected:
## MeanShift(bandwidth=2)

from sklearn.cluster import AffinityPropagation
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
              [4, 2], [4, 4], [4, 0]])
clustering = AffinityPropagation().fit(X)
clustering
# Expected:
## AffinityPropagation()
clustering.labels_
# Expected:
## array([0, 0, 0, 1, 1, 1])
clustering.predict([[0, 0], [4, 4]])
# Expected:
## array([0, 1])
clustering.cluster_centers_
# Expected:
## array([[1, 2],
##        [4, 2]])

import numpy as np
from sklearn import datasets, cluster
digits = datasets.load_digits()
images = digits.images
X = np.reshape(images, (len(images), -1))
agglo = cluster.FeatureAgglomeration(n_clusters=32)
agglo.fit(X)
# Expected:
## FeatureAgglomeration(n_clusters=32)
X_reduced = agglo.transform(X)
X_reduced.shape
# Expected:
## (1797, 32)

from sklearn.cluster import KMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
              [10, 2], [10, 4], [10, 0]])
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
kmeans.labels_
# Expected:
## array([1, 1, 1, 0, 0, 0], dtype=int32)
kmeans.predict([[0, 0], [12, 3]])
# Expected:
## array([1, 0], dtype=int32)
kmeans.cluster_centers_
# Expected:
## array([[10.,  2.],
##        [ 1.,  2.]])
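# Fit-quality sketch: inertia_ is the sum of squared distances of the
# samples to their closest cluster center.
kmeans.inertia_
# Expected:
## 16.0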

from sklearn.cluster import MiniBatchKMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
              [4, 2], [4, 0], [4, 4],
              [4, 5], [0, 1], [2, 2],
              [3, 2], [5, 5], [1, -1]])
kmeans = MiniBatchKMeans(n_clusters=2,
                         random_state=0,
                         batch_size=6)
kmeans = kmeans.partial_fit(X[0:6,:])
kmeans = kmeans.partial_fit(X[6:12,:])
kmeans.cluster_centers_
# Expected:
## array([[2. , 1. ],
##        [3.5, 4.5]])
kmeans.predict([[0, 0], [4, 4]])
# Expected:
## array([0, 1], dtype=int32)
kmeans = MiniBatchKMeans(n_clusters=2,
                         random_state=0,
                         batch_size=6,
                         max_iter=10).fit(X)
kmeans.cluster_centers_
# Expected:
## array([[3.95918367, 2.40816327],
##        [1.12195122, 1.3902439 ]])
kmeans.predict([[0, 0], [4, 4]])
# Expected:
## array([1, 0], dtype=int32)

from sklearn.cluster import Birch
X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(n_clusters=None)
brc.fit(X)
# Expected:
## Birch(n_clusters=None)
brc.predict(X)
# Expected:
## array([0, 0, 0, 1, 1, 1])

from sklearn.cross_decomposition import PLSCanonical
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
plsca = PLSCanonical(n_components=2)
plsca.fit(X, Y)
# Expected:
## PLSCanonical()
X_c, Y_c = plsca.transform(X, Y)

from sklearn.cross_decomposition import PLSRegression
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)
# Expected:
## PLSRegression()
Y_pred = pls2.predict(X)

import numpy as np
from sklearn.cross_decomposition import PLSSVD
X = np.array([[0., 0., 1.],
    [1.,0.,0.],
    [2.,2.,2.],
    [2.,5.,4.]])
Y = np.array([[0.1, -0.2],
    [0.9, 1.1],
    [6.2, 5.9],
    [11.9, 12.3]])
plsca = PLSSVD(n_components=2)
plsca.fit(X, Y)
# Expected:
## PLSSVD()
X_c, Y_c = plsca.transform(X, Y)
X_c.shape, Y_c.shape
# Expected:
## ((4, 2), (4, 2))

from sklearn.cross_decomposition import CCA
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
cca = CCA(n_components=1)
cca.fit(X, Y)
# Expected:
## CCA(n_components=1)
X_c, Y_c = cca.transform(X, Y)

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## LinearDiscriminantAnalysis()
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = QuadraticDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## QuadraticDiscriminantAnalysis()
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
from sklearn.dummy import DummyClassifier
X = np.array([-1, 1, 1, 1])
y = np.array([0, 1, 1, 1])
dummy_clf = DummyClassifier(strategy="most_frequent")
dummy_clf.fit(X, y)
# Expected:
## DummyClassifier(strategy='most_frequent')
dummy_clf.predict(X)
# Expected:
## array([1, 1, 1, 1])
dummy_clf.score(X, y)
# Expected:
## 0.75

import numpy as np
from sklearn.dummy import DummyRegressor
X = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([2.0, 3.0, 5.0, 10.0])
dummy_regr = DummyRegressor(strategy="mean")
dummy_regr.fit(X, y)
# Expected:
## DummyRegressor()
dummy_regr.predict(X)
# Expected:
## array([5., 5., 5., 5.])
dummy_regr.score(X, y)
# Expected:
## 0.0

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X, y)
# Expected:
## RandomForestClassifier(max_depth=2, random_state=0)
print(clf.feature_importances_)
# Expected:
## [0.14205973 0.76664038 0.0282433  0.06305659]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_features=4, n_informative=2,
                       random_state=0, shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X, y)
# Expected:
## RandomForestRegressor(max_depth=2, random_state=0)
print(regr.feature_importances_)
# Expected:
## [0.18146984 0.81473937 0.00145312 0.00233767]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-8.32987858]

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = ExtraTreesClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)
# Expected:
## ExtraTreesClassifier(random_state=0)
clf.predict([[0, 0, 0, 0]])
# Expected:
## array([1])

from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=100, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = BaggingClassifier(base_estimator=SVC(),
                        n_estimators=10, random_state=0).fit(X, y)
clf.predict([[0, 0, 0, 0]])
# Expected:
## array([1])

from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=100, n_features=4,
                       n_informative=2, n_targets=1,
                       random_state=0, shuffle=False)
regr = BaggingRegressor(base_estimator=SVR(),
                        n_estimators=10, random_state=0).fit(X, y)
regr.predict([[0, 0, 0, 0]])
# Expected:
## array([-2.8720...])

from sklearn.ensemble import IsolationForest
X = [[-1.1], [0.3], [0.5], [100]]
clf = IsolationForest(random_state=0).fit(X)
clf.predict([[0.1], [0], [90]])
# Expected:
## array([ 1,  1, -1])

from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = AdaBoostClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)
# Expected:
## AdaBoostClassifier(n_estimators=100, random_state=0)
clf.feature_importances_
# Expected:
## array([0.28..., 0.42..., 0.14..., 0.16...])
clf.predict([[0, 0, 0, 0]])
# Expected:
## array([1])
clf.score(X, y)
# Expected:
## 0.983...

from sklearn.ensemble import AdaBoostRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, n_informative=2,
                       random_state=0, shuffle=False)
regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regr.fit(X, y)
# Expected:
## AdaBoostRegressor(n_estimators=100, random_state=0)
regr.feature_importances_
# Expected:
## array([0.2788..., 0.7109..., 0.0065..., 0.0036...])
regr.predict([[0, 0, 0, 0]])
# Expected:
## array([4.7972...])
regr.score(X, y)
# Expected:
## 0.9771...

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
eclf1 = eclf1.fit(X, y)
print(eclf1.predict(X))
# Expected:
## [1 1 1 2 2 2]
np.array_equal(eclf1.named_estimators_.lr.predict(X),
               eclf1.named_estimators_['lr'].predict(X))
# Expected:
## True
eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft')
eclf2 = eclf2.fit(X, y)
print(eclf2.predict(X))
# Expected:
## [1 1 1 2 2 2]
eclf3 = VotingClassifier(estimators=[
       ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
       voting='soft', weights=[2,1,1],
       flatten_transform=True)
eclf3 = eclf3.fit(X, y)
print(eclf3.predict(X))
# Expected:
## [1 1 1 2 2 2]
print(eclf3.transform(X).shape)
# Expected:
## (6, 6)
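# With voting='soft', the ensemble averages class probabilities, so
# predict_proba is available (shape: n_samples x n_classes).
eclf2.predict_proba(X).shape
# Expected:
## (6, 2)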

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=10, random_state=1)
X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])
er = VotingRegressor([('lr', r1), ('rf', r2)])
print(er.fit(X, y).predict(X))
# Expected:
## [ 3.3  5.7 11.8 19.7 28.  40.3]

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
X, y = load_iris(return_X_y=True)
estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('svr', make_pipeline(StandardScaler(),
                          LinearSVC(random_state=42)))
]
clf = StackingClassifier(
    estimators=estimators, final_estimator=LogisticRegression()
)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)
clf.fit(X_train, y_train).score(X_test, y_test)
# Expected:
## 0.9...

from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
X, y = load_diabetes(return_X_y=True)
estimators = [
    ('lr', RidgeCV()),
    ('svr', LinearSVR(random_state=42))
]
reg = StackingRegressor(
    estimators=estimators,
    final_estimator=RandomForestRegressor(n_estimators=10,
                                          random_state=42)
)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)
reg.fit(X_train, y_train).score(X_test, y_test)
# Expected:
## 0.3...

from sklearn.feature_extraction import DictVectorizer
v = DictVectorizer(sparse=False)
D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
X = v.fit_transform(D)
X
# Expected:
## array([[2., 0., 1.],
##        [0., 1., 3.]])
v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0}, {'baz': 1.0, 'foo': 3.0}]
# Expected:
## True
v.transform({'foo': 4, 'unseen_feature': 3})
# Expected:
## array([[0., 0., 4.]])
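# Column-order sketch: the learned feature names (sorted alphabetically)
# give the meaning of each column in X.
v.get_feature_names()
# Expected:
## ['bar', 'baz', 'foo']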

from sklearn.feature_extraction import FeatureHasher
h = FeatureHasher(n_features=10)
D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]
f = h.transform(D)
f.toarray()
# Expected:
## array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],
##        [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])

from sklearn.datasets import load_sample_images
from sklearn.feature_extraction import image

X = load_sample_images().images[1]
print('Image shape: {}'.format(X.shape))
# Expected:
## Image shape: (427, 640, 3)
pe = image.PatchExtractor(patch_size=(2, 2))
pe_fit = pe.fit(X)
pe_trans = pe.transform(X)
print('Patches shape: {}'.format(pe_trans.shape))
# Expected:
## Patches shape: (545706, 2, 2)

from sklearn.feature_extraction.text import HashingVectorizer
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]
vectorizer = HashingVectorizer(n_features=2**4)
X = vectorizer.fit_transform(corpus)
print(X.shape)
# Expected:
## (4, 16)

from sklearn.feature_extraction.text import CountVectorizer
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())
# Expected:
## ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
print(X.toarray())
# Expected:
## [[0 1 1 1 0 0 1 0 1]
##  [0 2 0 1 0 1 1 0 1]
##  [1 0 0 1 1 0 1 1 1]
##  [0 1 1 1 0 0 1 0 1]]
vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))
X2 = vectorizer2.fit_transform(corpus)
print(vectorizer2.get_feature_names())
# Expected:
## ['and this', 'document is', 'first document', 'is the', 'is this',
## 'second document', 'the first', 'the second', 'the third', 'third one',
##  'this document', 'this is', 'this the']
print(X2.toarray())
# Expected:
## [[0 0 1 1 0 0 1 0 0 0 0 1 0]
## [0 1 0 1 0 1 0 1 0 0 1 0 0]
## [1 0 0 1 0 0 0 0 1 1 0 1 0]
## [0 0 1 0 1 0 1 0 0 0 0 0 1]]
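# Vocabulary sketch: vocabulary_ maps each term to its column index in X.
vectorizer.vocabulary_['document']
# Expected:
## 1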

from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
import numpy as np
corpus = ['this is the first document',
          'this document is the second document',
          'and this is the third one',
          'is this the first document']
vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',
              'and', 'one']
pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),
                 ('tfid', TfidfTransformer())]).fit(corpus)
pipe['count'].transform(corpus).toarray()
# Expected:
## array([[1, 1, 1, 1, 0, 1, 0, 0],
##        [1, 2, 0, 1, 1, 1, 0, 0],
##        [1, 0, 0, 1, 0, 1, 1, 1],
##        [1, 1, 1, 1, 0, 1, 0, 0]])
pipe['tfid'].idf_
# Expected:
## array([1.        , 1.22314355, 1.51082562, 1.        , 1.91629073,
##        1.        , 1.91629073, 1.91629073])
pipe.transform(corpus).shape
# Expected:
## (4, 8)

from sklearn.feature_extraction.text import TfidfVectorizer
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())
# Expected:
## ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
print(X.shape)
# Expected:
## (4, 9)
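# Reuse sketch: a fitted vectorizer maps unseen text into the same
# 9-dimensional tf-idf space; out-of-vocabulary terms are simply ignored.
vectorizer.transform(['A brand new document.']).shape
# Expected:
## (1, 9)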

from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
X_new.shape
# Expected:
## (1797, 7)

from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectKBest, chi2
X, y = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
X_new = SelectKBest(chi2, k=20).fit_transform(X, y)
X_new.shape
# Expected:
## (1797, 20)

from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFpr, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 16)

from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFdr, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 16)

from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFwe, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 15)

from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import GenericUnivariateSelect, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)
X_new = transformer.fit_transform(X, y)
X_new.shape
# Expected:
## (569, 20)

from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFE(estimator, n_features_to_select=5, step=1)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True, False, False, False, False,
##        False])
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFECV(estimator, step=1, cv=5)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True, False, False, False, False,
##        False])
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
X = [[ 0.87, -1.34,  0.31 ],
     [-2.79, -0.02, -0.85 ],
     [-1.34, -0.48, -2.55 ],
     [ 1.92,  1.48,  0.65 ]]
y = [0, 1, 0, 1]
selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)
selector.estimator_.coef_
# Expected:
## array([[-0.3252302 ,  0.83462377,  0.49750423]])
selector.threshold_
# Expected:
## 0.55245...
selector.get_support()
# Expected:
## array([False,  True, False])
selector.transform(X)
# Expected:
## array([[-1.34],
##        [-0.02],
##        [-0.48],
##        [ 1.48]])

import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
X = np.array([
    [10, 10],
    [8, 10],
    [-5, 5.5],
    [-5.4, 5.5],
    [-20, -20],
    [-15, -20]
])
y = np.array([0, 0, 1, 1, 2, 2])
clf = OneVsRestClassifier(SVC()).fit(X, y)
clf.predict([[-19, -20], [9, 9], [-5, 5]])
# Expected:
## array([2, 0, 1])

from sklearn.multiclass import OutputCodeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=100, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = OutputCodeClassifier(
    estimator=RandomForestClassifier(random_state=0),
    random_state=0).fit(X, y)
clf.predict([[0, 0, 0, 0]])
# Expected:
## array([1])

from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel,
        random_state=0).fit(X, y)
gpr.score(X, y)
# Expected:
## 0.3680...
gpr.predict(X[:2,:], return_std=True)
# Expected:
## (array([653.0..., 592.1...]), array([316.6..., 316.6...]))

from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
X, y = load_iris(return_X_y=True)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel,
        random_state=0).fit(X, y)
gpc.score(X, y)
# Expected:
## 0.9866...
gpc.predict_proba(X[:2,:])
# Expected:
## array([[0.83548752, 0.03228706, 0.13222543],
##        [0.79064206, 0.06525643, 0.14410151]])

from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(X)
clf = SGDClassifier(max_iter=5, tol=1e-3)
clf.fit(X_features, y)
# Expected:
## SGDClassifier(max_iter=5)
clf.score(X_features, y)
# Expected:
## 1.0

from sklearn.kernel_approximation import SkewedChi2Sampler
from sklearn.linear_model import SGDClassifier
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
chi2_feature = SkewedChi2Sampler(skewedness=.01,
                                 n_components=10,
                                 random_state=0)
X_features = chi2_feature.fit_transform(X, y)
clf = SGDClassifier(max_iter=10, tol=1e-3)
clf.fit(X_features, y)
# Expected:
## SGDClassifier(max_iter=10)
clf.score(X_features, y)
# Expected:
## 1.0

from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import AdditiveChi2Sampler
X, y = load_digits(return_X_y=True)
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
X_transformed = chi2sampler.fit_transform(X, y)
clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
clf.fit(X_transformed, y)
# Expected:
## SGDClassifier(max_iter=5, random_state=0)
clf.score(X_transformed, y)
# Expected:
## 0.9499...

from sklearn import datasets, svm
from sklearn.kernel_approximation import Nystroem
X, y = datasets.load_digits(n_class=9, return_X_y=True)
data = X / 16.
clf = svm.LinearSVC()
feature_map_nystroem = Nystroem(gamma=.2,
                                random_state=1,
                                n_components=300)
data_transformed = feature_map_nystroem.fit_transform(data)
clf.fit(data_transformed, y)
# Expected:
## LinearSVC()
clf.score(data_transformed, y)
# Expected:
## 0.9987...

from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)
# Expected:
## KernelRidge(alpha=1.0)

import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y)
# Expected:
## GaussianNB()
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
# Expected:
## GaussianNB()
print(clf_pf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
rng = np.random.RandomState(1)
X = rng.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X, y)
# Expected:
## MultinomialNB()
print(clf.predict(X[2:3]))
# Expected:
## [3]

import numpy as np
rng = np.random.RandomState(1)
X = rng.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import ComplementNB
clf = ComplementNB()
clf.fit(X, y)
# Expected:
## ComplementNB()
print(clf.predict(X[2:3]))
# Expected:
## [3]

import numpy as np
rng = np.random.RandomState(1)
X = rng.randint(5, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])
from sklearn.naive_bayes import BernoulliNB
clf = BernoulliNB()
clf.fit(X, Y)
# Expected:
## BernoulliNB()
print(clf.predict(X[2:3]))
# Expected:
## [3]

import numpy as np
rng = np.random.RandomState(1)
X = rng.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import CategoricalNB
clf = CategoricalNB()
clf.fit(X, y)
# Expected:
## CategoricalNB()
print(clf.predict(X[2:3]))
# Expected:
## [3]

import numpy as np
from sklearn.neural_network import BernoulliRBM
X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
model = BernoulliRBM(n_components=2)
model.fit(X)
# Expected:
## BernoulliRBM(n_components=2)

import numpy as np
from sklearn.random_projection import GaussianRandomProjection
rng = np.random.RandomState(42)
X = rng.rand(100, 10000)
transformer = GaussianRandomProjection(random_state=rng)
X_new = transformer.fit_transform(X)
X_new.shape
# Expected:
## (100, 3947)

import numpy as np
from sklearn.random_projection import SparseRandomProjection
rng = np.random.RandomState(42)
X = rng.rand(100, 10000)
transformer = SparseRandomProjection(random_state=rng)
X_new = transformer.fit_transform(X)
X_new.shape
# Expected:
## (100, 3947)
np.mean(transformer.components_ != 0)
# Expected:
## 0.0100...

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation
label_prop_model = LabelPropagation()
iris = datasets.load_iris()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)
# Expected:
## LabelPropagation(...)

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading
label_prop_model = LabelSpreading()
iris = datasets.load_iris()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)
# Expected:
## LabelSpreading(...)