plot_decision_regions: Visualize the decision regions of a classifier
A function for plotting the decision regions of classifiers in 1 or 2 dimensions.
from mlxtend.plotting import plot_decision_regions
Example 1 - Decision regions in 2D
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
# Training a classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)
# Plotting decision regions
plot_decision_regions(X, y, clf=svm, legend=2)
# Adding axes annotations
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()
Example 2 - Decision regions in 1D
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, 2]
X = X[:, None]
y = iris.target
# Training a classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)
# Plotting decision regions
plot_decision_regions(X, y, clf=svm, legend=2)
# Adding axes annotations
plt.xlabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()
Example 3 - Grid of decision regions
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import datasets
import numpy as np
# Initializing Classifiers
clf1 = LogisticRegression(random_state=1,
                          solver='newton-cg',
                          multi_class='multinomial')
clf2 = RandomForestClassifier(random_state=1, n_estimators=100)
clf3 = GaussianNB()
clf4 = SVC(gamma='auto')
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [0,2]]
y = iris.target
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
import matplotlib.gridspec as gridspec
import itertools
gs = gridspec.GridSpec(2, 2)
fig = plt.figure(figsize=(10,8))
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)
plt.show()
Example 4 - Highlighting test data points
from mlxtend.plotting import plot_decision_regions
from mlxtend.preprocessing import shuffle_arrays_unison
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
iris = datasets.load_iris()
X, y = iris.data[:, [0,2]], iris.target
X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
X_train, y_train = X[:100], y[:100]
X_test, y_test = X[100:], y[100:]
# Training a classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)
# Plotting decision regions
plot_decision_regions(X, y, clf=svm, legend=2,
                      X_highlight=X_test)
# Adding axes annotations
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()
Example 5 - Evaluating classifier behavior on non-linear problems
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# Initializing Classifiers
clf1 = LogisticRegression(random_state=1, solver='lbfgs')
clf2 = RandomForestClassifier(n_estimators=100,
                              random_state=1)
clf3 = GaussianNB()
clf4 = SVC(gamma='auto')
# Loading Plotting Utilities
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import itertools
from mlxtend.plotting import plot_decision_regions
import numpy as np
XOR
# Create XOR dataset
rng = np.random.RandomState(0)
X = rng.randn(300, 2)
y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0),
             dtype=int)
gs = gridspec.GridSpec(2, 2)
fig = plt.figure(figsize=(10,8))
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)
plt.show()
Half-Moons
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=100, random_state=123)
gs = gridspec.GridSpec(2, 2)
fig = plt.figure(figsize=(10,8))
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)
plt.show()
Concentric Circles
from sklearn.datasets import make_circles
X, y = make_circles(n_samples=1000, random_state=123, noise=0.1, factor=0.2)
gs = gridspec.GridSpec(2, 2)
fig = plt.figure(figsize=(10,8))
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'SVM']
for clf, lab, grd in zip([clf1, clf2, clf3, clf4],
                         labels,
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf, legend=2)
    plt.title(lab)
plt.show()
Example 6 - Working with existing axes objects (using subplots)
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
import numpy as np
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, 2]
X = X[:, None]
y = iris.target
# Initializing and fitting classifiers
clf1 = LogisticRegression(random_state=1,
                          solver='lbfgs',
                          multi_class='multinomial')
clf2 = GaussianNB()
clf1.fit(X, y)
clf2.fit(X, y)
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
fig = plot_decision_regions(X=X, y=y, clf=clf1, ax=axes[0], legend=2)
fig = plot_decision_regions(X=X, y=y, clf=clf2, ax=axes[1], legend=1)
plt.show()
Example 7 - Decision regions with more than two training features
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
X, y = datasets.make_blobs(n_samples=600, n_features=3,
                           centers=[[2, 2, -2], [-2, -2, 2]],
                           cluster_std=[2, 2], random_state=2)
# Training a classifier
svm = SVC(gamma='auto')
svm.fit(X, y)
# Plotting decision regions
fig, ax = plt.subplots()
# Decision region for feature 3 = 1.5
value = 1.5
# Plot training sample with feature 3 = 1.5 +/- 0.75
width = 0.75
plot_decision_regions(X, y, clf=svm,
                      filler_feature_values={2: value},
                      filler_feature_ranges={2: width},
                      legend=2, ax=ax)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('Feature 3 = {}'.format(value))
# Adding axes annotations
fig.suptitle('SVM on make_blobs')
plt.show()
Example 8 - Grid of decision region slices
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
X, y = datasets.make_blobs(n_samples=500, n_features=3,
                           centers=[[2, 2, -2], [-2, -2, 2]],
                           cluster_std=[2, 2], random_state=2)
# Training a classifier
svm = SVC(gamma='auto')
svm.fit(X, y)
# Plotting decision regions
fig, axarr = plt.subplots(2, 2, figsize=(10,8), sharex=True, sharey=True)
values = [-4.0, -1.0, 1.0, 4.0]
width = 0.75
for value, ax in zip(values, axarr.flat):
    plot_decision_regions(X, y, clf=svm,
                          filler_feature_values={2: value},
                          filler_feature_ranges={2: width},
                          legend=2, ax=ax)
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title('Feature 3 = {}'.format(value))
# Adding axes annotations
fig.suptitle('SVM on make_blobs')
plt.show()
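By default, `plot_decision_regions` places the first feature on the x-axis and the second on the y-axis. The `feature_index` argument (documented in the API section below) lets you pick a different pair of features to plot. The following is a minimal sketch that reuses the `svm` fitted above and instead slices along feature 2; the filler value of 0.0 is an arbitrary choice for illustration.
# A minimal sketch: feature 1 on the x-axis, feature 3 on the y-axis,
# with feature 2 fixed at an arbitrarily chosen value of 0.0
fig, ax = plt.subplots()
plot_decision_regions(X, y, clf=svm,
                      feature_index=[0, 2],
                      filler_feature_values={1: 0.0},
                      filler_feature_ranges={1: 0.75},
                      legend=2, ax=ax)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 3')
ax.set_title('Feature 2 = 0.0')
plt.show()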
Example 9 - Customizing the plotting style
from mlxtend.plotting import plot_decision_regions
from mlxtend.preprocessing import shuffle_arrays_unison
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
X_train, y_train = X[:100], y[:100]
X_test, y_test = X[100:], y[100:]
# Training a classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X_train, y_train)
# Specify keyword arguments to be passed to underlying plotting functions
scatter_kwargs = {'s': 120, 'edgecolor': None, 'alpha': 0.7}
contourf_kwargs = {'alpha': 0.2}
scatter_highlight_kwargs = {'s': 120, 'label': 'Test data', 'alpha': 0.7}
# Plotting decision regions
plot_decision_regions(X, y, clf=svm, legend=2,
                      X_highlight=X_test,
                      scatter_kwargs=scatter_kwargs,
                      contourf_kwargs=contourf_kwargs,
                      scatter_highlight_kwargs=scatter_highlight_kwargs)
# Adding axes annotations
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
plt.show()
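The lines drawn between the decision regions can be styled in the same way via `contour_kwargs`, which is forwarded to the underlying matplotlib `contour` function (see the API section below). A minimal sketch reusing the `svm` fitted above; the dashed style and line width are arbitrary choices for illustration.
# Styling the lines between decision regions (arbitrary example values)
contour_kwargs = {'linestyles': 'dashed', 'linewidths': 2}
plot_decision_regions(X, y, clf=svm, legend=2,
                      contourf_kwargs={'alpha': 0.2},
                      contour_kwargs=contour_kwargs)
plt.title('SVM on Iris')
plt.show()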
Example 10 - Providing your own legend labels
Custom legend labels can be provided by returning the `axis` object(s) from the `plot_decision_regions` function and then getting the legend's handles and labels. Custom handles (i.e., labels) can then be supplied via `ax.legend`:
ax = plot_decision_regions(X, y, clf=svm, legend=0)
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles,
          ['class 0', 'class 1', 'class 2'],
          framealpha=0.3, scatterpoints=1)
An example is shown below.
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
# Training a classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)
# Plotting decision regions
ax = plot_decision_regions(X, y, clf=svm, legend=0)
# Adding axes annotations
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.title('SVM on Iris')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles,
          ['class square', 'class triangle', 'class circle'],
          framealpha=0.3, scatterpoints=1)
plt.show()
Example 11 - Plots with different zoom factors
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
# Loading some example data
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
# Training a classifier
svm = SVC(C=0.5, kernel='linear')
svm.fit(X, y)
Default zoom factor
plot_decision_regions(X, y, clf=svm, zoom_factor=1.)
plt.show()
Zooming out
plot_decision_regions(X, y, clf=svm, zoom_factor=0.1)
plt.show()
Zooming in
Note that when zooming in (by choosing a `zoom_factor` > 1.0), the plot is still created such that all data points are shown in the figure.
plot_decision_regions(X, y, clf=svm, zoom_factor=2.0)
plt.show()
Clipping the axes
To zoom in even further, which means that some of the training examples will no longer be shown, you can simply clip the axes as follows:
plot_decision_regions(X, y, clf=svm, zoom_factor=2.0)
plt.xlim(5, 6)
plt.ylim(2, 5)
plt.show()
Example 12 - Working with classifiers that expect onehot-encoded outputs (Keras)
Most objects for classification that mimic the scikit-learn estimator API should be compatible with the `plot_decision_regions` function. However, if the classification model (e.g., a typical Keras model) outputs onehot-encoded predictions, we have to use an additional trick. That is, for onehot-encoded outputs, we need to wrap the Keras model in a class that converts these onehot-encoded variables into integer class labels. Such a wrapper class can be as simple as the following:
class Onehot2Int(object):

    def __init__(self, model):
        self.model = model

    def predict(self, X):
        # Convert onehot-encoded predictions to integer class labels
        y_pred = self.model.predict(X)
        return np.argmax(y_pred, axis=1)
The following example illustrates how the Onehot2Int class can be used with a Keras model that outputs onehot-encoded class labels:
import keras
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy as np
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.plotting import plot_decision_regions
from keras.utils import to_categorical
X, y = iris_data()
X = X[:, [2, 3]]
X = standardize(X)
# OneHot encoding
y_onehot = to_categorical(y)
# Create the model
np.random.seed(123)
model = Sequential()
model.add(Dense(8, input_shape=(2,), activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(4, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))
# Configure the model and start training
model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.005), metrics=['accuracy'])
history = model.fit(X, y_onehot, epochs=10, batch_size=5, verbose=1, validation_split=0.1)
Epoch 1/10
27/27 [==============================] - 0s 3ms/step - loss: 0.9526 - accuracy: 0.4222 - val_loss: 1.2656 - val_accuracy: 0.0000e+00
Epoch 2/10
27/27 [==============================] - 0s 834us/step - loss: 0.7062 - accuracy: 0.6741 - val_loss: 1.0939 - val_accuracy: 0.0000e+00
Epoch 3/10
27/27 [==============================] - 0s 808us/step - loss: 0.6461 - accuracy: 0.7111 - val_loss: 1.0705 - val_accuracy: 0.0667
Epoch 4/10
27/27 [==============================] - 0s 767us/step - loss: 0.6145 - accuracy: 0.7185 - val_loss: 1.0518 - val_accuracy: 0.0000e+00
Epoch 5/10
27/27 [==============================] - 0s 746us/step - loss: 0.5877 - accuracy: 0.7185 - val_loss: 1.0470 - val_accuracy: 0.0000e+00
Epoch 6/10
27/27 [==============================] - 0s 740us/step - loss: 0.5496 - accuracy: 0.7333 - val_loss: 1.0275 - val_accuracy: 0.0000e+00
Epoch 7/10
27/27 [==============================] - 0s 734us/step - loss: 0.4985 - accuracy: 0.7333 - val_loss: 1.0131 - val_accuracy: 0.0000e+00
Epoch 8/10
27/27 [==============================] - 0s 739us/step - loss: 0.4365 - accuracy: 0.7333 - val_loss: 0.9634 - val_accuracy: 0.0000e+00
Epoch 9/10
27/27 [==============================] - 0s 729us/step - loss: 0.3875 - accuracy: 0.7333 - val_loss: 0.9442 - val_accuracy: 0.0000e+00
Epoch 10/10
27/27 [==============================] - 0s 764us/step - loss: 0.3402 - accuracy: 0.7407 - val_loss: 0.8565 - val_accuracy: 0.0000e+00
# Wrap keras model
model_no_ohe = Onehot2Int(model)
# Plot decision boundary
plot_decision_regions(X, y, clf=model_no_ohe)
plt.show()
API
plot_decision_regions(X, y, clf, feature_index=None, filler_feature_values=None, filler_feature_ranges=None, ax=None, X_highlight=None, zoom_factor=1.0, legend=1, hide_spines=True, markers='s^oxv<>', colors='#1f77b4,#ff7f0e,#3ca02c,#d62728,#9467bd,#8c564b,#e377c2,#7f7f7f,#bcbd22,#17becf', scatter_kwargs=None, contourf_kwargs=None, contour_kwargs=None, scatter_highlight_kwargs=None, n_jobs=None)
Plot the decision regions of a classifier.
Please note that this function assumes that class labels are
labeled consecutively, e.g., 0, 1, 2, 3, 4, and 5. If you have class
labels with integer labels > 4, you may want to provide additional colors
and/or markers as the `colors` and `markers` arguments.
See https://matplotlib.net.cn/examples/color/named_colors.html for more
information.
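For instance, for a hypothetical six-class problem, longer color and marker sequences could be passed along the lines of the following sketch, where `clf` and `(X, y)` stand in for an already-fitted classifier and its training data, and the specific colors and markers are arbitrary choices:
# Hypothetical sketch for a six-class problem; `clf` and (X, y) are placeholders
plot_decision_regions(X, y, clf=clf,
                      colors='red,blue,limegreen,gray,cyan,orange',
                      markers='s^oxv<',
                      legend=2)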
Parameters

- `X` : array-like, shape = [n_samples, n_features]
  Feature Matrix.

- `y` : array-like, shape = [n_samples]
  True class labels.

- `clf` : Classifier object.
  Must have a .predict method.

- `feature_index` : array-like (default: (0,) for 1D, (0, 1) otherwise)
  Feature indices to use for plotting. The first index in `feature_index` will be on the x-axis; the second index will be on the y-axis.

- `filler_feature_values` : dict (default: None)
  Only needed for number of features > 2. Dictionary of feature index-value pairs for the features not being plotted.

- `filler_feature_ranges` : dict (default: None)
  Only needed for number of features > 2. Dictionary of feature index-range pairs for the features not being plotted. Will use the ranges provided to select training samples for plotting.

- `ax` : matplotlib.axes.Axes (default: None)
  An existing matplotlib Axes. Creates one if ax=None.

- `X_highlight` : array-like, shape = [n_samples, n_features] (default: None)
  An array with data points that are used to highlight samples in `X`.

- `zoom_factor` : float (default: 1.0)
  Controls the scale of the x- and y-axis of the decision plot.

- `hide_spines` : bool (default: True)
  Hide axis spines if True.

- `legend` : int (default: 1)
  Integer to specify the legend location. No legend if legend is 0.

- `markers` : str (default: 's^oxv<>')
  Scatterplot markers.

- `colors` : str (default: 'red,blue,limegreen,gray,cyan')
  Comma-separated list of colors.

- `scatter_kwargs` : dict (default: None)
  Keyword arguments for the underlying matplotlib scatter function.

- `contourf_kwargs` : dict (default: None)
  Keyword arguments for the underlying matplotlib contourf function.

- `contour_kwargs` : dict (default: None)
  Keyword arguments for the underlying matplotlib contour function (which draws the lines between decision regions).

- `scatter_highlight_kwargs` : dict (default: None)
  Keyword arguments for the underlying matplotlib scatter function used to plot the highlighted samples.

- `n_jobs` : int or None, optional (default=None)
  The number of CPUs to use for the computation via Python's multiprocessing library. None means 1; -1 means using all processors. New in v0.22.0.
Returns

- `ax` : matplotlib.axes.Axes object
Examples
For usage examples, please see https://mlxtend.cn/mlxtend/user_guide/plotting/plot_decision_regions/