# Classification
# ROC-AUC Plot
def roc_plot(y_test, y_pred_score, model_label="", ax=None):
    """
    Plot the ROC curve given true values and predicted scores.

    When ``ax`` is omitted, a new 8x6 figure is created and, besides the
    curve, the chance diagonal, axis labels and title are drawn. When an
    existing ``ax`` is supplied, only the curve is added to it, so several
    models / data sets can be overlaid on the same axes.

    Relies on ``roc_curve``, ``metrics`` and ``plt`` being available at
    module level (they are not imported inside this function).

    Args:
        y_test: True labels, forwarded to ``roc_curve``.
        y_pred_score: Predicted scores, forwarded to ``roc_curve``.
        model_label: Text prefixed to the " - AUC: x.xx" legend entry.
        ax: Existing axes to draw on, or ``None`` to create a new figure.

    Returns:
        The axes that were drawn on. This is returned in both cases, so the
        result can always be passed to a follow-up call.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_test, y_pred_score)
    auc = metrics.auc(fpr, tpr)

    # Identity test: `== None` may be overridden by the object being compared.
    created_here = ax is None
    if created_here:
        _, ax = plt.subplots(figsize=(8, 6))

    ax.plot(fpr, tpr, label=model_label + " - AUC: {:.2f}".format(auc))

    if created_here:
        # Decorate only once, when the axes are first created.
        ax.plot([0, 1], [0, 1], "k--")
        ax.set(
            xlabel="False Positive Rate (FPR)",
            ylabel="True Positive Rate (TPR)",
            title="ROC curve",
        )

    # Re-create the legend so it includes the curve that was just added.
    ax.legend(loc="lower right")
    return ax
# Overlay the train, validation and test ROC curves of `lgb_base`:
# the first call creates the axes, the later calls draw onto those axes.
ax = roc_plot(
    y_test=y_train,
    y_pred_score=lgb_base.predict_proba(X_train)[:, 1],
    model_label="Train",
)
roc_plot(
    y_test=y_val,
    y_pred_score=lgb_base.predict_proba(X_val)[:, 1],
    model_label="Validation",
    ax=ax,
)
roc_plot(
    y_test=y_test,
    y_pred_score=lgb_base.predict_proba(X_test)[:, 1],
    model_label="Test",
    ax=ax,
)
# Confusion Matrix
def confusion_matrix_plot(y_test, y_pred, plot=True, prob=False, p=0.5):
    """
    Print the accuracy and, optionally, the confusion matrix of predictions.

    Relies on ``np`` (numpy) being available at module level.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, or predicted scores when ``prob`` is True.
        plot: When True, the confusion matrix is printed after the accuracy.
        prob: When True, ``y_pred`` is first converted to 0/1 labels by
            comparing it against the threshold ``p``.
        p: Decision threshold used when ``prob`` is True; values strictly
            greater than ``p`` become 1, everything else 0.

    Returns:
        None. The results are written to standard output.
    """
    from sklearn.metrics import accuracy_score, confusion_matrix

    if prob:
        # Use the caller-supplied threshold rather than a fixed 0.5, and
        # convert to an array so plain lists can be compared element-wise.
        y_pred = np.where(np.asarray(y_pred) > p, 1, 0)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

    if plot:
        # The matrix is printed as text; no figure is drawn here.
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
# Regression Model
def resid_plot(y, y_pred, title=""):
    """
    Scatter the residuals (``y - y_pred``) against the predictions.

    A new figure is created on every call; ``title`` is used as the prefix
    of the plot title. Nothing is returned.
    """
    residuals = y - y_pred
    _, axes = plt.subplots()
    axes.scatter(y_pred, residuals)
    axes.set(
        xlabel="y hat",
        ylabel="Residuals",
        title="{0} - Residual Plots".format(title),
    )
def eval_reg_model(model, x_list, y_list,
                   dataLabel=("Train", "Val", "Test"),
                   residPlot=False):
    """
    Evaluate a model with the MSE (and optional residual plots) on several
    pairs of X and y data.

    Relies on ``mean_squared_error`` and ``pd`` (pandas) being available at
    module level.

    Args:
        model: Object exposing ``predict(x)``.
        x_list: Sequence of feature sets, one per data set.
        y_list: Sequence of targets, aligned with ``x_list``.
        dataLabel: Labels for the data sets, aligned with ``x_list``.
        residPlot: When True, ``resid_plot`` is called for every data set.

    Returns:
        A DataFrame with one row per evaluated data set and the columns
        "Data Set" and "MSE".

    Note:
        Labels, features and targets are paired positionally and iteration
        stops at the shortest of the three, so passing fewer data sets than
        labels (e.g. only train/test with the default labels) no longer
        raises ``IndexError``.
    """
    records = []
    for label, x, y in zip(dataLabel, x_list, y_list):
        y_pred = model.predict(x)
        mse = mean_squared_error(y, y_pred)
        records.append((label, mse))
        if residPlot:
            resid_plot(y, y_pred, label)
    return pd.DataFrame.from_records(records, columns=["Data Set", "MSE"])
# Feature Importance
def feature_importance_plot(importantces, names):
    """
    Draw a horizontal bar chart of feature importances in ascending order.

    Relies on ``pd``, ``np`` and ``plt`` being available at module level.

    Args:
        importantces: Importance value per feature, e.g. a fitted model's
            ``feature_importances_``. (The parameter name is kept as-is so
            existing keyword callers continue to work.)
        names: Feature names, aligned with ``importantces``; used as the
            bar labels.

    Returns:
        None. A new figure is created and laid out with ``tight_layout``.
    """
    importance_series = pd.Series(importantces, index=names)
    order = np.argsort(importance_series.to_numpy())

    fig, ax = plt.subplots()
    # `order` holds integer positions, so select with `.iloc`. Plain `[]`
    # is label-based when the index holds integers, and its positional
    # fallback for other index types is deprecated in newer pandas.
    importance_series.iloc[order].plot.barh(ax=ax)
    ax.set_title("Feature importances")
    fig.tight_layout()
# Plot the importances of the `lgb_bo_insample` model, labelled by its
# feature names.
feature_importance_plot(
    importantces=lgb_bo_insample.feature_importances_,
    names=lgb_bo_insample.feature_name_,
)