from sklearn.preprocessing import PolynomialFeatures
# Training set: x runs over [0, 1) in steps of 0.1 and y follows the line
# y = x plus noise drawn from rng.normal (mean 0, scale 0.04).
# NOTE(review): rng is defined outside this section -- presumably a NumPy
# random Generator; confirm.
x = np.arange(0, 1, 0.1)
n = x.size
eps = rng.normal(loc=0, scale=0.04, size=n)
y = x + eps
# Plant one large outlier: the observation three quarters of the way along
# the grid (index rounded up) has its y value forced to zero.
midpoint = int(np.ceil(0.75 * n))
y[midpoint] = 0
of_df = pd.DataFrame(dict(x=x, y=y))
# Linear model (left commented out; not fit in this section)
# lin_model = smf.ols(formula='y ~ x', data=of_df)
# Scatter-only view of the training set: fit_reg=False turns off the
# regression line, so only the black points are shown.
train_plot = sns.lmplot(
    x='x',
    y='y',
    data=of_df,
    fit_reg=False,
    ci=None,
    scatter_kws={'color': 'black'},
    height=2.4,
    aspect=2,
)
plt.title("Training Data");
plt.show()
# Test set: a fresh noise draw (mean 0, scale 0.04) around the line y = x,
# on an x grid over [0, 1) in steps of 0.1. No outlier is planted here.
x_test = np.arange(0, 1, 0.1)
n_test = x_test.size
eps_test = rng.normal(loc=0, scale=0.04, size=n_test)
y_test = x_test + eps_test
of_test_df = pd.DataFrame(dict(x=x_test, y=y_test))
# Scatter-only view of the held-out test set.
# The figure plots the test frame (of_test_df), not the training frame, so
# the points shown match the "Test Data" title.
test_points_plot = sns.lmplot(
    data=of_test_df,
    x='x', y='y',
    scatter_kws=dict(color='black'),
    ci=None,
    height=2.4,
    aspect=2,
    # No line_kws here: with fit_reg=False no regression line is drawn.
    fit_reg=False,
)
plt.title("Test Data");
plt.show()
# Base layer: the training points together with lmplot's regression line,
# coloured with the first palette entry.
perfect_plot = sns.lmplot(
    x='x',
    y='y',
    data=of_df,
    ci=None,
    line_kws={'color': cb_palette[0]},
    scatter_kws={'color': 'black'},
    height=2.4,
    aspect=2,
)
# Fix the y-range so this figure can be compared with others using (-1, 1).
perfect_plot.ax.set_ylim(-1, 1);
# Overlay: a polynomial fit of order n to the same training frame, coloured
# with the second palette entry. No ax is passed, so this relies on seaborn
# drawing onto the current axes.
# NOTE(review): order=n requests a degree-n polynomial from n points; degree
# n - 1 is already enough to pass through every point -- confirm n is intended.
sns.regplot(
    x='x',
    y='y',
    data=of_df,
    order=n,
    ci=None,
    line_kws={'color': cb_palette[1]},
    scatter_kws={'color': 'black'},
)
plt.title("A Perfect Model?");
plt.show()
# Base layer: the test points with lmplot's regression line.
# NOTE(review): this line is fitted to the test frame itself, not carried
# over from the training data -- confirm that is the intended comparison.
test_plot = sns.lmplot(
    x='x',
    y='y',
    data=of_test_df,
    ci=None,
    line_kws={'color': cb_palette[0]},
    scatter_kws={'color': 'black'},
    height=2.4,
    aspect=2,
)
# Same fixed y-range of (-1, 1) for the axes.
test_plot.ax.set_ylim(-1, 1);
# Overlay: the order-n polynomial fitted to the TRAINING frame (of_df).
# marker='' is passed so that, presumably, only the fitted curve is visible
# and the training points are not re-drawn on top of the test points.
sns.regplot(
    x='x',
    y='y',
    data=of_df,
    order=n,
    ci=None,
    marker='',
    line_kws={'color': cb_palette[1]},
    scatter_kws={'color': 'black'},
);
plt.title("Evaluation: Unseen Test Data");
plt.show()