Regression Intuition#

[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
import seaborn as sns
from mpl_toolkits import mplot3d
[13]:
X, y = make_regression(n_features=1,noise=30,random_state=42,bias=100)
[18]:
df = pd.DataFrame(np.hstack((X,y.reshape(-1,1))),columns=['x1','y'])
[19]:
df['x0'] = 1  # constant bias/intercept column
[20]:
df.plot(x='x1',y='y',kind='scatter')
[20]:
<AxesSubplot:xlabel='x1', ylabel='y'>
../_images/notebooks_regression_understanding_5_1.png
[21]:
def plot_regression(x, y, y_hat, figsize=(12,5)):
    fig, ax = plt.subplots(1, 2, figsize=figsize)

    # left: predictions overlaid on the data scatter
    ax[0].scatter(x, y, label='original')
    ax[0].plot(x, y_hat, 'k.', label='predicted')
    ax[0].legend()

    # right: original vs predicted target, by sample index
    ax[1].plot(y, label='original')
    ax[1].plot(y_hat, label='predicted')
    ax[1].legend()  # plt.legend() alone would only label the right panel

Fitting a linear regression model#

revisiting the pseudo-inverse#

\begin{align}
X \theta &= y \\
\theta &= X^{+} y
\end{align}

where $X^{+}$ is the Moore-Penrose pseudo-inverse; $X$ is not square here, so the ordinary inverse $X^{-1}$ does not exist.

[22]:
theta = np.linalg.pinv(df[['x0','x1']].values) @ df.y.values
print("theta :",theta)


y_hat = df[['x0','x1']].values @ theta

plot_regression(df.x1,df.y, y_hat)
theta : [103.49534596  49.82930935]
../_images/notebooks_regression_understanding_10_1.png
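
As a sanity check (a sketch of mine, not part of the original run): np.linalg.lstsq solves the same least-squares problem and should agree with the pseudo-inverse solution.

# lstsq minimizes ||X theta - y||^2 directly; expect ~[103.495, 49.829]
theta_lstsq, *_ = np.linalg.lstsq(df[['x0','x1']].values, df.y.values, rcond=None)
print("theta (lstsq):", theta_lstsq)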

revisiting svd and linear systems#

[23]:
u,s,vT = np.linalg.svd(df[['x0','x1']].values,full_matrices=False)

theta = vT.T @ np.linalg.pinv(np.diag(s)) @ u.T @ df.y

print("theta :",theta)


y_hat = df[['x0','x1']].values @ theta
plot_regression(df.x1,df.y, y_hat)
theta : [103.49534596  49.82930935]
../_images/notebooks_regression_understanding_12_1.png
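
Since both singular values here are nonzero, pinv(np.diag(s)) is simply np.diag(1/s); an equivalent, more explicit sketch:

# pinv(X) = V @ Sigma^+ @ U^T, with Sigma^+ = diag(1/s) for nonzero s
theta_svd = vT.T @ np.diag(1.0/s) @ u.T @ df.y
print("theta (explicit Sigma^+):", theta_svd)  # should match theta above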

good old sklearn#

[24]:
from sklearn.linear_model import LinearRegression
[25]:
model = LinearRegression()
model = model.fit(df[['x0','x1']].values,df.y.values)
[26]:
y_hat = model.predict(df[['x0','x1']].values)
[27]:
plot_regression(df.x1,df.y, y_hat)
../_images/notebooks_regression_understanding_17_0.png
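
The fitted parameters live in coef_ and intercept_. Since LinearRegression fits its own intercept by default, the constant x0 column is redundant: its coefficient should come out near zero, with the bias landing in intercept_ (a sketch; expected values taken from the pinv fit above).

print("coef_     :", model.coef_)       # expect ~[0, 49.829]
print("intercept_:", model.intercept_)  # expect ~103.495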

let's try something with Neural Networks#

[28]:
import tensorflow as tf

A perceptron-like neural net#

[29]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
[30]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=1)
])
[31]:
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization (Normalizatio  (None, 2)                5
 n)

 dense (Dense)               (None, 1)                 3

=================================================================
Total params: 8
Trainable params: 3
Non-trainable params: 5
_________________________________________________________________
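
Where those counts come from (my aside): the Normalization layer stores a per-feature mean and variance plus one sample-count scalar, all non-trainable; Dense(1) on two inputs has two weights and one bias, all trainable.

n_features = 2
print("normalization params:", 2*n_features + 1)  # 5 (mean, variance, count)
print("dense params        :", n_features*1 + 1)  # 3 (weights + bias)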
[32]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[32]:
../_images/notebooks_regression_understanding_24_0.png
[33]:
y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
../_images/notebooks_regression_understanding_25_0.png

The model is not trained yet, so the poor fit is understandable.
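
Peeking inside (a sketch; Keras initializes the Dense kernel randomly and the bias to zeros by default), the "model" is just an arbitrary line at this point:

w, b = model.layers[1].get_weights()  # layers[0] is the normalizer
print("kernel:", w.ravel(), "bias:", b)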

[34]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss=['mse'],
    metrics=['mse']
)
[35]:
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=1000,
    batch_size=32,
    verbose=0,
    validation_split = 0.2)
[36]:
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
[38]:
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
[38]:
<AxesSubplot:xlabel='epochs'>
../_images/notebooks_regression_understanding_30_1.png
../_images/notebooks_regression_understanding_30_2.png
[39]:
y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
../_images/notebooks_regression_understanding_31_0.png
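
After training, the single-unit net should have recovered essentially the same line as the closed-form fit; a quick comparison sketch:

theta = np.linalg.pinv(df[['x0','x1']].values) @ df.y.values
y_lin = df[['x0','x1']].values @ theta
print("max |nn - pinv|:", np.abs(y_hat.ravel() - y_lin).max())  # should be small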

A little bit deep neural net but no activation functions#

[40]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split = 0.2)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch

history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization (Normalizatio  (None, 2)                5
 n)

 dense_1 (Dense)             (None, 5)                 15

 dense_2 (Dense)             (None, 5)                 30

 dense_3 (Dense)             (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_33_1.png
../_images/notebooks_regression_understanding_33_2.png
../_images/notebooks_regression_understanding_33_3.png

Since I didn't introduce any activation / non-linearity, the network, no matter how deep it is, is still just a linear regression model.
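
To see the collapse explicitly (a sketch, assuming the layer order built above): composing the three affine maps by hand reproduces the network's output, i.e. the whole stack is one linear model in the normalized inputs.

W1, b1 = model.layers[1].get_weights()
W2, b2 = model.layers[2].get_weights()
W3, b3 = model.layers[3].get_weights()

W = W1 @ W2 @ W3                    # combined (2,1) weight matrix
b = (b1 @ W2 + b2) @ W3 + b3        # combined bias

x_norm = normalizer(df[['x0','x1']].values).numpy()
print(np.allclose(x_norm @ W + b,
                  model.predict(df[['x0','x1']].values), atol=1e-3))  # True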

[41]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[41]:
../_images/notebooks_regression_understanding_35_0.png

now a neural net with sigmoid applied#

[42]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split = 0.2)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization (Normalizatio  (None, 2)                5
 n)

 dense_4 (Dense)             (None, 5)                 15

 dense_5 (Dense)             (None, 5)                 30

 dense_6 (Dense)             (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_37_1.png
../_images/notebooks_regression_understanding_37_2.png
../_images/notebooks_regression_understanding_37_3.png

The fit is now slightly curved, thanks to the sigmoid, as it tries to follow the pattern.
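
The bend is inherited from the activation itself (a sketch): each hidden unit contributes a shifted, scaled copy of this S-curve, and their weighted sum is what curves the fit.

z = np.linspace(-6, 6, 200)
plt.plot(z, 1/(1 + np.exp(-z)))  # sigmoid(z) = 1 / (1 + exp(-z))
plt.title('sigmoid')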

[43]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[43]:
../_images/notebooks_regression_understanding_39_0.png

2 sigmoids applied in the net#

[44]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=500,
    batch_size=32,
    verbose=0,
    validation_split = 0.2)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization (Normalizatio  (None, 2)                5
 n)

 dense_7 (Dense)             (None, 5)                 15

 dense_8 (Dense)             (None, 5)                 30

 dense_9 (Dense)             (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_41_1.png
../_images/notebooks_regression_understanding_41_2.png
../_images/notebooks_regression_understanding_41_3.png

More curvature / non-linear pattern matching as the number of sigmoid layers increases.

[45]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[45]:
../_images/notebooks_regression_understanding_43_0.png

let's try a slightly more complex pattern#

[48]:
X, y = make_regression(n_features=1,noise=20,random_state=42,bias=100,n_samples=500)

df = pd.DataFrame()
df['x1'] = X[...,-1]**3  # cube the feature so y vs x1 is no longer linear
df['y'] = y
df['x0'] = 1
df.head()
[48]:
         x1           y  x0
0 -0.528099   57.401862   1
1  0.000913  102.950676   1
2  0.105983  123.553604   1
3 -3.232089   -9.967066   1
4 -0.057206   77.788884   1
[49]:
df.plot(x='x1',y='y',kind='scatter')
[49]:
<AxesSubplot:xlabel='x1', ylabel='y'>
../_images/notebooks_regression_understanding_46_1.png
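
Why this shape (my aside): y was generated, up to noise, linearly in the original X while x1 = X**3, so y against x1 traces a cube-root curve, roughly y ≈ a·cbrt(x1) + 100. A sketch with an illustrative (not fitted) slope:

grid = np.linspace(df.x1.min(), df.x1.max(), 300)
plt.scatter(df.x1, df.y, alpha=0.3)
plt.plot(grid, 100 + 40*np.cbrt(grid), 'k')  # slope 40 is illustrative only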

a completely linear model for complex data#

[50]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
[51]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split = 0.2
)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_1 (Normalizat  (None, 2)                5
 ion)

 dense_10 (Dense)            (None, 5)                 15

 dense_11 (Dense)            (None, 5)                 30

 dense_12 (Dense)            (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_49_1.png
../_images/notebooks_regression_understanding_49_2.png
../_images/notebooks_regression_understanding_49_3.png

As expected, no matter how deep it is, it can only fit a linear pattern.

[52]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[52]:
../_images/notebooks_regression_understanding_51_0.png

with a sigmoid introducing non-linearity#

[53]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split = 0.2
)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_1 (Normalizat  (None, 2)                5
 ion)

 dense_13 (Dense)            (None, 5)                 15

 dense_14 (Dense)            (None, 5)                 30

 dense_15 (Dense)            (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_53_1.png
../_images/notebooks_regression_understanding_53_2.png
../_images/notebooks_regression_understanding_53_3.png
[54]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[54]:
../_images/notebooks_regression_understanding_54_0.png

with a relu layer#

[57]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=200,
    batch_size=32,
    verbose=0,
    validation_split = 0.2
)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_1 (Normalizat  (None, 2)                5
 ion)

 dense_22 (Dense)            (None, 5)                 15

 dense_23 (Dense)            (None, 5)                 30

 dense_24 (Dense)            (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_56_1.png
../_images/notebooks_regression_understanding_56_2.png
../_images/notebooks_regression_understanding_56_3.png
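
Note (my aside): relu(z) = max(0, z) is piecewise linear, so the fitted curve is a chain of straight segments with kinks where units switch on, rather than the smooth bends sigmoid gives. A sketch of the activation:

z = np.linspace(-3, 3, 200)
plt.plot(z, np.maximum(0, z))  # relu(z) = max(0, z)
plt.title('relu')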
[58]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[58]:
../_images/notebooks_regression_understanding_57_0.png

with two relu layers#

[59]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split = 0.2
)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_1 (Normalizat  (None, 2)                5
 ion)

 dense_25 (Dense)            (None, 5)                 15

 dense_26 (Dense)            (None, 5)                 30

 dense_27 (Dense)            (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_59_1.png
../_images/notebooks_regression_understanding_59_2.png
../_images/notebooks_regression_understanding_59_3.png
[60]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[60]:
../_images/notebooks_regression_understanding_60_0.png

sine wave with a neural network#

[61]:
X, y = make_regression(n_features=1,noise=10,random_state=42,n_samples=500)

X.shape, y.shape
[61]:
((500, 1), (500,))
[62]:
df = pd.DataFrame()
df['x1'] = y                    # feature: the (roughly linear in X) regression output
df['y'] = np.sin(X[...,-1]*4)   # target: a sine of the original X -> noisy sine pattern
df['x0'] = 1
df.plot(x='x1',y='y',kind='scatter')
[62]:
<AxesSubplot:xlabel='x1', ylabel='y'>
../_images/notebooks_regression_understanding_63_1.png

Trying out a linear model#

I know this is not going to work.

[63]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit

model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='linear'),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(df[['x0','x1']], df.y, epochs=500, \
                    batch_size=32, verbose=0, validation_split = 0.2)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_2 (Normalizat  (None, 2)                5
 ion)

 dense_28 (Dense)            (None, 5)                 15

 dense_29 (Dense)            (None, 1)                 6

=================================================================
Total params: 26
Trainable params: 21
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_65_1.png
../_images/notebooks_regression_understanding_65_2.png
../_images/notebooks_regression_understanding_65_3.png

Pretty obvious.

[64]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[64]:
../_images/notebooks_regression_understanding_67_0.png

Now with 2 sigmoid layers#

[65]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit

model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(df[['x0','x1']], df.y, epochs=500, \
                    batch_size=32, verbose=0, validation_split = 0.3)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_3 (Normalizat  (None, 2)                5
 ion)

 dense_30 (Dense)            (None, 5)                 15

 dense_31 (Dense)            (None, 5)                 30

 dense_32 (Dense)            (None, 1)                 6

=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_69_1.png
../_images/notebooks_regression_understanding_69_2.png
../_images/notebooks_regression_understanding_69_3.png
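
Plotting the learned function on a dense grid (a sketch; the column order matches df[['x0','x1']]) makes the limitation visible: with only 5 units per layer the net can manage just a few bends, which is why the much wider network below does better on the sine wave.

grid = np.linspace(df.x1.min(), df.x1.max(), 300)
g = np.column_stack([np.ones_like(grid), grid])  # [x0, x1]
plt.scatter(df.x1, df.y, alpha=0.3)
plt.plot(grid, model.predict(g), 'k')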
[66]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[66]:
../_images/notebooks_regression_understanding_70_0.png

6-layer neural network with sigmoid and tanh#

[69]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit

model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='tanh'),
    tf.keras.layers.Dense(units=100, activation='tanh'),
    tf.keras.layers.Dense(units=1)
])

model.summary()

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=['mse'],
    metrics=['mse']
)

history = model.fit(df[['x0','x1']], df.y, epochs=1000, \
                    batch_size=32, verbose=0, validation_split = 0.3)

history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])

y_hat = model.predict(df[['x0','x1']].values)

plot_regression(df.x1,df.y,y_hat)
Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 normalization_5 (Normalizat  (None, 2)                5
 ion)

 dense_39 (Dense)            (None, 100)               300

 dense_40 (Dense)            (None, 100)               10100

 dense_41 (Dense)            (None, 100)               10100

 dense_42 (Dense)            (None, 100)               10100

 dense_43 (Dense)            (None, 100)               10100

 dense_44 (Dense)            (None, 1)                 101

=================================================================
Total params: 40,806
Trainable params: 40,801
Non-trainable params: 5
_________________________________________________________________
../_images/notebooks_regression_understanding_72_1.png
../_images/notebooks_regression_understanding_72_2.png
../_images/notebooks_regression_understanding_72_3.png
[71]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[71]:
../_images/notebooks_regression_understanding_73_0.png