CS37300 - Data Mining & Machine Learning¶

Fall 2024¶

Instructor & Copyright: Bruno Ribeiro¶

Deep Learning: Backpropagation¶

What is Backpropagation?¶

  • Backpropagation is an algorithm for computing the gradients of a neural network's loss function with respect to its parameters.
    • It computes the gradient of the loss with respect to each weight by propagating the error backward through the network, applying the chain rule, so that each weight can be updated in proportion to its gradient of the loss.

Feedforward Neural Network Recap¶

Artificial neurons¶

The basic building block of a neural network is a neuron, which is linked to $n$ input units through a set of $n$ directed connections. The function it computes is characterized by

  1. a univariate (non-polynomial) activation function, denoted $\sigma : \mathbb{R} \to \mathbb{R}$,
  2. a (column) vector of weights, denoted ${\bf w} = (w_1, \ldots, w_n) \in \mathbb{R}^n$, and
  3. a threshold (bias) value, denoted $b \in \mathbb{R}$.

When an input vector ${\bf x} = (x_1, \ldots , x_n)$ is fed into the neuron through the input units, the neuron computes $h({\bf x}) = \sigma({\bf w}^T {\bf x} + b)$. The value of this function is the neuron's output.
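As a quick illustration, the following is a minimal NumPy sketch of a single neuron; the numbers and the choice of the logistic function as $\sigma$ are illustrative assumptions, not part of the lecture.

In [ ]:
# A minimal sketch of a single neuron h(x) = sigma(w^T x + b),
# using the logistic function as the activation (illustrative values).
import numpy as np

def neuron(x, w, b):
    # pre-activation w^T x + b, then the logistic sigma(z) = 1 / (1 + exp(-z))
    return 1.0 / (1.0 + np.exp(-(w @ x + b)))

x = np.array([0.5, -1.0, 2.0])   # n = 3 inputs
w = np.array([0.1, 0.4, -0.3])   # weights
b = 0.2                          # bias (threshold)
print(neuron(x, w, b))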

Gradient Descent: General Recursive Gradient Computation (Backpropagation)¶

Recap: Feedforward neural network (FFNN a.k.a. MLP)¶

The general architecture of a feedforward neural network is:

  • an input layer with $n$ input-units
  • an output layer with $m$ outputs
  • and one or more hidden layers consisting of intermediate neuron outputs.
    • the superscript $(k)$ in the parameters (weights) ${\bf W}^{(k)}$ will indicate they belong to layer $k$
    • if layer $k$ has $d_\text{in}$ inputs and $d_\text{out}$ outputs, then the neuron weights ${\bf W}^{(k)}\in \mathbb{R}^{d_\text{in} \times d_\text{out}}$ form a $d_\text{in} \times d_\text{out}$ matrix (a small shape check follows the figure below)
[Figure: a feedforward neural network with input, hidden, and output layers]
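The sketch below is a hedged sanity check of the shapes above; the layer sizes are made up, biases are omitted for brevity, and the logistic function plays the role of $\sigma$.

In [ ]:
# Illustrative layer widths: 2 inputs, one hidden layer of 3 neurons, 2 outputs.
import numpy as np

d = [2, 3, 2]
# W^(k) has shape d_in x d_out, so a vector of activations is multiplied as h @ W
Ws = [np.random.randn(d[k], d[k + 1]) for k in range(len(d) - 1)]

h = np.random.randn(d[0])                # the input plays the role of the first "activation"
for W in Ws:
    h = 1.0 / (1.0 + np.exp(-(h @ W)))   # sigma applied elementwise to h W
print([W.shape for W in Ws], h.shape)    # [(2, 3), (3, 2)] (2,)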

Maximum Likelihood Estimation (MLE)¶

Recap "is rich" task:

[Figure: the neural network for the "is rich" task]

The maximum log-likelihood optimization will use gradient ascent:
Let ${\bf W} = ({\bf w}^{(1)},{\bf w}^{(2)},{\bf w}^{(3)})$. We obtain ${\bf W}$ via maximum likelihood estimation using gradient ascent. Let $\text{Data} = \{({\bf x}(i),y(i))\}_{i=1}^n$. $$\begin{align*} &\frac{\partial}{\partial {\bf W}} \text{log-likelihood(Data)} = \frac{\partial}{\partial {\bf W}} \sum_{i=1}^n \log p(y = y(i) | {\bf x}(i); {\bf w}^{(1)}, {\bf w}^{(2)}, {\bf w}^{(3)} ) \\ &= \sum_{i=1}^n y(i) \frac{\partial}{\partial {\bf W}} \log \sigma(({\bf w}^{(3)})^T {\bf h}({\bf x}(i))) + (1 - y(i)) \frac{\partial}{\partial {\bf W}} \log (1-\sigma(({\bf w}^{(3)})^T {\bf h}({\bf x}(i)))),  \\ \end{align*} $$ where $\sigma(x) = \frac{\exp(x)}{1 + \exp(x)}$, ${\bf h}({\bf x}) = (1,\hat{h}_1({\bf x}),\hat{h}_2({\bf x}))$,  $$ \hat{h}_1({\bf x}) = p(h_1 = 1 | {\bf x}) = \sigma(({\bf w}^{(1)})^T {\bf x}),   $$ and $$ \hat{h}_2({\bf x}) = p(h_2 = 1 | {\bf x}) = \sigma(({\bf w}^{(2)})^T {\bf x}).   $$
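Before worrying about gradients, note that the forward computation of this log-likelihood is straightforward. The sketch below is an illustration under assumed shapes (three features per example, with the constant 1 assumed to be included in ${\bf x}$); it is not the lecture's code.

In [ ]:
# Hedged sketch: log-likelihood of the two-hidden-unit network described above.
import numpy as np

def sigma(z):
    return 1.0 / (1.0 + np.exp(-z))

def log_likelihood(W, X, y):
    w1, w2, w3 = W
    ll = 0.0
    for xi, yi in zip(X, y):
        h = np.array([1.0, sigma(w1 @ xi), sigma(w2 @ xi)])   # h(x) = (1, h1_hat, h2_hat)
        p = sigma(w3 @ h)                                     # p(y = 1 | x)
        ll += yi * np.log(p) + (1 - yi) * np.log(1 - p)
    return ll

rng = np.random.default_rng(0)
X = rng.normal(size=(5, 3))               # 5 examples, 3 features each (illustrative)
y = rng.integers(0, 2, size=5)            # binary labels
W = [rng.normal(size=3), rng.normal(size=3), rng.normal(size=3)]
print(log_likelihood(W, X, y))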

How can we compute the gradients of this log-likelihood with respect to the weights? Here, we resort to the chain rule of vector calculus.

In a deep neural network each layer is a composition of previous layers. In our previous example, the composition is $f^2(f^1({\bf x};{\bf w}^{(1)});{\bf w}^{(3)})$, with $$f^1({\bf x};{\bf w}^{(1)}) = \sigma(({\bf w}^{(1)})^T {\bf x}),  $$ and $$f^2(\hat{{\bf h}};{\bf w}^{(3)}) = \sigma(({\bf w}^{(3)})^T \hat{{\bf h}})  ,$$ where $\hat{{\bf h}} = (1,\hat{h}_1,\hat{h}_2)$.

The derivative of the composition $f^2(f^1(x;{\bf w}^{(1)});{\bf w}^{(3)})$ with respect to ${\bf w}^{(1)}_2$ is $$ \frac{\partial f^2(f^1(x;{\bf w}^{(1)});{\bf w}^{(3)})}{\partial {\bf w}^{(1)}_2} = \frac{\partial f^2({\bf h})}{\partial {\bf h}} \bigg|_{h = f^1({\bf x};{\bf w}^{(1)})} \cdot \frac{\partial f^1({\bf x};{\bf w}^{(1)})}{\partial {\bf w}_2^{(1)}}. $$

General solution: Consider having $K > 1$ hidden layers. The influence of a lower layer parameter in the final error can be recovered by the chain rule, which generally states:

$$ \frac{\partial f^K(f^{K-1}(\cdots f^2(f^1(x;{\bf w}^{(1)}))\cdots))}{\partial {\bf w}^{(1)}_2} = \frac{\partial f^K(f^{K-1})}{\partial f^{K-1}} \cdot \frac{\partial f^{K-1}(f^{K-2})}{\partial f^{K-2}} \cdots \frac{\partial f^2(f^1)}{\partial f^1} \cdot \frac{\partial f^1({\bf x};{\bf w}^{(1)})}{\partial {\bf w}_2^{(1)}}. $$
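To see the chain rule in action numerically, here is a hedged sketch with one unit per layer, so that every quantity is a scalar; the numbers are illustrative, and the finite-difference comparison is only an approximate check.

In [ ]:
# Chain rule vs. finite differences for f^2(f^1(x; w1); w3) with scalar layers.
import numpy as np

def sigma(z):
    return 1.0 / (1.0 + np.exp(-z))

def d_sigma(z):                         # sigma'(z) = sigma(z) (1 - sigma(z))
    s = sigma(z)
    return s * (1.0 - s)

x, w1, w3 = 0.7, -0.4, 1.3              # input and the two scalar weights

def f(w1):                              # the composition f^2(f^1(x; w1); w3)
    return sigma(w3 * sigma(w1 * x))

# chain rule: d f^2 / d h evaluated at h = f^1(x; w1), times d f^1 / d w1
h = sigma(w1 * x)
chain = d_sigma(w3 * h) * w3 * d_sigma(w1 * x) * x

eps = 1e-6
finite_diff = (f(w1 + eps) - f(w1 - eps)) / (2 * eps)
print(chain, finite_diff)               # the two values should agree closely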

General Recursive Gradient Computation (Backpropagation)¶

More generally, for deep neural networks we have no hope of writing down a gradient formula for every parameter by hand. We need a recursive method.

  • backpropagation = recursive application of the chain rule along a computational graph to compute the gradients of all inputs/parameters/intermediates
    • Widely used implementations maintain a graph structure, where the nodes implement the forward() / backward() functions.
    • forward pass: compute result of an operation and save any intermediates needed for gradient computation in memory
    • backward pass: apply the chain rule to compute the gradient of the loss function with respect to the inputs.

The recursion can be explained with a single image. Consider the following general algorithm to recursively compute the gradient:

  • The activations of the hidden neurons are given by $\hat{{\bf h}}^{(k)} = (1,\sigma({\bf W}^{(k)} \hat{{\bf h}}^{(k-1)}))$, $k=1,\ldots,K$, where
    • The input is $\hat{{\bf h}}^{(0)} = (1,{\bf x})$, where ${\bf x}$ is a single training example.
    • And the operator $(1,{\bf z})$ concatenates $1$ to vector ${\bf z}$.
  • $L(\hat{{\bf h}}^{(K)})$ is the log-likelihood of a single example. If we consider multiple training examples $\{{\bf x}_1,\ldots,{\bf x}_N\}$, the loss $L(\{\hat{{\bf h}}^{(K)}(i)\}_{i=1}^N)$ is the sum of the losses of the individual training examples. (A code sketch of this recursion follows the figure below.)
[Figure: recursive gradient computation along the layers of the network]
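The sketch below is one way to implement this recursion; it is an illustration, not the lecture's code: the layer widths are made up, a squared-error loss replaces the log-likelihood to keep the example short, and forward_pass / backward_pass are hypothetical helper names. The weight shapes follow the recursion formula above, so ${\bf W}^{(k)}$ is stored as a $d_k \times (d_{k-1}+1)$ matrix.

In [ ]:
# Forward pass stores the intermediates h^(k) and z^(k); the backward pass then applies
# the chain rule layer by layer (here for L = 0.5 ||h^(K) - y||^2, an assumed loss).
import numpy as np

def sigma(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward_pass(x, Ws):
    hs, zs = [np.concatenate(([1.0], x))], []           # h^(0) = (1, x)
    for W in Ws:                                        # h^(k) = (1, sigma(W^(k) h^(k-1)))
        z = W @ hs[-1]
        zs.append(z)
        hs.append(np.concatenate(([1.0], sigma(z))))
    return hs, zs

def backward_pass(hs, zs, Ws, y):
    grads = [None] * len(Ws)
    delta = hs[-1][1:] - y                              # dL/dh^(K) for the squared error
    for k in reversed(range(len(Ws))):
        dz = delta * sigma(zs[k]) * (1 - sigma(zs[k]))  # chain through sigma
        grads[k] = np.outer(dz, hs[k])                  # dL/dW^(k)
        delta = (Ws[k].T @ dz)[1:]                      # propagate to h^(k-1), drop the bias entry
    return grads

rng = np.random.default_rng(0)
widths = [2, 4, 3, 1]                                   # input, two hidden layers, output
Ws = [rng.normal(size=(widths[k + 1], widths[k] + 1)) for k in range(len(widths) - 1)]
hs, zs = forward_pass(rng.normal(size=2), Ws)
print([g.shape for g in backward_pass(hs, zs, Ws, np.array([0.0]))])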

Backpropagation: Practical Challenges with Deep Models¶

Quiz 1: Viewing backpropagation as an algorithm that is called up to $K$ times, recursively, to multiply gradients, what are the practical challenges of performing gradient descent on a deep model ($K \gg 1$)?

We will revisit this question later in the course.

The following code requires Python 3.5 or greater (it uses the @ matrix-multiplication operator introduced in Python 3.5).¶

Example: Feedforward Classification using Python + Numpy¶

In this IPython notebook we will see how to create a neural network classifier using Python and NumPy.¶

Part of the code in this notebook builds on the incomplete example at [https://wiseodd.github.io/techblog/2016/06/21/nn-sgd/].



First, let's create a simple dataset and split into training and testing.
In [1]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import pylab
import numpy as np

X, y = make_moons(n_samples=5000, random_state=42, noise=0.1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

Plotting the data...¶

In [2]:
pylab.scatter(X[:,0], X[:,1], c=y)
pylab.show()
[Figure: scatter plot of the two-moons dataset, colored by class]

Create the Neural Network Weights and Initialize Them¶

We first define:

  • the input layer (two coordinates)
  • number of hidden layers (we will use one)
  • the number of output neurons (number of classes, two in our case)

These are all specified by defining the layer weight matrices¶

Q1: With only one hidden layer, how many weight matrices do we have?

A1: Two matrices, $W_1$ and $W_2$.
A2: One matrix, $W$.

Q2: How do we initialize these matrices?

A1: With zeros.
A2: With ones.
A3: Randomly. We can use a Standard Normal distribution, for instance.

In [3]:
# There are only two features in the data X[:,0] and X[:,1]
n_feature = 2
# There are only two classes: 0 (purple) and 1 (yellow)
n_class = 2


def init_weights(n_hidden=100):
    # Initialize weights with Standard Normal random variables.
    # The extra "+ 1" row in each matrix holds the bias term: a constant 1 is
    # appended to the layer's input, so the bias is folded into the weight matrix.
    model = dict(
        W1=np.random.randn(n_feature + 1, n_hidden),
        W2=np.random.randn(n_hidden + 1, n_class)
    )

    return model

Define the nonlinear activation function (will be used in the last layer)¶

We will use the softmax function.

$$ \text{softmax}(x_i) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} $$

If there are only two classes, the softmax is equivalent to the logistic function (do the math to convince yourself).
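One way to see this: with two classes, $$ \text{softmax}(x)_1 = \frac{e^{x_1}}{e^{x_1}+e^{x_2}} = \frac{1}{1+e^{-(x_1-x_2)}} = \sigma(x_1 - x_2), $$ so the first softmax output is the logistic function applied to the difference of the two inputs.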

Python + Numpy tricks¶

NumPy is very handy: given a vector $x$, np.exp($x$) returns a vector with all elements of $x$ exponentiated.

If $y$ is a vector, $y$.sum() returns the sum of the elements in $y$.

In [4]:
# Defines the softmax function. For two classes, this is equivalent to logistic regression
def softmax(x):
    # Subtracting the max is a standard trick to avoid overflow in np.exp;
    # it does not change the result because the constant cancels in the ratio.
    x = x - x.max()
    return np.exp(x) / np.exp(x).sum()

Define the forward pass¶

Here, we define how the neural network takes an input $x$ and uses the model parameters (weights) $W_1$ and $W_2$ and biases $b_1$ and $b_2$ to predict the class label of the input.

Hidden layer activation¶

From the input vector $x$ to the hidden layer neurons $h$, we need to get the intermediate value $$ z_1 = x W_1 + b_1 $$ and pass it through an activation function.

Our activation function is the ReLU: $$ \text{ReLU}(z) = \begin{cases} z & ,\text{if }z \geq 0 \\ 0 & ,\text{if }z < 0 \end{cases} $$ Thus, $$h = \text{ReLU}(z_1).$$

Once we get the hidden layer values $h$, we compute $$ z_2 = h W_2 + b_2 $$ and pass it through the activation of the last layer $$ \hat{y} = \text{softmax}(z_2). $$ In the code below, the biases $b_1$ and $b_2$ are folded into $W_1$ and $W_2$ by appending a constant 1 to the input and to the hidden vector.

In [5]:
# For a single example $x$
def forward(x, model):
    # Append the constant 1 that multiplies the bias row of W1
    x = np.append(x, 1)

    # Input times first layer matrix
    z_1 = x @ model['W1']

    # ReLU activation gives the hidden layer values
    h = np.maximum(z_1, 0)

    # Append the constant 1 that multiplies the bias row of W2,
    # then map the hidden layer values to the output probabilities
    h = np.append(h, 1)
    hat_y = softmax(h @ model['W2'])

    return h, hat_y

Define backpropagation¶

Now, we need to backpropagate the derivatives for each example.

The backpropagation function gets:

  • all input data $\{x_i\}_i$
  • corresponding hidden values of all training examples: $\{h_i\}_i$
  • errors of each output neuron for all training examples: $\{y_i - \hat{y}_i\}_i$ (this is a subtraction of two vectors)

The score function of training example $i$ is the log-likelihood of the one-hot encoding vector of its class, $$ L(i) = \sum_{j=1}^2 y_i(j) \log \hat{y}_i(j) , $$ where $y_i(j)$ is element $j$ of the one-hot encoding of the class of training example $i$ and $\hat{y}_i(j)$ is the output of the $j$-th output neuron for training example $i$, $$ \hat{y}_i(j) = \frac{\exp(z_2(j))}{\sum_k \exp(z_2(k))}. $$ The derivative of the loss with respect to $z_2$ is a vector (which we represent using the variables without the index $j$) $$\frac{\partial L(i)}{\partial z_2} = (y_i(1) - \hat{y}_i(1),\ldots,y_i(\text{n\_class}) - \hat{y}_i(\text{n\_class})) = y_i - \hat{y}_i.$$ Note that we can get this derivative directly from the error errs.
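As a quick, optional numerical check of this identity, the sketch below compares the analytic gradient $y_i - \hat{y}_i$ with central finite differences on one made-up example; it is not part of the original notebook.

In [ ]:
# Check that d L(i) / d z_2 = y_i - softmax(z_2) on an illustrative example.
import numpy as np

z2 = np.array([0.3, -1.2])               # illustrative output-layer pre-activations
y_onehot = np.array([1.0, 0.0])          # one-hot encoding of the true class

def L(z):
    return np.sum(y_onehot * np.log(np.exp(z) / np.exp(z).sum()))

analytic = y_onehot - np.exp(z2) / np.exp(z2).sum()

eps = 1e-6
numeric = np.array([(L(z2 + eps * np.eye(2)[j]) - L(z2 - eps * np.eye(2)[j])) / (2 * eps)
                    for j in range(2)])
print(analytic, numeric)                 # the two vectors should agree closely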

Other derivatives¶

It will compute the following derivatives for each training example:

  • The derivatives of the last-layer parameters are $$ \frac{\partial L(i)}{\partial W_2} = \frac{\partial z_2(i)}{\partial W_2} \frac{\partial L(i)}{\partial z_2} = h_i^T (y_i - \hat{y}_i) $$ and $$ \frac{\partial L(i)}{\partial b_2} = \frac{\partial z_2(i)}{\partial b_2} \frac{\partial L(i)}{\partial z_2} = (y_i - \hat{y}_i), $$ since $$ \frac{\partial z_2(i)}{\partial b_2} = 1. $$
  • The derivative with respect to the hidden values of the previous layer is $$ \frac{\partial L(i)}{\partial h} = \frac{\partial z_2(i)}{\partial h} \frac{\partial L(i)}{\partial z_2} = W_2 (y_i - \hat{y}_i). $$

We then backpropagate $\partial L(i)/\partial h $ to the previous layer. It is needed for the final derivatives $$ \frac{\partial L(i)}{\partial W_1} = \frac{\partial z_1(i)}{\partial W_1} \frac{\partial h(i)}{\partial z_1} \frac{\partial L(i)}{\partial h}, $$ and $$ \frac{\partial L(i)}{\partial b_1} = \frac{\partial z_1(i)}{\partial b_1} \frac{\partial h(i)}{\partial z_1} \frac{\partial L(i)}{\partial h}, $$

where $$ \frac{\partial z_1(i)}{\partial W_1} = x_i, $$ and $$ \frac{\partial z_1(i)}{\partial b_1} = 1, $$ and the derivative of the ReLU is $$ \frac{\partial h(i)}{\partial z_1} = \begin{cases} 1 & \text{if }z_1(i) > 0,\\ 0 & \text{otherwise}. \end{cases} $$

The final output are the derivatives of the parameters.

In the above, we have described the backpropagation algorithm per training example. The following Python code, as described earlier, takes all examples as inputs at once: each input is a matrix whose rows are the vectors of the individual training examples.

This function outputs the averaged gradients $$ \text{d}W_1 = \frac{1}{N}\sum_{i=1}^N \frac{\partial L(i)}{\partial W_1} \quad\text{and}\quad \text{d}W_2 = \frac{1}{N}\sum_{i=1}^N \frac{\partial L(i)}{\partial W_2}. $$ Because the biases are folded into $W_1$ and $W_2$ (via the appended constant 1), the gradients with respect to $b_1$ and $b_2$ are contained in the last rows of $\text{d}W_1$ and $\text{d}W_2$.

In [6]:
def backward(model, xs, hs, errs):
    """xs, hs, errs contain all information (input, hidden state, error) of all data in the minibatch"""
    # errs holds the output-layer gradients (y - hat_y) for the minibatch;
    # average the gradient of W2 over the minibatch
    dW2 = (hs.T @ errs)/xs.shape[0]

    # Gradient with respect to the hidden layer values
    dh = errs @ model['W2'].T
    # ReLU derivative: zero wherever the hidden unit was not active
    dh[hs <= 0] = 0

    # The bias "neuron" is the constant 1, we don't need to backpropagate its gradient
    # since it has no inputs, so we just remove its column from the gradient
    dh = dh[:, :-1]

    # Append the constant 1 to the data to compute the gradient of W1 (including its bias row)
    xs = np.hstack([xs, np.ones((xs.shape[0], 1))])

    dW1 = (xs.T @ dh)/xs.shape[0]

    return dict(W1=dW1, W2=dW2)

Do the forward and backward procedures to get the gradient¶

For each input example $i$ in the training data, perform a forward pass and:

  • store the hidden unit values of all hidden layers for example $i$ (here, with a single hidden layer, this is just one vector of hidden values)
  • store the gradient of the error of example $i$ with respect to the prediction

Once we have stored all hidden layer values and all the derivatives of all examples, we do the backward pass and return the derivatives of the error with respect to the parameters $W_1$, $b_1$, $W_2$, and $b_2$.

In [7]:
def get_gradient(model, X_train, y_train):
    xs, hs, errs = [], [], []

    for x, cls_idx in zip(X_train, y_train):
        h, y_pred = forward(x, model)

        # Create one-hot coding of true label
        y_true = np.zeros(n_class)
        y_true[int(cls_idx)] = 1.

        # Compute the gradient of output layer
        err = y_true - y_pred

        # Accumulate the information of the examples
        # x: input
        # h: hidden state
        # err: gradient of output layer
        xs.append(x)
        hs.append(h)
        errs.append(err)

    # Backprop using the information we get from the current minibatch
    return backward(model, np.array(xs), np.array(hs), np.array(errs))
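A useful sanity check (not part of the original notebook) is to compare the analytical gradients from get_gradient against finite differences of the average log-likelihood; avg_log_likelihood and check_gradient below are hypothetical helpers written only for this check.

In [ ]:
# Finite-difference check of get_gradient: perturb a few entries of W1 and W2 and compare
# the numerical gradient of the average log-likelihood with the analytical one.
def avg_log_likelihood(model, X, y):
    total = 0.0
    for x, cls_idx in zip(X, y):
        _, y_hat = forward(x, model)
        total += np.log(y_hat[int(cls_idx)])
    return total / X.shape[0]

def check_gradient(model, X, y, eps=1e-5, n_checks=3):
    grad = get_gradient(model, X, y)
    rng = np.random.default_rng(0)
    for layer in ['W1', 'W2']:
        for _ in range(n_checks):
            i = rng.integers(model[layer].shape[0])
            j = rng.integers(model[layer].shape[1])
            old = model[layer][i, j]
            model[layer][i, j] = old + eps
            plus = avg_log_likelihood(model, X, y)
            model[layer][i, j] = old - eps
            minus = avg_log_likelihood(model, X, y)
            model[layer][i, j] = old            # restore the original weight
            print(layer, (i, j), grad[layer][i, j], (plus - minus) / (2 * eps))

check_gradient(init_weights(n_hidden=10), X_train[:50], y_train[:50])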

One gradient ascent step¶

Perform a single gradient ascent step.

Get the gradients and perform the following updates for $N$ training examples: $$ W_1 =W_1 + \epsilon \frac{1}{N} \sum_{i=1}^N \frac{\partial L(i)}{\partial W_1}, $$ $$ b_1 =b_1 + \epsilon \frac{1}{N} \sum_{i=1}^N \frac{\partial L(i)}{\partial b_1}, $$ $$ W_2 =W_2 + \epsilon \frac{1}{N} \sum_{i=1}^N \frac{\partial L(i)}{\partial W_2}, $$ and $$ b_2 =b_2 + \epsilon \frac{1}{N} \sum_{i=1}^N \frac{\partial L(i)}{\partial b_2}, $$ where $\epsilon = 10^{-1}$ in our example.

In [8]:
def gradient_step(model, X_train, y_train, learning_rate=1e-1):
    grad = get_gradient(model, X_train, y_train)
    model = model.copy()

    # Update every parameter in our network (W1 and W2) using its gradient
    for layer in grad:
        # Learning rate: 1e-1
        model[layer] += learning_rate * grad[layer]

    return model

Do gradient ascent some fixed number of times¶

In [9]:
def gradient_ascent(model, X_train, y_train, no_iter=10):
    for iter in range(no_iter):
        print('Iteration {}'.format(iter))

        model = gradient_step(model, X_train, y_train)

    return model

Train the model and test the accuracy over the test data¶

We now train the model and output the prediction error over the test data.

Note that the output has two neurons for the two classes. Our prediction will choose the class corresponding to the largest output neuron value.

In [10]:
no_iter = 200

# Reset model
model = init_weights()

# Train the model
model = gradient_ascent(model, X_train, y_train, no_iter=no_iter)

y_pred = np.zeros_like(y_test)

accuracy = 0

for i, x in enumerate(X_test):
    # Predict the class distribution for this test example
    _, prob = forward(x, model)
    # Get the label by picking the most probable class
    y_pred[i] = np.argmax(prob)

# Fraction of predictions that agree with the true labels
# Because our dataset is balanced, measuring just the accuracy is OK
accuracy = (y_pred == y_test).sum() / y_test.size

print('Accuracy after {} iterations: {}'.format(no_iter,accuracy))
Iteration 0
Iteration 1
...
Iteration 199
Accuracy after 200 iterations: 0.99
In [11]:
pylab.scatter(X_test[:,0], X_test[:,1], c=y_pred)
pylab.show()
[Figure: scatter plot of the test data, colored by predicted class]
In [12]:
no_iter = 10
no_runs = 10

accuracies = np.zeros(no_runs)


for run in range(no_runs):
    print("Run {}".format(run))
    # Reset model
    model = init_weights()

    # Train the model
    model = gradient_ascent(model, X_train, y_train, no_iter=no_iter)

    y_pred = np.zeros_like(y_test)
    
    for i, x in enumerate(X_test):
        # Predict the class distribution for this test example
        _, prob = forward(x, model)
        # Get the label by picking the most probable class
        y_pred[i] = np.argmax(prob)

    # Fraction of predictions that agree with the true labels
    # Because our dataset is balanced, measuring just the accuracy is OK
    accuracies[run] = (y_pred == y_test).sum() / y_test.size

print('Mean accuracy over test data: {}, std: {}'.format(accuracies.mean(), accuracies.std()))
Run 0
Iteration 0
...
Iteration 9
...
Run 9
Iteration 0
...
Iteration 9
Mean accuracy over test data: 0.8674666666666667, std: 0.045322351611049