
/convo_net.py

https://gitlab.com/Mounphuix/MNIST
from __future__ import print_function
import os
import sys
import timeit
import numpy as np
import pandas as pd
import theano
import theano.tensor as T
import six.moves.cPickle as pickle
from network_layers import LogisticRegression
from network_layers import HiddenLayer
from network_layers import LeNetConvPoolLayer
from utility_functions import load_data


class CNN(object):
    """
    Convolutional Neural Network with two convolution layers, a hidden layer,
    and a logistic regression layer
    """
    def __init__(self, rng, input, batch_size, nkerns):
        """
        Initialize the layers of the convolutional network
        """
        self.convLayer0 = LeNetConvPoolLayer(
            rng,
            input=input.reshape((batch_size, 1, 28, 28)),
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5),
            poolsize=(2, 2)
        )
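        # convLayer0: the 28x28 input convolved with 5x5 filters gives 24x24
        # feature maps, max-pooled 2x2 down to 12x12 (the image_shape below)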
        self.convLayer1 = LeNetConvPoolLayer(
            rng,
            input=self.convLayer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=(2, 2)
        )
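        # convLayer1: 12x12 maps convolved with 5x5 filters give 8x8 maps,
        # max-pooled 2x2 down to 4x4, hence n_in = nkerns[1] * 4 * 4 once
        # the output is flattened for the hidden layer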
        self.hiddenLayer = HiddenLayer(
            rng,
            input=self.convLayer1.output.flatten(2),
            n_in=nkerns[1] * 4 * 4,
            n_out=500,
            activation=T.tanh
        )
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=500,
            n_out=10
        )
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors
        self.params = (
            self.logRegressionLayer.params +
            self.hiddenLayer.params +
            self.convLayer1.params +
            self.convLayer0.params
        )
        self.input = input


def evaluate_lenet5(learning_rate=0.005, n_epochs=1000,
                    dataset='input/train.csv',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in each minibatch
    """
    rng = np.random.RandomState(1)

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # construct the CNN class
    classifier = CNN(
        rng=rng,
        input=x,
        batch_size=batch_size,
        nkerns=nkerns
    )
    # the cost we minimize during training is the NLL of the model
    cost = classifier.negative_log_likelihood(y)

    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
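    # the 'givens' mapping substitutes the index-th minibatch slice of the
    # shared validation set for x and y each time validate_model is called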
    # create a list of gradients for all model parameters
    gparams = [T.grad(cost, param) for param in classifier.params]

    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
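    # each update is plain stochastic gradient descent:
    # param <- param - learning_rate * d(cost)/d(param)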
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
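    # train_model(i) returns the cost on minibatch i and, via `updates`,
    # modifies the shared parameters in place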
    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
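    # run for at least `patience` minibatch iterations; whenever the
    # validation error drops below improvement_threshold times the previous
    # best, raise patience to iter * patience_increase, and stop training
    # once the iteration count reaches patience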
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    with open('best_model_cnn.pkl', 'wb') as f:
                        pickle.dump((classifier.params,
                                     classifier.logRegressionLayer.y_pred,
                                     classifier.input), f)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,' %
          (best_validation_loss * 100., best_iter + 1))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


def predict():
    """
    Load trained model and predict labels
    """
    batch_size = 500
    classifier = CNN(
        rng=np.random.RandomState(1234),
        input=T.matrix('x'),
        batch_size=batch_size,
        nkerns=[20, 50]
    )
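    # this CNN uses the same nkerns and batch_size as training; its params,
    # prediction graph, and input variable are replaced by the unpickled
    # ones below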
    # load the saved model
    with open('best_model_cnn.pkl', 'rb') as f:
        (classifier.params,
         classifier.logRegressionLayer.y_pred,
         classifier.input) = pickle.load(f)
    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred)
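    # the reshape inside CNN.__init__ fixes the batch dimension, so the test
    # set has to be fed through the network batch_size rows at a time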
    test_data = pd.read_csv('input/test.csv')
    x_test = test_data.values.astype(float)

    predicted_values = np.array([])
    for i in range(len(x_test) // batch_size):
        if i == 0:
            predicted_values = predict_model(x_test[i * batch_size: (i + 1) * batch_size])
        else:
            predicted_values = np.append(predicted_values,
                                         predict_model(x_test[i * batch_size: (i + 1) * batch_size]))
        print(i, len(predicted_values))

    imageids = np.array(range(1, len(predicted_values) + 1))
    results = pd.DataFrame(data={'ImageId': imageids, 'Label': predicted_values})
    results.to_csv('output/results_cnn.csv', index=False)
    print('Predictions Complete')


if __name__ == '__main__':
    evaluate_lenet5()
    predict()