convo_net.py | searchcode

/convo_net.py

https://gitlab.com/Mounphuix/MNIST
Python | 251 lines | 167 code | 38 blank | 46 comment | 16 complexity | 946d12eea9febbff99f38e527d8724da MD5 | raw file

from __future__ import print_function
import os
import sys
import timeit
import numpy as np
import pandas as pd
import theano
import theano.tensor as T
import six.moves.cPickle as pickle
from network_layers import LogisticRegression
from network_layers import HiddenLayer
from network_layers import LeNetConvPoolLayer
from utility_functions import load_data


class CNN(object):
    """
    Convolutional neural network made of two convolution/pooling layers,
    one tanh hidden layer and a logistic regression output layer.
    """
    def __init__(self, rng, input, batch_size, nkerns):
        """
        Build the symbolic graph of the network.

        :param rng: random state handed to every layer that takes one
        :param input: symbolic input; it is reshaped to
                      (batch_size, 1, 28, 28) before the first layer
        :param batch_size: number of images per minibatch, baked into the
                           layer shapes
        :param nkerns: indexable; nkerns[0] and nkerns[1] are the number of
                       kernels of the first and second convolution layer
        """
        kernels_first = nkerns[0]
        kernels_second = nkerns[1]
        pooling = (2, 2)
        hidden_units = 500

        # NOTE: layers are created in the same order as before because the
        # layers that receive `rng` share that one random state.
        image_batch = input.reshape((batch_size, 1, 28, 28))
        self.convLayer0 = LeNetConvPoolLayer(
            rng,
            input=image_batch,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(kernels_first, 1, 5, 5),
            poolsize=pooling
        )

        # 12x12 is presumably the 28x28 image after a 5x5 filter and 2x2
        # pooling -- depends on LeNetConvPoolLayer, TODO confirm.
        self.convLayer1 = LeNetConvPoolLayer(
            rng,
            input=self.convLayer0.output,
            image_shape=(batch_size, kernels_first, 12, 12),
            filter_shape=(kernels_second, kernels_first, 5, 5),
            poolsize=pooling
        )

        # flatten(2) keeps the first axis and collapses the rest, so the
        # hidden layer receives one row per image.
        flat_features = self.convLayer1.output.flatten(2)
        self.hiddenLayer = HiddenLayer(
            rng,
            input=flat_features,
            n_in=kernels_second * 4 * 4,
            n_out=hidden_units,
            activation=T.tanh
        )

        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=hidden_units,
            n_out=10
        )

        # keep a handle on the symbolic input the graph was built from
        self.input = input

        # cost and error measure are delegated to the output layer
        output_layer = self.logRegressionLayer
        self.negative_log_likelihood = output_layer.negative_log_likelihood
        self.errors = output_layer.errors

        # all trainable parameters, output layer first
        self.params = (output_layer.params + self.hiddenLayer.params +
                       self.convLayer1.params + self.convLayer0.params)


def evaluate_lenet5(learning_rate=0.005, n_epochs=1000,
                    dataset='input/train.csv',
                    nkerns=(20, 50), batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    Trains a CNN with minibatch gradient descent and early stopping.
    Every time the validation error improves, the model parameters, the
    symbolic prediction and the symbolic input are pickled to
    ``best_model_cnn.pkl`` in the current working directory.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training and validation
                    (passed to ``load_data``)

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch; examples that do
                       not fill a complete minibatch are not used
    """

    rng = np.random.RandomState(1)
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of complete minibatches for training and validation
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # construct the CNN class
    classifier = CNN(
        rng=rng,
        input=x,
        batch_size=batch_size,
        nkerns=nkerns
    )

    # the cost we minimize during training is the NLL of the model
    cost = classifier.negative_log_likelihood(y)

    # error on one validation minibatch, selected by its index
    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of gradients for all model parameters
    gparams = [T.grad(cost, param) for param in classifier.params]

    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # one gradient step on one training minibatch; returns the cost
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # training stops once this many iterations are done
    patience_increase = 2  # factor applied to the iteration count when a
                           # significantly better validation loss is found
    improvement_threshold = 0.995  # relative improvement that counts as
                                   # significant
    # validate once per epoch, or more often if patience is small
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # global count of minibatches processed so far (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # persist the best model so far for later prediction
                    with open('best_model_cnn.pkl', 'wb') as f:
                        pickle.dump((classifier.params,
                                     classifier.logRegressionLayer.y_pred,
                                     classifier.input), f)
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,' %
          (best_validation_loss * 100., best_iter + 1))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


def predict():
    """
    Load trained model and predict labels

    Reads the pickled model from ``best_model_cnn.pkl`` and the test images
    from ``input/test.csv``, predicts a label for every row of the test
    file and writes ``output/results_cnn.csv`` with the columns ``ImageId``
    (1-based row number) and ``Label``.
    """
    batch_size = 500
    # The freshly built network only serves as a container: the prediction
    # function below is compiled from the loaded symbolic variables.
    classifier = CNN(
        rng=np.random.RandomState(1234),
        input=T.matrix('x'),
        batch_size=batch_size,
        nkerns=[20, 50]
    )

    # load the saved model; the file was written in binary mode, so it has
    # to be read in binary mode as well.
    # NOTE: unpickling can execute arbitrary code -- only load model files
    # produced by a trusted training run.
    with open('best_model_cnn.pkl', 'rb') as f:
        (classifier.params,
         classifier.logRegressionLayer.y_pred,
         classifier.input) = pickle.load(f)

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred)

    test_data = pd.read_csv('input/test.csv')
    x_test = test_data.values.astype(float)
    n_samples = len(x_test)

    # The network reshapes its input to exactly `batch_size` images, so the
    # data is fed in chunks of that size.
    # NOTE(review): assumes the model was trained with batch_size == 500,
    # the default of evaluate_lenet5 -- confirm if that default changes.
    batch_predictions = []
    n_predicted = 0
    for batch_number, start in enumerate(range(0, n_samples, batch_size)):
        batch = x_test[start:start + batch_size]
        n_valid = len(batch)
        if n_valid < batch_size:
            # pad the final, incomplete batch with blank rows so that the
            # trailing samples are predicted instead of silently dropped
            padding = np.zeros((batch_size - n_valid, batch.shape[1]),
                               dtype=batch.dtype)
            batch = np.concatenate([batch, padding])
        # discard the predictions that belong to padding rows
        batch_predictions.append(predict_model(batch)[:n_valid])
        n_predicted += n_valid
        print(batch_number, n_predicted)

    if batch_predictions:
        predicted_values = np.concatenate(batch_predictions)
    else:
        # empty test file: still write a well-formed (empty) result table
        predicted_values = np.array([], dtype=int)

    imageids = np.arange(1, len(predicted_values) + 1)
    results = pd.DataFrame(data={'ImageId': imageids, 'Label': predicted_values})
    results.to_csv('output/results_cnn.csv', index=False)
    print('Predictions Complete')

# Script entry point: train the network, then label the test set.
# predict() reads best_model_cnn.pkl, the file evaluate_lenet5() writes
# whenever the validation error improves, so training runs first.
if __name__ == '__main__':
    evaluate_lenet5()
    predict()