/convo_net.py
Python | 251 lines | 167 code | 38 blank | 46 comment | 16 complexity | 946d12eea9febbff99f38e527d8724da MD5 | raw file
- from __future__ import print_function
- import os
- import sys
- import timeit
- import numpy as np
- import pandas as pd
- import theano
- import theano.tensor as T
- import six.moves.cPickle as pickle
- from network_layers import LogisticRegression
- from network_layers import HiddenLayer
- from network_layers import LeNetConvPoolLayer
- from utility_functions import load_data
class CNN(object):
    """
    Convolutional neural network made of two convolution/pooling layers, a
    fully-connected tanh hidden layer, and a logistic regression output layer.

    Attributes set by the constructor:
      convLayer0, convLayer1  -- the two ``LeNetConvPoolLayer`` stages
      hiddenLayer             -- ``HiddenLayer`` fed with the flattened
                                 output of ``convLayer1``
      logRegressionLayer      -- ``LogisticRegression`` on top of the hidden
                                 layer
      negative_log_likelihood -- bound method of the output layer
      errors                  -- bound method of the output layer
      params                  -- concatenated layer parameters, output layer
                                 first and ``convLayer0`` last
      input                   -- the symbolic input passed to the constructor
    """

    def __init__(self, rng, input, batch_size, nkerns,
                 image_size=(28, 28), filter_size=5, pool_size=2,
                 n_hidden=500, n_out=10):
        """
        Build the symbolic graph of the network.

        :type rng: numpy.random.RandomState
        :param rng: random number generator forwarded to the convolution
                    and hidden layers

        :type input: theano tensor
        :param input: symbolic minibatch; it is reshaped to
                      (batch_size, 1, height, width) before the first layer

        :type batch_size: int
        :param batch_size: number of examples per minibatch (baked into the
                           graph through the reshape above)

        :type nkerns: sequence of two ints
        :param nkerns: number of kernels of the first and second
                       convolution layer

        :type image_size: tuple of two ints
        :param image_size: (height, width) of one input image

        :type filter_size: int
        :param filter_size: side length of the square convolution filters

        :type pool_size: int
        :param pool_size: side length of the square pooling window

        :type n_hidden: int
        :param n_hidden: number of units of the hidden layer

        :type n_out: int
        :param n_out: number of output classes

        :raises ValueError: if the image is too small for two
                            convolution/pooling stages

        NOTE(review): the derived feature-map sizes assume that
        ``LeNetConvPoolLayer`` shrinks each side to
        ``(side - filter_size + 1) // pool_size``. That is what the
        previously hard-coded 28 -> 12 -> 4 sizes imply; confirm against
        ``network_layers`` before relying on non-default sizes.
        """
        def _stage_output(side):
            # Side length of a feature map after one conv + pool stage.
            return (side - filter_size + 1) // pool_size

        height, width = image_size
        map0 = (_stage_output(height), _stage_output(width))
        map1 = (_stage_output(map0[0]), _stage_output(map0[1]))
        if min(map1) < 1:
            raise ValueError(
                'image_size %r is too small for two %dx%d convolutions with '
                '%dx%d pooling' % (image_size, filter_size, filter_size,
                                   pool_size, pool_size))

        self.convLayer0 = LeNetConvPoolLayer(
            rng,
            input=input.reshape((batch_size, 1, height, width)),
            image_shape=(batch_size, 1, height, width),
            filter_shape=(nkerns[0], 1, filter_size, filter_size),
            poolsize=(pool_size, pool_size)
        )

        self.convLayer1 = LeNetConvPoolLayer(
            rng,
            input=self.convLayer0.output,
            image_shape=(batch_size, nkerns[0], map0[0], map0[1]),
            filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
            poolsize=(pool_size, pool_size)
        )

        # flatten(2) keeps the batch axis and collapses the remaining axes
        # so the hidden layer receives one row per example.
        self.hiddenLayer = HiddenLayer(
            rng,
            input=self.convLayer1.output.flatten(2),
            n_in=nkerns[1] * map1[0] * map1[1],
            n_out=n_hidden,
            activation=T.tanh
        )

        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )

        # Expose the output layer's cost and error measure on the network.
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        self.errors = self.logRegressionLayer.errors

        self.params = (
            self.logRegressionLayer.params +
            self.hiddenLayer.params +
            self.convLayer1.params +
            self.convLayer0.params
        )

        self.input = input
def evaluate_lenet5(learning_rate=0.005, n_epochs=1000,
                    dataset='input/train.csv',
                    nkerns=(20, 50), batch_size=500):
    """ Train the CNN with minibatch gradient descent and early stopping.

    Progress is printed to stdout, the total run time to stderr. Every time
    the validation error reaches a new minimum, the model parameters, the
    symbolic prediction and the symbolic input are pickled to
    ``best_model_cnn.pkl`` in the current directory.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset, passed to ``load_data``; the first
                    two entries of its result are used as the training and
                    validation sets

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch; examples beyond
                       the last full minibatch are not used

    :raises ValueError: if the training or validation set holds fewer than
                        ``batch_size`` examples
    """
    rng = np.random.RandomState(1)

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of full minibatches for training and validation
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    if n_train_batches == 0 or n_valid_batches == 0:
        # Without this check training would silently do nothing and no
        # model would ever be saved.
        raise ValueError(
            'batch_size=%d is larger than the training or validation set'
            % batch_size)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # construct the CNN class
    classifier = CNN(
        rng=rng,
        input=x,
        batch_size=batch_size,
        nkerns=nkerns
    )

    # the cost we minimize during training is the NLL of the model
    cost = classifier.negative_log_likelihood(y)

    # error rate on one validation minibatch
    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of gradients for all model parameters
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain gradient-descent step for every (param, gradient) pair
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # returns the cost of one training minibatch and applies the updates
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000               # look at this many minibatches regardless
    patience_increase = 2          # wait this much longer when a new best
                                   # is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # validate at least once per epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # checkpoint everything predict() needs
                    with open('best_model_cnn.pkl', 'wb') as f:
                        pickle.dump((classifier.params,
                                     classifier.logRegressionLayer.y_pred,
                                     classifier.input), f)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,' %
          (best_validation_loss * 100., best_iter + 1))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def predict(batch_size=500):
    """
    Load the saved model and write predicted labels for the test set.

    Reads ``best_model_cnn.pkl`` and ``input/test.csv``, and writes a CSV
    with the columns ``ImageId`` (1-based row number) and ``Label`` to
    ``output/results_cnn.csv``. A progress line is printed per minibatch.

    :type batch_size: int
    :param batch_size: number of rows fed to the model per call; it has to
                       equal the batch size the saved model was built with,
                       because that size is baked into the pickled graph
    """
    # The pickle already holds the complete prediction graph (trained
    # parameters included), so no new CNN has to be constructed here.
    # NOTE: unpickling can execute arbitrary code -- only load a model file
    # that this script wrote itself.
    with open('best_model_cnn.pkl', 'rb') as f:
        _params, y_pred, model_input = pickle.load(f)

    # compile a predictor function
    predict_model = theano.function(inputs=[model_input], outputs=y_pred)

    test_data = pd.read_csv('input/test.csv')
    # Cast to the dtype the symbolic input was created with, so the data
    # is accepted whatever floatX was in effect at training time.
    x_test = test_data.values.astype(model_input.dtype)

    batch_predictions = []
    n_predicted = 0
    for batch_index, start in enumerate(range(0, len(x_test), batch_size)):
        batch = x_test[start:start + batch_size]
        n_rows = len(batch)
        if n_rows < batch_size:
            # The graph only accepts full minibatches: pad the last one
            # with zero rows and discard their predictions below.
            padding = np.zeros((batch_size - n_rows, batch.shape[1]),
                               dtype=batch.dtype)
            batch = np.concatenate([batch, padding])
        batch_predictions.append(predict_model(batch)[:n_rows])
        n_predicted += n_rows
        print(batch_index, n_predicted)

    if batch_predictions:
        predicted_values = np.concatenate(batch_predictions)
    else:
        # empty test file: still produce a (header-only) result file
        predicted_values = np.array([], dtype='int64')

    imageids = np.arange(1, len(predicted_values) + 1)
    results = pd.DataFrame(data={'ImageId': imageids,
                                 'Label': predicted_values})
    results.to_csv('output/results_cnn.csv', index=False)
    print('Predictions Complete')
def main():
    """Train the network, then label the test set with the saved model."""
    evaluate_lenet5()
    predict()


if __name__ == '__main__':
    main()