
/convo_net.py

https://gitlab.com/Mounphuix/MNIST
from __future__ import print_function
import os
import sys
import timeit
import numpy as np
import pandas as pd
import theano
import theano.tensor as T
import six.moves.cPickle as pickle
from network_layers import LogisticRegression
from network_layers import HiddenLayer
from network_layers import LeNetConvPoolLayer
from utility_functions import load_data


class CNN(object):
    """
    Convolutional Neural Network with two convolution layers, a hidden layer,
    and a logistic regression layer
    """
    def __init__(self, rng, input, batch_size, nkerns):
        """
        Initialize the layers of the convolutional network
        """
        self.convLayer0 = LeNetConvPoolLayer(
            rng,
            input=input.reshape((batch_size, 1, 28, 28)),
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5),
            poolsize=(2, 2)
        )
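        # convLayer0: the 28x28 input convolved with 5x5 filters gives 24x24
        # feature maps, max-pooled 2x2 down to 12x12 (the image_shape below)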
        self.convLayer1 = LeNetConvPoolLayer(
            rng,
            input=self.convLayer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=(2, 2)
        )
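        # convLayer1: 12x12 maps convolved with 5x5 filters give 8x8 maps,
        # max-pooled 2x2 down to 4x4, hence n_in = nkerns[1] * 4 * 4 once
        # the output is flattened for the hidden layer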
        self.hiddenLayer = HiddenLayer(
            rng,
            input=self.convLayer1.output.flatten(2),
            n_in=nkerns[1] * 4 * 4,
            n_out=500,
            activation=T.tanh
        )
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=500,
            n_out=10
        )
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors
        self.params = (
            self.logRegressionLayer.params +
            self.hiddenLayer.params +
            self.convLayer1.params +
            self.convLayer0.params
        )
        self.input = input


def evaluate_lenet5(learning_rate=0.005, n_epochs=1000,
                    dataset='input/train.csv',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in each minibatch
    """
    rng = np.random.RandomState(1)

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # construct the CNN class
    classifier = CNN(
        rng=rng,
        input=x,
        batch_size=batch_size,
        nkerns=nkerns
    )
    # the cost we minimize during training is the NLL of the model
    cost = classifier.negative_log_likelihood(y)

    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
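    # the 'givens' mapping substitutes the index-th minibatch slice of the
    # shared validation set for x and y each time validate_model is called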
    # create a list of gradients for all model parameters
    gparams = [T.grad(cost, param) for param in classifier.params]

    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
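    # each update is plain stochastic gradient descent:
    # param <- param - learning_rate * d(cost)/d(param)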
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
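    # train_model(i) returns the cost on minibatch i and, via `updates`,
    # modifies the shared parameters in place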
    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
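    # run for at least `patience` minibatch iterations; whenever the
    # validation error drops below improvement_threshold times the previous
    # best, raise patience to iter * patience_increase, and stop training
    # once the iteration count reaches patience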
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    with open('best_model_cnn.pkl', 'wb') as f:
                        pickle.dump((classifier.params,
                                     classifier.logRegressionLayer.y_pred,
                                     classifier.input), f)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,' %
          (best_validation_loss * 100., best_iter + 1))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


def predict():
    """
    Load trained model and predict labels
    """
    batch_size = 500
    classifier = CNN(
        rng=np.random.RandomState(1234),
        input=T.matrix('x'),
        batch_size=batch_size,
        nkerns=[20, 50]
    )
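    # this CNN uses the same nkerns and batch_size as training; its params,
    # prediction graph, and input variable are replaced by the unpickled
    # ones below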
    # load the saved model
    with open('best_model_cnn.pkl', 'rb') as f:
        (classifier.params,
         classifier.logRegressionLayer.y_pred,
         classifier.input) = pickle.load(f)
    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred)
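    # the reshape inside CNN.__init__ fixes the batch dimension, so the test
    # set has to be fed through the network batch_size rows at a time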
    test_data = pd.read_csv('input/test.csv')
    x_test = test_data.values.astype(float)

    predicted_values = np.array([])
    for i in range(len(x_test) // batch_size):
        if i == 0:
            predicted_values = predict_model(x_test[i * batch_size: (i + 1) * batch_size])
        else:
            predicted_values = np.append(predicted_values,
                                         predict_model(x_test[i * batch_size: (i + 1) * batch_size]))
        print(i, len(predicted_values))

    imageids = np.array(range(1, len(predicted_values) + 1))
    results = pd.DataFrame(data={'ImageId': imageids, 'Label': predicted_values})
    results.to_csv('output/results_cnn.csv', index=False)
    print('Predictions Complete')


if __name__ == '__main__':
    evaluate_lenet5()
    predict()