# asgd/auto_step_size.py
# https://github.com/jaberg/asgd
  1. import copy
  2. import logging
  3. import time
  4. import numpy as np
  5. import scipy.optimize
  6. logger = logging.getLogger(__name__)
  7. DEFAULT_MAX_EXAMPLES = 1000 # estimate stepsize from this many examples
  8. DEFAULT_TOLERANCE = 1.0 # in log-2 units of the learning rate
  9. DEFAULT_SGD_STEP_SIZE_FLOOR = 1e-7 # -- for huge feature vectors, reduce this.
  10. def find_sgd_step_size0(
  11. model, partial_fit_args,
  12. tolerance=DEFAULT_TOLERANCE):
  13. """Use a Brent line search to find the best step size
  14. Parameters
  15. ----------
  16. model: BinaryASGD
  17. Instance of a BinaryASGD
  18. partial_fit_args - tuple of arguments for model.partial_fit.
  19. This tuple must start with X, y, ...
  20. tolerance: in logarithmic step size units
  21. Returns
  22. -------
  23. Optimal sgd_step_size0 given `X` and `y`.
  24. """
  25. # -- stupid solver calls some sizes twice!?
  26. _cache = {}
  27. def eval_size0(log2_size0):
  28. try:
  29. return _cache[float(log2_size0)]
  30. except KeyError:
  31. pass
  32. other = copy.deepcopy(model)
  33. current_step_size = 2 ** log2_size0
  34. other.sgd_step_size0 = current_step_size
  35. other.sgd_step_size = current_step_size
  36. other.partial_fit(*partial_fit_args)
  37. other.asgd_weights = other.sgd_weights
  38. other.asgd_bias = other.sgd_bias
  39. X, y = partial_fit_args[:2]
  40. rval = other.cost(X, y)
  41. if np.isnan(rval):
  42. rval = float('inf')
  43. logger.info('find step %e: %e' % (current_step_size, rval))
  44. _cache[float(log2_size0)] = rval
  45. return rval
  46. if tolerance < 0.5:
  47. raise NotImplementedError(
  48. 'tolerance too small, need adaptive stepsize')
  49. # N.B. we step downward first so that if both y0 == y1 == inf
  50. # we stay going downward
  51. step = -tolerance
  52. x0 = np.log2(model.sgd_step_size0)
  53. x1 = np.log2(model.sgd_step_size0) + step
  54. y0 = eval_size0(x0)
  55. y1 = eval_size0(x1)
  56. if y1 > y0:
  57. step *= -1
  58. y0, y1 = y1, y0
  59. x0, x1 = x1, x0
  60. while (y1 < y0) or (y1 == float('inf')):
  61. x0, y0 = x1, y1
  62. x1 += step
  63. y1 = eval_size0(x1)
  64. # I tried using sp.optimize.fmin, but this function is bumpy and we only
  65. # want a really coarse estimate of the optimmum, so fmin and fmin_powell
  66. # end up being relatively inefficient even compared to this really stupid
  67. # search.
  68. #
  69. # TODO: increase the stepsize every time it still goes down, and then
  70. # backtrack when we over-step
  71. rval = 2.0 ** x0
  72. return rval
  73. # XXX: use different name, function is not specific to binary classification
  74. def binary_fit(
  75. model, fit_args,
  76. max_examples=DEFAULT_MAX_EXAMPLES,
  77. step_size_floor=DEFAULT_SGD_STEP_SIZE_FLOOR,
  78. **find_sgd_step_size0_kwargs):
  79. """Returns a model with automatically-selected sgd_step_size0
  80. Parameters
  81. ----------
  82. model: BaseASGD instance
  83. Instance of the model to be fitted.
  84. fit_args - tuple of args to model.fit
  85. This method assumes they are all length-of-dataset ndarrays.
  86. max_examples: int
  87. Maximum number of examples to use from `X` and `y` to find an
  88. estimate of the best sgd_step_size0. N.B. That the entirety of X and y
  89. is used for the final fit() call after the best step size has been found.
  90. Returns
  91. -------
  92. model: model, fitted with an estimate of the best sgd_step_size0
  93. """
  94. assert max_examples > 0
  95. logger.info('binary_fit: design matrix shape %s' % str(fit_args[0].shape))
  96. # randomly choose up to max_examples uniformly without replacement from
  97. # across the whole set of training data.
  98. all_idxs = model.rstate.permutation(len(fit_args[0]))
  99. idxs = all_idxs[:max_examples]
  100. # Find the best learning rate for that subset
  101. t0 = time.time()
  102. best = find_sgd_step_size0(
  103. model, [a[idxs] for a in fit_args], **find_sgd_step_size0_kwargs)
  104. logger.info('found best stepsize %e in %f seconds' % (
  105. best, time.time() - t0))
  106. # Heuristic: take the best stepsize according to the first max_examples,
  107. # and go half that fast for the full run.
  108. step_size0 = max(best / 2.0, step_size_floor)
  109. logger.info('setting sgd_step_size: %e' % step_size0)
  110. model.sgd_step_size0 = float(step_size0)
  111. model.sgd_step_size = float(step_size0)
  112. t0 = time.time()
  113. model.fit(*fit_args)
  114. logger.info('full fit took %f seconds' % (time.time() - t0))
  115. return model