# /asgd/auto_step_size.py
import copy
import logging
import time

import numpy as np

logger = logging.getLogger(__name__)

DEFAULT_MAX_EXAMPLES = 1000  # estimate the step size from this many examples
DEFAULT_TOLERANCE = 1.0  # in log-2 units of the learning rate
DEFAULT_SGD_STEP_SIZE_FLOOR = 1e-7  # -- for huge feature vectors, reduce this.


def find_sgd_step_size0(model, partial_fit_args, tolerance=DEFAULT_TOLERANCE):
    """Use a coarse line search to find the best initial SGD step size.

    Parameters
    ----------
    model : BinaryASGD
        Instance of a BinaryASGD.
    partial_fit_args : tuple
        Arguments for model.partial_fit. This tuple must start with X, y, ...
    tolerance : float
        Search tolerance, in log-2 step-size units.

    Returns
    -------
    Optimal sgd_step_size0 given `X` and `y`.
    """
    # -- the search below sometimes evaluates the same size twice,
    #    so cache cost evaluations
    _cache = {}

    def eval_size0(log2_size0):
        try:
            return _cache[float(log2_size0)]
        except KeyError:
            pass
        # Train a throwaway copy of the model with this step size, then
        # measure its cost on the same examples.
        other = copy.deepcopy(model)
        current_step_size = 2 ** log2_size0
        other.sgd_step_size0 = current_step_size
        other.sgd_step_size = current_step_size
        other.partial_fit(*partial_fit_args)
        # Score the plain SGD weights rather than the averaged (ASGD) ones,
        # so that the cost reflects this step size directly.
        other.asgd_weights = other.sgd_weights
        other.asgd_bias = other.sgd_bias
        X, y = partial_fit_args[:2]
        rval = other.cost(X, y)
        if np.isnan(rval):
            rval = float('inf')
        logger.info('find step %e: %e' % (current_step_size, rval))
        _cache[float(log2_size0)] = rval
        return rval
    if tolerance < 0.5:
        raise NotImplementedError(
            'tolerance too small, need adaptive step size')

    # N.B. we step downward first so that if y0 == y1 == inf
    # we keep moving downward (toward smaller, safer step sizes)
    step = -tolerance
    x0 = np.log2(model.sgd_step_size0)
    x1 = np.log2(model.sgd_step_size0) + step
    y0 = eval_size0(x0)
    y1 = eval_size0(x1)
    if y1 > y0:
        # The first step went uphill: flip direction and relabel the points.
        step *= -1
        y0, y1 = y1, y0
        x0, x1 = x1, x0
    # Keep stepping while the cost improves (or is still infinite).
    while (y1 < y0) or (y1 == float('inf')):
        x0, y0 = x1, y1
        x1 += step
        y1 = eval_size0(x1)
    # I tried using scipy.optimize.fmin, but this function is bumpy and we
    # only want a really coarse estimate of the optimum, so fmin and
    # fmin_powell end up being relatively inefficient even compared to this
    # very naive search.
    #
    # TODO: increase the step size every time the cost still goes down, and
    # then backtrack when we over-step
    rval = 2.0 ** x0
    return rval


# XXX: use a different name -- this function is not specific to binary
# classification
def binary_fit(
        model, fit_args,
        max_examples=DEFAULT_MAX_EXAMPLES,
        step_size_floor=DEFAULT_SGD_STEP_SIZE_FLOOR,
        **find_sgd_step_size0_kwargs):
    """Return a model fitted with an automatically selected sgd_step_size0.

    Parameters
    ----------
    model : BaseASGD instance
        Instance of the model to be fitted.
    fit_args : tuple
        Arguments to model.fit. This method assumes they are all
        length-of-dataset ndarrays.
    max_examples : int
        Maximum number of examples to use from `X` and `y` to estimate the
        best sgd_step_size0. N.B. the entirety of X and y is used for the
        final fit() call after the best step size has been found.

    Returns
    -------
    model : model, fitted with an estimate of the best sgd_step_size0
    """
    assert max_examples > 0
    logger.info('binary_fit: design matrix shape %s' % str(fit_args[0].shape))

    # Randomly choose up to max_examples uniformly without replacement from
    # across the whole set of training data.
    all_idxs = model.rstate.permutation(len(fit_args[0]))
    idxs = all_idxs[:max_examples]

    # Find the best learning rate for that subset.
    t0 = time.time()
    best = find_sgd_step_size0(
        model, [a[idxs] for a in fit_args], **find_sgd_step_size0_kwargs)
    logger.info('found best stepsize %e in %f seconds' % (
        best, time.time() - t0))

    # Heuristic: take the best step size according to the first max_examples,
    # and go half that fast for the full run.
    step_size0 = max(best / 2.0, step_size_floor)
    logger.info('setting sgd_step_size: %e' % step_size0)
    model.sgd_step_size0 = float(step_size0)
    model.sgd_step_size = float(step_size0)

    t0 = time.time()
    model.fit(*fit_args)
    logger.info('full fit took %f seconds' % (time.time() - t0))

    return model
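

# Usage sketch (hypothetical, not part of the original module): binary_fit
# only relies on the duck-typed interface used above -- sgd_step_size0,
# sgd_step_size, rstate, partial_fit, fit, cost, and the (a)sgd weight/bias
# attributes. MinimalASGD is a made-up least-squares stand-in for BinaryASGD,
# just to show the calling convention end to end.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # The search deliberately visits divergent step sizes, so silence the
    # resulting overflow warnings.
    np.seterr(over='ignore', invalid='ignore')

    class MinimalASGD(object):
        """Plain single-pass SGD on squared error; no averaging."""

        def __init__(self, n_features, sgd_step_size0=1.0, seed=0):
            # A deliberately too-large initial step size, so the automatic
            # search has to walk downward to find a stable value.
            self.rstate = np.random.RandomState(seed)
            self.sgd_step_size0 = sgd_step_size0
            self.sgd_step_size = sgd_step_size0
            self.sgd_weights = np.zeros(n_features)
            self.sgd_bias = 0.0
            self.asgd_weights = self.sgd_weights
            self.asgd_bias = self.sgd_bias

        def partial_fit(self, X, y):
            for xi, yi in zip(X, y):
                err = np.dot(self.sgd_weights, xi) + self.sgd_bias - yi
                step = self.sgd_step_size
                self.sgd_weights = self.sgd_weights - step * err * xi
                self.sgd_bias -= step * err
            # No averaging in this toy model: the "ASGD" parameters simply
            # track the plain SGD ones.
            self.asgd_weights = self.sgd_weights
            self.asgd_bias = self.sgd_bias
            return self

        def fit(self, X, y):
            return self.partial_fit(X, y)

        def cost(self, X, y):
            pred = np.dot(X, self.asgd_weights) + self.asgd_bias
            return 0.5 * np.mean((pred - y) ** 2)

    rng = np.random.RandomState(42)
    X = rng.randn(2000, 10)
    y = np.dot(X, rng.randn(10))
    model = binary_fit(MinimalASGD(n_features=10), (X, y))
    print('final cost: %e' % model.cost(X, y))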