# Source code for blitzml._sparse_linear

# coding: utf-8

from ._core import *
from ._dataset import Dataset


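# C signature of the max-penalty routine: the solver, dataset, and parameter
# handles go in; the computed penalty comes back as a value_t.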
lib.BlitzML_sparse_linear_solver_compute_max_l1_penalty.argtypes = [pointer_t, pointer_t, pointer_t]
lib.BlitzML_sparse_linear_solver_compute_max_l1_penalty.restype = value_t

class SparseLinearProblem(Problem):
  def __init__(self, A, b):
    self._check_data_inputs(A, b)
    self._dataset = Dataset(A, b) 
    self._set_solver_c()

  def _set_solver_c(self):
    self._solver_c_wrap = lib.BlitzML_new_sparse_linear_solver_wrapper()

  def _check_data_inputs(self, A, b):
    if not sp.isspmatrix(A) and type(A) != np.ndarray:
      msg = ("Design matrix of type {} is invalid. Type must be "
             "numpy.ndarray or scipy sparse matrix.").format(type(A).__name__)
      value_error(msg)
    if type(b) != np.ndarray:
      msg = ("Labels vector of type {} is invalid. Type must be "
             "numpy.ndarray.").format(type(b).__name__)
      value_error(msg)
    if A.ndim != 2:
      msg = ("Design matrix with shape {} is invalid. "
             "Matrix must be 2d.").format(A.shape)
      value_error(msg)
    if b.ndim != 1:
      msg = ("Labels vector with shape {} is invalid. " 
             "Labels must be 1d.").format(b.shape)
      value_error(msg)
    if A.shape[0] != len(b):
      msg = ("Dimension mismatch between matrix shape {} and labels shape {}. "
             "Length of labels vector must equal number of rows in "
             "design matrix.").format(A.shape, b.shape)
      value_error(msg)

  def compute_max_l1_penalty(self, include_bias_term=True):
    """Compute the smallest L1 penalty parameter for which all weights
    in the solution equal zero.

    Parameters
    ----------
    include_bias_term : bool, optional
      Whether to include an unregularized bias term in the model.  Default
      is True.

    Returns
    -------
    max_l1_penalty : float
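
    Examples
    --------
    Illustrative sketch (assumes the blitzml package re-exports
    ``LassoProblem``; data here is random):

    >>> import numpy as np
    >>> import blitzml
    >>> A = np.asfortranarray(np.random.randn(10, 5))
    >>> b = np.random.randn(10)
    >>> problem = blitzml.LassoProblem(A, b)
    >>> lammax = problem.compute_max_l1_penalty()

    Solving with ``l1_penalty >= lammax`` should then yield all-zero
    weights.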
    """
    parameters = np.zeros(4)
    parameters[2] = float(include_bias_term)
    parameters[3] = self._loss_index
    parameters_c_wrap = self._set_parameters(parameters)

    ret = lib.BlitzML_sparse_linear_solver_compute_max_l1_penalty(
                                              self._solver_c_wrap.c_pointer, 
                                              self._dataset.c_pointer, 
                                              parameters_c_wrap.c_pointer)
    return ret
    

  def solve(self, l1_penalty, include_bias_term=True, initial_weights=None, 
            stopping_tolerance=1e-3, max_time=3.154e+7, min_time=0., 
            max_iterations=100000, verbose=False, log_directory=None):
    r"""Minimizes the objective

    .. math::

       \sum_i L(a_i^T w, b_i) + \texttt{l1\_penalty} ||w||_1 ,

    where L is the problem's loss function.

    Parameters
    ----------
    l1_penalty : float > 0
      Regularization parameter for L1 norm penalty on weights. When this value
      is larger, the solution generally contains more zero entries and Blitz
      generally completes optimization faster.

    include_bias_term : bool, optional
      Whether to include an unregularized bias parameter in the model.  
      Default is True.

    initial_weights : numpy.ndarray of length d, optional
      Initial weights to warm-start optimization.  The solver requires
      less time if initialized to a good approximate solution. Default is None.

    stopping_tolerance : float, optional
      Stopping tolerance for solver.  Optimization terminates if
      (duality_gap / objective_value) is less than stopping_tolerance.
      Default is 1e-3.

    max_time : float, optional
      Time limit in seconds for solving. If stopping tolerance is not reached, 
      optimization terminates after this number of seconds.  Default is 1 year.

    min_time : float, optional
      Minimum time in seconds for solving.  Optimization continues until 
      this amount of time passes, even after reaching stopping tolerance.  
      Default is zero.
    
    max_iterations : int, optional
      Iteration limit for the algorithm. If the stopping tolerance is not
      reached after this number of iterations, optimization terminates.
      Default is 100000.

    verbose : bool, optional
      Whether to print information, such as objective value, to sys.stdout 
      during optimization. Default is False.

    log_directory : string, optional
      Path to an existing directory where Blitz logs time and objective
      value information.  This directory should be empty prior to solving.
      Default is None.

    Returns
    -------
    solution : BlitzMLSolution 
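
    Examples
    --------
    Illustrative sketch (assumes the blitzml package re-exports
    ``LassoProblem``; the ``weights`` attribute on the returned solution is
    an assumption based on the constructor call in this method):

    >>> import numpy as np
    >>> import blitzml
    >>> A = np.asfortranarray(np.random.randn(20, 10))
    >>> b = np.random.randn(20)
    >>> problem = blitzml.LassoProblem(A, b)
    >>> lammax = problem.compute_max_l1_penalty()
    >>> solution = problem.solve(0.1 * lammax, stopping_tolerance=1e-4)
    >>> warm = problem.solve(0.05 * lammax,
    ...                      initial_weights=solution.weights)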
    """
    # Collect the solver options passed to this call and forward them by
    # name to the solver configuration.
    option_names = ["stopping_tolerance", "max_time", "min_time",
                    "max_iterations", "verbose"]
    options = {}
    for name in option_names:
      options[name] = locals()[name]
    self._set_solver_options(**options)

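    # Same parameter layout as in compute_max_l1_penalty, with slot 0 now
    # carrying the L1 penalty.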
    parameters = np.zeros(4)
    parameters[0] = l1_penalty
    parameters[2] = float(include_bias_term)
    parameters[3] = self._loss_index
    parameters_c_wrap = self._set_parameters(parameters)

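    # Layout of the result buffer filled by the C solver (unpacked below):
    #   [weights (num_features) | bias | dual solution (num_examples) |
    #    duality gap | negative objective value]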
    num_examples, num_features = self._dataset.A.shape
    result = np.zeros(num_features + 1 + num_examples + 2)
    if initial_weights is not None:
      result[0:num_features] = initial_weights
    result_c = self._set_result(result)

    solution_status_c = self._setup_solution_status()

    log_dir_c = self._set_log_directory(log_directory)

    lib.BlitzML_solve_problem(self._solver_c_wrap.c_pointer, 
                              self._dataset.c_pointer, 
                              parameters_c_wrap.c_pointer, 
                              result_c, 
                              solution_status_c, 
                              log_dir_c)

    self._format_solution_status()

    weights = result[0:num_features]
    bias = result[num_features]
    dual_solution = result[1+num_features:1+num_features+num_examples]
    duality_gap = result[-2]
    objective_value = -result[-1]

    return self._solution_class(weights, 
                                bias, 
                                dual_solution, 
                                self._solution_status,
                                duality_gap, 
                                objective_value)


class LassoProblem(SparseLinearProblem):
  r"""Class for training sparse linear models with squared loss.

  The optimization objective is

  .. math::
     \tfrac{1}{2} ||A w - b||^2 + \lambda ||w||_1 .

  Parameters
  ----------
  A : numpy.ndarray or scipy.sparse matrix
    n x d design matrix.

  b : numpy.ndarray
    Labels array of length n.

  Blitz tries its best to avoid copying data, but depending on the formats
  of A and b, Blitz may make a copy of these arrays.  To avoid copying,
  follow these guidelines:

  * If A is dense (numpy.ndarray), define A as an `F-contiguous array
    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.asfortranarray.html>`_.
    The dtype for A should match ctypes.c_double, ctypes.c_float,
    ctypes.c_int, or ctypes.c_bool---that is, ``A.dtype == ctypes.c_double``
    evaluates to True, for example.

  * If A is sparse, define A as a scipy.sparse.csc_matrix.  The dtype for
    A.indices should match type ctypes.c_int.  The dtype for A.indptr should
    match type ctypes.c_size_t.  BlitzML can work with double, float, int,
    and bool dtypes for A.data without copying.

  * The dtype for b should match type ctypes.c_double.

  BlitzML will print a warning when objects with more than 1e7 elements are
  being copied.  To suppress warnings, use ``blitzml.suppress_warnings()``.
  """

  @property
  def _solution_class(self):
    return LassoSolution

  @property
  def _loss_index(self):
    return 0.

  def _check_data_inputs(self, A, b):
    SparseLinearProblem._check_data_inputs(self, A, b)
    check_regression_labels(b)

  def _set_solver_c(self):
    self._solver_c_wrap = lib.BlitzML_new_lasso_solver_wrapper()
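

# A minimal sketch (illustrative, not part of the library) of preparing
# inputs so LassoProblem can avoid copies, following the dtype guidelines
# in the docstring above:
#
#   import ctypes
#   import numpy as np
#   A = np.asfortranarray(np.random.randn(100, 20))  # F-contiguous float64
#   assert A.dtype == ctypes.c_double                # matches, so no copy
#   b = np.random.randn(100)                         # float64 labels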


class SparseHuberProblem(SparseLinearProblem):
  r"""Class for training sparse linear models with huber loss.

  The optimization objective is

  .. math::
     \sum_i L(a_i^T w, b_i) + \lambda ||w||_1 ,

  where

  .. math::
     L(a_i^T w, b_i) =
     \left\{
       \begin{array}{ll}
         \tfrac{1}{2} (a_i^T w - b_i)^2 & \text{if}\ |a_i^T w - b_i| < 1 \\[0.4em]
         a_i^T w - b_i - \tfrac{1}{2} & \text{if}\ a_i^T w - b_i \geq 1 \\[0.4em]
         b_i - a_i^T w - \tfrac{1}{2} & \text{otherwise.}
       \end{array}
     \right.

  Here i indexes the ith row in A and ith entry in b.
  """

  @property
  def _solution_class(self):
    return SparseHuberSolution

  @property
  def _loss_index(self):
    return 1.

  def _check_data_inputs(self, A, b):
    SparseLinearProblem._check_data_inputs(self, A, b)
    check_regression_labels(b)


class SparseLogisticRegressionProblem(SparseLinearProblem):
  r"""Class for training sparse linear models with logistic loss.

  The optimization objective is

  .. math::
     \sum_i \log(1 + \exp(-b_i a_i^T w)) + \lambda ||w||_1 ,

  where i indexes the ith row in A and ith entry in b.  Each label b_i
  should have value 1 or -1.  BlitzML treats other label values as 1 if the
  value exceeds zero and -1 otherwise.
  """

  @property
  def _solution_class(self):
    return SparseLogisticRegressionSolution

  @property
  def _loss_index(self):
    return 2.

  def _check_data_inputs(self, A, b):
    SparseLinearProblem._check_data_inputs(self, A, b)
    check_logreg_labels(b)

  def _set_solver_c(self):
    self._solver_c_wrap = lib.BlitzML_new_sparse_logreg_solver_wrapper()


class SparseSquaredHingeProblem(SparseLinearProblem):
  r"""Class for training sparse linear models with squared hinge loss.

  The optimization objective is

  .. math::
     \sum_i \tfrac{1}{2} (1 - b_i a_i^T w)_+^2 + \lambda ||w||_1 ,

  where the "+" subscript denotes the rectifier function.  Each label
  should have value 1 or -1.
  """

  @property
  def _solution_class(self):
    return SparseSquaredHingeSolution

  @property
  def _loss_index(self):
    return 3.

  def _check_data_inputs(self, A, b):
    SparseLinearProblem._check_data_inputs(self, A, b)
    check_classification_labels(b)


class SparseSmoothedHingeProblem(SparseLinearProblem):
  r"""Class for training sparse linear models with smoothed hinge loss.

  The optimization objective is

  .. math::
     \sum_i L(a_i^T w, b_i) + \lambda ||w||_1 ,

  where

  .. math::
     L(a_i^T w, b_i) =
     \left\{
       \begin{array}{ll}
         \tfrac{1}{2} - b_i a_i^T w & \text{if}\ b_i a_i^T w < 0 \\[0.4em]
         \tfrac{1}{2} (1 - b_i a_i^T w)^2 & \text{if}\ b_i a_i^T w \in [0, 1) \\[0.4em]
         0 & \text{otherwise.}
       \end{array}
     \right.

  Each label should have value 1 or -1.
  """

  @property
  def _solution_class(self):
    return SparseSmoothedHingeSolution

  @property
  def _loss_index(self):
    return 4.

  def _check_data_inputs(self, A, b):
    SparseLinearProblem._check_data_inputs(self, A, b)
    check_classification_labels(b)
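

# A minimal end-to-end sketch (illustrative, not part of the library;
# assumes the blitzml package re-exports these classes, and that the
# returned solution exposes predict_probabilities as defined below):
#
#   import numpy as np
#   import blitzml
#
#   A = np.asfortranarray(np.random.randn(50, 8))
#   b = np.sign(np.random.randn(50))  # classification labels in {-1, +1}
#   problem = blitzml.SparseLogisticRegressionProblem(A, b)
#   lammax = problem.compute_max_l1_penalty()
#   solution = problem.solve(0.5 * lammax)
#   probabilities = solution.predict_probabilities(A)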


class LassoSolution(RegressionSolution):
  """Solution class for ``LassoProblem``."""

  def _compute_loss(self, Aw, b):
    loss = 0.5 * np.linalg.norm(Aw - b) ** 2
    return loss


class SparseHuberSolution(RegressionSolution):
  """Solution class for ``SparseHuberProblem``."""

  def _compute_loss(self, Aw, b):
    width = 1.
    residuals = Aw - b
    one_half_width_sq = 0.5 * width ** 2
    # Linear penalty on residuals below -width, which are then zeroed so
    # they do not also contribute to the quadratic term.
    left = np.sum(-width * residuals[residuals < -width] - one_half_width_sq)
    residuals[residuals < -width] = 0.
    # Linear penalty on residuals above width, likewise zeroed afterwards.
    right = np.sum(width * residuals[residuals > width] - one_half_width_sq)
    residuals[residuals > width] = 0.
    # Remaining residuals fall in [-width, width] and incur quadratic loss.
    middle = np.sum(residuals ** 2) * 0.5
    return left + middle + right


class SparseLogisticRegressionSolution(ClassificationSolution):
  """Solution class for ``SparseLogisticRegressionProblem``."""

  def predict_probabilities(self, A):
    """Predict probability values from feature vectors.

    Parameters
    ----------
    A : numpy.ndarray or scipy.sparse matrix
      n x d design matrix to predict probabilities for.

    Returns
    -------
    predicted_probabilities : numpy.ndarray
      Array of length n with values in [0, 1].
    """
    Aw = self._compute_A_times_weights(A)
    probs = 1 / (1 + np.exp(-Aw))
    return probs

  def _compute_loss(self, Aw, b):
    return np.sum(np.log1p(np.exp(-b * Aw)))


class SparseSquaredHingeSolution(ClassificationSolution):
  """Solution class for ``SparseSquaredHingeProblem``."""

  def _compute_loss(self, Aw, b):
    loss_terms = 1 - b * Aw
    loss_terms[loss_terms < 0] = 0.
    return 0.5 * np.sum(loss_terms ** 2)


class SparseSmoothedHingeSolution(ClassificationSolution):
  """Solution class for ``SparseSmoothedHingeProblem``."""

  def _compute_loss(self, Aw, b):
    loss_terms = 1 - b * Aw
    loss_terms[loss_terms < 0] = 0.
    # Margins with b_i a_i^T w in [0, 1) incur the quadratic branch; use
    # <= 1 so the boundary case b_i a_i^T w == 0 is not dropped.
    squared_examples = (loss_terms <= 1)
    squared_loss = 0.5 * np.sum(loss_terms[squared_examples] ** 2)
    # Margins with b_i a_i^T w < 0 incur the linear branch.
    linear_examples = (loss_terms > 1)
    linear_loss = np.sum(loss_terms[linear_examples] - 0.5)
    return squared_loss + linear_loss