SG++-Doxygen-Documentation
Calculating the regularization path

This example generates a regularization path for sparsity-inducing penalties.

The output format is a comma seperated file. It accepts one argument that determines the desired regularization penalty. The weight path calculation follows the discussion in

 "Regularization paths for generalized linear models via coordinate descent."
Friedman, Jerome, Trevor Hastie, and Rob Tibshirani, Journal of statistical software, 2010.
1 import numpy as np
2 import pysgpp as sg; sg.omp_set_num_threads(4)
3 import pandas as pd
4 import sklearn.datasets as data
5 from scipy.sparse.linalg import LinearOperator, svds
6 import sys

This function generates the Friedman1 dataset.

1 def generate_friedman1(seed):
2  (X,y) = data.make_friedman1(n_samples=10000, random_state=seed, noise=1.0)
3  y = sg.DataVector(y)
4  X = sg.DataMatrix(X)
5  return X, y

This function calculates the design matrix for the linear model.

1 def get_Phi(X_train):
2  def eval_op(x, op, size):
3  result_vec = sg.DataVector(size)
4  x = sg.DataVector(np.array(x).flatten())
5  op.mult(x, result_vec)
6  return result_vec.array().copy()
7
8  def eval_op_transpose(x, op, size):
9  result_vec = sg.DataVector(size)
10  x = sg.DataVector(np.array(x).flatten())
11  op.multTranspose(x, result_vec)
12  return result_vec.array().copy()
13
14  num_elem = X_train.array().shape[0]
15
16  grid = sg.Grid.createModLinearGrid(10)
17  gen = grid.getGenerator()
18  gen.regular(2)
19
20  op = sg.createOperationMultipleEval(grid, X_train)
21  matvec = lambda x: eval_op(x, op, num_elem)
22  rmatvec = lambda x: eval_op_transpose(x, op, grid.getSize())
23
24  shape = (num_elem, grid.getSize())
25  linop = LinearOperator(shape, matvec, rmatvec, dtype='float64')
26
27  Phi = linop.matmat(np.matrix(np.identity(grid.getSize())))
28  return Phi

This function calculates the value of $$\lambda$$ for which all weights are zero.

1 def get_max_lambda(Phi, y, num_rows, l1_ratio=1.0):
2  max_prod = 0
3  for i in range(0, Phi.shape[1]):
4  a = np.asarray(Phi[:,i]).flatten()
5  prod = np.inner(a, y)
6  max_prod = max(max_prod, prod)
7  max_lambda = max_prod / (l1_ratio * num_rows)
8  return max_lambda

This function calculates the weights for different lambdas. The result is printed to the standard output; it then resembles a comma seperated value file.

1 def calculate_weight_path(X, y, max_lambda,penalty, l1_ratio):
2  epsilon=0.001
3  num_lambdas=25
4  estimator = make_estimator(penalty, max_lambda, l1_ratio)
5  estimator.train(X, y)

We create a grid such that $$\lambda \in \{ \varepsilon \cdot \lambda_\text{max} \text{ and } \lambda_\text{max} \}$$.

1  min_lambda = epsilon * max_lambda
2  lambda_grid = np.logspace(np.log10(max_lambda), np.log10(min_lambda), num=num_lambdas)
3  print("no_learner, lambda, weights")
4  for i, lamb in enumerate(lambda_grid):
5  last_weights = estimator.getWeights()
6  estimator = make_estimator(penalty, l1_ratio, lamb,)
7  estimator.setWeights(last_weights) # reuse old weights
8  estimator.train(X, y)
9  print("{}, {:2.6f}, {}".format(i, lamb, np.array2string(estimator.getWeights().array(), separator=',', max_line_width=float('inf'))))

This function returns an estimator that uses the given penalty, l1_ratio and $$\lambda$$

1 def make_estimator(penalty, l1_ratio, lambda_reg):
2  grid = sg.RegularGridConfiguration()
3  grid.dim_ = 10
4  grid.level_ = 2
5  grid.type_ = sg.GridType_ModLinear
6
10
11  solv = sg.SLESolverConfiguration()
12  solv.maxIterations_ = 500
13  solv.threshold_ = 10e-10
14  solv.type_ = sg.SLESolverType_FISTA
15
16  final_solv = solv
17
18  regular = sg.RegularizationConfiguration()
19  regular.type_ = penalty
20  regular.exponentBase_ = 1.0
21  regular.lambda_ = lambda_reg
22  regular.l1_ratio_ = l1_ratio
23
24  estimator = sg.RegressionLearner(grid, adapt, solv, final_solv,regular)
25  return estimator
26
27 def main():
28  penalties = {'lasso': sg.RegularizationType_Lasso,
29  'elasticNet': sg.RegularizationType_ElasticNet,
30  'groupLasso': sg.RegularizationType_GroupLasso}
31  if len(sys.argv) <= 1 or sys.argv[1] not in penalties:
32  print("Call this script by ./sparsePathExample.py <regularization method> <l1_ratio>")
33  print("Acceptable regularization methods are:")
34  print(list(penalties.keys()))
35  return
36  reg_method = penalties[sys.argv[1]]
37  if len(sys.argv) > 2 and sys.argv[1]=='elasticNet':
38  l1_ratio = float(sys.argv[2])
39  else:
40  l1_ratio = 1.0
41
42  X, y = generate_friedman1(123456)

Create the design matrix, calculate $$\lambda_\text{max}$$ and calculate the weight path.

1  Phi = get_Phi(X)
2  max_lambda = get_max_lambda(Phi, y.array(), X.array().shape[0], l1_ratio)
3  calculate_weight_path(X, y, max_lambda, reg_method, l1_ratio)
4
5 if __name__ == '__main__':
6  main()