SG++ example: icholConvergenceTest.cpp

This example can be found under datadriven/examples/icholConvergenceTest.cpp.

// Copyright (C) 2008-today The SG++ project
// This file is part of the SG++ project. For conditions of distribution and
// use, please see the copyright notice provided with SG++ or at
// sgpp.sparsegrids.org
#ifdef USE_GSL
#endif /* USE_GSL */
#include <iostream>
#include <string>
// Runs the SG++ sparse-grid density-estimation classifier (LearnerSGDEOnOff)
// on the Ripley data set, using an incomplete Cholesky ("IChol") matrix
// decomposition, and prints the classification accuracy on the test data.
// The outer loops allow averaging the error over several repetitions
// (totalSets) and cross-validation folds (totalFolds).
//
// NOTE(review): this listing references several objects that are never
// declared here (gridConfig, regularizationConfig, densityEstimationConfig,
// adaptConfig, dt, trainDataset, testDataset, tmp). They were presumably
// dropped when the example was extracted for the documentation — compare
// against the full source under datadriven/examples/ before reusing.
int main() {
// The whole example depends on GSL-backed linear algebra; without USE_GSL
// the program is a no-op.
#ifdef USE_GSL
// number of independent repetitions / cross-validation folds per repetition
size_t totalSets = 1;
size_t totalFolds = 1; // set to 5 to perform 5-fold cv
// classification error accumulated over all sets / over the folds of one set
double avgError = 0.0;
double avgErrorFolds = 0.0;
for (size_t numSets = 0; numSets < totalSets; numSets++) {
// per-evaluation-step error history, accumulated over the folds
sgpp::base::DataVector avgErrorsFolds(51, 0.0);
for (size_t numFolds = 0; numFolds < totalFolds; numFolds++) {
std::string filename = "../../datasets/ripley/ripleyGarcke.train.arff";
// load training samples
std::cout << "# loading file: " << filename << std::endl;
filename = "../../datasets/ripley/ripleyGarcke.test.arff";
// load test samples
std::cout << "# loading file: " << filename << std::endl;
// if fixed validation data should be used (required for convergence
// monitor):
/*filename = ""; // specify file containing validation data here
// load validation samples
std::cout << "# loading file: " << filename << std::endl;
sgpp::datadriven::Dataset validationDataset =
sgpp::datadriven::ARFFTools::readARFF(filename); */
// binary classification with labels -1 / +1
size_t classNum = 2;
sgpp::base::DataVector classLabels(classNum);
classLabels[0] = -1;
classLabels[1] = 1;
std::cout << "# create grid config" << std::endl;
// NOTE(review): gridConfig and trainDataset are not declared in this
// listing (see the note above main).
gridConfig.dim_ = trainDataset.getDimension();
gridConfig.level_ = 5;
// gridConfig.type_ = sgpp::base::GridType::ModLinear;
std::cout << "# create regularization config" << std::endl;
// initial regularization parameter lambda
regularizationConfig.lambda_ = 0.01;
std::string decompType;
// choose "LU decomposition"
// dt = MatrixDecompositionType::DBMatDecompLU;
// decompType = "LU decomposition";
// choose"Eigen decomposition"
// dt = MatrixDecompositionType::DBMatDecompEigen;
// decompType = "Eigen decomposition";
// choose "Cholesky decomposition"
// dt = sgpp::datadriven::MatrixDecompositionType::Chol;
// decompType = "Cholesky decomposition";
// dt = sgpp::datadriven::MatrixDecompositionType::IChol;
// decompType = "Incomplete Cholesky decomposition";
decompType = "Incomplete Cholesky decomposition on Dense Matrix";
std::cout << "Decomposition type: " << decompType << std::endl;
// NOTE(review): dt is never assigned in this listing; the full example
// presumably sets it to the dense-IChol variant matching decompType above.
densityEstimationConfig.decomposition_ = dt;
// number of sweeps used for the IChol decomposition / for the solver
densityEstimationConfig.iCholSweepsDecompose_ = 2;
densityEstimationConfig.iCholSweepsSolver_ = 2;
std::cout << "# create adaptive refinement configuration" << std::endl;
std::string refMonitor;
// select periodic monitor - perform refinements in fixed intervals
refMonitor = "periodic";
size_t refPeriod = 40; // the refinement interval
// select convergence monitor - perform refinements if algorithm has
// converged
// (convergence measured with respect to changes of the classification
// accuracy)
// refMonitor = "convergence";
// the convergence threshold
double accDeclineThreshold = 0.001;
// number of accuracy measurements which
// are considered for convergence check
size_t accDeclineBufferSize = 140;
// minimum number of iterations before next refinement
// is allowed to be performed
size_t minRefInterval = 10;
std::cout << "Refinement monitor: " << refMonitor << std::endl;
std::string refType;
// select surplus refinement
// refType = "surplus";
// select data-based refinement
// refType = "data";
// select zero-crossings-based refinement
refType = "zero";
std::cout << "Refinement type: " << refType << std::endl;
// refinement is effectively disabled here (0 refinement steps); noPoints_
// and threshold_ would only matter if numRefinements_ were > 0
adaptConfig.numRefinements_ = 0;
adaptConfig.noPoints_ = 7;
adaptConfig.threshold_ = 0.0; // only required for surplus refinement
// initial weighting factor
double beta = 0.0;
// specify if prior should be used to predict class labels
bool usePrior = false;
std::cout << "# create learner" << std::endl;
sgpp::datadriven::LearnerSGDEOnOff learner(gridConfig, adaptConfig, regularizationConfig,
densityEstimationConfig, trainDataset, testDataset,
nullptr, classLabels, classNum, usePrior, beta);
// cross validation is switched off below; the parameters are still passed
// to the learner so cv can be enabled by flipping enableCv
bool enableCv = false;
// set cv configuration if cv enabled
size_t nextCvStep = 50;
double cvLambdaStart = 1e-1;
double cvLambdaEnd = 1e-10;
int cvLambdaSteps = 10;
bool cvLogScale = true;
sgpp::base::DataMatrix* cvTestData = &testDataset.getData();
sgpp::base::DataMatrix* cvTestDataRes = nullptr; // needed?
learner.setCrossValidationParameters(cvLambdaSteps, cvLambdaStart, cvLambdaEnd, cvTestData,
cvTestDataRes, cvLogScale);
// specify batch size
// (set to 1 for processing only a single data point each iteration)
size_t batchSize = 250;
// specify max number of passes over traininig data set
size_t maxDataPasses = 2;
std::cout << "# start to train the learner" << std::endl;
learner.train(batchSize, maxDataPasses, refType, refMonitor, refPeriod, accDeclineThreshold,
accDeclineBufferSize, minRefInterval, enableCv, nextCvStep);
// evaluate the trained classifier on the test data
double acc = learner.getAccuracy();
std::cout << "# accuracy (test data): " << acc << std::endl;
// store results (classified data, grids, density functions)
// learner.storeResults();
// accumulate the misclassification rate of this fold
avgErrorFolds += 1.0 - learner.getAccuracy();
// NOTE(review): tmp is not declared in this listing; it receives the
// learner's per-step error history and is folded into avgErrorsFolds.
learner.getAvgErrors(tmp);
avgErrorsFolds.add(tmp);
}
// average the error over the folds of this set
avgErrorFolds = avgErrorFolds / static_cast<double>(totalFolds);
if ((totalSets > 1) && (totalFolds > 1)) {
std::cout << "Average accuracy on test data (set " + std::to_string(numSets + 1) +
"): " << (1.0 - avgErrorFolds) << std::endl;
}
// NOTE(review): avgError is accumulated but never reported in this listing.
avgError += avgErrorFolds;
avgErrorFolds = 0.0;
avgErrorsFolds.mult(1.0 / static_cast<double>(totalFolds));
// write error evaluation to csv-file
/*std::ofstream output;
output.open("SGDEOnOff_avg_classification_error_"+std::to_string(numSets+1)+".csv");
if (output.fail()) {
std::cout << "failed to create csv file!" << std::endl;
}
else {
for (size_t i = 0; i < avgErrorsFolds.getSize(); i++) {
output << avgErrorsFolds.get(i) << ";" << std::endl;
}
output.close();
}*/
}
#endif /* USE_GSL */
}