Helper to create the learner
sgpp::datadriven::LearnerSGDE createSGDELearner(size_t dim, size_t level, double lambda);
Helper to evaluate the classifiers
std::vector<std::string> doClassification(std::vector<sgpp::base::Grid*> grids,
                                          std::vector<sgpp::base::DataVector*> alphas,
                                          sgpp::base::DataMatrix& testData,
                                          sgpp::base::DataVector& testLabel,
                                          size_t classes);
This example shows how the multiple-class classification refinement strategy is used. To do classification, a probability density function is approximated with LearnerSGDE for each class, and a new data point is assigned to the class with the highest probability density at that point. This example is merely a tech example.
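The decision rule itself is simple: a point is assigned to the class whose estimated density at that point is largest. A minimal, SG++-independent sketch of this argmax rule (the density values are assumed to come from per-class estimators such as the ones built below):

#include <cstddef>
#include <vector>

// Return the index of the class whose density value at a point is largest.
// densityValues[c] holds the (hypothetical) value of the c-th class density.
size_t classify(const std::vector<double>& densityValues) {
  size_t best = 0;
  for (size_t c = 1; c < densityValues.size(); c++) {
    if (densityValues[c] > densityValues[best]) {
      best = c;
    }
  }
  return best;
}

// Example: four classes, class 2 has the highest density at this point.
// size_t predicted = classify({0.1, 0.4, 1.3, 0.2});  // yields 2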
All parameters are set at the beginning to give an overview of the configuration used.
std::string filepath = "../datasets/";
std::string filename = "multipleClassesTest.arff";
size_t classes = 4;          // number of classes in the data set
size_t dim = 2;              // dimension of the data points
size_t level = 4;            // level of the regular sparse grids
double lambda = 1e-2;        // regularization parameter used for training
size_t numSteps = 5;         // number of refinement steps
size_t numRefinements = 3;   // number of refinements per step
size_t partCombined = 0;     // parameter of the multi-class refinement functor
double thresh = 0;           // threshold passed to the refinement functor
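The code that reads the ARFF file and fills dataTrain (the training points) and targetTrain (their class labels) is not shown in this walkthrough. A minimal sketch of how these would typically be obtained; the ARFFTools reader call and the Dataset accessors are assumptions, not taken from this example:

// Assumption: ARFFTools provides readARFF(...) returning a Dataset.
sgpp::datadriven::Dataset dataset =
    sgpp::datadriven::ARFFTools::readARFF(filepath + filename);
sgpp::base::DataMatrix dataTrain = dataset.getData();      // one row per data point
sgpp::base::DataVector targetTrain = dataset.getTargets(); // class label per point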
std::cout << "Read training data: " << dataTrain.getNrows() << std::endl;
Empty DataMatrix objects are created, one per class, to be filled with the data points from the data set. A vector is used to stay flexible with respect to the number of classes.
std::vector<sgpp::base::DataMatrix> dataCl;
std::vector<sgpp::datadriven::LearnerSGDE> learner;
for (size_t i = 0; i < classes; i++) {
  // one initially empty matrix per class
  dataCl.push_back(sgpp::base::DataMatrix(0, dim));
}
If the class labels are in [0, classes-1], the points are separated into the given classes independently of the number of classes. The following loop sorts each data point into the DataMatrix of its class.
sgpp::base::DataVector row(dim);
for (size_t i = 0; i < dataTrain.getNrows(); i++) {
  dataTrain.getRow(i, row);
  dataCl.at(static_cast<size_t>(targetTrain.get(i))).appendRow(row);
}
Approximate a probability density function for the class data using LearnerSGDE, one learner per class, and initialize each learner with the data of its class.
for (size_t i = 0; i < classes; i++) {
  std::cout << "Data points of class " << std::setw(3) << std::right << i << ": ";
  std::cout << std::setw(14) << std::right << dataCl.at(i).getNrows() << " | ";
  learner.push_back(createSGDELearner(dim, level, lambda));
  learner.back().initialize(dataCl.at(i));
}
Bundle the grid and surplus vector pointers needed for refinement and evaluation.
std::vector<sgpp::base::Grid*> grids;
std::vector<sgpp::base::DataVector*> alphas;
for (size_t i = 0; i < classes; i++) {
grids.push_back(learner.at(i).getGrid());
alphas.push_back(learner.at(i).getSurpluses());
}
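Each grid, together with its surplus vector, defines an estimated class density f_c(x) = sum_i alpha_i * phi_i(x), where the phi_i are the sparse grid basis functions. These grid/surplus pairs are all that the refinement step and the classification helper below need.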
std::cout << "---------------------------------------------" << std::endl;
for (size_t i = 0; i < classes; i++) {
  learner.at(i).train(*grids.at(i), *alphas.at(i), dataCl.at(i), lambda);
}
std::vector<std::string> eval = doClassification(grids, alphas, dataTrain, targetTrain, classes);
std::cout << " 0 | " << eval.at(0) << " | " << eval.at(1) << std::endl;
The multi-class refinement functor considers the grids of all classes at once when selecting points to refine.
sgpp::datadriven::MultipleClassRefinementFunctor fun(grids, alphas, numRefinements,
                                                     partCombined, thresh);
for (size_t x = 1; x < numSteps + 1; x++) {
  // refine the grids of all classes according to the multi-class strategy
  fun.refine();
  std::cout << "---------------------------------------------" << std::endl;
  // retrain every learner on its refined grid
  for (size_t i = 0; i < classes; i++) {
    learner.at(i).train(*grids.at(i), *alphas.at(i), dataCl.at(i), lambda);
  }
  eval = doClassification(grids, alphas, dataTrain, targetTrain, classes);
  std::cout << " " << x << " | " << eval.at(0) << " | " << eval.at(1) << std::endl;
}
}
solverConfig.eps_ = 1e-10;
crossvalidationConfig.enable_ = false;
crossvalidationConfig.kfold_ = 3;
crossvalidationConfig.lambda_ = 3.16228e-06;
crossvalidationConfig.seed_ = 1234567;
crossvalidationConfig.silent_ = true;
crossvalidationConfig);
return learner;
}
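Only part of the createSGDELearner helper appears above: a few solver and cross-validation settings and the tail of the LearnerSGDE constructor call. A minimal sketch of how such a helper is typically assembled is given below; the configuration struct and field names and the constructor argument order are assumptions based on common LearnerSGDE usage, not taken from this example:

sgpp::datadriven::LearnerSGDE createSGDELearner(size_t dim, size_t level, double lambda) {
  // regular linear sparse grid of the given dimension and level
  sgpp::base::RegularGridConfiguration gridConfig;
  gridConfig.dim_ = dim;
  gridConfig.level_ = static_cast<int>(level);
  gridConfig.type_ = sgpp::base::GridType::Linear;

  // no adaptivity inside the learner; the example refines the grids itself
  sgpp::base::AdpativityConfiguration adaptConfig;
  adaptConfig.numRefinements_ = 0;
  adaptConfig.noPoints_ = 10;
  adaptConfig.threshold_ = 0.0;

  // CG solver; eps_ as in the assignment shown above
  sgpp::solver::SLESolverConfiguration solverConfig;
  solverConfig.type_ = sgpp::solver::SLESolverType::CG;
  solverConfig.maxIterations_ = 1000;
  solverConfig.eps_ = 1e-10;

  // Laplacian regularization; the lambda used for training is passed to train() in main
  sgpp::datadriven::RegularizationConfiguration regularizationConfig;
  regularizationConfig.regType_ = sgpp::datadriven::RegularizationType::Laplace;

  // cross-validation settings as in the assignments shown above
  sgpp::datadriven::CrossvalidationForRegularizationConfiguration crossvalidationConfig;
  crossvalidationConfig.enable_ = false;
  crossvalidationConfig.kfold_ = 3;
  crossvalidationConfig.lambda_ = 3.16228e-06;
  crossvalidationConfig.seed_ = 1234567;
  crossvalidationConfig.silent_ = true;

  sgpp::datadriven::LearnerSGDE learner(gridConfig, adaptConfig, solverConfig,
                                        regularizationConfig, crossvalidationConfig);
  return learner;
}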
Helper function that performs the classification: for every data point it determines the class with the highest density, counts the misclassifications, and returns the per-class and overall accuracies as formatted strings.
std::vector<std::string> doClassification(std::vector<sgpp::base::Grid*> grids,
                                          std::vector<sgpp::base::DataVector*> alphas,
                                          sgpp::base::DataMatrix& testData,
                                          sgpp::base::DataVector& testLabel,
                                          size_t classes) {
double best_eval = -1000.0;
double eval = 0.0;
// evaluation point, predicted class index, and best density value per test point
sgpp::base::DataVector p(testData.getNcols());
sgpp::base::DataVector indices(testData.getNrows());
sgpp::base::DataVector evals(testData.getNrows());
// one evaluation operation per class grid
std::vector<std::unique_ptr<sgpp::base::OperationEval>> evalOps;
for (size_t i = 0; i < grids.size(); i++) {
  std::unique_ptr<sgpp::base::OperationEval> e(
      sgpp::op_factory::createOperationEval(*grids.at(i)));
  evalOps.push_back(std::move(e));
}
std::vector<std::vector<int>> gridEval(classes + 1, std::vector<int>(classes + 1, 0));
for (size_t i = 0; i < testData.getNrows(); i++) {
  testData.getRow(i, p);
  // evaluate every class density at the point; keep the class with the highest value
  for (size_t j = 0; j < grids.size(); j++) {
    eval = evalOps.at(j)->eval(*alphas.at(j), p);
    if (eval > best_eval) {
      best_eval = eval;
      indices.set(i, static_cast<double>(j));
      evals.set(i, best_eval);
    }
  }
  best_eval = -1000.0;
}
std::vector<int> classCounts(grids.size(), 0);
std::vector<int> classErrorCounts(grids.size(), 0);
// predicted minus true labels; nonzero entries mark misclassified points
sgpp::base::DataVector totalError(indices);
totalError.sub(testLabel);
size_t totalCount = 0;
for (size_t i = 0; i < testData.getNrows(); i++) {
  classCounts.at(static_cast<size_t>(floor(testLabel.get(i)))) += 1;
  if (fabs(totalError.get(i)) > 0.01) {
    totalCount++;
    classErrorCounts.at(static_cast<size_t>(floor(testLabel.get(i)))) += 1;
  }
}
std::stringstream ss;
for (size_t i = 0; i < grids.size(); i++) {
double ce = (100.0 * (1.0 - (static_cast<double>(classErrorCounts.at(i)) / classCounts.at(i))));
ss << std::fixed << std::setprecision(2) << ce;
if (i < grids.size() - 1) {
ss << " ";
}
}
std::stringstream ss2;
ss2 << std::fixed << std::setprecision(3);
ss2 << (100.0 * (1.0 - (static_cast<double>(totalCount) /
                        static_cast<double>(testData.getNrows()))));
std::vector<std::string> result;
result.push_back(ss.str());
result.push_back(ss2.str());
return result;
}