LearnerSGDEOnOffParallel learns the data using sparse grid density estimation. More...

#include <LearnerSGDEOnOffParallel.hpp>

Public Member Functions
void	assembleNextBatchData (Dataset *dataBatch, size_t *batchOffset) const
	Copies the data from the training set into the data batch. More...

size_t	assignBatchToWorker (size_t batchOffset, bool doCrossValidation)
	Asks the scheduler where to assign the next batch and sends the MPI request. More...

bool	checkAllGridsConsistent ()
	Check whether no grid is in a temporarily inconsistent state. More...

bool	checkGridStateConsistent (size_t classIndex)
	Check whether the grid is in a final state where learning can occur. More...

void	computeNewSystemMatrixDecomposition (size_t classIndex, size_t gridVersion)
	Update the system matrix decomposition after a refinement step. More...

double	getAccuracy () const
	Returns the accuracy of the classifier measured on the test data. More...

std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t > > &	getDensityFunctions ()
	Returns the density functions mapped to class labels. More...

size_t	getDimensionality ()
	Returns the dimensionality of the learner as determined from its training set. More...

double	getError (Dataset &dataset) const
	Error evaluation required for convergence-based refinement. More...

Grid &	getGrid (size_t classIndex)
	Retrieves the grid for a certain class. More...

size_t	getLocalGridVersion (size_t classIndex)
	Returns the internally stored current version of the grid. More...

size_t	getNumClasses () const
	Returns the number of existing classes. More...

std::unique_ptr< DBMatOffline > &	getOffline ()
	Gets the DBMatOffline object. More...

RefinementHandler &	getRefinementHandler ()
	Returns a reference to the refinement handler, which contains the logic to handle the master's refinement cycles. More...

MPITaskScheduler &	getScheduler ()
	Gets a reference to the currently installed MPI Scheduler. More...

Dataset &	getTrainData ()
	Returns a reference to the currently used training data set. More...

Dataset *	getValidationData ()
	Returns a pointer to the currently used validation data set. More...

	LearnerSGDEOnOffParallel (sgpp::base::RegularGridConfiguration &gridConfig, sgpp::base::AdaptivityConfiguration &adaptivityConfig, sgpp::datadriven::RegularizationConfiguration &regularizationConfig, sgpp::datadriven::DensityEstimationConfiguration &densityEstimationConfig, Dataset &trainData, Dataset &testData, Dataset *validationData, DataVector &classLabels, size_t numClassesInit, bool usePrior, double beta, MPITaskScheduler &mpiTaskScheduler)

void	mergeAlphaValues (size_t classIndex, size_t remoteGridVersion, DataVector dataVector, size_t batchOffset, size_t batchSize, bool isLastPacketInSeries)
	Merge alpha values received from a remote process into the local alpha vector. More...

void	predict (DataMatrix &test, DataVector &classLabels) const
	Predicts the class labels of the test data points. More...

void	setLocalGridVersion (size_t classIndex, size_t gridVersion)
	Set the grid version. More...

void	shutdownMPINodes ()
	If this is run on master, it issues shutdown requests to all workers and waits for them to return. More...

void	train (Dataset &dataBatch, bool doCrossValidation)
	Trains the learner with the given data batch. More...

void	train (std::vector< std::pair< sgpp::base::DataMatrix *, double > > &trainDataClasses, bool doCrossValidation)
	Trains the learner with the given data batch that is already split up with respect to its different classes. More...

void	trainParallel (size_t batchSize, size_t maxDataPasses, std::string refinementFunctorType, std::string refMonitor, size_t refPeriod, double accDeclineThreshold, size_t accDeclineBufferSize, size_t minRefInterval)
	Trains the learner with the given dataset. More...

void	updateAlpha (size_t classIndex, std::list< size_t > *deletedPoints, size_t newPoints)
	Updates the surplus vector of a certain class. More...

void	workBatch (Dataset dataset, size_t batchOffset, bool doCrossValidation)
	Train from a batch. More...

virtual	~LearnerSGDEOnOffParallel ()
	Runs MPI finalize when destructing the learner. More...

Static Public Member Functions
static bool	isVersionConsistent (size_t version)
	Check whether a specific grid version is consistent, i.e. whether it is higher than MINIMUM_CONSISTENT_GRID_VERSION. More...

Protected Member Functions
void	allocateClassMatrices (size_t dim, std::vector< std::pair< base::DataMatrix *, double >> &trainDataClasses, std::map< double, int > &classIndices) const
	Allocates memory for every class to hold training data before learning. More...

void	doRefinementForAll (const std::string &refinementFunctorType, const std::string &refinementMonitorType, const std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t >> &onlineObjects, RefinementMonitor &monitor)
	Do an entire refinement cycle for all classes. More...

void	printGridSizeStatistics (const char *messageString, std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t >> &onlineObjects)
	Shows grid size statistics along with a message. More...

void	splitBatchIntoClasses (const Dataset &dataset, size_t dim, const std::vector< std::pair< DataMatrix *, double >> &trainDataClasses, std::map< double, int > &classIndices) const

void	waitForAllGridsConsistent ()
	Wait for all grids to reach a consistent state before continuing. More...

Protected Attributes
sgpp::base::AdaptivityConfiguration &	adaptivityConfig

std::vector< DataVector * >	alphas

DataVector	avgErrors

double	beta

DataVector	classLabels

sgpp::datadriven::DensityEstimationConfiguration &	densityEstimationConfig

std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t > >	densityFunctions

sgpp::base::GeneralGridConfiguration &	gridConfig

std::vector< std::unique_ptr< Grid > >	grids

std::vector< size_t >	localGridVersions
	Vector that holds the grid version for every class. More...

MPITaskScheduler &	mpiTaskScheduler
	Reference to the currently installed MPI Task Scheduler. More...

size_t	numClasses

std::unique_ptr< DBMatOffline >	offline

std::vector< std::unique_ptr< DBMatOffline > >	offlineContainer

std::map< double, double >	prior

size_t	processedPoints

RefinementHandler	refinementHandler
	Instance of the currently installed refinement handler. More...

sgpp::datadriven::RegularizationConfiguration &	regularizationConfig

Dataset &	testData

Dataset &	trainData

bool	trained

bool	usePrior

Dataset *	validationData

bool	workerActive
	Boolean used to detect when a shutdown of a worker has been requested. More...

Detailed Description

LearnerSGDEOnOffParallel learns the data using sparse grid density estimation.

The system matrix is precomputed and factorized using Eigen-, LU- or Cholesky decomposition (offline step). Then, for each class a density function is computed by solving the system in every iteration (online step). If Cholesky decomposition is chosen, refinement/coarsening can be applied. This learner uses MPI to parallelize the learning phase across multiple nodes.

Constructor & Destructor Documentation

◆ LearnerSGDEOnOffParallel()

sgpp::datadriven::LearnerSGDEOnOffParallel::LearnerSGDEOnOffParallel	(	sgpp::base::RegularGridConfiguration &	gridConfig,
		sgpp::base::AdaptivityConfiguration &	adaptivityConfig,
		sgpp::datadriven::RegularizationConfiguration &	regularizationConfig,
		sgpp::datadriven::DensityEstimationConfiguration &	densityEstimationConfig,
		Dataset &	trainData,
		Dataset &	testData,
		Dataset *	validationData,
		DataVector &	classLabels,
		size_t	numClassesInit,
		bool	usePrior,
		double	beta,
		MPITaskScheduler &	mpiTaskScheduler
	)

◆ ~LearnerSGDEOnOffParallel()

sgpp::datadriven::LearnerSGDEOnOffParallel::~LearnerSGDEOnOffParallel ( )

virtual

Runs MPI finalize when destructing the learner.

References sgpp::datadriven::MPIMethods::finalizeMPI().

Member Function Documentation

◆ allocateClassMatrices()

void sgpp::datadriven::LearnerSGDEOnOffParallel::allocateClassMatrices	(	size_t	dim,
		std::vector< std::pair< base::DataMatrix *, double >> &	trainDataClasses,
		std::map< double, int > &	classIndices
	)		const

protected

Allocates memory for every class to hold training data before learning.

Parameters

dim	The dimensionality of the current problem
trainDataClasses	Storage that will be allocated that holds space for data and label
classIndices	A map from each class's label to its index

References classLabels, getNumClasses(), python.statsfileInfo::i, m, and friedman::p.

Referenced by train().

◆ assembleNextBatchData()

void sgpp::datadriven::LearnerSGDEOnOffParallel::assembleNextBatchData	(	Dataset *	dataBatch,
		size_t *	batchOffset
	)		const

Copies the data from the training set into the data batch.

Parameters

dataBatch	Batch of data to fill, with set dimensionality and size
batchOffset	The offset in the training data from which to start copying

References D, sgpp::base::DataVector::get(), sgpp::datadriven::Dataset::getData(), sgpp::datadriven::Dataset::getDimension(), sgpp::datadriven::Dataset::getNumberInstances(), sgpp::base::DataMatrix::getRow(), sgpp::datadriven::Dataset::getTargets(), python.utils.statsfile2gnuplot::j, sgpp::base::DataVector::set(), sgpp::base::DataMatrix::setRow(), and trainData.

Referenced by workBatch().

◆ assignBatchToWorker()

size_t sgpp::datadriven::LearnerSGDEOnOffParallel::assignBatchToWorker	(	size_t	batchOffset,
		bool	doCrossValidation
	)

Asks the scheduler where to assign the next batch and sends the MPI request.

Parameters

batchOffset	Starting offset of the new batch
doCrossValidation	Whether the client should do cross-validation

Returns: The size of the batch assigned by the scheduler

References sgpp::datadriven::MPIMethods::assignBatch(), sgpp::datadriven::MPITaskScheduler::assignTaskVariableTaskSize(), sgpp::datadriven::Dataset::getNumberInstances(), mpiTaskScheduler, sgpp::datadriven::TRAIN_FROM_BATCH, and trainData.

Referenced by trainParallel().

◆ checkAllGridsConsistent()

bool sgpp::datadriven::LearnerSGDEOnOffParallel::checkAllGridsConsistent ( )

Check whether no grid is in a temporarily inconsistent state.

Returns: Whether all grids are consistent

References isVersionConsistent(), and localGridVersions.

Referenced by sgpp::datadriven::RefinementHandler::checkReadyForRefinement().

◆ checkGridStateConsistent()

bool sgpp::datadriven::LearnerSGDEOnOffParallel::checkGridStateConsistent ( size_t classIndex )

Check whether the grid is in a final state where learning can occur.

This is not the case while receiving refinement results or updating the system matrix decomposition.

Parameters

classIndex The class for which to check consistency.

Returns: Whether the grid is currently in a consistent state

References isVersionConsistent(), and localGridVersions.

Referenced by sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate(), setLocalGridVersion(), sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement(), waitForAllGridsConsistent(), and sgpp::datadriven::MPIMethods::waitForGridConsistent().

◆ computeNewSystemMatrixDecomposition()

void sgpp::datadriven::LearnerSGDEOnOffParallel::computeNewSystemMatrixDecomposition	(	size_t	classIndex,
		size_t	gridVersion
	)

Update the system matrix decomposition after a refinement step.

This will wait until the refinement results have been received completely. After computation, the system matrix is sent back to the master.

Parameters

classIndex	The class for which to update the system matrix decomposition
gridVersion	The new grid version to set after updating the matrix

References sgpp::datadriven::RefinementResult::addedGridPoints, D, sgpp::datadriven::RefinementResult::deletedGridPointsIndices, densityEstimationConfig, sgpp::datadriven::DBMatOffline::getDecomposedMatrix(), getDensityFunctions(), getLocalGridVersion(), sgpp::datadriven::DBMatOnline::getOfflineObject(), sgpp::datadriven::RefinementHandler::getRefinementResult(), GRID_RECEIVED_ADDED_POINTS, grids, sgpp::datadriven::RegularizationConfiguration::lambda_, refinementHandler, regularizationConfig, sgpp::datadriven::MPIMethods::sendSystemMatrixDecomposition(), setLocalGridVersion(), sgpp::datadriven::UPDATE_GRID, sgpp::datadriven::DBMatOnline::updateSystemMatrixDecomposition(), and sgpp::datadriven::MPIMethods::waitForIncomingMessageType().

Referenced by sgpp::datadriven::MPIMethods::processIncomingMPICommands().

◆ doRefinementForAll()

void sgpp::datadriven::LearnerSGDEOnOffParallel::doRefinementForAll	(	const std::string &	refinementFunctorType,
		const std::string &	refinementMonitorType,
		const std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t >> &	onlineObjects,
		RefinementMonitor &	monitor
	)

protected

Do an entire refinement cycle for all classes.

Parameters

refinementFunctorType	String constant specifying the functor to use in refinement
refinementMonitorType	String constant specifying the monitor to use in refinement
onlineObjects	Reference to the online objects for density estimation
monitor	The setup of the convergence monitor for refinement

Referenced by trainParallel().

◆ getAccuracy()

double sgpp::datadriven::LearnerSGDEOnOffParallel::getAccuracy ( ) const

Returns the accuracy of the classifier measured on the test data.

Returns: The classification accuracy measured on the test data

References sgpp::base::DataVector::get(), sgpp::datadriven::Dataset::getData(), sgpp::datadriven::Dataset::getNumberInstances(), sgpp::datadriven::Dataset::getTargets(), python.statsfileInfo::i, predict(), and testData.

◆ getDensityFunctions()

std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t > > & sgpp::datadriven::LearnerSGDEOnOffParallel::getDensityFunctions ( )

Returns the density functions mapped to class labels.

Returns: The density function objects mapped to class labels

References densityFunctions.

Referenced by computeNewSystemMatrixDecomposition(), sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate(), trainParallel(), and workBatch().

◆ getDimensionality()

size_t sgpp::datadriven::LearnerSGDEOnOffParallel::getDimensionality ( )

Returns the dimensionality of the learner as determined from its training set.

Returns: The data dimensionality

References sgpp::datadriven::Dataset::getDimension(), and trainData.

Referenced by sgpp::datadriven::RefinementHandler::doRefinementForClass(), sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate(), sgpp::datadriven::MPIMethods::runBatch(), and sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement().

◆ getError()

double sgpp::datadriven::LearnerSGDEOnOffParallel::getError ( Dataset & dataset ) const

Error evaluation required for convergence-based refinement.

Parameters

dataset The data to measure the error on

Returns: The error evaluation

References sgpp::base::DataVector::get(), sgpp::datadriven::Dataset::getData(), sgpp::datadriven::Dataset::getNumberInstances(), sgpp::datadriven::Dataset::getTargets(), python.statsfileInfo::i, and predict().

Referenced by sgpp::datadriven::RefinementHandler::checkRefinementNecessary().

◆ getGrid()

Grid & sgpp::datadriven::LearnerSGDEOnOffParallel::getGrid ( size_t classIndex )

Retrieves the grid for a certain class.

Parameters

classIndex the index of the desired class

Returns: the underlying grid

References grids.

Referenced by sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate().

◆ getLocalGridVersion()

size_t sgpp::datadriven::LearnerSGDEOnOffParallel::getLocalGridVersion ( size_t classIndex )

Returns the internally stored current version of the grid.

Parameters

classIndex The class of the grid to search for

Returns: The current version of the grid

References localGridVersions.

Referenced by sgpp::datadriven::MPIMethods::assignSystemMatrixUpdate(), computeNewSystemMatrixDecomposition(), mergeAlphaValues(), sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate(), sgpp::datadriven::MPIMethods::sendMergeGridNetworkMessage(), sgpp::datadriven::MPIMethods::sendSystemMatrixDecomposition(), sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement(), waitForAllGridsConsistent(), sgpp::datadriven::MPIMethods::waitForGridConsistent(), and workBatch().

◆ getNumClasses()

size_t sgpp::datadriven::LearnerSGDEOnOffParallel::getNumClasses ( ) const

Returns the number of existing classes.

Returns: The number of classes

References numClasses.

Referenced by allocateClassMatrices(), sgpp::datadriven::RoundRobinScheduler::assignTaskStaticTaskSize(), doRefinementForAll(), train(), and workBatch().

◆ getOffline()

std::unique_ptr< DBMatOffline > & sgpp::datadriven::LearnerSGDEOnOffParallel::getOffline ( )

Gets the DBMatOffline object.

Returns: The DBMatOffline object

References offline.

Referenced by sgpp::datadriven::RefinementHandler::checkRefinementNecessary(), and sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement().

◆ getRefinementHandler()

RefinementHandler & sgpp::datadriven::LearnerSGDEOnOffParallel::getRefinementHandler ( )

Returns a reference to the refinement handler, which contains the logic to handle the master's refinement cycles.

Returns: A reference to the refinement handler

References refinementHandler.

Referenced by sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate().

◆ getScheduler()

MPITaskScheduler & sgpp::datadriven::LearnerSGDEOnOffParallel::getScheduler ( )

Gets a reference to the currently installed MPI Scheduler.

The scheduler assigns tasks of variable or static size to workers.

Returns: A reference to the installed MPI Task Scheduler

References mpiTaskScheduler.

Referenced by sgpp::datadriven::RefinementHandler::checkReadyForRefinement(), and sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement().

◆ getTrainData()

Dataset & sgpp::datadriven::LearnerSGDEOnOffParallel::getTrainData ( )

Returns a reference to the currently used training data set.

Returns: A reference to the training data set

References trainData.

Referenced by sgpp::datadriven::RefinementHandler::checkRefinementNecessary(), and sgpp::datadriven::RefinementHandler::handleSurplusBasedRefinement().

◆ getValidationData()

Dataset * sgpp::datadriven::LearnerSGDEOnOffParallel::getValidationData ( )

Returns a pointer to the currently used validation data set.

Returns: A pointer to the validation data set

References validationData.

Referenced by sgpp::datadriven::RefinementHandler::checkRefinementNecessary().

◆ isVersionConsistent()

bool sgpp::datadriven::LearnerSGDEOnOffParallel::isVersionConsistent ( size_t version )

static

Check whether a specific grid version is consistent, i.e. whether it is higher than MINIMUM_CONSISTENT_GRID_VERSION.

Parameters

version The version of the grid to check against

Returns: Whether the version indicates consistency.

Referenced by checkAllGridsConsistent(), checkGridStateConsistent(), doRefinementForAll(), mergeAlphaValues(), sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate(), and setLocalGridVersion().

◆ mergeAlphaValues()

void sgpp::datadriven::LearnerSGDEOnOffParallel::mergeAlphaValues	(	size_t	classIndex,
		size_t	remoteGridVersion,
		DataVector	dataVector,
		size_t	batchOffset,
		size_t	batchSize,
		bool	isLastPacketInSeries
	)

Merge alpha values received from a remote process into the local alpha vector.

Parameters

classIndex	The class to which the alpha vector belongs
remoteGridVersion	The remote grid version this alpha vector was trained on
dataVector	The alpha vector itself
batchOffset	The offset from the start of the training set this vector was trained from
batchSize	The size of the batch this vector was trained from
isLastPacketInSeries	Whether this merge is the last merge in several for the same class and batch

References sgpp::base::DataVector::add(), sgpp::datadriven::RefinementResult::addedGridPoints, alphas, classLabels, D, sgpp::datadriven::RefinementResult::deletedGridPointsIndices, sgpp::base::DataVector::get(), getLocalGridVersion(), sgpp::datadriven::RefinementHandler::getRefinementResult(), sgpp::base::DataVector::getSize(), python.statsfileInfo::i, isVersionConsistent(), localGridVersions, mpiTaskScheduler, sgpp::datadriven::MPITaskScheduler::onMergeRequestIncoming(), prior, refinementHandler, sgpp::base::DataVector::resizeZero(), usePrior, and sgpp::datadriven::MPIMethods::waitForGridConsistent().

Referenced by sgpp::datadriven::MPIMethods::receiveMergeGridNetworkMessage().

◆ predict()

void sgpp::datadriven::LearnerSGDEOnOffParallel::predict	(	DataMatrix &	test,
		DataVector &	classLabels
	)		const

Predicts the class labels of the test data points.

Parameters

test	The data points for which labels will be predicted
classLabels	vector containing the predicted class labels

References alphas, classLabels, densityFunctions, sgpp::base::DataMatrix::getNrows(), grids, numClasses, chess::point, prior, and python.utils.pca_normalize_dataset::u.

Referenced by getAccuracy(), and getError().

◆ printGridSizeStatistics()

void sgpp::datadriven::LearnerSGDEOnOffParallel::printGridSizeStatistics	(	const char *	messageString,
		std::vector< std::pair< std::unique_ptr< DBMatOnlineDE >, size_t >> &	onlineObjects
	)

protected

Shows grid size statistics along with a message.

Parameters

messageString	The message to display alongside the statistics
onlineObjects	The current density estimation objects

References sgpp::base::Grid::getSize(), grid(), and grids.

Referenced by trainParallel().

◆ setLocalGridVersion()

void sgpp::datadriven::LearnerSGDEOnOffParallel::setLocalGridVersion	(	size_t	classIndex,
		size_t	gridVersion
	)

Set the grid version.

Parameters

classIndex	The class whose grid version is set
gridVersion	The new version of the grid

References checkGridStateConsistent(), D, isVersionConsistent(), and localGridVersions.

Referenced by computeNewSystemMatrixDecomposition(), sgpp::datadriven::MPIMethods::receiveGridComponentsUpdate(), and sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement().

◆ shutdownMPINodes()

void sgpp::datadriven::LearnerSGDEOnOffParallel::shutdownMPINodes ( )

If this is run on master, it issues shutdown requests to all workers and waits for them to return.

If this is run on a worker, it sets the shutdown flag.

References sgpp::datadriven::MPIMethods::bcastCommandNoArgs(), sgpp::datadriven::MPIMethods::getWorldSize(), sgpp::datadriven::MPIMethods::isMaster(), sgpp::datadriven::SHUTDOWN, sgpp::datadriven::MPIMethods::waitForIncomingMessageType(), sgpp::datadriven::WORKER_SHUTDOWN_SUCCESS, and workerActive.

Referenced by sgpp::datadriven::MPIMethods::processIncomingMPICommands(), and trainParallel().

◆ splitBatchIntoClasses()

void sgpp::datadriven::LearnerSGDEOnOffParallel::splitBatchIntoClasses	(	const Dataset &	dataset,
		size_t	dim,
		const std::vector< std::pair< DataMatrix *, double >> &	trainDataClasses,
		std::map< double, int > &	classIndices
	)		const

protected

References sgpp::datadriven::Dataset::getData(), sgpp::datadriven::Dataset::getNumberInstances(), sgpp::base::DataMatrix::getRow(), sgpp::datadriven::Dataset::getTargets(), and python.statsfileInfo::i.

Referenced by train().

◆ train() [1/2]

void sgpp::datadriven::LearnerSGDEOnOffParallel::train	(	Dataset &	dataBatch,
		bool	doCrossValidation
	)

Trains the learner with the given data batch.

Parameters

dataBatch	The next data batch to process
doCrossValidation	Enable cross-validation

References allocateClassMatrices(), D, chess::dim, sgpp::datadriven::Dataset::getDimension(), sgpp::datadriven::Dataset::getNumberInstances(), getNumClasses(), and splitBatchIntoClasses().

Referenced by workBatch().

◆ train() [2/2]

void sgpp::datadriven::LearnerSGDEOnOffParallel::train	(	std::vector< std::pair< sgpp::base::DataMatrix *, double > > &	trainDataClasses,
		bool	doCrossValidation
	)

Trains the learner with the given data batch that is already split up with respect to its different classes.

Parameters

trainDataClasses	A vector of pairs; each pair contains the data points that belong to one class and the corresponding class label
doCrossValidation	Enable cross-validation

References sgpp::datadriven::RefinementResult::addedGridPoints, alphas, D, sgpp::datadriven::RefinementResult::deletedGridPointsIndices, densityEstimationConfig, densityFunctions, sgpp::datadriven::RefinementHandler::getRefinementResult(), grids, friedman::p, prior, processedPoints, refinementHandler, trained, and usePrior.

◆ trainParallel()

void sgpp::datadriven::LearnerSGDEOnOffParallel::trainParallel	(	size_t	batchSize,
		size_t	maxDataPasses,
		std::string	refinementFunctorType,
		std::string	refMonitor,
		size_t	refPeriod,
		double	accDeclineThreshold,
		size_t	accDeclineBufferSize,
		size_t	minRefInterval
	)

Trains the learner with the given dataset.

Parameters

batchSize	Size of subset of data points used for each training step
maxDataPasses	The number of passes over the whole training data
refinementFunctorType	The refinement indicator (surplus, zero-crossings or data-based)
refMonitor	The refinement strategy (periodic or convergence-based)
refPeriod	The refinement interval (if periodic refinement is chosen)
accDeclineThreshold	The convergence threshold (if convergence-based refinement is chosen)
accDeclineBufferSize	The number of accuracy measurements which are used to check convergence (if convergence-based refinement is chosen)
minRefInterval	The minimum number of data points (or data batches) which have to be processed before next refinement can be scheduled (if convergence-based refinement is chosen)

◆ updateAlpha()

void sgpp::datadriven::LearnerSGDEOnOffParallel::updateAlpha	(	size_t	classIndex,
		std::list< size_t > *	deletedPoints,
		size_t	newPoints
	)

Updates the surplus vector of a certain class.

Parameters

classIndex	the index of the class
deletedPoints	a list of indexes of deleted points (coarsening)
newPoints	the number of new grid points (refinement)

References alpha, alphas, sgpp::base::DataVector::getSize(), sgpp::base::DataVector::remove(), and sgpp::base::DataVector::resizeZero().

Referenced by sgpp::datadriven::RefinementHandler::updateClassVariablesAfterRefinement().

◆ waitForAllGridsConsistent()

void sgpp::datadriven::LearnerSGDEOnOffParallel::waitForAllGridsConsistent ( )

protected

Wait for all grids to reach a consistent state before continuing.

References checkGridStateConsistent(), getLocalGridVersion(), localGridVersions, and sgpp::datadriven::MPIMethods::waitForGridConsistent().

Referenced by workBatch().

◆ workBatch()

void sgpp::datadriven::LearnerSGDEOnOffParallel::workBatch	(	Dataset	dataset,
		size_t	batchOffset,
		bool	doCrossValidation
	)

Train from a batch.

Will wait until all grids are consistent, fill the dataset, learn from the dataset, and send the new alpha vector to the master.

Parameters

dataset	An empty dataset with size and dimension set.
batchOffset	The offset from the start of the training set to assemble the batch from.
doCrossValidation	Whether to cross validate results.

References alphas, assembleNextBatchData(), D, getDensityFunctions(), getLocalGridVersion(), sgpp::datadriven::Dataset::getNumberInstances(), getNumClasses(), sgpp::datadriven::MPIMethods::sendMergeGridNetworkMessage(), train(), and waitForAllGridsConsistent().

Referenced by sgpp::datadriven::MPIMethods::runBatch().

Member Data Documentation

◆ adaptivityConfig

sgpp::base::AdaptivityConfiguration& sgpp::datadriven::LearnerSGDEOnOffParallel::adaptivityConfig

protected

Referenced by doRefinementForAll(), LearnerSGDEOnOffParallel(), and trainParallel().

◆ alphas

std::vector<DataVector*> sgpp::datadriven::LearnerSGDEOnOffParallel::alphas

protected

Referenced by doRefinementForAll(), LearnerSGDEOnOffParallel(), mergeAlphaValues(), predict(), train(), updateAlpha(), and workBatch().

◆ avgErrors

DataVector sgpp::datadriven::LearnerSGDEOnOffParallel::avgErrors

protected

Referenced by LearnerSGDEOnOffParallel().

◆ beta

double sgpp::datadriven::LearnerSGDEOnOffParallel::beta

protected

Referenced by LearnerSGDEOnOffParallel().

◆ classLabels

DataVector sgpp::datadriven::LearnerSGDEOnOffParallel::classLabels

protected

Referenced by allocateClassMatrices(), LearnerSGDEOnOffParallel(), mergeAlphaValues(), and predict().

◆ densityEstimationConfig

sgpp::datadriven::DensityEstimationConfiguration& sgpp::datadriven::LearnerSGDEOnOffParallel::densityEstimationConfig

protected

Referenced by computeNewSystemMatrixDecomposition(), LearnerSGDEOnOffParallel(), and train().

◆ densityFunctions

std::vector<std::pair<std::unique_ptr<DBMatOnlineDE>, size_t> > sgpp::datadriven::LearnerSGDEOnOffParallel::densityFunctions

protected

Referenced by getDensityFunctions(), LearnerSGDEOnOffParallel(), predict(), and train().

◆ gridConfig

sgpp::base::GeneralGridConfiguration& sgpp::datadriven::LearnerSGDEOnOffParallel::gridConfig

protected

Referenced by LearnerSGDEOnOffParallel().

◆ grids

std::vector<std::unique_ptr<Grid> > sgpp::datadriven::LearnerSGDEOnOffParallel::grids

protected

Referenced by computeNewSystemMatrixDecomposition(), doRefinementForAll(), getGrid(), LearnerSGDEOnOffParallel(), predict(), printGridSizeStatistics(), and train().

◆ localGridVersions

std::vector<size_t> sgpp::datadriven::LearnerSGDEOnOffParallel::localGridVersions

protected

Vector that holds the grid version for every class.

Referenced by checkAllGridsConsistent(), checkGridStateConsistent(), getLocalGridVersion(), LearnerSGDEOnOffParallel(), mergeAlphaValues(), setLocalGridVersion(), and waitForAllGridsConsistent().

◆ mpiTaskScheduler

MPITaskScheduler& sgpp::datadriven::LearnerSGDEOnOffParallel::mpiTaskScheduler

protected

Reference to the currently installed MPI Task Scheduler.

Referenced by assignBatchToWorker(), getScheduler(), LearnerSGDEOnOffParallel(), mergeAlphaValues(), and trainParallel().

◆ numClasses

size_t sgpp::datadriven::LearnerSGDEOnOffParallel::numClasses

protected

Referenced by getNumClasses(), LearnerSGDEOnOffParallel(), and predict().

◆ offline

std::unique_ptr<DBMatOffline> sgpp::datadriven::LearnerSGDEOnOffParallel::offline

protected

Referenced by getOffline(), and LearnerSGDEOnOffParallel().

◆ offlineContainer

std::vector<std::unique_ptr<DBMatOffline> > sgpp::datadriven::LearnerSGDEOnOffParallel::offlineContainer

protected

Referenced by LearnerSGDEOnOffParallel().

◆ prior

std::map<double, double> sgpp::datadriven::LearnerSGDEOnOffParallel::prior

protected

Referenced by LearnerSGDEOnOffParallel(), mergeAlphaValues(), predict(), and train().

◆ processedPoints

size_t sgpp::datadriven::LearnerSGDEOnOffParallel::processedPoints

protected

Referenced by LearnerSGDEOnOffParallel(), train(), and trainParallel().

◆ refinementHandler

RefinementHandler sgpp::datadriven::LearnerSGDEOnOffParallel::refinementHandler

protected

Instance of the currently installed refinement handler.

Referenced by computeNewSystemMatrixDecomposition(), doRefinementForAll(), getRefinementHandler(), LearnerSGDEOnOffParallel(), mergeAlphaValues(), train(), and trainParallel().

◆ regularizationConfig

sgpp::datadriven::RegularizationConfiguration& sgpp::datadriven::LearnerSGDEOnOffParallel::regularizationConfig

protected

Referenced by computeNewSystemMatrixDecomposition(), and LearnerSGDEOnOffParallel().

◆ testData

Dataset& sgpp::datadriven::LearnerSGDEOnOffParallel::testData

protected

Referenced by getAccuracy(), and LearnerSGDEOnOffParallel().

◆ trainData

Dataset& sgpp::datadriven::LearnerSGDEOnOffParallel::trainData

protected

Referenced by python.uq.dists.KDEDist.KDEDist::__init__(), assembleNextBatchData(), assignBatchToWorker(), doRefinementForAll(), getDimensionality(), getTrainData(), python.uq.dists.KDEDist.KDEDist::marginalize(), python.uq.dists.KDEDist.KDEDist::marginalizeToDimX(), python.uq.dists.KDEDist.KDEDist::toJson(), and trainParallel().

◆ trained

bool sgpp::datadriven::LearnerSGDEOnOffParallel::trained

protected

Referenced by LearnerSGDEOnOffParallel(), and train().

◆ usePrior

bool sgpp::datadriven::LearnerSGDEOnOffParallel::usePrior

protected

Referenced by LearnerSGDEOnOffParallel(), mergeAlphaValues(), and train().

◆ validationData

Dataset* sgpp::datadriven::LearnerSGDEOnOffParallel::validationData

protected

Referenced by getValidationData(), and LearnerSGDEOnOffParallel().

◆ workerActive

bool sgpp::datadriven::LearnerSGDEOnOffParallel::workerActive

protected

Boolean used to detect when a shutdown of a worker has been requested.

Referenced by LearnerSGDEOnOffParallel(), shutdownMPINodes(), and trainParallel().

The documentation for this class was generated from the following files:

datadriven/src/sgpp/datadriven/application/learnersgdeonoffparallel/LearnerSGDEOnOffParallel.hpp
datadriven/src/sgpp/datadriven/application/learnersgdeonoffparallel/LearnerSGDEOnOffParallel.cpp

Public Member Functions

Static Public Member Functions

Protected Member Functions

Protected Attributes

Detailed Description

Constructor & Destructor Documentation

◆ LearnerSGDEOnOffParallel()

◆ ~LearnerSGDEOnOffParallel()

Member Function Documentation

◆ allocateClassMatrices()

◆ assembleNextBatchData()

◆ assignBatchToWorker()

◆ checkAllGridsConsistent()

◆ checkGridStateConsistent()

◆ computeNewSystemMatrixDecomposition()

◆ doRefinementForAll()

◆ getAccuracy()

◆ getDensityFunctions()

◆ getDimensionality()

◆ getError()

◆ getGrid()

◆ getLocalGridVersion()

◆ getNumClasses()

◆ getOffline()

◆ getRefinementHandler()

◆ getScheduler()

◆ getTrainData()

◆ getValidationData()

◆ isVersionConsistent()

◆ mergeAlphaValues()

◆ predict()

◆ printGridSizeStatistics()

◆ setLocalGridVersion()

◆ shutdownMPINodes()

◆ splitBatchIntoClasses()

◆ train() [1/2]

◆ train() [2/2]

◆ trainParallel()

◆ updateAlpha()

◆ waitForAllGridsConsistent()

◆ workBatch()

Member Data Documentation

◆ adaptivityConfig

◆ alphas

◆ avgErrors

◆ beta

◆ classLabels

◆ densityEstimationConfig

◆ densityFunctions

◆ gridConfig

◆ grids

◆ localGridVersions

◆ mpiTaskScheduler

◆ numClasses

◆ offline

◆ offlineContainer

◆ prior

◆ processedPoints

◆ refinementHandler

◆ regularizationConfig

◆ testData

◆ trainData

◆ trained

◆ usePrior

◆ validationData

◆ workerActive