Skip to content

Commit

Permalink
feature: adding new parameters in pca (#2601)
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandr-Solovev authored Dec 22, 2023
1 parent 4ff1b59 commit ef1f6c8
Show file tree
Hide file tree
Showing 52 changed files with 1,720 additions and 225 deletions.
2 changes: 2 additions & 0 deletions cpp/daal/include/algorithms/pca/pca_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,8 @@ class DAAL_EXPORT BaseBatchParameter : public daal::algorithms::Parameter
DAAL_UINT64 resultsToCompute; /*!< 64 bit integer flag that indicates the results to compute */
size_t nComponents; /*!< number of components for reduced implementation */
bool isDeterministic; /*!< sign flip if required */
bool doScale; /*!< scaling if required */
bool isCorrelation; /*!< correlation is provided */
};

/**
Expand Down
2 changes: 2 additions & 0 deletions cpp/daal/include/services/error_indexes.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ enum ErrorID
computation modes */
ErrorIncorrectNComponents = -7805, /*!< Incorrect nComponents parameter: nComponents should be less or equal
to number of columns in testing dataset */
ErrorIncorrectEigenValuesSum = -7806, /*!< The sum of eigenvalues is less or equal to zero */
ErrorIncorrectSingularValuesDenominator = -7807, /*!< The denominator of eigenvalues is less or equal to zero */

// QR errors: -8000..-8199
ErrorQRInternal = -8000, /*!< QR internal error */
Expand Down
2 changes: 1 addition & 1 deletion cpp/daal/src/algorithms/pca/pca_baseparameter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace pca
{
namespace interface3
{
BaseBatchParameter::BaseBatchParameter() : resultsToCompute(none), nComponents(0), isDeterministic(false) {}
BaseBatchParameter::BaseBatchParameter() : resultsToCompute(none), nComponents(0), isDeterministic(false), doScale(true), isCorrelation(false) {}
} // namespace interface3
} // namespace pca
} // namespace algorithms
Expand Down
3 changes: 3 additions & 0 deletions cpp/daal/src/algorithms/pca/pca_dense_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class PCADenseBase : public Kernel
services::Status signFlipEigenvectors(NumericTable & eigenvectors) const;
services::Status fillTable(NumericTable & table, algorithmFPType val) const;
services::Status copyTable(NumericTable & source, NumericTable & dest) const;
/**
 * Computes, for every eigenvalue, its share of the total variance.
 * The implementation divides each entry of the first row of \p eigenvalues by the
 * sum of the first row of \p variances and stores the quotients in the first row
 * of \p explained_variances_ratio.
 *
 * \param[in]  eigenvalues                Table whose first row holds the eigenvalues
 * \param[in]  variances                  Table whose first row is summed to form the divisor
 * \param[out] explained_variances_ratio  Table whose first row receives the ratios
 * \return services::ErrorIncorrectEigenValuesSum when the sum of variances is not positive
 */
services::Status computeExplainedVariancesRatio(const data_management::NumericTable & eigenvalues,
const data_management::NumericTable & variances,
data_management::NumericTable & explained_variances_ratio);

private:
void signFlipArray(size_t size, algorithmFPType * source) const;
Expand Down
2 changes: 1 addition & 1 deletion cpp/daal/src/algorithms/pca/pca_dense_base_fpt_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace pca
{
namespace internal
{
template class PCADenseBase<DAAL_FPTYPE, DAAL_CPU>;
template class DAAL_EXPORT PCADenseBase<DAAL_FPTYPE, DAAL_CPU>;
} // namespace internal
} // namespace pca
} // namespace algorithms
Expand Down
33 changes: 33 additions & 0 deletions cpp/daal/src/algorithms/pca/pca_dense_base_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,39 @@ namespace internal
{
using namespace daal::internal;

/**
 * Writes eigenvalue / sum(variances) for every eigenvalue into the first row of
 * the output table.
 *
 * \param[in]  eigenvalues                Table whose first row holds the eigenvalues
 * \param[in]  variances                  Table whose first row is accumulated into the divisor
 * \param[out] explained_variances_ratio  Table whose first row receives the ratios
 * \return services::ErrorIncorrectEigenValuesSum when the accumulated variance is
 *         less than or equal to zero; a default-constructed status otherwise
 */
template <typename algorithmFPType, CpuType cpu>
services::Status PCADenseBase<algorithmFPType, cpu>::computeExplainedVariancesRatio(const data_management::NumericTable & eigenvalues,
                                                                                    const data_management::NumericTable & variances,
                                                                                    data_management::NumericTable & explained_variances_ratio)
{
    const size_t ratioCount    = eigenvalues.getNumberOfColumns();
    const size_t varianceCount = variances.getNumberOfColumns();

    /* Inputs are only read here; the const_cast is needed to construct the row readers */
    ReadRows<algorithmFPType, cpu> eigenRows(const_cast<data_management::NumericTable &>(eigenvalues), 0, 1);
    DAAL_CHECK_BLOCK_STATUS(eigenRows);
    const algorithmFPType * const eigenPtr = eigenRows.get();

    ReadRows<algorithmFPType, cpu> varianceRows(const_cast<data_management::NumericTable &>(variances), 0, 1);
    DAAL_CHECK_BLOCK_STATUS(varianceRows);
    const algorithmFPType * const variancePtr = varianceRows.get();

    WriteRows<algorithmFPType, cpu> ratioRows(explained_variances_ratio, 0, 1);
    DAAL_CHECK_MALLOC(ratioRows.get());
    algorithmFPType * const ratioPtr = ratioRows.get();

    /* Accumulate the total variance in index order */
    algorithmFPType totalVariance = 0;
    for (const algorithmFPType * it = variancePtr; it != variancePtr + varianceCount; ++it)
    {
        totalVariance += *it;
    }

    /* A non-positive total cannot serve as a divisor */
    if (algorithmFPType(0) >= totalVariance)
    {
        return services::Status(services::ErrorIncorrectEigenValuesSum);
    }

    for (size_t k = 0; k != ratioCount; ++k)
    {
        ratioPtr[k] = eigenPtr[k] / totalVariance;
    }
    return services::Status();
}

template <typename algorithmFPType, CpuType cpu>
services::Status PCADenseBase<algorithmFPType, cpu>::copyTable(NumericTable & source, NumericTable & dest) const
{
Expand Down
3 changes: 3 additions & 0 deletions cpp/daal/src/algorithms/pca/pca_dense_correlation_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class PCACorrelationBase : public PCACorrelationBaseIface<algorithmFPType>, publ
data_management::NumericTable & eigenvalues) DAAL_C11_OVERRIDE;
services::Status computeEigenvectorsInplace(size_t nFeatures, algorithmFPType * eigenvectors, algorithmFPType * eigenvalues);
services::Status sortEigenvectorsDescending(size_t nFeatures, algorithmFPType * eigenvectors, algorithmFPType * eigenvalues);
/**
 * Derives singular values from eigenvalues; the definition computes
 * sqrt((nRows - 1) * eigenvalue) for every column of the first row.
 * NOTE(review): the second parameter is named `variances` here but
 * `singular_values` in the out-of-class definition, which writes the singular
 * values into it; consider aligning the names.
 */
services::Status computeSingularValues(const data_management::NumericTable & eigenvalues, data_management::NumericTable & variances,
size_t nRows);
/**
 * Copies the main diagonal of a square matrix into the first row of \p variances.
 * NOTE(review): the first parameter is named `correlation` here but `covariance`
 * in the out-of-class definition; consider aligning the names.
 */
services::Status computeVariancesFromCov(const data_management::NumericTable & correlation, data_management::NumericTable & variances);
services::Status signFlipEigenvectors(NumericTable & eigenvectors) const DAAL_C11_OVERRIDE;
services::Status fillTable(NumericTable & table, algorithmFPType val) const DAAL_C11_OVERRIDE;
services::Status copyVarianceFromCovarianceTable(NumericTable & source, NumericTable & dest) const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,8 @@ class PCACorrelationBaseIface
virtual services::Status computeCorrelationEigenvalues(const data_management::NumericTable & correlation,
data_management::NumericTable & eigenvectors,
data_management::NumericTable & eigenvalues) = 0;

virtual services::Status signFlipEigenvectors(NumericTable & eigenvectors) const = 0;
virtual services::Status fillTable(NumericTable & table, algorithmFPType val) const = 0;
virtual services::Status signFlipEigenvectors(NumericTable & eigenvectors) const = 0;
virtual services::Status fillTable(NumericTable & table, algorithmFPType val) const = 0;
};

} // namespace internal
Expand Down
48 changes: 45 additions & 3 deletions cpp/daal/src/algorithms/pca/pca_dense_correlation_base_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,27 @@ void PCACorrelationBase<algorithmFPType, cpu>::copyArray(size_t size, const algo
}
}

/**
 * Extracts the main diagonal of the covariance matrix into the first row of the
 * variances table.
 *
 * \param[in]  covariance  Matrix read as getNumberOfRows() x getNumberOfRows() values
 * \param[out] variances   Table whose first row receives the diagonal entries
 * \return Status of the block acquisition; a default-constructed status on success
 */
template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationBase<algorithmFPType, cpu>::computeVariancesFromCov(const data_management::NumericTable & covariance,
                                                                                   data_management::NumericTable & variances)
{
    size_t dim = covariance.getNumberOfRows();
    DAAL_OVERFLOW_CHECK_BY_MULTIPLICATION(size_t, dim, sizeof(algorithmFPType));

    ReadRows<algorithmFPType, cpu> covRows(const_cast<data_management::NumericTable &>(covariance), 0, dim);
    DAAL_CHECK_BLOCK_STATUS(covRows);
    const algorithmFPType * const covPtr = covRows.get();

    WriteRows<algorithmFPType, cpu> varRows(variances, 0, 1);
    DAAL_CHECK_MALLOC(varRows.get());
    algorithmFPType * const varPtr = varRows.get();

    /* Element (k, k) of the row-major matrix sits at offset k * (dim + 1) */
    const size_t diagStride = dim + 1ul;
    size_t diagOffset       = 0ul;
    for (size_t k = 0ul; k != dim; ++k)
    {
        varPtr[k] = covPtr[diagOffset];
        diagOffset += diagStride;
    }
    return services::Status();
}

template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationBase<algorithmFPType, cpu>::correlationFromCovarianceTable(NumericTable & covariance) const
{
Expand All @@ -67,12 +88,12 @@ services::Status PCACorrelationBase<algorithmFPType, cpu>::correlationFromCovari
algorithmFPType * covarianceArray = covarianceBlock.get();

algorithmFPType * diagInvSqrts = diagInvSqrtsArray.get();
for (size_t i = 0; i < nFeatures; i++)
for (size_t i = 0ul; i < nFeatures; ++i)
{
diagInvSqrts[i] = 1.0 / daal::internal::MathInst<algorithmFPType, cpu>::sSqrt(covarianceArray[i * nFeatures + i]);
}

for (size_t i = 0; i < nFeatures; i++)
for (size_t i = 0ul; i < nFeatures; ++i)
{
for (size_t j = 0; j < i; j++)
{
Expand All @@ -82,7 +103,7 @@ services::Status PCACorrelationBase<algorithmFPType, cpu>::correlationFromCovari
}

/* Copy results into symmetric upper triangle */
for (size_t i = 0; i < nFeatures; i++)
for (size_t i = 0ul; i < nFeatures; ++i)
{
for (size_t j = 0; j < i; j++)
{
Expand Down Expand Up @@ -113,6 +134,27 @@ services::Status PCACorrelationBase<algorithmFPType, cpu>::copyVarianceFromCovar
return services::Status();
}

/**
 * Converts eigenvalues into singular values:
 *     singular_values[i] = sqrt((nRows - 1) * eigenvalues[i])
 * for every column of the first row of the eigenvalues table.
 *
 * \param[in]  eigenvalues      Table whose first row holds the eigenvalues
 * \param[out] singular_values  Table whose first row receives the singular values
 * \param[in]  nRows            Row count used in the (nRows - 1) factor; must be at least 1
 * \return services::ErrorIncorrectSingularValuesDenominator when nRows is zero,
 *         the block status on acquisition failure, a default-constructed status otherwise
 */
template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationBase<algorithmFPType, cpu>::computeSingularValues(const data_management::NumericTable & eigenvalues,
                                                                                 data_management::NumericTable & singular_values, size_t nRows)
{
    typedef daal::internal::MathInst<algorithmFPType, cpu> Math;

    /* nRows is unsigned: for nRows == 0 the expression (nRows - 1) wraps around to the
       largest size_t value and every result would be scaled by a meaningless factor */
    if (nRows == 0)
    {
        return services::Status(services::ErrorIncorrectSingularValuesDenominator);
    }

    const size_t nComponents = eigenvalues.getNumberOfColumns();
    ReadRows<algorithmFPType, cpu> eigenValuesBlock(const_cast<data_management::NumericTable &>(eigenvalues), 0, 1);
    DAAL_CHECK_BLOCK_STATUS(eigenValuesBlock);
    const algorithmFPType * const eigenValuesArray = eigenValuesBlock.get();
    WriteRows<algorithmFPType, cpu> singularValuesBlock(singular_values, 0, 1);
    DAAL_CHECK_MALLOC(singularValuesBlock.get());
    algorithmFPType * singularValuesArray = singularValuesBlock.get();

    const algorithmFPType factor = static_cast<algorithmFPType>(nRows - 1);
    for (size_t i = 0ul; i < nComponents; ++i)
    {
        singularValuesArray[i] = factor * eigenValuesArray[i];
    }
    /* Square root is applied in place over the scaled values */
    Math::vSqrt(nComponents, singularValuesArray, singularValuesArray);
    return services::Status();
}

template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationBase<algorithmFPType, cpu>::computeCorrelationEigenvalues(const data_management::NumericTable & correlation,
data_management::NumericTable & eigenvectors,
Expand Down
30 changes: 28 additions & 2 deletions cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ namespace internal
using namespace daal::services::internal;
using namespace daal::data_management;
using namespace daal::internal;

template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationKernel<batch, algorithmFPType, cpu>::compute(bool isCorrelation, const data_management::NumericTable & dataTable,
covariance::BatchImpl * covarianceAlg,
Expand All @@ -62,7 +63,7 @@ template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationKernel<batch, algorithmFPType, cpu>::compute(
bool isCorrelation, bool isDeterministic, const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg,
DAAL_UINT64 resultsToCompute, data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues,
data_management::NumericTable & means, data_management::NumericTable & variances)
data_management::NumericTable & means, data_management::NumericTable & variances, bool doScale)
{
DAAL_ITTNOTIFY_SCOPED_TASK(compute);

Expand Down Expand Up @@ -108,7 +109,7 @@ services::Status PCACorrelationKernel<batch, algorithmFPType, cpu>::compute(
DAAL_ITTNOTIFY_SCOPED_TASK(compute.full.copyVariances);
DAAL_CHECK_STATUS(status, this->copyVarianceFromCovarianceTable(covarianceTable, variances));
}

if (doScale)
{
DAAL_ITTNOTIFY_SCOPED_TASK(compute.full.correlationFromCovariance);
DAAL_CHECK_STATUS(status, this->correlationFromCovarianceTable(covarianceTable));
Expand All @@ -129,6 +130,31 @@ services::Status PCACorrelationKernel<batch, algorithmFPType, cpu>::compute(
return status;
}

/**
 * Runs the correlation-based PCA computation driven by a parameter object and
 * optionally derives singular values and explained-variance ratios from the result.
 *
 * \param[in]  dataTable                  Input table forwarded to the core compute overload
 * \param[in]  covarianceAlg              Covariance algorithm forwarded to the core compute overload
 * \param[out] eigenvectors               Receives the eigenvectors
 * \param[out] eigenvalues                Receives the eigenvalues
 * \param[out] means                      Receives the means
 * \param[out] variances                  Receives the variances
 * \param[out] singular_values            Optional; filled when not nullptr
 * \param[out] explained_variances_ratio  Optional; filled when not nullptr
 * \param[in]  parameter                  Supplies isCorrelation, isDeterministic, resultsToCompute and doScale
 * \return First failing status of the core computation or of a post-processing step
 */
template <typename algorithmFPType, CpuType cpu>
services::Status PCACorrelationKernel<batch, algorithmFPType, cpu>::compute(
    const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, data_management::NumericTable & eigenvectors,
    data_management::NumericTable & eigenvalues, data_management::NumericTable & means, data_management::NumericTable & variances,
    data_management::NumericTable * singular_values, data_management::NumericTable * explained_variances_ratio, const BaseBatchParameter * parameter)
{
    DAAL_ITTNOTIFY_SCOPED_TASK(compute);

    services::Status status;
    /* Propagate a failure of the core computation instead of post-processing invalid eigenvalues */
    DAAL_CHECK_STATUS(status, this->compute(parameter->isCorrelation, parameter->isDeterministic, dataTable, covarianceAlg,
                                            parameter->resultsToCompute, eigenvectors, eigenvalues, means, variances, parameter->doScale));

    if (singular_values != nullptr)
    {
        DAAL_ITTNOTIFY_SCOPED_TASK(compute.correlation.computeSingularValues);
        DAAL_CHECK_STATUS(status, this->computeSingularValues(eigenvalues, *singular_values, dataTable.getNumberOfRows()));
    }
    if (explained_variances_ratio != nullptr)
    {
        DAAL_ITTNOTIFY_SCOPED_TASK(compute.correlation.computeExplainedVariancesRatio);
        DAAL_CHECK_STATUS(status, this->computeExplainedVariancesRatio(eigenvalues, variances, *explained_variances_ratio));
    }
    return status;
}

} // namespace internal
} // namespace pca
} // namespace algorithms
Expand Down
17 changes: 16 additions & 1 deletion cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,28 @@ class PCACorrelationKernel<batch, algorithmFPType, cpu> : public PCACorrelationB
{
public:
explicit PCACorrelationKernel() {};

using PCACorrelationBase<algorithmFPType, cpu>::computeCorrelationEigenvalues;

using PCACorrelationBase<algorithmFPType, cpu>::computeSingularValues;

using PCACorrelationBase<algorithmFPType, cpu>::computeVariancesFromCov;

using PCADenseBase<algorithmFPType, cpu>::computeExplainedVariancesRatio;

services::Status compute(bool isCorrelation, const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg,
data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues);

services::Status compute(bool isCorrelation, bool isDeterministic, const data_management::NumericTable & dataTable,
covariance::BatchImpl * covarianceAlg, DAAL_UINT64 resultsToCompute, data_management::NumericTable & eigenvectors,
data_management::NumericTable & eigenvalues, data_management::NumericTable & means,
data_management::NumericTable & variances);
data_management::NumericTable & variances, bool doScale = true);

services::Status compute(const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg,
data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues,
data_management::NumericTable & means, data_management::NumericTable & variances,
data_management::NumericTable * singular_values, data_management::NumericTable * explained_variances_ratio,
const BaseBatchParameter * parameter);
};

} // namespace internal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ class PCACorrelationKernel<online, algorithmFPType, cpu> : public PCACorrelation

using PCACorrelationBase<algorithmFPType, cpu>::computeCorrelationEigenvalues;

using PCACorrelationBase<algorithmFPType, cpu>::computeSingularValues;

using PCACorrelationBase<algorithmFPType, cpu>::computeVariancesFromCov;

using PCADenseBase<algorithmFPType, cpu>::computeExplainedVariancesRatio;

services::Status compute(const data_management::NumericTablePtr & pData, PartialResult<correlationDense> * partialResult,
const OnlineParameter<algorithmFPType, correlationDense> * parameter);

Expand Down
26 changes: 26 additions & 0 deletions cpp/daal/src/algorithms/pca/pca_dense_svd_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ namespace pca
{
namespace internal
{
using namespace daal::services::internal;
using namespace daal::data_management;
using namespace daal::internal;

enum InputDataType
{
nonNormalizedDataset = 0, /*!< Original, non-normalized data set */
Expand All @@ -47,12 +51,34 @@ class PCASVDKernelBase : public PCADenseBase<algorithmFPType, cpu>
{
public:
PCASVDKernelBase() {}
using PCADenseBase<algorithmFPType, cpu>::computeExplainedVariancesRatio;
virtual ~PCASVDKernelBase() {}

protected:
/**
 * Converts singular values into eigenvalues; the definition computes
 * singular_value^2 / (nRows - 1) for every column of the first row.
 * NOTE(review): the parameter names are swapped relative to the out-of-class
 * definition, which reads singular values from the first table and writes
 * eigenvalues into the second; consider aligning the names.
 */
services::Status computeEigenValues(const data_management::NumericTable & eigenvalues, data_management::NumericTable & singular_values,
size_t nRows);
services::Status scaleSingularValues(data_management::NumericTable & eigenvaluesTable, size_t nVectors);
};

/**
 * Converts singular values into eigenvalues:
 *     eigenvalues[i] = singular_values[i]^2 / (nRows - 1)
 * for every column of the first row of the singular values table.
 *
 * \param[in]  singular_values  Table whose first row holds the singular values
 * \param[out] eigenvalues      Table whose first row receives the eigenvalues
 * \param[in]  nRows            Row count used in the (nRows - 1) divisor; must be at least 2
 * \return services::ErrorIncorrectSingularValuesDenominator when nRows is below 2,
 *         the block status on acquisition failure, a default-constructed status otherwise
 */
template <typename algorithmFPType, CpuType cpu>
services::Status PCASVDKernelBase<algorithmFPType, cpu>::computeEigenValues(const data_management::NumericTable & singular_values,
                                                                            data_management::NumericTable & eigenvalues, size_t nRows)
{
    /* nRows is unsigned, so "(nRows - 1) <= 0" only catches nRows == 1; nRows == 0 wraps around
       to the largest size_t value and would pass. Compare against 2 to reject both cases. */
    if (nRows < 2) return services::Status(services::ErrorIncorrectSingularValuesDenominator);

    const size_t nComponents = singular_values.getNumberOfColumns();
    ReadRows<algorithmFPType, cpu> SingularValuesBlock(const_cast<data_management::NumericTable &>(singular_values), 0, 1);
    DAAL_CHECK_BLOCK_STATUS(SingularValuesBlock);
    const algorithmFPType * const SingularValuesArray = SingularValuesBlock.get();
    WriteRows<algorithmFPType, cpu> EigenValuesBlock(eigenvalues, 0, 1);
    DAAL_CHECK_MALLOC(EigenValuesBlock.get());
    algorithmFPType * EigenValuesArray = EigenValuesBlock.get();

    /* Loop-invariant divisor, converted once to the floating-point working type */
    const algorithmFPType denominator = static_cast<algorithmFPType>(nRows - 1);
    for (size_t i = 0; i < nComponents; i++)
    {
        EigenValuesArray[i] = SingularValuesArray[i] * SingularValuesArray[i] / denominator;
    }
    return services::Status();
}

template <typename algorithmFPType, CpuType cpu>
services::Status PCASVDKernelBase<algorithmFPType, cpu>::scaleSingularValues(NumericTable & eigenvaluesTable, size_t nVectors)
{
Expand Down
28 changes: 27 additions & 1 deletion cpp/daal/src/algorithms/pca/pca_dense_svd_batch_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ services::Status PCASVDBatchKernel<algorithmFPType, ParameterType, cpu>::compute
}

DAAL_CHECK_STATUS(status, this->decompose(normalizedData, eigenvalues, eigenvectors));
DAAL_CHECK_STATUS(status, this->scaleSingularValues(eigenvalues, data.getNumberOfRows()));
if (parameter->doScale)
{
DAAL_CHECK_STATUS(status, this->scaleSingularValues(eigenvalues, data.getNumberOfRows()));
}
if (parameter->isDeterministic)
{
DAAL_CHECK_STATUS(status, this->signFlipEigenvectors(eigenvectors));
Expand All @@ -111,6 +114,29 @@ services::Status PCASVDBatchKernel<algorithmFPType, ParameterType, cpu>::compute
return status;
}

/**
 * Runs the SVD-based PCA computation and optionally derives eigenvalues and
 * explained-variance ratios from the resulting singular values table.
 *
 * \param[in]  type                       Kind of the input data, forwarded to the core compute overload
 * \param[in]  data                       Input data table
 * \param[out] eigenvectors               Receives the eigenvectors
 * \param[out] singular_values            Receives the values produced by the core compute overload
 * \param[out] means                      Receives the means
 * \param[out] variances                  Receives the variances
 * \param[out] eigenvalues                Optional; filled when not nullptr. With parameter->doScale set the
 *                                        singular_values table is copied as is, otherwise the values are
 *                                        converted with computeEigenValues
 * \param[out] explained_variances_ratio  Optional; filled when not nullptr. Requires \p eigenvalues
 * \param[in]  parameter                  Algorithm parameter; doScale selects how eigenvalues are obtained
 * \return First failing status of the core computation or of a post-processing step;
 *         services::ErrorNullNumericTable when ratios are requested without an eigenvalues table
 */
template <typename algorithmFPType, typename ParameterType, CpuType cpu>
services::Status PCASVDBatchKernel<algorithmFPType, ParameterType, cpu>::compute(
    InputDataType type, data_management::NumericTable & data, data_management::NumericTable & eigenvectors,
    data_management::NumericTable & singular_values, data_management::NumericTable & means, data_management::NumericTable & variances,
    data_management::NumericTable * eigenvalues, data_management::NumericTable * explained_variances_ratio, const ParameterType * parameter)
{
    Status status;
    /* Propagate a failure of the core computation instead of post-processing invalid results */
    DAAL_CHECK_STATUS(status, this->compute(type, data, parameter, singular_values, eigenvectors, means, variances));

    /* Only touch the optional eigenvalues table when the caller supplied one */
    if (eigenvalues != nullptr)
    {
        if (parameter->doScale)
        {
            DAAL_CHECK_STATUS(status, this->copyTable(singular_values, *eigenvalues));
        }
        else
        {
            DAAL_CHECK_STATUS(status, this->computeEigenValues(singular_values, *eigenvalues, data.getNumberOfRows()));
        }
    }

    if (explained_variances_ratio != nullptr)
    {
        /* The ratios are derived from the eigenvalues table, so it must be present */
        if (eigenvalues == nullptr)
        {
            return services::Status(services::ErrorNullNumericTable);
        }
        DAAL_CHECK_STATUS(status, this->computeExplainedVariancesRatio(*eigenvalues, variances, *explained_variances_ratio));
    }
    return status;
}

/********************* tls_data_t class *******************************************************/
template <typename algorithmFPType, CpuType cpu>
struct tls_data_t
Expand Down
Loading

0 comments on commit ef1f6c8

Please sign in to comment.