-
Notifications
You must be signed in to change notification settings - Fork 217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add spectral embedding algorithm #2875
base: main
Are you sure you want to change the base?
Changes from all commits
7afd556
dfe990c
34d930f
e67cb3e
b93530e
a208c80
32b9425
8f0e130
c3318e9
4eb85bc
aaeada8
a48b92e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -223,6 +223,7 @@ daal_algorithms( | |
"stump", | ||
"svd", | ||
"svm", | ||
"spectral_embedding", | ||
"weak_learner/inner", | ||
], | ||
) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package(default_visibility = ["//visibility:public"]) | ||
load("@onedal//dev/bazel:daal.bzl", "daal_module") | ||
|
||
daal_module( | ||
name = "kernel", | ||
auto = True, | ||
deps = [ | ||
"@onedal//cpp/daal:core", | ||
"@onedal//cpp/daal/src/algorithms/cosdistance:kernel", | ||
], | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* file: spectral_embedding_default_dense_fpt_cpu.cpp */ | ||
/******************************************************************************* | ||
* Copyright contributors to the oneDAL project | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
/* | ||
//++ | ||
// Instantiation of CPU-specific spectral_embedding kernel implementations | ||
//-- | ||
*/ | ||
|
||
#include "spectral_embedding_kernel.h" | ||
#include "spectral_embedding_default_dense_impl.i" | ||
|
||
namespace daal | ||
{ | ||
namespace algorithms | ||
{ | ||
namespace spectral_embedding | ||
{ | ||
namespace internal | ||
{ | ||
template class DAAL_EXPORT SpectralEmbeddingKernel<DAAL_FPTYPE, Method::defaultDense, DAAL_CPU>; | ||
} // namespace internal | ||
} // namespace spectral_embedding | ||
} // namespace algorithms | ||
} // namespace daal |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,214 @@ | ||||||
/* file: spectral_embedding_default_dense_impl.i */ | ||||||
/******************************************************************************* | ||||||
* Copyright contributors to the oneDAL project | ||||||
* | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
* you may not use this file except in compliance with the License. | ||||||
* You may obtain a copy of the License at | ||||||
* | ||||||
* http://www.apache.org/licenses/LICENSE-2.0 | ||||||
* | ||||||
* Unless required by applicable law or agreed to in writing, software | ||||||
* distributed under the License is distributed on an "AS IS" BASIS, | ||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
* See the License for the specific language governing permissions and | ||||||
* limitations under the License. | ||||||
*******************************************************************************/ | ||||||
|
||||||
/* | ||||||
//++ | ||||||
// Implementation of cosine distance. | ||||||
//-- | ||||||
*/ | ||||||
|
||||||
#include "services/daal_defines.h" | ||||||
#include "src/externals/service_math.h" | ||||||
#include "src/externals/service_blas.h" | ||||||
#include "src/threading/threading.h" | ||||||
#include "src/algorithms/service_error_handling.h" | ||||||
#include "src/data_management/service_numeric_table.h" | ||||||
#include "src/algorithms/cosdistance/cosdistance_kernel.h" | ||||||
#include "src/externals/service_lapack.h" | ||||||
#include <iostream> | ||||||
|
||||||
using namespace daal::internal; | ||||||
|
||||||
namespace daal | ||||||
{ | ||||||
namespace algorithms | ||||||
{ | ||||||
namespace spectral_embedding | ||||||
{ | ||||||
namespace internal | ||||||
{ | ||||||
|
||||||
template <typename algorithmFPType, CpuType cpu> | ||||||
services::Status computeEigenvectorsInplace(size_t nFeatures, algorithmFPType * eigenvectors, algorithmFPType * eigenvalues) | ||||||
{ | ||||||
char jobz = 'V'; | ||||||
char uplo = 'U'; | ||||||
|
||||||
DAAL_INT lwork = 2 * nFeatures * nFeatures + 6 * nFeatures + 1; | ||||||
DAAL_INT liwork = 5 * nFeatures + 3; | ||||||
DAAL_INT info; | ||||||
|
||||||
TArray<algorithmFPType, cpu> work(lwork); | ||||||
TArray<DAAL_INT, cpu> iwork(liwork); | ||||||
DAAL_CHECK_MALLOC(work.get() && iwork.get()); | ||||||
|
||||||
LapackInst<algorithmFPType, cpu>::xsyevd(&jobz, &uplo, (DAAL_INT *)(&nFeatures), eigenvectors, (DAAL_INT *)(&nFeatures), eigenvalues, work.get(), | ||||||
&lwork, iwork.get(), &liwork, &info); | ||||||
if (info != 0) return services::Status(services::ErrorPCAFailedToComputeCorrelationEigenvalues); // CHANGE ERROR STATUS | ||||||
return services::Status(); | ||||||
} | ||||||
|
||||||
/** | ||||||
* \brief Kernel for Spectral Embedding calculation | ||||||
*/ | ||||||
template <typename algorithmFPType, Method method, CpuType cpu> | ||||||
services::Status SpectralEmbeddingKernel<algorithmFPType, method, cpu>::compute(const NumericTable * xTable, NumericTable * embeddingTable, | ||||||
NumericTable * eigenTable, const KernelParameter & par) | ||||||
{ | ||||||
services::Status status; | ||||||
// std::cout << "inside DAAL kernel" << std::endl; | ||||||
// std::cout << "Params: " << par.numberOfEmbeddings << " " << par.numberOfNeighbors << std::endl; | ||||||
size_t k = par.numberOfEmbeddings; | ||||||
size_t filtNum = par.numberOfNeighbors + 1; | ||||||
size_t n = xTable->getNumberOfRows(); /* Number of input feature vectors */ | ||||||
|
||||||
SharedPtr<HomogenNumericTable<algorithmFPType> > tmpMatrixPtr = | ||||||
HomogenNumericTable<algorithmFPType>::create(n, n, NumericTable::doAllocate, &status); | ||||||
|
||||||
DAAL_CHECK_STATUS_VAR(status); | ||||||
NumericTable * covOutput = tmpMatrixPtr.get(); | ||||||
NumericTable * a0 = const_cast<NumericTable *>(xTable); | ||||||
NumericTable * eigenvalues = const_cast<NumericTable *>(eigenTable); | ||||||
|
||||||
// Compute cosine distances matrix | ||||||
{ | ||||||
auto cosDistanceKernel = cosine_distance::internal::DistanceKernel<algorithmFPType, cosine_distance::Method::defaultDense, cpu>(); | ||||||
DAAL_CHECK_STATUS(status, cosDistanceKernel.compute(0, &a0, 0, &covOutput, nullptr)); | ||||||
} | ||||||
|
||||||
WriteRows<algorithmFPType, cpu> xMatrix(covOutput, 0, n); | ||||||
DAAL_CHECK_BLOCK_STATUS(xMatrix); | ||||||
algorithmFPType * x = xMatrix.get(); | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe that according to SDL somewhere around here should be several |
||||||
size_t lcnt, rcnt, cnt; | ||||||
algorithmFPType L, R, M; | ||||||
Comment on lines
+97
to
+98
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would recommend to initialize these variables. It's not a C code =D |
||||||
// Use binary search to find such d that the number of verticies having distance <= d is filtNum | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
const size_t binarySearchIterNum = 20; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What means a magic number 20? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now it is actually a magic number but it should be log(1/eps) where eps is floating point number precision There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be better then to define pre-computed constants for |
||||||
// TODO: add parallel_for | ||||||
for (size_t i = 0; i < n; ++i) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would split off the BinarySearch into a separate function which could be inlined. (i.e. everything in this for loop). Then applying the daal::threader_for would be easier (something like this as an example https://github.com/oneapi-src/oneDAL/blob/main/cpp/daal/src/algorithms/dtrees/forest/df_train_dense_default_impl.i#L434) |
||||||
{ | ||||||
L = 0; // min possible cos distance | ||||||
R = 2; // max possible cos distance | ||||||
Comment on lines
+104
to
+105
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I remember at least some of these variables are actually |
||||||
lcnt = 0; // number of elements with cos distance <= L | ||||||
rcnt = n; // number of elements with cos distance <= R | ||||||
for (size_t ij = 0; ij < binarySearchIterNum; ++ij) | ||||||
{ | ||||||
M = (L + R) / 2; | ||||||
cnt = 0; | ||||||
// Calculate the number of elements in the row with value <= M | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just from a high level perspective, it looks like this code will run O(binarySearchIterNum*n) to try and find the proper M which yields the right number of neighbors. binarySearchIterNum will be log(1/eps) (from here https://github.com/oneapi-src/oneDAL/pull/2875/files#r1720389061) Would a binary search tree be faster? At some point also wouldn't just sorting the array may also be faster (when log(n) < log(1/eps))? |
||||||
for (size_t j = 0; j < n; ++j) | ||||||
{ | ||||||
if (x[i * n + j] <= M) | ||||||
{ | ||||||
cnt++; | ||||||
} | ||||||
} | ||||||
if (cnt < filtNum) | ||||||
{ | ||||||
L = M; | ||||||
lcnt = cnt; | ||||||
} | ||||||
else | ||||||
{ | ||||||
R = M; | ||||||
rcnt = cnt; | ||||||
} | ||||||
// distance threshold is found | ||||||
if (rcnt == filtNum) | ||||||
{ | ||||||
break; | ||||||
} | ||||||
} | ||||||
// create edges for the closest neighbors | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I guess now that I think about it this is getting in the direction of this sklearn function: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.kneighbors_graph.html Thoughts on using kneighbors from daal for this, or am I missing something big? |
||||||
for (size_t j = 0; j < n; ++j) | ||||||
{ | ||||||
if (x[i * n + j] <= R) | ||||||
{ | ||||||
x[i * n + j] = 1.0; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
else | ||||||
{ | ||||||
x[i * n + j] = 0.0; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
} | ||||||
// fill the diagonal of matrix with zeros | ||||||
x[i * n + i] = 0; | ||||||
} | ||||||
|
||||||
// Create Laplassian matrix | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
for (size_t i = 0; i < n; ++i) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dumb question, I sort assumed the nearest cosine neighbors from above created almost an adjacency matrix which could be used here to create the laplacian via L=D-A. I guess I am trying to understand the creation math here, could you send me a link what definition you used here? |
||||||
{ | ||||||
for (size_t j = 0; j < i; ++j) | ||||||
{ | ||||||
algorithmFPType val = (x[i * n + j] + x[j * n + i]) / 2; | ||||||
x[i * n + j] = -val; | ||||||
x[j * n + i] = -val; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little bitconfused with inconsistency - in this case you are assigning values to the elements of matrix, but for the other part you are incrementing the symmetric ones. Does this matrix represent two triangular ones or should operations be actually symmetric? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am/was too. Hopefully a bit of clarity to help in a code comment. |
||||||
x[i * n + i] += val; | ||||||
x[j * n + j] += val; | ||||||
} | ||||||
} | ||||||
|
||||||
// std::cout << "Laplacian matrix" << std::endl; | ||||||
// for (int i = 0; i < n; ++i) { | ||||||
// for (int j = 0; j < n; ++j) { | ||||||
// std::cout << x[i * n + j] << " "; | ||||||
// } | ||||||
// std::cout << std::endl; | ||||||
// } | ||||||
// std::cout << "------" << std::endl; | ||||||
|
||||||
// Find the eigen vectors and eigne values of the matix | ||||||
//TArray<algorithmFPType, cpu> eigenvalues(n); | ||||||
//DAAL_CHECK_MALLOC(eigenvalues.get()); | ||||||
WriteRows<algorithmFPType, cpu> eigenValuesBlock(eigenvalues, 0, n); | ||||||
DAAL_CHECK_BLOCK_STATUS(eigenValuesBlock); | ||||||
algorithmFPType * eigenValuesPtr = eigenValuesBlock.get(); | ||||||
|
||||||
status |= computeEigenvectorsInplace<algorithmFPType, cpu>(n, x, eigenValuesPtr); | ||||||
DAAL_CHECK_STATUS_VAR(status); | ||||||
|
||||||
// std::cout << "Eigen vectors: " << std::endl; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a reminder to delete unused code |
||||||
// for (int i = 0; i < n; ++i) { | ||||||
// for (int j = 0; j < n; ++j) { | ||||||
// std::cout << x[i * n + j] << " "; | ||||||
// } | ||||||
// std::cout << std::endl; | ||||||
// } | ||||||
|
||||||
// Fill the output matrix with eigen vectors corresponding to the smallest eigen values | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
WriteOnlyRows<algorithmFPType, cpu> embedMatrix(embeddingTable, 0, n); | ||||||
DAAL_CHECK_BLOCK_STATUS(embedMatrix); | ||||||
algorithmFPType * embed = embedMatrix.get(); | ||||||
|
||||||
for (int i = 0; i < k; ++i) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry if this a dumb request, could you leave a comment in the code above this double for loop as to the matrix operation is doing? I know its related to the eigenvectors out of X, but why into the columns of embed? May save some time in the future for someone unfamiliar when they try to get up to speed. I can see its the transpose copy of part of a row of x into a column of embed, is that right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe also a todo for a parallel for here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should it be |
||||||
{ | ||||||
for (int j = 0; j < n; ++j) | ||||||
{ | ||||||
embed[j * k + i] = x[i * n + j]; | ||||||
} | ||||||
} | ||||||
|
||||||
return status; | ||||||
} | ||||||
|
||||||
} // namespace internal | ||||||
|
||||||
} // namespace spectral_embedding | ||||||
|
||||||
} // namespace algorithms | ||||||
|
||||||
} // namespace daal |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* file: spectral_embedding_kernel.h */ | ||
/******************************************************************************* | ||
* Copyright contributors to the oneDAL project | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
/* | ||
//++ | ||
// Declaration of template structs that calculate SVM Training functions. | ||
//-- | ||
*/ | ||
|
||
#ifndef __SPECTRAL_EMBEDDING_KERNEL_H__ | ||
#define __SPECTRAL_EMBEDDING_KERNEL_H__ | ||
|
||
#include "data_management/data/numeric_table.h" | ||
#include "services/daal_defines.h" | ||
#include "src/algorithms/kernel.h" | ||
|
||
namespace daal | ||
{ | ||
namespace algorithms | ||
{ | ||
namespace spectral_embedding | ||
{ | ||
|
||
enum Method | ||
{ | ||
defaultDense = 0 | ||
}; | ||
|
||
namespace internal | ||
{ | ||
|
||
using namespace daal::data_management; | ||
using namespace daal::services; | ||
|
||
struct KernelParameter : daal::algorithms::Parameter | ||
{ | ||
size_t numberOfEmbeddings = 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should it be |
||
size_t numberOfNeighbors = 1; | ||
}; | ||
|
||
template <typename algorithmFPType, Method method, CpuType cpu> | ||
struct SpectralEmbeddingKernel : public Kernel | ||
{ | ||
services::Status compute(const NumericTable * xTable, NumericTable * embeddingTable, NumericTable * eigenTable, const KernelParameter & par); | ||
}; | ||
|
||
} // namespace internal | ||
} // namespace spectral_embedding | ||
} // namespace algorithms | ||
} // namespace daal | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
/******************************************************************************* | ||
* Copyright 2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
#pragma once | ||
|
||
#include "oneapi/dal/algo/spectral_embedding/compute.hpp" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add arguments description.