From 15f66616164c4d0d9d2a0fd5d2b813ab5835f6b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Wed, 12 Jul 2023 15:21:01 +0100 Subject: [PATCH 01/43] Expose the zero inflation parameter in the likelihood function and add it as the command line argument `--zero-inflation` (default: `0.01`). --- include/Likelihood.hpp | 44 ++++++++++++++++++++++-------------------- src/mSWEEP.cpp | 3 ++- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 82153ff..48d59d9 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -83,6 +83,7 @@ class LL_WOR21 : public Likelihood { seamat::DenseMatrix log_likelihoods; std::vector log_ec_counts; std::vector> bb_params; + double zero_inflation; seamat::DenseMatrix precalc_lls(const std::vector &group_sizes, const size_t n_groups) { V max_size = 0; // Storing the grouping can take a lot less space if it can be done with uint16_t or uint8_t. @@ -90,11 +91,11 @@ class LL_WOR21 : public Likelihood { max_size = (group_sizes[i] > max_size ? 
group_sizes[i] : max_size); } - seamat::DenseMatrix ll_mat(n_groups, max_size + 1, -4.60517); + seamat::DenseMatrix ll_mat(n_groups, max_size + 1, std::log(this->zero_inflation)); #pragma omp parallel for schedule(static) shared(ll_mat) for (size_t i = 0; i < n_groups; ++i) { for (V j = 1; j <= max_size; ++j) { - ll_mat(i, j) = ldbb_scaled(j, group_sizes[i], this->bb_params[i][0], this->bb_params[i][1]) - 0.01005034; // log(0.99) = -0.01005034 + ll_mat(i, j) = ldbb_scaled(j, group_sizes[i], this->bb_params[i][0], this->bb_params[i][1]) + std::log1p(-this->zero_inflation); // log(0.99) = -0.01005034 } } @@ -106,7 +107,7 @@ class LL_WOR21 : public Likelihood { const seamat::DenseMatrix &precalc_lls_mat = this->precalc_lls(group_sizes, n_groups); - this->log_likelihoods.resize(n_groups, num_ecs, -4.60517); // -4.60517 = log(0.01) + this->log_likelihoods.resize(n_groups, num_ecs, std::log(zero_inflation)); // -4.60517 = log(0.01) #pragma omp parallel for schedule(static) shared(precalc_lls_mat) for (size_t j = 0; j < num_ecs; ++j) { for (size_t i = 0; i < n_groups; ++i) { @@ -139,8 +140,9 @@ class LL_WOR21 : public Likelihood { public: LL_WOR21() = default; - LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu) { + LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const T _zero_inflation) { T bb_constants[2] = { tol, frac_mu }; + this->zero_inflation = _zero_inflation; this->update_bb_parameters(group_sizes, n_groups, bb_constants); this->from_grouped_alignment(alignment, group_sizes, n_groups); } @@ -258,49 +260,49 @@ class LL_WOR21 : public Likelihood { }; template -std::unique_ptr> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e) { +std::unique_ptr> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, 
const T q, const T e, const T zero_inflation) { size_t max_group_size = grouping.max_group_size(); size_t n_groups = grouping.get_n_groups(); std::unique_ptr> log_likelihoods; if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } } else if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, 
zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } } else if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } } else { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new 
mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, zero_inflation)); } } return log_likelihoods; diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index b43323e..f60d941 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -137,6 +137,7 @@ void parse_args(int argc, char* argv[], cxxargs::Arguments &args) { args.add_short_argument('e', "Dispersion term for the beta-binomial component (default: 0.01).", 0.01); // Prior parameters for estimation args.add_long_argument>("alphas", "Prior counts for the relative abundances, supply as comma-separated nonzero values (default: all 1.0)."); + args.add_long_argument("zero-inflation", "Likelihood of an observation that contains 0 pseudoalignments against a reference group (default: 0.01).", 0.01); args.set_not_required("alphas"); if (CmdOptionPresent(argv, argv+argc, "--help")) { @@ -363,7 +364,7 @@ int main (int argc, char *argv[]) { // Use the alignment data to populate the log_likelihoods matrix. 
try { - log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e')); + log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e'), args.value("zero-inflation")); } catch (std::exception &e) { finalize("Building the log-likelihood array failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; From 10b26e3ccf6be77442ebf80935260566d899b73e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Wed, 12 Jul 2023 15:24:10 +0100 Subject: [PATCH 02/43] Document the new `--zero-inflation` argument. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ae11f08..384176f 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,7 @@ Likelihood options: -q Mean for the beta-binomial component (default: 0.65). -e Dispersion term for the beta-binomial component (default: 0.01). --alphas Prior counts for the relative abundances, supply as comma-separated nonzero values (default: all 1.0). +--zero-inflation Likelihood of an observation that contains 0 pseudoalignments against a reference group (default: 0.01). ``` # References From 70213d0df2921ea44b907c9bb65e89a7862d3c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 14:11:45 +0300 Subject: [PATCH 03/43] Update dependency seamat includes. 
--- config/CMakeLists-rcgpar.txt.in | 2 +- config/CMakeLists-seamat.txt.in | 2 +- config/CMakeLists-telescope.txt.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in index 9fa41fa..95cb94d 100644 --- a/config/CMakeLists-rcgpar.txt.in +++ b/config/CMakeLists-rcgpar.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(rcgpar-download GIT_REPOSITORY https://github.com/tmaklin/rcgpar - GIT_TAG v1.1.0 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" BUILD_IN_SOURCE 0 BUILD_COMMAND "" diff --git a/config/CMakeLists-seamat.txt.in b/config/CMakeLists-seamat.txt.in index 3e576a2..c7bc6fb 100644 --- a/config/CMakeLists-seamat.txt.in +++ b/config/CMakeLists-seamat.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(seamat-download GIT_REPOSITORY https://github.com/tmaklin/seamat - GIT_TAG v0.2.1 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/seamat" BUILD_IN_SOURCE 0 BUILD_COMMAND "" diff --git a/config/CMakeLists-telescope.txt.in b/config/CMakeLists-telescope.txt.in index b64ee3f..4306849 100644 --- a/config/CMakeLists-telescope.txt.in +++ b/config/CMakeLists-telescope.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(telescope-download GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v0.6.2 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" BUILD_IN_SOURCE 0 BUILD_COMMAND "" From 9d8e090ffd58b5fd297b013095b7e1f21ab8383c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 14:12:16 +0300 Subject: [PATCH 04/43] Link correctly with rcgutils from rcgpar. 
--- CMakeLists.txt | 5 ++--- config/CMakeLists-rcgpar.txt.in | 2 +- config/CMakeLists-seamat.txt.in | 2 +- config/CMakeLists-telescope.txt.in | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75e8ab1..c62bc5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,8 +236,7 @@ else() endif() add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/seamat ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/build) - target_link_libraries(mSWEEP rcgomp) - set(CMAKE_SEAMAT_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/include) + set(CMAKE_SEAMAT_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/include ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/build/include) endif() include_directories(${CMAKE_SEAMAT_HEADERS}) @@ -261,7 +260,7 @@ else() endif() add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/build) - target_link_libraries(mSWEEP rcgomp) + target_link_libraries(mSWEEP rcgomp rcgutils) set(CMAKE_RCGPAR_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/include) endif() include_directories(${CMAKE_RCGPAR_HEADERS}) diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in index 95cb94d..9fa41fa 100644 --- a/config/CMakeLists-rcgpar.txt.in +++ b/config/CMakeLists-rcgpar.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(rcgpar-download GIT_REPOSITORY https://github.com/tmaklin/rcgpar - GIT_TAG build-system-patch-1 + GIT_TAG v1.1.0 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" BUILD_IN_SOURCE 0 BUILD_COMMAND "" diff --git a/config/CMakeLists-seamat.txt.in b/config/CMakeLists-seamat.txt.in index c7bc6fb..3e576a2 100644 --- a/config/CMakeLists-seamat.txt.in +++ b/config/CMakeLists-seamat.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(seamat-download GIT_REPOSITORY https://github.com/tmaklin/seamat - GIT_TAG build-system-patch-1 + GIT_TAG v0.2.1 SOURCE_DIR 
"${CMAKE_CURRENT_BINARY_DIR}/external/seamat" BUILD_IN_SOURCE 0 BUILD_COMMAND "" diff --git a/config/CMakeLists-telescope.txt.in b/config/CMakeLists-telescope.txt.in index 4306849..b64ee3f 100644 --- a/config/CMakeLists-telescope.txt.in +++ b/config/CMakeLists-telescope.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(telescope-download GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG build-system-patch-1 + GIT_TAG v0.6.2 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" BUILD_IN_SOURCE 0 BUILD_COMMAND "" From 9089be2a5550d6c5190710d4d3c5380a348663e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 14:12:26 +0300 Subject: [PATCH 05/43] Update rcgpar,seamat,telescope to track dev branches. --- config/CMakeLists-rcgpar.txt.in | 2 +- config/CMakeLists-seamat.txt.in | 2 +- config/CMakeLists-telescope.txt.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in index 9fa41fa..95cb94d 100644 --- a/config/CMakeLists-rcgpar.txt.in +++ b/config/CMakeLists-rcgpar.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(rcgpar-download GIT_REPOSITORY https://github.com/tmaklin/rcgpar - GIT_TAG v1.1.0 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" BUILD_IN_SOURCE 0 BUILD_COMMAND "" diff --git a/config/CMakeLists-seamat.txt.in b/config/CMakeLists-seamat.txt.in index 3e576a2..c7bc6fb 100644 --- a/config/CMakeLists-seamat.txt.in +++ b/config/CMakeLists-seamat.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(seamat-download GIT_REPOSITORY https://github.com/tmaklin/seamat - GIT_TAG v0.2.1 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/seamat" BUILD_IN_SOURCE 0 BUILD_COMMAND "" diff --git a/config/CMakeLists-telescope.txt.in b/config/CMakeLists-telescope.txt.in index b64ee3f..4306849 100644 --- 
a/config/CMakeLists-telescope.txt.in +++ b/config/CMakeLists-telescope.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(telescope-download GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v0.6.2 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" BUILD_IN_SOURCE 0 BUILD_COMMAND "" From 675adce827d6251b4344aac7d9092ebd330b61ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 15:19:29 +0300 Subject: [PATCH 06/43] Allowing passing zlib, bzip2, liblzma as cmake arguments instead of using find_package. --- CMakeLists.txt | 64 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c62bc5b..6dcbbfc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,20 +85,60 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/mSWEEP_mpi_config.hpp.in ${CMA add_executable(mSWEEP ${CMAKE_CURRENT_SOURCE_DIR}/src/mSWEEP.cpp) ## Check supported compression types -find_package(BZip2) -if (BZIP2_FOUND) - include_directories(${BZIP2_INCLUDE_DIRS}) +## Dependencies +### Check supported compression types +#### zlib +if ((DEFINED ZLIB_LIBRARY AND DEFINED ZLIB_INCLUDE_DIR) AND (NOT DEFINED ZLIB_FOUND)) + message(STATUS "zlib library provided in: " ${ZLIB_LIBRARY}) + message(STATUS "zlib headers provided in: " ${ZLIB_INCLUDE_DIR}) + include_directories(${ZLIB_INCLUDE_DIR}) + target_link_libraries(mSWEEP ${ZLIB_LIBRARY}) + set(MSWEEP_HAVE_ZLIB 1) +else() + find_package(ZLIB) + if (ZLIB_FOUND) + include_directories(${ZLIB_INCLUDE_DIR}) + target_link_libraries(mSWEEP ${ZLIB_LIBRARY}) + set(MSWEEP_HAVE_ZLIB 1) + else() + set(MSWEEP_HAVE_ZLIB 0) + endif() +endif() + +#### bzip2 +if (DEFINED BZIP2_LIBRARIES AND DEFINED BZIP2_INCLUDE_DIR AND (NOT DEFINED BZIP2_FOUND)) + message(STATUS "bzip2 library provided in: " ${BZIP2_LIBRARIES}) + message(STATUS "bzip2 headers provided in: " 
${BZIP2_INCLUDE_DIR}) + include_directories(${BZIP2_INCLUDE_DIR}) target_link_libraries(mSWEEP ${BZIP2_LIBRARIES}) -endif() -find_package(LibLZMA) -if (LIBLZMA_FOUND) - include_directories(${LIBLZMA_INCLUDE_DIRS}) - target_link_libraries(mSWEEP ${LIBLZMA_LIBRARIES}) + set(MSWEEP_HAVE_BZIP2 1) +else() + find_package(BZip2) + if (BZIP2_FOUND) + include_directories(${BZIP2_INCLUDE_DIR}) + target_link_libraries(mSWEEP ${BZIP2_LIBRARIES}) + set(MSWEEP_HAVE_BZIP2 1) + else() + set(MSWEEP_HAVE_BZIP2 0) + endif() endif() -find_package(ZLIB) -if (ZLIB_FOUND) - include_directories(${ZLIB_INCLUDE_DIRS}) - target_link_libraries(mSWEEP ${ZLIB_LIBRARIES}) + +#### lzma +if (DEFINED LIBLZMA_LIBRARY AND DEFINED LIBLZMA_INCLUDE_DIR AND (NOT DEFINED LIBLZMA_FOUND)) + message(STATUS "liblzma library provided in: " ${LIBLZMA_LIBRARY}) + message(STATUS "liblzma headers provided in: " ${LIBLZMA_INCLUDE_DIR}) + include_directories(${LIBLZMA_INCLUDE_DIR}) + target_link_libraries(mSWEEP ${LIBLZMA_LIBRARY}) + set(MSWEEP_HAVE_LIBLZMA 1) +else() + find_package(LibLZMA) + if (LIBLZMA_FOUND) + include_directories(${LIBLZMA_INCLUDE_DIR}) + target_link_libraries(mSWEEP ${LIBLZMA_LIBRARY}) + set(MSWEEP_HAVE_LIBLZMA 1) + else() + set(MSWEEP_HAVE_LIBLZMA 0) + endif() endif() ## bxzstr From 0c4bb1fe88cb6b8927ad7aa7aed50769203667f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 15:19:51 +0300 Subject: [PATCH 07/43] Pass info about supported compression types to bxzstr. 
--- config/CMakeLists-bxzstr.txt.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/CMakeLists-bxzstr.txt.in b/config/CMakeLists-bxzstr.txt.in index 3c5384f..cee3d86 100644 --- a/config/CMakeLists-bxzstr.txt.in +++ b/config/CMakeLists-bxzstr.txt.in @@ -10,6 +10,9 @@ ExternalProject_Add(bxzstr-download BUILD_IN_SOURCE 0 BUILD_COMMAND "" CMAKE_ARGS -D ZSTD_FOUND=0 + CMAKE_ARGS -D ZLIB_FOUND=${MSWEEP_HAVE_ZLIB} + CMAKE_ARGS -D BZIP2_FOUND=${MSWEEP_HAVE_BZIP2} + CMAKE_ARGS -D LIBLZMA_FOUND=${MSWEEP_HAVE_LIBLZMA} INSTALL_COMMAND "" TEST_COMMAND "" UPDATE_COMMAND "" From 36347177f7da4234004ea7008b5fd624b15a9d24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 15:29:52 +0300 Subject: [PATCH 08/43] Styling --- config/CMakeLists-bxzstr.txt.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/CMakeLists-bxzstr.txt.in b/config/CMakeLists-bxzstr.txt.in index cee3d86..f4af3be 100644 --- a/config/CMakeLists-bxzstr.txt.in +++ b/config/CMakeLists-bxzstr.txt.in @@ -10,9 +10,9 @@ ExternalProject_Add(bxzstr-download BUILD_IN_SOURCE 0 BUILD_COMMAND "" CMAKE_ARGS -D ZSTD_FOUND=0 - CMAKE_ARGS -D ZLIB_FOUND=${MSWEEP_HAVE_ZLIB} - CMAKE_ARGS -D BZIP2_FOUND=${MSWEEP_HAVE_BZIP2} - CMAKE_ARGS -D LIBLZMA_FOUND=${MSWEEP_HAVE_LIBLZMA} + -D ZLIB_FOUND=${MSWEEP_HAVE_ZLIB} + -D BZIP2_FOUND=${MSWEEP_HAVE_BZIP2} + -D LIBLZMA_FOUND=${MSWEEP_HAVE_LIBLZMA} INSTALL_COMMAND "" TEST_COMMAND "" UPDATE_COMMAND "" From aff7bc88a5e0206079898734ee235af072e7ebfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Mon, 4 Sep 2023 15:30:05 +0300 Subject: [PATCH 09/43] Track build-system-patch-1. 
--- config/CMakeLists-alignment-writer.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/CMakeLists-alignment-writer.txt.in b/config/CMakeLists-alignment-writer.txt.in index b42adbd..89e24bb 100644 --- a/config/CMakeLists-alignment-writer.txt.in +++ b/config/CMakeLists-alignment-writer.txt.in @@ -5,7 +5,7 @@ include(ExternalProject) ExternalProject_Add(alignment-writer-download GIT_REPOSITORY https://github.com/tmaklin/alignment-writer.git - GIT_TAG v0.4.0 + GIT_TAG build-system-patch-1 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer" BUILD_IN_SOURCE 0 BUILD_COMMAND "" From 5b52ed9f5953140fff361bb23128086ccf1c18f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 11:02:00 +0000 Subject: [PATCH 10/43] Create make_release.yml --- .github/workflows/make_release.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/make_release.yml diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml new file mode 100644 index 0000000..18c9489 --- /dev/null +++ b/.github/workflows/make_release.yml @@ -0,0 +1,14 @@ +on: [push] + +jobs: + hello_world_job: + runs-on: ubuntu-latest + name: Build linux-x86_64 binary + steps: + - name: Download build script + id: dl-build-script + run: wget https://github.com/tmaklin/biobins/blob/master/linux/mSWEEP/build.sh + - name: Compile binary in Holy Build Box container + id: compile-in-container + uses: phusion/holy-build-box-64@3.0.2 + run: ./build.sh 2.0.0 From 6d2048aa3c88813f35b7b3b2816866d9ab1e9eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:06:43 +0200 Subject: [PATCH 11/43] Update build CI --- .github/workflows/make_release.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 18c9489..8f018ca 100644 --- a/.github/workflows/make_release.yml +++ 
b/.github/workflows/make_release.yml @@ -1,14 +1,15 @@ +name: Build linux-x86_64 binary on: [push] jobs: - hello_world_job: + build_linux-x86_64: runs-on: ubuntu-latest - name: Build linux-x86_64 binary + container: phusion/holy-build-box-64@3.0.2 steps: - name: Download build script id: dl-build-script run: wget https://github.com/tmaklin/biobins/blob/master/linux/mSWEEP/build.sh + - name: Compile binary in Holy Build Box container id: compile-in-container - uses: phusion/holy-build-box-64@3.0.2 run: ./build.sh 2.0.0 From 433e9680f2890a775b586b7b6140b120babd85dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:07:36 +0200 Subject: [PATCH 12/43] Fix release reference --- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 8f018ca..a3fea71 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -4,7 +4,7 @@ on: [push] jobs: build_linux-x86_64: runs-on: ubuntu-latest - container: phusion/holy-build-box-64@3.0.2 + container: phusion/holy-build-box-64:3.0.2 steps: - name: Download build script id: dl-build-script From 22f740e9a41a165339c40de4c27b8289bba44c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:09:34 +0200 Subject: [PATCH 13/43] Add step to install wget --- .github/workflows/make_release.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index a3fea71..3ace401 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -6,6 +6,10 @@ jobs: runs-on: ubuntu-latest container: phusion/holy-build-box-64:3.0.2 steps: + - name: Install wget + - id: install-wget + - run: apt install -y wget + - name: Download build script id: dl-build-script run: wget https://github.com/tmaklin/biobins/blob/master/linux/mSWEEP/build.sh 
From dcd185093d54c7b65e994ae540177ef0972e2545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:10:01 +0200 Subject: [PATCH 14/43] Fix formatting --- .github/workflows/make_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 3ace401..2e814d3 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -7,8 +7,8 @@ jobs: container: phusion/holy-build-box-64:3.0.2 steps: - name: Install wget - - id: install-wget - - run: apt install -y wget + id: install-wget + run: apt install -y wget - name: Download build script id: dl-build-script From 45d9a9e2cf00dd9357cd1778643aff0542c748bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:11:27 +0200 Subject: [PATCH 15/43] apt->apt-get --- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 2e814d3..39ea45a 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -8,7 +8,7 @@ jobs: steps: - name: Install wget id: install-wget - run: apt install -y wget + run: apt-get install -y wget - name: Download build script id: dl-build-script From 265d824b83d48d20ed6c7d0418a6fe7bb834192e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:14:01 +0200 Subject: [PATCH 16/43] Forgot container uses yum --- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 39ea45a..d80f67e 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -8,7 +8,7 @@ jobs: steps: - name: Install wget id: install-wget - run: apt-get install -y wget + run: yum install -y wget - name: Download build 
script id: dl-build-script From d8f2bd89c0c61d84a995f8023f9f2f62ae5d1921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:15:55 +0200 Subject: [PATCH 17/43] Set +x for downloaded script --- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index d80f67e..df44ac6 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -16,4 +16,4 @@ jobs: - name: Compile binary in Holy Build Box container id: compile-in-container - run: ./build.sh 2.0.0 + run: chmod +x build.sh && ./build.sh 2.0.0 From 9dc295fe6e82e3cb4bae86d314d8b6c1d73526c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:17:51 +0200 Subject: [PATCH 18/43] Fix build.sh dl path --- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index df44ac6..19c2e7c 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -12,7 +12,7 @@ jobs: - name: Download build script id: dl-build-script - run: wget https://github.com/tmaklin/biobins/blob/master/linux/mSWEEP/build.sh + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/linux/mSWEEP/build.sh - name: Compile binary in Holy Build Box container id: compile-in-container From 92a44f562296c80fcb4e1bc9761e2118305bc9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:23:42 +0200 Subject: [PATCH 19/43] Create /io at root --- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 19c2e7c..abbebbd 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -16,4 +16,4 @@ jobs: - name: 
Compile binary in Holy Build Box container id: compile-in-container - run: chmod +x build.sh && ./build.sh 2.0.0 + run: chmod +x build.sh && mkdir /io && cd /io && ./build.sh 2.0.0 From a428cc18cafcfdcc2c0f0834ba08fb999d695cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:26:05 +0200 Subject: [PATCH 20/43] Create /io before downloading build script --- .github/workflows/make_release.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index abbebbd..30bfb97 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -10,10 +10,14 @@ jobs: id: install-wget run: yum install -y wget + - name: Create io directory + id: mkdir-io + run: mkdir /io && cd /io + - name: Download build script id: dl-build-script run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/linux/mSWEEP/build.sh - name: Compile binary in Holy Build Box container id: compile-in-container - run: chmod +x build.sh && mkdir /io && cd /io && ./build.sh 2.0.0 + run: chmod +x build.sh && ./build.sh 2.0.0 From 02da2ce7cb7a582dd5f65727b17b738c19fcfee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:35:07 +0200 Subject: [PATCH 21/43] Add macOS x86-64 and arm64 builds. 
--- .github/workflows/make_release.yml | 50 +++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 30bfb97..9b14eb4 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -1,4 +1,4 @@ -name: Build linux-x86_64 binary +name: Build binaries on: [push] jobs: @@ -21,3 +21,51 @@ jobs: - name: Compile binary in Holy Build Box container id: compile-in-container run: chmod +x build.sh && ./build.sh 2.0.0 + + build_macOS-x86_64: + runs-on: ubuntu-latest + container: ghcr.io/shepherdjerred/macos-cross-compiler:latest + steps: + - name: Install wget + id: install-wget + run: yum install -y wget + + - name: Create io directory + id: mkdir-io + run: mkdir /io && cd /io + + - name: Download toolchain file + id: dl-toolchain-file + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake ../ + + - name: Download build script + id: dl-build-script + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/mSWEEP/build.sh + + - name: Compile binary in Holy Build Box container + id: compile-in-container + run: chmod +x build.sh && ./build.sh 2.0.0 x86-64 + + build_macOS-arm64: + runs-on: ubuntu-latest + container: ghcr.io/shepherdjerred/macos-cross-compiler:latest + steps: + - name: Install wget + id: install-wget + run: yum install -y wget + + - name: Create io directory + id: mkdir-io + run: mkdir /io && cd /io + + - name: Download toolchain file + id: dl-toolchain-file + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/arm64-toolchain.cmake && cp arm64-toolchain.cmake ../ + + - name: Download build script + id: dl-build-script + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/mSWEEP/build.sh + + - name: Compile binary in Holy Build Box container + id: compile-in-container + run: chmod +x build.sh && 
./build.sh 2.0.0 arm64 From 77c1b8afeff2bd2b908aea363dc9c4cbffcaaff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:38:04 +0200 Subject: [PATCH 22/43] macOS container uses apt --- .github/workflows/make_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 9b14eb4..efe419f 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -28,7 +28,7 @@ jobs: steps: - name: Install wget id: install-wget - run: yum install -y wget + run: apt install -y wget - name: Create io directory id: mkdir-io @@ -52,7 +52,7 @@ jobs: steps: - name: Install wget id: install-wget - run: yum install -y wget + run: apt install -y wget - name: Create io directory id: mkdir-io From e7bca5c13d625f74bf7e48a588d51f4ed2ce2577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:44:00 +0200 Subject: [PATCH 23/43] Try fixing the toolchain paths. 
--- .github/workflows/make_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index efe419f..e6d68f6 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -36,7 +36,7 @@ jobs: - name: Download toolchain file id: dl-toolchain-file - run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake ../ + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake /io/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake /x86-64-toolchain.cmake - name: Download build script id: dl-build-script @@ -60,7 +60,7 @@ jobs: - name: Download toolchain file id: dl-toolchain-file - run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/arm64-toolchain.cmake && cp arm64-toolchain.cmake ../ + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake /io/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake /x86-64-toolchain.cmake - name: Download build script id: dl-build-script From 950a0cbb26307d1446384edb0e64ab1c33e740cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 13:47:07 +0200 Subject: [PATCH 24/43] Correct toolchain for arm64. 
--- .github/workflows/make_release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index e6d68f6..ec97915 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -60,7 +60,7 @@ jobs: - name: Download toolchain file id: dl-toolchain-file - run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake /io/x86-64-toolchain.cmake && cp x86-64-toolchain.cmake /x86-64-toolchain.cmake + run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/arm64-toolchain.cmake && cp arm64-toolchain.cmake /io/arm64-toolchain.cmake && cp arm64-toolchain.cmake /arm64-toolchain.cmake - name: Download build script id: dl-build-script From c56e0ce3f11455ce087437e8fbdeb9f73d4c4ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 14:19:41 +0200 Subject: [PATCH 25/43] Add upload to release page on version tag or tmaklin-patch-1 push. 
--- .github/workflows/make_release.yml | 36 +++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index ec97915..e56d21d 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -1,5 +1,10 @@ name: Build binaries -on: [push] +on: + push: + tags: + - "v*.*.*" + branches: + - tmaklin-patch-1 jobs: build_linux-x86_64: @@ -20,7 +25,14 @@ jobs: - name: Compile binary in Holy Build Box container id: compile-in-container - run: chmod +x build.sh && ./build.sh 2.0.0 + run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} + + - name: Upload linux-x86_64 binary + if: success() + uses: actions/upload-artifact@v3 + with: + name: mSWEEP-${{ github.ref_name }}-x86_64-redhat-linux + path: /io/mSWEEP-${{ github.ref_name }}-x86_64-redhat-linux.tar.gz build_macOS-x86_64: runs-on: ubuntu-latest @@ -42,9 +54,16 @@ jobs: id: dl-build-script run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/mSWEEP/build.sh - - name: Compile binary in Holy Build Box container + - name: Compile binary in macOS Cross Compiler container id: compile-in-container - run: chmod +x build.sh && ./build.sh 2.0.0 x86-64 + run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} + + - name: Upload macOS-x86_64 binary + if: success() + uses: actions/upload-artifact@v3 + with: + name: mSWEEP-${{ github.ref_name }}-x86_64-apple-darwin22 + path: /io/mSWEEP-${{ github.ref_name }}-x86_64-apple-darwin22.tar.gz build_macOS-arm64: runs-on: ubuntu-latest @@ -66,6 +85,13 @@ jobs: id: dl-build-script run: wget https://raw.githubusercontent.com/tmaklin/biobins/master/macOS/mSWEEP/build.sh - - name: Compile binary in Holy Build Box container + - name: Compile binary in macOS Cross Compiler container id: compile-in-container run: chmod +x build.sh && ./build.sh 2.0.0 arm64 + + - name: Upload macOS-arm64 binary + if: success() + uses: 
actions/upload-artifact@v3 + with: + name: mSWEEP-${{ github.ref_name }}-arm64-apple-darwin22 + path: /io/mSWEEP-${{ github.ref_name }}-arm64-apple-darwin22.tar.gz From d4236e93f13b48833971675891271b5e53ee626d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 14:32:09 +0200 Subject: [PATCH 26/43] Fix architectures in macOS builds. --- .github/workflows/make_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index e56d21d..62977b1 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -56,7 +56,7 @@ jobs: - name: Compile binary in macOS Cross Compiler container id: compile-in-container - run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} + run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} x86-64 - name: Upload macOS-x86_64 binary if: success() @@ -87,7 +87,7 @@ jobs: - name: Compile binary in macOS Cross Compiler container id: compile-in-container - run: chmod +x build.sh && ./build.sh 2.0.0 arm64 + run: chmod +x build.sh && ./build.sh ${{ github.ref_name }} arm64 - name: Upload macOS-arm64 binary if: success() From b295738e4d976b4d16bd7efa6af122c6da3c466c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 14:38:36 +0200 Subject: [PATCH 27/43] Add create-release step to publish binaries. 
--- .github/workflows/make_release.yml | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index 62977b1..d9c3ee4 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -95,3 +95,34 @@ jobs: with: name: mSWEEP-${{ github.ref_name }}-arm64-apple-darwin22 path: /io/mSWEEP-${{ github.ref_name }}-arm64-apple-darwin22.tar.gz + + create-release: + runs-on: ubuntu-latest + + needs: [ build_linux-x86_64, build_macOS-x86_64, build_macOS-arm64 ] + + steps: + - uses: actions/checkout@v2 + + - uses: actions/download-artifact@v2 + with: + path: build + + - name: Organise files + shell: bash + run: | + cp build/mSWEEP-${{ github.ref_name }}-arm64-apple-darwin22/mSWEEP-${{ github.ref_name }}-arm64-apple-darwin22.tar.gz . + cp build/mSWEEP-${{ github.ref_name }}-x86_64-apple-darwin22/mSWEEP-${{ github.ref_name }}-x86_64-apple-darwin22.tar.gz . + cp build/mSWEEP-${{ github.ref_name }}-x86_64-redhat-linux/mSWEEP-${{ github.ref_name }}-x86_64-redhat-linux.tar.gz . + + - name: Create release + id: create_release + uses: softprops/action-gh-release@v1 + with: + name: Release ${{ github.ref_name }} + draft: true + prerelease: false + fail_on_unmatched_files: true + generate_release_notes: true + files: | + mSWEEP-*.tar.gz \ No newline at end of file From 82cedc3de0c616d4b25d261a038739beccebae7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 14:44:29 +0200 Subject: [PATCH 28/43] Finish workflow. 
--- .github/workflows/make_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/make_release.yml b/.github/workflows/make_release.yml index d9c3ee4..50f374b 100644 --- a/.github/workflows/make_release.yml +++ b/.github/workflows/make_release.yml @@ -4,7 +4,7 @@ on: tags: - "v*.*.*" branches: - - tmaklin-patch-1 + - msweep-release-testing jobs: build_linux-x86_64: @@ -120,7 +120,7 @@ jobs: uses: softprops/action-gh-release@v1 with: name: Release ${{ github.ref_name }} - draft: true + draft: false prerelease: false fail_on_unmatched_files: true generate_release_notes: true From e741fddd4eaee408ccba3f86f31bea4c1eae1996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 16:50:09 +0200 Subject: [PATCH 29/43] Move mSWEEP openmp/mpi/version config files to include/. --- CMakeLists.txt | 6 +++--- {config => include}/mSWEEP_mpi_config.hpp.in | 0 {config => include}/mSWEEP_openmp_config.hpp.in | 0 {config => include}/mSWEEP_version.h.in | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename {config => include}/mSWEEP_mpi_config.hpp.in (100%) rename {config => include}/mSWEEP_openmp_config.hpp.in (100%) rename {config => include}/mSWEEP_version.h.in (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75e8ab1..ed811fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,11 +76,11 @@ endif() string(TIMESTAMP _BUILD_TIMESTAMP) ## Generate a version.h file containing build version and timestamp -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/mSWEEP_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/mSWEEP_version.h @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/mSWEEP_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/mSWEEP_version.h @ONLY) ## Configure OpenMP if it supported on the system. 
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/mSWEEP_openmp_config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/mSWEEP_openmp_config.hpp @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/mSWEEP_openmp_config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/mSWEEP_openmp_config.hpp @ONLY) ## Configure MPI if it's supported on the system. -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/mSWEEP_mpi_config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/mSWEEP_mpi_config.hpp @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/mSWEEP_mpi_config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/mSWEEP_mpi_config.hpp @ONLY) add_executable(mSWEEP ${CMAKE_CURRENT_SOURCE_DIR}/src/mSWEEP.cpp) diff --git a/config/mSWEEP_mpi_config.hpp.in b/include/mSWEEP_mpi_config.hpp.in similarity index 100% rename from config/mSWEEP_mpi_config.hpp.in rename to include/mSWEEP_mpi_config.hpp.in diff --git a/config/mSWEEP_openmp_config.hpp.in b/include/mSWEEP_openmp_config.hpp.in similarity index 100% rename from config/mSWEEP_openmp_config.hpp.in rename to include/mSWEEP_openmp_config.hpp.in diff --git a/config/mSWEEP_version.h.in b/include/mSWEEP_version.h.in similarity index 100% rename from config/mSWEEP_version.h.in rename to include/mSWEEP_version.h.in From a685b5e8f6cf1b5858a0a81905ddb8ccd80963dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 17:46:14 +0200 Subject: [PATCH 30/43] Incorporate config files in CMakeLists (breaks mSWEEP depend graph!) 
--- CMakeLists.txt | 292 ++++++++++++---------- config/CMakeLists-alignment-writer.txt.in | 17 -- config/CMakeLists-bxzstr.txt.in | 16 -- config/CMakeLists-cxxargs.txt.in | 16 -- config/CMakeLists-cxxio.txt.in | 16 -- config/CMakeLists-rcgpar.txt.in | 18 -- config/CMakeLists-seamat.txt.in | 16 -- config/CMakeLists-telescope.txt.in | 20 -- 8 files changed, 155 insertions(+), 256 deletions(-) delete mode 100644 config/CMakeLists-alignment-writer.txt.in delete mode 100644 config/CMakeLists-bxzstr.txt.in delete mode 100644 config/CMakeLists-cxxargs.txt.in delete mode 100644 config/CMakeLists-cxxio.txt.in delete mode 100644 config/CMakeLists-rcgpar.txt.in delete mode 100644 config/CMakeLists-seamat.txt.in delete mode 100644 config/CMakeLists-telescope.txt.in diff --git a/CMakeLists.txt b/CMakeLists.txt index ed811fb..a6da89c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 2.8.12) project(mSWEEP) +include(ExternalProject) ## Determine build type if(NOT CMAKE_BUILD_TYPE) @@ -105,19 +106,18 @@ endif() if (DEFINED CMAKE_BXZSTR_HEADERS) message(STATUS "bxzstr headers provided in: ${CMAKE_BXZSTR_HEADERS}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-bxzstr.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr-download ) - if(result) - message(FATAL_ERROR "CMake step for bxzstr failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . 
- RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr-download ) - if(result) - message(FATAL_ERROR "Build step for bxzstr failed: ${result}") - endif() + ExternalProject_Add(bxzstr + GIT_REPOSITORY https://github.com/tmaklin/bxzstr.git + GIT_TAG v1.1.0 + PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr" + BUILD_IN_SOURCE 1 + CMAKE_ARGS -D ZSTD_FOUND=0 + BUILD_COMMAND "" + CONFIGURE_COMMAND "" + INSTALL_COMMAND "" + ) + add_dependencies(mSWEEP bxzstr) set(CMAKE_BXZSTR_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr/include) endif() include_directories(${CMAKE_BXZSTR_HEADERS}) @@ -127,19 +127,18 @@ include_directories(${CMAKE_BXZSTR_HEADERS}) if (DEFINED CMAKE_CXXIO_HEADERS) message(STATUS "cxxio headers provided in: ${CMAKE_CXXIO_HEADERS}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-cxxio.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/cxxio-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/cxxio-download ) - if(result) - message(FATAL_ERROR "CMake step for cxxio failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . 
- RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/cxxio-download ) - if(result) - message(FATAL_ERROR "Build step for cxxio failed: ${result}") - endif() + ExternalProject_Add(cxxio + GIT_REPOSITORY https://github.com/tmaklin/cxxio.git + GIT_TAG v0.1.0 + PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/cxxio" + BUILD_IN_SOURCE 1 + BUILD_COMMAND "" + CONFIGURE_COMMAND "" + INSTALL_COMMAND "" + DEPENDS bxzstr + ) + add_dependencies(mSWEEP cxxio) set(CMAKE_CXXIO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/cxxio/include) endif() include_directories("${CMAKE_CXXIO_HEADERS}") @@ -148,158 +147,177 @@ include_directories("${CMAKE_CXXIO_HEADERS}") if (DEFINED CMAKE_CXXARGS_HEADERS) message(STATUS "cxxargs headers provided in: ${CMAKE_CXXARGS_HEADERS}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-cxxargs.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs-download ) - if(result) - message(FATAL_ERROR "CMake step for cxxargs failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . 
- RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs-download ) - if(result) - message(FATAL_ERROR "Build step for cxxargs failed: ${result}") - endif() + ExternalProject_Add(cxxargs + GIT_REPOSITORY https://github.com/tmaklin/cxxargs.git + GIT_TAG v1.1.4 + PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs" + BUILD_IN_SOURCE 1 + BUILD_COMMAND "" + CONFIGURE_COMMAND "" + INSTALL_COMMAND "" + ) + add_dependencies(mSWEEP cxxargs) set(CMAKE_CXXARGS_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs/include) endif() include_directories("${CMAKE_CXXARGS_HEADERS}") ## alignment-writer -if (DEFINED CMAKE_ALIGNMENT_WRITER_HEADERS) +if (DEFINED CMAKE_ALIGNMENT_WRITER_HEADERS AND DEFINED CMAKE_ALIGNMENT_WRITER_LIBRARY) message(STATUS "alignment-writer headers provided in: ${CMAKE_ALIGNMENT_WRITER_HEADERS}") + message(STATUS "alignment-writer library provided in: ${CMAKE_ALIGNMENT_WRITER_LIBRARY}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-alignment-writer.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer-download ) - if(result) - message(FATAL_ERROR "CMake step for alignment-writer failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . 
- RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer-download ) - if(result) - message(FATAL_ERROR "Build step for alignment-writer failed: ${result}") - endif() - add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer - ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer/build) - set_target_properties(alignment-writer PROPERTIES EXCLUDE_FROM_ALL 1) + ExternalProject_Add(alignment-writer-compile + GIT_REPOSITORY https://github.com/tmaklin/alignment-writer.git + GIT_TAG v0.4.0 + PREFIX "external" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer" + BUILD_IN_SOURCE 0 + CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} + -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} + -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + INSTALL_COMMAND "" + DEPENDS bxzstr + cxxargs + ) + add_dependencies(mSWEEP alignment-writer-compile) + set(CMAKE_ALIGNMENT_WRITER_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer/build/lib/libalignmentwriter.a) set(CMAKE_ALIGNMENT_WRITER_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer/include) endif() include_directories(${CMAKE_ALIGNMENT_WRITER_HEADERS}) +target_link_libraries(mSWEEP ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) ## telescope if (DEFINED CMAKE_TELESCOPE_LIBRARY AND DEFINED CMAKE_TELESCOPE_HEADERS) - find_library(TELESCOPE NAMES telescope HINTS ${CMAKE_TELESCOPE_LIBRARY}) - target_link_libraries(mSWEEP ${TELESCOPE}) + message(STATUS "telescope headers provided in: ${CMAKE_TELESCOPE_HEADERS}") + message(STATUS "telescope library provided in: ${CMAKE_TELESCOPE_LIBRARY}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-telescope.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/telescope-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} 
-G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/telescope-download ) - if(result) - message(FATAL_ERROR "CMake step for telescope failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/telescope-download ) - if(result) - message(FATAL_ERROR "Build step for telescope failed: ${result}") - endif() - add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/telescope - ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/build) - set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1) + ExternalProject_Add(telescope-compile + GIT_REPOSITORY https://github.com/tmaklin/telescope.git + GIT_TAG v0.6.2 + PREFIX "external" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" + BUILD_IN_SOURCE 0 + CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} + -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} + -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} + -D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS} + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src + -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + INSTALL_COMMAND "" + DEPENDS bxzstr + cxxargs + cxxio + alignment-writer-compile + ) + add_dependencies(mSWEEP telescope-compile) set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/include) + set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/build/lib/libtelescope.a) set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src) - get_property(CMAKE_TELESCOPE_LIBRARY TARGET telescope PROPERTY LOCATION) - target_link_libraries(mSWEEP libtelescope libalignmentwriter) endif() 
include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS}) +target_link_libraries(mSWEEP ${CMAKE_TELESCOPE_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) ## seamat if (DEFINED CMAKE_SEAMAT_HEADERS) message(STATUS "seamat headers provided in: ${CMAKE_SEAMAT_HEADERS}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-seamat.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/seamat-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/seamat-download ) - if(result) - message(FATAL_ERROR "CMake step for seamat failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/seamat-download ) - if(result) - message(FATAL_ERROR "Build step for seamat failed: ${result}") - endif() - add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/seamat - ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/build) - target_link_libraries(mSWEEP rcgomp) + ExternalProject_Add(seamat + GIT_REPOSITORY https://github.com/tmaklin/seamat.git + GIT_TAG v0.2.1 + PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/seamat" + BUILD_IN_SOURCE 1 + CMAKE_ARGS -D CMAKE_BUILD_TESTS=0 + BUILD_COMMAND "" + CONFIGURE_COMMAND "" + INSTALL_COMMAND "" + ) + add_dependencies(mSWEEP seamat) set(CMAKE_SEAMAT_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/include) endif() include_directories(${CMAKE_SEAMAT_HEADERS}) ## rcgpar if (DEFINED CMAKE_RCGPAR_LIBRARY AND DEFINED CMAKE_RCGPAR_HEADERS) - find_library(RCGPAR NAMES rcgpar HINTS ${CMAKE_RCGPAR_LIBRARY}) - target_link_libraries(mSWEEP ${RCGPAR}) + message(STATUS "rcgpar headers provided in: ${CMAKE_RCGPAR_HEADERS}") + message(STATUS "rcgpar library provided in: ${CMAKE_RCGPAR_LIBRARY}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-rcgpar.txt.in 
${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar-download ) - if(result) - message(FATAL_ERROR "CMake step for rcgpar failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar-download ) - if(result) - message(FATAL_ERROR "Build step for rcgpar failed: ${result}") - endif() - add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar - ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/build) - target_link_libraries(mSWEEP rcgomp) + ExternalProject_Add(rcgpar-compile + GIT_REPOSITORY https://github.com/tmaklin/telescope.git + GIT_TAG v1.1.0 + PREFIX "external" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" + BUILD_IN_SOURCE 0 + CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT} + -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS} + -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + INSTALL_COMMAND "" + DEPENDS seamat + ) + add_dependencies(mSWEEP rcgpar-compile) set(CMAKE_RCGPAR_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/include) + set(CMAKE_RCGPAR_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/build/lib/librcgomp.a) endif() +target_link_libraries(mSWEEP ${CMAKE_RCGPAR_LIBRARY}) include_directories(${CMAKE_RCGPAR_HEADERS}) -## BitMagic -set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src) -include_directories(${CMAKE_BITMAGIC_HEADERS}) - ## mGEMS if (DEFINED CMAKE_MGEMS_LIBRARY AND DEFINED CMAKE_MGEMS_HEADERS) - find_library(MGEMS NAMES mgems HINTS ${CMAKE_MGEMS_LIBRARY}) - 
target_link_libraries(mSWEEP ${MGEMS}) + message(STATUS "mGEMS headers provided in: ${CMAKE_MGEMS_HEADERS}") + message(STATUS "mGEMS library provided in: ${CMAKE_MGEMS_LIBRARY}") else() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/CMakeLists-mGEMS.txt.in ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS-download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS-download ) - if(result) - message(FATAL_ERROR "CMake step for libmgems failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS-download ) - if(result) - message(FATAL_ERROR "Build step for libmgems failed: ${result}") - endif() - add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS - ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/build) - set_target_properties(mGEMS PROPERTIES EXCLUDE_FROM_ALL 1) - target_link_libraries(mSWEEP libmgems) + ExternalProject_Add(mGEMS-compile + GIT_REPOSITORY https://github.com/PROBIC/mGEMS.git + GIT_TAG v1.3.0 + PREFIX "external" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS" + BUILD_IN_SOURCE 0 + CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} + -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} + -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} + -D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS} + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src + -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} + -D CMAKE_TELESCOPE_HEADERS=${CMAKE_TELESCOPE_HEADERS} + -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + INSTALL_COMMAND "" + DEPENDS bxzstr + cxxargs + cxxio + alignment-writer-compile + seamat + 
telescope-compile + ) + add_dependencies(mSWEEP mGEMS-compile) set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/include) + set(CMAKE_MGEMS_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/build/lib/libmgems.a) endif() +target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY}) include_directories(${CMAKE_MGEMS_HEADERS}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include -${CMAKE_CURRENT_SOURCE_DIR}/external ${CMAKE_CURRENT_SOURCE_DIR}/include/tools -${CMAKE_CURRENT_SOURCE_DIR}/external/cxxio -${CMAKE_CURRENT_BINARY_DIR}/include) +include_directories( + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/external ${CMAKE_CURRENT_SOURCE_DIR}/include/tools + ${CMAKE_CURRENT_BINARY_DIR}/include + ) if(MPI_FOUND) add_dependencies(mSWEEP rcgmpi bigmpi) diff --git a/config/CMakeLists-alignment-writer.txt.in b/config/CMakeLists-alignment-writer.txt.in deleted file mode 100644 index b42adbd..0000000 --- a/config/CMakeLists-alignment-writer.txt.in +++ /dev/null @@ -1,17 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(alignment-writer-get NONE) -include(ExternalProject) - -ExternalProject_Add(alignment-writer-download - GIT_REPOSITORY https://github.com/tmaklin/alignment-writer.git - GIT_TAG v0.4.0 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer" - BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} - -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-bxzstr.txt.in b/config/CMakeLists-bxzstr.txt.in deleted file mode 100644 index 3c5384f..0000000 --- a/config/CMakeLists-bxzstr.txt.in +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(bxzstr-get NONE) -include(ExternalProject) - -ExternalProject_Add(bxzstr-download - GIT_REPOSITORY https://github.com/tmaklin/bxzstr.git - GIT_TAG v1.1.0 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr" - 
BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D ZSTD_FOUND=0 - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-cxxargs.txt.in b/config/CMakeLists-cxxargs.txt.in deleted file mode 100644 index fedfd51..0000000 --- a/config/CMakeLists-cxxargs.txt.in +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(cxxargs-get NONE) -include(ExternalProject) - -ExternalProject_Add(cxxargs-download - GIT_REPOSITORY https://github.com/tmaklin/cxxargs.git - GIT_TAG v1.1.4 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs" - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-cxxio.txt.in b/config/CMakeLists-cxxio.txt.in deleted file mode 100644 index b448f55..0000000 --- a/config/CMakeLists-cxxio.txt.in +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(cxxio-get NONE) -include(ExternalProject) - -ExternalProject_Add(cxxio-download - GIT_REPOSITORY https://github.com/tmaklin/cxxio.git - GIT_TAG v0.1.0 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/cxxio" - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in deleted file mode 100644 index 9fa41fa..0000000 --- a/config/CMakeLists-rcgpar.txt.in +++ /dev/null @@ -1,18 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(rcgpar-get NONE) -include(ExternalProject) - -ExternalProject_Add(rcgpar-download - GIT_REPOSITORY https://github.com/tmaklin/rcgpar - GIT_TAG v1.1.0 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" - BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT} - -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS} - INSTALL_COMMAND "" - TEST_COMMAND "" - 
UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-seamat.txt.in b/config/CMakeLists-seamat.txt.in deleted file mode 100644 index 3e576a2..0000000 --- a/config/CMakeLists-seamat.txt.in +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(seamat-get NONE) -include(ExternalProject) - -ExternalProject_Add(seamat-download - GIT_REPOSITORY https://github.com/tmaklin/seamat - GIT_TAG v0.2.1 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/seamat" - BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D CMAKE_BUILD_TESTS=0 - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-telescope.txt.in b/config/CMakeLists-telescope.txt.in deleted file mode 100644 index b64ee3f..0000000 --- a/config/CMakeLists-telescope.txt.in +++ /dev/null @@ -1,20 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(telescope-get NONE) -include(ExternalProject) - -ExternalProject_Add(telescope-download - GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v0.6.2 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" - BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} - -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} - -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src - -D CMAKE_ALIGNMENT_WRITER_HEADERS=$CMAKE_ALIGNMENT_WRITER_HEADERS - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) From 52fef56550ad97ddaa837b3a9f65d2be3696281d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 17:47:55 +0200 Subject: [PATCH 31/43] Remove remaining unused config files. 
--- config/CMakeLists-mGEMS.txt.in | 22 ---------------------- config/CMakeLists-zlib.txt.in | 15 --------------- 2 files changed, 37 deletions(-) delete mode 100644 config/CMakeLists-mGEMS.txt.in delete mode 100644 config/CMakeLists-zlib.txt.in diff --git a/config/CMakeLists-mGEMS.txt.in b/config/CMakeLists-mGEMS.txt.in deleted file mode 100644 index da5b462..0000000 --- a/config/CMakeLists-mGEMS.txt.in +++ /dev/null @@ -1,22 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(mGEMS-get NONE) -include(ExternalProject) - -ExternalProject_Add(mGEMS-download - GIT_REPOSITORY https://github.com/PROBIC/mGEMS.git - GIT_TAG v1.3.0 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS" - BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} - -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} - -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} - -D CMAKE_TELESCOPE_HEADERS=$CMAKE_TELESCOPE_HEADERS - -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src - -D CMAKE_ALIGNMENT_WRITER_HEADERS=$CMAKE_ALIGNMENT_WRITER_HEADERS - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) diff --git a/config/CMakeLists-zlib.txt.in b/config/CMakeLists-zlib.txt.in deleted file mode 100644 index 3cfd215..0000000 --- a/config/CMakeLists-zlib.txt.in +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(zlib-get NONE) -include(ExternalProject) - -ExternalProject_Add(zlib-download - GIT_REPOSITORY https://github.com/madler/zlib.git - GIT_TAG v1.2.11 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/zlib" - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND ${CMAKE_BINARY_DIR}/external/zlib/configure --static - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) From a16ee0c7117e27cd7ccf2454cfcedcac34346bc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Wed, 28 Feb 2024 10:11:53 +0200 Subject: [PATCH 
32/43] Use FetchContent instead of ExternalProject (req cmake >= 3.11). --- CMakeLists.txt | 124 +++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 60 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6da89c..b8e798b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 3.11) project(mSWEEP) -include(ExternalProject) +include(FetchContent) ## Determine build type if(NOT CMAKE_BUILD_TYPE) @@ -23,6 +23,8 @@ if(CMAKE_BUILD_TYPE MATCHES Release) endif() endif() +set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH} ${CURRENT_BINARY_DIR}/cmake") + if(CMAKE_BUILD_WITH_FLTO) cmake_policy(SET CMP0069 NEW) set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) @@ -106,19 +108,19 @@ endif() if (DEFINED CMAKE_BXZSTR_HEADERS) message(STATUS "bxzstr headers provided in: ${CMAKE_BXZSTR_HEADERS}") else() - ExternalProject_Add(bxzstr + FetchContent_Declare(bxzstr GIT_REPOSITORY https://github.com/tmaklin/bxzstr.git GIT_TAG v1.1.0 PREFIX "external" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr" - BUILD_IN_SOURCE 1 + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/bxzstr" + BUILD_IN_SOURCE 0 CMAKE_ARGS -D ZSTD_FOUND=0 BUILD_COMMAND "" CONFIGURE_COMMAND "" INSTALL_COMMAND "" ) - add_dependencies(mSWEEP bxzstr) - set(CMAKE_BXZSTR_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/bxzstr/include) + FetchContent_MakeAvailable(bxzstr) + set(CMAKE_BXZSTR_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/bxzstr/include) endif() include_directories(${CMAKE_BXZSTR_HEADERS}) @@ -127,19 +129,18 @@ include_directories(${CMAKE_BXZSTR_HEADERS}) if (DEFINED CMAKE_CXXIO_HEADERS) message(STATUS "cxxio headers provided in: ${CMAKE_CXXIO_HEADERS}") else() - ExternalProject_Add(cxxio + FetchContent_Declare(cxxio GIT_REPOSITORY https://github.com/tmaklin/cxxio.git GIT_TAG v0.1.0 PREFIX "external" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/cxxio" - BUILD_IN_SOURCE 1 + SOURCE_DIR 
"${CMAKE_CURRENT_SOURCE_DIR}/external/cxxio" + BUILD_IN_SOURCE 0 BUILD_COMMAND "" CONFIGURE_COMMAND "" INSTALL_COMMAND "" - DEPENDS bxzstr ) - add_dependencies(mSWEEP cxxio) - set(CMAKE_CXXIO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/cxxio/include) + FetchContent_MakeAvailable(cxxio) + set(CMAKE_CXXIO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/cxxio/include) endif() include_directories("${CMAKE_CXXIO_HEADERS}") @@ -147,18 +148,18 @@ include_directories("${CMAKE_CXXIO_HEADERS}") if (DEFINED CMAKE_CXXARGS_HEADERS) message(STATUS "cxxargs headers provided in: ${CMAKE_CXXARGS_HEADERS}") else() - ExternalProject_Add(cxxargs + FetchContent_Declare(cxxargs GIT_REPOSITORY https://github.com/tmaklin/cxxargs.git GIT_TAG v1.1.4 PREFIX "external" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs" - BUILD_IN_SOURCE 1 + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/cxxargs" + BUILD_IN_SOURCE 0 BUILD_COMMAND "" CONFIGURE_COMMAND "" INSTALL_COMMAND "" ) - add_dependencies(mSWEEP cxxargs) - set(CMAKE_CXXARGS_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/cxxargs/include) + FetchContent_MakeAvailable(cxxargs) + set(CMAKE_CXXARGS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/cxxargs/include) endif() include_directories("${CMAKE_CXXARGS_HEADERS}") @@ -167,10 +168,11 @@ if (DEFINED CMAKE_ALIGNMENT_WRITER_HEADERS AND DEFINED CMAKE_ALIGNMENT_WRITER_LI message(STATUS "alignment-writer headers provided in: ${CMAKE_ALIGNMENT_WRITER_HEADERS}") message(STATUS "alignment-writer library provided in: ${CMAKE_ALIGNMENT_WRITER_LIBRARY}") else() - ExternalProject_Add(alignment-writer-compile + FetchContent_Declare(alignment-writer GIT_REPOSITORY https://github.com/tmaklin/alignment-writer.git GIT_TAG v0.4.0 PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer" BUILD_IN_SOURCE 0 CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} @@ -181,12 +183,12 @@ else() -D 
"CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" INSTALL_COMMAND "" - DEPENDS bxzstr - cxxargs ) - add_dependencies(mSWEEP alignment-writer-compile) - set(CMAKE_ALIGNMENT_WRITER_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer/build/lib/libalignmentwriter.a) - set(CMAKE_ALIGNMENT_WRITER_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer/include) + FetchContent_MakeAvailable(alignment-writer) + add_dependencies(mSWEEP libalignmentwriter) + set_target_properties(alignment-writer PROPERTIES EXCLUDE_FROM_ALL 1) + set(CMAKE_ALIGNMENT_WRITER_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/include) + set(CMAKE_ALIGNMENT_WRITER_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libalignment-writer.a) endif() include_directories(${CMAKE_ALIGNMENT_WRITER_HEADERS}) target_link_libraries(mSWEEP ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) @@ -196,65 +198,68 @@ if (DEFINED CMAKE_TELESCOPE_LIBRARY AND DEFINED CMAKE_TELESCOPE_HEADERS) message(STATUS "telescope headers provided in: ${CMAKE_TELESCOPE_HEADERS}") message(STATUS "telescope library provided in: ${CMAKE_TELESCOPE_LIBRARY}") else() - ExternalProject_Add(telescope-compile + FetchContent_Declare(telescope GIT_REPOSITORY https://github.com/tmaklin/telescope.git GIT_TAG v0.6.2 PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/telescope" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" BUILD_IN_SOURCE 0 CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} -D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src + -D CMAKE_ALIGNMENT_WRITER_LIBRARY=${CMAKE_ALIGNMENT_WRITER_LIBRARY} + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -D 
"CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" INSTALL_COMMAND "" - DEPENDS bxzstr - cxxargs - cxxio - alignment-writer-compile ) - add_dependencies(mSWEEP telescope-compile) - set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/include) - set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/build/lib/libtelescope.a) - set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src) + FetchContent_MakeAvailable(telescope) + add_dependencies(telescope libalignmentwriter) + add_dependencies(mSWEEP libtelescope) + set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1) + set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/include) + set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libtelescope.a) + set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src) endif() include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS}) -target_link_libraries(mSWEEP ${CMAKE_TELESCOPE_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) +target_link_libraries(mSWEEP ${CMAKE_TELESCOPE_LIBRARY}) ## seamat if (DEFINED CMAKE_SEAMAT_HEADERS) message(STATUS "seamat headers provided in: ${CMAKE_SEAMAT_HEADERS}") else() - ExternalProject_Add(seamat + FetchContent_Declare(seamat GIT_REPOSITORY https://github.com/tmaklin/seamat.git GIT_TAG v0.2.1 PREFIX "external" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/seamat" - BUILD_IN_SOURCE 1 + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/seamat" + BUILD_IN_SOURCE 0 CMAKE_ARGS -D CMAKE_BUILD_TESTS=0 BUILD_COMMAND "" CONFIGURE_COMMAND "" INSTALL_COMMAND "" ) - add_dependencies(mSWEEP seamat) - set(CMAKE_SEAMAT_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/seamat/include) + FetchContent_MakeAvailable(seamat) + set(CMAKE_SEAMAT_HEADERS 
${CMAKE_CURRENT_SOURCE_DIR}/external/seamat/include) endif() include_directories(${CMAKE_SEAMAT_HEADERS}) ## rcgpar -if (DEFINED CMAKE_RCGPAR_LIBRARY AND DEFINED CMAKE_RCGPAR_HEADERS) +if (DEFINED CMAKE_RCGPAR_LIBRARY AND DEFINED CMAKE_RCGPAR_HEADERS AND DEFINED CMAKE_RCGUTILS_LIBRARY) message(STATUS "rcgpar headers provided in: ${CMAKE_RCGPAR_HEADERS}") message(STATUS "rcgpar library provided in: ${CMAKE_RCGPAR_LIBRARY}") + message(STATUS "rcgutils library provided in: ${CMAKE_RCGUTILS_LIBRARY}") else() - ExternalProject_Add(rcgpar-compile - GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v1.1.0 + FetchContent_Declare(rcgpar + GIT_REPOSITORY https://github.com/tmaklin/rcgpar.git + GIT_TAG v1.1.2 PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/rcgpar" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" BUILD_IN_SOURCE 0 CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT} @@ -266,13 +271,14 @@ else() -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" INSTALL_COMMAND "" - DEPENDS seamat ) - add_dependencies(mSWEEP rcgpar-compile) - set(CMAKE_RCGPAR_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/include) - set(CMAKE_RCGPAR_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar/build/lib/librcgomp.a) + FetchContent_MakeAvailable(rcgpar) + add_dependencies(mSWEEP rcgomp) + set(CMAKE_RCGPAR_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/rcgpar/include) + set(CMAKE_RCGPAR_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/lib/librcgomp.a") + set(CMAKE_RCGUTILS_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/lib/librcgutils.a") endif() -target_link_libraries(mSWEEP ${CMAKE_RCGPAR_LIBRARY}) +target_link_libraries(mSWEEP ${CMAKE_RCGPAR_LIBRARY} ${CMAKE_RCGUTILS_LIBRARY}) include_directories(${CMAKE_RCGPAR_HEADERS}) ## mGEMS @@ -280,10 +286,11 @@ if (DEFINED CMAKE_MGEMS_LIBRARY AND DEFINED CMAKE_MGEMS_HEADERS) message(STATUS "mGEMS headers provided in: ${CMAKE_MGEMS_HEADERS}") message(STATUS "mGEMS library 
provided in: ${CMAKE_MGEMS_LIBRARY}") else() - ExternalProject_Add(mGEMS-compile + FetchContent_Declare(mGEMS GIT_REPOSITORY https://github.com/PROBIC/mGEMS.git GIT_TAG v1.3.0 PREFIX "external" + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS" BUILD_IN_SOURCE 0 CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} @@ -299,18 +306,15 @@ else() -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" INSTALL_COMMAND "" - DEPENDS bxzstr - cxxargs - cxxio - alignment-writer-compile - seamat - telescope-compile ) - add_dependencies(mSWEEP mGEMS-compile) - set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/include) - set(CMAKE_MGEMS_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/build/lib/libmgems.a) + FetchContent_MakeAvailable(mGEMS) + add_dependencies(mGEMS libtelescope libalignmentwriter) + add_dependencies(mSWEEP libmgems) + set_target_properties(mGEMS PROPERTIES EXCLUDE_FROM_ALL 1) + set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include) + set(CMAKE_MGEMS_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/lib/libmgems.a) endif() -target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY}) +target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_TELESCOPE_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) include_directories(${CMAKE_MGEMS_HEADERS}) include_directories( From 0ebce32b5364574e9f0c8268c498e7650a546326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 27 Feb 2024 11:35:43 +0200 Subject: [PATCH 33/43] Add --run-rate toggle to calculate relative reliability of estimates --- README.md | 20 +++++++++++++++ include/Sample.hpp | 14 +++++++++++ src/Sample.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++++ src/mSWEEP.cpp | 34 +++++++++++++++++++++++-- 4 files changed, 129 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 28c7832..c946cc4 100644 --- a/README.md +++ b/README.md @@ 
-100,6 +100,23 @@ mSWEEP --themisto-1 fwd_compressed.aln --themisto-2 rev_compressed.aln -i cluste ``` +## (experimental) Reliability of abundance estimates +Add the `--run-rate` flag to calculate a relative reliability value for each abundance estimate using a variation of the [RATE method](https://doi.org/10.1214/18-AOAS1222): +``` +mSWEEP --themisto compressed.aln -i clustering.txt -t 2 --run-rate +``` +This will append the RATE and KLD columns to the output. RATE values that exceed `1/(number of lineages in clustering.txt)` are considered reliably estimated. + +If the reference contains many sequences that have zero or very few pseudoalignments, the denominator should be set to the number of lineages that have a nonzero abundance estimate instead of the total lineage count. + +A reliably estimated value means that an abundance estimate from +mSWEEP has a large effect on how well the statistical model in mSWEEP +fits the input alignment data. This translates to high values in +the KLD and RATE columns; each RATE value is derived from the KLD +values by dividing the corresponding KLD by the sum of all KLDs. + +__RATE as implemented in mSWEEP has not been tested thoroughly and is considered experimental.__ Consider using additional methods to verify the correctness of your results after filtering by RATE. + ## More options mSWEEP additionally accepts the following flags: @@ -153,6 +170,9 @@ Likelihood options: -q Mean for the beta-binomial component (default: 0.65). -e Dispersion term for the beta-binomial component (default: 0.01). --alphas Prior counts for the relative abundances, supply as comma-separated nonzero values (default: all 1.0). + +Experimental options: +--run-rate Calculate relative reliability for each abundance estimate using RATE (default: false). 
``` # References diff --git a/include/Sample.hpp b/include/Sample.hpp index faad5f1..0424369 100644 --- a/include/Sample.hpp +++ b/include/Sample.hpp @@ -42,6 +42,10 @@ class Sample { size_t counts_total; seamat::DenseMatrix ec_probabilities; + // Relative abundance estimate scoring via RATEs + bool rate_run = false; + std::vector log_KLDs; + protected: void count_alignments(const telescope::Alignment &alignment); @@ -59,13 +63,23 @@ class Sample { // Store equivalence class probabilities void store_probs(const seamat::DenseMatrix &probs) { this->ec_probabilities = std::move(probs); } + // Write equivalence class probabilities void write_probs(const std::vector &cluster_indicators_to_string, std::ostream *of); + // Calculate KLDs from probs + void dirichlet_kld(const std::vector &log_ec_hit_counts); + // Getters size_t get_counts_total() const { return this->counts_total; }; size_t get_n_reads() const { return this->n_reads; }; + const seamat::DenseMatrix& get_probs() const { return this->ec_probabilities; } + const std::vector& get_log_klds() const { return this->log_KLDs; } + std::vector get_rates() const; + size_t get_n_ecs() const { return this->ec_probabilities.get_rows(); } + size_t get_n_refs() const { return this->ec_probabilities.get_cols(); } + size_t get_rate_run() const { return this->rate_run; } }; class Binning { diff --git a/src/Sample.cpp b/src/Sample.cpp index 8e5b1d9..91821d0 100644 --- a/src/Sample.cpp +++ b/src/Sample.cpp @@ -84,4 +84,67 @@ void Sample::write_probs(const std::vector &cluster_indicators_to_s } } +double digamma(double x) { + double result = 0, xx, xx2, xx4; + for ( ; x < 7; ++x) + result -= 1/x; + x -= 1.0/2.0; + xx = 1.0/x; + xx2 = xx*xx; + xx4 = xx2*xx2; + result += std::log(x)+(1./24.)*xx2-(7.0/960.0)*xx4+(31.0/8064.0)*xx4*xx2-(127.0/30720.0)*xx4*xx4; + return result; +} + +void Sample::dirichlet_kld(const std::vector &log_ec_hit_counts) { + size_t rows = this->get_probs().get_rows(); + size_t cols = 
this->get_probs().get_cols(); + + std::vector alphas(rows, 0.0); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < cols; ++j) { + size_t num_hits = std::round(std::exp(log_ec_hit_counts[j])); + for (size_t k = 0; k < num_hits; ++k) { + alphas[i] += std::exp(this->get_probs()(i, j)); + } + } + } + + double alpha0 = 0.0; + for (size_t i = 0; i < rows; ++i) { + alpha0 += alphas[i]; + } + + this->log_KLDs.resize(rows); + for (size_t i = 0; i < rows; ++i) { + double log_theta = std::log(alphas[i]) - std::log(alpha0); + double alpha_k = alphas[rows - 1]; + double alpha_j = alphas[i]; + double KLD = std::max(std::lgamma(alpha0) - std::lgamma(alpha0 - alpha_j) - std::lgamma(alpha_j) + alpha_j * (digamma(alpha_j) - digamma(alpha0)), 1e-16); + this->log_KLDs[i] = std::log(KLD); + } + + this->rate_run = true; +} + +std::vector Sample::get_rates() const { + double max_elem = 0.0; + // TODO pragma with custom reduction to find maximum + for (size_t i = 0; i < this->log_KLDs.size(); ++i) { + max_elem = (max_elem > this->log_KLDs[i] ? 
max_elem : this->log_KLDs[i]); + } + double tmp_sum = 0.0; +#pragma omp parallel for schedule(static) reduction(+:tmp_sum) + for (size_t i = 0; i < this->log_KLDs.size(); ++i) { + tmp_sum += std::exp(this->log_KLDs[i] - max_elem); + } + double log_KLDs_sum = std::log(tmp_sum) + max_elem; + + std::vector RATE(this->log_KLDs.size()); +#pragma omp parallel for schedule(static) + for (size_t i = 0; i < this->log_KLDs.size(); ++i) { + RATE[i] = std::exp(this->log_KLDs[i] - log_KLDs_sum); + } + return RATE; +} } diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index b43323e..b2ae10b 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -136,9 +136,11 @@ void parse_args(int argc, char* argv[], cxxargs::Arguments &args) { // Dispersion term for likelihood args.add_short_argument('e', "Dispersion term for the beta-binomial component (default: 0.01).", 0.01); // Prior parameters for estimation - args.add_long_argument>("alphas", "Prior counts for the relative abundances, supply as comma-separated nonzero values (default: all 1.0)."); + args.add_long_argument>("alphas", "Prior counts for the relative abundances, supply as comma-separated nonzero values (default: all 1.0).\n\nExperimental options:"); args.set_not_required("alphas"); + args.add_long_argument("run-rate", "Calculate relative reliability for each abundance estimate using RATE (default: false).", false); + if (CmdOptionPresent(argv, argv+argc, "--help")) { // Print help message and continue. std::cerr << "\n" + args.help() << '\n' << '\n'; @@ -424,6 +426,11 @@ int main (int argc, char *argv[]) { return 1; } + if (args.value("run-rate")) { + std::cerr << "WARNING: --run-rate is an experimental option that has not been thoroughly tested and is subject to change.\n" << std::endl; + sample->dirichlet_kld(log_likelihoods->log_counts()); + } + // Run binning if requested and write results to files. if (rank == 0) { // root performs the rest. 
// Turn the probs into relative abundances @@ -508,7 +515,30 @@ int main (int argc, char *argv[]) { // Write relative abundances if (rank == 0 && !args.value("no-fit-model")) { try { - sample->write_abundances(reference->group_names(i), out.abundances()); + if (sample->get_rate_run()) { + const std::vector &log_kld = sample->get_log_klds(); + const std::vector &RATE = sample->get_rates(); + const std::vector &relative_abundances = sample->get_abundances(); + const std::vector &group_names = reference->group_names(i); + + std::ostream *of = out.abundances(); + // Write relative abundances to &of, + if (of->good()) { + (*of) << "#mSWEEP_version:" << '\t' << MSWEEP_BUILD_VERSION << '\n'; + (*of) << "#num_reads:" << '\t' << sample->get_n_reads() << '\n'; + (*of) << "#num_aligned:" << '\t' << sample->get_counts_total() << '\n'; + (*of) << "#c_id" << '\t' << "mean_theta" << '\t' << "RATE" << '\t' << "KLD" << '\n'; + for (size_t i = 0; i < relative_abundances.size(); ++i) { + double KLD = std::exp(log_kld[i]); + (*of) << group_names[i] << '\t' << relative_abundances[i] << '\t' << RATE[i] << '\t' << KLD <<'\n'; + } + of->flush(); + } else { + throw std::runtime_error("Can't write to abundances file."); + } + } else { + sample->write_abundances(reference->group_names(i), out.abundances()); + } } catch (std::exception &e) { finalize("Writing the relative abundances failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; From a16959e8b2a353c03828bc0322f0de6b17fdf8fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 12 Dec 2023 19:43:36 +0200 Subject: [PATCH 34/43] Update rcgpar to v1.1.2 --- config/CMakeLists-rcgpar.txt.in | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 config/CMakeLists-rcgpar.txt.in diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in new file mode 100644 index 0000000..e09430d --- /dev/null +++ b/config/CMakeLists-rcgpar.txt.in @@ -0,0 +1,18 @@ 
+cmake_minimum_required(VERSION 2.8.2) + +project(rcgpar-get NONE) +include(ExternalProject) + +ExternalProject_Add(rcgpar-download + GIT_REPOSITORY https://github.com/tmaklin/rcgpar + GIT_TAG v1.1.2 + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" + BUILD_IN_SOURCE 0 + BUILD_COMMAND "" + CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT} + -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS} + INSTALL_COMMAND "" + TEST_COMMAND "" + UPDATE_COMMAND "" +) From e0cb515542bdd8cb2fda437a82df01000b70eb74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 28 May 2024 10:30:02 +0200 Subject: [PATCH 35/43] Update telescope to v0.7.0-prerelease and alignment-writer to v0.5.0. --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8e798b..962ddc1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,7 +170,7 @@ if (DEFINED CMAKE_ALIGNMENT_WRITER_HEADERS AND DEFINED CMAKE_ALIGNMENT_WRITER_LI else() FetchContent_Declare(alignment-writer GIT_REPOSITORY https://github.com/tmaklin/alignment-writer.git - GIT_TAG v0.4.0 + GIT_TAG v0.5.0 PREFIX "external" SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/alignment-writer" @@ -200,7 +200,7 @@ if (DEFINED CMAKE_TELESCOPE_LIBRARY AND DEFINED CMAKE_TELESCOPE_HEADERS) else() FetchContent_Declare(telescope GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v0.6.2 + GIT_TAG v0.7.0-prerelease PREFIX "external" SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/telescope" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" @@ -220,7 +220,7 @@ else() ) FetchContent_MakeAvailable(telescope) add_dependencies(telescope libalignmentwriter) - add_dependencies(mSWEEP libtelescope) + add_dependencies(mSWEEP telescope) set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1) 
set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/include) set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libtelescope.a) @@ -308,7 +308,7 @@ else() INSTALL_COMMAND "" ) FetchContent_MakeAvailable(mGEMS) - add_dependencies(mGEMS libtelescope libalignmentwriter) + add_dependencies(mGEMS telescope libalignmentwriter) add_dependencies(mSWEEP libmgems) set_target_properties(mGEMS PROPERTIES EXCLUDE_FROM_ALL 1) set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include) From 1434f70c6f07bc4fb256f12679585c1ebbe63ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 28 May 2024 14:24:52 +0200 Subject: [PATCH 36/43] Add experimental --ignore-zeros to drop targets with zero alignments --- README.md | 1 + include/Likelihood.hpp | 84 +++++++++++++++++++++++++++++------------ include/Sample.hpp | 8 ++++ src/BootstrapSample.cpp | 33 ++++++++++++++++ src/PlainSample.cpp | 25 ++++++++++++ src/Sample.cpp | 34 +++++++++++++++++ src/mSWEEP.cpp | 48 +++++++++++++++++------ 7 files changed, 197 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 28c7832..dc5674c 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,7 @@ Estimation options: --no-fit-model Do not estimate the abundances. Useful if only the likelihood matrix is required (default: false). --max-iters Maximum number of iterations to run the abundance estimation optimizer for (default: 5000). --tol Optimization terminates when the bound changes by less than the given tolerance (default: 0.000001). +--ignore-zeros Ignore target clusters that did not have any reads align against them (default: false). Bootstrapping options: --iters Number of times to rerun estimation with bootstrapped alignments (default: 0). 
diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 82153ff..51066a8 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -75,6 +75,8 @@ class Likelihood { // Get the ec counts virtual const std::vector& log_counts() const =0; + // Get vector indicating which groups were included + virtual const std::vector& groups_considered() const =0; }; template @@ -83,6 +85,7 @@ class LL_WOR21 : public Likelihood { seamat::DenseMatrix log_likelihoods; std::vector log_ec_counts; std::vector> bb_params; + std::vector groups_mask; seamat::DenseMatrix precalc_lls(const std::vector &group_sizes, const size_t n_groups) { V max_size = 0; // Storing the grouping can take a lot less space if it can be done with uint16_t or uint8_t. @@ -101,15 +104,44 @@ class LL_WOR21 : public Likelihood { return ll_mat; } - void fill_ll_mat(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups) { + void fill_ll_mat(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const bool mask_groups) { size_t num_ecs = alignment.n_ecs(); - const seamat::DenseMatrix &precalc_lls_mat = this->precalc_lls(group_sizes, n_groups); + this->groups_mask = std::vector(n_groups, !mask_groups); + if (mask_groups) { + // Create mask identifying groups that have at least 1 alignment +#pragma omp parallel for schedule(static) shared(groups_mask) + for (size_t i = 0; i < num_ecs; ++i) { + for (size_t j = 0; j < n_groups; ++j) { + this->groups_mask[j] = groups_mask[j] || (alignment(j, i) > 0); + std::cerr << alignment(j, i) << '\t'; + } + std::cerr << '\n'; + } + } + size_t n_masked_groups = 0; +#pragma omp parallel for schedule(static) reduction(+:n_masked_groups) + for (size_t i = 0; i < n_groups; ++i) { + n_masked_groups += groups_mask[i]; + } + + std::vector masked_group_sizes; + if (mask_groups) { + for (size_t i = 0; i < n_groups; ++i) { + if (this->groups_mask[i]) { + masked_group_sizes.push_back(group_sizes[i]); 
+ } + } + } else { + masked_group_sizes = group_sizes; + } + + const seamat::DenseMatrix &precalc_lls_mat = this->precalc_lls(masked_group_sizes, n_masked_groups); - this->log_likelihoods.resize(n_groups, num_ecs, -4.60517); // -4.60517 = log(0.01) + this->log_likelihoods.resize(n_masked_groups, num_ecs, -4.60517); // -4.60517 = log(0.01) #pragma omp parallel for schedule(static) shared(precalc_lls_mat) for (size_t j = 0; j < num_ecs; ++j) { - for (size_t i = 0; i < n_groups; ++i) { + for (size_t i = 0; i < n_masked_groups; ++i) { this->log_likelihoods(i, j) = precalc_lls_mat(i, alignment(i, j)); } } @@ -139,14 +171,14 @@ class LL_WOR21 : public Likelihood { public: LL_WOR21() = default; - LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu) { + LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const bool mask_groups) { T bb_constants[2] = { tol, frac_mu }; this->update_bb_parameters(group_sizes, n_groups, bb_constants); - this->from_grouped_alignment(alignment, group_sizes, n_groups); + this->from_grouped_alignment(alignment, group_sizes, n_groups, mask_groups); } - void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups) { - this->fill_ll_mat(alignment, group_sizes, n_groups); + void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const bool mask_groups) { + this->fill_ll_mat(alignment, group_sizes, n_groups, mask_groups); this->fill_ec_counts(alignment); } @@ -256,51 +288,53 @@ class LL_WOR21 : public Likelihood { // Get the ec counts const std::vector& log_counts() const override { return this->log_ec_counts; }; + // Get the groups mask + const std::vector& groups_considered() const override { return this->groups_mask; }; }; template -std::unique_ptr> 
ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e) { +std::unique_ptr> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e, const bool mask_groups) { size_t max_group_size = grouping.max_group_size(); size_t n_groups = grouping.get_n_groups(); std::unique_ptr> log_likelihoods; if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } } else if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new 
mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } } else if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } } else { if (n_groups <= std::numeric_limits::max()) { - 
log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups)); } } return log_likelihoods; diff --git a/include/Sample.hpp b/include/Sample.hpp index faad5f1..0b2f0a2 100644 --- a/include/Sample.hpp +++ b/include/Sample.hpp @@ -54,12 +54,16 @@ class Sample { virtual const std::vector& get_abundances() const =0; // Write the relative abundances virtual void write_abundances(const std::vector &group_names, std::ostream *of) const =0; + virtual void write_abundances2(const std::vector &estimated_group_names, + const std::vector &zero_group_names, std::ostream *of) const =0; // Non-virtuals // Store equivalence class probabilities void store_probs(const seamat::DenseMatrix &probs) { this->ec_probabilities = std::move(probs); } void write_probs(const std::vector &cluster_indicators_to_string, std::ostream *of); + void write_probs2(const std::vector &cluster_indicators_to_string, + const std::vector &zero_indicators_to_string, std::ostream *of); // Getters size_t get_counts_total() 
const { return this->counts_total; }; @@ -102,6 +106,8 @@ class PlainSample : public Sample { // Write the relative abundances void write_abundances(const std::vector &group_names, std::ostream *of) const override; + void write_abundances2(const std::vector &estimated_group_names, + const std::vector &zero_group_names, std::ostream *of) const override; // Getters const std::vector& get_abundances() const override { return this->relative_abundances; } @@ -161,6 +167,8 @@ class BootstrapSample : public Sample { // Write the bootstrap results void write_abundances(const std::vector &group_names, std::ostream *os) const override; + void write_abundances2(const std::vector &estimated_group_names, + const std::vector &zero_group_names, std::ostream *of) const override; // Getters const std::vector& get_abundances() const override { return this->bootstrap_results[0]; } diff --git a/src/BootstrapSample.cpp b/src/BootstrapSample.cpp index fb7f15e..694baf9 100644 --- a/src/BootstrapSample.cpp +++ b/src/BootstrapSample.cpp @@ -95,4 +95,37 @@ void BootstrapSample::write_abundances(const std::vector &group_nam } } +void BootstrapSample::write_abundances2(const std::vector &estimated_group_names, + const std::vector &zero_group_names, std::ostream *of) const { + // Write relative abundances to a file, + // outputs to std::cout if outfile is empty. 
+ if (of->good()) { + (*of) << "#mSWEEP_version:" << '\t' << MSWEEP_BUILD_VERSION << '\n'; + (*of) << "#num_reads:" << '\t' << this->get_n_reads() << '\n'; + (*of) << "#num_aligned:" << '\t' << this->get_counts_total() << '\n'; + (*of) << "#bootstrap_iters:" << '\t' << this->iters << '\n'; + (*of) << "#c_id" << '\t' << "mean_theta" << '\t' << "bootstrap_mean_thetas" << '\n'; + + size_t n_targets = estimated_group_names.size() + zero_group_names.size(); + for (size_t i = 0; i < n_targets; ++i) { + if (i < estimated_group_names.size()) { + (*of) << estimated_group_names[i] << '\t'; + (*of) << this->bootstrap_results[0][i] << '\t'; // First vec has the relative abundances without bootstrapping + for (size_t j = 0; j < this->iters; ++j) { + (*of) << this->bootstrap_results[j + 1][i] << (j == this->iters - 1 ? '\n' : '\t'); + } + } else { + (*of) << zero_group_names[i - estimated_group_names.size()] << '\t'; + (*of) << (double)0.0 << '\t'; // First vec has the relative abundances without bootstrapping + for (size_t j = 0; j < this->iters; ++j) { + (*of) << (double)0.0 << (j == this->iters - 1 ? 
'\n' : '\t'); + } + + } + } + of->flush(); + } else { + throw std::runtime_error("Could not write to abundances file."); + } +} } diff --git a/src/PlainSample.cpp b/src/PlainSample.cpp index f0c064c..c297a79 100644 --- a/src/PlainSample.cpp +++ b/src/PlainSample.cpp @@ -45,4 +45,29 @@ void PlainSample::write_abundances(const std::vector &group_names, } } +void PlainSample::write_abundances2(const std::vector &estimated_group_names, + const std::vector &zero_group_names, std::ostream *of) const { + // Write relative abundances to &of, + if (of->good()) { + (*of) << "#mSWEEP_version:" << '\t' << MSWEEP_BUILD_VERSION << '\n'; + (*of) << "#num_reads:" << '\t' << this->get_n_reads() << '\n'; + (*of) << "#num_aligned:" << '\t' << this->get_counts_total() << '\n'; + (*of) << "#c_id" << '\t' << "mean_theta" << '\n'; + size_t n_targets = estimated_group_names.size() + zero_group_names.size(); + for (size_t i = 0; i < n_targets; ++i) { + if (i < estimated_group_names.size()) { + (*of) << estimated_group_names[i] << '\t'; + (*of) << this->relative_abundances[i]; + } else { + (*of) << zero_group_names[i - estimated_group_names.size()] << '\t'; + (*of) << (double)0.0; + } + (*of) << '\n'; + } + of->flush(); + } else { + throw std::runtime_error("Can't write to abundances file."); + } +} + } diff --git a/src/Sample.cpp b/src/Sample.cpp index 8e5b1d9..c927521 100644 --- a/src/Sample.cpp +++ b/src/Sample.cpp @@ -84,4 +84,38 @@ void Sample::write_probs(const std::vector &cluster_indicators_to_s } } +void Sample::write_probs2(const std::vector &estimated_indicators_to_string, + const std::vector &zero_indicators_to_string, std::ostream *of) { + // Write the probability matrix to a file. 
+ if (of->good()) { + *of << "ec_id" << '\t'; + size_t n_rows = estimated_indicators_to_string.size() + zero_indicators_to_string.size(); + size_t n_cols = this->ec_probabilities.get_cols(); + for (size_t i = 0; i < n_rows; ++i) { + if (i < estimated_indicators_to_string.size()) { + *of << estimated_indicators_to_string[i]; + *of << (i < n_rows - 1 ? '\t' : '\n'); + } else { + *of << zero_indicators_to_string[i - estimated_indicators_to_string.size()]; + *of << (i < n_rows - 1 ? '\t' : '\n'); + } + } + for (size_t i = 0; i < n_cols; ++i) { + *of << i << '\t'; + for (size_t j = 0; j < n_rows; ++j) { + if (j < estimated_indicators_to_string.size()) { + *of << std::exp(this->ec_probabilities(j, i)); + } else { + *of << (double)0.0; + } + *of << (j < n_rows - 1 ? '\t' : '\n'); + } + } + *of << std::endl; + of->flush(); + } else { + throw std::runtime_error("Can't write to probs file."); + } +} + } diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index b43323e..8b44f97 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -122,7 +122,8 @@ void parse_args(int argc, char* argv[], cxxargs::Arguments &args) { // Maximum iterations to run the optimizer for args.add_long_argument("max-iters", "Maximum number of iterations to run the abundance estimation optimizer for (default: 5000).", (size_t)5000); // Tolerance for abundance estimation convergence - args.add_long_argument("tol", "Optimization terminates when the bound changes by less than the given tolerance (default: 0.000001).\n\nBootstrapping options:", (double)0.000001); + args.add_long_argument("tol", "Optimization terminates when the bound changes by less than the given tolerance (default: 0.000001).", (double)0.000001); + args.add_long_argument("ignore-zeros", "Ignore target clusters that did not have any reads align against them (default: false).\n\nBootstrapping options:):", false); // Number of iterations to run bootstrapping for args.add_long_argument("iters", "Number of times to rerun estimation with bootstrapped 
alignments (default: 0).", (size_t)0); @@ -363,7 +364,7 @@ int main (int argc, char *argv[]) { // Use the alignment data to populate the log_likelihoods matrix. try { - log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e')); + log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e'), args.value("ignore-zeros")); } catch (std::exception &e) { finalize("Building the log-likelihood array failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; @@ -400,6 +401,8 @@ int main (int argc, char *argv[]) { return 1; } + std::vector estimated_reference_names; + std::vector zero_reference_names; // Start the abundance estimation part if (args.value("no-fit-model")) { log << "Skipping relative abundance estimation (--no-fit-model toggled)" << '\n'; @@ -407,9 +410,9 @@ int main (int argc, char *argv[]) { log << "Estimating relative abundances" << '\n'; // Prior parameters - std::vector prior_counts(n_groups, 1.0); // Default is all = 1.0 + std::vector prior_counts(log_likelihoods->log_mat().get_rows(), 1.0); // Default is all = 1.0 if (CmdOptionPresent(argv, argv+argc, "--alphas")) { - if (args.value>("alphas").size() != n_groups) { + if (args.value>("alphas").size() != log_likelihoods->log_mat().get_rows()) { finalize("Error: --alphas must have the same number of values as there are groups.", log, true); return 1; } @@ -429,25 +432,36 @@ int main (int argc, char *argv[]) { // Turn the probs into relative abundances sample->store_abundances(rcgpar::mixture_components(sample->get_probs(), log_likelihoods->log_counts())); + if (args.value("ignore-zeros")) { + for (size_t j = 0; j < reference->group_names(i).size(); ++j) { + if (log_likelihoods->groups_considered()[j]) { + estimated_reference_names.push_back(reference->group_names(i)[j]); + } else { + zero_reference_names.push_back(reference->group_names(i)[j]); + } + } + } 
else { + estimated_reference_names = reference->group_names(i); + } // Bin the reads if requested if (bin_reads) { std::vector target_names; if (CmdOptionPresent(argv, argv+argc, "--target-groups")) { target_names = std::move(args.value>("target-groups")); } else { - target_names = reference->group_names(i); + target_names = estimated_reference_names; } if (CmdOptionPresent(argv, argv+argc, "--min-abundance")) { - mGEMS::FilterTargetGroups(reference->group_names(i), sample->get_abundances(), args.value("min-abundance"), &target_names); + mGEMS::FilterTargetGroups(estimated_reference_names, sample->get_abundances(), args.value("min-abundance"), &target_names); } std::vector> bins; try { if (bootstrap_mode) { mSWEEP::BinningBootstrap* bs = static_cast(&(*sample)); - bins = std::move(mGEMS::BinFromMatrix(bs->get_aligned_reads(), sample->get_abundances(), sample->get_probs(), reference->group_names(i), &target_names)); + bins = std::move(mGEMS::BinFromMatrix(bs->get_aligned_reads(), sample->get_abundances(), sample->get_probs(), estimated_reference_names, &target_names)); } else { mSWEEP::BinningSample* bs = static_cast(&(*sample)); - bins = std::move(mGEMS::BinFromMatrix(bs->get_aligned_reads(), sample->get_abundances(), sample->get_probs(), reference->group_names(i), &target_names)); + bins = std::move(mGEMS::BinFromMatrix(bs->get_aligned_reads(), sample->get_abundances(), sample->get_probs(), estimated_reference_names, &target_names)); } } catch (std::exception &e) { finalize("Binning the reads failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); @@ -470,10 +484,18 @@ int main (int argc, char *argv[]) { // Note: this ignores the printing_output variable because // we might want to print the probs even when writing to // pipe them somewhere. 
- sample->write_probs(reference->group_names(i), &std::cout); + if (args.value("ignore-zeros")) { + sample->write_probs2(estimated_reference_names, zero_reference_names, &std::cout); + } else { + sample->write_probs(estimated_reference_names, &std::cout); + } } if (args.value("write-probs")) { - sample->write_probs(reference->group_names(i), out.probs()); + if (args.value("ignore-zeros")) { + sample->write_probs2(estimated_reference_names, zero_reference_names, out.probs()); + } else { + sample->write_probs(estimated_reference_names, out.probs()); + } } } catch (std::exception &e) { finalize("Writing the probabilities failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); @@ -508,7 +530,11 @@ int main (int argc, char *argv[]) { // Write relative abundances if (rank == 0 && !args.value("no-fit-model")) { try { - sample->write_abundances(reference->group_names(i), out.abundances()); + if (args.value("ignore-zeros")) { + sample->write_abundances2(estimated_reference_names, zero_reference_names, out.abundances()); + } else { + sample->write_abundances(estimated_reference_names, out.abundances()); + } } catch (std::exception &e) { finalize("Writing the relative abundances failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; From 88fb17661270de75cb79e1d5589191b1449e9010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 28 May 2024 15:00:52 +0200 Subject: [PATCH 37/43] Remove extra prints --- include/Likelihood.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 51066a8..fc228ab 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -114,9 +114,7 @@ class LL_WOR21 : public Likelihood { for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_groups; ++j) { this->groups_mask[j] = groups_mask[j] || (alignment(j, i) > 0); - std::cerr << alignment(j, i) << '\t'; } - std::cerr << '\n'; } } size_t n_masked_groups = 0; From 
5619bc5cb2f73834c399af466a61de5a1c946fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 28 May 2024 17:36:56 +0200 Subject: [PATCH 38/43] Remove unused config file. --- config/CMakeLists-rcgpar.txt.in | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 config/CMakeLists-rcgpar.txt.in diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in deleted file mode 100644 index e09430d..0000000 --- a/config/CMakeLists-rcgpar.txt.in +++ /dev/null @@ -1,18 +0,0 @@ -cmake_minimum_required(VERSION 2.8.2) - -project(rcgpar-get NONE) -include(ExternalProject) - -ExternalProject_Add(rcgpar-download - GIT_REPOSITORY https://github.com/tmaklin/rcgpar - GIT_TAG v1.1.2 - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" - BUILD_IN_SOURCE 0 - BUILD_COMMAND "" - CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT} - -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS} - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_COMMAND "" -) From 5328669dc7daad7c9fdb5ce715119f7996843d1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 28 May 2024 21:47:16 +0200 Subject: [PATCH 39/43] Fix updating the beta-binomial parameters --- include/Likelihood.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 38ad911..c38ceca 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -86,7 +86,8 @@ class LL_WOR21 : public Likelihood { std::vector log_ec_counts; std::vector> bb_params; std::vector groups_mask; - double zero_inflation; + T zero_inflation; + T bb_constants[2]; seamat::DenseMatrix precalc_lls(const std::vector &group_sizes, const size_t n_groups) { V max_size = 0; // Storing the grouping can take a lot less space if it can be done with uint16_t or uint8_t. 
@@ -111,7 +112,6 @@ class LL_WOR21 : public Likelihood { this->groups_mask = std::vector(n_groups, !mask_groups); if (mask_groups) { // Create mask identifying groups that have at least 1 alignment -#pragma omp parallel for schedule(static) shared(groups_mask) for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_groups; ++j) { this->groups_mask[j] = groups_mask[j] || (alignment(j, i) > 0); @@ -119,7 +119,6 @@ class LL_WOR21 : public Likelihood { } } size_t n_masked_groups = 0; -#pragma omp parallel for schedule(static) reduction(+:n_masked_groups) for (size_t i = 0; i < n_groups; ++i) { n_masked_groups += groups_mask[i]; } @@ -135,9 +134,10 @@ class LL_WOR21 : public Likelihood { masked_group_sizes = group_sizes; } + this->update_bb_parameters(masked_group_sizes, n_masked_groups, this->bb_constants); const seamat::DenseMatrix &precalc_lls_mat = this->precalc_lls(masked_group_sizes, n_masked_groups); - this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(zero_inflation); + this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(this->zero_inflation)); #pragma omp parallel for schedule(static) shared(precalc_lls_mat) for (size_t j = 0; j < num_ecs; ++j) { for (size_t i = 0; i < n_masked_groups; ++i) { @@ -170,10 +170,10 @@ class LL_WOR21 : public Likelihood { public: LL_WOR21() = default; - LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const bool mask_groups, const T_zero_inflation) { - T bb_constants[2] = { tol, frac_mu }; + LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const bool mask_groups, const T _zero_inflation) { + this->bb_constants[0] = tol; + this->bb_constants[1] = frac_mu; this->zero_inflation = _zero_inflation; - this->update_bb_parameters(group_sizes, n_groups, bb_constants); this->from_grouped_alignment(alignment, group_sizes, n_groups, mask_groups); 
} From a2ed5dd036b50e9655d93e10d8965e736b4b3bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 28 May 2024 22:46:38 +0200 Subject: [PATCH 40/43] Fix indexing likelihoods when ignoring zeros --- include/Likelihood.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index c38ceca..47d267b 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -140,8 +140,12 @@ class LL_WOR21 : public Likelihood { this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(this->zero_inflation)); #pragma omp parallel for schedule(static) shared(precalc_lls_mat) for (size_t j = 0; j < num_ecs; ++j) { - for (size_t i = 0; i < n_masked_groups; ++i) { - this->log_likelihoods(i, j) = precalc_lls_mat(i, alignment(i, j)); + size_t groups_pos = 0; + for (size_t i = 0; i < n_groups; ++i) { + if (this->groups_mask[i]) { + this->log_likelihoods(groups_pos, j) = precalc_lls_mat(groups_pos, alignment(i, j)); + ++groups_pos; + } } } } From 59fe08c58aac03c4430301cf5bddb2842cf3a1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Wed, 29 May 2024 17:33:57 +0200 Subject: [PATCH 41/43] Rename ignore-zeros -> min-hits, allow higher ignore thresholds. --- README.md | 2 +- include/Likelihood.hpp | 59 ++++++++++++++++++++---------------------- src/mSWEEP.cpp | 16 ++++++------ 3 files changed, 37 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index e4b052b..d7c0dbc 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ Likelihood options: Experimental options: --run-rate Calculate relative reliability for each abundance estimate using RATE (default: false). ---ignore-zeros Ignore target clusters that did not have any reads align against them (default: false): +--min-hits Only consider target groups that have at least this many reads align to any sequence in them (default: 0). 
``` # References diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 47d267b..2265395 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -106,26 +106,22 @@ class LL_WOR21 : public Likelihood { return ll_mat; } - void fill_ll_mat(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const bool mask_groups) { + void fill_ll_mat(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { size_t num_ecs = alignment.n_ecs(); + bool mask_groups = min_hits > 0; this->groups_mask = std::vector(n_groups, !mask_groups); + std::vector masked_group_sizes; if (mask_groups) { + std::vector group_hit_counts(n_groups, (size_t)0); // Create mask identifying groups that have at least 1 alignment for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_groups; ++j) { - this->groups_mask[j] = groups_mask[j] || (alignment(j, i) > 0); + group_hit_counts[j] += (alignment(j, i) > 0); } } - } - size_t n_masked_groups = 0; - for (size_t i = 0; i < n_groups; ++i) { - n_masked_groups += groups_mask[i]; - } - - std::vector masked_group_sizes; - if (mask_groups) { for (size_t i = 0; i < n_groups; ++i) { + this->groups_mask[i] = groups_mask[i] || (group_hit_counts[i] >= min_hits); if (this->groups_mask[i]) { masked_group_sizes.push_back(group_sizes[i]); } @@ -133,6 +129,7 @@ class LL_WOR21 : public Likelihood { } else { masked_group_sizes = group_sizes; } + size_t n_masked_groups = masked_group_sizes.size(); this->update_bb_parameters(masked_group_sizes, n_masked_groups, this->bb_constants); const seamat::DenseMatrix &precalc_lls_mat = this->precalc_lls(masked_group_sizes, n_masked_groups); @@ -174,15 +171,15 @@ class LL_WOR21 : public Likelihood { public: LL_WOR21() = default; - LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const bool mask_groups, const T _zero_inflation) { 
+ LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) { this->bb_constants[0] = tol; this->bb_constants[1] = frac_mu; this->zero_inflation = _zero_inflation; - this->from_grouped_alignment(alignment, group_sizes, n_groups, mask_groups); + this->from_grouped_alignment(alignment, group_sizes, n_groups, min_hits); } - void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const bool mask_groups) { - this->fill_ll_mat(alignment, group_sizes, n_groups, mask_groups); + void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { + this->fill_ll_mat(alignment, group_sizes, n_groups, min_hits); this->fill_ec_counts(alignment); } @@ -296,49 +293,49 @@ class LL_WOR21 : public Likelihood { const std::vector& groups_considered() const override { return this->groups_mask; }; }; template -std::unique_ptr> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e, const bool mask_groups, const T zero_inflation) { +std::unique_ptr> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) { size_t max_group_size = grouping.max_group_size(); size_t n_groups = grouping.get_n_groups(); std::unique_ptr> log_likelihoods; if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - 
log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } } else if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else { - log_likelihoods.reset(new 
mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } } else if (max_group_size <= std::numeric_limits::max()) { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } } else { if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - 
log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else if (n_groups <= std::numeric_limits::max()) { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } else { - log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, mask_groups, zero_inflation)); + log_likelihoods.reset(new mSWEEP::LL_WOR21(static_cast*>(&grouping)->get_sizes(), alignment, n_groups, q, e, min_hits, zero_inflation)); } } return log_likelihoods; diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index 1bc2927..04d666c 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -141,7 +141,7 @@ void parse_args(int argc, char* argv[], cxxargs::Arguments &args) { args.set_not_required("alphas"); args.add_long_argument("run-rate", "Calculate relative reliability for each abundance estimate using RATE (default: false).", false); - args.add_long_argument("ignore-zeros", "Ignore target clusters that did not have any reads align against them (default: false).", false); + args.add_long_argument("min-hits", "Only consider target groups that have at least this many reads align to any sequence in them (default: 0).", (size_t)0); if (CmdOptionPresent(argv, argv+argc, "--help")) { // Print help message and continue. @@ -367,7 +367,7 @@ int main (int argc, char *argv[]) { // Use the alignment data to populate the log_likelihoods matrix. 
try { - log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e'), args.value("ignore-zeros"), args.value("zero-inflation")); + log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e'), args.value("min-hits"), args.value("zero-inflation")); } catch (std::exception &e) { finalize("Building the log-likelihood array failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; @@ -435,8 +435,8 @@ int main (int argc, char *argv[]) { sample->dirichlet_kld(log_likelihoods->log_counts()); } - if (args.value("ignore-zeros")) { - std::cerr << "WARNING: --ignore-zeros is an experimental option that has not been thoroughly tested and is subject to change.\n" << std::endl; + if (args.value("min-hits") > 0) { + std::cerr << "WARNING: --min-hits > 0 is an experimental option that has not been thoroughly tested and is subject to change.\n" << std::endl; } // Run binning if requested and write results to files. @@ -444,7 +444,7 @@ int main (int argc, char *argv[]) { // Turn the probs into relative abundances sample->store_abundances(rcgpar::mixture_components(sample->get_probs(), log_likelihoods->log_counts())); - if (args.value("ignore-zeros")) { + if (args.value("min-hits") > 0) { for (size_t j = 0; j < reference->group_names(i).size(); ++j) { if (log_likelihoods->groups_considered()[j]) { estimated_reference_names.push_back(reference->group_names(i)[j]); @@ -496,14 +496,14 @@ int main (int argc, char *argv[]) { // Note: this ignores the printing_output variable because // we might want to print the probs even when writing to // pipe them somewhere. 
- if (args.value("ignore-zeros")) { + if (args.value("min-hits") > 0) { sample->write_probs2(estimated_reference_names, zero_reference_names, &std::cout); } else { sample->write_probs(estimated_reference_names, &std::cout); } } if (args.value("write-probs")) { - if (args.value("ignore-zeros")) { + if (args.value("min-hits") > 0) { sample->write_probs2(estimated_reference_names, zero_reference_names, out.probs()); } else { sample->write_probs(estimated_reference_names, out.probs()); @@ -568,7 +568,7 @@ int main (int argc, char *argv[]) { throw std::runtime_error("Can't write to abundances file."); } } else { - if (args.value("ignore-zeros")) { + if (args.value("min-hits") > 0) { sample->write_abundances2(estimated_reference_names, zero_reference_names, out.abundances()); } else { sample->write_abundances(estimated_reference_names, out.abundances()); From d7dee7b248b8eb91543f8f1b8074c753e30326f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Wed, 29 May 2024 18:39:38 +0200 Subject: [PATCH 42/43] Weigh the group_hit_counts by EC counts --- include/Likelihood.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 2265395..73046bd 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -117,7 +117,7 @@ class LL_WOR21 : public Likelihood { // Create mask identifying groups that have at least 1 alignment for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_groups; ++j) { - group_hit_counts[j] += (alignment(j, i) > 0); + group_hit_counts[j] += (alignment(j, i) > 0) * alignment.reads_in_ec(i); } } for (size_t i = 0; i < n_groups; ++i) { From e11f4453c32393706d5fc42a8ece50bc96333f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Thu, 30 May 2024 09:51:15 +0200 Subject: [PATCH 43/43] Document --min-hits. 
--- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index d7c0dbc..c7ed8b1 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ i.e. the file format is automatically detected (alignment-writer v0.4.0 and newe We recommend running [demix\_check](https://github.com/tmaklin/coreutils_demix_check) on the binned reads and/or [checkm](https://github.com/Ecogenomics/CheckM) on the bin-assembled genomes (BAGs) to evaluate the accuracy of the results. ## Working with large alignment files +### Compressing Themisto output files For complex input data with many organisms, the pseudoalignment files from Themisto can get infeasibly large. In these cases, [alignment-writer](https://github.com/tmaklin/alignment-writer) can be used to compress the alignment files to <10% of the original size. mSWEEP >=v2.0.0 can read the compressed alignments in directly by running @@ -100,6 +101,15 @@ mSWEEP --themisto-1 fwd_compressed.aln --themisto-2 rev_compressed.aln -i cluste ``` +### Running estimation on large sparse alignments +If the target alignment is sparse, meaning that there are target groups that have few or no reads aligning against them in the whole sample, mSWEEP can be instructed to ignore these in the estimation by adding the `--min-hits 1` flag: +``` +mSWEEP --themisto sparse.aln -i clustering.txt -t 2 --min-hits 1 +``` +This will reduce the runtime and memory use of the estimation in proportion to how many target groups are removed. Using `--min-hits 1` does not affect the results beyond differences in computational accuracy. + +The `--min-hits` flag also accepts values higher than 1 for pruning target groups with a small number of aligned reads. Using a value higher than 1 will change the resulting estimates. 
+ ## (experimental) Reliability of abundance estimates Add the `--run-rate` flag to calculate a relative reliability value for each abundance estimate using a variation of the [RATE method](https://doi.org/10.1214/18-AOAS1222) ```