From 621d9f2bc05baa8a2f0ec8c387c5c35dc6094398 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Sat, 25 Apr 2026 02:56:51 -0400 Subject: [PATCH 1/3] Fix SDPR initialization and seed handling --- R/cpp11.R | 4 ++-- R/regularized_regression.R | 13 ++++++++-- man/sdpr.Rd | 9 +++++++ src/cpp11.cpp | 8 +++---- src/sdpr.cpp | 5 ++-- src/sdpr_mcmc.cpp | 11 +++++---- src/sdpr_mcmc.h | 22 ++++++++++------- tests/testthat/test_rr_dispatch.R | 16 +++++++++++++ tests/testthat/test_rr_sdpr.R | 40 +++++++++++++++++++++++++++++++ 9 files changed, 104 insertions(+), 24 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 8889b7a4..5a2b2a96 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -16,6 +16,6 @@ qtl_enrichment_rcpp <- function(r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, Im .Call(`_pecotmr_qtl_enrichment_rcpp`, r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, double_shrinkage, bessel_correction, num_threads) } -sdpr_rcpp <- function(bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed) { - .Call(`_pecotmr_sdpr_rcpp`, bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed) +sdpr_rcpp <- function(bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed, legacy_random_init) { + .Call(`_pecotmr_sdpr_rcpp`, bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed, legacy_random_init) } diff --git a/R/regularized_regression.R b/R/regularized_regression.R index a003d4e6..6530e7dc 100644 --- a/R/regularized_regression.R +++ b/R/regularized_regression.R @@ -148,11 +148,17 @@ prs_cs_weights <- function(stat, LD, ...) { #' @param iter Number of iterations for MCMC. Default is 1000. #' @param burn Number of burn-in iterations for MCMC. Default is 200. #' @param thin Thinning interval for MCMC. Default is 5. +#' @param init Initialization mode for cluster assignments. Use `"legacy_random"` +#' to restore the original SDPR-style random initialization, or `"null"` to +#' start all SNPs in the null cluster. Default is `"legacy_random"`. #' @param n_threads Number of threads to use. Default is 1. #' @param opt_llk Which likelihood to evaluate. 1 for equation 6 (slightly shrink the correlation of SNPs) #' and 2 for equation 5 (SNPs genotyped on different arrays in a separate cohort). #' Default is 1. #' @param verbose Whether to print verbose output. Default is true. +#' @param seed Optional unsigned integer seed for the C++ SDPR sampler. When +#' \code{NULL}, the sampler uses \code{std::random_device} and is not +#' reproducible. #' #' @return A list containing the estimated effect sizes (beta) and heritability (h2). #' @examples @@ -203,8 +209,10 @@ prs_cs_weights <- function(stat, LD, ...) { #' #' @export sdpr <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000, - a0k = 0.5, b0k = 0.5, iter = 1000, burn = 200, thin = 5, n_threads = 1, + a0k = 0.5, b0k = 0.5, iter = 1000, burn = 200, thin = 5, init = c("legacy_random", "null"), n_threads = 1, opt_llk = 1, verbose = TRUE, seed = NULL) { + init <- match.arg(init) + # Check if the sum of the rows in LD list is the same as length of bhat if (sum(sapply(LD, nrow)) != length(bhat)) { stop("The sum of the rows in LD list must be the same as the length of bhat.") @@ -236,7 +244,8 @@ sdpr <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = result <- sdpr_rcpp( bhat, LD, as.integer(n), per_variant_sample_size, array, a, c, as.integer(M), a0k, b0k, as.integer(iter), as.integer(burn), as.integer(thin), - as.integer(n_threads), as.integer(opt_llk), verbose, seed + as.integer(n_threads), as.integer(opt_llk), verbose, seed, + identical(init, "legacy_random") ) return(result) diff --git a/man/sdpr.Rd b/man/sdpr.Rd index 28577cd6..e83255bd 100644 --- a/man/sdpr.Rd +++ b/man/sdpr.Rd @@ -18,6 +18,7 @@ sdpr( iter = 1000, burn = 200, thin = 5, + init = c("legacy_random", "null"), n_threads = 1, opt_llk = 1, verbose = TRUE, @@ -53,6 +54,10 @@ initialized to a vector of 1's with length equal to `bhat`.} \item{thin}{Thinning interval for MCMC. Default is 5.} +\item{init}{Initialization mode for cluster assignments. Use `"legacy_random"` +to restore the original SDPR-style random initialization, or `"null"` to +start all SNPs in the null cluster. Default is `"legacy_random"`.} + \item{n_threads}{Number of threads to use. Default is 1.} \item{opt_llk}{Which likelihood to evaluate. 1 for equation 6 (slightly shrink the correlation of SNPs) @@ -60,6 +65,10 @@ and 2 for equation 5 (SNPs genotyped on different arrays in a separate cohort). Default is 1.} \item{verbose}{Whether to print verbose output. Default is true.} + +\item{seed}{Optional unsigned integer seed for the C++ SDPR sampler. When +\code{NULL}, the sampler uses \code{std::random_device} and is not +reproducible.} } \value{ A list containing the estimated effect sizes (beta) and heritability (h2). diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 568ae02d..085a67aa 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -34,10 +34,10 @@ extern "C" SEXP _pecotmr_qtl_enrichment_rcpp(SEXP r_gwas_pip, SEXP r_qtl_susie_f END_CPP11 } // sdpr.cpp -cpp11::writable::list sdpr_rcpp(const doubles& bhat_r, const list& LD, int n, sexp per_variant_sample_size, sexp array, double a, double c, int M, double a0k, double b0k, int iter, int burn, int thin, int n_threads, int opt_llk, bool verbose, sexp seed); -extern "C" SEXP _pecotmr_sdpr_rcpp(SEXP bhat_r, SEXP LD, SEXP n, SEXP per_variant_sample_size, SEXP array, SEXP a, SEXP c, SEXP M, SEXP a0k, SEXP b0k, SEXP iter, SEXP burn, SEXP thin, SEXP n_threads, SEXP opt_llk, SEXP verbose, SEXP seed) { +cpp11::writable::list sdpr_rcpp(const doubles& bhat_r, const list& LD, int n, sexp per_variant_sample_size, sexp array, double a, double c, int M, double a0k, double b0k, int iter, int burn, int thin, int n_threads, int opt_llk, bool verbose, sexp seed, bool legacy_random_init); +extern "C" SEXP _pecotmr_sdpr_rcpp(SEXP bhat_r, SEXP LD, SEXP n, SEXP per_variant_sample_size, SEXP array, SEXP a, SEXP c, SEXP M, SEXP a0k, SEXP b0k, SEXP iter, SEXP burn, SEXP thin, SEXP n_threads, SEXP opt_llk, SEXP verbose, SEXP seed, SEXP legacy_random_init) { BEGIN_CPP11 - return cpp11::as_sexp(sdpr_rcpp(cpp11::as_cpp>(bhat_r), cpp11::as_cpp>(LD), cpp11::as_cpp>(n), cpp11::as_cpp>(per_variant_sample_size), cpp11::as_cpp>(array), cpp11::as_cpp>(a), cpp11::as_cpp>(c), cpp11::as_cpp>(M), cpp11::as_cpp>(a0k), cpp11::as_cpp>(b0k), cpp11::as_cpp>(iter), cpp11::as_cpp>(burn), cpp11::as_cpp>(thin), cpp11::as_cpp>(n_threads), cpp11::as_cpp>(opt_llk), cpp11::as_cpp>(verbose), cpp11::as_cpp>(seed))); + return cpp11::as_sexp(sdpr_rcpp(cpp11::as_cpp>(bhat_r), cpp11::as_cpp>(LD), cpp11::as_cpp>(n), cpp11::as_cpp>(per_variant_sample_size), cpp11::as_cpp>(array), cpp11::as_cpp>(a), cpp11::as_cpp>(c), cpp11::as_cpp>(M), cpp11::as_cpp>(a0k), cpp11::as_cpp>(b0k), cpp11::as_cpp>(iter), cpp11::as_cpp>(burn), cpp11::as_cpp>(thin), cpp11::as_cpp>(n_threads), cpp11::as_cpp>(opt_llk), cpp11::as_cpp>(verbose), cpp11::as_cpp>(seed), cpp11::as_cpp>(legacy_random_init))); END_CPP11 } @@ -47,7 +47,7 @@ static const R_CallMethodDef CallEntries[] = { {"_pecotmr_lassosum_rss_rcpp", (DL_FUNC) &_pecotmr_lassosum_rss_rcpp, 5}, {"_pecotmr_prs_cs_rcpp", (DL_FUNC) &_pecotmr_prs_cs_rcpp, 12}, {"_pecotmr_qtl_enrichment_rcpp", (DL_FUNC) &_pecotmr_qtl_enrichment_rcpp, 9}, - {"_pecotmr_sdpr_rcpp", (DL_FUNC) &_pecotmr_sdpr_rcpp, 17}, + {"_pecotmr_sdpr_rcpp", (DL_FUNC) &_pecotmr_sdpr_rcpp, 18}, {NULL, NULL, 0} }; } diff --git a/src/sdpr.cpp b/src/sdpr.cpp index 090561a3..8019b70b 100644 --- a/src/sdpr.cpp +++ b/src/sdpr.cpp @@ -26,7 +26,8 @@ cpp11::writable::list sdpr_rcpp( int n_threads = 1, int opt_llk = 1, bool verbose = true, - sexp seed = R_NilValue + sexp seed = R_NilValue, + bool legacy_random_init = true ) { // Convert inputs to C++ types std::vector bhat = cpp11::as_cpp>(bhat_r); @@ -64,7 +65,7 @@ cpp11::writable::list sdpr_rcpp( // Call the mcmc function std::unordered_map results = mcmc( - data, n, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed_val + data, n, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed_val, legacy_random_init ); // Convert results to list diff --git a/src/sdpr_mcmc.cpp b/src/sdpr_mcmc.cpp index 0abd42bc..1955a049 100644 --- a/src/sdpr_mcmc.cpp +++ b/src/sdpr_mcmc.cpp @@ -144,13 +144,13 @@ void MCMC_state::sample_assignment(size_t j, const mcmc_data &dat, // Log-sum-exp for numerical stability (replaces SSE _mm_max_ps + exp_ps + _mm_hadd_ps) float max_elem = prob.max(); float log_exp_sum = max_elem - + std::logf(arma::accu(arma::exp(prob - max_elem))); + + std::log(arma::accu(arma::exp(prob - max_elem))); // Categorical sampling via inverse CDF // Original: mcmc.cpp lines 155-163 cls_assgn[i + start_i] = M - 1; for (size_t k = 0; k < M - 1; k++) { - rnd_i -= std::expf(prob(k) - log_exp_sum); + rnd_i -= std::exp(prob(k) - log_exp_sum); if (rnd_i < 0) { cls_assgn[i + start_i] = k; break; @@ -217,7 +217,7 @@ void MCMC_state::update_p() { p[M - 1] = (1 - sum > 0) ? (1 - sum) : 0; for (size_t i = 0; i < M; i++) { - log_p[i] = std::logf(static_cast(p[i]) + 1e-40f); + log_p[i] = std::log(static_cast(p[i]) + 1e-40f); } } @@ -510,14 +510,15 @@ std::unordered_map mcmc( unsigned n_threads, int opt_llk, bool verbose, - unsigned int seed + unsigned int seed, + bool legacy_random_init ) { int n_pst = (iter - burn) / thin; ldmat_data ldmat_dat; - MCMC_state state(data.beta_mrg.size(), M, a0k, b0k, sz, seed); + MCMC_state state(data.beta_mrg.size(), M, a0k, b0k, sz, seed, legacy_random_init); // Deflation correction for (size_t i = 0; i < data.beta_mrg.size(); i++) { diff --git a/src/sdpr_mcmc.h b/src/sdpr_mcmc.h index 46f0ddd4..3479d41d 100644 --- a/src/sdpr_mcmc.h +++ b/src/sdpr_mcmc.h @@ -126,7 +126,8 @@ std::vector cluster_var; std::vector suff_stats; std::vector sumsq; MCMC_state(size_t num_snp, size_t max_cluster, \ - double a0, double b0, double sz, unsigned int seed) { + double a0, double b0, double sz, unsigned int seed, + bool legacy_random_init = false) { a0k = a0; b0k = b0; N = sz; // Changed May 20 2021 // Now N (sz) is absorbed into A, B; so set to 1. @@ -148,14 +149,16 @@ MCMC_state(size_t num_snp, size_t max_cluster, \ suff_stats.assign(max_cluster, 0); sumsq.assign(max_cluster, 0.0); V.assign(max_cluster, 0.0); - // Initialize all SNPs to the null cluster (k=0). The original SDPR - // used random initialization (uniform over 0..M-1), but this causes - // the first sample_beta() call to allocate an enormous dense matrix - // (nearly all SNPs are "causal"), crashing with "Mat::init() too large". - // Starting from null is standard MCMC practice and lets the sampler - // discover causal assignments organically. - cls_assgn.assign(num_snp, 0); r.seed(seed); + if (legacy_random_init) { + std::uniform_int_distribution init_dist(0, M - 1); + cls_assgn.resize(num_snp); + for (size_t i = 0; i < num_snp; i++) { + cls_assgn[i] = static_cast(init_dist(r)); + } + } else { + cls_assgn.assign(num_snp, 0); + } } void sample_sigma2(); @@ -255,5 +258,6 @@ std::unordered_map mcmc( unsigned n_threads, int opt_llk, bool verbose, - unsigned int seed + unsigned int seed, + bool legacy_random_init = false ); diff --git a/tests/testthat/test_rr_dispatch.R b/tests/testthat/test_rr_dispatch.R index 2b2a4223..e6bc6a28 100644 --- a/tests/testthat/test_rr_dispatch.R +++ b/tests/testthat/test_rr_dispatch.R @@ -59,6 +59,22 @@ test_that("sdpr_weights dispatches to sdpr with correct arguments", { expect_equal(result, seq_len(p) * 0.02) }) +test_that("sdpr_weights forwards init mode", { + p <- 5 + bhat <- rnorm(p, sd = 0.1) + R <- diag(p) + stat <- list(b = bhat, n = rep(321, p)) + captured <- new.env(parent = emptyenv()) + local_mocked_bindings( + sdpr = function(bhat, LD, n, ...) { + captured$dots <- list(...) + list(beta_est = rep(0, length(bhat))) + } + ) + sdpr_weights(stat = stat, LD = R, init = "null", iter = 10, burn = 2) + expect_equal(captured$dots$init, "null") +}) + test_that("lassosum_rss_weights dispatches to lassosum_rss once per s value", { set.seed(42) p <- 10 diff --git a/tests/testthat/test_rr_sdpr.R b/tests/testthat/test_rr_sdpr.R index 39e35873..b2b78d3a 100644 --- a/tests/testthat/test_rr_sdpr.R +++ b/tests/testthat/test_rr_sdpr.R @@ -38,6 +38,13 @@ test_that("sdpr errors on invalid array values", { ) }) +test_that("sdpr errors on invalid init mode", { + expect_error( + sdpr(bhat = rnorm(5), LD = list(blk1 = diag(5)), n = 100, init = "bogus"), + "should be one of" + ) +}) + test_that("sdpr runs successfully", { set.seed(42) p <- 10 @@ -75,6 +82,39 @@ test_that("sdpr with valid array parameter", { expect_true(all(is.finite(result$beta_est))) }) +test_that("sdpr fixed-seed runs are reproducible with n_threads = 1 and legacy_random init", { + set.seed(42) + p <- 10 + bhat <- rnorm(p, sd = 0.1) + R <- diag(p) + out1 <- sdpr( + bhat = bhat, LD = list(blk1 = R), n = 100, + iter = 50, burn = 10, thin = 2, verbose = FALSE, + seed = 42L, init = "legacy_random", n_threads = 1 + ) + out2 <- sdpr( + bhat = bhat, LD = list(blk1 = R), n = 100, + iter = 50, burn = 10, thin = 2, verbose = FALSE, + seed = 42L, init = "legacy_random", n_threads = 1 + ) + expect_equal(out1$beta_est, out2$beta_est) + expect_equal(out1$h2, out2$h2) +}) + +test_that("sdpr supports null initialization explicitly", { + set.seed(42) + p <- 10 + bhat <- rnorm(p, sd = 0.1) + R <- diag(p) + result <- sdpr( + bhat = bhat, LD = list(blk1 = R), n = 100, + iter = 50, burn = 10, thin = 2, verbose = FALSE, + seed = 42L, init = "null" + ) + expect_equal(length(result$beta_est), p) + expect_true(all(is.finite(result$beta_est))) +}) + # ---- sdpr signal recovery ---- test_that("sdpr recovers signal direction on simulated genotype data", { set.seed(2024) From bc37d98e318d95031c8d8ad13af35faba2e29600 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Sat, 25 Apr 2026 23:34:49 -0400 Subject: [PATCH 2/3] Tighten SDPR initialization API --- R/cpp11.R | 4 ++-- R/regularized_regression.R | 11 ++++++----- man/sdpr.Rd | 9 +++++---- src/cpp11.cpp | 6 +++--- src/sdpr.cpp | 4 ++-- src/sdpr_mcmc.cpp | 10 +++++----- src/sdpr_mcmc.h | 6 +++--- tests/testthat/test_rr_sdpr.R | 6 +++--- 8 files changed, 29 insertions(+), 27 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 5a2b2a96..d18956bc 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -16,6 +16,6 @@ qtl_enrichment_rcpp <- function(r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, Im .Call(`_pecotmr_qtl_enrichment_rcpp`, r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, double_shrinkage, bessel_correction, num_threads) } -sdpr_rcpp <- function(bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed, legacy_random_init) { - .Call(`_pecotmr_sdpr_rcpp`, bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed, legacy_random_init) +sdpr_rcpp <- function(bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed, random_init) { + .Call(`_pecotmr_sdpr_rcpp`, bhat_r, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed, random_init) } diff --git a/R/regularized_regression.R b/R/regularized_regression.R index 6530e7dc..7ff12830 100644 --- a/R/regularized_regression.R +++ b/R/regularized_regression.R @@ -148,9 +148,10 @@ prs_cs_weights <- function(stat, LD, ...) { #' @param iter Number of iterations for MCMC. Default is 1000. #' @param burn Number of burn-in iterations for MCMC. Default is 200. #' @param thin Thinning interval for MCMC. Default is 5. -#' @param init Initialization mode for cluster assignments. Use `"legacy_random"` -#' to restore the original SDPR-style random initialization, or `"null"` to -#' start all SNPs in the null cluster. Default is `"legacy_random"`. +#' @param init Initialization mode for latent cluster assignments. Use `"random"` +#' to initialize SNPs uniformly across SDPR mixture components, matching the +#' original SDPR implementation, or `"null"` to start all SNPs in the null +#' cluster. Default is `"random"`. #' @param n_threads Number of threads to use. Default is 1. #' @param opt_llk Which likelihood to evaluate. 1 for equation 6 (slightly shrink the correlation of SNPs) #' and 2 for equation 5 (SNPs genotyped on different arrays in a separate cohort). @@ -209,7 +210,7 @@ prs_cs_weights <- function(stat, LD, ...) { #' #' @export sdpr <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000, - a0k = 0.5, b0k = 0.5, iter = 1000, burn = 200, thin = 5, init = c("legacy_random", "null"), n_threads = 1, + a0k = 0.5, b0k = 0.5, iter = 1000, burn = 200, thin = 5, init = c("random", "null"), n_threads = 1, opt_llk = 1, verbose = TRUE, seed = NULL) { init <- match.arg(init) @@ -245,7 +246,7 @@ sdpr <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = bhat, LD, as.integer(n), per_variant_sample_size, array, a, c, as.integer(M), a0k, b0k, as.integer(iter), as.integer(burn), as.integer(thin), as.integer(n_threads), as.integer(opt_llk), verbose, seed, - identical(init, "legacy_random") + identical(init, "random") ) return(result) diff --git a/man/sdpr.Rd b/man/sdpr.Rd index e83255bd..fdd5ae07 100644 --- a/man/sdpr.Rd +++ b/man/sdpr.Rd @@ -18,7 +18,7 @@ sdpr( iter = 1000, burn = 200, thin = 5, - init = c("legacy_random", "null"), + init = c("random", "null"), n_threads = 1, opt_llk = 1, verbose = TRUE, @@ -54,9 +54,10 @@ initialized to a vector of 1's with length equal to `bhat`.} \item{thin}{Thinning interval for MCMC. Default is 5.} -\item{init}{Initialization mode for cluster assignments. Use `"legacy_random"` -to restore the original SDPR-style random initialization, or `"null"` to -start all SNPs in the null cluster. Default is `"legacy_random"`.} +\item{init}{Initialization mode for latent cluster assignments. Use `"random"` +to initialize SNPs uniformly across SDPR mixture components, matching the +original SDPR implementation, or `"null"` to start all SNPs in the null +cluster. Default is `"random"`.} \item{n_threads}{Number of threads to use. Default is 1.} diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 085a67aa..5f57b1a4 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -34,10 +34,10 @@ extern "C" SEXP _pecotmr_qtl_enrichment_rcpp(SEXP r_gwas_pip, SEXP r_qtl_susie_f END_CPP11 } // sdpr.cpp -cpp11::writable::list sdpr_rcpp(const doubles& bhat_r, const list& LD, int n, sexp per_variant_sample_size, sexp array, double a, double c, int M, double a0k, double b0k, int iter, int burn, int thin, int n_threads, int opt_llk, bool verbose, sexp seed, bool legacy_random_init); -extern "C" SEXP _pecotmr_sdpr_rcpp(SEXP bhat_r, SEXP LD, SEXP n, SEXP per_variant_sample_size, SEXP array, SEXP a, SEXP c, SEXP M, SEXP a0k, SEXP b0k, SEXP iter, SEXP burn, SEXP thin, SEXP n_threads, SEXP opt_llk, SEXP verbose, SEXP seed, SEXP legacy_random_init) { +cpp11::writable::list sdpr_rcpp(const doubles& bhat_r, const list& LD, int n, sexp per_variant_sample_size, sexp array, double a, double c, int M, double a0k, double b0k, int iter, int burn, int thin, int n_threads, int opt_llk, bool verbose, sexp seed, bool random_init); +extern "C" SEXP _pecotmr_sdpr_rcpp(SEXP bhat_r, SEXP LD, SEXP n, SEXP per_variant_sample_size, SEXP array, SEXP a, SEXP c, SEXP M, SEXP a0k, SEXP b0k, SEXP iter, SEXP burn, SEXP thin, SEXP n_threads, SEXP opt_llk, SEXP verbose, SEXP seed, SEXP random_init) { BEGIN_CPP11 - return cpp11::as_sexp(sdpr_rcpp(cpp11::as_cpp>(bhat_r), cpp11::as_cpp>(LD), cpp11::as_cpp>(n), cpp11::as_cpp>(per_variant_sample_size), cpp11::as_cpp>(array), cpp11::as_cpp>(a), cpp11::as_cpp>(c), cpp11::as_cpp>(M), cpp11::as_cpp>(a0k), cpp11::as_cpp>(b0k), cpp11::as_cpp>(iter), cpp11::as_cpp>(burn), cpp11::as_cpp>(thin), cpp11::as_cpp>(n_threads), cpp11::as_cpp>(opt_llk), cpp11::as_cpp>(verbose), cpp11::as_cpp>(seed), cpp11::as_cpp>(legacy_random_init))); + return cpp11::as_sexp(sdpr_rcpp(cpp11::as_cpp>(bhat_r), cpp11::as_cpp>(LD), cpp11::as_cpp>(n), cpp11::as_cpp>(per_variant_sample_size), cpp11::as_cpp>(array), cpp11::as_cpp>(a), cpp11::as_cpp>(c), cpp11::as_cpp>(M), cpp11::as_cpp>(a0k), cpp11::as_cpp>(b0k), cpp11::as_cpp>(iter), cpp11::as_cpp>(burn), cpp11::as_cpp>(thin), cpp11::as_cpp>(n_threads), cpp11::as_cpp>(opt_llk), cpp11::as_cpp>(verbose), cpp11::as_cpp>(seed), cpp11::as_cpp>(random_init))); END_CPP11 } diff --git a/src/sdpr.cpp b/src/sdpr.cpp index 8019b70b..41fe80cd 100644 --- a/src/sdpr.cpp +++ b/src/sdpr.cpp @@ -27,7 +27,7 @@ cpp11::writable::list sdpr_rcpp( int opt_llk = 1, bool verbose = true, sexp seed = R_NilValue, - bool legacy_random_init = true + bool random_init = true ) { // Convert inputs to C++ types std::vector bhat = cpp11::as_cpp>(bhat_r); @@ -65,7 +65,7 @@ cpp11::writable::list sdpr_rcpp( // Call the mcmc function std::unordered_map results = mcmc( - data, n, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed_val, legacy_random_init + data, n, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose, seed_val, random_init ); // Convert results to list diff --git a/src/sdpr_mcmc.cpp b/src/sdpr_mcmc.cpp index 1955a049..982b3ed6 100644 --- a/src/sdpr_mcmc.cpp +++ b/src/sdpr_mcmc.cpp @@ -144,13 +144,13 @@ void MCMC_state::sample_assignment(size_t j, const mcmc_data &dat, // Log-sum-exp for numerical stability (replaces SSE _mm_max_ps + exp_ps + _mm_hadd_ps) float max_elem = prob.max(); float log_exp_sum = max_elem - + std::log(arma::accu(arma::exp(prob - max_elem))); + + std::logf(arma::accu(arma::exp(prob - max_elem))); // Categorical sampling via inverse CDF // Original: mcmc.cpp lines 155-163 cls_assgn[i + start_i] = M - 1; for (size_t k = 0; k < M - 1; k++) { - rnd_i -= std::exp(prob(k) - log_exp_sum); + rnd_i -= std::expf(prob(k) - log_exp_sum); if (rnd_i < 0) { cls_assgn[i + start_i] = k; break; @@ -217,7 +217,7 @@ void MCMC_state::update_p() { p[M - 1] = (1 - sum > 0) ? (1 - sum) : 0; for (size_t i = 0; i < M; i++) { - log_p[i] = std::log(static_cast(p[i]) + 1e-40f); + log_p[i] = std::logf(static_cast(p[i]) + 1e-40f); } } @@ -511,14 +511,14 @@ std::unordered_map mcmc( int opt_llk, bool verbose, unsigned int seed, - bool legacy_random_init + bool random_init ) { int n_pst = (iter - burn) / thin; ldmat_data ldmat_dat; - MCMC_state state(data.beta_mrg.size(), M, a0k, b0k, sz, seed, legacy_random_init); + MCMC_state state(data.beta_mrg.size(), M, a0k, b0k, sz, seed, random_init); // Deflation correction for (size_t i = 0; i < data.beta_mrg.size(); i++) { diff --git a/src/sdpr_mcmc.h b/src/sdpr_mcmc.h index 3479d41d..fc4dac11 100644 --- a/src/sdpr_mcmc.h +++ b/src/sdpr_mcmc.h @@ -127,7 +127,7 @@ std::vector suff_stats; std::vector sumsq; MCMC_state(size_t num_snp, size_t max_cluster, \ double a0, double b0, double sz, unsigned int seed, - bool legacy_random_init = false) { + bool random_init = false) { a0k = a0; b0k = b0; N = sz; // Changed May 20 2021 // Now N (sz) is absorbed into A, B; so set to 1. @@ -150,7 +150,7 @@ MCMC_state(size_t num_snp, size_t max_cluster, \ sumsq.assign(max_cluster, 0.0); V.assign(max_cluster, 0.0); r.seed(seed); - if (legacy_random_init) { + if (random_init) { std::uniform_int_distribution init_dist(0, M - 1); cls_assgn.resize(num_snp); for (size_t i = 0; i < num_snp; i++) { @@ -259,5 +259,5 @@ std::unordered_map mcmc( int opt_llk, bool verbose, unsigned int seed, - bool legacy_random_init = false + bool random_init = false ); diff --git a/tests/testthat/test_rr_sdpr.R b/tests/testthat/test_rr_sdpr.R index b2b78d3a..6f18d00a 100644 --- a/tests/testthat/test_rr_sdpr.R +++ b/tests/testthat/test_rr_sdpr.R @@ -82,7 +82,7 @@ test_that("sdpr with valid array parameter", { expect_true(all(is.finite(result$beta_est))) }) -test_that("sdpr fixed-seed runs are reproducible with n_threads = 1 and legacy_random init", { +test_that("sdpr fixed-seed runs are reproducible with n_threads = 1 and random init", { set.seed(42) p <- 10 bhat <- rnorm(p, sd = 0.1) @@ -90,12 +90,12 @@ test_that("sdpr fixed-seed runs are reproducible with n_threads = 1 and legacy_r out1 <- sdpr( bhat = bhat, LD = list(blk1 = R), n = 100, iter = 50, burn = 10, thin = 2, verbose = FALSE, - seed = 42L, init = "legacy_random", n_threads = 1 + seed = 42L, init = "random", n_threads = 1 ) out2 <- sdpr( bhat = bhat, LD = list(blk1 = R), n = 100, iter = 50, burn = 10, thin = 2, verbose = FALSE, - seed = 42L, init = "legacy_random", n_threads = 1 + seed = 42L, init = "random", n_threads = 1 ) expect_equal(out1$beta_est, out2$beta_est) expect_equal(out1$h2, out2$h2) From 37ccad37a114b9fba283edfd4611e197c39dd770 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Sun, 26 Apr 2026 05:01:37 -0400 Subject: [PATCH 3/3] Use portable single-precision SDPR math calls --- src/sdpr_mcmc.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sdpr_mcmc.cpp b/src/sdpr_mcmc.cpp index 982b3ed6..dbd76149 100644 --- a/src/sdpr_mcmc.cpp +++ b/src/sdpr_mcmc.cpp @@ -144,13 +144,13 @@ void MCMC_state::sample_assignment(size_t j, const mcmc_data &dat, // Log-sum-exp for numerical stability (replaces SSE _mm_max_ps + exp_ps + _mm_hadd_ps) float max_elem = prob.max(); float log_exp_sum = max_elem - + std::logf(arma::accu(arma::exp(prob - max_elem))); + + ::logf(arma::accu(arma::exp(prob - max_elem))); // Categorical sampling via inverse CDF // Original: mcmc.cpp lines 155-163 cls_assgn[i + start_i] = M - 1; for (size_t k = 0; k < M - 1; k++) { - rnd_i -= std::expf(prob(k) - log_exp_sum); + rnd_i -= ::expf(prob(k) - log_exp_sum); if (rnd_i < 0) { cls_assgn[i + start_i] = k; break; @@ -217,7 +217,7 @@ void MCMC_state::update_p() { p[M - 1] = (1 - sum > 0) ? (1 - sum) : 0; for (size_t i = 0; i < M; i++) { - log_p[i] = std::logf(static_cast(p[i]) + 1e-40f); + log_p[i] = ::logf(static_cast(p[i]) + 1e-40f); } }