From 36efd924a393b5188c16eeb18caf73fcf47abda4 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2026 16:59:46 -0400 Subject: [PATCH 1/3] perf: use stepfun2 (loop-based) instead of stats::stepfun in expected_event stats::stepfun has high creation overhead (~57x slower than stepfun2). In expected_event, step functions are created per call and evaluated on small vectors (3-5 elements), making creation cost dominant. Additionally, improve stepfun2 eval speed for small breakpoint vectors (n <= 10) by replacing findInterval with a for-loop of vectorized comparisons (x >= b). This avoids findInterval's dispatch overhead and binary search, which are overkill for 3-4 breakpoints. Net speedup for the step-function portion of expected_event: ~21x vs stats::stepfun, ~2x vs the previous findInterval-based stepfun2. Co-Authored-By: Claude Opus 4.6 --- R/expected_event.R | 20 ++++---------------- R/utils.R | 10 +++++++--- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/R/expected_event.R b/R/expected_event.R index c105678fe..270801547 100644 --- a/R/expected_event.R +++ b/R/expected_event.R @@ -164,23 +164,11 @@ expected_event <- function( } # Create 3 step functions (sf) ---- - # Step function to define enrollment rates over time - sf_enroll_rate <- stats::stepfun(c(0, cumsum(enroll_rate$duration)), - c(0, enroll_rate$rate, 0), - right = FALSE - ) - # step function to define failure rates over time + sf_enroll_rate <- stepfun2(c(0, cumsum(enroll_rate$duration)), c(0, enroll_rate$rate, 0)) start_fail <- c(0, cumsum(fail_rate$duration)) fail_rate_last <- nrow(fail_rate) - sf_fail_rate <- stats::stepfun(start_fail, - c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last]), - right = FALSE - ) - # step function to define dropout rates over time - sf_dropout_rate <- stats::stepfun(start_fail, - c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last]), - right = FALSE - ) + sf_fail_rate <- stepfun2(start_fail, c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last])) + sf_dropout_rate <- stepfun2(start_fail, c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last])) # combine sub-intervals from enroll + failure + dropout # # impute the NA by step functions @@ -223,7 +211,7 @@ expected_event <- function( if (simple) { ans <- sum(df$nbar) } else { - sf_start_fail <- stats::stepfun(start_fail, c(0, start_fail), right = FALSE) + sf_start_fail <- stepfun2(start_fail, c(0, start_fail)) ans <- data.frame( fail_rate = df$fail_rate_var, event = df$nbar, diff --git a/R/utils.R b/R/utils.R index fc7f8a122..07638f28e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -51,9 +51,13 @@ is_wholenumber <- function (x, tol = .Machine$double.eps^0.5) { # a faster version of stats::stepfun() since we don't need to consider interpolation stepfun2 <- function(x0, y, right = FALSE) { x0; y # avoid lazy evaluation: evaluate right now - function(x) { - i <- findInterval(x, x0, left.open = right) - y[i + 1] + if (length(x0) <= 10L) { + if (right) + function(x) { i <- 1L; for (b in x0) i <- i + (x > b); y[i] } + else + function(x) { i <- 1L; for (b in x0) i <- i + (x >= b); y[i] } + } else { + function(x) y[findInterval(x, x0, left.open = right) + 1L] } } From f42373fa48e795be27ea9c838799a472d9e34548 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2026 17:01:36 -0400 Subject: [PATCH 2/3] add comments to stepfun2 explaining the loop and `right` argument Co-Authored-By: Claude Opus 4.6 --- R/utils.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 07638f28e..bcf1da086 100644 --- a/R/utils.R +++ b/R/utils.R @@ -48,9 +48,14 @@ is_wholenumber <- function (x, tol = .Machine$double.eps^0.5) { abs(x - round(x)) < tol } -# a faster version of stats::stepfun() since we don't need to consider interpolation +# a faster version of stats::stepfun() since we don't need to consider interpolation; +# right = FALSE means left-closed intervals [x0[i], x0[i+1]), i.e., the function jumps +# at x0[i] (right-continuous); right = TRUE means right-closed (x0[i], x0[i+1]] stepfun2 <- function(x0, y, right = FALSE) { x0; y # avoid lazy evaluation: evaluate right now + # for small breakpoint vectors, a for-loop of vectorized comparisons (x >= b) + # is faster than findInterval(), which has dispatch overhead and uses binary + # search (overkill for 3-5 breakpoints) if (length(x0) <= 10L) { if (right) function(x) { i <- 1L; for (b in x0) i <- i + (x > b); y[i] } From 1c66243dcf95d343fc3ef317973ce49373906c9a Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2026 17:06:22 -0400 Subject: [PATCH 3/3] restore dropped comments in expected_event.R [ci skip] Co-Authored-By: Claude Opus 4.6 --- R/expected_event.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/expected_event.R b/R/expected_event.R index 270801547..efe680791 100644 --- a/R/expected_event.R +++ b/R/expected_event.R @@ -164,10 +164,13 @@ expected_event <- function( } # Create 3 step functions (sf) ---- + # Step function to define enrollment rates over time sf_enroll_rate <- stepfun2(c(0, cumsum(enroll_rate$duration)), c(0, enroll_rate$rate, 0)) + # step function to define failure rates over time start_fail <- c(0, cumsum(fail_rate$duration)) fail_rate_last <- nrow(fail_rate) sf_fail_rate <- stepfun2(start_fail, c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last])) + # step function to define dropout rates over time sf_dropout_rate <- stepfun2(start_fail, c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last])) # combine sub-intervals from enroll + failure + dropout #