From 36efd924a393b5188c16eeb18caf73fcf47abda4 Mon Sep 17 00:00:00 2001
From: Yihui Xie <xie@yihui.name>
Date: Mon, 22 Jun 2026 16:59:46 -0400
Subject: [PATCH 1/3] perf: use stepfun2 (loop-based) instead of stats::stepfun
 in expected_event

stats::stepfun has high creation overhead (~57x slower than stepfun2).
In expected_event, step functions are created per call and evaluated on
small vectors (3-5 elements), making creation cost dominant.

Additionally, improve stepfun2 eval speed for small breakpoint vectors
(n <= 10) by replacing findInterval with a for-loop of vectorized
comparisons (x >= b). This avoids findInterval's dispatch overhead and
binary search, which are overkill for 3-4 breakpoints.

Net speedup for the step-function portion of expected_event: ~21x vs
stats::stepfun, ~2x vs the previous findInterval-based stepfun2.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 R/expected_event.R | 20 ++++----------------
 R/utils.R          | 10 +++++++---
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/R/expected_event.R b/R/expected_event.R
index c105678fe..270801547 100644
--- a/R/expected_event.R
+++ b/R/expected_event.R
@@ -164,23 +164,11 @@ expected_event <- function(
   }
 
   # Create 3 step functions (sf) ----
-  # Step function to define enrollment rates over time
-  sf_enroll_rate <- stats::stepfun(c(0, cumsum(enroll_rate$duration)),
-    c(0, enroll_rate$rate, 0),
-    right = FALSE
-  )
-  # step function to define failure rates over time
+  sf_enroll_rate <- stepfun2(c(0, cumsum(enroll_rate$duration)), c(0, enroll_rate$rate, 0))
   start_fail <- c(0, cumsum(fail_rate$duration))
   fail_rate_last <- nrow(fail_rate)
-  sf_fail_rate <- stats::stepfun(start_fail,
-    c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last]),
-    right = FALSE
-  )
-  # step function to define dropout rates over time
-  sf_dropout_rate <- stats::stepfun(start_fail,
-    c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last]),
-    right = FALSE
-  )
+  sf_fail_rate <- stepfun2(start_fail, c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last]))
+  sf_dropout_rate <- stepfun2(start_fail, c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last]))
 
   # combine sub-intervals from enroll + failure + dropout #
   # impute the NA by step functions
@@ -223,7 +211,7 @@ expected_event <- function(
   if (simple) {
     ans <- sum(df$nbar)
   } else {
-    sf_start_fail <- stats::stepfun(start_fail, c(0, start_fail), right = FALSE)
+    sf_start_fail <- stepfun2(start_fail, c(0, start_fail))
     ans <- data.frame(
       fail_rate = df$fail_rate_var,
       event = df$nbar,
diff --git a/R/utils.R b/R/utils.R
index fc7f8a122..07638f28e 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -51,9 +51,13 @@ is_wholenumber <- function (x, tol = .Machine$double.eps^0.5)  {
 # a faster version of stats::stepfun() since we don't need to consider interpolation
 stepfun2 <- function(x0, y, right = FALSE) {
   x0; y  # avoid lazy evaluation: evaluate right now
-  function(x) {
-    i <- findInterval(x, x0, left.open = right)
-    y[i + 1]
+  if (length(x0) <= 10L) {
+    if (right)
+      function(x) { i <- 1L; for (b in x0) i <- i + (x > b); y[i] }
+    else
+      function(x) { i <- 1L; for (b in x0) i <- i + (x >= b); y[i] }
+  } else {
+    function(x) y[findInterval(x, x0, left.open = right) + 1L]
   }
 }
 

From f42373fa48e795be27ea9c838799a472d9e34548 Mon Sep 17 00:00:00 2001
From: Yihui Xie <xie@yihui.name>
Date: Mon, 22 Jun 2026 17:01:36 -0400
Subject: [PATCH 2/3] add comments to stepfun2 explaining the loop and `right`
 argument

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 R/utils.R | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/R/utils.R b/R/utils.R
index 07638f28e..bcf1da086 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -48,9 +48,14 @@ is_wholenumber <- function (x, tol = .Machine$double.eps^0.5)  {
   abs(x - round(x)) < tol
 }
 
-# a faster version of stats::stepfun() since we don't need to consider interpolation
+# a faster version of stats::stepfun() since we don't need to consider interpolation;
+# right = FALSE means left-closed intervals [x0[i], x0[i+1]), i.e., the function jumps
+# at x0[i] (right-continuous); right = TRUE means right-closed (x0[i], x0[i+1]]
 stepfun2 <- function(x0, y, right = FALSE) {
   x0; y  # avoid lazy evaluation: evaluate right now
+  # for small breakpoint vectors, a for-loop of vectorized comparisons (x >= b)
+  # is faster than findInterval(), which has dispatch overhead and uses binary
+  # search (overkill for 3-5 breakpoints)
   if (length(x0) <= 10L) {
     if (right)
       function(x) { i <- 1L; for (b in x0) i <- i + (x > b); y[i] }

From 1c66243dcf95d343fc3ef317973ce49373906c9a Mon Sep 17 00:00:00 2001
From: Yihui Xie <xie@yihui.name>
Date: Mon, 22 Jun 2026 17:06:22 -0400
Subject: [PATCH 3/3] restore dropped comments in expected_event.R [ci skip]

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 R/expected_event.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/R/expected_event.R b/R/expected_event.R
index 270801547..efe680791 100644
--- a/R/expected_event.R
+++ b/R/expected_event.R
@@ -164,10 +164,13 @@ expected_event <- function(
   }
 
   # Create 3 step functions (sf) ----
+  # Step function to define enrollment rates over time
   sf_enroll_rate <- stepfun2(c(0, cumsum(enroll_rate$duration)), c(0, enroll_rate$rate, 0))
+  # step function to define failure rates over time
   start_fail <- c(0, cumsum(fail_rate$duration))
   fail_rate_last <- nrow(fail_rate)
   sf_fail_rate <- stepfun2(start_fail, c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last]))
+  # step function to define dropout rates over time
   sf_dropout_rate <- stepfun2(start_fail, c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last]))
 
   # combine sub-intervals from enroll + failure + dropout #