From 2182b0f7fb3f6de18d09838639aed850569fbbe7 Mon Sep 17 00:00:00 2001
From: Adam Wildavsky <adam@tameware.com>
Date: Sat, 27 Jun 2026 18:51:04 -0500
Subject: [PATCH 1/6] Fix trump-void1 move ordering to restore v2.9 search
 efficiency

The heuristic extraction refactor changed weight_alloc_trump_void1's
first branch from `lead_suit == trump` to `suit == trump`. Since that
is exhaustive with the following `else if (suit != trump)`, the three
ruffing branches (using the `24 - rank + ...` formula) became dead
code, and trump ruffs were scored with side-suit discard weights
instead. This mis-ordered ruffs, costing alpha-beta cutoffs.

The effect is small for solve but compounds heavily in calc's warm-TT
iterative deepening: calc explored ~34% more nodes than v2.9. Restoring
the original `lead_suit == trump` pitch branch makes the ruffing
branches reachable again and cuts calc time ~25% (gap to v2.9: 1.37x ->
1.02x). Ordering-only change; double-dummy results are unchanged.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../heuristic_sorting/heuristic_sorting.cpp   | 48 ++++---------------
 1 file changed, 8 insertions(+), 40 deletions(-)

diff --git a/library/src/heuristic_sorting/heuristic_sorting.cpp b/library/src/heuristic_sorting/heuristic_sorting.cpp
index 449c0edf..95018f32 100644
--- a/library/src/heuristic_sorting/heuristic_sorting.cpp
+++ b/library/src/heuristic_sorting/heuristic_sorting.cpp
@@ -676,49 +676,17 @@ void weight_alloc_trump_void1(HeuristicContext& ctx)
   unsigned short suitCount = tpos.length[curr_hand][suit];
   int suitAdd;
 
-  if (suit == trump)
+  if (lead_suit == trump) // We pitch
   {
-    // We trump a non-trump card.
-    
-    if (tpos.length[partner_lh][lead_suit] != 0)
-    {
-      // 3rd hand will follow.
-  if ((tpos.rank_in_suit[rho_lh][lead_suit] >
-       (tpos.rank_in_suit[partner_lh][lead_suit] |
-    bit_map_rank[ctx.lead0_rank])) ||
-          ((tpos.length[rho_lh][lead_suit] == 0) &&
-           (tpos.length[rho_lh][trump] != 0)))
-      {
-        // Partner can win with a card or by ruffing.
-        suitAdd = 60 + (suitCount << 6) / 44;
-      }
-      else
-      {
-        suitAdd = -2 + (suitCount << 6) / 36;
-        // Don't ruff from Kx.
-        if ((suitCount == 2) &&
-            (tpos.second_best[suit].hand == curr_hand))
-          suitAdd += -4;
-      }
-    }
-    else if ((tpos.length[rho_lh][lead_suit] == 0) &&
-             (tpos.rank_in_suit[rho_lh][trump] >
-              tpos.rank_in_suit[partner_lh][trump]))
-    {
-      // Partner can overruff 3rd hand.
-      suitAdd = 60 + (suitCount << 6) / 44;
-    }
-  else if ((tpos.length[partner_lh][trump] == 0) &&
-       (tpos.rank_in_suit[rho_lh][lead_suit] >
-        bit_map_rank[ctx.lead0_rank]))
-    {
-      // 3rd hand has no trumps, and partner has suit winner.
-      suitAdd = 60 + (suitCount << 6) / 44;
-    }
+    if (tpos.rank_in_suit[rho_lh][lead_suit] >
+        (tpos.rank_in_suit[partner_lh][lead_suit] |
+         bit_map_rank[ctx.lead0_rank]))
+      // Partner can win.
+      suitAdd = (suitCount << 6) / 44;
     else
     {
-      suitAdd = -2 + (suitCount << 6) / 36;
-      // Don't ruff from Kx.
+      // Don't pitch from Kx.
+      suitAdd = (suitCount << 6) / 36;
       if ((suitCount == 2) &&
           (tpos.second_best[suit].hand == curr_hand))
         suitAdd += -4;

From 8ecebc8de2add27b275718bc86273be5a977d4e4 Mon Sep 17 00:00:00 2001
From: Adam Wildavsky <adam@tameware.com>
Date: Sun, 28 Jun 2026 01:28:03 +0100
Subject: [PATCH 2/6] Fix incorrect comment

Per Copilot.

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 library/src/heuristic_sorting/heuristic_sorting.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/src/heuristic_sorting/heuristic_sorting.cpp b/library/src/heuristic_sorting/heuristic_sorting.cpp
index 95018f32..88478881 100644
--- a/library/src/heuristic_sorting/heuristic_sorting.cpp
+++ b/library/src/heuristic_sorting/heuristic_sorting.cpp
@@ -681,7 +681,7 @@ void weight_alloc_trump_void1(HeuristicContext& ctx)
     if (tpos.rank_in_suit[rho_lh][lead_suit] >
         (tpos.rank_in_suit[partner_lh][lead_suit] |
          bit_map_rank[ctx.lead0_rank]))
-      // Partner can win.
+      // RHO can win.
       suitAdd = (suitCount << 6) / 44;
     else
     {

From 6234595600fdcaed6ac510efbd6d2d3cc393b3dd Mon Sep 17 00:00:00 2001
From: Adam Wildavsky <adam@tameware.com>
Date: Sun, 28 Jun 2026 07:45:52 -0500
Subject: [PATCH 3/6] Fix signed->unsigned cast bugs that corrupted move
 ordering and pruning

The heuristic/quick-tricks refactor introduced static_cast<unsigned char>
wrappers on values that v2.9 used as signed, changing search behavior:

- make_3 / make_3_ctx: winner[]/second_best[] .hand and .rank were cast
  to unsigned char, turning the -1 "no card" sentinel into 255. This broke
  winner[trump].hand == -1 style checks in QuickTricks, losing cutoffs.
- weight_alloc_trump_void2 / _void3: rel_rank[aggr[suit]][...] indexed
  through static_cast<unsigned char>(aggr[suit]), truncating the 13-bit
  aggregate holding to 8 bits and reading the wrong rel_rank row.
- QuickTricksPartnerHand{Trump,NT}: bit_map_rank index cast the signed
  rank through unsigned char.

With these reverted to v2.9's signed handling, the per-move-generation
ordering trace now matches v2.9 exactly (0 divergences on list1), closing
the residual calc gap to parity. Ordering/pruning-only change; double-dummy
results are unchanged and all library tests pass.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 library/src/ab_search.cpp                        | 16 ++++++++--------
 .../src/heuristic_sorting/heuristic_sorting.cpp  | 15 +++------------
 library/src/quick_tricks.cpp                     |  4 ++--
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/library/src/ab_search.cpp b/library/src/ab_search.cpp
index 762d75f0..c5b8ee96 100644
--- a/library/src/ab_search.cpp
+++ b/library/src/ab_search.cpp
@@ -878,10 +878,10 @@ void make_3(
 
       int aggr = posPoint->aggr[st];
 
-  posPoint->winner[st].rank = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[1][st].rank);
-  posPoint->winner[st].hand = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[1][st].hand);
-  posPoint->second_best[st].rank = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[2][st].rank);
-  posPoint->second_best[st].hand = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[2][st].hand);
+  posPoint->winner[st].rank = thrp->rel[aggr].abs_rank[1][st].rank;
+  posPoint->winner[st].hand = thrp->rel[aggr].abs_rank[1][st].hand;
+  posPoint->second_best[st].rank = thrp->rel[aggr].abs_rank[2][st].rank;
+  posPoint->second_best[st].hand = thrp->rel[aggr].abs_rank[2][st].hand;
 
     }
   }
@@ -944,10 +944,10 @@ static void make_3_ctx(
 
       int aggr = posPoint->aggr[st];
 
-      posPoint->winner[st].rank = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[1][st].rank);
-      posPoint->winner[st].hand = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[1][st].hand);
-      posPoint->second_best[st].rank = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[2][st].rank);
-      posPoint->second_best[st].hand = static_cast<unsigned char>(thrp->rel[aggr].abs_rank[2][st].hand);
+      posPoint->winner[st].rank = thrp->rel[aggr].abs_rank[1][st].rank;
+      posPoint->winner[st].hand = thrp->rel[aggr].abs_rank[1][st].hand;
+      posPoint->second_best[st].rank = thrp->rel[aggr].abs_rank[2][st].rank;
+      posPoint->second_best[st].hand = thrp->rel[aggr].abs_rank[2][st].hand;
 
     }
   }
diff --git a/library/src/heuristic_sorting/heuristic_sorting.cpp b/library/src/heuristic_sorting/heuristic_sorting.cpp
index 88478881..8ddd0a2d 100644
--- a/library/src/heuristic_sorting/heuristic_sorting.cpp
+++ b/library/src/heuristic_sorting/heuristic_sorting.cpp
@@ -1213,10 +1213,7 @@ void weight_alloc_trump_void2(HeuristicContext& ctx)
     mply[k].rank < ctx.move1_rank)
     {
       // Don't underruff.
-    unsigned char aggrSuit = static_cast<unsigned char>(tpos.aggr[suit]);
-    unsigned char moveRank = static_cast<unsigned char>(mply[k].rank);
-  unsigned char relRankValue = static_cast<unsigned char>(rel_rank[aggrSuit][moveRank]);
-    int r_rank = static_cast<int>(relRankValue);
+    int r_rank = rel_rank[tpos.aggr[suit]][mply[k].rank];
       suitAdd = (suitCount << 6) / 40;
       mply[k].weight = -32 + r_rank + suitAdd;
     }
@@ -1386,10 +1383,7 @@ void weight_alloc_trump_void3(HeuristicContext& ctx)
     {
       for (int k = last_num_moves; k < num_moves; k++)
       {
-    int r_rank = static_cast<int>(
-      static_cast<unsigned char>(
-  rel_rank[static_cast<unsigned char>(tpos.aggr[suit])]
-             [static_cast<unsigned char>(mply[k].rank)]));
+    int r_rank = rel_rank[tpos.aggr[suit]][mply[k].rank];
         if (mply[k].rank > ctx.move2_rank)
           mply[k].weight = 33 + r_rank; // Overruff
         else
@@ -1404,10 +1398,7 @@ void weight_alloc_trump_void3(HeuristicContext& ctx)
   {
     for (int k = last_num_moves; k < num_moves; k++)
     {
-    int r_rank = static_cast<int>(
-      static_cast<unsigned char>(
-  rel_rank[static_cast<unsigned char>(tpos.aggr[suit])]
-           [static_cast<unsigned char>(mply[k].rank)]));
+    int r_rank = rel_rank[tpos.aggr[suit]][mply[k].rank];
       mply[k].weight = 33 + r_rank;
     }
   }
diff --git a/library/src/quick_tricks.cpp b/library/src/quick_tricks.cpp
index 0c161406..48f37fe5 100644
--- a/library/src/quick_tricks.cpp
+++ b/library/src/quick_tricks.cpp
@@ -1000,7 +1000,7 @@ int QuickTricksPartnerHandTrump(
     if (ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].hand == partner[hand])
     {
       tpos.win_ranks[depth][suit] |= bit_map_rank[
-        static_cast<int>(static_cast<unsigned char>(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank)) ];
+        static_cast<int>(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank) ];
 
       tpos.win_ranks[depth][commSuit] |= bit_map_rank[commRank];
 
@@ -1110,7 +1110,7 @@ int QuickTricksPartnerHandNT(
     if (ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].hand == partner[hand])
     {
       tpos.win_ranks[depth][suit] |= bit_map_rank[
-        static_cast<int>(static_cast<unsigned char>(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank)) ];
+        static_cast<int>(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank) ];
       qt++;
       if (qt >= cutoff)
         return qt;

From b08925f881868d6a1f49771dc4923490c6b2a380 Mon Sep 17 00:00:00 2001
From: Adam Wildavsky <adam@tameware.com>
Date: Sun, 28 Jun 2026 16:12:18 -0500
Subject: [PATCH 4/6] Dispatch hardest boards first to shorten parallel calc
 tail

The parallel board loop handed boards out in index order via an atomic
counter, so a hard board picked near the end left one worker running long
while the others sat idle. Hand out the hardest boards first (longest-
processing-time-first) so the tail consists of cheap boards.

parallel_all_boards_n gains an optional dispatch-order permutation: workers
still pull from the same atomic counter, but the slot is mapped through the
order before becoming a board number, so only the dispatch sequence changes
and result placement is unaffected. The solve path passes no order and is
unchanged.

calc estimates per-deal difficulty with a cheap, trump-independent
structural proxy (deal_fanout, mirroring Scheduler::Fanout) and sorts board
indices by descending difficulty before dispatch.

calc list1000 -n18: ~11.0s -> ~9.6s wall (~13%), user CPU unchanged.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 library/src/calc_tables.cpp            | 45 +++++++++++++++++++++++++-
 library/src/system/parallel_boards.cpp | 20 +++++++++---
 library/src/system/parallel_boards.hpp |  9 +++++-
 3 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/library/src/calc_tables.cpp b/library/src/calc_tables.cpp
index b0446fe8..1c9df8b0 100644
--- a/library/src/calc_tables.cpp
+++ b/library/src/calc_tables.cpp
@@ -8,12 +8,15 @@
 */
 
 #include "calc_tables.hpp"
+#include <algorithm>
+#include <numeric>
 #include <vector>
 
 #include <pbn.hpp>
 #include <solve_board.hpp>
 #include <api/solve_board.hpp>
 #include <solver_if.hpp>
+#include <lookup_tables/lookup_tables.hpp>
 #include <system/memory.hpp>
 #include <system/parallel_boards.hpp>
 #include <system/scheduler.hpp>
@@ -23,6 +26,33 @@
 extern Memory memory;
 extern Scheduler scheduler;
 
+namespace
+{
+// Cheap structural difficulty estimate (cards only, trump-independent). Used to
+// dispatch the hardest boards first so the parallel tail is short. Mirrors
+// Scheduler::Fanout: per hand, sum the number of card groups per suit, with a
+// bonus for voids.
+auto deal_fanout(const Deal& dl) -> int
+{
+  int fanout = 0;  // Running total over all hands; this is the returned estimate.
+  for (int h = 0; h < DDS_HANDS; h++)
+  {
+    int fanout_suit = 0;  // Sum of (last_group_ + 1) over this hand's suits.
+    int num_voids = 0;    // Number of suits in this hand for which c == 0.
+    for (int s = 0; s < DDS_SUITS; s++)
+    {
+      const int c = static_cast<int>(dl.remainCards[h][s] >> 2);  // NOTE(review): presumably strips two unused low bits so c can index group_data -- confirm.
+      fanout_suit += group_data[c].last_group_ + 1;
+      if (c == 0)
+        num_voids++;
+    }
+    fanout_suit += num_voids * fanout_suit;  // Void bonus: scales this hand's sum by (1 + num_voids).
+    fanout += fanout_suit;
+  }
+  return fanout;
+}
+}
+
 // Legacy overload (creates temporary context)
 auto calc_all_boards_n(
   Boards * bop,
@@ -137,11 +167,24 @@ auto calc_all_boards_n(
   else
   {
     std::vector<SolverContext> contexts(static_cast<unsigned>(nthreads));
+
+    // Dispatch hardest boards first to shorten the parallel tail.
+    std::vector<int> fanout(static_cast<unsigned>(n));
+    for (int i = 0; i < n; i++)
+      fanout[static_cast<unsigned>(i)] = deal_fanout(bop->deals[i]);
+    std::vector<int> order(static_cast<unsigned>(n));
+    std::iota(order.begin(), order.end(), 0);
+    std::stable_sort(order.begin(), order.end(),
+      [&](const int a, const int b) {
+        return fanout[static_cast<unsigned>(a)] > fanout[static_cast<unsigned>(b)];
+      });
+
     err = parallel_all_boards_n(n, nthreads,
       [&](const int worker_id, const int bno) -> int {
         return calc_single_common_internal(
           contexts[static_cast<unsigned>(worker_id)], *bop, *solvedp, bno);
-      });
+      },
+      &order);
   }
 
   END_BLOCK_TIMER;
diff --git a/library/src/system/parallel_boards.cpp b/library/src/system/parallel_boards.cpp
index 750041e7..ebb2a0af 100644
--- a/library/src/system/parallel_boards.cpp
+++ b/library/src/system/parallel_boards.cpp
@@ -34,20 +34,29 @@ auto resolve_worker_count(
 auto parallel_all_boards_n(
   const int count,
   const int worker_cap,
-  const std::function<int(int worker_id, int bno)>& process_board) -> int
+  const std::function<int(int worker_id, int bno)>& process_board,
+  const std::vector<int>* order) -> int
 {
   if (count <= 0)
   {
     return RETURN_NO_FAULT;
   }
 
+  // Map a dispatch slot to the board number to process. With an order, hand out
+  // boards in that sequence (e.g. hardest first); otherwise in index order.
+  const bool use_order =
+    (order != nullptr && static_cast<int>(order->size()) == count);
+  auto board_of = [&](const int slot) -> int {
+    return use_order ? (*order)[static_cast<unsigned>(slot)] : slot;
+  };
+
   const int workers = resolve_worker_count(worker_cap, count);
 
   if (workers == 1)
   {
-    for (int bno = 0; bno < count; ++bno)
+    for (int slot = 0; slot < count; ++slot)
     {
-      const int rc = process_board(0, bno);
+      const int rc = process_board(0, board_of(slot));
       if (rc != RETURN_NO_FAULT)
       {
         return rc;
@@ -62,11 +71,12 @@ auto parallel_all_boards_n(
   auto worker = [&](const int worker_id) {
     for (;;)
     {
-      const int bno = next.fetch_add(1, std::memory_order_relaxed);
-      if (bno >= count || first_error.load(std::memory_order_relaxed) != RETURN_NO_FAULT)
+      const int slot = next.fetch_add(1, std::memory_order_relaxed);
+      if (slot >= count || first_error.load(std::memory_order_relaxed) != RETURN_NO_FAULT)
       {
         break;
       }
+      const int bno = board_of(slot);
 
       const int rc = process_board(worker_id, bno);
       if (rc != RETURN_NO_FAULT)
diff --git a/library/src/system/parallel_boards.hpp b/library/src/system/parallel_boards.hpp
index 2f19b6de..01292d05 100644
--- a/library/src/system/parallel_boards.hpp
+++ b/library/src/system/parallel_boards.hpp
@@ -10,6 +10,7 @@
 #pragma once
 
 #include <functional>
+#include <vector>
 
 
 /**
@@ -28,9 +29,15 @@ auto resolve_worker_count(int max_threads, int count) -> int;
  * @param worker_cap Maximum worker threads; <= 0 uses hardware concurrency.
  * @param process_board Called for each board; must return RETURN_NO_FAULT (1)
  *        on success. Receives the worker's thread index and board number.
+ * @param order Optional dispatch order: a permutation of [0, count) giving the
+ *        sequence in which board numbers are handed out (e.g. hardest first to
+ *        shorten the tail). When null or wrong-sized, boards are dispatched in index
+ *        order. Only the dispatch order changes; @p process_board still receives
+ *        the real board number, so result placement is unaffected.
  * @return First non-success code from @p process_board, or RETURN_NO_FAULT.
  */
 auto parallel_all_boards_n(
   int count,
   int worker_cap,
-  const std::function<int(int worker_id, int bno)>& process_board) -> int;
+  const std::function<int(int worker_id, int bno)>& process_board,
+  const std::vector<int>* order = nullptr) -> int;

From 2d57e8dbae36df5d3fea7d704990d29b5f884cef Mon Sep 17 00:00:00 2001
From: Adam Wildavsky <adam@tameware.com>
Date: Sun, 28 Jun 2026 17:39:43 -0500
Subject: [PATCH 5/6] Skip hardest-first dispatch for single-deal calc

CalcDDtableN builds one board per strain for a single deal. deal_fanout is
trump-independent, so all boards share one fanout and the difficulty sort is a
pure no-op there. Gate the sort behind a difficulty_sort flag (default on for
batch CalcAllTablesN) and disable it for the single-deal path.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 library/src/calc_tables.cpp | 41 +++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/library/src/calc_tables.cpp b/library/src/calc_tables.cpp
index 1c9df8b0..01b2a79a 100644
--- a/library/src/calc_tables.cpp
+++ b/library/src/calc_tables.cpp
@@ -53,11 +53,14 @@ auto deal_fanout(const Deal& dl) -> int
 }
 }
 
-// Legacy overload (creates temporary context)
+// Legacy overload (creates temporary context). difficulty_sort dispatches the
+// hardest boards first; it only helps across distinct deals (batch calc), so it
+// is skipped for a single deal (all boards share one deal / one fanout).
 auto calc_all_boards_n(
   Boards * bop,
   SolvedBoards * solvedp,
-  int max_threads = 0) -> int;
+  int max_threads = 0,
+  bool difficulty_sort = true) -> int;
 
 
 auto calc_single_common_internal(
@@ -140,7 +143,8 @@ auto calc_all_boards_n(
 auto calc_all_boards_n(
   Boards * bop,
   SolvedBoards * solvedp,
-  int max_threads) -> int
+  int max_threads,
+  bool difficulty_sort) -> int
 {
   const int n = bop->no_of_boards;
   if (n > MAXNOOFBOARDS)
@@ -168,23 +172,29 @@ auto calc_all_boards_n(
   {
     std::vector<SolverContext> contexts(static_cast<unsigned>(nthreads));
 
-    // Dispatch hardest boards first to shorten the parallel tail.
-    std::vector<int> fanout(static_cast<unsigned>(n));
-    for (int i = 0; i < n; i++)
-      fanout[static_cast<unsigned>(i)] = deal_fanout(bop->deals[i]);
-    std::vector<int> order(static_cast<unsigned>(n));
-    std::iota(order.begin(), order.end(), 0);
-    std::stable_sort(order.begin(), order.end(),
-      [&](const int a, const int b) {
-        return fanout[static_cast<unsigned>(a)] > fanout[static_cast<unsigned>(b)];
-      });
+    // Dispatch hardest boards first to shorten the parallel tail. This only
+    // helps across distinct deals (batch calc); for a single deal every board
+    // shares one fanout, so the sort is skipped (it would be a no-op anyway).
+    std::vector<int> order;
+    if (difficulty_sort)
+    {
+      std::vector<int> fanout(static_cast<unsigned>(n));
+      for (int i = 0; i < n; i++)
+        fanout[static_cast<unsigned>(i)] = deal_fanout(bop->deals[i]);
+      order.resize(static_cast<unsigned>(n));
+      std::iota(order.begin(), order.end(), 0);
+      std::stable_sort(order.begin(), order.end(),
+        [&](const int a, const int b) {
+          return fanout[static_cast<unsigned>(a)] > fanout[static_cast<unsigned>(b)];
+        });
+    }
 
     err = parallel_all_boards_n(n, nthreads,
       [&](const int worker_id, const int bno) -> int {
         return calc_single_common_internal(
           contexts[static_cast<unsigned>(worker_id)], *bop, *solvedp, bno);
       },
-      &order);
+      order.empty() ? nullptr : &order);
   }
 
   END_BLOCK_TIMER;
@@ -235,7 +245,8 @@ int STDCALL CalcDDtableN(
     ind++;
   }
 
-  int res = calc_all_boards_n(&bo, &solved, maxThreads);
+  // Single deal: all boards share one deal, so hardest-first sorting is a no-op.
+  int res = calc_all_boards_n(&bo, &solved, maxThreads, /*difficulty_sort=*/false);
   if (res != 1)
     return res;
 

From f4ed912bf19ef7c8e27322058bec28e184b1b1d5 Mon Sep 17 00:00:00 2001
From: Adam Wildavsky <adam@tameware.com>
Date: Sun, 28 Jun 2026 22:28:09 -0500
Subject: [PATCH 6/6] Validate order is a permutation in parallel_all_boards_n

Only honor the optional dispatch order when it is a valid permutation
of [0, count): each element in range and unique. A malformed order
(duplicates or out-of-range values) now falls back to index order,
preventing invalid board indices from reaching process_board.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 library/src/system/parallel_boards.cpp | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/library/src/system/parallel_boards.cpp b/library/src/system/parallel_boards.cpp
index ebb2a0af..31a014de 100644
--- a/library/src/system/parallel_boards.cpp
+++ b/library/src/system/parallel_boards.cpp
@@ -31,6 +31,21 @@ auto resolve_worker_count(
 }
 
 
+static auto is_permutation_of_range(  // True iff every value in order is in [0, count) and none repeats.
+  const std::vector<int>& order,  // Candidate dispatch order to validate.
+  const int count) -> bool  // Exclusive upper bound; order's length is NOT compared here (the caller checks size == count).
+{
+  std::vector<char> seen(static_cast<unsigned>(count), 0);  // seen[v] becomes 1 once value v has occurred.
+  for (const int v : order)
+  {
+    if (v < 0 || v >= count || seen[static_cast<unsigned>(v)])  // Out of range, or a duplicate of an earlier value.
+      return false;
+    seen[static_cast<unsigned>(v)] = 1;
+  }
+  return true;  // All values were in range and distinct.
+}
+
+
 auto parallel_all_boards_n(
   const int count,
   const int worker_cap,
@@ -43,9 +58,13 @@ auto parallel_all_boards_n(
   }
 
   // Map a dispatch slot to the board number to process. With an order, hand out
-  // boards in that sequence (e.g. hardest first); otherwise in index order.
+  // boards in that sequence (e.g. hardest first); otherwise in index order. The
+  // order is only honored when it is a valid permutation of [0, count); a
+  // malformed order falls back to index order to avoid invalid board indices.
   const bool use_order =
-    (order != nullptr && static_cast<int>(order->size()) == count);
+    (order != nullptr &&
+     static_cast<int>(order->size()) == count &&
+     is_permutation_of_range(*order, count));
   auto board_of = [&](const int slot) -> int {
     return use_order ? (*order)[static_cast<unsigned>(slot)] : slot;
   };