diff --git a/library/src/ab_search.cpp b/library/src/ab_search.cpp index 762d75f0..c5b8ee96 100644 --- a/library/src/ab_search.cpp +++ b/library/src/ab_search.cpp @@ -878,10 +878,10 @@ void make_3( int aggr = posPoint->aggr[st]; - posPoint->winner[st].rank = static_cast(thrp->rel[aggr].abs_rank[1][st].rank); - posPoint->winner[st].hand = static_cast(thrp->rel[aggr].abs_rank[1][st].hand); - posPoint->second_best[st].rank = static_cast(thrp->rel[aggr].abs_rank[2][st].rank); - posPoint->second_best[st].hand = static_cast(thrp->rel[aggr].abs_rank[2][st].hand); + posPoint->winner[st].rank = thrp->rel[aggr].abs_rank[1][st].rank; + posPoint->winner[st].hand = thrp->rel[aggr].abs_rank[1][st].hand; + posPoint->second_best[st].rank = thrp->rel[aggr].abs_rank[2][st].rank; + posPoint->second_best[st].hand = thrp->rel[aggr].abs_rank[2][st].hand; } } @@ -944,10 +944,10 @@ static void make_3_ctx( int aggr = posPoint->aggr[st]; - posPoint->winner[st].rank = static_cast(thrp->rel[aggr].abs_rank[1][st].rank); - posPoint->winner[st].hand = static_cast(thrp->rel[aggr].abs_rank[1][st].hand); - posPoint->second_best[st].rank = static_cast(thrp->rel[aggr].abs_rank[2][st].rank); - posPoint->second_best[st].hand = static_cast(thrp->rel[aggr].abs_rank[2][st].hand); + posPoint->winner[st].rank = thrp->rel[aggr].abs_rank[1][st].rank; + posPoint->winner[st].hand = thrp->rel[aggr].abs_rank[1][st].hand; + posPoint->second_best[st].rank = thrp->rel[aggr].abs_rank[2][st].rank; + posPoint->second_best[st].hand = thrp->rel[aggr].abs_rank[2][st].hand; } } diff --git a/library/src/calc_tables.cpp b/library/src/calc_tables.cpp index b0446fe8..01b2a79a 100644 --- a/library/src/calc_tables.cpp +++ b/library/src/calc_tables.cpp @@ -8,12 +8,15 @@ */ #include "calc_tables.hpp" +#include +#include #include #include #include #include #include +#include #include #include #include @@ -23,11 +26,41 @@ extern Memory memory; extern Scheduler scheduler; -// Legacy overload (creates temporary context) +namespace +{ +// Cheap structural difficulty estimate (cards only, trump-independent). Used to +// dispatch the hardest boards first so the parallel tail is short. Mirrors +// Scheduler::Fanout: per hand, sum the number of card groups per suit, with a +// bonus for voids. +auto deal_fanout(const Deal& dl) -> int +{ + int fanout = 0; + for (int h = 0; h < DDS_HANDS; h++) + { + int fanout_suit = 0; + int num_voids = 0; + for (int s = 0; s < DDS_SUITS; s++) + { + const int c = static_cast(dl.remainCards[h][s] >> 2); + fanout_suit += group_data[c].last_group_ + 1; + if (c == 0) + num_voids++; + } + fanout_suit += num_voids * fanout_suit; + fanout += fanout_suit; + } + return fanout; +} +} + +// Legacy overload (creates temporary context). difficulty_sort dispatches the +// hardest boards first; it only helps across distinct deals (batch calc), so it +// is skipped for a single deal (all boards share one deal / one fanout). auto calc_all_boards_n( Boards * bop, SolvedBoards * solvedp, - int max_threads = 0) -> int; + int max_threads = 0, + bool difficulty_sort = true) -> int; auto calc_single_common_internal( @@ -110,7 +143,8 @@ auto calc_all_boards_n( auto calc_all_boards_n( Boards * bop, SolvedBoards * solvedp, - int max_threads) -> int + int max_threads, + bool difficulty_sort) -> int { const int n = bop->no_of_boards; if (n > MAXNOOFBOARDS) @@ -137,11 +171,30 @@ auto calc_all_boards_n( else { std::vector contexts(static_cast(nthreads)); + + // Dispatch hardest boards first to shorten the parallel tail. This only + // helps across distinct deals (batch calc); for a single deal every board + // shares one fanout, so the sort is skipped (it would be a no-op anyway). + std::vector order; + if (difficulty_sort) + { + std::vector fanout(static_cast(n)); + for (int i = 0; i < n; i++) + fanout[static_cast(i)] = deal_fanout(bop->deals[i]); + order.resize(static_cast(n)); + std::iota(order.begin(), order.end(), 0); + std::stable_sort(order.begin(), order.end(), + [&](const int a, const int b) { + return fanout[static_cast(a)] > fanout[static_cast(b)]; + }); + } + err = parallel_all_boards_n(n, nthreads, [&](const int worker_id, const int bno) -> int { return calc_single_common_internal( contexts[static_cast(worker_id)], *bop, *solvedp, bno); - }); + }, + order.empty() ? nullptr : &order); } END_BLOCK_TIMER; @@ -192,7 +245,8 @@ int STDCALL CalcDDtableN( ind++; } - int res = calc_all_boards_n(&bo, &solved, maxThreads); + // Single deal: all boards share one deal, so hardest-first sorting is a no-op. + int res = calc_all_boards_n(&bo, &solved, maxThreads, /*difficulty_sort=*/false); if (res != 1) return res; diff --git a/library/src/heuristic_sorting/heuristic_sorting.cpp b/library/src/heuristic_sorting/heuristic_sorting.cpp index 449c0edf..8ddd0a2d 100644 --- a/library/src/heuristic_sorting/heuristic_sorting.cpp +++ b/library/src/heuristic_sorting/heuristic_sorting.cpp @@ -676,49 +676,17 @@ void weight_alloc_trump_void1(HeuristicContext& ctx) unsigned short suitCount = tpos.length[curr_hand][suit]; int suitAdd; - if (suit == trump) + if (lead_suit == trump) // We pitch { - // We trump a non-trump card. - - if (tpos.length[partner_lh][lead_suit] != 0) - { - // 3rd hand will follow. - if ((tpos.rank_in_suit[rho_lh][lead_suit] > - (tpos.rank_in_suit[partner_lh][lead_suit] | - bit_map_rank[ctx.lead0_rank])) || - ((tpos.length[rho_lh][lead_suit] == 0) && - (tpos.length[rho_lh][trump] != 0))) - { - // Partner can win with a card or by ruffing. - suitAdd = 60 + (suitCount << 6) / 44; - } - else - { - suitAdd = -2 + (suitCount << 6) / 36; - // Don't ruff from Kx. - if ((suitCount == 2) && - (tpos.second_best[suit].hand == curr_hand)) - suitAdd += -4; - } - } - else if ((tpos.length[rho_lh][lead_suit] == 0) && - (tpos.rank_in_suit[rho_lh][trump] > - tpos.rank_in_suit[partner_lh][trump])) - { - // Partner can overruff 3rd hand. - suitAdd = 60 + (suitCount << 6) / 44; - } - else if ((tpos.length[partner_lh][trump] == 0) && - (tpos.rank_in_suit[rho_lh][lead_suit] > - bit_map_rank[ctx.lead0_rank])) - { - // 3rd hand has no trumps, and partner has suit winner. - suitAdd = 60 + (suitCount << 6) / 44; - } + if (tpos.rank_in_suit[rho_lh][lead_suit] > + (tpos.rank_in_suit[partner_lh][lead_suit] | + bit_map_rank[ctx.lead0_rank])) + // RHO can win. + suitAdd = (suitCount << 6) / 44; else { - suitAdd = -2 + (suitCount << 6) / 36; - // Don't ruff from Kx. + // Don't pitch from Kx. + suitAdd = (suitCount << 6) / 36; if ((suitCount == 2) && (tpos.second_best[suit].hand == curr_hand)) suitAdd += -4; @@ -1245,10 +1213,7 @@ void weight_alloc_trump_void2(HeuristicContext& ctx) mply[k].rank < ctx.move1_rank) { // Don't underruff. - unsigned char aggrSuit = static_cast(tpos.aggr[suit]); - unsigned char moveRank = static_cast(mply[k].rank); - unsigned char relRankValue = static_cast(rel_rank[aggrSuit][moveRank]); - int r_rank = static_cast(relRankValue); + int r_rank = rel_rank[tpos.aggr[suit]][mply[k].rank]; suitAdd = (suitCount << 6) / 40; mply[k].weight = -32 + r_rank + suitAdd; } @@ -1418,10 +1383,7 @@ void weight_alloc_trump_void3(HeuristicContext& ctx) { for (int k = last_num_moves; k < num_moves; k++) { - int r_rank = static_cast( - static_cast( - rel_rank[static_cast(tpos.aggr[suit])] - [static_cast(mply[k].rank)])); + int r_rank = rel_rank[tpos.aggr[suit]][mply[k].rank]; if (mply[k].rank > ctx.move2_rank) mply[k].weight = 33 + r_rank; // Overruff else @@ -1436,10 +1398,7 @@ void weight_alloc_trump_void3(HeuristicContext& ctx) { for (int k = last_num_moves; k < num_moves; k++) { - int r_rank = static_cast( - static_cast( - rel_rank[static_cast(tpos.aggr[suit])] - [static_cast(mply[k].rank)])); + int r_rank = rel_rank[tpos.aggr[suit]][mply[k].rank]; mply[k].weight = 33 + r_rank; } } diff --git a/library/src/quick_tricks.cpp b/library/src/quick_tricks.cpp index 0c161406..48f37fe5 100644 --- a/library/src/quick_tricks.cpp +++ b/library/src/quick_tricks.cpp @@ -1000,7 +1000,7 @@ int QuickTricksPartnerHandTrump( if (ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].hand == partner[hand]) { tpos.win_ranks[depth][suit] |= bit_map_rank[ - static_cast(static_cast(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank)) ]; + static_cast(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank) ]; tpos.win_ranks[depth][commSuit] |= bit_map_rank[commRank]; @@ -1110,7 +1110,7 @@ int QuickTricksPartnerHandNT( if (ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].hand == partner[hand]) { tpos.win_ranks[depth][suit] |= bit_map_rank[ - static_cast(static_cast(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank)) ]; + static_cast(ctx.thread_ptr()->rel[ranks].abs_rank[3][suit].rank) ]; qt++; if (qt >= cutoff) return qt; diff --git a/library/src/system/parallel_boards.cpp b/library/src/system/parallel_boards.cpp index 750041e7..31a014de 100644 --- a/library/src/system/parallel_boards.cpp +++ b/library/src/system/parallel_boards.cpp @@ -31,23 +31,51 @@ auto resolve_worker_count( } +static auto is_permutation_of_range( + const std::vector& order, + const int count) -> bool +{ + std::vector seen(static_cast(count), 0); + for (const int v : order) + { + if (v < 0 || v >= count || seen[static_cast(v)]) + return false; + seen[static_cast(v)] = 1; + } + return true; +} + + auto parallel_all_boards_n( const int count, const int worker_cap, - const std::function& process_board) -> int + const std::function& process_board, + const std::vector* order) -> int { if (count <= 0) { return RETURN_NO_FAULT; } + // Map a dispatch slot to the board number to process. With an order, hand out + // boards in that sequence (e.g. hardest first); otherwise in index order. The + // order is only honored when it is a valid permutation of [0, count); a + // malformed order falls back to index order to avoid invalid board indices. + const bool use_order = + (order != nullptr && + static_cast(order->size()) == count && + is_permutation_of_range(*order, count)); + auto board_of = [&](const int slot) -> int { + return use_order ? (*order)[static_cast(slot)] : slot; + }; + const int workers = resolve_worker_count(worker_cap, count); if (workers == 1) { - for (int bno = 0; bno < count; ++bno) + for (int slot = 0; slot < count; ++slot) { - const int rc = process_board(0, bno); + const int rc = process_board(0, board_of(slot)); if (rc != RETURN_NO_FAULT) { return rc; @@ -62,11 +90,12 @@ auto parallel_all_boards_n( auto worker = [&](const int worker_id) { for (;;) { - const int bno = next.fetch_add(1, std::memory_order_relaxed); - if (bno >= count || first_error.load(std::memory_order_relaxed) != RETURN_NO_FAULT) + const int slot = next.fetch_add(1, std::memory_order_relaxed); + if (slot >= count || first_error.load(std::memory_order_relaxed) != RETURN_NO_FAULT) { break; } + const int bno = board_of(slot); const int rc = process_board(worker_id, bno); if (rc != RETURN_NO_FAULT) diff --git a/library/src/system/parallel_boards.hpp b/library/src/system/parallel_boards.hpp index 2f19b6de..01292d05 100644 --- a/library/src/system/parallel_boards.hpp +++ b/library/src/system/parallel_boards.hpp @@ -10,6 +10,7 @@ #pragma once #include +#include /** @@ -28,9 +29,15 @@ auto resolve_worker_count(int max_threads, int count) -> int; * @param worker_cap Maximum worker threads; <= 0 uses hardware concurrency. * @param process_board Called for each board; must return RETURN_NO_FAULT (1) * on success. Receives the worker's thread index and board number. + * @param order Optional dispatch order: a permutation of [0, count) giving the + * sequence in which board numbers are handed out (e.g. hardest first to + * shorten the tail). When null/empty, boards are dispatched in index + * order. Only the dispatch order changes; @p process_board still receives + * the real board number, so result placement is unaffected. * @return First non-success code from @p process_board, or RETURN_NO_FAULT. */ auto parallel_all_boards_n( int count, int worker_cap, - const std::function& process_board) -> int; + const std::function& process_board, + const std::vector* order = nullptr) -> int;