From c8469d380dc0ca66176c1277e9da518c5808489c Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 00:03:40 +0200 Subject: [PATCH 1/4] enable CV, add UTs, smoke tests and enable them in github --- .gitignore | 1 + baselines.slurm | 30 ++++++ cv.py | 221 +++++++++++++++++++++++++++++++++++++++++ das/env/bbob_splits.py | 24 ++--- das/training/expdas.py | 138 ++++++++++++++++++++++++- das/training/ppo.py | 22 ++-- das/training/rldas.py | 132 +++++++++++++++++++++++- exp_das_study.slurm | 53 ++++++++++ main.py | 14 --- ppo_study.slurm | 57 +++++++++++ pyproject.toml | 1 + rl_das_study.slurm | 45 +++++++++ run_local.sh | 55 ++++++++++ runner.sh | 33 ++++++ smoke_test.sh | 60 +++++++++++ train.py | 30 ++---- 16 files changed, 856 insertions(+), 60 deletions(-) create mode 100644 baselines.slurm create mode 100644 cv.py create mode 100644 exp_das_study.slurm delete mode 100644 main.py create mode 100644 ppo_study.slurm create mode 100644 rl_das_study.slurm create mode 100755 run_local.sh create mode 100755 runner.sh create mode 100755 smoke_test.sh diff --git a/.gitignore b/.gitignore index 0c213e2..b3e5135 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ results/ .idea/ __pycache__/ */__pycache__/ +logs/ diff --git a/baselines.slurm b/baselines.slurm new file mode 100644 index 0000000..097fcae --- /dev/null +++ b/baselines.slurm @@ -0,0 +1,30 @@ +#!/bin/bash +#SBATCH --job-name=das2_baselines +#SBATCH --output=logs/baselines_%j.out +#SBATCH --error=logs/baselines_%j.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=24:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 + +# Args: SEED [PORTFOLIO...] 
+SEED=${1:-42} + +if [ "$#" -lt 2 ]; then + PORTFOLIO=('SPSO' 'IPSO' 'SPSOL') +else + PORTFOLIO=("${@:2}") +fi + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +echo "Baselines | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}" + +python baselines.py ${PORTFOLIO_STR}_BASELINES_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --agent all --seed $SEED \ No newline at end of file diff --git a/cv.py b/cv.py new file mode 100644 index 0000000..557ec2c --- /dev/null +++ b/cv.py @@ -0,0 +1,221 @@ +"""Cross-validation entry point: train one model per fold, evaluate on held-out split. + +Usage +----- + python cv.py ppo [options] + python cv.py rl-das [options] + python cv.py exp-das [options] + +Outputs (per fold) +------------------ + models/_cv_.zip / _final.pt trained model + results/_cv_.jsonl per-problem test results + results/_cv_summary.jsonl aggregated stats across all folds +""" + +import argparse +import warnings +from pathlib import Path + +from das.env.bbob_splits import ALL_DIMS +from das.utils import set_seed + +warnings.filterwarnings("ignore") + + +# ------------------------------------------------------------------ # +# Argument parsing # +# ------------------------------------------------------------------ # + + +def _add_shared_args(p: argparse.ArgumentParser) -> None: + p.add_argument("name", help="Experiment name (used for output file names)") + p.add_argument( + "-p", + "--portfolio", + nargs="+", + default=["SPSO", "IPSO", "SPSOL"], + help="Sub-optimizer names from the portfolio", + ) + p.add_argument( + "--fe-multiplier", + type=int, + default=10_000, + help="Budget = fe_multiplier × dimension", + ) + p.add_argument( + "--n-checkpoints", + type=int, + default=10, + help="Optimizer-selection steps per episode", + ) + p.add_argument("--n-individuals", type=int, default=100, help="Population size") + p.add_argument("--seed", type=int, default=42) + p.add_argument( + 
"--cv-mode", + default="LOIO", + choices=["LOIO", "LOPO"], + help="LOIO: hold out instances per fold; LOPO: hold out functions per fold", + ) + p.add_argument("--n-folds", type=int, default=3, help="Number of CV folds") + p.add_argument( + "--folds", + nargs="+", + type=int, + default=None, + help="Zero-based fold indices to run (default: all)", + ) + + +def _parse_args() -> argparse.Namespace: + root = argparse.ArgumentParser( + description="Cross-validation for DAS agents. Choose an agent with a sub-command.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + sub = root.add_subparsers( + dest="agent", required=True, metavar="{ppo,rl-das,exp-das}" + ) + + # ---- PPO -------------------------------------------------------- + ppo = sub.add_parser( + "ppo", + help="SB3 PPO with VecNormalize", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + _add_shared_args(ppo) + ppo.add_argument( + "-d", + "--dims", + nargs="+", + type=int, + default=ALL_DIMS, + choices=ALL_DIMS, + help="Problem dimensions", + ) + ppo.add_argument( + "-x", "--cdb", type=float, default=1.0, help="Checkpoint division base" + ) + ppo.add_argument( + "-O", + "--reward-option", + type=int, + default=1, + choices=[1, 2, 3, 4], + help="Reward shaping option", + ) + ppo.add_argument( + "-E", + "--n-epochs", + type=int, + default=20, + help="Training passes per fold. 
total_timesteps = n_epochs × |train_ids| × n_checkpoints", + ) + ppo.add_argument( + "-j", "--n-envs", type=int, default=1, help="Parallel training envs" + ) + ppo.add_argument("--wandb", action="store_true", help="Log to Weights & Biases") + + # ---- RL-DAS ----------------------------------------------------- + rl = sub.add_parser( + "rl-das", + help="Custom RL-DAS: single-dimension, pure-PyTorch PPO", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + _add_shared_args(rl) + rl.add_argument( + "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)" + ) + rl.add_argument("--n-epochs", type=int, default=20, help="Training epochs per fold") + rl.add_argument( + "--k-epoch", + type=int, + default=None, + help="PPO gradient steps per episode (default: int(0.3 × n_checkpoints))", + ) + rl.add_argument("--lr", type=float, default=1e-5, help="Learning rate") + rl.add_argument( + "--eval-interval", type=int, default=5, help="Evaluate every N epochs" + ) + rl.add_argument( + "--save-interval", type=int, default=50, help="Checkpoint every N epochs" + ) + rl.add_argument("--device", default="cpu", help="PyTorch device") + + # ---- Exp-DAS ---------------------------------------------------- + exp = sub.add_parser( + "exp-das", + help="Exponential-DAS: custom PPO with exponential checkpoint spacing", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + _add_shared_args(exp) + exp.add_argument( + "--dims", nargs="+", type=int, default=[2, 5, 10], help="Problem dimensions" + ) + exp.add_argument( + "--cdb", + type=float, + default=2.0, + help="Checkpoint division base (>1 = exponential)", + ) + exp.add_argument( + "--reward-option", + type=int, + default=1, + choices=[1, 2, 3, 4], + help="Reward shaping option", + ) + exp.add_argument( + "--buffer-capacity", + type=int, + default=None, + help="PPO rollout buffer size in steps (default: 16 × n_checkpoints)", + ) + exp.add_argument( + "-E", + "--n-epochs", + type=int, + default=3, + 
help="Passes over the training set per fold. total_episodes = n_epochs × |train_ids|", + ) + exp.add_argument( + "--save-interval", type=int, default=500, help="Checkpoint every N episodes" + ) + exp.add_argument("--actor-lr", type=float, default=3e-5, help="Actor learning rate") + exp.add_argument( + "--critic-lr", type=float, default=1e-5, help="Critic learning rate" + ) + exp.add_argument( + "--ppo-epochs", type=int, default=6, help="PPO gradient epochs per update" + ) + exp.add_argument("--device", default="cpu", help="PyTorch device") + + return root.parse_args() + + +# ------------------------------------------------------------------ # +# Main # +# ------------------------------------------------------------------ # + + +def main() -> None: + args = _parse_args() + set_seed(args.seed) + Path("models").mkdir(exist_ok=True) + Path("results").mkdir(exist_ok=True) + + if args.agent == "ppo": + from das.training.ppo import run_cv_ppo + + run_cv_ppo(args) + elif args.agent == "rl-das": + from das.training.rldas import run_cv_rl_das + + run_cv_rl_das(args) + elif args.agent == "exp-das": + from das.training.expdas import run_cv_exp_das + + run_cv_exp_das(args) + + +if __name__ == "__main__": + main() diff --git a/das/env/bbob_splits.py b/das/env/bbob_splits.py index d8b660e..bc9a8da 100644 --- a/das/env/bbob_splits.py +++ b/das/env/bbob_splits.py @@ -48,19 +48,15 @@ def get_train_test_split(mode: str, dims: list[int]) -> tuple[list[str], list[st return all_ids[:split], all_ids[split:] -_N_CV_FOLDS = 3 - - def get_cv_folds( - cv_mode: str, dims: list[int], seed: int = 0 + cv_mode: str, dims: list[int], seed: int = 0, n_folds: int = 3 ) -> list[tuple[list[str], list[str], str]]: - """Return (train_ids, test_ids, fold_tag) for each of the 3 CV folds. + """Return (train_ids, test_ids, fold_tag) for each CV fold. - LOIO: 3 folds – the 15 instance IDs are randomly shuffled and split into - 3 groups of 5; each fold tests on 1 group and trains on the other 10. 
- LOPO: 3 folds – the 24 BBOB functions are randomly shuffled and split into - 3 groups of 8; each fold tests on all problems from 1 group of - functions (all instances) and trains on the other 16 functions. + LOIO: instance IDs are shuffled and split into n_folds groups; + each fold tests on one group and trains on the rest. + LOPO: BBOB functions are shuffled and split into n_folds groups; + each fold tests on all problems from one group of functions. """ rng = np.random.default_rng(seed) folds = [] @@ -68,8 +64,8 @@ def get_cv_folds( if cv_mode == "LOIO": insts = list(INSTANCE_IDS) rng.shuffle(insts) - chunk = len(insts) // _N_CV_FOLDS # 5 - for i in range(_N_CV_FOLDS): + chunk = len(insts) // n_folds + for i in range(n_folds): test_insts = insts[i * chunk : (i + 1) * chunk] train_insts = [inst for inst in insts if inst not in set(test_insts)] folds.append( @@ -82,8 +78,8 @@ def get_cv_folds( else: # LOPO fns = list(ALL_FUNCTIONS) rng.shuffle(fns) - chunk = len(fns) // _N_CV_FOLDS # 8 - for i in range(_N_CV_FOLDS): + chunk = len(fns) // n_folds + for i in range(n_folds): test_fns = set(fns[i * chunk : (i + 1) * chunk]) train_fns = ALL_FUNCTIONS - test_fns folds.append( diff --git a/das/training/expdas.py b/das/training/expdas.py index a6a6076..5158bad 100644 --- a/das/training/expdas.py +++ b/das/training/expdas.py @@ -1,11 +1,12 @@ """Exponential-DAS training runner (custom PyTorch PPO with exp checkpoint spacing).""" +import json import os import cocoex as cx import numpy as np -from das.env.bbob_splits import get_train_test_split +from das.env.bbob_splits import get_cv_folds, get_train_test_split from das.env.das_env import DASEnv from das.env.observation import observation_dim from das.optimizers.portfolio import get_portfolio @@ -21,8 +22,12 @@ def run_exp_das(args) -> None: obs_dim = observation_dim(n_opt) train_ids, test_ids = get_train_test_split(args.mode, args.dims) + total_episodes = args.n_epochs * len(train_ids) print(f"Train: {len(train_ids)} 
problems | Test: {len(test_ids)} problems") - print(f"obs_dim={obs_dim} cdb={args.cdb} n_checkpoints={args.n_checkpoints}") + print( + f"obs_dim={obs_dim} cdb={args.cdb} n_checkpoints={args.n_checkpoints}" + f" n_epochs={args.n_epochs} total_episodes={total_episodes}" + ) suite = cx.Suite("bbob", "", "") @@ -62,7 +67,7 @@ def run_exp_das(args) -> None: train_env=train_env, test_env=test_env, agent=agent, - total_episodes=args.total_episodes, + total_episodes=total_episodes, eval_interval=args.eval_interval, save_interval=args.save_interval, save_dir="models", @@ -77,3 +82,130 @@ def run_exp_das(args) -> None: eval_path = os.path.join("results", f"{args.name}_eval.jsonl") write_jsonl(eval_path, test_results) print(f"Results saved to {eval_path}") + + +def run_cv_exp_das(args) -> None: + from agents.exponential_das import ExpDASAgent + from agents.exponential_das import train, evaluate + + optimizers = get_portfolio(args.portfolio) + n_opt = len(optimizers) + obs_dim = observation_dim(n_opt) + + suite = cx.Suite("bbob", "", "") + + all_folds = get_cv_folds( + args.cv_mode, args.dims, seed=args.seed, n_folds=args.n_folds + ) + fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds + + print(f"CV mode : {args.cv_mode} ({len(fold_indices)}/{len(all_folds)} folds)") + print(f"n_epochs/fold: {args.n_epochs} | dims={args.dims}") + + buffer_capacity = args.buffer_capacity or (16 * args.n_checkpoints) + + env_cfg = dict( + suite=suite, + optimizers=optimizers, + fe_multiplier=args.fe_multiplier, + n_checkpoints=args.n_checkpoints, + checkpoint_division_base=args.cdb, + reward_option=args.reward_option, + n_individuals=args.n_individuals, + seed=args.seed, + ) + + fold_summaries = [] + + for run_idx, fold_idx in enumerate(fold_indices): + train_ids, test_ids, fold_tag = all_folds[fold_idx] + fold_name = f"{args.name}_cv_{fold_tag}" + model_path = os.path.join("models", f"{fold_name}_final.pt") + result_path = os.path.join("results", 
f"{fold_name}.jsonl") + + print(f"\n{'=' * 60}") + print( + f"Fold {run_idx + 1}/{len(fold_indices)}: {fold_tag}" + f" ({len(train_ids)} train / {len(test_ids)} test)" + ) + print(f"{'=' * 60}") + + total_episodes = args.n_epochs * len(train_ids) + + if os.path.exists(model_path): + print(f" [skip training] {model_path} already exists") + agent = ExpDASAgent.load(model_path, obs_dim=obs_dim, n_actions=n_opt) + else: + agent = ExpDASAgent( + obs_dim=obs_dim, + n_actions=n_opt, + buffer_capacity=buffer_capacity, + actor_lr=args.actor_lr, + critic_lr=args.critic_lr, + ppo_epochs=args.ppo_epochs, + n_checkpoints=args.n_checkpoints, + device=args.device, + ) + train_env = DASEnv(problem_ids=train_ids, **env_cfg) + train( + train_env=train_env, + test_env=None, + agent=agent, + total_episodes=total_episodes, + eval_interval=total_episodes + 1, + save_interval=args.save_interval, + save_dir="models", + name=fold_name, + ) + train_env.close() + + if os.path.exists(result_path): + print(f" [skip evaluation] {result_path} already exists") + with open(result_path) as fh: + fold_results = [json.loads(line) for line in fh] + else: + eval_env = DASEnv(problem_ids=test_ids, **env_cfg) + raw = evaluate(eval_env, agent, n_episodes=len(test_ids)) + fold_results = [{**r, "fold": fold_tag} for r in raw] + eval_env.close() + write_jsonl(result_path, fold_results) + print(f" Saved results → {result_path}") + + mean_best_y = float(np.mean([r["best_y"] for r in fold_results])) + fold_summaries.append( + { + "fold": fold_tag, + "fold_idx": fold_idx, + "n_test": len(fold_results), + "mean_best_y": mean_best_y, + } + ) + print(f" mean best_y={mean_best_y:.4e}") + + all_best_y = [] + for fold_idx in fold_indices: + _, _, fold_tag = all_folds[fold_idx] + rpath = os.path.join("results", f"{args.name}_cv_{fold_tag}.jsonl") + if os.path.exists(rpath): + with open(rpath) as fh: + all_best_y.extend(json.loads(line)["best_y"] for line in fh) + + overall = { + "cv_mode": args.cv_mode, + "name": 
args.name, + "portfolio": args.portfolio, + "dims": args.dims, + "n_epochs_per_fold": args.n_epochs, + "n_folds_run": len(fold_summaries), + "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None, + "folds": fold_summaries, + } + summary_path = os.path.join("results", f"{args.name}_cv_summary.jsonl") + write_jsonl(summary_path, [overall]) + + print(f"\n{'=' * 60}") + print(f"Cross-validation complete ({args.cv_mode} dims={args.dims})") + print(f" Folds run : {len(fold_summaries)}") + if all_best_y: + print(f" Overall mean best_y: {overall['overall_mean_best_y']:.4e}") + print(f" Summary : {summary_path}") diff --git a/das/training/ppo.py b/das/training/ppo.py index 030db4a..5eaf452 100644 --- a/das/training/ppo.py +++ b/das/training/ppo.py @@ -143,10 +143,6 @@ def run_ppo(args) -> None: print(f"Portfolio : {args.portfolio}") print(f"Budget : {args.fe_multiplier}×dim | checkpoints={args.n_checkpoints}") - if args.cv_mode: - _run_cv(args, optimizers, cfg) - return - train_ids, test_ids = get_train_test_split(args.mode, args.dims) print( f"Mode : {args.mode} ({len(train_ids)} train / {len(test_ids)} test problems)" @@ -170,11 +166,25 @@ def run_ppo(args) -> None: print(f" Results : {out_path}") -def _run_cv(args, optimizers: list, cfg: dict) -> None: +def run_cv_ppo(args) -> None: from stable_baselines3 import PPO + optimizers = get_portfolio(args.portfolio) + cfg = { + "fe_multiplier": args.fe_multiplier, + "n_checkpoints": args.n_checkpoints, + "cdb": args.cdb, + "reward_option": args.reward_option, + "n_individuals": args.n_individuals, + "seed": args.seed, + } + print(f"Portfolio : {args.portfolio}") + print(f"Budget : {args.fe_multiplier}×dim | checkpoints={args.n_checkpoints}") + global_optima = load_global_optima() - all_folds = get_cv_folds(args.cv_mode, args.dims, seed=args.seed) + all_folds = get_cv_folds( + args.cv_mode, args.dims, seed=args.seed, n_folds=args.n_folds + ) fold_indices = list(range(len(all_folds))) if 
args.folds is None else args.folds print( diff --git a/das/training/rldas.py b/das/training/rldas.py index 5e68eb4..2c041cc 100644 --- a/das/training/rldas.py +++ b/das/training/rldas.py @@ -1,10 +1,11 @@ """RL-DAS training runner (custom single-dim PyTorch PPO).""" +import json import os import numpy as np -from das.env.bbob_splits import get_train_test_split +from das.env.bbob_splits import get_cv_folds, get_train_test_split from das.optimizers.portfolio import get_portfolio from das.training.common import write_jsonl @@ -69,3 +70,132 @@ def run_rl_das(args) -> None: eval_path = os.path.join("results", f"{args.name}_eval.jsonl") write_jsonl(eval_path, test_results) print(f"Results saved to {eval_path}") + + +def run_cv_rl_das(args) -> None: + import cocoex as cx + from agents.rl_das import RLDASEnv, PPOAgent + from agents.rl_das import train, evaluate + + optimizers = get_portfolio(args.portfolio) + if not optimizers: + raise ValueError(f"Unknown optimizers: {args.portfolio}") + + suite = cx.Suite("bbob", "", "") + + if args.k_epoch is None: + args.k_epoch = max(1, int(0.3 * args.n_checkpoints)) + + all_folds = get_cv_folds( + args.cv_mode, [args.dim], seed=args.seed, n_folds=args.n_folds + ) + fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds + + print( + f"CV mode : {args.cv_mode} ({len(fold_indices)}/{len(all_folds)} folds selected)" + ) + print(f"Epochs/fold: {args.n_epochs} | dim={args.dim}") + + env_kwargs = dict( + suite=suite, + optimizers=optimizers, + dim=args.dim, + fe_multiplier=args.fe_multiplier, + n_checkpoints=args.n_checkpoints, + n_individuals=args.n_individuals, + seed=args.seed, + ) + + fold_summaries = [] + + for run_idx, fold_idx in enumerate(fold_indices): + train_ids, test_ids, fold_tag = all_folds[fold_idx] + fold_name = f"{args.name}_cv_{fold_tag}" + model_path = os.path.join("models", f"{fold_name}_final.pt") + result_path = os.path.join("results", f"{fold_name}.jsonl") + + print(f"\n{'=' * 60}") + print( 
+ f"Fold {run_idx + 1}/{len(fold_indices)}: {fold_tag}" + f" ({len(train_ids)} train / {len(test_ids)} test)" + ) + print(f"{'=' * 60}") + + agent = PPOAgent( + dim=args.dim, n_opt=len(optimizers), lr=args.lr, device=args.device + ) + + if os.path.exists(model_path): + print(f" [skip training] {model_path} already exists") + agent = PPOAgent.load(model_path, dim=args.dim, n_opt=len(optimizers)) + else: + train_env = RLDASEnv(problem_ids=train_ids, **env_kwargs) + test_env = RLDASEnv(problem_ids=test_ids, **env_kwargs) + print( + f"RL-DAS | obs_dim={train_env.observation_space.shape[0]}" + f" | k_epoch={args.k_epoch}" + ) + train( + train_env=train_env, + test_env=test_env, + agent=agent, + n_epochs=args.n_epochs, + k_epoch=args.k_epoch, + eval_interval=args.eval_interval, + save_interval=args.save_interval, + save_dir="models", + name=fold_name, + ) + train_env.close() + test_env.close() + + if os.path.exists(result_path): + print(f" [skip evaluation] {result_path} already exists") + with open(result_path) as fh: + fold_results = [json.loads(line) for line in fh] + else: + eval_env = RLDASEnv(problem_ids=test_ids, **env_kwargs) + raw = evaluate(eval_env, agent, n_episodes=len(test_ids)) + fold_results = [{**r, "fold": fold_tag} for r in raw] + eval_env.close() + write_jsonl(result_path, fold_results) + print(f" Saved results → {result_path}") + + mean_best_y = float(np.mean([r["best_y"] for r in fold_results])) + fold_summaries.append( + { + "fold": fold_tag, + "fold_idx": fold_idx, + "n_test": len(fold_results), + "mean_best_y": mean_best_y, + } + ) + print(f" mean best_y={mean_best_y:.4e}") + + all_best_y = [] + for fold_idx in fold_indices: + _, _, fold_tag = all_folds[fold_idx] + rpath = os.path.join("results", f"{args.name}_cv_{fold_tag}.jsonl") + if os.path.exists(rpath): + with open(rpath) as fh: + all_best_y.extend(json.loads(line)["best_y"] for line in fh) + + overall = { + "cv_mode": args.cv_mode, + "name": args.name, + "portfolio": args.portfolio, + 
"dim": args.dim, + "n_epochs_per_fold": args.n_epochs, + "n_folds_run": len(fold_summaries), + "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None, + "folds": fold_summaries, + } + summary_path = os.path.join("results", f"{args.name}_cv_summary.jsonl") + write_jsonl(summary_path, [overall]) + + print(f"\n{'=' * 60}") + print(f"Cross-validation complete ({args.cv_mode} dim={args.dim})") + print(f" Folds run : {len(fold_summaries)}") + if all_best_y: + print(f" Overall mean best_y: {overall['overall_mean_best_y']:.4e}") + print(f" Summary : {summary_path}") diff --git a/exp_das_study.slurm b/exp_das_study.slurm new file mode 100644 index 0000000..b8b3ae9 --- /dev/null +++ b/exp_das_study.slurm @@ -0,0 +1,53 @@ +#!/bin/bash +#SBATCH --job-name=das2_expdas +#SBATCH --output=logs/expdas_%A_%a.out +#SBATCH --error=logs/expdas_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 +#SBATCH --array=0-3 + +# Args: SEED [PORTFOLIO...] 
+SEED=${1:-42} + +if [ "$#" -lt 2 ]; then + PORTFOLIO=('SPSO' 'IPSO' 'SPSOL') +else + PORTFOLIO=("${@:2}") +fi + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}" + +# 0: CV-LOIO multi-dim (2,5,10) +if [[ $SLURM_ARRAY_TASK_ID -eq 0 ]]; then + echo "Exp-DAS | CV-LOIO | multi-dim (2 5 10)" + python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_LOIO_MULTI_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 5 10 --cv-mode LOIO --n-epochs 10 --seed $SEED + +# 1: CV-LOPO multi-dim (2,5,10) +elif [[ $SLURM_ARRAY_TASK_ID -eq 1 ]]; then + echo "Exp-DAS | CV-LOPO | multi-dim (2 5 10)" + python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_LOPO_MULTI_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 5 10 --cv-mode LOPO --n-epochs 10 --seed $SEED + +# 2: CV-LOIO multi-dim (2,3,5,10) +elif [[ $SLURM_ARRAY_TASK_ID -eq 2 ]]; then + echo "Exp-DAS | CV-LOIO | multi-dim (2 3 5 10)" + python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_LOIO_ALLDIM_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 3 5 10 --cv-mode LOIO --n-epochs 10 --seed $SEED + +# 3: CV-LOPO multi-dim (2,3,5,10) +elif [[ $SLURM_ARRAY_TASK_ID -eq 3 ]]; then + echo "Exp-DAS | CV-LOPO | multi-dim (2 3 5 10)" + python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_LOPO_ALLDIM_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 3 5 10 --cv-mode LOPO --n-epochs 10 --seed $SEED +fi \ No newline at end of file diff --git a/main.py b/main.py deleted file mode 100644 index f1c59ba..0000000 --- a/main.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Entry point dispatcher. 
- -Use the dedicated scripts instead: - python train.py [options] — train a PPO agent - python evaluate.py [options] — evaluate a saved model -""" - -if __name__ == "__main__": - import sys - - print("Use train.py or evaluate.py directly:") - print(" python train.py --help") - print(" python evaluate.py --help") - sys.exit(0) diff --git a/ppo_study.slurm b/ppo_study.slurm new file mode 100644 index 0000000..6ca70cb --- /dev/null +++ b/ppo_study.slurm @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=das2_ppo +#SBATCH --output=logs/ppo_%A_%a.out +#SBATCH --error=logs/ppo_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 +#SBATCH --array=0-9 + +# Args: SEED [PORTFOLIO...] +SEED=${1:-42} + +if [ "$#" -lt 2 ]; then + PORTFOLIO=('SPSO' 'IPSO' 'SPSOL') +else + PORTFOLIO=("${@:2}") +fi + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +DIMS=(2 3 5 10) + +echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}" + +# 0-3: single-dim CV-LOIO +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "PPO | CV-LOIO | dim=$DIM" + python cv.py ppo ${PORTFOLIO_STR}_PPO_LOIO_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" -d $DIM --cv-mode LOIO --n-epochs 3 --seed $SEED + +# 4-7: single-dim CV-LOPO +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "PPO | CV-LOPO | dim=$DIM" + python cv.py ppo ${PORTFOLIO_STR}_PPO_LOPO_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" -d $DIM --cv-mode LOPO --n-epochs 3 --seed $SEED + +# 8: multi-dim CV-LOIO +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + echo "PPO | CV-LOIO | multi-dim" + python cv.py ppo ${PORTFOLIO_STR}_PPO_LOIO_MULTI_SEED${SEED} \ + -p "${PORTFOLIO[@]}" 
-d 2 3 5 10 --cv-mode LOIO --n-epochs 3 --seed $SEED + +# 9: multi-dim CV-LOPO +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + echo "PPO | CV-LOPO | multi-dim" + python cv.py ppo ${PORTFOLIO_STR}_PPO_LOPO_MULTI_SEED${SEED} \ + -p "${PORTFOLIO[@]}" -d 2 3 5 10 --cv-mode LOPO --n-epochs 3 --seed $SEED +fi \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 9f4221d..e244ac6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dev = [ [project.scripts] das-train = "train:main" +das-cv = "cv:main" das-eval = "evaluate:main" rl-das-train = "train_rl_das:main" exp-das-train = "train_exp_das:main" diff --git a/rl_das_study.slurm b/rl_das_study.slurm new file mode 100644 index 0000000..504cf97 --- /dev/null +++ b/rl_das_study.slurm @@ -0,0 +1,45 @@ +#!/bin/bash +#SBATCH --job-name=das2_rldas +#SBATCH --output=logs/rldas_%A_%a.out +#SBATCH --error=logs/rldas_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH -A plgrldas2026-gpu-a100 +#SBATCH --array=0-7 + +# Args: SEED [PORTFOLIO...] 
+SEED=${1:-42} + +if [ "$#" -lt 2 ]; then + PORTFOLIO=('SPSO' 'IPSO' 'SPSOL') +else + PORTFOLIO=("${@:2}") +fi + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +DIMS=(2 3 5 10) + +echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}" + +# 0-3: CV-LOIO per dimension +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "RL-DAS | CV-LOIO | dim=$DIM" + python cv.py rl-das ${PORTFOLIO_STR}_RLDAS_LOIO_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dim $DIM --cv-mode LOIO --n-epochs 500 --seed $SEED + +# 4-7: CV-LOPO per dimension +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "RL-DAS | CV-LOPO | dim=$DIM" + python cv.py rl-das ${PORTFOLIO_STR}_RLDAS_LOPO_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dim $DIM --cv-mode LOPO --n-epochs 500 --seed $SEED +fi \ No newline at end of file diff --git a/run_local.sh b/run_local.sh new file mode 100755 index 0000000..2415bd9 --- /dev/null +++ b/run_local.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Local runner — no SLURM. Runs a small smoke-test configuration. +# Usage: ./run_local.sh [seed] [agent] [portfolio...] 
+# agent: ppo | ppo-cv | rl-das | rl-das-cv | exp-das | exp-das-cv | baselines (default: ppo) + +SEED=${1:-42} +AGENT=${2:-ppo} +shift 2 2>/dev/null || shift $# + +if [ "$#" -lt 1 ]; then + PORTFOLIO=('CPSO' 'NM' 'TDE') +else + PORTFOLIO=("$@") +fi + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + +mkdir -p logs models results + +echo "Local run | AGENT=$AGENT | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}" + +case "$AGENT" in + ppo) + python train.py ppo ${PORTFOLIO_STR}_PPO_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" -d 2 --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + ppo-cv) + python cv.py ppo ${PORTFOLIO_STR}_PPO_CV_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" -d 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + rl-das) + python train.py rl-das ${PORTFOLIO_STR}_RLDAS_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dim 2 --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + rl-das-cv) + python cv.py rl-das ${PORTFOLIO_STR}_RLDAS_CV_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dim 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + exp-das) + python train.py exp-das ${PORTFOLIO_STR}_EXPDAS_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + exp-das-cv) + python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_CV_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + baselines) + python baselines.py ${PORTFOLIO_STR}_BASELINES_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --agent all -d 2 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + ;; + *) + echo "Unknown agent '$AGENT'. 
Use: ppo | ppo-cv | rl-das | rl-das-cv | exp-das | exp-das-cv | baselines" + exit 1 + ;; +esac \ No newline at end of file diff --git a/runner.sh b/runner.sh new file mode 100755 index 0000000..49b27c4 --- /dev/null +++ b/runner.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +SEEDS=(12) + +PORTFOLIOS=( + "CPSO TDE NM" +) + +echo "Starting job submissions..." + +for SEED in "${SEEDS[@]}"; do + for PORTFOLIO in "${PORTFOLIOS[@]}"; do + + echo "Submitting PPO study | SEED=$SEED | PORTFOLIO=$PORTFOLIO" + sbatch ppo_study.slurm $SEED $PORTFOLIO + sleep 1 + + echo "Submitting RL-DAS study | SEED=$SEED | PORTFOLIO=$PORTFOLIO" + sbatch rl_das_study.slurm $SEED $PORTFOLIO + sleep 1 + + echo "Submitting Exp-DAS study | SEED=$SEED | PORTFOLIO=$PORTFOLIO" + sbatch exp_das_study.slurm $SEED $PORTFOLIO + sleep 1 + + echo "Submitting baselines | SEED=$SEED | PORTFOLIO=$PORTFOLIO" + sbatch baselines.slurm $SEED $PORTFOLIO + sleep 1 + + done +done + +echo "All jobs submitted!" \ No newline at end of file diff --git a/smoke_test.sh b/smoke_test.sh new file mode 100755 index 0000000..83d606f --- /dev/null +++ b/smoke_test.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Smoke tests — runs every agent type with tiny settings to catch import/wiring errors. +# Uses run_local.sh which already sets fe-multiplier=10, n-checkpoints=3, n-epochs=1. +# +# Usage: +# ./smoke_test.sh # run all tests +# ./smoke_test.sh ppo rl-das # run specific agents + +set -uo pipefail + +SEED=42 +PASS=0 +FAIL=0 +FAILURES=() +LOG_DIR=$(mktemp -d) + +run_smoke() { + local label=$1 + local agent=$2 + local log="$LOG_DIR/${label// /_}.log" + printf "%-25s ... 
" "$label" + if bash run_local.sh $SEED "$agent" > "$log" 2>&1; then + echo "PASS" + ((PASS++)) || true + else + echo "FAIL (log: $log)" + ((FAIL++)) || true + FAILURES+=("$label") + fi +} + +if [ "$#" -gt 0 ]; then + AGENTS=("$@") +else + AGENTS=(ppo ppo-cv rl-das rl-das-cv exp-das exp-das-cv baselines) +fi + +for agent in "${AGENTS[@]}"; do + case "$agent" in + ppo) run_smoke "ppo train" ppo ;; + ppo-cv) run_smoke "ppo cv" ppo-cv ;; + rl-das) run_smoke "rl-das train" rl-das ;; + rl-das-cv) run_smoke "rl-das cv" rl-das-cv ;; + exp-das) run_smoke "exp-das train" exp-das ;; + exp-das-cv) run_smoke "exp-das cv" exp-das-cv;; + baselines) run_smoke "baselines" baselines ;; + *) echo "Unknown agent: $agent"; exit 1 ;; + esac +done + +echo "" +echo "Smoke tests: $PASS passed, $FAIL failed" +if [ "${#FAILURES[@]}" -gt 0 ]; then + for f in "${FAILURES[@]}"; do + echo " FAILED: $f" + echo " --- log: $LOG_DIR/${f// /_}.log ---" + tail -20 "$LOG_DIR/${f// /_}.log" 2>/dev/null || true + done + exit 1 +fi \ No newline at end of file diff --git a/train.py b/train.py index 64c0c90..ebd98b5 100644 --- a/train.py +++ b/train.py @@ -12,11 +12,6 @@ models/_vecnorm.pkl results/_eval.jsonl (with --eval) - CV mode (--cv-mode LOIO|LOPO): - models/_cv_.zip + _vecnorm.pkl - results/_cv_.jsonl - results/_cv_summary.jsonl - rl-das outputs -------------- models/_final.pt @@ -89,7 +84,7 @@ def _parse_args() -> argparse.Namespace: # ---- PPO -------------------------------------------------------- ppo = sub.add_parser( "ppo", - help="SB3 PPO with VecNormalize (multi-dim, CV support)", + help="SB3 PPO with VecNormalize (multi-dim)", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) _add_shared_args(ppo) @@ -117,7 +112,7 @@ def _parse_args() -> argparse.Namespace: "-E", "--n-epochs", type=int, - default=1, + default=20, help="Passes over the full training set. 
total_timesteps = n_epochs × |train_ids| × n_checkpoints", ) ppo.add_argument( @@ -129,19 +124,6 @@ def _parse_args() -> argparse.Namespace: action="store_true", help="Evaluate on the test set immediately after training", ) - ppo.add_argument( - "--cv-mode", - default=None, - choices=["LOIO", "LOPO"], - help="3-fold CV: LOIO holds out 5 of 15 instances per fold; LOPO holds out 8 of 24 functions per fold", - ) - ppo.add_argument( - "--folds", - nargs="+", - type=int, - default=None, - help="Zero-based fold indices to run (CV mode only, default: all)", - ) # ---- RL-DAS ----------------------------------------------------- rl = sub.add_parser( @@ -153,7 +135,7 @@ def _parse_args() -> argparse.Namespace: rl.add_argument( "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)" ) - rl.add_argument("--n-epochs", type=int, default=500, help="Training epochs") + rl.add_argument("--n-epochs", type=int, default=20, help="Training epochs") rl.add_argument( "--k-epoch", type=int, @@ -203,7 +185,11 @@ def _parse_args() -> argparse.Namespace: help="PPO rollout buffer size in steps (default: 16 × n_checkpoints)", ) exp.add_argument( - "--total-episodes", type=int, default=5000, help="Total training episodes" + "-E", + "--n-epochs", + type=int, + default=3, + help="Passes over the training set. 
total_episodes = n_epochs × |train_ids|", ) exp.add_argument( "--eval-interval", type=int, default=100, help="Evaluate every N episodes" From 2b23413d87f40e31cd3f774e57874a8e517b82b1 Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 00:06:02 +0200 Subject: [PATCH 2/4] enable ci --- .github/workflows/ci.yml | 66 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3d78100 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,66 @@ +name: CI + +on: + push: + branches: ["**"] + pull_request: + workflow_dispatch: + inputs: + run_smoke_tests: + description: "Run smoke tests" + type: boolean + default: true + smoke_agents: + description: "Agents to smoke-test (space-separated, empty = all)" + type: string + default: "" + +jobs: + unit-tests: + name: Unit tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + python-version: "3.11" + + - name: Install dependencies + run: uv sync --group dev + + - name: Run pytest + run: uv run pytest tests/ -v --tb=short + + smoke-tests: + name: Smoke tests + runs-on: ubuntu-latest + # Run on manual dispatch (when opted in) or on push/PR to main + if: | + (github.event_name == 'workflow_dispatch' && inputs.run_smoke_tests) || + (github.event_name != 'workflow_dispatch' && github.ref == 'refs/heads/main') + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + python-version: "3.11" + + - name: Install dependencies + run: uv sync + + - name: Make scripts executable + run: chmod +x run_local.sh smoke_test.sh + + - name: Run smoke tests + run: | + AGENTS="${{ inputs.smoke_agents }}" + if [ -n "$AGENTS" ]; then + bash smoke_test.sh $AGENTS + else + bash smoke_test.sh + fi \ No newline at end of file From 
e609b3d3cebeee5838a60a1f81d383d2388bf2ea Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 00:24:52 +0200 Subject: [PATCH 3/4] make tests faster --- das/training/expdas.py | 2 +- tests/test_baselines.py | 11 +++++------ uv.lock | 2 ++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/das/training/expdas.py b/das/training/expdas.py index 5158bad..05dc281 100644 --- a/das/training/expdas.py +++ b/das/training/expdas.py @@ -195,7 +195,7 @@ def run_cv_exp_das(args) -> None: "name": args.name, "portfolio": args.portfolio, "dims": args.dims, - "total_episodes_per_fold": args.total_episodes, + "n_epochs_per_fold": args.n_epochs, "n_folds_run": len(fold_summaries), "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None, "folds": fold_summaries, diff --git a/tests/test_baselines.py b/tests/test_baselines.py index 432666d..718d8c5 100644 --- a/tests/test_baselines.py +++ b/tests/test_baselines.py @@ -56,7 +56,9 @@ def get_problem(self, problem_id: str) -> MockProblem: PROBLEM_IDS = [f"mock_f{i:02d}" for i in range(4)] N_CHECKPOINTS = 3 -FE_MULTIPLIER = 50 +FE_MULTIPLIER = ( + 5 # keep total budget (FE_MULTIPLIER × dim) < 50 to avoid ELA computation +) N_INDIVIDUALS = 10 PORTFOLIO = ["SPSO", "IPSO"] @@ -259,11 +261,8 @@ def test_run_single_uses_full_budget(self): """The optimizer must receive the full budget (fe_multiplier × dim).""" opt_class = get_portfolio(["SPSO"])[0] problem = MockProblem("test_p", dim=2) - # With tiny budget the optimizer can barely initialise - result_small = run_single_algorithm(opt_class, problem, 5, N_INDIVIDUALS) - result_large = run_single_algorithm( - opt_class, problem, FE_MULTIPLIER, N_INDIVIDUALS - ) + result_small = run_single_algorithm(opt_class, problem, 2, N_INDIVIDUALS) + result_large = run_single_algorithm(opt_class, problem, 50, N_INDIVIDUALS) # Larger budget should not produce a worse result assert result_large <= result_small + 1e-6 diff --git a/uv.lock b/uv.lock index c5f5fe7..d601eb3 100644 
--- a/uv.lock +++ b/uv.lock @@ -297,6 +297,7 @@ dependencies = [ { name = "pypop7" }, { name = "rich" }, { name = "stable-baselines3" }, + { name = "torch" }, { name = "tqdm" }, { name = "wandb" }, ] @@ -320,6 +321,7 @@ requires-dist = [ { name = "pypop7", specifier = ">=0.0.82,<0.0.83" }, { name = "rich", specifier = ">=15.0.0" }, { name = "stable-baselines3", specifier = ">=2.3.0" }, + { name = "torch", specifier = ">=2.0.0" }, { name = "tqdm", specifier = ">=4.67.1,<5" }, { name = "wandb", specifier = ">=0.22.2,<0.23" }, ] From 738ede6e358bf3b42280d6b554d2e55730c9f7c0 Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 00:55:29 +0200 Subject: [PATCH 4/4] make tests faster 2 --- tests/test_heterogeneous_portfolios.py | 119 ++----------------------- 1 file changed, 9 insertions(+), 110 deletions(-) diff --git a/tests/test_heterogeneous_portfolios.py b/tests/test_heterogeneous_portfolios.py index 2163bbd..fb85bfe 100644 --- a/tests/test_heterogeneous_portfolios.py +++ b/tests/test_heterogeneous_portfolios.py @@ -144,36 +144,14 @@ def _policy(env): return _policy -def reverse_round_robin(n): - """Cycles through actions n-1, n-2, …, 0 across successive steps.""" - state = {"i": 0} - - def _policy(env): - a = (n - 1) - (state["i"] % n) - state["i"] += 1 - return a - - return _policy - - -def _make_rr(n, direction): - return round_robin(n) if direction == "forward" else reverse_round_robin(n) - - # ------------------------------------------------------------------ # # 1. 
BO + PSO + ES — three-family portfolios # # ------------------------------------------------------------------ # -# Each row: (portfolio spec, dim, fn_name) _BO_PSO_ES = [ (["GPBO_EI", "SPSO", "LMCMAES"], 2, "sphere"), - (["GPBO_UCB", "SPSO", "LMCMAES"], 2, "abs"), - (["GPBO_EI", "IPSO", "CMAES"], 2, "multimodal"), - (["GPBO_UCB", "IPSO", "CMAES"], 3, "sphere"), + (["GPBO_UCB", "IPSO", "CMAES"], 2, "multimodal"), (["GPBO_EI", "SPSOL", "LMCMAES"], 3, "asymmetric"), - (["GPBO_UCB", "CPSO", "CMAES"], 2, "step"), - (["GPBO_EI", "SPSO", "CMAES"], 1, "sphere"), - (["GPBO_UCB", "IPSO", "LMCMAES"], 1, "abs"), (["GPBO_EI", "SPSO", "LMCMAES"], 5, "sphere"), ] @@ -188,57 +166,27 @@ def test_random_policy(self, spec, dim, fn_name): info = drain(env) assert np.isfinite(info["best_y"]) - @pytest.mark.parametrize("direction", ["forward", "reverse"]) @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES) - def test_round_robin_exercises_all_handoffs(self, spec, dim, fn_name, direction): - """Round-robin (forward and reverse) forces every consecutive pair to hand off.""" + def test_round_robin_exercises_all_handoffs(self, spec, dim, fn_name): + """Round-robin forces every consecutive pair to hand off.""" classes = resolve(spec) env = make_env(classes, dim=dim, fn=FUNCTIONS[fn_name]) env.reset() - info = drain(env, policy=_make_rr(len(classes), direction)) + info = drain(env, policy=round_robin(len(classes))) assert np.isfinite(info["best_y"]) assert set(env._choices_history) == set(range(len(classes))) - @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES[:4]) - def test_fixed_bo_policy_only_calls_bo(self, spec, dim, fn_name): - env = make_env(resolve(spec), dim=dim, fn=FUNCTIONS[fn_name]) - env.reset() - drain(env, policy=fixed(0)) - assert all(c == 0 for c in env._choices_history) - assert np.isfinite(env._best_y) - - @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES[:4]) - def test_fixed_pso_policy_never_calls_bo(self, spec, dim, fn_name): - env = 
make_env(resolve(spec), dim=dim, fn=FUNCTIONS[fn_name]) - env.reset() - drain(env, policy=fixed(1)) # PSO is always at index 1 - assert all(c == 1 for c in env._choices_history) - assert np.isfinite(env._best_y) - - @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES[:4]) - def test_fixed_es_policy_never_calls_bo(self, spec, dim, fn_name): - env = make_env(resolve(spec), dim=dim, fn=FUNCTIONS[fn_name]) - env.reset() - drain(env, policy=fixed(2)) # ES is always at index 2 - assert all(c == 2 for c in env._choices_history) - assert np.isfinite(env._best_y) - # ------------------------------------------------------------------ # # 2. BO + PSO + DE — three-family portfolios # # ------------------------------------------------------------------ # -# MADDE/NL_SHADE_RSP respect env n_individuals (=10 here). -# JDE21 forces NP=170, so it gets a larger budget via fe_multiplier=250. _BO_PSO_DE_LIGHT = [ (["GPBO_EI", "SPSO", "MADDE"], 2, "sphere", 100), - (["GPBO_UCB", "SPSO", "NL_SHADE_RSP"], 2, "abs", 100), - (["GPBO_EI", "IPSO", "MADDE"], 3, "multimodal", 100), (["GPBO_UCB", "CPSO", "NL_SHADE_RSP"], 2, "step", 100), ] _BO_PSO_DE_HEAVY = [ (["GPBO_EI", "SPSO", "JDE21"], 2, "sphere", 250), - (["GPBO_UCB", "IPSO", "JDE21"], 3, "abs", 250), ] @@ -256,13 +204,12 @@ def test_random_policy(self, spec, dim, fn_name, fe_mult): info = drain(env) assert np.isfinite(info["best_y"]) - @pytest.mark.parametrize("direction", ["forward", "reverse"]) @pytest.mark.parametrize("spec,dim,fn_name,fe_mult", _BO_PSO_DE_LIGHT) - def test_round_robin_all_families(self, spec, dim, fn_name, fe_mult, direction): + def test_round_robin_all_families(self, spec, dim, fn_name, fe_mult): classes = resolve(spec) env = make_env(classes, dim=dim, fn=FUNCTIONS[fn_name], fe_multiplier=fe_mult) env.reset() - info = drain(env, policy=_make_rr(len(classes), direction)) + info = drain(env, policy=round_robin(len(classes))) assert np.isfinite(info["best_y"]) assert set(env._choices_history) == 
set(range(len(classes))) @@ -274,9 +221,6 @@ def test_round_robin_all_families(self, spec, dim, fn_name, fe_mult, direction): _ALL_FOUR = [ (["GPBO_EI", "SPSO", "CMAES", "MADDE"], 2, "sphere", 100), (["GPBO_UCB", "IPSO", "LMCMAES", "NL_SHADE_RSP"], 2, "abs", 100), - (["GPBO_EI", "SPSO", "LMCMAES", "MADDE"], 3, "multimodal", 100), - (["GPBO_UCB", "CPSO", "CMAES", "NL_SHADE_RSP"], 3, "asymmetric", 100), - (["GPBO_EI", "SPSO", "CMAES", "JDE21"], 2, "sphere", 250), ] @@ -292,14 +236,12 @@ def test_random_policy(self, spec, dim, fn_name, fe_mult): info = drain(env) assert np.isfinite(info["best_y"]) - @pytest.mark.parametrize("direction", ["forward", "reverse"]) - @pytest.mark.parametrize("spec,dim,fn_name,fe_mult", _ALL_FOUR[:3]) - def test_round_robin_visits_all_four(self, spec, dim, fn_name, fe_mult, direction): - # With N_CHECKPOINTS=3 and 4 optimizers, forward visits [0,1,2], reverse visits [3,2,1] + def test_round_robin_visits_all_four(self): + spec, dim, fn_name, fe_mult = _ALL_FOUR[0] classes = resolve(spec) env = make_env(classes, dim=dim, fn=FUNCTIONS[fn_name], fe_multiplier=fe_mult) env.reset() - info = drain(env, policy=_make_rr(len(classes), direction)) + info = drain(env, policy=round_robin(len(classes))) assert np.isfinite(info["best_y"]) def test_best_y_nondecreasing_all_families(self): @@ -343,75 +285,35 @@ def _run_chain(self, spec, actions, dim=2, fn=None, fe_multiplier=100): # -- BO ↔ PSO --------------------------------------------------- # def test_bo_pso_bo(self): - # GPBO_EI → SPSO → GPBO_EI: BO hands population to PSO, gets it back self._run_chain(["GPBO_EI", "SPSO", "LMCMAES"], actions=[0, 1, 0]) def test_pso_bo_pso(self): - # SPSO → GPBO_EI → SPSO: population seeded into GP, then back to PSO self._run_chain(["GPBO_EI", "SPSO", "LMCMAES"], actions=[1, 0, 1]) def test_pso_bo_es(self): - # SPSO → GPBO_UCB → LMCMAES self._run_chain(["GPBO_UCB", "SPSO", "LMCMAES"], actions=[1, 0, 2]) # -- BO ↔ ES ---------------------------------------------------- 
# def test_bo_es_bo(self): - # GPBO_EI → CMAES → GPBO_EI self._run_chain(["GPBO_EI", "SPSO", "CMAES"], actions=[0, 2, 0]) - def test_es_bo_pso(self): - # CMAES → GPBO_UCB → SPSO - self._run_chain(["GPBO_UCB", "SPSO", "CMAES"], actions=[2, 0, 1]) - - def test_es_bo_es(self): - # LMCMAES → GPBO_EI → CMAES: cross-ES transition via BO bridge - self._run_chain( - ["GPBO_EI", "SPSO", "LMCMAES", "CMAES"], - actions=[2, 0, 3], - fe_multiplier=100, - ) - # -- BO ↔ DE ---------------------------------------------------- # def test_de_bo_pso(self): - # MADDE → GPBO_EI → SPSO self._run_chain(["GPBO_EI", "SPSO", "MADDE"], actions=[2, 0, 1]) def test_pso_bo_de(self): - # SPSO → GPBO_UCB → NL_SHADE_RSP self._run_chain(["GPBO_UCB", "SPSO", "NL_SHADE_RSP"], actions=[1, 0, 2]) def test_bo_de_bo(self): - # GPBO_EI → MADDE → GPBO_EI: BO hands off to DE and reclaims state self._run_chain(["GPBO_EI", "SPSO", "MADDE"], actions=[0, 2, 0]) # -- BO ↔ BO ---------------------------------------------------- # def test_ei_pso_ucb(self): - # GPBO_EI → SPSO → GPBO_UCB: EI observations flow through PSO to UCB self._run_chain(["GPBO_EI", "GPBO_UCB", "SPSO"], actions=[0, 2, 1]) - def test_ucb_pso_ei(self): - # GPBO_UCB → SPSO → GPBO_EI - self._run_chain(["GPBO_EI", "GPBO_UCB", "SPSO"], actions=[1, 2, 0]) - - def test_ei_ucb_pso(self): - # GPBO_EI → GPBO_UCB → SPSO: BO→BO obs hand-off then PSO - self._run_chain(["GPBO_EI", "GPBO_UCB", "SPSO"], actions=[0, 1, 2]) - - # -- Higher-dimension chains ------------------------------------ # - - @pytest.mark.parametrize("dim", [3, 5]) - def test_bo_pso_es_dim(self, dim): - self._run_chain(["GPBO_EI", "SPSO", "CMAES"], actions=[0, 1, 2], dim=dim) - - @pytest.mark.parametrize("fn_name", ["abs", "multimodal", "asymmetric", "step"]) - def test_bo_pso_es_landscape(self, fn_name): - self._run_chain( - ["GPBO_EI", "SPSO", "LMCMAES"], actions=[0, 1, 2], fn=FUNCTIONS[fn_name] - ) - # ------------------------------------------------------------------ # # 5. 
Env contract invariants across all portfolios # @@ -419,9 +321,6 @@ def test_bo_pso_es_landscape(self, fn_name): _CONTRACT_PORTFOLIOS = [ (["GPBO_EI", "SPSO", "LMCMAES"], 100), - (["GPBO_UCB", "IPSO", "CMAES"], 100), - (["GPBO_EI", "GPBO_UCB", "SPSO"], 100), - (["GPBO_EI", "SPSO", "MADDE"], 100), (["GPBO_EI", "SPSO", "CMAES", "MADDE"], 100), ]