From c8469d380dc0ca66176c1277e9da518c5808489c Mon Sep 17 00:00:00 2001
From: wlnc <wladyslaw.niec@unit8.co>
Date: Wed, 20 May 2026 00:03:40 +0200
Subject: [PATCH 1/4] enable CV, add UTs, smoke tests and enable them in github

---
 .gitignore             |   1 +
 baselines.slurm        |  30 ++++++
 cv.py                  | 221 +++++++++++++++++++++++++++++++++++++++++
 das/env/bbob_splits.py |  24 ++---
 das/training/expdas.py | 138 ++++++++++++++++++++++++-
 das/training/ppo.py    |  22 ++--
 das/training/rldas.py  | 132 +++++++++++++++++++++++-
 exp_das_study.slurm    |  53 ++++++++++
 main.py                |  14 ---
 ppo_study.slurm        |  57 +++++++++++
 pyproject.toml         |   1 +
 rl_das_study.slurm     |  45 +++++++++
 run_local.sh           |  55 ++++++++++
 runner.sh              |  33 ++++++
 smoke_test.sh          |  60 +++++++++++
 train.py               |  30 ++----
 16 files changed, 856 insertions(+), 60 deletions(-)
 create mode 100644 baselines.slurm
 create mode 100644 cv.py
 create mode 100644 exp_das_study.slurm
 delete mode 100644 main.py
 create mode 100644 ppo_study.slurm
 create mode 100644 rl_das_study.slurm
 create mode 100755 run_local.sh
 create mode 100755 runner.sh
 create mode 100755 smoke_test.sh
diff --git a/.gitignore b/.gitignore
index 0c213e2..b3e5135 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ results/
 .idea/
 __pycache__/
 */__pycache__/
+logs/
diff --git a/baselines.slurm b/baselines.slurm
new file mode 100644
index 0000000..097fcae
--- /dev/null
+++ b/baselines.slurm
@@ -0,0 +1,30 @@
+#!/bin/bash
+#SBATCH --job-name=das2_baselines
+#SBATCH --output=logs/baselines_%j.out
+#SBATCH --error=logs/baselines_%j.err
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=32G
+#SBATCH --time=24:00:00
+#SBATCH --partition=plgrid-gpu-a100
+#SBATCH -A plgrldas2026-gpu-a100
+
+# Usage: sbatch baselines.slurm [SEED] [PORTFOLIO...]  (defaults: 42; SPSO IPSO SPSOL)
+SEED=${1:-42}
+
+if [ "$#" -lt 2 ]; then
+    PORTFOLIO=('SPSO' 'IPSO' 'SPSOL')
+else
+    PORTFOLIO=("${@:2}")
+fi
+
+PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")  # names joined with "_" for the run name
+
+ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate"
+source "$ENV_PATH" || { echo "Cannot activate venv: $ENV_PATH" >&2; exit 1; }
+mkdir -p logs  # NOTE(review): the #SBATCH log paths above are likely opened before this runs; create logs/ before sbatch
+
+echo "Baselines | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"
+
+python baselines.py "${PORTFOLIO_STR}_BASELINES_SEED${SEED}" \
+    -p "${PORTFOLIO[@]}" --agent all --seed "$SEED"
\ No newline at end of file
diff --git a/cv.py b/cv.py
new file mode 100644
index 0000000..557ec2c
--- /dev/null
+++ b/cv.py
@@ -0,0 +1,221 @@
+"""Cross-validation entry point: train one model per fold, evaluate on held-out split.
+
+Usage
+-----
+    python cv.py ppo     <name> [options]
+    python cv.py rl-das  <name> [options]
+    python cv.py exp-das <name> [options]
+
+Outputs (per fold)
+------------------
+    models/<name>_cv_<fold>.zip / _final.pt   trained model
+    results/<name>_cv_<fold>.jsonl            per-problem test results
+    results/<name>_cv_summary.jsonl           aggregated stats across all folds
+"""
+
+import argparse
+import warnings
+from pathlib import Path
+
+from das.env.bbob_splits import ALL_DIMS
+from das.utils import set_seed
+
+warnings.filterwarnings("ignore")
+
+
+# ------------------------------------------------------------------ #
+# Argument parsing                                                     #
+# ------------------------------------------------------------------ #
+
+
+def _add_shared_args(p: argparse.ArgumentParser) -> None:  # options shared by every agent sub-command
+    p.add_argument("name", help="Experiment name (used for output file names)")
+    p.add_argument(
+        "-p",
+        "--portfolio",
+        nargs="+",
+        default=["SPSO", "IPSO", "SPSOL"],
+        help="Sub-optimizer names from the portfolio",
+    )
+    p.add_argument(
+        "--fe-multiplier",
+        type=int,
+        default=10_000,
+        help="Budget = fe_multiplier × dimension",
+    )
+    p.add_argument(
+        "--n-checkpoints",
+        type=int,
+        default=10,
+        help="Optimizer-selection steps per episode",
+    )
+    p.add_argument("--n-individuals", type=int, default=100, help="Population size")
+    p.add_argument("--seed", type=int, default=42)  # passed to set_seed() and get_cv_folds(seed=...)
+    p.add_argument(
+        "--cv-mode",
+        default="LOIO",
+        choices=["LOIO", "LOPO"],
+        help="LOIO: hold out instances per fold; LOPO: hold out functions per fold",
+    )
+    p.add_argument("--n-folds", type=int, default=3, help="Number of CV folds")  # forwarded to get_cv_folds(n_folds=...)
+    p.add_argument(
+        "--folds",
+        nargs="+",
+        type=int,
+        default=None,  # None is expanded to "all folds" by the run_cv_* functions
+        help="Zero-based fold indices to run (default: all)",
+    )
+
+
+def _parse_args() -> argparse.Namespace:  # root parser plus one sub-command per agent
+    root = argparse.ArgumentParser(
+        description="Cross-validation for DAS agents.  Choose an agent with a sub-command.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    sub = root.add_subparsers(
+        dest="agent", required=True, metavar="{ppo,rl-das,exp-das}"  # chosen sub-command is stored in args.agent
+    )
+
+    # ---- PPO --------------------------------------------------------
+    ppo = sub.add_parser(
+        "ppo",
+        help="SB3 PPO with VecNormalize",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    _add_shared_args(ppo)
+    ppo.add_argument(
+        "-d",
+        "--dims",
+        nargs="+",
+        type=int,
+        default=ALL_DIMS,
+        choices=ALL_DIMS,  # only dimensions listed in ALL_DIMS are accepted for PPO
+        help="Problem dimensions",
+    )
+    ppo.add_argument(
+        "-x", "--cdb", type=float, default=1.0, help="Checkpoint division base"
+    )
+    ppo.add_argument(
+        "-O",
+        "--reward-option",
+        type=int,
+        default=1,
+        choices=[1, 2, 3, 4],
+        help="Reward shaping option",
+    )
+    ppo.add_argument(
+        "-E",
+        "--n-epochs",
+        type=int,
+        default=20,
+        help="Training passes per fold. total_timesteps = n_epochs × |train_ids| × n_checkpoints",
+    )
+    ppo.add_argument(
+        "-j", "--n-envs", type=int, default=1, help="Parallel training envs"
+    )
+    ppo.add_argument("--wandb", action="store_true", help="Log to Weights & Biases")
+
+    # ---- RL-DAS -----------------------------------------------------
+    rl = sub.add_parser(
+        "rl-das",
+        help="Custom RL-DAS: single-dimension, pure-PyTorch PPO",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    _add_shared_args(rl)
+    rl.add_argument(
+        "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)"
+    )
+    rl.add_argument("--n-epochs", type=int, default=20, help="Training epochs per fold")
+    rl.add_argument(
+        "--k-epoch",
+        type=int,
+        default=None,  # run_cv_rl_das replaces None with max(1, int(0.3 * n_checkpoints))
+        help="PPO gradient steps per episode (default: int(0.3 × n_checkpoints))",
+    )
+    rl.add_argument("--lr", type=float, default=1e-5, help="Learning rate")
+    rl.add_argument(
+        "--eval-interval", type=int, default=5, help="Evaluate every N epochs"
+    )
+    rl.add_argument(
+        "--save-interval", type=int, default=50, help="Checkpoint every N epochs"
+    )
+    rl.add_argument("--device", default="cpu", help="PyTorch device")
+
+    # ---- Exp-DAS ----------------------------------------------------
+    exp = sub.add_parser(
+        "exp-das",
+        help="Exponential-DAS: custom PPO with exponential checkpoint spacing",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    _add_shared_args(exp)
+    exp.add_argument(
+        "--dims", nargs="+", type=int, default=[2, 5, 10], help="Problem dimensions"  # no `choices` here, unlike ppo
+    )
+    exp.add_argument(
+        "--cdb",
+        type=float,
+        default=2.0,
+        help="Checkpoint division base (>1 = exponential)",
+    )
+    exp.add_argument(
+        "--reward-option",
+        type=int,
+        default=1,
+        choices=[1, 2, 3, 4],
+        help="Reward shaping option",
+    )
+    exp.add_argument(
+        "--buffer-capacity",
+        type=int,
+        default=None,  # run_cv_exp_das falls back to 16 * n_checkpoints
+        help="PPO rollout buffer size in steps (default: 16 × n_checkpoints)",
+    )
+    exp.add_argument(
+        "-E",
+        "--n-epochs",
+        type=int,
+        default=3,
+        help="Passes over the training set per fold. total_episodes = n_epochs × |train_ids|",
+    )
+    exp.add_argument(
+        "--save-interval", type=int, default=500, help="Checkpoint every N episodes"
+    )
+    exp.add_argument("--actor-lr", type=float, default=3e-5, help="Actor learning rate")
+    exp.add_argument(
+        "--critic-lr", type=float, default=1e-5, help="Critic learning rate"
+    )
+    exp.add_argument(
+        "--ppo-epochs", type=int, default=6, help="PPO gradient epochs per update"
+    )
+    exp.add_argument("--device", default="cpu", help="PyTorch device")
+
+    return root.parse_args()  # no explicit argv: argparse reads sys.argv[1:]
+
+
+# ------------------------------------------------------------------ #
+# Main                                                                 #
+# ------------------------------------------------------------------ #
+
+
+def main() -> None:
+    """Parse CLI arguments, validate fold options, and run the chosen agent's CV."""
+    args = _parse_args()
+    if args.n_folds < 1:
+        raise SystemExit("--n-folds must be >= 1")
+    if any(not 0 <= i < args.n_folds for i in args.folds or ()):
+        raise SystemExit(f"--folds must be in [0, {args.n_folds - 1}]")
+    set_seed(args.seed)
+    for out_dir in ("models", "results"):
+        Path(out_dir).mkdir(exist_ok=True)
+    # Runner modules are imported lazily, only for the selected agent.
+    if args.agent == "ppo":
+        from das.training.ppo import run_cv_ppo as run_cv
+    elif args.agent == "rl-das":
+        from das.training.rldas import run_cv_rl_das as run_cv
+    else:  # "exp-das": the sub-command is required, so no other value reaches here
+        from das.training.expdas import run_cv_exp_das as run_cv
+    run_cv(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/das/env/bbob_splits.py b/das/env/bbob_splits.py
index d8b660e..bc9a8da 100644
--- a/das/env/bbob_splits.py
+++ b/das/env/bbob_splits.py
@@ -48,19 +48,15 @@ def get_train_test_split(mode: str, dims: list[int]) -> tuple[list[str], list[st
     return all_ids[:split], all_ids[split:]
 
 
-_N_CV_FOLDS = 3
-
-
 def get_cv_folds(
-    cv_mode: str, dims: list[int], seed: int = 0
+    cv_mode: str, dims: list[int], seed: int = 0, n_folds: int = 3
 ) -> list[tuple[list[str], list[str], str]]:
-    """Return (train_ids, test_ids, fold_tag) for each of the 3 CV folds.
+    """Return (train_ids, test_ids, fold_tag) for each CV fold.
 
-    LOIO: 3 folds – the 15 instance IDs are randomly shuffled and split into
-          3 groups of 5; each fold tests on 1 group and trains on the other 10.
-    LOPO: 3 folds – the 24 BBOB functions are randomly shuffled and split into
-          3 groups of 8; each fold tests on all problems from 1 group of
-          functions (all instances) and trains on the other 16 functions.
+    LOIO: instance IDs are shuffled and split into n_folds groups;
+          each fold tests on one group and trains on the rest.
+    LOPO: BBOB functions are shuffled and split into n_folds groups;
+          each fold tests on all problems from one group of functions.
     """
     rng = np.random.default_rng(seed)
     folds = []
@@ -68,8 +64,8 @@ def get_cv_folds(
     if cv_mode == "LOIO":
         insts = list(INSTANCE_IDS)
         rng.shuffle(insts)
-        chunk = len(insts) // _N_CV_FOLDS  # 5
-        for i in range(_N_CV_FOLDS):
+        chunk = len(insts) // n_folds
+        for i in range(n_folds):
             test_insts = insts[i * chunk : (i + 1) * chunk]
             train_insts = [inst for inst in insts if inst not in set(test_insts)]
             folds.append(
@@ -82,8 +78,8 @@ def get_cv_folds(
     else:  # LOPO
         fns = list(ALL_FUNCTIONS)
         rng.shuffle(fns)
-        chunk = len(fns) // _N_CV_FOLDS  # 8
-        for i in range(_N_CV_FOLDS):
+        chunk = len(fns) // n_folds
+        for i in range(n_folds):
             test_fns = set(fns[i * chunk : (i + 1) * chunk])
             train_fns = ALL_FUNCTIONS - test_fns
             folds.append(
diff --git a/das/training/expdas.py b/das/training/expdas.py
index a6a6076..5158bad 100644
--- a/das/training/expdas.py
+++ b/das/training/expdas.py
@@ -1,11 +1,12 @@
 """Exponential-DAS training runner (custom PyTorch PPO with exp checkpoint spacing)."""
 
+import json
 import os
 
 import cocoex as cx
 import numpy as np
 
-from das.env.bbob_splits import get_train_test_split
+from das.env.bbob_splits import get_cv_folds, get_train_test_split
 from das.env.das_env import DASEnv
 from das.env.observation import observation_dim
 from das.optimizers.portfolio import get_portfolio
@@ -21,8 +22,12 @@ def run_exp_das(args) -> None:
     obs_dim = observation_dim(n_opt)
 
     train_ids, test_ids = get_train_test_split(args.mode, args.dims)
+    total_episodes = args.n_epochs * len(train_ids)
     print(f"Train: {len(train_ids)} problems  |  Test: {len(test_ids)} problems")
-    print(f"obs_dim={obs_dim}  cdb={args.cdb}  n_checkpoints={args.n_checkpoints}")
+    print(
+        f"obs_dim={obs_dim}  cdb={args.cdb}  n_checkpoints={args.n_checkpoints}"
+        f"  n_epochs={args.n_epochs}  total_episodes={total_episodes}"
+    )
 
     suite = cx.Suite("bbob", "", "")
 
@@ -62,7 +67,7 @@ def run_exp_das(args) -> None:
         train_env=train_env,
         test_env=test_env,
         agent=agent,
-        total_episodes=args.total_episodes,
+        total_episodes=total_episodes,
         eval_interval=args.eval_interval,
         save_interval=args.save_interval,
         save_dir="models",
@@ -77,3 +82,130 @@ def run_exp_das(args) -> None:
         eval_path = os.path.join("results", f"{args.name}_eval.jsonl")
         write_jsonl(eval_path, test_results)
         print(f"Results saved to {eval_path}")
+
+
+def run_cv_exp_das(args) -> None:
+    """Train and evaluate one Exp-DAS agent per CV fold, then write a summary file."""
+    from agents.exponential_das import ExpDASAgent, evaluate, train
+
+    optimizers = get_portfolio(args.portfolio)
+    n_opt = len(optimizers)
+    obs_dim = observation_dim(n_opt)
+
+    suite = cx.Suite("bbob", "", "")
+
+    all_folds = get_cv_folds(
+        args.cv_mode, args.dims, seed=args.seed, n_folds=args.n_folds
+    )
+    fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds
+
+    print(f"CV mode    : {args.cv_mode}  ({len(fold_indices)}/{len(all_folds)} folds)")
+    print(f"n_epochs/fold: {args.n_epochs}  |  dims={args.dims}")
+
+    buffer_capacity = args.buffer_capacity or (16 * args.n_checkpoints)
+
+    env_cfg = dict(
+        suite=suite,
+        optimizers=optimizers,
+        fe_multiplier=args.fe_multiplier,
+        n_checkpoints=args.n_checkpoints,
+        checkpoint_division_base=args.cdb,
+        reward_option=args.reward_option,
+        n_individuals=args.n_individuals,
+        seed=args.seed,
+    )
+
+    fold_summaries = []
+
+    for run_idx, fold_idx in enumerate(fold_indices):
+        train_ids, test_ids, fold_tag = all_folds[fold_idx]
+        fold_name = f"{args.name}_cv_{fold_tag}"
+        model_path = os.path.join("models", f"{fold_name}_final.pt")
+        result_path = os.path.join("results", f"{fold_name}.jsonl")
+
+        print(f"\n{'=' * 60}")
+        print(
+            f"Fold {run_idx + 1}/{len(fold_indices)}: {fold_tag}"
+            f"  ({len(train_ids)} train / {len(test_ids)} test)"
+        )
+        print(f"{'=' * 60}")
+
+        total_episodes = args.n_epochs * len(train_ids)
+
+        if os.path.exists(model_path):  # resume support: reuse an already-saved fold model
+            print(f"  [skip training] {model_path} already exists")
+            agent = ExpDASAgent.load(model_path, obs_dim=obs_dim, n_actions=n_opt)
+        else:
+            agent = ExpDASAgent(
+                obs_dim=obs_dim,
+                n_actions=n_opt,
+                buffer_capacity=buffer_capacity,
+                actor_lr=args.actor_lr,
+                critic_lr=args.critic_lr,
+                ppo_epochs=args.ppo_epochs,
+                n_checkpoints=args.n_checkpoints,
+                device=args.device,
+            )
+            train_env = DASEnv(problem_ids=train_ids, **env_cfg)
+            train(
+                train_env=train_env,
+                test_env=None,
+                agent=agent,
+                total_episodes=total_episodes,
+                eval_interval=total_episodes + 1,  # larger than the episode count; no test_env is supplied
+                save_interval=args.save_interval,
+                save_dir="models",
+                name=fold_name,
+            )
+            train_env.close()
+
+        if os.path.exists(result_path):  # reuse stored per-problem results for this fold
+            print(f"  [skip evaluation] {result_path} already exists")
+            with open(result_path) as fh:
+                fold_results = [json.loads(line) for line in fh]
+        else:
+            eval_env = DASEnv(problem_ids=test_ids, **env_cfg)
+            raw = evaluate(eval_env, agent, n_episodes=len(test_ids))
+            fold_results = [{**r, "fold": fold_tag} for r in raw]
+            eval_env.close()
+            write_jsonl(result_path, fold_results)
+            print(f"  Saved results → {result_path}")
+
+        mean_best_y = float(np.mean([r["best_y"] for r in fold_results]))
+        fold_summaries.append(
+            {
+                "fold": fold_tag,
+                "fold_idx": fold_idx,
+                "n_test": len(fold_results),
+                "mean_best_y": mean_best_y,
+            }
+        )
+        print(f"  mean best_y={mean_best_y:.4e}")
+
+    all_best_y = []  # best_y pooled over the result files of every selected fold
+    for fold_idx in fold_indices:
+        _, _, fold_tag = all_folds[fold_idx]
+        rpath = os.path.join("results", f"{args.name}_cv_{fold_tag}.jsonl")
+        if os.path.exists(rpath):
+            with open(rpath) as fh:
+                all_best_y.extend(json.loads(line)["best_y"] for line in fh)
+
+    overall = {
+        "cv_mode": args.cv_mode,
+        "name": args.name,
+        "portfolio": args.portfolio,
+        "dims": args.dims,
+        "n_epochs_per_fold": args.n_epochs,  # the exp-das CV parser defines no total_episodes
+        "n_folds_run": len(fold_summaries),
+        "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None,
+        "folds": fold_summaries,
+    }
+    summary_path = os.path.join("results", f"{args.name}_cv_summary.jsonl")
+    write_jsonl(summary_path, [overall])
+
+    print(f"\n{'=' * 60}")
+    print(f"Cross-validation complete  ({args.cv_mode}  dims={args.dims})")
+    print(f"  Folds run          : {len(fold_summaries)}")
+    if all_best_y:
+        print(f"  Overall mean best_y: {overall['overall_mean_best_y']:.4e}")
+    print(f"  Summary            : {summary_path}")
diff --git a/das/training/ppo.py b/das/training/ppo.py
index 030db4a..5eaf452 100644
--- a/das/training/ppo.py
+++ b/das/training/ppo.py
@@ -143,10 +143,6 @@ def run_ppo(args) -> None:
     print(f"Portfolio : {args.portfolio}")
     print(f"Budget    : {args.fe_multiplier}×dim  |  checkpoints={args.n_checkpoints}")
 
-    if args.cv_mode:
-        _run_cv(args, optimizers, cfg)
-        return
-
     train_ids, test_ids = get_train_test_split(args.mode, args.dims)
     print(
         f"Mode      : {args.mode}  ({len(train_ids)} train / {len(test_ids)} test problems)"
@@ -170,11 +166,25 @@ def run_ppo(args) -> None:
         print(f"  Results    : {out_path}")
 
 
-def _run_cv(args, optimizers: list, cfg: dict) -> None:
+def run_cv_ppo(args) -> None:
     from stable_baselines3 import PPO
 
+    optimizers = get_portfolio(args.portfolio)
+    cfg = {
+        "fe_multiplier": args.fe_multiplier,
+        "n_checkpoints": args.n_checkpoints,
+        "cdb": args.cdb,
+        "reward_option": args.reward_option,
+        "n_individuals": args.n_individuals,
+        "seed": args.seed,
+    }
+    print(f"Portfolio : {args.portfolio}")
+    print(f"Budget    : {args.fe_multiplier}×dim  |  checkpoints={args.n_checkpoints}")
+
     global_optima = load_global_optima()
-    all_folds = get_cv_folds(args.cv_mode, args.dims, seed=args.seed)
+    all_folds = get_cv_folds(
+        args.cv_mode, args.dims, seed=args.seed, n_folds=args.n_folds
+    )
     fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds
 
     print(
diff --git a/das/training/rldas.py b/das/training/rldas.py
index 5e68eb4..2c041cc 100644
--- a/das/training/rldas.py
+++ b/das/training/rldas.py
@@ -1,10 +1,11 @@
 """RL-DAS training runner (custom single-dim PyTorch PPO)."""
 
+import json
 import os
 
 import numpy as np
 
-from das.env.bbob_splits import get_train_test_split
+from das.env.bbob_splits import get_cv_folds, get_train_test_split
 from das.optimizers.portfolio import get_portfolio
 from das.training.common import write_jsonl
 
@@ -69,3 +70,132 @@ def run_rl_das(args) -> None:
         eval_path = os.path.join("results", f"{args.name}_eval.jsonl")
         write_jsonl(eval_path, test_results)
         print(f"Results saved to {eval_path}")
+
+
+def run_cv_rl_das(args) -> None:
+    """Train and evaluate one RL-DAS agent per CV fold, then write a summary file."""
+    import cocoex as cx
+    from agents.rl_das import PPOAgent, RLDASEnv, evaluate, train
+
+    optimizers = get_portfolio(args.portfolio)
+    if not optimizers:
+        raise ValueError(f"Unknown optimizers: {args.portfolio}")
+
+    suite = cx.Suite("bbob", "", "")
+
+    if args.k_epoch is None:
+        args.k_epoch = max(1, int(0.3 * args.n_checkpoints))
+
+    all_folds = get_cv_folds(
+        args.cv_mode, [args.dim], seed=args.seed, n_folds=args.n_folds
+    )
+    fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds
+
+    print(
+        f"CV mode    : {args.cv_mode}  ({len(fold_indices)}/{len(all_folds)} folds selected)"
+    )
+    print(f"Epochs/fold: {args.n_epochs}  |  dim={args.dim}")
+
+    env_kwargs = dict(
+        suite=suite,
+        optimizers=optimizers,
+        dim=args.dim,
+        fe_multiplier=args.fe_multiplier,
+        n_checkpoints=args.n_checkpoints,
+        n_individuals=args.n_individuals,
+        seed=args.seed,
+    )
+
+    fold_summaries = []
+
+    for run_idx, fold_idx in enumerate(fold_indices):
+        train_ids, test_ids, fold_tag = all_folds[fold_idx]
+        fold_name = f"{args.name}_cv_{fold_tag}"
+        model_path = os.path.join("models", f"{fold_name}_final.pt")
+        result_path = os.path.join("results", f"{fold_name}.jsonl")
+
+        print(f"\n{'=' * 60}")
+        print(
+            f"Fold {run_idx + 1}/{len(fold_indices)}: {fold_tag}"
+            f"  ({len(train_ids)} train / {len(test_ids)} test)"
+        )
+        print(f"{'=' * 60}")
+
+        if os.path.exists(model_path):
+            print(f"  [skip training] {model_path} already exists")
+            agent = PPOAgent.load(model_path, dim=args.dim, n_opt=len(optimizers))
+        else:
+            # Build a fresh agent only when this fold actually has to be trained.
+            agent = PPOAgent(
+                dim=args.dim, n_opt=len(optimizers), lr=args.lr, device=args.device
+            )
+            train_env = RLDASEnv(problem_ids=train_ids, **env_kwargs)
+            test_env = RLDASEnv(problem_ids=test_ids, **env_kwargs)  # NOTE(review): held-out fold goes to train(); confirm monitoring only
+            print(
+                f"RL-DAS  |  obs_dim={train_env.observation_space.shape[0]}"
+                f"  |  k_epoch={args.k_epoch}"
+            )
+            train(
+                train_env=train_env,
+                test_env=test_env,
+                agent=agent,
+                n_epochs=args.n_epochs,
+                k_epoch=args.k_epoch,
+                eval_interval=args.eval_interval,
+                save_interval=args.save_interval,
+                save_dir="models",
+                name=fold_name,
+            )
+            train_env.close()
+            test_env.close()
+
+        if os.path.exists(result_path):  # reuse stored per-problem results for this fold
+            print(f"  [skip evaluation] {result_path} already exists")
+            with open(result_path) as fh:
+                fold_results = [json.loads(line) for line in fh]
+        else:
+            eval_env = RLDASEnv(problem_ids=test_ids, **env_kwargs)
+            raw = evaluate(eval_env, agent, n_episodes=len(test_ids))
+            fold_results = [{**r, "fold": fold_tag} for r in raw]
+            eval_env.close()
+            write_jsonl(result_path, fold_results)
+            print(f"  Saved results → {result_path}")
+
+        mean_best_y = float(np.mean([r["best_y"] for r in fold_results]))
+        fold_summaries.append(
+            {
+                "fold": fold_tag,
+                "fold_idx": fold_idx,
+                "n_test": len(fold_results),
+                "mean_best_y": mean_best_y,
+            }
+        )
+        print(f"  mean best_y={mean_best_y:.4e}")
+
+    all_best_y = []  # best_y pooled over the result files of every selected fold
+    for fold_idx in fold_indices:
+        _, _, fold_tag = all_folds[fold_idx]
+        rpath = os.path.join("results", f"{args.name}_cv_{fold_tag}.jsonl")
+        if os.path.exists(rpath):
+            with open(rpath) as fh:
+                all_best_y.extend(json.loads(line)["best_y"] for line in fh)
+
+    overall = {
+        "cv_mode": args.cv_mode,
+        "name": args.name,
+        "portfolio": args.portfolio,
+        "dim": args.dim,
+        "n_epochs_per_fold": args.n_epochs,
+        "n_folds_run": len(fold_summaries),
+        "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None,
+        "folds": fold_summaries,
+    }
+    summary_path = os.path.join("results", f"{args.name}_cv_summary.jsonl")
+    write_jsonl(summary_path, [overall])
+
+    print(f"\n{'=' * 60}")
+    print(f"Cross-validation complete  ({args.cv_mode}  dim={args.dim})")
+    print(f"  Folds run          : {len(fold_summaries)}")
+    if all_best_y:
+        print(f"  Overall mean best_y: {overall['overall_mean_best_y']:.4e}")
+    print(f"  Summary            : {summary_path}")
diff --git a/exp_das_study.slurm b/exp_das_study.slurm
new file mode 100644
index 0000000..b8b3ae9
--- /dev/null
+++ b/exp_das_study.slurm
@@ -0,0 +1,53 @@
+#!/bin/bash
+#SBATCH --job-name=das2_expdas
+#SBATCH --output=logs/expdas_%A_%a.out
+#SBATCH --error=logs/expdas_%A_%a.err
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=32G
+#SBATCH --time=48:00:00
+#SBATCH --partition=plgrid-gpu-a100
+#SBATCH -A plgrldas2026-gpu-a100
+#SBATCH --array=0-3
+
+# Usage: sbatch exp_das_study.slurm [SEED] [PORTFOLIO...]  (defaults: 42; SPSO IPSO SPSOL)
+SEED=${1:-42}
+
+if [ "$#" -lt 2 ]; then
+    PORTFOLIO=('SPSO' 'IPSO' 'SPSOL')
+else
+    PORTFOLIO=("${@:2}")
+fi
+
+PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")  # names joined with "_" for the run name
+
+ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate"
+source "$ENV_PATH" || { echo "Cannot activate venv: $ENV_PATH" >&2; exit 1; }
+mkdir -p logs  # NOTE(review): the #SBATCH log paths above are likely opened before this runs; create logs/ before sbatch
+
+echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"
+
+# 0: CV-LOIO multi-dim (2,5,10)
+if [[ $SLURM_ARRAY_TASK_ID -eq 0 ]]; then
+    echo "Exp-DAS | CV-LOIO | multi-dim (2 5 10)"
+    python cv.py exp-das "${PORTFOLIO_STR}_EXPDAS_LOIO_MULTI_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" --dims 2 5 10 --cv-mode LOIO --n-epochs 10 --seed "$SEED"
+
+# 1: CV-LOPO multi-dim (2,5,10)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 1 ]]; then
+    echo "Exp-DAS | CV-LOPO | multi-dim (2 5 10)"
+    python cv.py exp-das "${PORTFOLIO_STR}_EXPDAS_LOPO_MULTI_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" --dims 2 5 10 --cv-mode LOPO --n-epochs 10 --seed "$SEED"
+
+# 2: CV-LOIO multi-dim (2,3,5,10)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 2 ]]; then
+    echo "Exp-DAS | CV-LOIO | multi-dim (2 3 5 10)"
+    python cv.py exp-das "${PORTFOLIO_STR}_EXPDAS_LOIO_ALLDIM_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" --dims 2 3 5 10 --cv-mode LOIO --n-epochs 10 --seed "$SEED"
+
+# 3: CV-LOPO multi-dim (2,3,5,10)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 3 ]]; then
+    echo "Exp-DAS | CV-LOPO | multi-dim (2 3 5 10)"
+    python cv.py exp-das "${PORTFOLIO_STR}_EXPDAS_LOPO_ALLDIM_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" --dims 2 3 5 10 --cv-mode LOPO --n-epochs 10 --seed "$SEED"
+fi
\ No newline at end of file
diff --git a/main.py b/main.py
deleted file mode 100644
index f1c59ba..0000000
--- a/main.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Entry point dispatcher.
-
-Use the dedicated scripts instead:
-    python train.py <name> [options]      — train a PPO agent
-    python evaluate.py <name> [options]  — evaluate a saved model
-"""
-
-if __name__ == "__main__":
-    import sys
-
-    print("Use train.py or evaluate.py directly:")
-    print("  python train.py --help")
-    print("  python evaluate.py --help")
-    sys.exit(0)
diff --git a/ppo_study.slurm b/ppo_study.slurm
new file mode 100644
index 0000000..6ca70cb
--- /dev/null
+++ b/ppo_study.slurm
@@ -0,0 +1,57 @@
+#!/bin/bash
+#SBATCH --job-name=das2_ppo
+#SBATCH --output=logs/ppo_%A_%a.out
+#SBATCH --error=logs/ppo_%A_%a.err
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=32G
+#SBATCH --time=48:00:00
+#SBATCH --partition=plgrid-gpu-a100
+#SBATCH -A plgrldas2026-gpu-a100
+#SBATCH --array=0-9
+
+# Usage: sbatch ppo_study.slurm [SEED] [PORTFOLIO...]  (defaults: 42; SPSO IPSO SPSOL)
+SEED=${1:-42}
+
+if [ "$#" -lt 2 ]; then
+    PORTFOLIO=('SPSO' 'IPSO' 'SPSOL')
+else
+    PORTFOLIO=("${@:2}")
+fi
+
+PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")  # names joined with "_" for the run name
+
+ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate"
+source "$ENV_PATH" || { echo "Cannot activate venv: $ENV_PATH" >&2; exit 1; }
+mkdir -p logs  # NOTE(review): the #SBATCH log paths above are likely opened before this runs; create logs/ before sbatch
+
+DIMS=(2 3 5 10)  # indexed by array task id: 0-3 directly, 4-7 after subtracting 4
+
+echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"
+
+# 0-3: single-dim CV-LOIO
+if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
+    DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
+    echo "PPO | CV-LOIO | dim=$DIM"
+    python cv.py ppo "${PORTFOLIO_STR}_PPO_LOIO_DIM${DIM}_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" -d "$DIM" --cv-mode LOIO --n-epochs 3 --seed "$SEED"
+
+# 4-7: single-dim CV-LOPO
+elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then
+    DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]}
+    echo "PPO | CV-LOPO | dim=$DIM"
+    python cv.py ppo "${PORTFOLIO_STR}_PPO_LOPO_DIM${DIM}_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" -d "$DIM" --cv-mode LOPO --n-epochs 3 --seed "$SEED"
+
+# 8: multi-dim CV-LOIO
+elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then
+    echo "PPO | CV-LOIO | multi-dim"
+    python cv.py ppo "${PORTFOLIO_STR}_PPO_LOIO_MULTI_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" -d 2 3 5 10 --cv-mode LOIO --n-epochs 3 --seed "$SEED"
+
+# 9: multi-dim CV-LOPO
+elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then
+    echo "PPO | CV-LOPO | multi-dim"
+    python cv.py ppo "${PORTFOLIO_STR}_PPO_LOPO_MULTI_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" -d 2 3 5 10 --cv-mode LOPO --n-epochs 3 --seed "$SEED"
+fi
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 9f4221d..e244ac6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ dev = [
 
 [project.scripts]
 das-train = "train:main"
+das-cv = "cv:main"
 das-eval = "evaluate:main"
 rl-das-train = "train_rl_das:main"
 exp-das-train = "train_exp_das:main"
diff --git a/rl_das_study.slurm b/rl_das_study.slurm
new file mode 100644
index 0000000..504cf97
--- /dev/null
+++ b/rl_das_study.slurm
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH --job-name=das2_rldas
+#SBATCH --output=logs/rldas_%A_%a.out
+#SBATCH --error=logs/rldas_%A_%a.err
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=32G
+#SBATCH --time=48:00:00
+#SBATCH --partition=plgrid-gpu-a100
+#SBATCH -A plgrldas2026-gpu-a100
+#SBATCH --array=0-7
+
+# Usage: sbatch rl_das_study.slurm [SEED] [PORTFOLIO...]  (defaults: 42; SPSO IPSO SPSOL)
+SEED=${1:-42}
+
+if [ "$#" -lt 2 ]; then
+    PORTFOLIO=('SPSO' 'IPSO' 'SPSOL')
+else
+    PORTFOLIO=("${@:2}")
+fi
+
+PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")  # names joined with "_" for the run name
+
+ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate"
+source "$ENV_PATH" || { echo "Cannot activate venv: $ENV_PATH" >&2; exit 1; }
+mkdir -p logs  # NOTE(review): the #SBATCH log paths above are likely opened before this runs; create logs/ before sbatch
+
+DIMS=(2 3 5 10)  # indexed by array task id: 0-3 directly, 4-7 after subtracting 4
+
+echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"
+
+# 0-3: CV-LOIO per dimension
+if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
+    DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
+    echo "RL-DAS | CV-LOIO | dim=$DIM"
+    python cv.py rl-das "${PORTFOLIO_STR}_RLDAS_LOIO_DIM${DIM}_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" --dim "$DIM" --cv-mode LOIO --n-epochs 500 --seed "$SEED"
+
+# 4-7: CV-LOPO per dimension
+elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then
+    DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]}
+    echo "RL-DAS | CV-LOPO | dim=$DIM"
+    python cv.py rl-das "${PORTFOLIO_STR}_RLDAS_LOPO_DIM${DIM}_SEED${SEED}" \
+        -p "${PORTFOLIO[@]}" --dim "$DIM" --cv-mode LOPO --n-epochs 500 --seed "$SEED"
+fi
\ No newline at end of file
diff --git a/run_local.sh b/run_local.sh
new file mode 100755
index 0000000..2415bd9
--- /dev/null
+++ b/run_local.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Local runner — no SLURM. Runs a small smoke-test configuration.
+# Usage: ./run_local.sh [seed] [agent] [portfolio...]
+#   agent: ppo | ppo-cv | rl-das | rl-das-cv | exp-das | exp-das-cv | baselines (default: ppo)
+#
+# Exits with the status of the launched Python command (1 for an unknown agent).
+
+SEED=${1:-42}
+AGENT=${2:-ppo}
+# Drop seed and agent. With fewer than two arguments `shift 2` fails without
+# shifting anything, so fall back to discarding whatever was passed.
+shift 2 2>/dev/null || shift $#
+
+if [ "$#" -lt 1 ]; then
+    PORTFOLIO=('CPSO' 'NM' 'TDE')
+else
+    PORTFOLIO=("$@")
+fi
+
+PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")
+
+# Flags shared by every agent below.
+COMMON=(--seed "$SEED" --fe-multiplier 10 --n-checkpoints 3)
+
+mkdir -p logs models results
+
+echo "Local run | AGENT=$AGENT | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"
+
+case "$AGENT" in
+    ppo)
+        python train.py ppo "${PORTFOLIO_STR}_PPO_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" -d 2 --n-epochs 1 "${COMMON[@]}"
+        ;;
+    ppo-cv)
+        python cv.py ppo "${PORTFOLIO_STR}_PPO_CV_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" -d 2 --cv-mode LOIO --n-epochs 1 "${COMMON[@]}"
+        ;;
+    rl-das)
+        python train.py rl-das "${PORTFOLIO_STR}_RLDAS_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" --dim 2 --n-epochs 1 "${COMMON[@]}"
+        ;;
+    rl-das-cv)
+        python cv.py rl-das "${PORTFOLIO_STR}_RLDAS_CV_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" --dim 2 --cv-mode LOIO --n-epochs 1 "${COMMON[@]}"
+        ;;
+    exp-das)
+        python train.py exp-das "${PORTFOLIO_STR}_EXPDAS_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" --dims 2 --n-epochs 1 "${COMMON[@]}"
+        ;;
+    exp-das-cv)
+        python cv.py exp-das "${PORTFOLIO_STR}_EXPDAS_CV_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" --dims 2 --cv-mode LOIO --n-epochs 1 "${COMMON[@]}"
+        ;;
+    baselines)
+        python baselines.py "${PORTFOLIO_STR}_BASELINES_LOCAL_SEED${SEED}" \
+            -p "${PORTFOLIO[@]}" --agent all -d 2 "${COMMON[@]}"
+        ;;
+    *)
+        echo "Unknown agent '$AGENT'. Use: ppo | ppo-cv | rl-das | rl-das-cv | exp-das | exp-das-cv | baselines" >&2
+        exit 1
+        ;;
+esac
diff --git a/runner.sh b/runner.sh
new file mode 100755
index 0000000..49b27c4
--- /dev/null
+++ b/runner.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Submits every study to SLURM for each seed/portfolio combination below.
+
+SEEDS=(12)
+
+# One portfolio per entry; algorithm names are separated by spaces.
+PORTFOLIOS=(
+    "CPSO TDE NM"
+)
+
+# "<label>|<batch script>" pairs, submitted in this order.
+STUDIES=(
+    "PPO study|ppo_study.slurm"
+    "RL-DAS study|rl_das_study.slurm"
+    "Exp-DAS study|exp_das_study.slurm"
+    "baselines|baselines.slurm"
+)
+
+# rl_das_study.slurm and baselines.slurm send their output to logs/, and SLURM
+# does not create missing output directories, so create it before submitting.
+mkdir -p logs
+
+echo "Starting job submissions..."
+
+FAILED=0
+for SEED in "${SEEDS[@]}"; do
+    for PORTFOLIO in "${PORTFOLIOS[@]}"; do
+        # Split the portfolio string into one argument per algorithm.
+        read -ra ALGOS <<< "$PORTFOLIO"
+
+        for STUDY in "${STUDIES[@]}"; do
+            echo "Submitting ${STUDY%%|*} | SEED=$SEED | PORTFOLIO=$PORTFOLIO"
+            sbatch "${STUDY#*|}" "$SEED" "${ALGOS[@]}" || FAILED=$((FAILED + 1))
+            sleep 1
+        done
+    done
+done
+
+if [ "$FAILED" -gt 0 ]; then
+    echo "$FAILED submission(s) failed" >&2
+    exit 1
+fi
+
+echo "All jobs submitted!"
diff --git a/smoke_test.sh b/smoke_test.sh
new file mode 100755
index 0000000..83d606f
--- /dev/null
+++ b/smoke_test.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Smoke tests — runs every agent type with tiny settings to catch import/wiring errors.
+# Uses run_local.sh which already sets fe-multiplier=10, n-checkpoints=3, n-epochs=1.
+#
+# Usage:
+#   ./smoke_test.sh              # run all tests
+#   ./smoke_test.sh ppo rl-das   # run specific agents
+#
+# Exits 1 if any agent fails or an unknown agent is requested.
+
+set -uo pipefail
+
+SEED=42
+PASS=0
+FAIL=0
+FAILURES=()       # labels of failed runs
+FAILURE_LOGS=()   # log path of each failed run, same order as FAILURES
+LOG_DIR=$(mktemp -d)
+
+# run_smoke LABEL AGENT — run one agent through run_local.sh, capture its
+# output in $LOG_DIR and record the outcome.
+run_smoke() {
+    local label=$1
+    local agent=$2
+    local log="$LOG_DIR/${agent//-/_}.log"
+    printf "%-25s ... " "$label"
+    if bash run_local.sh "$SEED" "$agent" > "$log" 2>&1; then
+        echo "PASS"
+        PASS=$((PASS + 1))
+    else
+        echo "FAIL  (log: $log)"
+        FAIL=$((FAIL + 1))
+        FAILURES+=("$label")
+        FAILURE_LOGS+=("$log")
+    fi
+}
+
+if [ "$#" -gt 0 ]; then
+    AGENTS=("$@")
+else
+    AGENTS=(ppo ppo-cv rl-das rl-das-cv exp-das exp-das-cv baselines)
+fi
+
+for agent in "${AGENTS[@]}"; do
+    case "$agent" in
+        ppo)        run_smoke "ppo train"     ppo       ;;
+        ppo-cv)     run_smoke "ppo cv"        ppo-cv    ;;
+        rl-das)     run_smoke "rl-das train"  rl-das    ;;
+        rl-das-cv)  run_smoke "rl-das cv"     rl-das-cv ;;
+        exp-das)    run_smoke "exp-das train" exp-das   ;;
+        exp-das-cv) run_smoke "exp-das cv"    exp-das-cv;;
+        baselines)  run_smoke "baselines"     baselines ;;
+        *)          echo "Unknown agent: $agent"; exit 1 ;;
+    esac
+done
+
+echo ""
+echo "Smoke tests: $PASS passed, $FAIL failed"
+if [ "${#FAILURES[@]}" -gt 0 ]; then
+    # Logs are named after the agent, not the label, so use the recorded path.
+    for i in "${!FAILURES[@]}"; do
+        echo "  FAILED: ${FAILURES[$i]}"
+        echo "  --- log: ${FAILURE_LOGS[$i]} ---"
+        tail -20 "${FAILURE_LOGS[$i]}" 2>/dev/null || true
+    done
+    exit 1
+fi
diff --git a/train.py b/train.py
index 64c0c90..ebd98b5 100644
--- a/train.py
+++ b/train.py
@@ -12,11 +12,6 @@
     models/<name>_vecnorm.pkl
     results/<name>_eval.jsonl          (with --eval)
 
-    CV mode (--cv-mode LOIO|LOPO):
-    models/<name>_cv_<fold>.zip  +  _vecnorm.pkl
-    results/<name>_cv_<fold>.jsonl
-    results/<name>_cv_summary.jsonl
-
 rl-das outputs
 --------------
     models/<name>_final.pt
@@ -89,7 +84,7 @@ def _parse_args() -> argparse.Namespace:
     # ---- PPO --------------------------------------------------------
     ppo = sub.add_parser(
         "ppo",
-        help="SB3 PPO with VecNormalize (multi-dim, CV support)",
+        help="SB3 PPO with VecNormalize (multi-dim)",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     _add_shared_args(ppo)
@@ -117,7 +112,7 @@ def _parse_args() -> argparse.Namespace:
         "-E",
         "--n-epochs",
         type=int,
-        default=1,
+        default=20,
         help="Passes over the full training set. total_timesteps = n_epochs × |train_ids| × n_checkpoints",
     )
     ppo.add_argument(
@@ -129,19 +124,6 @@ def _parse_args() -> argparse.Namespace:
         action="store_true",
         help="Evaluate on the test set immediately after training",
     )
-    ppo.add_argument(
-        "--cv-mode",
-        default=None,
-        choices=["LOIO", "LOPO"],
-        help="3-fold CV: LOIO holds out 5 of 15 instances per fold; LOPO holds out 8 of 24 functions per fold",
-    )
-    ppo.add_argument(
-        "--folds",
-        nargs="+",
-        type=int,
-        default=None,
-        help="Zero-based fold indices to run (CV mode only, default: all)",
-    )
 
     # ---- RL-DAS -----------------------------------------------------
     rl = sub.add_parser(
@@ -153,7 +135,7 @@ def _parse_args() -> argparse.Namespace:
     rl.add_argument(
         "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)"
     )
-    rl.add_argument("--n-epochs", type=int, default=500, help="Training epochs")
+    rl.add_argument("--n-epochs", type=int, default=20, help="Training epochs")
     rl.add_argument(
         "--k-epoch",
         type=int,
@@ -203,7 +185,11 @@ def _parse_args() -> argparse.Namespace:
         help="PPO rollout buffer size in steps (default: 16 × n_checkpoints)",
     )
     exp.add_argument(
-        "--total-episodes", type=int, default=5000, help="Total training episodes"
+        "-E",
+        "--n-epochs",
+        type=int,
+        default=3,
+        help="Passes over the training set. total_episodes = n_epochs × |train_ids|",
     )
     exp.add_argument(
         "--eval-interval", type=int, default=100, help="Evaluate every N episodes"

From 2b23413d87f40e31cd3f774e57874a8e517b82b1 Mon Sep 17 00:00:00 2001
From: wlnc <wladyslaw.niec@unit8.co>
Date: Wed, 20 May 2026 00:06:02 +0200
Subject: [PATCH 2/4] enable ci

---
 .github/workflows/ci.yml | 66 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..3d78100
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,73 @@
+name: CI
+
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+  workflow_dispatch:
+    inputs:
+      run_smoke_tests:
+        description: "Run smoke tests"
+        type: boolean
+        default: true
+      smoke_agents:
+        description: "Agents to smoke-test (space-separated, empty = all)"
+        type: string
+        default: ""
+
+jobs:
+  unit-tests:
+    name: Unit tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: uv sync --group dev
+
+      - name: Run pytest
+        run: uv run pytest tests/ -v --tb=short
+
+  smoke-tests:
+    name: Smoke tests
+    runs-on: ubuntu-latest
+    # Run on manual dispatch (when opted in), on pushes to main and on pull
+    # requests targeting main. On pull_request events github.ref is
+    # refs/pull/<n>/merge, so the target branch is read from github.base_ref.
+    if: |
+      (github.event_name == 'workflow_dispatch' && inputs.run_smoke_tests) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
+      (github.event_name == 'pull_request' && github.base_ref == 'main')
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Make scripts executable
+        run: chmod +x run_local.sh smoke_test.sh
+
+      - name: Run smoke tests
+        env:
+          # Hand the free-form input over via the environment instead of
+          # expanding it inside the script text.
+          AGENTS: ${{ inputs.smoke_agents }}
+        run: |
+          # $AGENTS is left unquoted on purpose: one argument per agent.
+          if [ -n "$AGENTS" ]; then
+            bash smoke_test.sh $AGENTS
+          else
+            bash smoke_test.sh
+          fi

From e609b3d3cebeee5838a60a1f81d383d2388bf2ea Mon Sep 17 00:00:00 2001
From: wlnc <wladyslaw.niec@unit8.co>
Date: Wed, 20 May 2026 00:24:52 +0200
Subject: [PATCH 3/4] make tests faster

---
 das/training/expdas.py  |  2 +-
 tests/test_baselines.py | 11 +++++------
 uv.lock                 |  2 ++
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/das/training/expdas.py b/das/training/expdas.py
index 5158bad..05dc281 100644
--- a/das/training/expdas.py
+++ b/das/training/expdas.py
@@ -195,7 +195,7 @@ def run_cv_exp_das(args) -> None:
         "name": args.name,
         "portfolio": args.portfolio,
         "dims": args.dims,
-        "total_episodes_per_fold": args.total_episodes,
+        "n_epochs_per_fold": args.n_epochs,
         "n_folds_run": len(fold_summaries),
         "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None,
         "folds": fold_summaries,
diff --git a/tests/test_baselines.py b/tests/test_baselines.py
index 432666d..718d8c5 100644
--- a/tests/test_baselines.py
+++ b/tests/test_baselines.py
@@ -56,7 +56,9 @@ def get_problem(self, problem_id: str) -> MockProblem:
 
 PROBLEM_IDS = [f"mock_f{i:02d}" for i in range(4)]
 N_CHECKPOINTS = 3
-FE_MULTIPLIER = 50
+FE_MULTIPLIER = (
+    5  # keep total budget (FE_MULTIPLIER × dim) < 50 to avoid ELA computation
+)
 N_INDIVIDUALS = 10
 PORTFOLIO = ["SPSO", "IPSO"]
 
@@ -259,11 +261,8 @@ def test_run_single_uses_full_budget(self):
         """The optimizer must receive the full budget (fe_multiplier × dim)."""
         opt_class = get_portfolio(["SPSO"])[0]
         problem = MockProblem("test_p", dim=2)
-        # With tiny budget the optimizer can barely initialise
-        result_small = run_single_algorithm(opt_class, problem, 5, N_INDIVIDUALS)
-        result_large = run_single_algorithm(
-            opt_class, problem, FE_MULTIPLIER, N_INDIVIDUALS
-        )
+        result_small = run_single_algorithm(opt_class, problem, 2, N_INDIVIDUALS)
+        result_large = run_single_algorithm(opt_class, problem, 50, N_INDIVIDUALS)
         # Larger budget should not produce a worse result
         assert result_large <= result_small + 1e-6
 
diff --git a/uv.lock b/uv.lock
index c5f5fe7..d601eb3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -297,6 +297,7 @@ dependencies = [
     { name = "pypop7" },
     { name = "rich" },
     { name = "stable-baselines3" },
+    { name = "torch" },
     { name = "tqdm" },
     { name = "wandb" },
 ]
@@ -320,6 +321,7 @@ requires-dist = [
     { name = "pypop7", specifier = ">=0.0.82,<0.0.83" },
     { name = "rich", specifier = ">=15.0.0" },
     { name = "stable-baselines3", specifier = ">=2.3.0" },
+    { name = "torch", specifier = ">=2.0.0" },
     { name = "tqdm", specifier = ">=4.67.1,<5" },
     { name = "wandb", specifier = ">=0.22.2,<0.23" },
 ]

From 738ede6e358bf3b42280d6b554d2e55730c9f7c0 Mon Sep 17 00:00:00 2001
From: wlnc <wladyslaw.niec@unit8.co>
Date: Wed, 20 May 2026 00:55:29 +0200
Subject: [PATCH 4/4] make tests faster 2

---
 tests/test_heterogeneous_portfolios.py | 119 ++-----------------------
 1 file changed, 9 insertions(+), 110 deletions(-)

diff --git a/tests/test_heterogeneous_portfolios.py b/tests/test_heterogeneous_portfolios.py
index 2163bbd..fb85bfe 100644
--- a/tests/test_heterogeneous_portfolios.py
+++ b/tests/test_heterogeneous_portfolios.py
@@ -144,36 +144,14 @@ def _policy(env):
     return _policy
 
 
-def reverse_round_robin(n):
-    """Cycles through actions n-1, n-2, …, 0 across successive steps."""
-    state = {"i": 0}
-
-    def _policy(env):
-        a = (n - 1) - (state["i"] % n)
-        state["i"] += 1
-        return a
-
-    return _policy
-
-
-def _make_rr(n, direction):
-    return round_robin(n) if direction == "forward" else reverse_round_robin(n)
-
-
 # ------------------------------------------------------------------ #
 # 1. BO + PSO + ES — three-family portfolios                         #
 # ------------------------------------------------------------------ #
 
-# Each row: (portfolio spec, dim, fn_name)
 _BO_PSO_ES = [
     (["GPBO_EI", "SPSO", "LMCMAES"], 2, "sphere"),
-    (["GPBO_UCB", "SPSO", "LMCMAES"], 2, "abs"),
-    (["GPBO_EI", "IPSO", "CMAES"], 2, "multimodal"),
-    (["GPBO_UCB", "IPSO", "CMAES"], 3, "sphere"),
+    (["GPBO_UCB", "IPSO", "CMAES"], 2, "multimodal"),
     (["GPBO_EI", "SPSOL", "LMCMAES"], 3, "asymmetric"),
-    (["GPBO_UCB", "CPSO", "CMAES"], 2, "step"),
-    (["GPBO_EI", "SPSO", "CMAES"], 1, "sphere"),
-    (["GPBO_UCB", "IPSO", "LMCMAES"], 1, "abs"),
     (["GPBO_EI", "SPSO", "LMCMAES"], 5, "sphere"),
 ]
 
@@ -188,57 +166,27 @@ def test_random_policy(self, spec, dim, fn_name):
         info = drain(env)
         assert np.isfinite(info["best_y"])
 
-    @pytest.mark.parametrize("direction", ["forward", "reverse"])
     @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES)
-    def test_round_robin_exercises_all_handoffs(self, spec, dim, fn_name, direction):
-        """Round-robin (forward and reverse) forces every consecutive pair to hand off."""
+    def test_round_robin_exercises_all_handoffs(self, spec, dim, fn_name):
+        """Round-robin forces every consecutive pair to hand off."""
         classes = resolve(spec)
         env = make_env(classes, dim=dim, fn=FUNCTIONS[fn_name])
         env.reset()
-        info = drain(env, policy=_make_rr(len(classes), direction))
+        info = drain(env, policy=round_robin(len(classes)))
         assert np.isfinite(info["best_y"])
         assert set(env._choices_history) == set(range(len(classes)))
 
-    @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES[:4])
-    def test_fixed_bo_policy_only_calls_bo(self, spec, dim, fn_name):
-        env = make_env(resolve(spec), dim=dim, fn=FUNCTIONS[fn_name])
-        env.reset()
-        drain(env, policy=fixed(0))
-        assert all(c == 0 for c in env._choices_history)
-        assert np.isfinite(env._best_y)
-
-    @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES[:4])
-    def test_fixed_pso_policy_never_calls_bo(self, spec, dim, fn_name):
-        env = make_env(resolve(spec), dim=dim, fn=FUNCTIONS[fn_name])
-        env.reset()
-        drain(env, policy=fixed(1))  # PSO is always at index 1
-        assert all(c == 1 for c in env._choices_history)
-        assert np.isfinite(env._best_y)
-
-    @pytest.mark.parametrize("spec,dim,fn_name", _BO_PSO_ES[:4])
-    def test_fixed_es_policy_never_calls_bo(self, spec, dim, fn_name):
-        env = make_env(resolve(spec), dim=dim, fn=FUNCTIONS[fn_name])
-        env.reset()
-        drain(env, policy=fixed(2))  # ES is always at index 2
-        assert all(c == 2 for c in env._choices_history)
-        assert np.isfinite(env._best_y)
-
 
 # ------------------------------------------------------------------ #
 # 2. BO + PSO + DE — three-family portfolios                         #
 # ------------------------------------------------------------------ #
 
-# MADDE/NL_SHADE_RSP respect env n_individuals (=10 here).
-# JDE21 forces NP=170, so it gets a larger budget via fe_multiplier=250.
 _BO_PSO_DE_LIGHT = [
     (["GPBO_EI", "SPSO", "MADDE"], 2, "sphere", 100),
-    (["GPBO_UCB", "SPSO", "NL_SHADE_RSP"], 2, "abs", 100),
-    (["GPBO_EI", "IPSO", "MADDE"], 3, "multimodal", 100),
     (["GPBO_UCB", "CPSO", "NL_SHADE_RSP"], 2, "step", 100),
 ]
 _BO_PSO_DE_HEAVY = [
     (["GPBO_EI", "SPSO", "JDE21"], 2, "sphere", 250),
-    (["GPBO_UCB", "IPSO", "JDE21"], 3, "abs", 250),
 ]
 
 
@@ -256,13 +204,12 @@ def test_random_policy(self, spec, dim, fn_name, fe_mult):
         info = drain(env)
         assert np.isfinite(info["best_y"])
 
-    @pytest.mark.parametrize("direction", ["forward", "reverse"])
     @pytest.mark.parametrize("spec,dim,fn_name,fe_mult", _BO_PSO_DE_LIGHT)
-    def test_round_robin_all_families(self, spec, dim, fn_name, fe_mult, direction):
+    def test_round_robin_all_families(self, spec, dim, fn_name, fe_mult):
         classes = resolve(spec)
         env = make_env(classes, dim=dim, fn=FUNCTIONS[fn_name], fe_multiplier=fe_mult)
         env.reset()
-        info = drain(env, policy=_make_rr(len(classes), direction))
+        info = drain(env, policy=round_robin(len(classes)))
         assert np.isfinite(info["best_y"])
         assert set(env._choices_history) == set(range(len(classes)))
 
@@ -274,9 +221,6 @@ def test_round_robin_all_families(self, spec, dim, fn_name, fe_mult, direction):
 _ALL_FOUR = [
     (["GPBO_EI", "SPSO", "CMAES", "MADDE"], 2, "sphere", 100),
     (["GPBO_UCB", "IPSO", "LMCMAES", "NL_SHADE_RSP"], 2, "abs", 100),
-    (["GPBO_EI", "SPSO", "LMCMAES", "MADDE"], 3, "multimodal", 100),
-    (["GPBO_UCB", "CPSO", "CMAES", "NL_SHADE_RSP"], 3, "asymmetric", 100),
-    (["GPBO_EI", "SPSO", "CMAES", "JDE21"], 2, "sphere", 250),
 ]
 
 
@@ -292,14 +236,12 @@ def test_random_policy(self, spec, dim, fn_name, fe_mult):
         info = drain(env)
         assert np.isfinite(info["best_y"])
 
-    @pytest.mark.parametrize("direction", ["forward", "reverse"])
-    @pytest.mark.parametrize("spec,dim,fn_name,fe_mult", _ALL_FOUR[:3])
-    def test_round_robin_visits_all_four(self, spec, dim, fn_name, fe_mult, direction):
-        # With N_CHECKPOINTS=3 and 4 optimizers, forward visits [0,1,2], reverse visits [3,2,1]
+    def test_round_robin_visits_all_four(self):
+        spec, dim, fn_name, fe_mult = _ALL_FOUR[0]
         classes = resolve(spec)
         env = make_env(classes, dim=dim, fn=FUNCTIONS[fn_name], fe_multiplier=fe_mult)
         env.reset()
-        info = drain(env, policy=_make_rr(len(classes), direction))
+        info = drain(env, policy=round_robin(len(classes)))
         assert np.isfinite(info["best_y"])
 
     def test_best_y_nondecreasing_all_families(self):
@@ -343,75 +285,35 @@ def _run_chain(self, spec, actions, dim=2, fn=None, fe_multiplier=100):
     # -- BO ↔ PSO --------------------------------------------------- #
 
     def test_bo_pso_bo(self):
-        # GPBO_EI → SPSO → GPBO_EI: BO hands population to PSO, gets it back
         self._run_chain(["GPBO_EI", "SPSO", "LMCMAES"], actions=[0, 1, 0])
 
     def test_pso_bo_pso(self):
-        # SPSO → GPBO_EI → SPSO: population seeded into GP, then back to PSO
         self._run_chain(["GPBO_EI", "SPSO", "LMCMAES"], actions=[1, 0, 1])
 
     def test_pso_bo_es(self):
-        # SPSO → GPBO_UCB → LMCMAES
         self._run_chain(["GPBO_UCB", "SPSO", "LMCMAES"], actions=[1, 0, 2])
 
     # -- BO ↔ ES ---------------------------------------------------- #
 
     def test_bo_es_bo(self):
-        # GPBO_EI → CMAES → GPBO_EI
         self._run_chain(["GPBO_EI", "SPSO", "CMAES"], actions=[0, 2, 0])
 
-    def test_es_bo_pso(self):
-        # CMAES → GPBO_UCB → SPSO
-        self._run_chain(["GPBO_UCB", "SPSO", "CMAES"], actions=[2, 0, 1])
-
-    def test_es_bo_es(self):
-        # LMCMAES → GPBO_EI → CMAES: cross-ES transition via BO bridge
-        self._run_chain(
-            ["GPBO_EI", "SPSO", "LMCMAES", "CMAES"],
-            actions=[2, 0, 3],
-            fe_multiplier=100,
-        )
-
     # -- BO ↔ DE ---------------------------------------------------- #
 
     def test_de_bo_pso(self):
-        # MADDE → GPBO_EI → SPSO
         self._run_chain(["GPBO_EI", "SPSO", "MADDE"], actions=[2, 0, 1])
 
     def test_pso_bo_de(self):
-        # SPSO → GPBO_UCB → NL_SHADE_RSP
         self._run_chain(["GPBO_UCB", "SPSO", "NL_SHADE_RSP"], actions=[1, 0, 2])
 
     def test_bo_de_bo(self):
-        # GPBO_EI → MADDE → GPBO_EI: BO hands off to DE and reclaims state
         self._run_chain(["GPBO_EI", "SPSO", "MADDE"], actions=[0, 2, 0])
 
     # -- BO ↔ BO ---------------------------------------------------- #
 
     def test_ei_pso_ucb(self):
-        # GPBO_EI → SPSO → GPBO_UCB: EI observations flow through PSO to UCB
         self._run_chain(["GPBO_EI", "GPBO_UCB", "SPSO"], actions=[0, 2, 1])
 
-    def test_ucb_pso_ei(self):
-        # GPBO_UCB → SPSO → GPBO_EI
-        self._run_chain(["GPBO_EI", "GPBO_UCB", "SPSO"], actions=[1, 2, 0])
-
-    def test_ei_ucb_pso(self):
-        # GPBO_EI → GPBO_UCB → SPSO: BO→BO obs hand-off then PSO
-        self._run_chain(["GPBO_EI", "GPBO_UCB", "SPSO"], actions=[0, 1, 2])
-
-    # -- Higher-dimension chains ------------------------------------ #
-
-    @pytest.mark.parametrize("dim", [3, 5])
-    def test_bo_pso_es_dim(self, dim):
-        self._run_chain(["GPBO_EI", "SPSO", "CMAES"], actions=[0, 1, 2], dim=dim)
-
-    @pytest.mark.parametrize("fn_name", ["abs", "multimodal", "asymmetric", "step"])
-    def test_bo_pso_es_landscape(self, fn_name):
-        self._run_chain(
-            ["GPBO_EI", "SPSO", "LMCMAES"], actions=[0, 1, 2], fn=FUNCTIONS[fn_name]
-        )
-
 
 # ------------------------------------------------------------------ #
 # 5. Env contract invariants across all portfolios                   #
@@ -419,9 +321,6 @@ def test_bo_pso_es_landscape(self, fn_name):
 
 _CONTRACT_PORTFOLIOS = [
     (["GPBO_EI", "SPSO", "LMCMAES"], 100),
-    (["GPBO_UCB", "IPSO", "CMAES"], 100),
-    (["GPBO_EI", "GPBO_UCB", "SPSO"], 100),
-    (["GPBO_EI", "SPSO", "MADDE"], 100),
     (["GPBO_EI", "SPSO", "CMAES", "MADDE"], 100),
 ]