Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Continuous integration workflow.
# - unit-tests: runs pytest for every event that triggers this workflow.
# - smoke-tests: runs smoke_test.sh for main (push / PR) or on opted-in manual dispatch.
name: CI

on:
  push:
    branches: ["**"]
  pull_request:
  workflow_dispatch:
    inputs:
      run_smoke_tests:
        description: "Run smoke tests"
        type: boolean
        default: true
      smoke_agents:
        description: "Agents to smoke-test (space-separated, empty = all)"
        type: string
        default: ""

jobs:
  unit-tests:
    name: Unit tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
          python-version: "3.11"

      - name: Install dependencies
        run: uv sync --group dev

      - name: Run pytest
        run: uv run pytest tests/ -v --tb=short

  smoke-tests:
    name: Smoke tests
    runs-on: ubuntu-latest
    # Run on manual dispatch (when opted in), on push to main, or on PRs
    # targeting main. For pull_request events github.ref is
    # refs/pull/<n>/merge, so the target branch is read from github.base_ref.
    if: |
      (github.event_name == 'workflow_dispatch' && inputs.run_smoke_tests) ||
      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
      (github.event_name == 'pull_request' && github.base_ref == 'main')
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
          python-version: "3.11"

      - name: Install dependencies
        run: uv sync

      - name: Make scripts executable
        run: chmod +x run_local.sh smoke_test.sh

      - name: Run smoke tests
        env:
          # Pass the user-supplied input through the environment instead of
          # expanding it inside the script text, so it cannot inject shell code.
          AGENTS: ${{ inputs.smoke_agents }}
        run: |
          if [ -n "$AGENTS" ]; then
            # Deliberately unquoted: each space-separated agent name becomes
            # its own argument to smoke_test.sh.
            bash smoke_test.sh $AGENTS
          else
            bash smoke_test.sh
          fi
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ results/
.idea/
__pycache__/
*/__pycache__/
logs/
30 changes: 30 additions & 0 deletions baselines.slurm
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#SBATCH --job-name=das2_baselines
#SBATCH --output=logs/baselines_%j.out
#SBATCH --error=logs/baselines_%j.err
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=32G
#SBATCH --time=24:00:00
#SBATCH --partition=plgrid-gpu-a100
#SBATCH -A plgrldas2026-gpu-a100

# Run baselines.py for one seed and one optimizer portfolio.
#
# Usage: sbatch baselines.slurm [SEED [PORTFOLIO...]]
#   SEED       seed forwarded to baselines.py via --seed (default: 42)
#   PORTFOLIO  optimizer names forwarded via -p (default: SPSO IPSO SPSOL)
#
# NOTE(review): Slurm normally opens the --output/--error files before this
# script body runs, so logs/ presumably has to exist in the submission
# directory before calling sbatch; the mkdir below would be too late for the
# first job. Confirm on the target cluster.

# Args: SEED [PORTFOLIO...]
SEED=${1:-42}

# With fewer than two arguments no portfolio was given: use the default one.
if [ "$#" -lt 2 ]; then
    PORTFOLIO=('SPSO' 'IPSO' 'SPSOL')
else
    PORTFOLIO=("${@:2}")
fi

# Join the portfolio names with "_" (IFS is changed only inside the subshell).
PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")

ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate"
source "$ENV_PATH"
mkdir -p logs

echo "Baselines | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}"

# Quote the experiment name and the seed so that unusual characters in the
# arguments cannot be word-split or glob-expanded by the shell.
python baselines.py "${PORTFOLIO_STR}_BASELINES_SEED${SEED}" \
    -p "${PORTFOLIO[@]}" --agent all --seed "$SEED"
221 changes: 221 additions & 0 deletions cv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
"""Cross-validation entry point: train one model per fold, evaluate on held-out split.

Usage
-----
python cv.py ppo <name> [options]
python cv.py rl-das <name> [options]
python cv.py exp-das <name> [options]

Outputs (per fold)
------------------
models/<name>_cv_<fold>.zip / _final.pt trained model
results/<name>_cv_<fold>.jsonl per-problem test results
results/<name>_cv_summary.jsonl aggregated stats across all folds
"""

import argparse
import warnings
from pathlib import Path

from das.env.bbob_splits import ALL_DIMS
from das.utils import set_seed

# Install a blanket "ignore" filter: no category is given, so warnings of
# every category are suppressed from this point on.
warnings.filterwarnings("ignore")


# ------------------------------------------------------------------ #
# Argument parsing #
# ------------------------------------------------------------------ #


def _add_shared_args(p: argparse.ArgumentParser) -> None:
p.add_argument("name", help="Experiment name (used for output file names)")
p.add_argument(
"-p",
"--portfolio",
nargs="+",
default=["SPSO", "IPSO", "SPSOL"],
help="Sub-optimizer names from the portfolio",
)
p.add_argument(
"--fe-multiplier",
type=int,
default=10_000,
help="Budget = fe_multiplier × dimension",
)
p.add_argument(
"--n-checkpoints",
type=int,
default=10,
help="Optimizer-selection steps per episode",
)
p.add_argument("--n-individuals", type=int, default=100, help="Population size")
p.add_argument("--seed", type=int, default=42)
p.add_argument(
"--cv-mode",
default="LOIO",
choices=["LOIO", "LOPO"],
help="LOIO: hold out instances per fold; LOPO: hold out functions per fold",
)
p.add_argument("--n-folds", type=int, default=3, help="Number of CV folds")
p.add_argument(
"--folds",
nargs="+",
type=int,
default=None,
help="Zero-based fold indices to run (default: all)",
)


def _new_agent_parser(sub, name: str, help_text: str) -> argparse.ArgumentParser:
    """Create sub-command *name* on *sub* and register the shared options on it."""
    parser = sub.add_parser(
        name,
        help=help_text,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    _add_shared_args(parser)
    return parser


def _add_ppo_parser(sub) -> None:
    """Register the ``ppo`` sub-command and its agent-specific options."""
    ppo = _new_agent_parser(sub, "ppo", "SB3 PPO with VecNormalize")
    ppo.add_argument(
        "-d",
        "--dims",
        nargs="+",
        type=int,
        default=ALL_DIMS,
        choices=ALL_DIMS,
        help="Problem dimensions",
    )
    ppo.add_argument(
        "-x", "--cdb", type=float, default=1.0, help="Checkpoint division base"
    )
    ppo.add_argument(
        "-O",
        "--reward-option",
        type=int,
        default=1,
        choices=[1, 2, 3, 4],
        help="Reward shaping option",
    )
    ppo.add_argument(
        "-E",
        "--n-epochs",
        type=int,
        default=20,
        help="Training passes per fold. total_timesteps = n_epochs × |train_ids| × n_checkpoints",
    )
    ppo.add_argument(
        "-j", "--n-envs", type=int, default=1, help="Parallel training envs"
    )
    ppo.add_argument("--wandb", action="store_true", help="Log to Weights & Biases")


def _add_rl_das_parser(sub) -> None:
    """Register the ``rl-das`` sub-command and its agent-specific options."""
    rl = _new_agent_parser(
        sub, "rl-das", "Custom RL-DAS: single-dimension, pure-PyTorch PPO"
    )
    rl.add_argument(
        "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)"
    )
    rl.add_argument("--n-epochs", type=int, default=20, help="Training epochs per fold")
    rl.add_argument(
        "--k-epoch",
        type=int,
        default=None,
        help="PPO gradient steps per episode (default: int(0.3 × n_checkpoints))",
    )
    rl.add_argument("--lr", type=float, default=1e-5, help="Learning rate")
    rl.add_argument(
        "--eval-interval", type=int, default=5, help="Evaluate every N epochs"
    )
    rl.add_argument(
        "--save-interval", type=int, default=50, help="Checkpoint every N epochs"
    )
    rl.add_argument("--device", default="cpu", help="PyTorch device")


def _add_exp_das_parser(sub) -> None:
    """Register the ``exp-das`` sub-command and its agent-specific options."""
    exp = _new_agent_parser(
        sub,
        "exp-das",
        "Exponential-DAS: custom PPO with exponential checkpoint spacing",
    )
    exp.add_argument(
        "--dims", nargs="+", type=int, default=[2, 5, 10], help="Problem dimensions"
    )
    exp.add_argument(
        "--cdb",
        type=float,
        default=2.0,
        help="Checkpoint division base (>1 = exponential)",
    )
    exp.add_argument(
        "--reward-option",
        type=int,
        default=1,
        choices=[1, 2, 3, 4],
        help="Reward shaping option",
    )
    exp.add_argument(
        "--buffer-capacity",
        type=int,
        default=None,
        help="PPO rollout buffer size in steps (default: 16 × n_checkpoints)",
    )
    exp.add_argument(
        "-E",
        "--n-epochs",
        type=int,
        default=3,
        help="Passes over the training set per fold. total_episodes = n_epochs × |train_ids|",
    )
    exp.add_argument(
        "--save-interval", type=int, default=500, help="Checkpoint every N episodes"
    )
    exp.add_argument("--actor-lr", type=float, default=3e-5, help="Actor learning rate")
    exp.add_argument(
        "--critic-lr", type=float, default=1e-5, help="Critic learning rate"
    )
    exp.add_argument(
        "--ppo-epochs", type=int, default=6, help="PPO gradient epochs per update"
    )
    exp.add_argument("--device", default="cpu", help="PyTorch device")


def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Build the CLI parser and parse the command line.

    The agent is selected with a required sub-command (``ppo``, ``rl-das`` or
    ``exp-das``), stored on the result as ``agent``. Every sub-command carries
    the shared options plus its own agent-specific ones.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses;
            passing a list allows the parser to be driven programmatically.

    Returns:
        The populated namespace. Invalid input ends the process through
        argparse's usual usage error.
    """
    root = argparse.ArgumentParser(
        description="Cross-validation for DAS agents. Choose an agent with a sub-command.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    sub = root.add_subparsers(
        dest="agent", required=True, metavar="{ppo,rl-das,exp-das}"
    )

    # Registration order determines the order shown in --help.
    _add_ppo_parser(sub)
    _add_rl_das_parser(sub)
    _add_exp_das_parser(sub)

    return root.parse_args(argv)


# ------------------------------------------------------------------ #
# Main #
# ------------------------------------------------------------------ #


def main() -> None:
    """Parse the CLI, seed the run, and hand off to the chosen agent's CV runner."""
    args = _parse_args()
    set_seed(args.seed)

    # Output directories used by the runs; created up front if missing.
    for out_dir in ("models", "results"):
        Path(out_dir).mkdir(exist_ok=True)

    # Import lazily so only the selected agent's training module is loaded.
    agent = args.agent
    if agent == "ppo":
        from das.training.ppo import run_cv_ppo as run_cv
    elif agent == "rl-das":
        from das.training.rldas import run_cv_rl_das as run_cv
    elif agent == "exp-das":
        from das.training.expdas import run_cv_exp_das as run_cv
    else:
        # Any other value falls through without running anything.
        return

    run_cv(args)


if __name__ == "__main__":
    main()
24 changes: 10 additions & 14 deletions das/env/bbob_splits.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,28 +48,24 @@ def get_train_test_split(mode: str, dims: list[int]) -> tuple[list[str], list[st
return all_ids[:split], all_ids[split:]


_N_CV_FOLDS = 3


def get_cv_folds(
cv_mode: str, dims: list[int], seed: int = 0
cv_mode: str, dims: list[int], seed: int = 0, n_folds: int = 3
) -> list[tuple[list[str], list[str], str]]:
"""Return (train_ids, test_ids, fold_tag) for each of the 3 CV folds.
"""Return (train_ids, test_ids, fold_tag) for each CV fold.

LOIO: 3 folds – the 15 instance IDs are randomly shuffled and split into
3 groups of 5; each fold tests on 1 group and trains on the other 10.
LOPO: 3 folds – the 24 BBOB functions are randomly shuffled and split into
3 groups of 8; each fold tests on all problems from 1 group of
functions (all instances) and trains on the other 16 functions.
LOIO: instance IDs are shuffled and split into n_folds groups;
each fold tests on one group and trains on the rest.
LOPO: BBOB functions are shuffled and split into n_folds groups;
each fold tests on all problems from one group of functions.
"""
rng = np.random.default_rng(seed)
folds = []

if cv_mode == "LOIO":
insts = list(INSTANCE_IDS)
rng.shuffle(insts)
chunk = len(insts) // _N_CV_FOLDS # 5
for i in range(_N_CV_FOLDS):
chunk = len(insts) // n_folds
for i in range(n_folds):
test_insts = insts[i * chunk : (i + 1) * chunk]
train_insts = [inst for inst in insts if inst not in set(test_insts)]
folds.append(
Expand All @@ -82,8 +78,8 @@ def get_cv_folds(
else: # LOPO
fns = list(ALL_FUNCTIONS)
rng.shuffle(fns)
chunk = len(fns) // _N_CV_FOLDS # 8
for i in range(_N_CV_FOLDS):
chunk = len(fns) // n_folds
for i in range(n_folds):
test_fns = set(fns[i * chunk : (i + 1) * chunk])
train_fns = ALL_FUNCTIONS - test_fns
folds.append(
Expand Down
Loading
Loading