From 2863c81f9f9f9c45f9773f1a33ce75dd7a2a508c Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 08:53:51 +0200 Subject: [PATCH 1/8] fix ci and smoke tests --- .github/workflows/ci.yml | 4 ++++ agents/exponential_das/agent.py | 2 +- agents/rl_das/agent.py | 2 +- das/training/rldas.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d78100..7063a72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,9 @@ jobs: - name: Install dependencies run: uv sync --group dev + - name: Ruff lint + run: uv run ruff check . + - name: Run pytest run: uv run pytest tests/ -v --tb=short @@ -58,6 +61,7 @@ jobs: - name: Run smoke tests run: | + source .venv/bin/activate AGENTS="${{ inputs.smoke_agents }}" if [ -n "$AGENTS" ]; then bash smoke_test.sh $AGENTS diff --git a/agents/exponential_das/agent.py b/agents/exponential_das/agent.py index b5ca3b6..cc85b3d 100644 --- a/agents/exponential_das/agent.py +++ b/agents/exponential_das/agent.py @@ -268,7 +268,7 @@ def save(self, path: str) -> None: @classmethod def load(cls, path: str, obs_dim: int, n_actions: int, **kwargs) -> "ExpDASAgent": agent = cls(obs_dim, n_actions, **kwargs) - ckpt = torch.load(path, map_location=agent.device) + ckpt = torch.load(path, map_location=agent.device, weights_only=False) agent.actor.load_state_dict(ckpt["actor"]) agent.critic.load_state_dict(ckpt["critic"]) if "actor_opt" in ckpt: diff --git a/agents/rl_das/agent.py b/agents/rl_das/agent.py index 4c40cbc..8def163 100644 --- a/agents/rl_das/agent.py +++ b/agents/rl_das/agent.py @@ -243,7 +243,7 @@ def save(self, path: str) -> None: @classmethod def load(cls, path: str, dim: int, n_opt: int, **kwargs) -> "PPOAgent": agent = cls(dim, n_opt, **kwargs) - ckpt = torch.load(path, map_location=agent.device) + ckpt = torch.load(path, map_location=agent.device, weights_only=False) agent.actor.load_state_dict(ckpt["actor"]) agent.critic.load_state_dict(ckpt["critic"]) if "optimizer" in ckpt: diff --git a/das/training/rldas.py b/das/training/rldas.py index 2c041cc..ddac460 100644 --- a/das/training/rldas.py +++ b/das/training/rldas.py @@ -63,7 +63,7 @@ def run_rl_das(args) -> None: if args.eval: print("\nRunning final evaluation on test set …") - n_problems = len(test_env.problem_ids) + n_problems = len(test_env._problem_ids) test_results = evaluate(test_env, agent, n_episodes=n_problems) mean_best_y = float(np.mean([r["best_y"] for r in test_results])) print(f"Test mean best_y = {mean_best_y:.6e}") From 50bf2724ab77ca252eae5636d75af39e56da640f Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 08:56:43 +0200 Subject: [PATCH 2/8] fix ci and smoke tests 2 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7063a72..8dd2903 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: run: uv sync --group dev - name: Ruff lint - run: uv run ruff check . + run: uv run ruff format . - name: Run pytest run: uv run pytest tests/ -v --tb=short From 7444500f32b12f0a0bd7556d772a8795c360b881 Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 12:20:42 +0200 Subject: [PATCH 3/8] set RL-DAS logic to follow strictly the one of tis original version --- agents/rl_das/env.py | 424 +++++++++++------------ agents/rl_das/network.py | 8 +- agents/rl_das/optimizers.py | 669 ++++++++++++++++++++++++++++++++++++ agents/rl_das/population.py | 137 ++++++++ cv.py | 4 + das/training/rldas.py | 11 +- run_local.sh | 8 +- train.py | 6 +- 8 files changed, 1036 insertions(+), 231 deletions(-) create mode 100644 agents/rl_das/optimizers.py create mode 100644 agents/rl_das/population.py diff --git a/agents/rl_das/env.py b/agents/rl_das/env.py index ccfbd9b..38653ae 100644 --- a/agents/rl_das/env.py +++ b/agents/rl_das/env.py @@ -1,18 +1,22 @@ -"""RL-DAS gymnasium environment. +"""RL-DAS gymnasium environment (Population-based, 9-dim features). -Observation space: flat Box of shape (6 + 2 * n_opt * dim,) - - [0:6] six population-state features - - [6:] movement history, 2*n_opt blocks of ``dim`` scalars each, - interleaved as [best_0, worst_0, best_1, worst_1, ...] +Strictly follows the original RL-DAS design (Guo et al., 2024): -Action space: Discrete(n_opt) +- A single mutable ``Population`` object is shared between DE sub-optimizers + as warm-started state (no get_data/set_data contract). +- Portfolio is restricted to the three DE algorithms: NL_SHADE_RSP, JDE21, MadDE. +- Observation: 9-dim population-state features (computed with local sampling) + concatenated with per-optimizer movement history embeddings of shape (dim,). + +Observation layout: flat Box of shape (9 + 2 * n_opt * dim,) + [0:9] nine population-state features (see _pop_features) + [9:] movement history, 2*n_opt blocks of dim scalars each, + interleaved as [best_0, worst_0, best_1, worst_1, ...] -Reward: max(0, (prev_best - new_best) / cost_scale) — non-negative improvement - normalised by the initial global-best cost so rewards are comparable - across problems with different fitness scales. +Action space: Discrete(n_opt) -The environment operates on BBOB problems accessed through a cocoex Suite. -Only problems whose dimension matches ``dim`` are used; others are skipped. +Reward: max(0, (prev_best - new_best) / cost_scale) non-negative improvement + normalised by the initial global-best cost. """ from __future__ import annotations @@ -23,68 +27,136 @@ import gymnasium as gym from gymnasium import spaces +from agents.rl_das.population import Population from das.optimizers.base import get_checkpoints # --------------------------------------------------------------------------- -# Population-state feature extraction (6-D, dimension-independent) +# Feature helper functions (adapted from original RL-DAS utils.py) # --------------------------------------------------------------------------- +def _cal_fdc(group_norm: np.ndarray, costs: np.ndarray) -> float: + """Fitness-distance correlation. group_norm must be in [0, 1]^dim.""" + opt_x = group_norm[np.argmin(costs)] + ds = np.sum((group_norm - opt_x) ** 2, axis=1) + fs = 1.0 / (costs + 1e-8) + C_fd = ((fs - fs.mean()) * (ds - ds.mean())).mean() + delta_f = ((fs - fs.mean()) ** 2).mean() + delta_d = ((ds - ds.mean()) ** 2).mean() + return float(C_fd / (delta_d * delta_f + 1e-8)) + + +def _dispersion(group_norm: np.ndarray, costs: np.ndarray) -> tuple[float, float]: + """Dispersion and dispersion-ratio metrics. group_norm in [0, 1]^dim.""" + gs, dim = group_norm.shape + group_sorted = group_norm[np.argsort(costs)] + diam = float(np.sqrt(dim)) + disp = 0.0 + max_dis = 0.0 + for i in range(1, gs): + shift = np.concatenate((group_sorted[i:], group_sorted[:i]), 0) + distances = np.sqrt(np.sum((group_sorted - shift) ** 2, -1)) + disp += float(np.sum(distances)) + cur_max = float(np.max(distances)) + if cur_max > max_dis: + max_dis = cur_max + disp /= gs**2 + gs10 = max(gs * 10 // 100, 1) + top10 = group_sorted[:gs10] + disp10 = 0.0 + for i in range(1, gs10): + shift = np.concatenate((top10[i:], top10[:i]), 0) + disp10 += float(np.sum(np.sqrt(np.sum((top10 - shift) ** 2, -1)))) + if gs10 > 1: + disp10 /= gs10**2 + return float(disp10 - disp), float(max_dis / (diam + 1e-10)) + + +def _negative_slope_coefficient( + group_cost: np.ndarray, sample_cost: np.ndarray +) -> float: + """Negative slope coefficient (NSC).""" + gs = sample_cost.shape[0] + m = 10 + gs -= gs % m + if gs < m: + return 0.0 + pairs = sorted(zip(group_cost[:gs].tolist(), sample_cost[:gs].tolist())) + arr = np.array(pairs) + sorted_group = arr[:, 0].reshape(m, -1) + sorted_sample = arr[:, 1].reshape(m, -1) + Ms = np.mean(sorted_group, -1) + Ns = np.mean(sorted_sample, -1) + nsc = np.minimum((Ns[1:] - Ns[:-1]) / (Ms[1:] - Ms[:-1] + 1e-8), 0) + return float(np.sum(nsc)) + + +def _average_neutral_ratio( + group_cost: np.ndarray, sample_costs: np.ndarray, eps: float = 1.0 +) -> float: + """Average neutral ratio (ANR).""" + gs = sample_costs.shape[1] + dcost = np.fabs(sample_costs - group_cost[:gs]) + return float(np.mean(np.sum(dcost < eps, axis=0) / sample_costs.shape[0])) + + +def _non_improvable_worsenable( + group_cost: np.ndarray, sample_costs: np.ndarray +) -> tuple[float, float]: + """Non-improvable (NI) and non-worsenable (NW) ratios.""" + gs = sample_costs.shape[1] + NI = ( + 1.0 + - np.count_nonzero(np.sum(group_cost[:gs] > sample_costs, axis=-1)) + / sample_costs.shape[0] + ) + NW = ( + 1.0 + - np.count_nonzero(np.sum(group_cost[:gs] < sample_costs, axis=-1)) + / sample_costs.shape[0] + ) + return float(NI), float(NW) + + def _pop_features( - x: np.ndarray, - y: np.ndarray, - gbest_y: float, + population: Population, + sample_costs: np.ndarray, cost_scale: float, - n_fes: int, - max_fes: int, + n_fe: int, + max_fe: int, ) -> np.ndarray: - """Return a 6-D population-state feature vector. - - Features - -------- - gbc : normalised global-best cost - fdc : fitness–distance correlation - disp : mean distance to centroid (dispersion) - disp_ratio: dispersion / search-space range - nsc : negative slope coefficient (distance vs fitness) - progress : n_fes / max_fes + """Return 9-dim feature vector matching original RL-DAS Population.get_feature. + + Features (in order): + gbc normalised global-best cost + fdc fitness-distance correlation + disp dispersion (disp10 - disp_all) + disp_ratio max_pairwise_dist / sqrt(dim) + nsc negative slope coefficient + anr average neutral ratio + ni non-improvable ratio + nw non-worsenable ratio + progress n_fe / max_fe """ - NP = len(y) - - # 1. Normalised global best - gbc = float(gbest_y / cost_scale) if cost_scale > 1e-10 else 0.0 - - # 2. Fitness–distance correlation - gbest_x = x[int(np.argmin(y))] - dists = np.linalg.norm(x - gbest_x, axis=1) - if dists.std() > 1e-10 and y.std() > 1e-10 and NP > 2: - fdc = float(np.corrcoef(dists, y)[0, 1]) - else: - fdc = 0.0 - - # 3. Dispersion metrics - centroid = x.mean(axis=0) - disp_dists = np.linalg.norm(x - centroid, axis=1) - disp = float(disp_dists.mean()) - spread = float(np.ptp(x)) - disp_ratio = disp / (spread + 1e-10) - - # 4. Negative slope coefficient - if dists.std() > 1e-10 and NP > 2: - try: - slope = float(np.polyfit(dists, y, 1)[0]) - nsc = -slope if np.isfinite(slope) else 0.0 - except (np.linalg.LinAlgError, ValueError): - nsc = 0.0 - else: - nsc = 0.0 - - # 5. Progress - progress = float(n_fes / max(max_fes, 1)) - - out = np.array([gbc, fdc, disp, disp_ratio, nsc, progress], dtype=np.float32) - # Guard against any NaN/inf that could corrupt the network weights + group = population.group + cost = population.cost + lb = population.Xmin + ub = population.Xmax + + group_norm = (group - lb) / (ub - lb + 1e-10) + + gbc = float(population.gbest / (cost_scale + 1e-10)) + fdc = _cal_fdc(group_norm, cost / (cost_scale + 1e-10)) + disp, disp_ratio = _dispersion(group_norm, cost) + nsc = _negative_slope_coefficient(cost, sample_costs[0]) + anr = _average_neutral_ratio(cost, sample_costs) + ni, nw = _non_improvable_worsenable(cost, sample_costs) + progress = float(n_fe / max(max_fe, 1)) + + out = np.array( + [gbc, fdc, disp, disp_ratio, nsc, anr, ni, nw, progress], dtype=np.float32 + ) return np.nan_to_num(out, nan=0.0, posinf=1.0, neginf=-1.0) @@ -96,33 +168,34 @@ def _pop_features( class RLDASEnv(gym.Env): """RL-DAS environment wrapping BBOB problems via a cocoex Suite. + Uses a Population object as shared warm-started state across all DE + sub-optimizers (matching the original RL-DAS design). + Parameters ---------- problem_ids: - BBOB problem IDs to cycle through (one per episode). Only IDs whose - dimension matches ``dim`` are used. + BBOB problem IDs to cycle through (one per episode). suite: cocoex Suite object. optimizers: - Ordered list of sub-optimizer classes (same pypop7-compatible classes - as used in DASEnv — defines the action space). + List of instantiated DE optimizer objects (NL_SHADE_RSP, JDE21, MadDE). dim: - Problem dimension. The movement embeddings have shape (dim,), so the - agent is dimension-specific. + Problem dimension. fe_multiplier: Budget = fe_multiplier * dim. n_checkpoints: Number of optimizer-selection steps per episode. checkpoint_division_base: - cdb=1.0 → uniform checkpoints; cdb>1.0 → exponentially growing. + 1.0 → uniform checkpoints; >1.0 → exponentially growing. n_individuals: - Population size. + Initial population size (Nmax). seed: - Seed for deterministic optimizer RNGs (passed to pypop7 as seed_rng). + Unused (kept for API compatibility with other DAS envs). """ metadata = {"render_modes": []} - N_FEATURES = 6 + N_FEATURES = 9 + SAMPLE_TIMES = 2 def __init__( self, @@ -138,19 +211,17 @@ def __init__( ): super().__init__() - # Filter to matching dimension (BBOB IDs end with _d{dim:02d}) self._problem_ids = [pid for pid in problem_ids if pid.endswith(f"_d{dim:02d}")] if not self._problem_ids: raise ValueError(f"No problem_ids match dimension {dim}.") self.suite = suite - self.optimizers = optimizers + self._optimizers = optimizers self.dim = dim self.fe_multiplier = fe_multiplier self.n_checkpoints = n_checkpoints self.cdb = checkpoint_division_base self.n_individuals = n_individuals - self._seed = seed self.n_opt = len(optimizers) obs_dim = self.N_FEATURES + 2 * self.n_opt * dim @@ -160,28 +231,14 @@ def __init__( ) self.action_space = spaces.Discrete(self.n_opt) - # Episode state self._problem = None self._problem_idx = 0 self._max_fe = 0 self._n_fe = 0 self._checkpoints: np.ndarray | None = None self._checkpoint_idx = 0 - - # Current population arrays (shared warm-start state) - self._pop_x: np.ndarray | None = None - self._pop_y: np.ndarray | None = None - self._optimizer_state: dict = {} - - # Global best tracking - self._gbest_y: float = np.inf - self._gbest_x: np.ndarray | None = None + self._population: Population | None = None self._cost_scale: float = 1.0 - self._worst_y: float = -np.inf - self._initial_range: tuple[float, float] = (np.inf, -np.inf) - - # Movement history: list of accumulated movement vectors per optimizer - # shape: (n_opt,) lists of (dim,) arrays; averaged in observation self._best_history: list[list[np.ndarray]] = [[] for _ in range(self.n_opt)] self._worst_history: list[list[np.ndarray]] = [[] for _ in range(self.n_opt)] @@ -194,24 +251,20 @@ def reset(self, seed=None, options=None): problem_id = self._problem_ids[self._problem_idx % len(self._problem_ids)] self._problem_idx += 1 - self._problem = self.suite.get_problem(problem_id) + + lb = float(self._problem.lower_bounds[0]) + ub = float(self._problem.upper_bounds[0]) self._max_fe = self.fe_multiplier * self.dim self._checkpoints = get_checkpoints( self.n_checkpoints, self._max_fe, self.n_individuals, self.cdb ) - - self._n_fe = 0 self._checkpoint_idx = 0 - self._optimizer_state = {} - self._pop_x = None - self._pop_y = None - self._gbest_y = np.inf - self._gbest_x = None - self._cost_scale = 1.0 - self._worst_y = -np.inf - self._initial_range = (np.inf, -np.inf) + self._population = Population(self.dim, lb, ub, Nmax=self.n_individuals) + self._population.initialize_costs(self._problem) + self._cost_scale = max(float(self._population.gbest), 1e-10) + self._n_fe = self._population.NP self._best_history = [[] for _ in range(self.n_opt)] self._worst_history = [[] for _ in range(self.n_opt)] @@ -223,143 +276,84 @@ def step(self, action: int): assert self._problem is not None, "Call reset() before step()" target_fe = int(self._checkpoints[self._checkpoint_idx]) - prev_gbest = self._gbest_y + prev_gbest = self._population.gbest - # Record pre-step best/worst positions for movement computation - prev_best_x, prev_worst_x = self._get_best_worst_positions() + prev_best_x = self._population.gbest_solution.copy() + worst_idx = int(np.argmax(self._population.cost)) + prev_worst_x = self._population.group[worst_idx].copy() - result = self._run_optimizer(action, target_fe) - self._update_state(result) + self._population, _, end_fes = self._optimizers[action].step( + self._population, self._problem, self._n_fe, target_fe, self._max_fe + ) + self._n_fe = end_fes - # Compute movement for the chosen optimizer - new_best_x, new_worst_x = self._get_best_worst_positions() - best_move = (new_best_x - prev_best_x) / (self.dim**0.5 + 1e-10) - worst_move = (new_worst_x - prev_worst_x) / (self.dim**0.5 + 1e-10) - self._best_history[action].append(best_move) - self._worst_history[action].append(worst_move) + new_best_x = self._population.gbest_solution.copy() + new_worst_x = self._population.group[ + int(np.argmax(self._population.cost)) + ].copy() + scale = float(self.dim**0.5) + 1e-10 + self._best_history[action].append((new_best_x - prev_best_x) / scale) + self._worst_history[action].append((new_worst_x - prev_worst_x) / scale) self._checkpoint_idx += 1 terminated = ( self._checkpoint_idx >= self.n_checkpoints or self._n_fe >= self._max_fe ) - improvement = ( - max(0.0, prev_gbest - self._gbest_y) - if np.isfinite(prev_gbest) and np.isfinite(self._gbest_y) - else 0.0 + reward = float( + max(0.0, (prev_gbest - self._population.gbest) / (self._cost_scale + 1e-10)) ) - reward = float(improvement / (self._cost_scale + 1e-10)) - if not np.isfinite(reward): - reward = 0.0 obs = self._build_observation() - info = {"best_y": self._gbest_y, "n_fe": self._n_fe} - return obs, reward, terminated, False, info + return ( + obs, + reward, + terminated, + False, + {"best_y": self._population.gbest, "n_fe": self._n_fe}, + ) # ------------------------------------------------------------------ - # Optimizer execution (reuses pypop7 warm-start machinery) + # Local sampling (counts FEs toward budget) # ------------------------------------------------------------------ - def _run_optimizer(self, action: int, target_fe: int) -> dict: - optimizer_class = self.optimizers[action] - problem_config = { - "fitness_function": self._problem, - "ndim_problem": self.dim, - "lower_boundary": self._problem.lower_bounds, - "upper_boundary": self._problem.upper_bounds, - } - options = { - "max_function_evaluations": self._max_fe, - "target_fe": target_fe, - "n_individuals": self.n_individuals, - "best_so_far_y": self._gbest_y, - "verbose": False, - } - if self._seed is not None: - options["seed_rng"] = ( - self._seed * 1_000_000 - + self._problem_idx * 1_000 - + self._checkpoint_idx - ) % (2**31) - - optimizer = optimizer_class(problem_config, options) - optimizer.n_function_evaluations = self._n_fe - optimizer.set_data( - best_x=self._gbest_x, - best_y=self._gbest_y if self._gbest_y < np.inf else None, - **self._optimizer_state, - ) - result = optimizer.optimize() - if isinstance(result, tuple): - result = result[0] - - new_state = optimizer.get_data() - if new_state: - self._optimizer_state = new_state - elif len(optimizer.x_history) > 0: - self._optimizer_state = { - "x": np.array(optimizer.x_history[-self.n_individuals :]), - "y": np.array(optimizer.y_history[-self.n_individuals :]), - } - - # Update cached population arrays - if "x" in self._optimizer_state: - self._pop_x = self._optimizer_state["x"] - self._pop_y = self._optimizer_state["y"] - - return result - - def _update_state(self, result: dict) -> None: - new_best_y: float = result.get("best_so_far_y", np.inf) - new_best_x: np.ndarray | None = result.get("best_so_far_x") - worst_y: float = result.get("worst_so_far_y", -np.inf) - - if new_best_y < self._gbest_y: - self._gbest_y = new_best_y - self._gbest_x = new_best_x - - if worst_y > self._worst_y: - self._worst_y = worst_y - - if self._initial_range[0] == np.inf: - self._initial_range = (new_best_y, max(worst_y, new_best_y + 1e-5)) - self._cost_scale = max(abs(new_best_y), 1e-10) - - y_hist = result.get("y_history") - n_fe_step = len(y_hist) if y_hist is not None else 0 - self._n_fe = result.get("n_function_evaluations", self._n_fe + n_fe_step) + def _local_sample(self) -> np.ndarray: + """Run SAMPLE_TIMES independent trials on a deepcopy of the population. + + Returns + ------- + sample_costs : ndarray of shape (SAMPLE_TIMES, min_NP) + """ + sample_size = self._population.NP + costs = [] + min_len = sample_size + for _ in range(self.SAMPLE_TIMES): + pop_copy = copy.deepcopy(self._population) + opt = self._optimizers[np.random.randint(self.n_opt)] + popped, _, _ = opt.step( + pop_copy, + self._problem, + self._n_fe, + self._n_fe + sample_size, + self._max_fe, + ) + costs.append(popped.cost.copy()) + if popped.cost.shape[0] < min_len: + min_len = popped.cost.shape[0] + self._n_fe = min(self._n_fe + sample_size * self.SAMPLE_TIMES, self._max_fe) + return np.array([c[:min_len] for c in costs]) # ------------------------------------------------------------------ # Observation construction # ------------------------------------------------------------------ - def _get_best_worst_positions(self) -> tuple[np.ndarray, np.ndarray]: - """Return current best and worst particle positions.""" - if self._pop_x is None or len(self._pop_x) == 0: - zeros = np.zeros(self.dim, dtype=np.float32) - return zeros, zeros - best_idx = int(np.argmin(self._pop_y)) - worst_idx = int(np.argmax(self._pop_y)) - return self._pop_x[best_idx].astype(np.float32), self._pop_x[worst_idx].astype( - np.float32 - ) - def _build_observation(self) -> np.ndarray: - # Population-state features - if self._pop_x is not None and len(self._pop_x) > 0: - features = _pop_features( - self._pop_x, - self._pop_y, - self._gbest_y, - self._cost_scale, - self._n_fe, - self._max_fe, - ) - else: - features = np.zeros(self.N_FEATURES, dtype=np.float32) + sample_costs = self._local_sample() + features = _pop_features( + self._population, sample_costs, self._cost_scale, self._n_fe, self._max_fe + ) - # Per-optimizer movement history (averaged, or zero if unused) - movements = [] + movements: list[np.ndarray] = [] for k in range(self.n_opt): if self._best_history[k]: best_emb = np.mean(self._best_history[k], axis=0).astype(np.float32) diff --git a/agents/rl_das/network.py b/agents/rl_das/network.py index efc0fd7..16e08ab 100644 --- a/agents/rl_das/network.py +++ b/agents/rl_das/network.py @@ -6,13 +6,13 @@ Architecture (following Guo et al. 2024): - Input: flat obs = [features_6d | best_move_0_dim | worst_move_0_dim | ...] + Input: flat obs = [features_9d | best_move_0_dim | worst_move_0_dim | ...] For each of the 2*n_opt movement blocks: embedder_k : Linear(dim, 64) -> ReLU -> Linear(64, 1) -> ReLU - backbone_input = cat(features_6d, *[emb_k(move_k) for k]) shape: (6+2*n_opt,) - backbone : Linear(6+2*n_opt, 64) -> Tanh -> Linear(64, 16) -> Tanh + backbone_input = cat(features_9d, *[emb_k(move_k) for k]) shape: (9+2*n_opt,) + backbone : Linear(9+2*n_opt, 64) -> Tanh -> Linear(64, 16) -> Tanh Actor head : Linear(16, n_opt) -> Softmax Critic head : Linear(16, 1) @@ -42,7 +42,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class _RLDASBackbone(nn.Module): """Shared feature extractor used by both Actor and Critic.""" - N_FEATURES = 6 # must match env.RLDASEnv.N_FEATURES + N_FEATURES = 9 # must match env.RLDASEnv.N_FEATURES def __init__(self, dim: int, n_opt: int) -> None: super().__init__() diff --git a/agents/rl_das/optimizers.py b/agents/rl_das/optimizers.py new file mode 100644 index 0000000..40dae89 --- /dev/null +++ b/agents/rl_das/optimizers.py @@ -0,0 +1,669 @@ +"""DE optimizers for RL-DAS: NL_SHADE_RSP, JDE21, MadDE. + +Ported from the original RL-DAS (Guo et al., 2024) with the following +adaptations for DAS2 / BBOB: + +- Batch evaluation: ``np.array([float(problem(xs[i])) for i in range(len(xs))])`` + instead of ``problem.func(xs) - problem.optimum``. +- Bounds from ``population.Xmin`` / ``population.Xmax`` (not hardcoded ±100). +- No early-termination at ``cost < 1e-8`` (FE budget controls episode end). +- ``__init__`` takes no ``dim`` argument (dimension inferred from population). +""" + +from __future__ import annotations + +import numpy as np +import scipy.stats as stats + + +def _eval(problem, xs: np.ndarray) -> np.ndarray: + return np.array([float(problem(xs[i])) for i in range(len(xs))]) + + +class NL_SHADE_RSP: + def __init__(self) -> None: + self.pb = 0.4 + self.pa = 0.5 + + def _binomial(self, x: np.ndarray, v: np.ndarray, cr: np.ndarray) -> np.ndarray: + NP, dim = x.shape + jrand = np.random.randint(dim, size=NP) + u = np.where(np.random.rand(NP, dim) < cr.repeat(dim).reshape(NP, dim), v, x) + u[np.arange(NP), jrand] = v[np.arange(NP), jrand] + return u + + def _exponential(self, x: np.ndarray, v: np.ndarray, cr: np.ndarray) -> np.ndarray: + NP, dim = x.shape + u = x.copy() + L = np.random.randint(dim, size=NP).repeat(dim).reshape(NP, dim) + L = L <= np.arange(dim) + rvs = np.random.rand(NP, dim) + L = np.where(rvs > cr.repeat(dim).reshape(NP, dim), L, 0) + return u * (1 - L) + v * L + + def _update_pa(self, fa: float, fp: float, na: int, NP: int) -> None: + if na == 0 or fa == 0: + self.pa = 0.5 + return + self.pa = (fa / (na + 1e-15)) / ((fa / (na + 1e-15)) + (fp / (NP - na + 1e-15))) + self.pa = float(np.clip(self.pa, 0.1, 0.9)) + + def step( + self, + population, + problem, + FEs: int, + FEs_end: int, + MaxFEs: int, + record_period: int = -1, + ): + if record_period <= 0: + record_period = FEs_end - FEs + NP, dim = population.NP, population.dim + NA = int(NP * 2.1) + if NA < population.archive.shape[0]: + population.archive = population.archive[:NA] + self.pa = 0.5 + k = 1 + Fevs = [] + while FEs >= k * record_period: + k += 1 + population.sort(population.NP) + + while FEs < FEs_end and FEs < MaxFEs: + Cr, F = population.choose_F_Cr() + Cr = np.sort(Cr) + pr = np.exp(-(np.arange(NP) + 1) / NP) + pr /= np.sum(pr) + cross_exponential = np.random.random() < 0.5 + pb_upper = int(max(2, NP * self.pb)) + pbs = np.random.randint(pb_upper, size=NP) + count = 0 + duplicate = np.where(pbs == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 1: + pbs[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where(pbs == np.arange(NP))[0] + count += 1 + xpb = population.group[pbs] + r1 = np.random.randint(NP, size=NP) + count = 0 + duplicate = np.where((r1 == np.arange(NP)) + (r1 == pbs))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r1 == np.arange(NP)) + (r1 == pbs))[0] + count += 1 + x1 = population.group[r1] + rvs = np.random.rand(NP) + r2_pop = np.where(rvs >= self.pa)[0] + r2_arc = np.where(rvs < self.pa)[0] + use_arc = np.zeros(NP, dtype=bool) + use_arc[r2_arc] = True + if population.archive.shape[0] < 25: + r2_pop = np.arange(NP) + r2_arc = np.array([], dtype=np.int32) + r2 = np.random.choice(np.arange(NP), size=r2_pop.shape[0], p=pr) + count = 0 + duplicate = np.where( + (r2 == r2_pop) + (r2 == pbs[r2_pop]) + (r2 == r1[r2_pop]) + )[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.choice( + np.arange(NP), size=duplicate.shape[0], p=pr + ) + duplicate = np.where( + (r2 == r2_pop) + (r2 == pbs[r2_pop]) + (r2 == r1[r2_pop]) + )[0] + count += 1 + x2 = np.zeros((NP, dim)) + if r2_pop.shape[0] > 0: + x2[r2_pop] = population.group[r2] + if r2_arc.shape[0] > 0: + x2[r2_arc] = population.archive[ + np.random.randint( + min(population.archive.shape[0], NA), + size=r2_arc.shape[0], + ) + ] + Fs = F.repeat(dim).reshape(NP, dim) + vs = population.group + Fs * (xpb - population.group) + Fs * (x1 - x2) + Crb = np.zeros(NP) + tmp_id = np.where(np.arange(NP) + FEs < 0.5 * MaxFEs)[0] + Crb[tmp_id] = 2 * ((FEs + tmp_id) / MaxFEs - 0.5) + if cross_exponential: + us = self._binomial(population.group, vs, Crb) + else: + us = self._exponential(population.group, vs, Cr) + + # Reinitialise out-of-bounds elements with uniform random in [lb, ub] + oob = (us < population.Xmin) | (us > population.Xmax) + if np.any(oob): + rand_pts = ( + np.random.rand(NP, dim) * (population.Xmax - population.Xmin) + + population.Xmin + ) + us = np.where(oob, rand_pts, us) + + cost = _eval(problem, us) + optim = np.where(cost < population.cost)[0] + for i in optim: + population.update_archive(i) + population.F[optim] = F[optim] + population.Cr[optim] = Cr[optim] + SF = F[optim] + SCr = Cr[optim] + df = (population.cost[optim] - cost[optim]) / ( + population.cost[optim] + 1e-9 + ) + arc_usage = use_arc[optim] + fp = float(np.sum(df[arc_usage])) + fa = float(np.sum(df[~arc_usage])) + na = int(np.sum(arc_usage)) + population.group[optim] = us[optim] + population.cost[optim] = cost[optim] + if float(np.min(cost)) < population.gbest: + population.gbest = float(np.min(cost)) + population.gbest_solution = us[np.argmin(cost)].copy() + FEs += NP + self.pb = 0.4 - 0.2 * (FEs / MaxFEs) + population.NLPSR(FEs, MaxFEs) + population.update_M_F_Cr(SF, SCr, df) + self._update_pa(fa, fp, na, NP) + NP = population.NP + NA = population.NA + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + + return population, Fevs, min(FEs, MaxFEs) + + +class JDE21: + def __init__(self) -> None: + self.sNP = 10 + self.tao1 = 0.1 + self.tao2 = 0.1 + self.Finit = 0.5 + self.CRinit = 0.9 + self.Fl_b = 0.1 + self.Fl_s = 0.17 + self.Fu = 1.1 + self.CRl_b = 0.0 + self.CRl_s = 0.1 + self.CRu_b = 1.1 + self.CRu_s = 0.8 + self.eps = 1e-12 + self.MyEps = 0.25 + self.nReset = 0 + self.sReset = 0 + self.cCopy = 0 + + def _prevec_enakih(self, cost: np.ndarray, best: float) -> bool: + eqs = len(cost[np.fabs(cost - best) < self.eps]) + return eqs > 2 and eqs > len(cost) * self.MyEps + + def _crowding(self, group: np.ndarray, vs: np.ndarray) -> np.ndarray: + NP, dim = vs.shape + dist = np.sum( + ((group * np.ones((NP, NP, dim))).transpose(1, 0, 2) - vs) ** 2, -1 + ).transpose() + return np.argmin(dist, -1) + + def step( + self, + population, + problem, + FEs: int, + FEs_end: int, + MaxFEs: int, + record_period: int = -1, + ): + if record_period <= 0: + record_period = FEs_end - FEs + NP = population.NP + dim = population.dim + sNP = self.sNP + bNP = max(NP - sNP, 2) + age = 0 + k = 1 + Fevs = [] + + def mutate_cross_select(r1, r2, r3, SF, SCr, df, age, big): + nonlocal bNP + if big: + xNP = bNP + randF = np.random.rand(xNP) * self.Fu + self.Fl_b + randCr = np.random.rand(xNP) * self.CRu_b + self.CRl_b + pF = population.F[:xNP] + pCr = population.Cr[:xNP] + else: + # Use actual slice size (may be < sNP when population shrank) + xNP = population.F[bNP:].shape[0] + if xNP == 0: + return SF, SCr, df, age + randF = np.random.rand(xNP) * self.Fu + self.Fl_s + randCr = np.random.rand(xNP) * self.CRu_b + self.CRl_s + pF = population.F[bNP:] + pCr = population.Cr[bNP:] + + rvs = np.random.rand(xNP) + F = np.where(rvs < self.tao1, randF, pF) + rvs = np.random.rand(xNP) + Cr = np.where(rvs < self.tao2, randCr, pCr) + Fs = F.repeat(dim).reshape(xNP, dim) + Crs = Cr.repeat(dim).reshape(xNP, dim) + v = population.group[r1] + Fs * ( + population.group[r2] - population.group[r3] + ) + v = np.clip(v, population.Xmin, population.Xmax) + jrand = np.random.randint(dim, size=xNP) + u = np.where( + np.random.rand(xNP, dim) < Crs, + v, + (population.group[:bNP] if big else population.group[bNP:]), + ) + u[np.arange(xNP), jrand] = v[np.arange(xNP), jrand] + cost = _eval(problem, u) + crowding_ids = ( + self._crowding(population.group[:xNP], u) + if big + else np.arange(xNP) + bNP + ) + age += xNP + for i in range(xNP): + idx = crowding_ids[i] + if cost[i] < population.cost[idx]: + population.update_archive(idx) + population.group[idx] = u[i] + population.cost[idx] = cost[i] + population.F[idx] = F[i] + population.Cr[idx] = Cr[i] + SF = np.append(SF, F[i]) + SCr = np.append(SCr, Cr[i]) + d = (population.cost[i] - cost[i]) / (population.cost[i] + 1e-9) + df = np.append(df, d) + if cost[i] < population.cbest: + age = 0 + population.cbest_id = idx + population.cbest = cost[i] + if cost[i] < population.gbest: + population.gbest = cost[i] + population.gbest_solution = u[i].copy() + + return SF, SCr, df, age + + population.sort(NP, True) + while FEs >= k * record_period: + k += 1 + while FEs < FEs_end: + df = np.array([]) + SF = np.array([]) + SCr = np.array([]) + if ( + self._prevec_enakih(population.cost[:bNP], population.gbest) + or age > MaxFEs / 10 + ): + self.nReset += 1 + population.group[:bNP] = population.initialize_group(bNP) + population.F[:bNP] = self.Finit + population.Cr[:bNP] = self.CRinit + population.cost[:bNP] = 1e15 + age = 0 + population.cbest = float(np.min(population.cost)) + population.cbest_id = int(np.argmin(population.cost)) + + if FEs < MaxFEs / 3: + mig = 1 + elif FEs < 2 * MaxFEs / 3: + mig = 2 + else: + mig = 3 + + r1 = np.random.randint(bNP, size=bNP) + count = 0 + duplicate = np.where((r1 == np.arange(bNP)) * (r1 == population.cbest_id))[ + 0 + ] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(bNP, size=duplicate.shape[0]) + duplicate = np.where( + (r1 == np.arange(bNP)) * (r1 == population.cbest_id) + )[0] + count += 1 + r2 = np.random.randint(bNP + mig, size=bNP) + count = 0 + duplicate = np.where((r2 == np.arange(bNP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(bNP + mig, size=duplicate.shape[0]) + duplicate = np.where((r2 == np.arange(bNP)) + (r2 == r1))[0] + count += 1 + r3 = np.random.randint(bNP + mig, size=bNP) + count = 0 + duplicate = np.where((r3 == np.arange(bNP)) + (r3 == r1) + (r3 == r2))[0] + while duplicate.shape[0] > 0 and count < 25: + r3[duplicate] = np.random.randint(bNP + mig, size=duplicate.shape[0]) + duplicate = np.where((r3 == np.arange(bNP)) + (r3 == r1) + (r3 == r2))[ + 0 + ] + count += 1 + SF, SCr, df, age = mutate_cross_select( + r1, r2, r3, SF, SCr, df, age, big=True + ) + FEs += bNP + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + if FEs >= FEs_end or FEs >= MaxFEs: + break + + curr_sNP = NP - bNP + if ( + curr_sNP > 0 + and population.cbest_id >= bNP + and self._prevec_enakih(population.cost[bNP:], population.cbest) + ): + self.sReset += 1 + cbest = population.cbest + cbest_id = population.cbest_id + tmp = population.group[cbest_id].copy() + population.group[bNP:] = population.initialize_group(curr_sNP) + population.F[bNP:] = self.Finit + population.Cr[bNP:] = self.CRinit + population.cost[bNP:] = 1e15 + population.cbest = cbest + population.cbest_id = cbest_id + population.group[cbest_id] = tmp + population.cost[cbest_id] = cbest + + if population.cbest_id < bNP: + self.cCopy += 1 + population.cost[bNP] = population.cbest + population.group[bNP] = population.group[population.cbest_id].copy() + population.cbest_id = bNP + + curr_sNP = NP - bNP # actual small-pop size (= sNP unless shrunken) + for _ in range(bNP // curr_sNP if curr_sNP > 0 else 0): + r1 = np.random.randint(curr_sNP, size=curr_sNP) + bNP + count = 0 + duplicate = np.where(r1 == (np.arange(curr_sNP) + bNP))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = ( + np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP + ) + duplicate = np.where(r1 == (np.arange(curr_sNP) + bNP))[0] + count += 1 + r2 = np.random.randint(curr_sNP, size=curr_sNP) + bNP + count = 0 + duplicate = np.where((r2 == (np.arange(curr_sNP) + bNP)) + (r2 == r1))[ + 0 + ] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = ( + np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP + ) + duplicate = np.where( + (r2 == (np.arange(curr_sNP) + bNP)) + (r2 == r1) + )[0] + count += 1 + r3 = np.random.randint(curr_sNP, size=curr_sNP) + bNP + count = 0 + duplicate = np.where( + (r3 == (np.arange(curr_sNP) + bNP)) + (r3 == r1) + (r3 == r2) + )[0] + while duplicate.shape[0] > 0 and count < 25: + r3[duplicate] = ( + np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP + ) + duplicate = np.where( + (r3 == (np.arange(curr_sNP) + bNP)) + (r3 == r1) + (r3 == r2) + )[0] + count += 1 + SF, SCr, df, age = mutate_cross_select( + r1, r2, r3, SF, SCr, df, age, big=False + ) + FEs += curr_sNP + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + if FEs >= FEs_end or FEs >= MaxFEs: + break + + population.update_M_F_Cr(SF, SCr, df) + NP = min( + int(population.cal_NP_next_gen(FEs, MaxFEs)), len(population.group) + ) + population.NP = NP + population.group = population.group[-NP:] + population.cost = population.cost[-NP:] + population.F = population.F[-NP:] + population.Cr = population.Cr[-NP:] + population.cbest_id = int(np.argmin(population.cost)) + population.cbest = float(np.min(population.cost)) + bNP = max(NP - sNP, 2) + + return population, Fevs, min(FEs, MaxFEs) + + +class MadDE: + def __init__(self) -> None: + self.p = 0.18 + self.PqBX = 0.01 + self.pm = np.ones(3) / 3 + + def _ctb_w_arc( + self, + group: np.ndarray, + best: np.ndarray, + archive: np.ndarray, + Fs: np.ndarray, + ) -> np.ndarray: + NP, dim = group.shape + NB = best.shape[0] + NA = archive.shape[0] + count = 0 + rb = np.random.randint(NB, size=NP) + duplicate = np.where(rb == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 25: + rb[duplicate] = np.random.randint(NB, size=duplicate.shape[0]) + duplicate = np.where(rb == np.arange(NP))[0] + count += 1 + count = 0 + r1 = np.random.randint(NP, size=NP) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + count += 1 + count = 0 + r2 = np.random.randint(NP + NA, size=NP) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(NP + NA, size=duplicate.shape[0]) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + count += 1 + xb = best[rb] + x1 = group[r1] + x2 = np.concatenate((group, archive), 0)[r2] if NA > 0 else group[r2] + return group + Fs * (xb - group) + Fs * (x1 - x2) + + def _ctr_w_arc( + self, + group: np.ndarray, + archive: np.ndarray, + Fs: np.ndarray, + ) -> np.ndarray: + NP, dim = group.shape + NA = archive.shape[0] + count = 0 + r1 = np.random.randint(NP, size=NP) + duplicate = np.where(r1 == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where(r1 == np.arange(NP))[0] + count += 1 + count = 0 + r2 = np.random.randint(NP + NA, size=NP) + duplicate = np.where((r2 == np.arange(NP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(NP + NA, size=duplicate.shape[0]) + duplicate = np.where((r2 == np.arange(NP)) + (r2 == r1))[0] + count += 1 + x1 = group[r1] + x2 = np.concatenate((group, archive), 0)[r2] if NA > 0 else group[r2] + return group + Fs * (x1 - x2) + + def _weighted_rtb( + self, + group: np.ndarray, + best: np.ndarray, + Fs: np.ndarray, + Fas: float, + ) -> np.ndarray: + NP, dim = group.shape + NB = best.shape[0] + count = 0 + rb = np.random.randint(NB, size=NP) + duplicate = np.where(rb == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 25: + rb[duplicate] = np.random.randint(NB, size=duplicate.shape[0]) + duplicate = np.where(rb == np.arange(NP))[0] + count += 1 + count = 0 + r1 = np.random.randint(NP, size=NP) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + count += 1 + count = 0 + r2 = np.random.randint(NP, size=NP) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + count += 1 + return Fs * group[r1] + Fs * Fas * (best[rb] - group[r2]) + + @staticmethod + def _binomial(x: np.ndarray, v: np.ndarray, Crs: np.ndarray) -> np.ndarray: + NP, dim = x.shape + jrand = np.random.randint(dim, size=NP) + u = np.where(np.random.rand(NP, dim) < Crs, v, x) + u[np.arange(NP), jrand] = v[np.arange(NP), jrand] + return u + + def step( + self, + population, + problem, + FEs: int, + FEs_end: int, + MaxFEs: int, + record_period: int = -1, + ): + if record_period <= 0: + record_period = FEs_end - FEs + Fevs = [] + k = 1 + while FEs >= k * record_period: + k += 1 + population.sort(population.NP) + while FEs < FEs_end and FEs < MaxFEs: + NP, dim = population.NP, population.dim + q = 2 * self.p - self.p * FEs / MaxFEs + Fa = 0.5 + 0.5 * FEs / MaxFEs + Cr, F = population.choose_F_Cr() + mu = np.random.choice(3, size=NP, p=self.pm) + p1 = population.group[mu == 0] + p2 = population.group[mu == 1] + p3 = population.group[mu == 2] + pbest = population.group[: max(int(self.p * NP), 2)] + qbest = population.group[: max(int(q * NP), 2)] + Fs = F.repeat(dim).reshape(NP, dim) + v1 = self._ctb_w_arc(p1, pbest, population.archive, Fs[mu == 0]) + v2 = self._ctr_w_arc(p2, population.archive, Fs[mu == 1]) + v3 = self._weighted_rtb(p3, qbest, Fs[mu == 2], Fa) + v = np.zeros((NP, dim)) + v[mu == 0] = v1 + v[mu == 1] = v2 + v[mu == 2] = v3 + + # Bounds handling: midpoint reflection with lb/ub + Xmin = population.Xmin + Xmax = population.Xmax + v = np.where(v < Xmin, (population.group + Xmin) / 2, v) + v = np.where(v > Xmax, (population.group + Xmax) / 2, v) + + rvs = np.random.rand(NP) + Crs = Cr.repeat(dim).reshape(NP, dim) + u = np.zeros((NP, dim)) + if np.sum(rvs <= self.PqBX) > 0: + qu = v[rvs <= self.PqBX] + if population.archive.shape[0] > 0: + qbest = np.concatenate((population.group, population.archive), 0)[ + : max(int(q * (NP + population.archive.shape[0])), 2) + ] + cross_qbest = qbest[np.random.randint(qbest.shape[0], size=qu.shape[0])] + qu = self._binomial(cross_qbest, qu, Crs[rvs <= self.PqBX]) + u[rvs <= self.PqBX] = qu + bu = v[rvs > self.PqBX] + bu = self._binomial( + population.group[rvs > self.PqBX], bu, Crs[rvs > self.PqBX] + ) + u[rvs > self.PqBX] = bu + + ncost = _eval(problem, u) + FEs += NP + optim = np.where(ncost < population.cost)[0] + for i in optim: + population.update_archive(i) + SF = F[optim] + SCr = Cr[optim] + df = np.maximum(0, population.cost - ncost) + population.update_M_F_Cr(SF, SCr, df[optim]) + count_S = np.zeros(3) + for i in range(3): + count_S[i] = np.mean(df[mu == i] / (population.cost[mu == i] + 1e-9)) + if np.sum(count_S) > 0: + self.pm = np.clip(count_S / np.sum(count_S), 0.1, 0.9) + self.pm /= np.sum(self.pm) + else: + self.pm = np.ones(3) / 3 + population.group[optim] = u[optim] + population.cost = np.minimum(population.cost, ncost) + population.NLPSR(FEs, MaxFEs) + if float(np.min(population.cost)) < population.gbest: + population.gbest = float(np.min(population.cost)) + population.gbest_solution = population.group[ + np.argmin(population.cost) + ].copy() + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + + return population, Fevs, min(FEs, MaxFEs) + + +_PORTFOLIO: dict[str, type] = { + "NL_SHADE_RSP": NL_SHADE_RSP, + "MADDE": MadDE, + "JDE21": JDE21, +} + + +def get_rldas_portfolio(names: list[str] | None = None) -> list: + """Return instantiated RL-DAS optimizer objects. + + Parameters + ---------- + names: + Optimizer names. Defaults to ``["NL_SHADE_RSP", "MADDE", "JDE21"]``. + Valid names: ``"NL_SHADE_RSP"``, ``"MADDE"``, ``"JDE21"``. + """ + if names is None: + names = ["NL_SHADE_RSP", "MADDE", "JDE21"] + unknown = [n for n in names if n not in _PORTFOLIO] + if unknown: + raise ValueError( + f"Unknown RL-DAS optimizer(s): {unknown}. Valid choices: {list(_PORTFOLIO)}" + ) + return [_PORTFOLIO[n]() for n in names] diff --git a/agents/rl_das/population.py b/agents/rl_das/population.py new file mode 100644 index 0000000..a65ffe2 --- /dev/null +++ b/agents/rl_das/population.py @@ -0,0 +1,137 @@ +"""Population class for RL-DAS DE optimizers. + +Direct port of the original RL-DAS Population with configurable bounds +so it works with BBOB problems ([-5, 5]) instead of CEC ([-100, 100]). +""" + +from __future__ import annotations + +import numpy as np +import scipy.stats as stats + + +class Population: + def __init__( + self, + dim: int, + lb: np.ndarray | float, + ub: np.ndarray | float, + Nmax: int = 170, + Nmin: int = 30, + ): + self.dim = dim + self.Nmax = Nmax + self.Nmin = Nmin + self.NP = Nmax + self.NA = int(self.NP * 2.1) + self.cost = np.zeros(self.NP) + self.cbest: float = 1e15 + self.cbest_id: int = -1 + self.gbest: float = 1e15 + self.gbest_solution = np.zeros(dim) + self.Xmin = ( + np.full(dim, lb, dtype=float) + if np.isscalar(lb) + else np.asarray(lb, dtype=float) + ) + self.Xmax = ( + np.full(dim, ub, dtype=float) + if np.isscalar(ub) + else np.asarray(ub, dtype=float) + ) + self.group = self._init_group() + self.archive: np.ndarray = np.array([]) + self.MF = np.ones(dim * 20) * 0.2 + self.MCr = np.ones(dim * 20) * 0.2 + self.k: int = 0 + self.F = np.ones(self.NP) * 0.5 + self.Cr = np.ones(self.NP) * 0.9 + + def _init_group(self, size: int = -1) -> np.ndarray: + if size < 0: + size = self.NP + return np.random.rand(size, self.dim) * (self.Xmax - self.Xmin) + self.Xmin + + # Alias used by JDE21 reset logic + def initialize_group(self, size: int = -1) -> np.ndarray: + return self._init_group(size) + + def initialize_costs(self, problem) -> None: + self.cost = np.array([float(problem(self.group[i])) for i in range(self.NP)]) + self.gbest = self.cbest = float(np.min(self.cost)) + self.cbest_id = int(np.argmin(self.cost)) + self.gbest_solution = self.group[self.cbest_id].copy() + + def sort(self, size: int, reverse: bool = False) -> None: + r = -1 if reverse else 1 + ind = np.concatenate( + (np.argsort(r * self.cost[:size]), np.arange(self.NP)[size:]) + ) + self.cost = self.cost[ind] + self.cbest = float(np.min(self.cost)) + self.cbest_id = int(np.argmin(self.cost)) + self.group = self.group[ind] + self.F = self.F[ind] + self.Cr = self.Cr[ind] + + def cal_NP_next_gen(self, FEs: int, MaxFEs: int) -> float: + return np.round( + self.Nmax + + (self.Nmin - self.Nmax) * np.power(FEs / MaxFEs, 1 - FEs / MaxFEs) + ) + + def slice(self, size: int) -> None: + self.NP = size + self.group = self.group[:size] + self.cost = self.cost[:size] + self.F = self.F[:size] + self.Cr = self.Cr[:size] + if self.cbest_id >= size: + self.cbest_id = int(np.argmin(self.cost)) + self.cbest = float(np.min(self.cost)) + + def _mean_wL(self, df: np.ndarray, s: np.ndarray) -> float: + w = df / np.sum(df) + if np.sum(w * s) > 1e-6: + return float(np.sum(w * (s**2)) / np.sum(w * s)) + return 0.5 + + def choose_F_Cr(self) -> tuple[np.ndarray, np.ndarray]: + gs = self.NP + ind_r = np.random.randint(0, self.MF.shape[0], size=gs) + C_r = np.minimum( + 1, np.maximum(0, np.random.normal(loc=self.MCr[ind_r], scale=0.1, size=gs)) + ) + cauchy_locs = self.MF[ind_r] + F = stats.cauchy.rvs(loc=cauchy_locs, scale=0.1, size=gs) + err = np.where(F < 0)[0] + F[err] = 2 * cauchy_locs[err] - F[err] + return C_r, np.minimum(1, F) + + def update_M_F_Cr(self, SF: np.ndarray, SCr: np.ndarray, df: np.ndarray) -> None: + if SF.shape[0] > 0: + self.MF[self.k] = self._mean_wL(df, SF) + self.MCr[self.k] = self._mean_wL(df, SCr) + self.k = (self.k + 1) % self.MF.shape[0] + else: + self.MF[self.k] = 0.5 + self.MCr[self.k] = 0.5 + + def NLPSR(self, FEs: int, MaxFEs: int) -> None: + self.sort(self.NP) + N = self.cal_NP_next_gen(FEs, MaxFEs) + A = int(max(N * 2.1, self.Nmin)) + N = int(N) + if N < self.NP: + self.slice(N) + if A < self.archive.shape[0]: + self.NA = A + self.archive = self.archive[:A] + + def update_archive(self, old_id: int) -> None: + if self.archive.shape[0] < self.NA: + self.archive = np.append(self.archive, self.group[old_id]).reshape( + -1, self.dim + ) + else: + self.archive[np.random.randint(self.archive.shape[0])] = self.group[old_id] diff --git a/cv.py b/cv.py index 557ec2c..28ae3d1 100644 --- a/cv.py +++ b/cv.py @@ -140,6 +140,10 @@ def _parse_args() -> argparse.Namespace: "--save-interval", type=int, default=50, help="Checkpoint every N epochs" ) rl.add_argument("--device", default="cpu", help="PyTorch device") + rl.set_defaults( + portfolio=["NL_SHADE_RSP", "MADDE", "JDE21"], + n_individuals=170, + ) # ---- Exp-DAS ---------------------------------------------------- exp = sub.add_parser( diff --git a/das/training/rldas.py b/das/training/rldas.py index ddac460..00e859f 100644 --- a/das/training/rldas.py +++ b/das/training/rldas.py @@ -6,7 +6,6 @@ import numpy as np from das.env.bbob_splits import get_cv_folds, get_train_test_split -from das.optimizers.portfolio import get_portfolio from das.training.common import write_jsonl @@ -14,10 +13,9 @@ def run_rl_das(args) -> None: import cocoex as cx from agents.rl_das import RLDASEnv, PPOAgent from agents.rl_das import train, evaluate + from agents.rl_das.optimizers import get_rldas_portfolio - optimizers = get_portfolio(args.portfolio) - if not optimizers: - raise ValueError(f"Unknown optimizers: {args.portfolio}") + optimizers = get_rldas_portfolio(args.portfolio) train_ids, test_ids = get_train_test_split(args.mode, [args.dim]) print(f"Train: {len(train_ids)} problems | Test: {len(test_ids)} problems") @@ -76,10 +74,9 @@ def run_cv_rl_das(args) -> None: import cocoex as cx from agents.rl_das import RLDASEnv, PPOAgent from agents.rl_das import train, evaluate + from agents.rl_das.optimizers import get_rldas_portfolio - optimizers = get_portfolio(args.portfolio) - if not optimizers: - raise ValueError(f"Unknown optimizers: {args.portfolio}") + optimizers = get_rldas_portfolio(args.portfolio) suite = cx.Suite("bbob", "", "") diff --git a/run_local.sh b/run_local.sh index 2415bd9..f1363fb 100755 --- a/run_local.sh +++ b/run_local.sh @@ -29,12 +29,12 @@ case "$AGENT" in -p "${PORTFOLIO[@]}" -d 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 ;; rl-das) - python train.py rl-das ${PORTFOLIO_STR}_RLDAS_LOCAL_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --dim 2 --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + python train.py rl-das NL_SHADE_RSP_MADDE_JDE21_RLDAS_LOCAL_SEED${SEED} \ + --dim 2 --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 ;; rl-das-cv) - python cv.py rl-das ${PORTFOLIO_STR}_RLDAS_CV_LOCAL_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --dim 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 + python cv.py rl-das NL_SHADE_RSP_MADDE_JDE21_RLDAS_CV_LOCAL_SEED${SEED} \ + --dim 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 ;; exp-das) python train.py exp-das ${PORTFOLIO_STR}_EXPDAS_LOCAL_SEED${SEED} \ diff --git a/train.py b/train.py index ebd98b5..5024ec5 100644 --- a/train.py +++ b/train.py @@ -153,7 +153,11 @@ def _parse_args() -> argparse.Namespace: rl.add_argument( "--no-eval", dest="eval", action="store_false", help="Skip final evaluation" ) - rl.set_defaults(eval=True) + rl.set_defaults( + eval=True, + portfolio=["NL_SHADE_RSP", "MADDE", "JDE21"], + n_individuals=170, + ) # ---- Exp-DAS ---------------------------------------------------- exp = sub.add_parser( From b6c88cb56e6b759652eee3d925bc85082f0ad54c Mon Sep 17 00:00:00 2001 From: wlnc Date: Wed, 20 May 2026 12:36:42 +0200 Subject: [PATCH 4/8] add README --- README.md | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 362 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..4e5cac7 --- /dev/null +++ b/README.md @@ -0,0 +1,362 @@ +# DynamicAlgorithmSelection2 + +RL-based Dynamic Algorithm Selection (DAS) on the [BBOB benchmark](https://numbbo.github.io/coco/). A controller learns to switch between a portfolio of black-box optimizers at runtime — allocating function-evaluation budget to whichever optimizer is most promising at each checkpoint. + +--- + +## Agents + +Three agent families share the same BBOB problem set and evaluation protocol: + +| Agent | Description | Key reference | +|---|---|---| +| **PPO** | Stable Baselines 3 PPO with VecNormalize; multi-dimensional; ELA-based observations | — | +| **RL-DAS** | Custom single-dimension PyTorch PPO; DE-only portfolio; population-state features with local sampling | Guo et al., 2024 | +| **Exp-DAS** | Custom PyTorch PPO with exponential checkpoint spacing; flexible portfolio | — | + +### PPO +Uses `DASEnv` — a Gymnasium environment that wraps a warm-started portfolio of arbitrary optimizers. Observations are 22-dimensional ELA landscape features plus per-optimizer movement history. Trains across multiple dimensions simultaneously. + +### RL-DAS +Faithful port of [Guo et al. 2024](https://doi.org/10.1145/3638529.3654223) with BBOB adaptations: +- Fixed DE portfolio: **NL_SHADE_RSP**, **MadDE**, **JDE21** (all share a single `Population` object as mutable warm-started state). +- 9-dimensional population-state features computed via local sampling (2 independent forward passes on a population deepcopy). +- Movement embedder networks compress D-dim displacement vectors to scalars; the backbone is dimension-specific (one model per `--dim`). +- Hand-rolled PPO training loop (no SB3 dependency for this agent). + +### Exp-DAS +Evolution of the original DAS `policy-gradient` agent. Uses `DASEnv` (same as PPO) but replaces uniform checkpoint spacing with an **exponential schedule** controlled by the Checkpoint Division Base (`--cdb`): + +- **`cdb = 1.0` (uniform):** every checkpoint covers the same number of function evaluations — consistent monitoring throughout the run. +- **`cdb > 1.0` (exponential):** early checkpoints are short (frequent switching during initial exploration) and later checkpoints are long (uninterrupted convergence during exploitation). + +The agent uses separate actor and critic learning rates and a configurable number of PPO gradient epochs per update. Like PPO, it supports multiple dimensions simultaneously and an arbitrary optimizer portfolio. + +--- + +## Installation + +Requires Python 3.11. Dependency management via [uv](https://docs.astral.sh/uv/). + +```bash +uv sync +``` + +--- + +## Quick start + +`run_local.sh` runs a single agent with tiny settings (fast smoke test): + +```bash +bash run_local.sh [seed] [agent] [portfolio...] + +# Examples +bash run_local.sh 42 ppo CPSO NM TDE +bash run_local.sh 42 ppo-cv CPSO NM TDE +bash run_local.sh 42 rl-das # DE portfolio fixed; no -p needed +bash run_local.sh 42 rl-das-cv +bash run_local.sh 42 exp-das CPSO NM TDE +bash run_local.sh 42 exp-das-cv CPSO NM TDE +bash run_local.sh 42 baselines CPSO NM TDE +``` + +Run the full smoke-test suite (all agent types): + +```bash +bash smoke_test.sh +# or selectively +bash smoke_test.sh rl-das rl-das-cv +``` + +--- + +## Training + +```bash +python train.py {ppo,rl-das,exp-das} [options] +``` + +### PPO + +```bash +python train.py ppo MY_PPO \ + -p CPSO NM TDE \ + -d 2 5 10 \ + -E 20 \ + --fe-multiplier 10000 \ + --n-checkpoints 10 \ + --seed 42 +``` + +Key options: + +| Flag | Default | Description | +|---|---|---| +| `-p / --portfolio` | `SPSO IPSO SPSOL` | Sub-optimizer names | +| `-d / --dims` | all | Problem dimensions | +| `-E / --n-epochs` | 20 | Passes over the training set | +| `--fe-multiplier` | 10 000 | Budget = multiplier × dimension | +| `--n-checkpoints` | 10 | Optimizer-selection steps per episode | +| `-x / --cdb` | 1.0 | Checkpoint division base (1 = uniform) | +| `-O / --reward-option` | 1 | Reward shaping (1–4) | +| `--wandb` | off | Log to Weights & Biases | + +Outputs: `models/.zip`, `models/_vecnorm.pkl` + +### RL-DAS + +```bash +python train.py rl-das MY_RLDAS \ + --dim 10 \ + --n-epochs 20 \ + --fe-multiplier 10000 \ + --seed 42 +``` + +The portfolio is fixed to `NL_SHADE_RSP MADDE JDE21` and `--n-individuals` defaults to 170 (matching the original paper). Use `--portfolio` to override. + +Key options: + +| Flag | Default | Description | +|---|---|---| +| `--dim` | 10 | Problem dimension (one model per dim) | +| `--n-epochs` | 20 | Training epochs | +| `--lr` | 1e-5 | Learning rate | +| `--k-epoch` | `0.3 × n_checkpoints` | PPO gradient steps per episode | +| `--device` | cpu | PyTorch device | + +Outputs: `models/_final.pt`, `models/_epoch.pt`, `models/_train_log.jsonl` + +### Exp-DAS + +```bash +python train.py exp-das MY_EXPDAS \ + -p CPSO NM TDE \ + --dims 2 5 10 \ + -E 3 \ + --cdb 2.0 \ + --reward-option 1 \ + --seed 42 +``` + +Key options: + +| Flag | Default | Description | +|---|---|---| +| `--dims` | `2 5 10` | Problem dimensions | +| `--cdb` | 2.0 | Checkpoint Division Base (see below) | +| `-E / --n-epochs` | 3 | Passes over the training set | +| `--actor-lr` | 3e-5 | Actor learning rate | +| `--critic-lr` | 1e-5 | Critic learning rate | +| `--ppo-epochs` | 6 | PPO gradient epochs per update | +| `--buffer-capacity` | `16 × n_checkpoints` | PPO rollout buffer size in steps | +| `-O / --reward-option` | 1 | Reward shaping strategy (1–4, see below) | +| `--save-interval` | 500 | Save a checkpoint every N episodes | +| `--device` | cpu | PyTorch device | + +Outputs: `models/_best.pt`, `models/_final.pt`, `models/_ep.pt`, `models/_train_log.jsonl` + +--- + +## Checkpoint Division Base (CDB) + +The `--cdb` argument controls how the total FE budget is distributed across the `n_checkpoints` decision points in each episode. + +With `cdb = 1.0` every checkpoint covers the same number of FEs (uniform). With `cdb > 1.0` checkpoint durations grow exponentially: the first checkpoints are short (fast switching during early exploration) and the last are long (uninterrupted convergence during exploitation). + +``` +cdb = 1.0 → [───][───][───][───][───] uniform +cdb = 2.0 → [─][──][────][────────] exponential +``` + +**When to use each value:** + +| Value | Effect | Use case | +|---|---|---| +| `1.0` | Equal-length checkpoints | Consistent monitoring; PPO default | +| `2.0` | Moderate exponential growth | Exp-DAS default; balances exploration and exploitation | +| `> 2.0` | Aggressive early switching | Portfolios where early optimizer choice is decisive | + +The `--cdb` flag is available for all three agents (`ppo`, `rl-das` ignores it, `exp-das`). + +--- + +## Reward options + +The `-O / --reward-option` flag selects the reward signal used at each checkpoint. All options measure improvement in the best objective value found so far and scale it by the initial value range. + +| Option | Name | Description | +|---|---|---| +| `1` | Log-scaled improvement | `improvement` between consecutive checkpoints, clipped to `[0, 1]`, then `log(r + 1e-5)`. Smooths large variance. **Default.** | +| `2` | Linear clipped improvement | Same as option 1 but without the log transform: `clip(improvement, 0, 1)`. | +| `3` | Sparse total improvement | Returns `0` at every intermediate checkpoint; at the final checkpoint returns the log-scaled total improvement from episode start. Focuses the agent on end-of-run quality. | +| `4` | Binary threshold | Returns `1` if scaled improvement ≥ `1e-3`, else `0`. Simple binary feedback. | + +--- + +## Cross-validation + +```bash +python cv.py {ppo,rl-das,exp-das} [options] +``` + +Two CV modes: + +- **LOIO** (Leave-One-Instance-Out): hold out a subset of BBOB instances per fold. +- **LOPO** (Leave-One-Problem-Out): hold out a subset of BBOB functions per fold. + +```bash +# PPO – 3-fold LOIO +python cv.py ppo MY_PPO_CV \ + -p CPSO NM TDE -d 5 10 \ + --cv-mode LOIO --n-folds 3 --n-epochs 10 --seed 42 + +# RL-DAS – 3-fold LOPO, dim 10 only +python cv.py rl-das MY_RLDAS_CV \ + --dim 10 --cv-mode LOPO --n-folds 3 --n-epochs 20 --seed 42 + +# Run only folds 0 and 2 +python cv.py exp-das MY_EXPDAS_CV \ + -p CPSO NM TDE --dims 5 10 \ + --cv-mode LOIO --folds 0 2 --n-epochs 3 --seed 42 +``` + +Outputs per fold: `results/_cv_.jsonl` +Aggregated: `results/_cv_summary.jsonl` + +--- + +## Baselines + +```bash +python baselines.py --agent [options] +``` + +Agent types: + +| Type | Description | +|---|---| +| `random` | Uniform random selection at each checkpoint | +| `fixed:` | Always pick one optimizer, e.g. `fixed:CPSO` | +| `single:` | One optimizer runs the full budget (no checkpointing) | +| `all` | All of the above; derives oracle-best / oracle-worst | + +```bash +python baselines.py MY_BASELINES --agent all \ + -p CPSO NM TDE -d 2 5 10 --seed 42 +``` + +--- + +## Evaluation + +Load a trained PPO model and evaluate it on the BBOB test set: + +```bash +python evaluate.py MY_PPO \ + -p CPSO NM TDE -d 5 10 --seed 42 +``` + +Add `--coco-observer` to write COCO-compatible data for `cocopp` post-processing. + +--- + +## Problem set + +The BBOB benchmark provides 24 functions × 15 instances × 6 dimensions = 2 160 problems per dimension. + +**Dimensions:** `2, 3, 5, 10, 20, 40` + +**Default train/test split** (`--mode easy`): trains on 14 structurally simpler functions and tests on the remaining 10 harder functions. + +| Mode | Train | Test | +|---|---|---| +| `easy` | functions {4,6–14,18–20,22–24} | remaining 10 functions | +| `hard` | inverse of easy | — | +| `random` | 2/3 of all problems | 1/3 | + +--- + +## Optimizer portfolio + +Available sub-optimizers (pass names via `-p / --portfolio`): + +| Family | Names | +|---|---| +| PSO | `SPSO`, `IPSO`, `SPSOL`, `CPSO` | +| DE | `NL_SHADE_RSP`, `MADDE`, `JDE21`, `TDE` | +| ES | `NM` (Nelder-Mead) | +| BO | `BO` | +| DS | `DS` (Direct Search) | + +RL-DAS always uses the DE trio `NL_SHADE_RSP / MADDE / JDE21` — overridable with `--portfolio`. + +--- + +## HPC / SLURM + +Submit all agents for a given seed and portfolio: + +```bash +bash runner.sh +``` + +Individual SLURM scripts: + +| Script | Agent | +|---|---| +| `ppo_study.slurm` | PPO | +| `rl_das_study.slurm` | RL-DAS | +| `exp_das_study.slurm` | Exp-DAS | +| `baselines.slurm` | Baselines | + +--- + +## Project structure + +``` +DynamicAlgorithmSelection2/ +├── train.py # Unified training entry point +├── cv.py # Cross-validation entry point +├── baselines.py # Baseline agents +├── evaluate.py # Model evaluation +├── run_local.sh # Local smoke-test runner +├── smoke_test.sh # Full smoke-test suite +├── runner.sh # SLURM batch submission +│ +├── agents/ +│ ├── rl_das/ # RL-DAS (Guo et al. 2024 port) +│ │ ├── env.py # RLDASEnv: Population-based Gymnasium env +│ │ ├── optimizers.py # NL_SHADE_RSP, JDE21, MadDE (BBOB-adapted) +│ │ ├── population.py # Shared mutable Population state (NLPSR) +│ │ ├── agent.py # PPOAgent (actor-critic) +│ │ ├── network.py # Movement embedder + backbone +│ │ └── trainer.py # train() / evaluate() loops +│ └── exponential_das/ # Exp-DAS agent +│ +├── das/ +│ ├── env/ +│ │ ├── das_env.py # DASEnv: Gymnasium env for PPO / Exp-DAS +│ │ ├── bbob_splits.py# BBOB problem IDs, train/test/CV splits +│ │ ├── observation.py# ELA feature extraction (22-dim) +│ │ └── reward.py # Reward shaping options +│ ├── optimizers/ +│ │ ├── portfolio.py # get_portfolio() factory +│ │ └── {PSO,DE,ES,BO,DS}/ # Sub-optimizer implementations +│ └── training/ +│ ├── ppo.py # run_ppo() / run_cv_ppo() +│ ├── rldas.py # run_rl_das() / run_cv_rl_das() +│ ├── expdas.py # run_exp_das() / run_cv_exp_das() +│ └── common.py # Shared utilities (JSONL writer, etc.) +│ +├── tests/ # pytest test suite +└── pyproject.toml +``` + +--- + +## References + +- Guo, Y. et al. (2024). *Deep Reinforcement Learning for Dynamic Algorithm Selection: A Proof-of-Principle Study on Differential Evolution*. GECCO 2024. https://doi.org/10.1145/3638529.3654223 +- Hansen, N. et al. (2021). *COCO: A Platform for Comparing Continuous Optimizers in a Black-Box Setting*. Optimization Methods and Software. \ No newline at end of file From 20d931ec1cd75e5418c18738721be0166b29d181 Mon Sep 17 00:00:00 2001 From: wlnc Date: Fri, 22 May 2026 22:22:46 +0200 Subject: [PATCH 5/8] update runners --- cv.py | 21 ++++++++++++--------- rl_das_study.slurm | 20 ++++++-------------- runner.sh | 9 +++++---- train.py | 21 ++++++++++++--------- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/cv.py b/cv.py index 28ae3d1..8b2a6d1 100644 --- a/cv.py +++ b/cv.py @@ -28,15 +28,18 @@ # ------------------------------------------------------------------ # -def _add_shared_args(p: argparse.ArgumentParser) -> None: +def _add_shared_args( + p: argparse.ArgumentParser, *, include_portfolio: bool = True +) -> None: p.add_argument("name", help="Experiment name (used for output file names)") - p.add_argument( - "-p", - "--portfolio", - nargs="+", - default=["SPSO", "IPSO", "SPSOL"], - help="Sub-optimizer names from the portfolio", - ) + if include_portfolio: + p.add_argument( + "-p", + "--portfolio", + nargs="+", + default=["SPSO", "IPSO", "SPSOL"], + help="Sub-optimizer names from the portfolio", + ) p.add_argument( "--fe-multiplier", type=int, @@ -121,7 +124,7 @@ def _parse_args() -> argparse.Namespace: help="Custom RL-DAS: single-dimension, pure-PyTorch PPO", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - _add_shared_args(rl) + _add_shared_args(rl, include_portfolio=False) rl.add_argument( "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)" ) diff --git a/rl_das_study.slurm b/rl_das_study.slurm index 504cf97..60bd113 100644 --- a/rl_das_study.slurm +++ b/rl_das_study.slurm @@ -10,36 +10,28 @@ #SBATCH -A plgrldas2026-gpu-a100 #SBATCH --array=0-7 -# Args: SEED [PORTFOLIO...] +# Args: SEED SEED=${1:-42} -if [ "$#" -lt 2 ]; then - PORTFOLIO=('SPSO' 'IPSO' 'SPSOL') -else - PORTFOLIO=("${@:2}") -fi - -PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") - ENV_PATH="$SCRATCH/DynamicAlgorithmSelection2/.venv/bin/activate" source "$ENV_PATH" mkdir -p logs DIMS=(2 3 5 10) -echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED | PORTFOLIO=${PORTFOLIO[*]}" +echo "Array job $SLURM_ARRAY_TASK_ID | SEED=$SEED" # 0-3: CV-LOIO per dimension if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "RL-DAS | CV-LOIO | dim=$DIM" - python cv.py rl-das ${PORTFOLIO_STR}_RLDAS_LOIO_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --dim $DIM --cv-mode LOIO --n-epochs 500 --seed $SEED + python cv.py rl-das NL_SHADE_RSP_MADDE_JDE21_RLDAS_LOIO_DIM${DIM}_SEED${SEED} \ + --dim $DIM --cv-mode LOIO --n-epochs 500 --seed $SEED # 4-7: CV-LOPO per dimension elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "RL-DAS | CV-LOPO | dim=$DIM" - python cv.py rl-das ${PORTFOLIO_STR}_RLDAS_LOPO_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --dim $DIM --cv-mode LOPO --n-epochs 500 --seed $SEED + python cv.py rl-das NL_SHADE_RSP_MADDE_JDE21_RLDAS_LOPO_DIM${DIM}_SEED${SEED} \ + --dim $DIM --cv-mode LOPO --n-epochs 500 --seed $SEED fi \ No newline at end of file diff --git a/runner.sh b/runner.sh index 49b27c4..0d05cf1 100755 --- a/runner.sh +++ b/runner.sh @@ -9,16 +9,17 @@ PORTFOLIOS=( echo "Starting job submissions..." for SEED in "${SEEDS[@]}"; do + + echo "Submitting RL-DAS study | SEED=$SEED" + sbatch rl_das_study.slurm $SEED + sleep 1 + for PORTFOLIO in "${PORTFOLIOS[@]}"; do echo "Submitting PPO study | SEED=$SEED | PORTFOLIO=$PORTFOLIO" sbatch ppo_study.slurm $SEED $PORTFOLIO sleep 1 - echo "Submitting RL-DAS study | SEED=$SEED | PORTFOLIO=$PORTFOLIO" - sbatch rl_das_study.slurm $SEED $PORTFOLIO - sleep 1 - echo "Submitting Exp-DAS study | SEED=$SEED | PORTFOLIO=$PORTFOLIO" sbatch exp_das_study.slurm $SEED $PORTFOLIO sleep 1 diff --git a/train.py b/train.py index 5024ec5..73a1530 100644 --- a/train.py +++ b/train.py @@ -41,15 +41,18 @@ # ------------------------------------------------------------------ # -def _add_shared_args(p: argparse.ArgumentParser) -> None: +def _add_shared_args( + p: argparse.ArgumentParser, *, include_portfolio: bool = True +) -> None: p.add_argument("name", help="Experiment name (used for output file names)") - p.add_argument( - "-p", - "--portfolio", - nargs="+", - default=["SPSO", "IPSO", "SPSOL"], - help="Sub-optimizer names from the portfolio", - ) + if include_portfolio: + p.add_argument( + "-p", + "--portfolio", + nargs="+", + default=["SPSO", "IPSO", "SPSOL"], + help="Sub-optimizer names from the portfolio", + ) p.add_argument( "--mode", choices=["easy", "hard", "random"], @@ -131,7 +134,7 @@ def _parse_args() -> argparse.Namespace: help="Custom RL-DAS: single-dimension, pure-PyTorch PPO", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - _add_shared_args(rl) + _add_shared_args(rl, include_portfolio=False) rl.add_argument( "--dim", type=int, default=10, help="Problem dimension (agent is dim-specific)" ) From 4d916310978deec6a4dd4a5fd51ba96489355cff Mon Sep 17 00:00:00 2001 From: wlnc Date: Sun, 24 May 2026 11:43:26 +0200 Subject: [PATCH 6/8] update results logging --- .github/workflows/ci.yml | 3 + agents/exponential_das/trainer.py | 25 +- agents/rl_das/optimizers.py | 669 ----------------------- agents/rl_das/optimizers/__init__.py | 40 ++ agents/rl_das/optimizers/jde21.py | 275 ++++++++++ agents/rl_das/optimizers/madde.py | 209 +++++++ agents/rl_das/optimizers/nl_shade_rsp.py | 166 ++++++ agents/rl_das/trainer.py | 2 +- baselines.py | 141 ++--- cv.py | 7 + das/env/__init__.py | 2 + das/env/das_env.py | 1 + das/optimizers/BO/__init__.py | 2 +- das/optimizers/DS/nm.py | 6 +- das/optimizers/__init__.py | 5 +- das/training/callbacks.py | 5 +- das/training/common.py | 57 ++ das/training/expdas.py | 225 +++++--- evaluate.py | 62 +-- tests/test_baselines.py | 514 +++++++++++++---- 20 files changed, 1428 insertions(+), 988 deletions(-) delete mode 100644 agents/rl_das/optimizers.py create mode 100644 agents/rl_das/optimizers/__init__.py create mode 100644 agents/rl_das/optimizers/jde21.py create mode 100644 agents/rl_das/optimizers/madde.py create mode 100644 agents/rl_das/optimizers/nl_shade_rsp.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8dd2903..2884b5b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,9 @@ jobs: - name: Ruff lint run: uv run ruff format . + - name: Ruff check + run: uv run ruff check + - name: Run pytest run: uv run pytest tests/ -v --tb=short diff --git a/agents/exponential_das/trainer.py b/agents/exponential_das/trainer.py index 432791e..bcba617 100644 --- a/agents/exponential_das/trainer.py +++ b/agents/exponential_das/trainer.py @@ -20,6 +20,7 @@ from agents.exponential_das.agent import ExpDASAgent from das.env.das_env import DASEnv +from das.training.common import compute_run_stats def train( @@ -111,7 +112,9 @@ def train( if ep % eval_interval == 0: test_results = evaluate(test_env, agent, n_episodes=20) - mean_test_r = float(np.mean([r["reward"] for r in test_results])) + mean_test_r = float( + np.mean([next(iter(r.values()))["reward"] for r in test_results]) + ) entry["mean_test_reward"] = mean_test_r mean_train_r = float(np.mean(episode_rewards[-eval_interval:])) print( @@ -145,24 +148,36 @@ def evaluate( env: DASEnv, agent: ExpDASAgent, n_episodes: int = 20, + global_optima: dict[str, float] | None = None, ) -> list[dict]: """Run the agent deterministically and return per-episode results.""" + if global_optima is None: + global_optima = {} results = [] for _ in range(n_episodes): obs, info = env.reset() + problem_id = info.get("problem_id", "") done = False total_reward = 0.0 + fitness_history: list[tuple[int, float]] = [] + step_info: dict = {} + while not done: action = agent.predict(obs) obs, reward, terminated, truncated, step_info = env.step(action) done = terminated or truncated total_reward += reward + fitness_history.extend(step_info.get("fitness_history_step", [])) + + max_fe = step_info.get("n_fe", 0) + global_minimum = global_optima.get(problem_id, 0.0) + stats = compute_run_stats(fitness_history, max_fe, global_minimum) results.append( { - "problem_id": info.get("problem_id", ""), - "reward": total_reward, - "best_y": step_info.get("best_y", float("inf")), - "n_fe": step_info.get("n_fe", 0), + problem_id: { + **stats, + "reward": total_reward, + } } ) return results diff --git a/agents/rl_das/optimizers.py b/agents/rl_das/optimizers.py deleted file mode 100644 index 40dae89..0000000 --- a/agents/rl_das/optimizers.py +++ /dev/null @@ -1,669 +0,0 @@ -"""DE optimizers for RL-DAS: NL_SHADE_RSP, JDE21, MadDE. - -Ported from the original RL-DAS (Guo et al., 2024) with the following -adaptations for DAS2 / BBOB: - -- Batch evaluation: ``np.array([float(problem(xs[i])) for i in range(len(xs))])`` - instead of ``problem.func(xs) - problem.optimum``. -- Bounds from ``population.Xmin`` / ``population.Xmax`` (not hardcoded ±100). -- No early-termination at ``cost < 1e-8`` (FE budget controls episode end). -- ``__init__`` takes no ``dim`` argument (dimension inferred from population). -""" - -from __future__ import annotations - -import numpy as np -import scipy.stats as stats - - -def _eval(problem, xs: np.ndarray) -> np.ndarray: - return np.array([float(problem(xs[i])) for i in range(len(xs))]) - - -class NL_SHADE_RSP: - def __init__(self) -> None: - self.pb = 0.4 - self.pa = 0.5 - - def _binomial(self, x: np.ndarray, v: np.ndarray, cr: np.ndarray) -> np.ndarray: - NP, dim = x.shape - jrand = np.random.randint(dim, size=NP) - u = np.where(np.random.rand(NP, dim) < cr.repeat(dim).reshape(NP, dim), v, x) - u[np.arange(NP), jrand] = v[np.arange(NP), jrand] - return u - - def _exponential(self, x: np.ndarray, v: np.ndarray, cr: np.ndarray) -> np.ndarray: - NP, dim = x.shape - u = x.copy() - L = np.random.randint(dim, size=NP).repeat(dim).reshape(NP, dim) - L = L <= np.arange(dim) - rvs = np.random.rand(NP, dim) - L = np.where(rvs > cr.repeat(dim).reshape(NP, dim), L, 0) - return u * (1 - L) + v * L - - def _update_pa(self, fa: float, fp: float, na: int, NP: int) -> None: - if na == 0 or fa == 0: - self.pa = 0.5 - return - self.pa = (fa / (na + 1e-15)) / ((fa / (na + 1e-15)) + (fp / (NP - na + 1e-15))) - self.pa = float(np.clip(self.pa, 0.1, 0.9)) - - def step( - self, - population, - problem, - FEs: int, - FEs_end: int, - MaxFEs: int, - record_period: int = -1, - ): - if record_period <= 0: - record_period = FEs_end - FEs - NP, dim = population.NP, population.dim - NA = int(NP * 2.1) - if NA < population.archive.shape[0]: - population.archive = population.archive[:NA] - self.pa = 0.5 - k = 1 - Fevs = [] - while FEs >= k * record_period: - k += 1 - population.sort(population.NP) - - while FEs < FEs_end and FEs < MaxFEs: - Cr, F = population.choose_F_Cr() - Cr = np.sort(Cr) - pr = np.exp(-(np.arange(NP) + 1) / NP) - pr /= np.sum(pr) - cross_exponential = np.random.random() < 0.5 - pb_upper = int(max(2, NP * self.pb)) - pbs = np.random.randint(pb_upper, size=NP) - count = 0 - duplicate = np.where(pbs == np.arange(NP))[0] - while duplicate.shape[0] > 0 and count < 1: - pbs[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) - duplicate = np.where(pbs == np.arange(NP))[0] - count += 1 - xpb = population.group[pbs] - r1 = np.random.randint(NP, size=NP) - count = 0 - duplicate = np.where((r1 == np.arange(NP)) + (r1 == pbs))[0] - while duplicate.shape[0] > 0 and count < 25: - r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) - duplicate = np.where((r1 == np.arange(NP)) + (r1 == pbs))[0] - count += 1 - x1 = population.group[r1] - rvs = np.random.rand(NP) - r2_pop = np.where(rvs >= self.pa)[0] - r2_arc = np.where(rvs < self.pa)[0] - use_arc = np.zeros(NP, dtype=bool) - use_arc[r2_arc] = True - if population.archive.shape[0] < 25: - r2_pop = np.arange(NP) - r2_arc = np.array([], dtype=np.int32) - r2 = np.random.choice(np.arange(NP), size=r2_pop.shape[0], p=pr) - count = 0 - duplicate = np.where( - (r2 == r2_pop) + (r2 == pbs[r2_pop]) + (r2 == r1[r2_pop]) - )[0] - while duplicate.shape[0] > 0 and count < 25: - r2[duplicate] = np.random.choice( - np.arange(NP), size=duplicate.shape[0], p=pr - ) - duplicate = np.where( - (r2 == r2_pop) + (r2 == pbs[r2_pop]) + (r2 == r1[r2_pop]) - )[0] - count += 1 - x2 = np.zeros((NP, dim)) - if r2_pop.shape[0] > 0: - x2[r2_pop] = population.group[r2] - if r2_arc.shape[0] > 0: - x2[r2_arc] = population.archive[ - np.random.randint( - min(population.archive.shape[0], NA), - size=r2_arc.shape[0], - ) - ] - Fs = F.repeat(dim).reshape(NP, dim) - vs = population.group + Fs * (xpb - population.group) + Fs * (x1 - x2) - Crb = np.zeros(NP) - tmp_id = np.where(np.arange(NP) + FEs < 0.5 * MaxFEs)[0] - Crb[tmp_id] = 2 * ((FEs + tmp_id) / MaxFEs - 0.5) - if cross_exponential: - us = self._binomial(population.group, vs, Crb) - else: - us = self._exponential(population.group, vs, Cr) - - # Reinitialise out-of-bounds elements with uniform random in [lb, ub] - oob = (us < population.Xmin) | (us > population.Xmax) - if np.any(oob): - rand_pts = ( - np.random.rand(NP, dim) * (population.Xmax - population.Xmin) - + population.Xmin - ) - us = np.where(oob, rand_pts, us) - - cost = _eval(problem, us) - optim = np.where(cost < population.cost)[0] - for i in optim: - population.update_archive(i) - population.F[optim] = F[optim] - population.Cr[optim] = Cr[optim] - SF = F[optim] - SCr = Cr[optim] - df = (population.cost[optim] - cost[optim]) / ( - population.cost[optim] + 1e-9 - ) - arc_usage = use_arc[optim] - fp = float(np.sum(df[arc_usage])) - fa = float(np.sum(df[~arc_usage])) - na = int(np.sum(arc_usage)) - population.group[optim] = us[optim] - population.cost[optim] = cost[optim] - if float(np.min(cost)) < population.gbest: - population.gbest = float(np.min(cost)) - population.gbest_solution = us[np.argmin(cost)].copy() - FEs += NP - self.pb = 0.4 - 0.2 * (FEs / MaxFEs) - population.NLPSR(FEs, MaxFEs) - population.update_M_F_Cr(SF, SCr, df) - self._update_pa(fa, fp, na, NP) - NP = population.NP - NA = population.NA - if FEs >= k * record_period: - Fevs.append(population.gbest) - k += 1 - - return population, Fevs, min(FEs, MaxFEs) - - -class JDE21: - def __init__(self) -> None: - self.sNP = 10 - self.tao1 = 0.1 - self.tao2 = 0.1 - self.Finit = 0.5 - self.CRinit = 0.9 - self.Fl_b = 0.1 - self.Fl_s = 0.17 - self.Fu = 1.1 - self.CRl_b = 0.0 - self.CRl_s = 0.1 - self.CRu_b = 1.1 - self.CRu_s = 0.8 - self.eps = 1e-12 - self.MyEps = 0.25 - self.nReset = 0 - self.sReset = 0 - self.cCopy = 0 - - def _prevec_enakih(self, cost: np.ndarray, best: float) -> bool: - eqs = len(cost[np.fabs(cost - best) < self.eps]) - return eqs > 2 and eqs > len(cost) * self.MyEps - - def _crowding(self, group: np.ndarray, vs: np.ndarray) -> np.ndarray: - NP, dim = vs.shape - dist = np.sum( - ((group * np.ones((NP, NP, dim))).transpose(1, 0, 2) - vs) ** 2, -1 - ).transpose() - return np.argmin(dist, -1) - - def step( - self, - population, - problem, - FEs: int, - FEs_end: int, - MaxFEs: int, - record_period: int = -1, - ): - if record_period <= 0: - record_period = FEs_end - FEs - NP = population.NP - dim = population.dim - sNP = self.sNP - bNP = max(NP - sNP, 2) - age = 0 - k = 1 - Fevs = [] - - def mutate_cross_select(r1, r2, r3, SF, SCr, df, age, big): - nonlocal bNP - if big: - xNP = bNP - randF = np.random.rand(xNP) * self.Fu + self.Fl_b - randCr = np.random.rand(xNP) * self.CRu_b + self.CRl_b - pF = population.F[:xNP] - pCr = population.Cr[:xNP] - else: - # Use actual slice size (may be < sNP when population shrank) - xNP = population.F[bNP:].shape[0] - if xNP == 0: - return SF, SCr, df, age - randF = np.random.rand(xNP) * self.Fu + self.Fl_s - randCr = np.random.rand(xNP) * self.CRu_b + self.CRl_s - pF = population.F[bNP:] - pCr = population.Cr[bNP:] - - rvs = np.random.rand(xNP) - F = np.where(rvs < self.tao1, randF, pF) - rvs = np.random.rand(xNP) - Cr = np.where(rvs < self.tao2, randCr, pCr) - Fs = F.repeat(dim).reshape(xNP, dim) - Crs = Cr.repeat(dim).reshape(xNP, dim) - v = population.group[r1] + Fs * ( - population.group[r2] - population.group[r3] - ) - v = np.clip(v, population.Xmin, population.Xmax) - jrand = np.random.randint(dim, size=xNP) - u = np.where( - np.random.rand(xNP, dim) < Crs, - v, - (population.group[:bNP] if big else population.group[bNP:]), - ) - u[np.arange(xNP), jrand] = v[np.arange(xNP), jrand] - cost = _eval(problem, u) - crowding_ids = ( - self._crowding(population.group[:xNP], u) - if big - else np.arange(xNP) + bNP - ) - age += xNP - for i in range(xNP): - idx = crowding_ids[i] - if cost[i] < population.cost[idx]: - population.update_archive(idx) - population.group[idx] = u[i] - population.cost[idx] = cost[i] - population.F[idx] = F[i] - population.Cr[idx] = Cr[i] - SF = np.append(SF, F[i]) - SCr = np.append(SCr, Cr[i]) - d = (population.cost[i] - cost[i]) / (population.cost[i] + 1e-9) - df = np.append(df, d) - if cost[i] < population.cbest: - age = 0 - population.cbest_id = idx - population.cbest = cost[i] - if cost[i] < population.gbest: - population.gbest = cost[i] - population.gbest_solution = u[i].copy() - - return SF, SCr, df, age - - population.sort(NP, True) - while FEs >= k * record_period: - k += 1 - while FEs < FEs_end: - df = np.array([]) - SF = np.array([]) - SCr = np.array([]) - if ( - self._prevec_enakih(population.cost[:bNP], population.gbest) - or age > MaxFEs / 10 - ): - self.nReset += 1 - population.group[:bNP] = population.initialize_group(bNP) - population.F[:bNP] = self.Finit - population.Cr[:bNP] = self.CRinit - population.cost[:bNP] = 1e15 - age = 0 - population.cbest = float(np.min(population.cost)) - population.cbest_id = int(np.argmin(population.cost)) - - if FEs < MaxFEs / 3: - mig = 1 - elif FEs < 2 * MaxFEs / 3: - mig = 2 - else: - mig = 3 - - r1 = np.random.randint(bNP, size=bNP) - count = 0 - duplicate = np.where((r1 == np.arange(bNP)) * (r1 == population.cbest_id))[ - 0 - ] - while duplicate.shape[0] > 0 and count < 25: - r1[duplicate] = np.random.randint(bNP, size=duplicate.shape[0]) - duplicate = np.where( - (r1 == np.arange(bNP)) * (r1 == population.cbest_id) - )[0] - count += 1 - r2 = np.random.randint(bNP + mig, size=bNP) - count = 0 - duplicate = np.where((r2 == np.arange(bNP)) + (r2 == r1))[0] - while duplicate.shape[0] > 0 and count < 25: - r2[duplicate] = np.random.randint(bNP + mig, size=duplicate.shape[0]) - duplicate = np.where((r2 == np.arange(bNP)) + (r2 == r1))[0] - count += 1 - r3 = np.random.randint(bNP + mig, size=bNP) - count = 0 - duplicate = np.where((r3 == np.arange(bNP)) + (r3 == r1) + (r3 == r2))[0] - while duplicate.shape[0] > 0 and count < 25: - r3[duplicate] = np.random.randint(bNP + mig, size=duplicate.shape[0]) - duplicate = np.where((r3 == np.arange(bNP)) + (r3 == r1) + (r3 == r2))[ - 0 - ] - count += 1 - SF, SCr, df, age = mutate_cross_select( - r1, r2, r3, SF, SCr, df, age, big=True - ) - FEs += bNP - if FEs >= k * record_period: - Fevs.append(population.gbest) - k += 1 - if FEs >= FEs_end or FEs >= MaxFEs: - break - - curr_sNP = NP - bNP - if ( - curr_sNP > 0 - and population.cbest_id >= bNP - and self._prevec_enakih(population.cost[bNP:], population.cbest) - ): - self.sReset += 1 - cbest = population.cbest - cbest_id = population.cbest_id - tmp = population.group[cbest_id].copy() - population.group[bNP:] = population.initialize_group(curr_sNP) - population.F[bNP:] = self.Finit - population.Cr[bNP:] = self.CRinit - population.cost[bNP:] = 1e15 - population.cbest = cbest - population.cbest_id = cbest_id - population.group[cbest_id] = tmp - population.cost[cbest_id] = cbest - - if population.cbest_id < bNP: - self.cCopy += 1 - population.cost[bNP] = population.cbest - population.group[bNP] = population.group[population.cbest_id].copy() - population.cbest_id = bNP - - curr_sNP = NP - bNP # actual small-pop size (= sNP unless shrunken) - for _ in range(bNP // curr_sNP if curr_sNP > 0 else 0): - r1 = np.random.randint(curr_sNP, size=curr_sNP) + bNP - count = 0 - duplicate = np.where(r1 == (np.arange(curr_sNP) + bNP))[0] - while duplicate.shape[0] > 0 and count < 25: - r1[duplicate] = ( - np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP - ) - duplicate = np.where(r1 == (np.arange(curr_sNP) + bNP))[0] - count += 1 - r2 = np.random.randint(curr_sNP, size=curr_sNP) + bNP - count = 0 - duplicate = np.where((r2 == (np.arange(curr_sNP) + bNP)) + (r2 == r1))[ - 0 - ] - while duplicate.shape[0] > 0 and count < 25: - r2[duplicate] = ( - np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP - ) - duplicate = np.where( - (r2 == (np.arange(curr_sNP) + bNP)) + (r2 == r1) - )[0] - count += 1 - r3 = np.random.randint(curr_sNP, size=curr_sNP) + bNP - count = 0 - duplicate = np.where( - (r3 == (np.arange(curr_sNP) + bNP)) + (r3 == r1) + (r3 == r2) - )[0] - while duplicate.shape[0] > 0 and count < 25: - r3[duplicate] = ( - np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP - ) - duplicate = np.where( - (r3 == (np.arange(curr_sNP) + bNP)) + (r3 == r1) + (r3 == r2) - )[0] - count += 1 - SF, SCr, df, age = mutate_cross_select( - r1, r2, r3, SF, SCr, df, age, big=False - ) - FEs += curr_sNP - if FEs >= k * record_period: - Fevs.append(population.gbest) - k += 1 - if FEs >= FEs_end or FEs >= MaxFEs: - break - - population.update_M_F_Cr(SF, SCr, df) - NP = min( - int(population.cal_NP_next_gen(FEs, MaxFEs)), len(population.group) - ) - population.NP = NP - population.group = population.group[-NP:] - population.cost = population.cost[-NP:] - population.F = population.F[-NP:] - population.Cr = population.Cr[-NP:] - population.cbest_id = int(np.argmin(population.cost)) - population.cbest = float(np.min(population.cost)) - bNP = max(NP - sNP, 2) - - return population, Fevs, min(FEs, MaxFEs) - - -class MadDE: - def __init__(self) -> None: - self.p = 0.18 - self.PqBX = 0.01 - self.pm = np.ones(3) / 3 - - def _ctb_w_arc( - self, - group: np.ndarray, - best: np.ndarray, - archive: np.ndarray, - Fs: np.ndarray, - ) -> np.ndarray: - NP, dim = group.shape - NB = best.shape[0] - NA = archive.shape[0] - count = 0 - rb = np.random.randint(NB, size=NP) - duplicate = np.where(rb == np.arange(NP))[0] - while duplicate.shape[0] > 0 and count < 25: - rb[duplicate] = np.random.randint(NB, size=duplicate.shape[0]) - duplicate = np.where(rb == np.arange(NP))[0] - count += 1 - count = 0 - r1 = np.random.randint(NP, size=NP) - duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] - while duplicate.shape[0] > 0 and count < 25: - r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) - duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] - count += 1 - count = 0 - r2 = np.random.randint(NP + NA, size=NP) - duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] - while duplicate.shape[0] > 0 and count < 25: - r2[duplicate] = np.random.randint(NP + NA, size=duplicate.shape[0]) - duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] - count += 1 - xb = best[rb] - x1 = group[r1] - x2 = np.concatenate((group, archive), 0)[r2] if NA > 0 else group[r2] - return group + Fs * (xb - group) + Fs * (x1 - x2) - - def _ctr_w_arc( - self, - group: np.ndarray, - archive: np.ndarray, - Fs: np.ndarray, - ) -> np.ndarray: - NP, dim = group.shape - NA = archive.shape[0] - count = 0 - r1 = np.random.randint(NP, size=NP) - duplicate = np.where(r1 == np.arange(NP))[0] - while duplicate.shape[0] > 0 and count < 25: - r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) - duplicate = np.where(r1 == np.arange(NP))[0] - count += 1 - count = 0 - r2 = np.random.randint(NP + NA, size=NP) - duplicate = np.where((r2 == np.arange(NP)) + (r2 == r1))[0] - while duplicate.shape[0] > 0 and count < 25: - r2[duplicate] = np.random.randint(NP + NA, size=duplicate.shape[0]) - duplicate = np.where((r2 == np.arange(NP)) + (r2 == r1))[0] - count += 1 - x1 = group[r1] - x2 = np.concatenate((group, archive), 0)[r2] if NA > 0 else group[r2] - return group + Fs * (x1 - x2) - - def _weighted_rtb( - self, - group: np.ndarray, - best: np.ndarray, - Fs: np.ndarray, - Fas: float, - ) -> np.ndarray: - NP, dim = group.shape - NB = best.shape[0] - count = 0 - rb = np.random.randint(NB, size=NP) - duplicate = np.where(rb == np.arange(NP))[0] - while duplicate.shape[0] > 0 and count < 25: - rb[duplicate] = np.random.randint(NB, size=duplicate.shape[0]) - duplicate = np.where(rb == np.arange(NP))[0] - count += 1 - count = 0 - r1 = np.random.randint(NP, size=NP) - duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] - while duplicate.shape[0] > 0 and count < 25: - r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) - duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] - count += 1 - count = 0 - r2 = np.random.randint(NP, size=NP) - duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] - while duplicate.shape[0] > 0 and count < 25: - r2[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) - duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] - count += 1 - return Fs * group[r1] + Fs * Fas * (best[rb] - group[r2]) - - @staticmethod - def _binomial(x: np.ndarray, v: np.ndarray, Crs: np.ndarray) -> np.ndarray: - NP, dim = x.shape - jrand = np.random.randint(dim, size=NP) - u = np.where(np.random.rand(NP, dim) < Crs, v, x) - u[np.arange(NP), jrand] = v[np.arange(NP), jrand] - return u - - def step( - self, - population, - problem, - FEs: int, - FEs_end: int, - MaxFEs: int, - record_period: int = -1, - ): - if record_period <= 0: - record_period = FEs_end - FEs - Fevs = [] - k = 1 - while FEs >= k * record_period: - k += 1 - population.sort(population.NP) - while FEs < FEs_end and FEs < MaxFEs: - NP, dim = population.NP, population.dim - q = 2 * self.p - self.p * FEs / MaxFEs - Fa = 0.5 + 0.5 * FEs / MaxFEs - Cr, F = population.choose_F_Cr() - mu = np.random.choice(3, size=NP, p=self.pm) - p1 = population.group[mu == 0] - p2 = population.group[mu == 1] - p3 = population.group[mu == 2] - pbest = population.group[: max(int(self.p * NP), 2)] - qbest = population.group[: max(int(q * NP), 2)] - Fs = F.repeat(dim).reshape(NP, dim) - v1 = self._ctb_w_arc(p1, pbest, population.archive, Fs[mu == 0]) - v2 = self._ctr_w_arc(p2, population.archive, Fs[mu == 1]) - v3 = self._weighted_rtb(p3, qbest, Fs[mu == 2], Fa) - v = np.zeros((NP, dim)) - v[mu == 0] = v1 - v[mu == 1] = v2 - v[mu == 2] = v3 - - # Bounds handling: midpoint reflection with lb/ub - Xmin = population.Xmin - Xmax = population.Xmax - v = np.where(v < Xmin, (population.group + Xmin) / 2, v) - v = np.where(v > Xmax, (population.group + Xmax) / 2, v) - - rvs = np.random.rand(NP) - Crs = Cr.repeat(dim).reshape(NP, dim) - u = np.zeros((NP, dim)) - if np.sum(rvs <= self.PqBX) > 0: - qu = v[rvs <= self.PqBX] - if population.archive.shape[0] > 0: - qbest = np.concatenate((population.group, population.archive), 0)[ - : max(int(q * (NP + population.archive.shape[0])), 2) - ] - cross_qbest = qbest[np.random.randint(qbest.shape[0], size=qu.shape[0])] - qu = self._binomial(cross_qbest, qu, Crs[rvs <= self.PqBX]) - u[rvs <= self.PqBX] = qu - bu = v[rvs > self.PqBX] - bu = self._binomial( - population.group[rvs > self.PqBX], bu, Crs[rvs > self.PqBX] - ) - u[rvs > self.PqBX] = bu - - ncost = _eval(problem, u) - FEs += NP - optim = np.where(ncost < population.cost)[0] - for i in optim: - population.update_archive(i) - SF = F[optim] - SCr = Cr[optim] - df = np.maximum(0, population.cost - ncost) - population.update_M_F_Cr(SF, SCr, df[optim]) - count_S = np.zeros(3) - for i in range(3): - count_S[i] = np.mean(df[mu == i] / (population.cost[mu == i] + 1e-9)) - if np.sum(count_S) > 0: - self.pm = np.clip(count_S / np.sum(count_S), 0.1, 0.9) - self.pm /= np.sum(self.pm) - else: - self.pm = np.ones(3) / 3 - population.group[optim] = u[optim] - population.cost = np.minimum(population.cost, ncost) - population.NLPSR(FEs, MaxFEs) - if float(np.min(population.cost)) < population.gbest: - population.gbest = float(np.min(population.cost)) - population.gbest_solution = population.group[ - np.argmin(population.cost) - ].copy() - if FEs >= k * record_period: - Fevs.append(population.gbest) - k += 1 - - return population, Fevs, min(FEs, MaxFEs) - - -_PORTFOLIO: dict[str, type] = { - "NL_SHADE_RSP": NL_SHADE_RSP, - "MADDE": MadDE, - "JDE21": JDE21, -} - - -def get_rldas_portfolio(names: list[str] | None = None) -> list: - """Return instantiated RL-DAS optimizer objects. - - Parameters - ---------- - names: - Optimizer names. Defaults to ``["NL_SHADE_RSP", "MADDE", "JDE21"]``. - Valid names: ``"NL_SHADE_RSP"``, ``"MADDE"``, ``"JDE21"``. - """ - if names is None: - names = ["NL_SHADE_RSP", "MADDE", "JDE21"] - unknown = [n for n in names if n not in _PORTFOLIO] - if unknown: - raise ValueError( - f"Unknown RL-DAS optimizer(s): {unknown}. Valid choices: {list(_PORTFOLIO)}" - ) - return [_PORTFOLIO[n]() for n in names] diff --git a/agents/rl_das/optimizers/__init__.py b/agents/rl_das/optimizers/__init__.py new file mode 100644 index 0000000..61c8883 --- /dev/null +++ b/agents/rl_das/optimizers/__init__.py @@ -0,0 +1,40 @@ +"""DE optimizer registry for RL-DAS. + +Optimizer implementations: + - nl_shade_rsp.py → NL_SHADE_RSP + - jde21.py → JDE21 + - madde.py → MadDE +""" + +from __future__ import annotations + +from .jde21 import JDE21 +from .madde import MadDE +from .nl_shade_rsp import NL_SHADE_RSP + +__all__ = ["NL_SHADE_RSP", "JDE21", "MadDE", "get_rldas_portfolio", "_PORTFOLIO"] + +_PORTFOLIO: dict[str, type] = { + "NL_SHADE_RSP": NL_SHADE_RSP, + "MADDE": MadDE, + "JDE21": JDE21, +} + + +def get_rldas_portfolio(names: list[str] | None = None) -> list: + """Return instantiated RL-DAS optimizer objects. + + Parameters + ---------- + names: + Optimizer names. Defaults to ``["NL_SHADE_RSP", "MADDE", "JDE21"]``. + Valid names: ``"NL_SHADE_RSP"``, ``"MADDE"``, ``"JDE21"``. + """ + if names is None: + names = ["NL_SHADE_RSP", "MADDE", "JDE21"] + unknown = [n for n in names if n not in _PORTFOLIO] + if unknown: + raise ValueError( + f"Unknown RL-DAS optimizer(s): {unknown}. Valid choices: {list(_PORTFOLIO)}" + ) + return [_PORTFOLIO[n]() for n in names] diff --git a/agents/rl_das/optimizers/jde21.py b/agents/rl_das/optimizers/jde21.py new file mode 100644 index 0000000..06a1b8b --- /dev/null +++ b/agents/rl_das/optimizers/jde21.py @@ -0,0 +1,275 @@ +"""JDE21 differential-evolution optimizer for RL-DAS.""" + +from __future__ import annotations + +import numpy as np + + +def _eval(problem, xs: np.ndarray) -> np.ndarray: + return np.array([float(problem(xs[i])) for i in range(len(xs))]) + + +class JDE21: + def __init__(self) -> None: + self.sNP = 10 + self.tao1 = 0.1 + self.tao2 = 0.1 + self.Finit = 0.5 + self.CRinit = 0.9 + self.Fl_b = 0.1 + self.Fl_s = 0.17 + self.Fu = 1.1 + self.CRl_b = 0.0 + self.CRl_s = 0.1 + self.CRu_b = 1.1 + self.CRu_s = 0.8 + self.eps = 1e-12 + self.MyEps = 0.25 + self.nReset = 0 + self.sReset = 0 + self.cCopy = 0 + + def _prevec_enakih(self, cost: np.ndarray, best: float) -> bool: + eqs = len(cost[np.fabs(cost - best) < self.eps]) + return eqs > 2 and eqs > len(cost) * self.MyEps + + def _crowding(self, group: np.ndarray, vs: np.ndarray) -> np.ndarray: + NP, dim = vs.shape + dist = np.sum( + ((group * np.ones((NP, NP, dim))).transpose(1, 0, 2) - vs) ** 2, -1 + ).transpose() + return np.argmin(dist, -1) + + def step( + self, + population, + problem, + FEs: int, + FEs_end: int, + MaxFEs: int, + record_period: int = -1, + ): + if record_period <= 0: + record_period = FEs_end - FEs + NP = population.NP + dim = population.dim + sNP = self.sNP + bNP = max(NP - sNP, 2) + age = 0 + k = 1 + Fevs = [] + + def mutate_cross_select(r1, r2, r3, SF, SCr, df, age, big): + nonlocal bNP + if big: + xNP = bNP + randF = np.random.rand(xNP) * self.Fu + self.Fl_b + randCr = np.random.rand(xNP) * self.CRu_b + self.CRl_b + pF = population.F[:xNP] + pCr = population.Cr[:xNP] + else: + # Use actual slice size (may be < sNP when population shrank) + xNP = population.F[bNP:].shape[0] + if xNP == 0: + return SF, SCr, df, age + randF = np.random.rand(xNP) * self.Fu + self.Fl_s + randCr = np.random.rand(xNP) * self.CRu_b + self.CRl_s + pF = population.F[bNP:] + pCr = population.Cr[bNP:] + + rvs = np.random.rand(xNP) + F = np.where(rvs < self.tao1, randF, pF) + rvs = np.random.rand(xNP) + Cr = np.where(rvs < self.tao2, randCr, pCr) + Fs = F.repeat(dim).reshape(xNP, dim) + Crs = Cr.repeat(dim).reshape(xNP, dim) + v = population.group[r1] + Fs * ( + population.group[r2] - population.group[r3] + ) + v = np.clip(v, population.Xmin, population.Xmax) + jrand = np.random.randint(dim, size=xNP) + u = np.where( + np.random.rand(xNP, dim) < Crs, + v, + (population.group[:bNP] if big else population.group[bNP:]), + ) + u[np.arange(xNP), jrand] = v[np.arange(xNP), jrand] + cost = _eval(problem, u) + crowding_ids = ( + self._crowding(population.group[:xNP], u) + if big + else np.arange(xNP) + bNP + ) + age += xNP + for i in range(xNP): + idx = crowding_ids[i] + if cost[i] < population.cost[idx]: + population.update_archive(idx) + population.group[idx] = u[i] + population.cost[idx] = cost[i] + population.F[idx] = F[i] + population.Cr[idx] = Cr[i] + SF = np.append(SF, F[i]) + SCr = np.append(SCr, Cr[i]) + d = (population.cost[i] - cost[i]) / (population.cost[i] + 1e-9) + df = np.append(df, d) + if cost[i] < population.cbest: + age = 0 + population.cbest_id = idx + population.cbest = cost[i] + if cost[i] < population.gbest: + population.gbest = cost[i] + population.gbest_solution = u[i].copy() + + return SF, SCr, df, age + + population.sort(NP, True) + while FEs >= k * record_period: + k += 1 + while FEs < FEs_end: + df = np.array([]) + SF = np.array([]) + SCr = np.array([]) + if ( + self._prevec_enakih(population.cost[:bNP], population.gbest) + or age > MaxFEs / 10 + ): + self.nReset += 1 + population.group[:bNP] = population.initialize_group(bNP) + population.F[:bNP] = self.Finit + population.Cr[:bNP] = self.CRinit + population.cost[:bNP] = 1e15 + age = 0 + population.cbest = float(np.min(population.cost)) + population.cbest_id = int(np.argmin(population.cost)) + + if FEs < MaxFEs / 3: + mig = 1 + elif FEs < 2 * MaxFEs / 3: + mig = 2 + else: + mig = 3 + + r1 = np.random.randint(bNP, size=bNP) + count = 0 + duplicate = np.where((r1 == np.arange(bNP)) * (r1 == population.cbest_id))[ + 0 + ] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(bNP, size=duplicate.shape[0]) + duplicate = np.where( + (r1 == np.arange(bNP)) * (r1 == population.cbest_id) + )[0] + count += 1 + r2 = np.random.randint(bNP + mig, size=bNP) + count = 0 + duplicate = np.where((r2 == np.arange(bNP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(bNP + mig, size=duplicate.shape[0]) + duplicate = np.where((r2 == np.arange(bNP)) + (r2 == r1))[0] + count += 1 + r3 = np.random.randint(bNP + mig, size=bNP) + count = 0 + duplicate = np.where((r3 == np.arange(bNP)) + (r3 == r1) + (r3 == r2))[0] + while duplicate.shape[0] > 0 and count < 25: + r3[duplicate] = np.random.randint(bNP + mig, size=duplicate.shape[0]) + duplicate = np.where((r3 == np.arange(bNP)) + (r3 == r1) + (r3 == r2))[ + 0 + ] + count += 1 + SF, SCr, df, age = mutate_cross_select( + r1, r2, r3, SF, SCr, df, age, big=True + ) + FEs += bNP + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + if FEs >= FEs_end or FEs >= MaxFEs: + break + + curr_sNP = NP - bNP + if ( + curr_sNP > 0 + and population.cbest_id >= bNP + and self._prevec_enakih(population.cost[bNP:], population.cbest) + ): + self.sReset += 1 + cbest = population.cbest + cbest_id = population.cbest_id + tmp = population.group[cbest_id].copy() + population.group[bNP:] = population.initialize_group(curr_sNP) + population.F[bNP:] = self.Finit + population.Cr[bNP:] = self.CRinit + population.cost[bNP:] = 1e15 + population.cbest = cbest + population.cbest_id = cbest_id + population.group[cbest_id] = tmp + population.cost[cbest_id] = cbest + + if population.cbest_id < bNP: + self.cCopy += 1 + population.cost[bNP] = population.cbest + population.group[bNP] = population.group[population.cbest_id].copy() + population.cbest_id = bNP + + curr_sNP = NP - bNP # actual small-pop size (= sNP unless shrunken) + for _ in range(bNP // curr_sNP if curr_sNP > 0 else 0): + r1 = np.random.randint(curr_sNP, size=curr_sNP) + bNP + count = 0 + duplicate = np.where(r1 == (np.arange(curr_sNP) + bNP))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = ( + np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP + ) + duplicate = np.where(r1 == (np.arange(curr_sNP) + bNP))[0] + count += 1 + r2 = np.random.randint(curr_sNP, size=curr_sNP) + bNP + count = 0 + duplicate = np.where((r2 == (np.arange(curr_sNP) + bNP)) + (r2 == r1))[ + 0 + ] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = ( + np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP + ) + duplicate = np.where( + (r2 == (np.arange(curr_sNP) + bNP)) + (r2 == r1) + )[0] + count += 1 + r3 = np.random.randint(curr_sNP, size=curr_sNP) + bNP + count = 0 + duplicate = np.where( + (r3 == (np.arange(curr_sNP) + bNP)) + (r3 == r1) + (r3 == r2) + )[0] + while duplicate.shape[0] > 0 and count < 25: + r3[duplicate] = ( + np.random.randint(curr_sNP, size=duplicate.shape[0]) + bNP + ) + duplicate = np.where( + (r3 == (np.arange(curr_sNP) + bNP)) + (r3 == r1) + (r3 == r2) + )[0] + count += 1 + SF, SCr, df, age = mutate_cross_select( + r1, r2, r3, SF, SCr, df, age, big=False + ) + FEs += curr_sNP + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + if FEs >= FEs_end or FEs >= MaxFEs: + break + + population.update_M_F_Cr(SF, SCr, df) + NP = min( + int(population.cal_NP_next_gen(FEs, MaxFEs)), len(population.group) + ) + population.NP = NP + population.group = population.group[-NP:] + population.cost = population.cost[-NP:] + population.F = population.F[-NP:] + population.Cr = population.Cr[-NP:] + population.cbest_id = int(np.argmin(population.cost)) + population.cbest = float(np.min(population.cost)) + bNP = max(NP - sNP, 2) + + return population, Fevs, min(FEs, MaxFEs) diff --git a/agents/rl_das/optimizers/madde.py b/agents/rl_das/optimizers/madde.py new file mode 100644 index 0000000..7bd5911 --- /dev/null +++ b/agents/rl_das/optimizers/madde.py @@ -0,0 +1,209 @@ +"""MadDE differential-evolution optimizer for RL-DAS.""" + +from __future__ import annotations + +import numpy as np + + +def _eval(problem, xs: np.ndarray) -> np.ndarray: + return np.array([float(problem(xs[i])) for i in range(len(xs))]) + + +class MadDE: + def __init__(self) -> None: + self.p = 0.18 + self.PqBX = 0.01 + self.pm = np.ones(3) / 3 + + def _ctb_w_arc( + self, + group: np.ndarray, + best: np.ndarray, + archive: np.ndarray, + Fs: np.ndarray, + ) -> np.ndarray: + NP, dim = group.shape + NB = best.shape[0] + NA = archive.shape[0] + count = 0 + rb = np.random.randint(NB, size=NP) + duplicate = np.where(rb == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 25: + rb[duplicate] = np.random.randint(NB, size=duplicate.shape[0]) + duplicate = np.where(rb == np.arange(NP))[0] + count += 1 + count = 0 + r1 = np.random.randint(NP, size=NP) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + count += 1 + count = 0 + r2 = np.random.randint(NP + NA, size=NP) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(NP + NA, size=duplicate.shape[0]) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + count += 1 + xb = best[rb] + x1 = group[r1] + x2 = np.concatenate((group, archive), 0)[r2] if NA > 0 else group[r2] + return group + Fs * (xb - group) + Fs * (x1 - x2) + + def _ctr_w_arc( + self, + group: np.ndarray, + archive: np.ndarray, + Fs: np.ndarray, + ) -> np.ndarray: + NP, dim = group.shape + NA = archive.shape[0] + count = 0 + r1 = np.random.randint(NP, size=NP) + duplicate = np.where(r1 == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where(r1 == np.arange(NP))[0] + count += 1 + count = 0 + r2 = np.random.randint(NP + NA, size=NP) + duplicate = np.where((r2 == np.arange(NP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(NP + NA, size=duplicate.shape[0]) + duplicate = np.where((r2 == np.arange(NP)) + (r2 == r1))[0] + count += 1 + x1 = group[r1] + x2 = np.concatenate((group, archive), 0)[r2] if NA > 0 else group[r2] + return group + Fs * (x1 - x2) + + def _weighted_rtb( + self, + group: np.ndarray, + best: np.ndarray, + Fs: np.ndarray, + Fas: float, + ) -> np.ndarray: + NP, dim = group.shape + NB = best.shape[0] + count = 0 + rb = np.random.randint(NB, size=NP) + duplicate = np.where(rb == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 25: + rb[duplicate] = np.random.randint(NB, size=duplicate.shape[0]) + duplicate = np.where(rb == np.arange(NP))[0] + count += 1 + count = 0 + r1 = np.random.randint(NP, size=NP) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r1 == rb) + (r1 == np.arange(NP)))[0] + count += 1 + count = 0 + r2 = np.random.randint(NP, size=NP) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r2 == rb) + (r2 == np.arange(NP)) + (r2 == r1))[0] + count += 1 + return Fs * group[r1] + Fs * Fas * (best[rb] - group[r2]) + + @staticmethod + def _binomial(x: np.ndarray, v: np.ndarray, Crs: np.ndarray) -> np.ndarray: + NP, dim = x.shape + jrand = np.random.randint(dim, size=NP) + u = np.where(np.random.rand(NP, dim) < Crs, v, x) + u[np.arange(NP), jrand] = v[np.arange(NP), jrand] + return u + + def step( + self, + population, + problem, + FEs: int, + FEs_end: int, + MaxFEs: int, + record_period: int = -1, + ): + if record_period <= 0: + record_period = FEs_end - FEs + Fevs = [] + k = 1 + while FEs >= k * record_period: + k += 1 + population.sort(population.NP) + while FEs < FEs_end and FEs < MaxFEs: + NP, dim = population.NP, population.dim + q = 2 * self.p - self.p * FEs / MaxFEs + Fa = 0.5 + 0.5 * FEs / MaxFEs + Cr, F = population.choose_F_Cr() + mu = np.random.choice(3, size=NP, p=self.pm) + p1 = population.group[mu == 0] + p2 = population.group[mu == 1] + p3 = population.group[mu == 2] + pbest = population.group[: max(int(self.p * NP), 2)] + qbest = population.group[: max(int(q * NP), 2)] + Fs = F.repeat(dim).reshape(NP, dim) + v1 = self._ctb_w_arc(p1, pbest, population.archive, Fs[mu == 0]) + v2 = self._ctr_w_arc(p2, population.archive, Fs[mu == 1]) + v3 = self._weighted_rtb(p3, qbest, Fs[mu == 2], Fa) + v = np.zeros((NP, dim)) + v[mu == 0] = v1 + v[mu == 1] = v2 + v[mu == 2] = v3 + + # Bounds handling: midpoint reflection with lb/ub + Xmin = population.Xmin + Xmax = population.Xmax + v = np.where(v < Xmin, (population.group + Xmin) / 2, v) + v = np.where(v > Xmax, (population.group + Xmax) / 2, v) + + rvs = np.random.rand(NP) + Crs = Cr.repeat(dim).reshape(NP, dim) + u = np.zeros((NP, dim)) + if np.sum(rvs <= self.PqBX) > 0: + qu = v[rvs <= self.PqBX] + if population.archive.shape[0] > 0: + qbest = np.concatenate((population.group, population.archive), 0)[ + : max(int(q * (NP + population.archive.shape[0])), 2) + ] + cross_qbest = qbest[np.random.randint(qbest.shape[0], size=qu.shape[0])] + qu = self._binomial(cross_qbest, qu, Crs[rvs <= self.PqBX]) + u[rvs <= self.PqBX] = qu + bu = v[rvs > self.PqBX] + bu = self._binomial( + population.group[rvs > self.PqBX], bu, Crs[rvs > self.PqBX] + ) + u[rvs > self.PqBX] = bu + + ncost = _eval(problem, u) + FEs += NP + optim = np.where(ncost < population.cost)[0] + for i in optim: + population.update_archive(i) + SF = F[optim] + SCr = Cr[optim] + df = np.maximum(0, population.cost - ncost) + population.update_M_F_Cr(SF, SCr, df[optim]) + count_S = np.zeros(3) + for i in range(3): + count_S[i] = np.mean(df[mu == i] / (population.cost[mu == i] + 1e-9)) + if np.sum(count_S) > 0: + self.pm = np.clip(count_S / np.sum(count_S), 0.1, 0.9) + self.pm /= np.sum(self.pm) + else: + self.pm = np.ones(3) / 3 + population.group[optim] = u[optim] + population.cost = np.minimum(population.cost, ncost) + population.NLPSR(FEs, MaxFEs) + if float(np.min(population.cost)) < population.gbest: + population.gbest = float(np.min(population.cost)) + population.gbest_solution = population.group[ + np.argmin(population.cost) + ].copy() + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + + return population, Fevs, min(FEs, MaxFEs) diff --git a/agents/rl_das/optimizers/nl_shade_rsp.py b/agents/rl_das/optimizers/nl_shade_rsp.py new file mode 100644 index 0000000..8a180a7 --- /dev/null +++ b/agents/rl_das/optimizers/nl_shade_rsp.py @@ -0,0 +1,166 @@ +"""NL_SHADE_RSP differential-evolution optimizer for RL-DAS.""" + +from __future__ import annotations + +import numpy as np + + +def _eval(problem, xs: np.ndarray) -> np.ndarray: + return np.array([float(problem(xs[i])) for i in range(len(xs))]) + + +class NL_SHADE_RSP: + def __init__(self) -> None: + self.pb = 0.4 + self.pa = 0.5 + + def _binomial(self, x: np.ndarray, v: np.ndarray, cr: np.ndarray) -> np.ndarray: + NP, dim = x.shape + jrand = np.random.randint(dim, size=NP) + u = np.where(np.random.rand(NP, dim) < cr.repeat(dim).reshape(NP, dim), v, x) + u[np.arange(NP), jrand] = v[np.arange(NP), jrand] + return u + + def _exponential(self, x: np.ndarray, v: np.ndarray, cr: np.ndarray) -> np.ndarray: + NP, dim = x.shape + u = x.copy() + L = np.random.randint(dim, size=NP).repeat(dim).reshape(NP, dim) + L = L <= np.arange(dim) + rvs = np.random.rand(NP, dim) + L = np.where(rvs > cr.repeat(dim).reshape(NP, dim), L, 0) + return u * (1 - L) + v * L + + def _update_pa(self, fa: float, fp: float, na: int, NP: int) -> None: + if na == 0 or fa == 0: + self.pa = 0.5 + return + self.pa = (fa / (na + 1e-15)) / ((fa / (na + 1e-15)) + (fp / (NP - na + 1e-15))) + self.pa = float(np.clip(self.pa, 0.1, 0.9)) + + def step( + self, + population, + problem, + FEs: int, + FEs_end: int, + MaxFEs: int, + record_period: int = -1, + ): + if record_period <= 0: + record_period = FEs_end - FEs + NP, dim = population.NP, population.dim + NA = int(NP * 2.1) + if NA < population.archive.shape[0]: + population.archive = population.archive[:NA] + self.pa = 0.5 + k = 1 + Fevs = [] + while FEs >= k * record_period: + k += 1 + population.sort(population.NP) + + while FEs < FEs_end and FEs < MaxFEs: + Cr, F = population.choose_F_Cr() + Cr = np.sort(Cr) + pr = np.exp(-(np.arange(NP) + 1) / NP) + pr /= np.sum(pr) + cross_exponential = np.random.random() < 0.5 + pb_upper = int(max(2, NP * self.pb)) + pbs = np.random.randint(pb_upper, size=NP) + count = 0 + duplicate = np.where(pbs == np.arange(NP))[0] + while duplicate.shape[0] > 0 and count < 1: + pbs[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where(pbs == np.arange(NP))[0] + count += 1 + xpb = population.group[pbs] + r1 = np.random.randint(NP, size=NP) + count = 0 + duplicate = np.where((r1 == np.arange(NP)) + (r1 == pbs))[0] + while duplicate.shape[0] > 0 and count < 25: + r1[duplicate] = np.random.randint(NP, size=duplicate.shape[0]) + duplicate = np.where((r1 == np.arange(NP)) + (r1 == pbs))[0] + count += 1 + x1 = population.group[r1] + rvs = np.random.rand(NP) + r2_pop = np.where(rvs >= self.pa)[0] + r2_arc = np.where(rvs < self.pa)[0] + use_arc = np.zeros(NP, dtype=bool) + use_arc[r2_arc] = True + if population.archive.shape[0] < 25: + r2_pop = np.arange(NP) + r2_arc = np.array([], dtype=np.int32) + r2 = np.random.choice(np.arange(NP), size=r2_pop.shape[0], p=pr) + count = 0 + duplicate = np.where( + (r2 == r2_pop) + (r2 == pbs[r2_pop]) + (r2 == r1[r2_pop]) + )[0] + while duplicate.shape[0] > 0 and count < 25: + r2[duplicate] = np.random.choice( + np.arange(NP), size=duplicate.shape[0], p=pr + ) + duplicate = np.where( + (r2 == r2_pop) + (r2 == pbs[r2_pop]) + (r2 == r1[r2_pop]) + )[0] + count += 1 + x2 = np.zeros((NP, dim)) + if r2_pop.shape[0] > 0: + x2[r2_pop] = population.group[r2] + if r2_arc.shape[0] > 0: + x2[r2_arc] = population.archive[ + np.random.randint( + min(population.archive.shape[0], NA), + size=r2_arc.shape[0], + ) + ] + Fs = F.repeat(dim).reshape(NP, dim) + vs = population.group + Fs * (xpb - population.group) + Fs * (x1 - x2) + Crb = np.zeros(NP) + tmp_id = np.where(np.arange(NP) + FEs < 0.5 * MaxFEs)[0] + Crb[tmp_id] = 2 * ((FEs + tmp_id) / MaxFEs - 0.5) + if cross_exponential: + us = self._binomial(population.group, vs, Crb) + else: + us = self._exponential(population.group, vs, Cr) + + # Reinitialise out-of-bounds elements with uniform random in [lb, ub] + oob = (us < population.Xmin) | (us > population.Xmax) + if np.any(oob): + rand_pts = ( + np.random.rand(NP, dim) * (population.Xmax - population.Xmin) + + population.Xmin + ) + us = np.where(oob, rand_pts, us) + + cost = _eval(problem, us) + optim = np.where(cost < population.cost)[0] + for i in optim: + population.update_archive(i) + population.F[optim] = F[optim] + population.Cr[optim] = Cr[optim] + SF = F[optim] + SCr = Cr[optim] + df = (population.cost[optim] - cost[optim]) / ( + population.cost[optim] + 1e-9 + ) + arc_usage = use_arc[optim] + fp = float(np.sum(df[arc_usage])) + fa = float(np.sum(df[~arc_usage])) + na = int(np.sum(arc_usage)) + population.group[optim] = us[optim] + population.cost[optim] = cost[optim] + if float(np.min(cost)) < population.gbest: + population.gbest = float(np.min(cost)) + population.gbest_solution = us[np.argmin(cost)].copy() + FEs += NP + self.pb = 0.4 - 0.2 * (FEs / MaxFEs) + population.NLPSR(FEs, MaxFEs) + population.update_M_F_Cr(SF, SCr, df) + self._update_pa(fa, fp, na, NP) + NP = population.NP + NA = population.NA + if FEs >= k * record_period: + Fevs.append(population.gbest) + k += 1 + + return population, Fevs, min(FEs, MaxFEs) diff --git a/agents/rl_das/trainer.py b/agents/rl_das/trainer.py index 2f86581..ee9b343 100644 --- a/agents/rl_das/trainer.py +++ b/agents/rl_das/trainer.py @@ -112,7 +112,7 @@ def train( ep = _run_episode(train_env, agent, deterministic=False) epoch_rewards.append(ep["total_reward"]) - diagnostics = agent.learn(k_epoch) + agent.learn(k_epoch) agent.rollout.clear() mean_train_reward = float(np.mean(epoch_rewards)) diff --git a/baselines.py b/baselines.py index 9fbd9c6..909e983 100644 --- a/baselines.py +++ b/baselines.py @@ -27,7 +27,7 @@ Outputs ------- - results/_.jsonl per-problem {problem_id, agent, best_y, gap} + results/_.jsonl per-problem {problem_id: {area_under_optimization_curve, aocc, final_fitness, agent}} results/_baselines_summary.jsonl aggregate comparison (when --agent all) """ @@ -35,7 +35,6 @@ import json import os import warnings -from itertools import product import cocoex import numpy as np @@ -45,7 +44,7 @@ from das.optimizers.portfolio import get_portfolio from das.utils import set_seed from das.env.bbob_splits import ALL_DIMS, get_train_test_split -from das.training.common import load_global_optima +from das.training.common import compute_run_stats, load_global_optima warnings.filterwarnings("ignore") @@ -71,15 +70,22 @@ def _policy(obs: np.ndarray, n_actions: int) -> int: # ------------------------------------------------------------------ # -def run_episode(env: DASEnv, policy_fn) -> dict: - """Run one complete episode; return the final info dict.""" - obs, info = env.reset() +def run_episode(env: DASEnv, policy_fn) -> tuple[dict, list[tuple[int, float]]]: + """Run one complete episode. + + Returns the final step-info dict and the full best-so-far improvement + history accumulated across all checkpoints (exact AOCC input). + """ + obs, _ = env.reset() done = False + step_info: dict = {} + fitness_history: list[tuple[int, float]] = [] while not done: action = policy_fn(obs, env.action_space.n) - obs, _, terminated, truncated, info = env.step(action) + obs, _, terminated, truncated, step_info = env.step(action) done = terminated or truncated - return info + fitness_history.extend(step_info.get("fitness_history_step", [])) + return step_info, fitness_history def collect_env_results( @@ -104,17 +110,11 @@ def collect_env_results( ) results = [] for problem_id in tqdm(test_ids, desc=f" {agent_tag}", smoothing=0.0): - info = run_episode(env, policy_fn) - best_y = info.get("best_y", float("inf")) - optimum = global_optima.get(problem_id, 0.0) - results.append( - { - "problem_id": problem_id, - "agent": agent_tag, - "best_y": best_y, - "gap": best_y - optimum, - } - ) + step_info, fitness_history = run_episode(env, policy_fn) + max_fe = step_info.get("n_fe", 0) + global_minimum = global_optima.get(problem_id, 0.0) + stats = compute_run_stats(fitness_history, max_fe, global_minimum) + results.append({problem_id: {**stats, "agent": agent_tag}}) env.close() return results @@ -125,9 +125,17 @@ def collect_env_results( def run_single_algorithm( - optimizer_class, problem, fe_multiplier: int, n_individuals: int -) -> float: - """Run one optimizer for the full budget in one uninterrupted call.""" + optimizer_class, + problem, + fe_multiplier: int, + n_individuals: int, + global_minimum: float = 0.0, +) -> dict[str, float]: + """Run one optimizer for the full budget in one uninterrupted call. + + Returns a stats dict with area_under_optimization_curve, aocc, and final_fitness. + Uses the optimizer's exact fitness_history (improvement points) for AOCC. + """ max_fe = fe_multiplier * problem.dimension problem_config = { "fitness_function": problem, @@ -145,7 +153,8 @@ def run_single_algorithm( result = optimizer.optimize() if isinstance(result, tuple): result = result[0] - return float(result.get("best_so_far_y", float("inf"))) + fitness_history = result.get("fitness_history", []) + return compute_run_stats(fitness_history, max_fe, global_minimum) def collect_single_results( @@ -161,18 +170,11 @@ def collect_single_results( results = [] for problem_id in tqdm(test_ids, desc=f" {agent_tag}", smoothing=0.0): problem = suite.get_problem(problem_id) - best_y = run_single_algorithm( - optimizer_class, problem, fe_multiplier, n_individuals - ) - optimum = global_optima.get(problem_id, 0.0) - results.append( - { - "problem_id": problem_id, - "agent": agent_tag, - "best_y": best_y, - "gap": best_y - optimum, - } + global_minimum = global_optima.get(problem_id, 0.0) + stats = run_single_algorithm( + optimizer_class, problem, fe_multiplier, n_individuals, global_minimum ) + results.append({problem_id: {**stats, "agent": agent_tag}}) return results @@ -192,38 +194,43 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list ------- oracle_best, oracle_worst: per-problem dicts annotated with winning agent """ - # Index by problem_id - by_problem: dict[str, list[dict]] = {} + by_problem: dict[str, list[tuple[str, dict]]] = {} for tag, records in all_results.items(): for r in records: - pid = r["problem_id"] - by_problem.setdefault(pid, []).append(r) + pid, metrics = next(iter(r.items())) + by_problem.setdefault(pid, []).append((pid, metrics)) oracle_best, oracle_worst = [], [] - for pid, records in by_problem.items(): - best = min(records, key=lambda r: r["gap"]) - worst = max(records, key=lambda r: r["gap"]) + for pid, entries in by_problem.items(): + best_pid, best_m = min(entries, key=lambda e: e[1]["final_fitness"]) + worst_pid, worst_m = max(entries, key=lambda e: e[1]["final_fitness"]) oracle_best.append( { - "problem_id": pid, - "agent": "oracle-best", - "best_agent": best["agent"], - "best_y": best["best_y"], - "gap": best["gap"], + pid: { + "area_under_optimization_curve": best_m[ + "area_under_optimization_curve" + ], + "final_fitness": best_m["final_fitness"], + "agent": "oracle-best", + "best_agent": best_m["agent"], + } } ) oracle_worst.append( { - "problem_id": pid, - "agent": "oracle-worst", - "worst_agent": worst["agent"], - "best_y": worst["best_y"], - "gap": worst["gap"], + pid: { + "area_under_optimization_curve": worst_m[ + "area_under_optimization_curve" + ], + "final_fitness": worst_m["final_fitness"], + "agent": "oracle-worst", + "worst_agent": worst_m["agent"], + } } ) - oracle_best.sort(key=lambda r: r["problem_id"]) - oracle_worst.sort(key=lambda r: r["problem_id"]) + oracle_best.sort(key=lambda r: next(iter(r))) + oracle_worst.sort(key=lambda r: next(iter(r))) return oracle_best, oracle_worst @@ -233,14 +240,20 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list def summarise(tag: str, records: list[dict]) -> dict: - gaps = [r["gap"] for r in records] + fitnesses = [next(iter(r.values()))["final_fitness"] for r in records] + aocc_vals = [next(iter(r.values()))["aocc"] for r in records] + auoc_vals = [ + next(iter(r.values()))["area_under_optimization_curve"] for r in records + ] return { "agent": tag, - "n_problems": len(gaps), - "mean_gap": float(np.mean(gaps)), - "median_gap": float(np.median(gaps)), - "best_gap": float(np.min(gaps)), - "worst_gap": float(np.max(gaps)), + "n_problems": len(fitnesses), + "mean_final_fitness": float(np.mean(fitnesses)), + "median_final_fitness": float(np.median(fitnesses)), + "best_final_fitness": float(np.min(fitnesses)), + "worst_final_fitness": float(np.max(fitnesses)), + "mean_aocc": float(np.mean(aocc_vals)), + "mean_auoc": float(np.mean(auoc_vals)), } @@ -252,17 +265,15 @@ def save_results(records: list[dict], path: str) -> None: def print_summary(summaries: list[dict]) -> None: width = max(len(s["agent"]) for s in summaries) + 2 - header = ( - f" {'Agent':<{width}} {'Mean gap':>12} {'Median gap':>12} {'Best gap':>12}" - ) + header = f" {'Agent':<{width}} {'Mean fitness':>14} {'Median fitness':>14} {'Mean AUOC':>14}" print(header) print(" " + "-" * (len(header) - 2)) for s in summaries: print( f" {s['agent']:<{width}} " - f"{s['mean_gap']:>12.4e} " - f"{s['median_gap']:>12.4e} " - f"{s['best_gap']:>12.4e}" + f"{s['mean_final_fitness']:>14.4e} " + f"{s['median_final_fitness']:>14.4e} " + f"{s['mean_auoc']:>14.4e}" ) diff --git a/cv.py b/cv.py index 8b2a6d1..323d7f3 100644 --- a/cv.py +++ b/cv.py @@ -195,6 +195,13 @@ def _parse_args() -> argparse.Namespace: "--ppo-epochs", type=int, default=6, help="PPO gradient epochs per update" ) exp.add_argument("--device", default="cpu", help="PyTorch device") + exp.add_argument( + "-j", + "--n-jobs", + type=int, + default=1, + help="Number of folds to run in parallel (default: 1 = sequential)", + ) return root.parse_args() diff --git a/das/env/__init__.py b/das/env/__init__.py index 111b9ff..0ba3a18 100644 --- a/das/env/__init__.py +++ b/das/env/__init__.py @@ -1 +1,3 @@ from das.env.das_env import DASEnv + +__all__ = ["DASEnv"] diff --git a/das/env/das_env.py b/das/env/das_env.py index edd4c90..22a51d0 100644 --- a/das/env/das_env.py +++ b/das/env/das_env.py @@ -158,6 +158,7 @@ def step(self, action: int): "best_y": self._best_y, "n_fe": self._n_fe, "checkpoint": self._checkpoint_idx, + "fitness_history_step": result.get("fitness_history", []), } return obs, reward, terminated, False, info diff --git a/das/optimizers/BO/__init__.py b/das/optimizers/BO/__init__.py index 46a4ce2..1e8dd03 100644 --- a/das/optimizers/BO/__init__.py +++ b/das/optimizers/BO/__init__.py @@ -1 +1 @@ -from das.optimizers.BO.gpbo import GPBO_EI, GPBO_UCB +from das.optimizers.BO.gpbo import GPBO_EI as GPBO_EI, GPBO_UCB as GPBO_UCB diff --git a/das/optimizers/DS/nm.py b/das/optimizers/DS/nm.py index 18d48cb..9882906 100644 --- a/das/optimizers/DS/nm.py +++ b/das/optimizers/DS/nm.py @@ -72,7 +72,7 @@ def initialize(self, x=None, y=None): def iterate(self, x, y): order = np.argsort(y) - l, h = order[0], order[-1] + lo, h = order[0], order[-1] p_mean = np.mean(x[order[:-1]], axis=0) # Reflection @@ -85,7 +85,7 @@ def iterate(self, x, y): if self._check_terminations(): return x, y - if y_r < y[l]: + if y_r < y[lo]: # Expansion p_e = np.clip( self.gamma * p_r + (1 - self.gamma) * p_mean, @@ -113,7 +113,7 @@ def iterate(self, x, y): # Shrinkage for i in range(1, self.n_individuals): x[order[i]] = np.clip( - x[l] + self.shrinkage * (x[order[i]] - x[l]), + x[lo] + self.shrinkage * (x[order[i]] - x[lo]), self.lower_boundary, self.upper_boundary, ) diff --git a/das/optimizers/__init__.py b/das/optimizers/__init__.py index e516e9e..db98390 100644 --- a/das/optimizers/__init__.py +++ b/das/optimizers/__init__.py @@ -1 +1,4 @@ -from das.optimizers.portfolio import PORTFOLIO, get_portfolio +from das.optimizers.portfolio import ( + PORTFOLIO as PORTFOLIO, + get_portfolio as get_portfolio, +) diff --git a/das/training/callbacks.py b/das/training/callbacks.py index 5f24a14..83a3c68 100644 --- a/das/training/callbacks.py +++ b/das/training/callbacks.py @@ -9,11 +9,10 @@ import json import os -from itertools import product from typing import Any import numpy as np -from stable_baselines3.common.callbacks import BaseCallback, EvalCallback +from stable_baselines3.common.callbacks import BaseCallback from stable_baselines3.common.vec_env import VecEnv try: @@ -124,7 +123,7 @@ def _on_step(self) -> bool: return True def _evaluate(self): - aoccs, best_ys = [], [] + best_ys = [] obs = self.eval_env.reset() for _ in range(self.n_eval_episodes): diff --git a/das/training/common.py b/das/training/common.py index 65bc07a..63e6a73 100644 --- a/das/training/common.py +++ b/das/training/common.py @@ -3,6 +3,8 @@ import json import os +import numpy as np + from das.env.das_env import DASEnv @@ -13,6 +15,61 @@ def load_global_optima(path: str = "bbob_optima.jsonl") -> dict[str, float]: return {k: v for line in f for k, v in json.loads(line).items()} +def compute_run_stats( + fitness_history: list[tuple[int, float]], + max_fe: int, + global_minimum: float, +) -> dict[str, float]: + """Compute AUOC and AOCC from a best-so-far improvement history. + + Parameters + ---------- + fitness_history: + List of (n_fe, best_y) pairs recorded at every improvement point. + FE counts must be absolute (1-indexed, relative to episode start). + max_fe: + Total function-evaluation budget for the episode. + global_minimum: + Known optimum value; used to shift fitness so the excess is >= 0. + Pass 0.0 when the optimum is unknown. + """ + if not fitness_history or max_fe <= 0: + return {"area_under_optimization_curve": 0.0, "aocc": 0.0, "final_fitness": 0.0} + + lb, ub = 1e-8, 1e8 + log_lb, log_ub = -8.0, 8.0 + + auoc_area = 0.0 + aocc_area = 0.0 + prev_fe = 0 + + for fe, best_y in fitness_history: + shifted = best_y - global_minimum + width = fe - prev_fe + + auoc_area += shifted * width + + clipped = np.clip(shifted, lb, ub) + normalized = (np.log10(clipped) - log_lb) / (log_ub - log_lb) + aocc_area += (1.0 - normalized) * width + + prev_fe = fe + + # Final plateau from last improvement to end of budget + final_shifted = fitness_history[-1][1] - global_minimum + final_width = max_fe - fitness_history[-1][0] + auoc_area += final_shifted * final_width + final_clipped = np.clip(final_shifted, lb, ub) + final_normalized = (np.log10(final_clipped) - log_lb) / (log_ub - log_lb) + aocc_area += (1.0 - final_normalized) * final_width + + return { + "area_under_optimization_curve": auoc_area / max_fe, + "aocc": aocc_area / max_fe, + "final_fitness": final_shifted, + } + + def make_das_env(problem_ids: list[str], optimizers: list, cfg: dict): """Return a zero-argument factory for use with SB3's make_vec_env.""" diff --git a/das/training/expdas.py b/das/training/expdas.py index 05dc281..62eb0db 100644 --- a/das/training/expdas.py +++ b/das/training/expdas.py @@ -2,6 +2,7 @@ import json import os +from concurrent.futures import ProcessPoolExecutor, as_completed import cocoex as cx import numpy as np @@ -10,7 +11,7 @@ from das.env.das_env import DASEnv from das.env.observation import observation_dim from das.optimizers.portfolio import get_portfolio -from das.training.common import write_jsonl +from das.training.common import load_global_optima, write_jsonl def run_exp_das(args) -> None: @@ -76,34 +77,51 @@ def run_exp_das(args) -> None: if args.eval: print("\nFinal evaluation on test set …") - test_results = evaluate(test_env, agent, n_episodes=min(len(test_ids), 50)) - mean_best_y = float(np.mean([r["best_y"] for r in test_results])) - print(f"Test mean best_y = {mean_best_y:.6e}") + global_optima = load_global_optima() + test_results = evaluate( + test_env, + agent, + n_episodes=min(len(test_ids), 50), + global_optima=global_optima, + ) + mean_final_fitness = float( + np.mean([next(iter(r.values()))["final_fitness"] for r in test_results]) + ) + print(f"Test mean final_fitness = {mean_final_fitness:.6e}") eval_path = os.path.join("results", f"{args.name}_eval.jsonl") write_jsonl(eval_path, test_results) print(f"Results saved to {eval_path}") -def run_cv_exp_das(args) -> None: - from agents.exponential_das import ExpDASAgent - from agents.exponential_das import train, evaluate +def _run_single_fold( + fold_idx: int, fold_info: tuple, args +) -> tuple[int, str, list[dict]]: + """Train and evaluate one CV fold. Safe to call in a subprocess. - optimizers = get_portfolio(args.portfolio) - n_opt = len(optimizers) - obs_dim = observation_dim(n_opt) + Each call creates its own cocoex Suite so the (non-picklable) Suite object + never crosses process boundaries. - suite = cx.Suite("bbob", "", "") + Returns (fold_idx, fold_tag, fold_results). + """ + # Lazy imports so the function can be pickled by ProcessPoolExecutor. + import cocoex as _cx + from agents.exponential_das import ExpDASAgent + from agents.exponential_das import train as _train, evaluate as _evaluate - all_folds = get_cv_folds( - args.cv_mode, args.dims, seed=args.seed, n_folds=args.n_folds - ) - fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds + train_ids, test_ids, fold_tag = fold_info + fold_name = f"{args.name}_cv_{fold_tag}" + model_path = os.path.join("models", f"{fold_name}_final.pt") + result_path = os.path.join("results", f"{fold_name}.jsonl") - print(f"CV mode : {args.cv_mode} ({len(fold_indices)}/{len(all_folds)} folds)") - print(f"n_epochs/fold: {args.n_epochs} | dims={args.dims}") + prefix = f"[{fold_tag}]" + print(f"{prefix} starting ({len(train_ids)} train / {len(test_ids)} test)") + optimizers = get_portfolio(args.portfolio) + n_opt = len(optimizers) + obs_dim = observation_dim(n_opt) buffer_capacity = args.buffer_capacity or (16 * args.n_checkpoints) + suite = _cx.Suite("bbob", "", "") env_cfg = dict( suite=suite, optimizers=optimizers, @@ -115,80 +133,113 @@ def run_cv_exp_das(args) -> None: seed=args.seed, ) - fold_summaries = [] + total_episodes = args.n_epochs * len(train_ids) + + if os.path.exists(model_path): + print(f"{prefix} [skip training] model already exists") + agent = ExpDASAgent.load(model_path, obs_dim=obs_dim, n_actions=n_opt) + else: + agent = ExpDASAgent( + obs_dim=obs_dim, + n_actions=n_opt, + buffer_capacity=buffer_capacity, + actor_lr=args.actor_lr, + critic_lr=args.critic_lr, + ppo_epochs=args.ppo_epochs, + n_checkpoints=args.n_checkpoints, + device=args.device, + ) + train_env = DASEnv(problem_ids=train_ids, **env_cfg) + _train( + train_env=train_env, + test_env=None, + agent=agent, + total_episodes=total_episodes, + eval_interval=total_episodes + 1, + save_interval=args.save_interval, + save_dir="models", + name=fold_name, + ) + train_env.close() + + if os.path.exists(result_path): + print(f"{prefix} [skip evaluation] results already exist") + with open(result_path) as fh: + fold_results = [json.loads(line) for line in fh] + else: + global_optima = load_global_optima() + eval_env = DASEnv(problem_ids=test_ids, **env_cfg) + raw = _evaluate( + eval_env, agent, n_episodes=len(test_ids), global_optima=global_optima + ) + fold_results = [ + {pid: {**m, "fold": fold_tag}} for r in raw for pid, m in r.items() + ] + eval_env.close() + write_jsonl(result_path, fold_results) + print(f"{prefix} saved results → {result_path}") + + mean_ff = float( + np.mean([next(iter(r.values()))["final_fitness"] for r in fold_results]) + ) + print(f"{prefix} mean final_fitness={mean_ff:.4e}") + return fold_idx, fold_tag, fold_results - for run_idx, fold_idx in enumerate(fold_indices): - train_ids, test_ids, fold_tag = all_folds[fold_idx] - fold_name = f"{args.name}_cv_{fold_tag}" - model_path = os.path.join("models", f"{fold_name}_final.pt") - result_path = os.path.join("results", f"{fold_name}.jsonl") - print(f"\n{'=' * 60}") - print( - f"Fold {run_idx + 1}/{len(fold_indices)}: {fold_tag}" - f" ({len(train_ids)} train / {len(test_ids)} test)" +def run_cv_exp_das(args) -> None: + all_folds = get_cv_folds( + args.cv_mode, args.dims, seed=args.seed, n_folds=args.n_folds + ) + fold_indices = list(range(len(all_folds))) if args.folds is None else args.folds + n_jobs = getattr(args, "n_jobs", 1) + + print(f"CV mode : {args.cv_mode} ({len(fold_indices)}/{len(all_folds)} folds)") + print(f"n_epochs/fold: {args.n_epochs} | dims={args.dims} | n_jobs={n_jobs}") + + # ------------------------------------------------------------------ # + # Run folds — in parallel when n_jobs > 1, sequentially otherwise # + # ------------------------------------------------------------------ # + + results_by_fold: dict[int, tuple[str, list[dict]]] = {} + + if n_jobs > 1: + with ProcessPoolExecutor(max_workers=n_jobs) as pool: + futures = { + pool.submit(_run_single_fold, fi, all_folds[fi], args): fi + for fi in fold_indices + } + for future in as_completed(futures): + fi, fold_tag, fold_results = future.result() + results_by_fold[fi] = (fold_tag, fold_results) + else: + for fi in fold_indices: + print(f"\n{'=' * 60}") + fi_out, fold_tag, fold_results = _run_single_fold(fi, all_folds[fi], args) + results_by_fold[fi_out] = (fold_tag, fold_results) + + # ------------------------------------------------------------------ # + # Aggregate summary (same order as fold_indices) # + # ------------------------------------------------------------------ # + + fold_summaries = [] + all_final_fitnesses: list[float] = [] + + for fi in fold_indices: + fold_tag, fold_results = results_by_fold[fi] + mean_ff = float( + np.mean([next(iter(r.values()))["final_fitness"] for r in fold_results]) ) - print(f"{'=' * 60}") - - total_episodes = args.n_epochs * len(train_ids) - - if os.path.exists(model_path): - print(f" [skip training] {model_path} already exists") - agent = ExpDASAgent.load(model_path, obs_dim=obs_dim, n_actions=n_opt) - else: - agent = ExpDASAgent( - obs_dim=obs_dim, - n_actions=n_opt, - buffer_capacity=buffer_capacity, - actor_lr=args.actor_lr, - critic_lr=args.critic_lr, - ppo_epochs=args.ppo_epochs, - n_checkpoints=args.n_checkpoints, - device=args.device, - ) - train_env = DASEnv(problem_ids=train_ids, **env_cfg) - train( - train_env=train_env, - test_env=None, - agent=agent, - total_episodes=total_episodes, - eval_interval=total_episodes + 1, - save_interval=args.save_interval, - save_dir="models", - name=fold_name, - ) - train_env.close() - - if os.path.exists(result_path): - print(f" [skip evaluation] {result_path} already exists") - with open(result_path) as fh: - fold_results = [json.loads(line) for line in fh] - else: - eval_env = DASEnv(problem_ids=test_ids, **env_cfg) - raw = evaluate(eval_env, agent, n_episodes=len(test_ids)) - fold_results = [{**r, "fold": fold_tag} for r in raw] - eval_env.close() - write_jsonl(result_path, fold_results) - print(f" Saved results → {result_path}") - - mean_best_y = float(np.mean([r["best_y"] for r in fold_results])) fold_summaries.append( { "fold": fold_tag, - "fold_idx": fold_idx, + "fold_idx": fi, "n_test": len(fold_results), - "mean_best_y": mean_best_y, + "mean_final_fitness": mean_ff, } ) - print(f" mean best_y={mean_best_y:.4e}") - - all_best_y = [] - for fold_idx in fold_indices: - _, _, fold_tag = all_folds[fold_idx] - rpath = os.path.join("results", f"{args.name}_cv_{fold_tag}.jsonl") - if os.path.exists(rpath): - with open(rpath) as fh: - all_best_y.extend(json.loads(line)["best_y"] for line in fh) + all_final_fitnesses.extend( + next(iter(r.values()))["final_fitness"] for r in fold_results + ) overall = { "cv_mode": args.cv_mode, @@ -197,7 +248,9 @@ def run_cv_exp_das(args) -> None: "dims": args.dims, "n_epochs_per_fold": args.n_epochs, "n_folds_run": len(fold_summaries), - "overall_mean_best_y": float(np.mean(all_best_y)) if all_best_y else None, + "overall_mean_final_fitness": float(np.mean(all_final_fitnesses)) + if all_final_fitnesses + else None, "folds": fold_summaries, } summary_path = os.path.join("results", f"{args.name}_cv_summary.jsonl") @@ -206,6 +259,8 @@ def run_cv_exp_das(args) -> None: print(f"\n{'=' * 60}") print(f"Cross-validation complete ({args.cv_mode} dims={args.dims})") print(f" Folds run : {len(fold_summaries)}") - if all_best_y: - print(f" Overall mean best_y: {overall['overall_mean_best_y']:.4e}") + if all_final_fitnesses: + print( + f" Overall mean final_fitness: {overall['overall_mean_final_fitness']:.4e}" + ) print(f" Summary : {summary_path}") diff --git a/evaluate.py b/evaluate.py index d1140ff..da8fd06 100644 --- a/evaluate.py +++ b/evaluate.py @@ -12,9 +12,7 @@ import json import os import warnings -from itertools import product -import cocoex import numpy as np from stable_baselines3 import PPO from stable_baselines3.common.vec_env import VecNormalize @@ -22,36 +20,13 @@ from tqdm import tqdm from das.env.bbob_splits import ALL_DIMS, get_train_test_split -from das.env.das_env import DASEnv from das.optimizers.portfolio import get_portfolio from das.utils import set_seed -from das.training.common import load_global_optima, make_das_env +from das.training.common import compute_run_stats, load_global_optima, make_das_env warnings.filterwarnings("ignore") -# ------------------------------------------------------------------ # -# AOCC metric # -# ------------------------------------------------------------------ # - - -def aocc( - fitness_history: list[tuple[int, float]], max_fe: int, optimum: float -) -> float: - lb, ub = -8.0, 8.0 - area, prev_fe = 0.0, 0 - for fe, f in fitness_history: - v = np.clip(f - optimum, 1e-8, 1e8) - area += (1.0 - (np.log10(v) - lb) / (ub - lb)) * (fe - prev_fe) - prev_fe = fe - if fitness_history: - last_v = np.clip(fitness_history[-1][1] - optimum, 1e-8, 1e8) - area += (1.0 - (np.log10(last_v) - lb) / (ub - lb)) * ( - max_fe - fitness_history[-1][0] - ) - return area / max_fe - - # ------------------------------------------------------------------ # # CLI # # ------------------------------------------------------------------ # @@ -110,42 +85,37 @@ def main(): eval_env.norm_reward = False global_optima = load_global_optima() - observer = ( - cocoex.Observer("bbob", f"result_folder: {args.name}") if args.coco else None - ) out_path = os.path.join("results", f"{args.name}_eval.jsonl") results_all = [] for problem_id in tqdm(test_ids, smoothing=0.0): - # Run one episode obs = eval_env.reset() done = [False] - info = {} + fitness_history: list[tuple[int, float]] = [] + step_info: dict = {} while not done[0]: action, _ = model.predict(obs, deterministic=True) obs, _, done, infos = eval_env.step(action) - if done[0]: - info = infos[0] - - best_y = info.get("best_y", float("inf")) - optimum = global_optima.get(problem_id, 0.0) - record = { - "problem_id": problem_id, - "best_y": best_y, - "gap": best_y - optimum, - } - results_all.append(record) + step_info = infos[0] + fitness_history.extend(step_info.get("fitness_history_step", [])) + + max_fe = step_info.get("n_fe", 0) + global_minimum = global_optima.get(problem_id, 0.0) + stats = compute_run_stats(fitness_history, max_fe, global_minimum) + results_all.append({problem_id: stats}) with open(out_path, "w") as f: for r in results_all: f.write(json.dumps(r) + "\n") - gaps = [r["gap"] for r in results_all] + metrics = [next(iter(r.values())) for r in results_all] print(f"\nEvaluation complete ({len(results_all)} problems)") - print(f" Mean gap : {np.mean(gaps):.4e}") - print(f" Median gap: {np.median(gaps):.4e}") - print(f" Results : {out_path}") + print( + f" Mean final_fitness : {np.mean([m['final_fitness'] for m in metrics]):.4e}" + ) + print(f" Mean AOCC : {np.mean([m['aocc'] for m in metrics]):.4f}") + print(f" Results : {out_path}") if __name__ == "__main__": diff --git a/tests/test_baselines.py b/tests/test_baselines.py index 718d8c5..a17cf49 100644 --- a/tests/test_baselines.py +++ b/tests/test_baselines.py @@ -4,15 +4,12 @@ real BBOB/cocoex is needed and episodes complete in milliseconds. """ -import json -import os -import tempfile - import numpy as np import pytest from das.env.das_env import DASEnv from das.optimizers.portfolio import get_portfolio +from das.training.common import compute_run_stats from das.utils import set_seed from baselines import ( collect_env_results, @@ -62,6 +59,8 @@ def get_problem(self, problem_id: str) -> MockProblem: N_INDIVIDUALS = 10 PORTFOLIO = ["SPSO", "IPSO"] +METRICS_KEYS = {"area_under_optimization_curve", "aocc", "final_fitness"} + def make_env(problem_ids=PROBLEM_IDS, suite=None): return DASEnv( @@ -86,6 +85,96 @@ def make_cfg(): ) +def _metrics(record: dict) -> dict: + """Extract the metrics dict from a {problem_id: metrics} record.""" + return next(iter(record.values())) + + +def _pid(record: dict) -> str: + """Extract the problem_id key from a {problem_id: metrics} record.""" + return next(iter(record.keys())) + + +# ------------------------------------------------------------------ # +# 0. compute_run_stats (unit tests for the core metric function) # +# ------------------------------------------------------------------ # + + +class TestComputeRunStats: + def test_aocc_in_unit_interval(self): + history = [(100, 5.0), (300, 2.0), (700, 0.5)] + stats = compute_run_stats(history, 1000, global_minimum=0.0) + assert 0.0 <= stats["aocc"] <= 1.0 + + def test_auoc_known_value(self): + # AUOC = integral of shifted fitness / max_fe (piecewise constant) + # history = [(100, 5), (300, 2), (700, 0.5)], max_fe=1000, g=0 + # area = 5*100 + 2*200 + 0.5*400 + 0.5*300 (final plateau 700→1000) + # = 500 + 400 + 200 + 150 = 1250 + # AUOC = 1250 / 1000 = 1.25 + history = [(100, 5.0), (300, 2.0), (700, 0.5)] + stats = compute_run_stats(history, 1000, global_minimum=0.0) + assert stats["area_under_optimization_curve"] == pytest.approx(1.25) + + def test_final_fitness_is_shifted(self): + history = [(500, 3.0)] + stats = compute_run_stats(history, 1000, global_minimum=1.0) + assert stats["final_fitness"] == pytest.approx(2.0) # 3.0 - 1.0 + + def test_final_fitness_zero_global_minimum(self): + history = [(500, -4.5)] + stats = compute_run_stats(history, 1000, global_minimum=0.0) + assert stats["final_fitness"] == pytest.approx(-4.5) + + def test_empty_history_returns_zeros(self): + stats = compute_run_stats([], 1000, global_minimum=0.0) + assert stats["aocc"] == 0.0 + assert stats["area_under_optimization_curve"] == 0.0 + assert stats["final_fitness"] == 0.0 + + def test_single_improvement_at_start(self): + # Improvement at first FE, constant for the rest → AUOC = shifted_y + history = [(1, 2.0)] + stats = compute_run_stats(history, 1000, global_minimum=0.0) + # area = 2.0*1 (loop) + 2.0*999 (plateau) = 2000 → / 1000 = 2.0 + assert stats["area_under_optimization_curve"] == pytest.approx(2.0) + + def test_monotone_improvement_raises_aocc(self): + # More improvement points should yield higher AOCC + fast = [(10, 1.0), (20, 0.1), (30, 0.01)] + slow = [(10, 1.0), (500, 0.1), (900, 0.01)] + stats_fast = compute_run_stats(fast, 1000, global_minimum=0.0) + stats_slow = compute_run_stats(slow, 1000, global_minimum=0.0) + assert stats_fast["aocc"] > stats_slow["aocc"] + + def test_all_keys_present(self): + history = [(100, 1.0)] + stats = compute_run_stats(history, 1000, global_minimum=0.0) + assert METRICS_KEYS == set(stats.keys()) + + def test_global_minimum_shifts_auoc(self): + history = [(500, 5.0)] + stats_g0 = compute_run_stats(history, 1000, global_minimum=0.0) + stats_g2 = compute_run_stats(history, 1000, global_minimum=2.0) + # With g=2, shifted=3 < shifted=5 → lower AUOC + assert ( + stats_g2["area_under_optimization_curve"] + < stats_g0["area_under_optimization_curve"] + ) + + def test_aocc_perfect_convergence(self): + # If the optimizer reaches very close to the optimum immediately, AOCC ≈ 1 + history = [(1, 1e-7)] # shifted ≈ 1e-7 ≈ lb → normalized ≈ 0 → (1-0)*width + stats = compute_run_stats(history, 1000, global_minimum=0.0) + assert stats["aocc"] > 0.9 + + def test_aocc_no_convergence(self): + # If the optimizer stays near ub (1e8), AOCC ≈ 0 + history = [(1, 1e8)] + stats = compute_run_stats(history, 1000, global_minimum=0.0) + assert stats["aocc"] < 0.1 + + # ------------------------------------------------------------------ # # 1. Policy functions # # ------------------------------------------------------------------ # @@ -124,21 +213,55 @@ def test_fixed_policy_ignores_obs(self): class TestRunEpisode: - def test_returns_info_dict_with_best_y(self): + def test_returns_two_element_tuple(self): env = make_env() - info = run_episode(env, random_policy) - assert "best_y" in info - assert isinstance(info["best_y"], float) + result = run_episode(env, random_policy) + assert isinstance(result, tuple) and len(result) == 2 - def test_fixed_policy_runs_full_episode(self): + def test_step_info_has_best_y(self): env = make_env() - info = run_episode(env, fixed_policy(0)) - assert "best_y" in info + step_info, _ = run_episode(env, random_policy) + assert "best_y" in step_info + assert isinstance(step_info["best_y"], float) def test_best_y_is_finite(self): env = make_env() - info = run_episode(env, random_policy) - assert np.isfinite(info["best_y"]) + step_info, _ = run_episode(env, random_policy) + assert np.isfinite(step_info["best_y"]) + + def test_fitness_history_is_list_of_tuples(self): + env = make_env() + _, fitness_history = run_episode(env, random_policy) + assert isinstance(fitness_history, list) + for fe, y in fitness_history: + assert isinstance(fe, int) + assert isinstance(y, float) + + def test_fitness_history_fe_is_increasing(self): + env = make_env() + _, fitness_history = run_episode(env, random_policy) + fes = [fe for fe, _ in fitness_history] + assert fes == sorted(fes) + + def test_fitness_history_y_is_nonincreasing(self): + """Improvement history must be strictly improving (best-so-far never worsens).""" + env = make_env() + _, fitness_history = run_episode(env, random_policy) + ys = [y for _, y in fitness_history] + for i in range(1, len(ys)): + assert ys[i] < ys[i - 1] + + def test_fitness_history_nonempty_after_episode(self): + """At least one improvement must occur (first evaluation beats inf).""" + env = make_env() + _, fitness_history = run_episode(env, random_policy) + assert len(fitness_history) >= 1 + + def test_fixed_policy_runs_full_episode(self): + env = make_env() + step_info, fitness_history = run_episode(env, fixed_policy(0)) + assert np.isfinite(step_info["best_y"]) + assert len(fitness_history) >= 1 def test_episode_advances_problem_idx(self): env = make_env() @@ -173,7 +296,7 @@ def test_returns_one_record_per_problem(self): ) assert len(records) == 2 - def test_record_structure(self): + def test_record_is_nested_dict(self): suite = MockSuite() records = collect_env_results( "random", @@ -185,35 +308,74 @@ def test_record_structure(self): {}, ) r = records[0] - assert set(r.keys()) >= {"problem_id", "agent", "best_y", "gap"} + assert len(r) == 1 + pid, metrics = next(iter(r.items())) + assert pid == PROBLEM_IDS[0] + assert METRICS_KEYS <= set(metrics.keys()) - def test_agent_tag_in_records(self): + def test_all_metric_keys_present(self): suite = MockSuite() records = collect_env_results( - "fixed:SPSO", - fixed_policy(0), + "random", + random_policy, PROBLEM_IDS[:1], suite, get_portfolio(PORTFOLIO), make_cfg(), {}, ) - assert records[0]["agent"] == "fixed:SPSO" + metrics = _metrics(records[0]) + assert METRICS_KEYS <= set(metrics.keys()) - def test_gap_equals_best_y_minus_optimum(self): + def test_aocc_in_unit_interval(self): suite = MockSuite() - optima = {PROBLEM_IDS[0]: 1.0} records = collect_env_results( "random", random_policy, + PROBLEM_IDS[:2], + suite, + get_portfolio(PORTFOLIO), + make_cfg(), + {}, + ) + for r in records: + assert 0.0 <= _metrics(r)["aocc"] <= 1.0 + + def test_final_fitness_is_finite(self): + suite = MockSuite() + records = collect_env_results( + "random", + random_policy, + PROBLEM_IDS[:2], + suite, + get_portfolio(PORTFOLIO), + make_cfg(), + {}, + ) + for r in records: + assert np.isfinite(_metrics(r)["final_fitness"]) + + def test_final_fitness_shifted_by_global_minimum(self): + # Use compute_run_stats directly with a fixed history to verify shift logic. + history = [(100, 5.0)] + stats_g0 = compute_run_stats(history, 1000, global_minimum=0.0) + stats_g1 = compute_run_stats(history, 1000, global_minimum=1.0) + assert stats_g0["final_fitness"] - stats_g1["final_fitness"] == pytest.approx( + 1.0 + ) + + def test_agent_tag_in_metrics(self): + suite = MockSuite() + records = collect_env_results( + "fixed:SPSO", + fixed_policy(0), PROBLEM_IDS[:1], suite, get_portfolio(PORTFOLIO), make_cfg(), - optima, + {}, ) - r = records[0] - assert abs(r["gap"] - (r["best_y"] - 1.0)) < 1e-9 + assert _metrics(records[0])["agent"] == "fixed:SPSO" def test_problem_ids_match_order(self): suite = MockSuite() @@ -227,14 +389,10 @@ def test_problem_ids_match_order(self): make_cfg(), {}, ) - assert [r["problem_id"] for r in records] == ids + assert [_pid(r) for r in records] == ids def test_fixed_policy_only_uses_one_action(self): - """A fixed-action policy must always select the same optimizer. - - We verify this by checking that env._choices_history contains only - the chosen action index after one complete episode. - """ + """A fixed-action policy must always select the same optimizer.""" suite = MockSuite() for action_idx in range(len(PORTFOLIO)): env = make_env(suite=suite) @@ -250,21 +408,33 @@ def test_fixed_policy_only_uses_one_action(self): class TestSingleAlgorithm: - def test_run_single_returns_float(self): + def test_run_single_returns_metrics_dict(self): + opt_class = get_portfolio(["SPSO"])[0] + problem = MockProblem("test_p", dim=2) + stats = run_single_algorithm(opt_class, problem, FE_MULTIPLIER, N_INDIVIDUALS) + assert isinstance(stats, dict) + assert METRICS_KEYS <= set(stats.keys()) + + def test_run_single_all_metrics_finite(self): + opt_class = get_portfolio(["SPSO"])[0] + problem = MockProblem("test_p", dim=2) + stats = run_single_algorithm(opt_class, problem, FE_MULTIPLIER, N_INDIVIDUALS) + assert np.isfinite(stats["final_fitness"]) + assert np.isfinite(stats["area_under_optimization_curve"]) + assert np.isfinite(stats["aocc"]) + + def test_run_single_aocc_in_unit_interval(self): opt_class = get_portfolio(["SPSO"])[0] problem = MockProblem("test_p", dim=2) - result = run_single_algorithm(opt_class, problem, FE_MULTIPLIER, N_INDIVIDUALS) - assert isinstance(result, float) - assert np.isfinite(result) + stats = run_single_algorithm(opt_class, problem, FE_MULTIPLIER, N_INDIVIDUALS) + assert 0.0 <= stats["aocc"] <= 1.0 - def test_run_single_uses_full_budget(self): - """The optimizer must receive the full budget (fe_multiplier × dim).""" + def test_run_single_larger_budget_not_worse(self): opt_class = get_portfolio(["SPSO"])[0] problem = MockProblem("test_p", dim=2) - result_small = run_single_algorithm(opt_class, problem, 2, N_INDIVIDUALS) - result_large = run_single_algorithm(opt_class, problem, 50, N_INDIVIDUALS) - # Larger budget should not produce a worse result - assert result_large <= result_small + 1e-6 + small = run_single_algorithm(opt_class, problem, 2, N_INDIVIDUALS) + large = run_single_algorithm(opt_class, problem, 50, N_INDIVIDUALS) + assert large["final_fitness"] <= small["final_fitness"] + 1e-6 def test_collect_single_returns_one_record_per_problem(self): suite = MockSuite() @@ -293,11 +463,26 @@ def test_collect_single_record_structure(self): {}, ) r = records[0] - assert set(r.keys()) >= {"problem_id", "agent", "best_y", "gap"} - assert r["agent"] == "single:SPSO" + metrics = _metrics(r) + assert METRICS_KEYS <= set(metrics.keys()) + assert metrics["agent"] == "single:SPSO" + + def test_collect_single_aocc_in_unit_interval(self): + suite = MockSuite() + opt_class = get_portfolio(["SPSO"])[0] + records = collect_single_results( + "single:SPSO", + opt_class, + PROBLEM_IDS[:2], + suite, + FE_MULTIPLIER, + N_INDIVIDUALS, + {}, + ) + for r in records: + assert 0.0 <= _metrics(r)["aocc"] <= 1.0 - def test_single_vs_fixed_both_finite(self): - """Both single and fixed-policy agents should produce finite gaps.""" + def test_single_vs_fixed_both_have_finite_final_fitness(self): suite = MockSuite() opt_class = get_portfolio(["SPSO"])[0] @@ -320,7 +505,7 @@ def test_single_vs_fixed_both_finite(self): {}, ) for r in single_records + fixed_records: - assert np.isfinite(r["gap"]) + assert np.isfinite(_metrics(r)["final_fitness"]) # ------------------------------------------------------------------ # @@ -330,54 +515,80 @@ def test_single_vs_fixed_both_finite(self): class TestComputeOracle: def _make_results(self): - """Two agents with known per-problem gaps for testing.""" + """Two agents with known per-problem metrics for testing.""" return { "agent_A": [ - {"problem_id": "p1", "agent": "agent_A", "best_y": 5.0, "gap": 5.0}, - {"problem_id": "p2", "agent": "agent_A", "best_y": 3.0, "gap": 3.0}, + { + "p1": { + "final_fitness": 5.0, + "aocc": 0.3, + "area_under_optimization_curve": 5.0, + "agent": "agent_A", + } + }, + { + "p2": { + "final_fitness": 3.0, + "aocc": 0.5, + "area_under_optimization_curve": 3.0, + "agent": "agent_A", + } + }, ], "agent_B": [ - {"problem_id": "p1", "agent": "agent_B", "best_y": 2.0, "gap": 2.0}, - {"problem_id": "p2", "agent": "agent_B", "best_y": 8.0, "gap": 8.0}, + { + "p1": { + "final_fitness": 2.0, + "aocc": 0.7, + "area_under_optimization_curve": 2.0, + "agent": "agent_B", + } + }, + { + "p2": { + "final_fitness": 8.0, + "aocc": 0.1, + "area_under_optimization_curve": 8.0, + "agent": "agent_B", + } + }, ], } - def test_oracle_best_picks_minimum_gap(self): + def test_oracle_best_picks_minimum_final_fitness(self): best, _ = compute_oracle(self._make_results()) - by_pid = {r["problem_id"]: r for r in best} - assert by_pid["p1"]["gap"] == 2.0 # agent_B wins - assert by_pid["p2"]["gap"] == 3.0 # agent_A wins + by_pid = {_pid(r): _metrics(r) for r in best} + assert by_pid["p1"]["final_fitness"] == 2.0 # agent_B wins + assert by_pid["p2"]["final_fitness"] == 3.0 # agent_A wins - def test_oracle_worst_picks_maximum_gap(self): + def test_oracle_worst_picks_maximum_final_fitness(self): _, worst = compute_oracle(self._make_results()) - by_pid = {r["problem_id"]: r for r in worst} - assert by_pid["p1"]["gap"] == 5.0 # agent_A is worst - assert by_pid["p2"]["gap"] == 8.0 # agent_B is worst + by_pid = {_pid(r): _metrics(r) for r in worst} + assert by_pid["p1"]["final_fitness"] == 5.0 # agent_A is worst + assert by_pid["p2"]["final_fitness"] == 8.0 # agent_B is worst def test_oracle_best_records_winning_agent(self): best, _ = compute_oracle(self._make_results()) - by_pid = {r["problem_id"]: r for r in best} + by_pid = {_pid(r): _metrics(r) for r in best} assert by_pid["p1"]["best_agent"] == "agent_B" assert by_pid["p2"]["best_agent"] == "agent_A" def test_oracle_worst_records_worst_agent(self): _, worst = compute_oracle(self._make_results()) - by_pid = {r["problem_id"]: r for r in worst} + by_pid = {_pid(r): _metrics(r) for r in worst} assert by_pid["p1"]["worst_agent"] == "agent_A" assert by_pid["p2"]["worst_agent"] == "agent_B" def test_oracle_covers_all_problems(self): best, worst = compute_oracle(self._make_results()) - pids_best = {r["problem_id"] for r in best} - pids_worst = {r["problem_id"] for r in worst} - assert pids_best == {"p1", "p2"} - assert pids_worst == {"p1", "p2"} + assert {_pid(r) for r in best} == {"p1", "p2"} + assert {_pid(r) for r in worst} == {"p1", "p2"} def test_oracle_best_le_oracle_worst(self): - """Per problem, oracle-best gap ≤ oracle-worst gap.""" + """Per problem, oracle-best final_fitness ≤ oracle-worst final_fitness.""" best, worst = compute_oracle(self._make_results()) - best_by_pid = {r["problem_id"]: r["gap"] for r in best} - worst_by_pid = {r["problem_id"]: r["gap"] for r in worst} + best_by_pid = {_pid(r): _metrics(r)["final_fitness"] for r in best} + worst_by_pid = {_pid(r): _metrics(r)["final_fitness"] for r in worst} for pid in best_by_pid: assert best_by_pid[pid] <= worst_by_pid[pid] @@ -387,34 +598,76 @@ def test_oracle_with_real_runs(self): ids = PROBLEM_IDS[:2] cfg = make_cfg() optimizers = get_portfolio(PORTFOLIO) - global_optima = {} records_spso = collect_env_results( - "fixed:SPSO", fixed_policy(0), ids, suite, optimizers, cfg, global_optima + "fixed:SPSO", fixed_policy(0), ids, suite, optimizers, cfg, {} ) records_ipso = collect_env_results( - "fixed:IPSO", fixed_policy(1), ids, suite, optimizers, cfg, global_optima + "fixed:IPSO", fixed_policy(1), ids, suite, optimizers, cfg, {} ) best, worst = compute_oracle( - { - "fixed:SPSO": records_spso, - "fixed:IPSO": records_ipso, - } + {"fixed:SPSO": records_spso, "fixed:IPSO": records_ipso} ) assert len(best) == len(ids) assert len(worst) == len(ids) for b, w in zip( - sorted(best, key=lambda r: r["problem_id"]), - sorted(worst, key=lambda r: r["problem_id"]), + sorted(best, key=_pid), + sorted(worst, key=_pid), ): - assert b["gap"] <= w["gap"] + 1e-9 + assert _metrics(b)["final_fitness"] <= _metrics(w)["final_fitness"] + 1e-9 # ------------------------------------------------------------------ # # 6. summarise # # ------------------------------------------------------------------ # + +class TestSummarise: + def _records(self, final_fitnesses, aocc_values=None): + if aocc_values is None: + aocc_values = [0.5] * len(final_fitnesses) + return [ + { + f"p{i}": { + "final_fitness": ff, + "aocc": a, + "area_under_optimization_curve": ff, + "agent": "x", + } + } + for i, (ff, a) in enumerate(zip(final_fitnesses, aocc_values)) + ] + + def test_mean_final_fitness(self): + s = summarise("x", self._records([1.0, 3.0])) + assert s["mean_final_fitness"] == pytest.approx(2.0) + + def test_median_final_fitness(self): + s = summarise("x", self._records([1.0, 2.0, 9.0])) + assert s["median_final_fitness"] == pytest.approx(2.0) + + def test_best_final_fitness(self): + s = summarise("x", self._records([5.0, 1.0, 3.0])) + assert s["best_final_fitness"] == pytest.approx(1.0) + + def test_worst_final_fitness(self): + s = summarise("x", self._records([5.0, 1.0, 3.0])) + assert s["worst_final_fitness"] == pytest.approx(5.0) + + def test_mean_aocc(self): + s = summarise("x", self._records([1.0, 2.0], aocc_values=[0.4, 0.6])) + assert s["mean_aocc"] == pytest.approx(0.5) + + def test_n_problems(self): + s = summarise("x", self._records([1.0, 2.0, 3.0])) + assert s["n_problems"] == 3 + + def test_agent_field(self): + s = summarise("my_agent", self._records([1.0])) + assert s["agent"] == "my_agent" + + # ------------------------------------------------------------------ # # 7. Reproducibility via seed # # ------------------------------------------------------------------ # @@ -439,36 +692,46 @@ def test_same_seed_same_best_y(self): for seed in [0, 42, 999]: set_seed(seed) env1 = self._seeded_env(seed) - info1 = run_episode(env1, fixed_policy(0)) + step_info1, _ = run_episode(env1, fixed_policy(0)) set_seed(seed) env2 = self._seeded_env(seed) - info2 = run_episode(env2, fixed_policy(0)) + step_info2, _ = run_episode(env2, fixed_policy(0)) - assert info1["best_y"] == pytest.approx(info2["best_y"]), ( - f"seed={seed}: {info1['best_y']} != {info2['best_y']}" + assert step_info1["best_y"] == pytest.approx(step_info2["best_y"]), ( + f"seed={seed}: {step_info1['best_y']} != {step_info2['best_y']}" ) + def test_same_seed_same_aocc(self): + """Two envs with the same seed produce identical AOCC via identical fitness history.""" + for seed in [0, 42]: + set_seed(seed) + env1 = self._seeded_env(seed) + _, hist1 = run_episode(env1, fixed_policy(0)) + + set_seed(seed) + env2 = self._seeded_env(seed) + _, hist2 = run_episode(env2, fixed_policy(0)) + + assert hist1 == hist2 + def test_different_seeds_different_best_y(self): """Different seeds produce different optimizer trajectories (with high probability).""" results = set() for seed in [0, 1, 2, 3, 4]: set_seed(seed) env = self._seeded_env(seed) - info = run_episode(env, fixed_policy(0)) - results.add(round(info["best_y"], 8)) + step_info, _ = run_episode(env, fixed_policy(0)) + results.add(round(step_info["best_y"], 8)) # With 5 different seeds, at least 2 distinct outcomes are expected assert len(results) > 1 def test_seed_rng_set_in_optimizer_options(self): """DASEnv must pass seed_rng to pypop7 when env._seed is set.""" - from unittest.mock import patch, MagicMock - env = self._seeded_env(seed=42) env.reset() captured_options = {} - original_class = get_portfolio(PORTFOLIO)[0] class CapturingOptimizer(original_class): @@ -486,8 +749,6 @@ def __init__(self, problem, options): def test_no_seed_does_not_set_seed_rng(self): """Without a seed, seed_rng must NOT be injected (pypop7 uses random state).""" - from unittest.mock import patch - env = DASEnv( problem_ids=PROBLEM_IDS[:1], suite=MockSuite(), @@ -538,29 +799,64 @@ def __init__(self, problem, options): ) -class TestSummarise: - def _records(self, gaps): - return [ - {"problem_id": f"p{i}", "agent": "x", "best_y": g, "gap": g} - for i, g in enumerate(gaps) - ] +# ------------------------------------------------------------------ # +# 8. DASEnv step info — fitness_history_step # +# ------------------------------------------------------------------ # - def test_mean_gap(self): - s = summarise("x", self._records([1.0, 3.0])) - assert s["mean_gap"] == pytest.approx(2.0) - def test_median_gap(self): - s = summarise("x", self._records([1.0, 2.0, 9.0])) - assert s["median_gap"] == pytest.approx(2.0) +class TestFitnessHistoryStep: + def test_step_info_contains_fitness_history_step(self): + env = make_env() + env.reset() + _, _, _, _, info = env.step(0) + assert "fitness_history_step" in info - def test_best_gap(self): - s = summarise("x", self._records([5.0, 1.0, 3.0])) - assert s["best_gap"] == pytest.approx(1.0) + def test_fitness_history_step_is_list(self): + env = make_env() + env.reset() + _, _, _, _, info = env.step(0) + assert isinstance(info["fitness_history_step"], list) - def test_worst_gap(self): - s = summarise("x", self._records([5.0, 1.0, 3.0])) - assert s["worst_gap"] == pytest.approx(5.0) + def test_fitness_history_step_entries_are_int_float_tuples(self): + env = make_env() + env.reset() + _, _, _, _, info = env.step(0) + for fe, y in info["fitness_history_step"]: + assert isinstance(fe, int) + assert isinstance(y, float) - def test_n_problems(self): - s = summarise("x", self._records([1.0, 2.0, 3.0])) - assert s["n_problems"] == 3 + def test_fitness_history_step_fe_within_budget(self): + env = make_env() + env.reset() + _, _, _, _, info = env.step(0) + max_fe = FE_MULTIPLIER * 2 # dim=2 + for fe, _ in info["fitness_history_step"]: + assert 1 <= fe <= max_fe + + def test_fitness_history_step_accumulated_across_checkpoints(self): + """Full episode fitness history must contain at least as many points as one step.""" + env = make_env() + env.reset() + all_history = [] + done = False + while not done: + _, _, terminated, truncated, info = env.step(0) + done = terminated or truncated + all_history.extend(info["fitness_history_step"]) + + # At minimum one improvement in the first checkpoint (from inf) + assert len(all_history) >= 1 + + def test_fitness_history_step_fe_monotone_across_episode(self): + """FE values accumulated across all checkpoints must be strictly increasing.""" + env = make_env() + env.reset() + all_history = [] + done = False + while not done: + _, _, terminated, truncated, info = env.step(0) + done = terminated or truncated + all_history.extend(info["fitness_history_step"]) + + fes = [fe for fe, _ in all_history] + assert fes == sorted(set(fes)), "Duplicate or out-of-order FE values in history" From f13fdd81de67b1ed60f6a7b97b8d1b15b0630b1c Mon Sep 17 00:00:00 2001 From: wlnc Date: Sun, 24 May 2026 12:50:51 +0200 Subject: [PATCH 7/8] add parallelism check to smoke tests --- run_local.sh | 4 ++++ smoke_test.sh | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/run_local.sh b/run_local.sh index f1363fb..211aec8 100755 --- a/run_local.sh +++ b/run_local.sh @@ -44,6 +44,10 @@ case "$AGENT" in python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_CV_LOCAL_SEED${SEED} \ -p "${PORTFOLIO[@]}" --dims 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 ;; + exp-das-cv-par) + python cv.py exp-das ${PORTFOLIO_STR}_EXPDAS_CV_PAR_LOCAL_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --dims 2 --cv-mode LOIO --n-epochs 1 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 --n-jobs 2 + ;; baselines) python baselines.py ${PORTFOLIO_STR}_BASELINES_LOCAL_SEED${SEED} \ -p "${PORTFOLIO[@]}" --agent all -d 2 --seed $SEED --fe-multiplier 10 --n-checkpoints 3 diff --git a/smoke_test.sh b/smoke_test.sh index 83d606f..4dd856f 100755 --- a/smoke_test.sh +++ b/smoke_test.sh @@ -32,7 +32,7 @@ run_smoke() { if [ "$#" -gt 0 ]; then AGENTS=("$@") else - AGENTS=(ppo ppo-cv rl-das rl-das-cv exp-das exp-das-cv baselines) + AGENTS=(ppo ppo-cv rl-das rl-das-cv exp-das exp-das-cv exp-das-cv-par baselines) fi for agent in "${AGENTS[@]}"; do @@ -42,7 +42,8 @@ for agent in "${AGENTS[@]}"; do rl-das) run_smoke "rl-das train" rl-das ;; rl-das-cv) run_smoke "rl-das cv" rl-das-cv ;; exp-das) run_smoke "exp-das train" exp-das ;; - exp-das-cv) run_smoke "exp-das cv" exp-das-cv;; + exp-das-cv) run_smoke "exp-das cv" exp-das-cv ;; + exp-das-cv-par) run_smoke "exp-das cv parallel" exp-das-cv-par;; baselines) run_smoke "baselines" baselines ;; *) echo "Unknown agent: $agent"; exit 1 ;; esac From b0acdb281a1cc61d6add49672d3bb52a80a6ca8d Mon Sep 17 00:00:00 2001 From: wlnc Date: Sun, 24 May 2026 13:13:43 +0200 Subject: [PATCH 8/8] aocc logging fix --- baselines.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/baselines.py b/baselines.py index 909e983..0cb7dc9 100644 --- a/baselines.py +++ b/baselines.py @@ -210,6 +210,7 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list "area_under_optimization_curve": best_m[ "area_under_optimization_curve" ], + "aocc": best_m["aocc"], "final_fitness": best_m["final_fitness"], "agent": "oracle-best", "best_agent": best_m["agent"], @@ -222,6 +223,7 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list "area_under_optimization_curve": worst_m[ "area_under_optimization_curve" ], + "aocc": worst_m["aocc"], "final_fitness": worst_m["final_fitness"], "agent": "oracle-worst", "worst_agent": worst_m["agent"],