Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
- package-ecosystem: "uv"
directory: "/"
schedule:
interval: "weekly"
35 changes: 35 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: CI

on:
push:
branches: ["**"]
pull_request:
branches: ["**"]

jobs:
test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"

- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}

- name: Install dev dependencies
run: uv sync --group dev

- name: Lint with ruff
run: uv run ruff check .

- name: Run tests
run: uv run pytest tests/ -v
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
66 changes: 56 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Heuristic-Driven Hyperparameter Optimization Algorithms

![CI](https://github.com/outsidermm/Heuristic-driven-Hyperparameter-Optimization-Algorithms/actions/workflows/ci.yml/badge.svg)

Welcome to the GitHub repository for the paper "Heuristic-driven Hyperparameter Optimization Algorithms." This project explores heuristic-based hyperparameter optimization (HPO) algorithms, offering an efficient alternative to traditional methods like Grid Search and Bayesian Optimization. The proposed algorithms are designed to balance computational efficiency and model performance, making them accessible to both novice and experienced machine learning practitioners.

## Table of Contents
Expand Down Expand Up @@ -105,13 +107,57 @@ We would like to thank Professor Seda Memik from Northwestern University

## Installation

To install and run the heuristic-driven HPO algorithms, follow these steps:

1. Clone the repository:
```bash
git clone https://github.com/outsidermm/Optimisation-of-Grid-Search-for-CNN-Hyperparameter-Tuning.git
```
2. Install the required dependencies:
```bash
pip install -r requirements.txt
```
**With uv (recommended):**
```bash
uv add heuristic-hpo
# TensorFlow / Keras are optional — install when you need to run training:
uv add "heuristic-hpo[tf]"
```

**With pip:**
```bash
pip install heuristic-hpo
pip install "heuristic-hpo[tf]" # includes TensorFlow + Keras
```

**From source:**
```bash
git clone https://github.com/outsidermm/Heuristic-driven-Hyperparameter-Optimization-Algorithms.git
cd Heuristic-driven-Hyperparameter-Optimization-Algorithms
uv sync --extra tf
```

## Quick Start

```python
from utility.dataloader import DataLoader
from algorithm import EpochTuner, BatchSizeTuner, LrTuner

# Load your dataset (expects ./dataset/cifar100/{X,y}_{train,test}.npy)
loader = DataLoader("cifar100")
train_ds, val_ds, test_ds = loader.load_dataset()

# Find the most cost-efficient epoch count between 10 and 250
tuner = EpochTuner("cifar100", left_bound=10, right_bound=250, exploration_factor=5)
best_epoch, accuracy, time_taken = tuner.binary_search_efficient_epoch()
print(f"Best epoch: {best_epoch}, accuracy: {accuracy:.4f}, time: {time_taken:.1f}s")

# Find the largest safe batch size (bounds are log2 exponents: 4 -> 2**4 = 16, 12 -> 2**12 = 4096)
bs_tuner = BatchSizeTuner("cifar100", left_bound=4, right_bound=12, acceptable_range=0.30)
best_bs, accuracy, time_taken = bs_tuner.search()
print(f"Best batch size: {best_bs}")

# Find the optimal learning rate (searches 10^-1 to 10^-7)
lr_tuner = LrTuner("cifar100", left_bound=1, right_bound=7, local_extrema_allowance=0.05)
lr_exp, accuracy, time_taken = lr_tuner.search()
print(f"Best learning rate: 1e-{lr_exp}, accuracy: {accuracy:.4f}")
```

## Development

```bash
uv sync --group dev # install dev tools (pytest, ruff)
uv run pytest tests/ -v # run tests (TF-independent tests run without --extra tf)
uv run ruff check . # lint
uv build # build wheel
```
6 changes: 6 additions & 0 deletions algorithm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .batch_size import BatchSizeTuner
from .epoch import EpochTuner
from .lr import LrTuner
from .searcher import Searcher

__all__ = ["EpochTuner", "BatchSizeTuner", "LrTuner", "Searcher"]
78 changes: 65 additions & 13 deletions algorithm/batch_size.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,47 @@
import numpy as np
import sys
from collections.abc import Callable

sys.path.append(".")
import numpy as np

from searcher import Searcher
from utility.dataloader import DataLoader
from typing import Tuple
import tensorflow as tf

from .searcher import Searcher


def _get_oom_error() -> type:
"""Return tf.errors.ResourceExhaustedError if TF is available, else a dummy."""
try:
import tensorflow as tf # noqa: PLC0415

return tf.errors.ResourceExhaustedError
except ImportError:
return type("_NeverRaised", (Exception,), {})


class BatchSizeTuner:
"""Heuristic binary-search tuner for the maximum viable batch size.

Searches for the largest batch size (as a power of 2) whose accuracy stays
within ``acceptable_range / 2`` of the baseline accuracy measured at
``left_bound``.

Parameters
----------
dataset : str
Either ``"cifar100"`` or ``"imagenet"``.
left_bound : int
Log2 of the minimum batch size (e.g. ``4`` → batch size 16).
right_bound : int
Log2 of the maximum batch size (e.g. ``12`` → batch size 4096).
acceptable_range : float
Total acceptable accuracy drop relative to the baseline (default 0.30).
training_fn : Callable[[int], tuple[float, float]] | None
Optional override for the training function. Called as
``training_fn(batch_size)`` (actual batch size, not log2) and must
return ``(time, accuracy)``. Pass a callable to run without TensorFlow.
dataset_dir : str
Root directory for dataset ``.npy`` files (default ``"./dataset"``).
Ignored when ``training_fn`` is provided.
"""

__batch_list = np.array([])
__accuracy_list = np.array([])
Expand All @@ -21,19 +53,36 @@ def __init__(
left_bound: int,
right_bound: int,
acceptable_range: float = 0.30,
training_fn: Callable[[int], tuple[float, float]] | None = None,
dataset_dir: str = "./dataset",
) -> None:
self.__acceptable_range = acceptable_range / 2
self.__left_bound = left_bound
self.__right_bound = right_bound
self.__dataset = dataset
dataset_loader = DataLoader(self.__dataset)
self.__train_ds, self.__val_ds, self.__test_ds = dataset_loader.load_dataset()
if training_fn is not None:
self.__training_fn = training_fn
self.__train_ds = self.__val_ds = self.__test_ds = None
else:
dataset_loader = DataLoader(self.__dataset, dataset_dir=dataset_dir)
self.__train_ds, self.__val_ds, self.__test_ds = dataset_loader.load_dataset()
self.__training_fn = None

def search(self) -> tuple[int, float, float]:
"""Search for the largest acceptable batch size.

def search(self) -> Tuple[int, float, float]:
Returns
-------
Tuple[int, float, float]
``(best_batch_size, accuracy, time_taken_seconds)``.
Returns ``(-1.0, -1.0, -1.0)`` if even the minimum batch size
exceeds available GPU memory.
"""
left = self.__left_bound
_oom_error: type = _get_oom_error()
try:
_, acc_bound = self.batch_size_runner(left)
except tf.errors.ResourceExhaustedError:
except _oom_error:
return -1.0, -1.0, -1.0

right = self.__right_bound
Expand All @@ -55,7 +104,7 @@ def search(self) -> Tuple[int, float, float]:
else:
right = mid - 1

except tf.errors.ResourceExhaustedError:
except _oom_error:
right = mid - 1

mid = left + (right - left) // 2
Expand All @@ -66,11 +115,14 @@ def search(self) -> Tuple[int, float, float]:
self.__time_list[self.__batch_list == best_batch][0],
)

def batch_size_runner(self, batch_size: int) -> Tuple[float, float]:
def batch_size_runner(self, batch_size: int) -> tuple[float, float]:
actual = 2**batch_size
if self.__training_fn is not None:
return self.__training_fn(actual)
return Searcher(
dataset=self.__dataset,
train_ds=self.__train_ds,
val_ds=self.__val_ds,
test_ds=self.__test_ds,
verbose=1,
).training(batch_size=2**batch_size)
).training(batch_size=actual)
79 changes: 60 additions & 19 deletions algorithm/epoch.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,39 @@
import numpy as np
import math
import sys
from collections.abc import Callable

sys.path.append(".")
import numpy as np

from utility.helper_func import min_max_scalar, weighted_avg
from searcher import Searcher
from utility.dataloader import DataLoader
from typing import Tuple
from utility.helper_func import min_max_scalar, weighted_avg

from .searcher import Searcher


class EpochTuner:
"""Heuristic binary-search tuner for the optimal number of training epochs.

Uses a weighted efficiency metric combining normalized training time and
(1 - accuracy) to find the epoch count with the best accuracy/time trade-off.

Parameters
----------
dataset : str
Either ``"cifar100"`` or ``"imagenet"``.
left_bound : int
Minimum epoch count to consider.
right_bound : int
Maximum epoch count to consider.
exploration_factor : int
Stops the search when ``right - left <= exploration_factor``.
training_fn : Callable[[int], tuple[float, float]] | None
Optional override for the training function. Called as
``training_fn(epoch)`` and must return ``(time, accuracy)``.
When ``None`` (default) a real ``Searcher`` + ``DataLoader`` is used.
Pass a custom callable to run without TensorFlow or dataset files.
dataset_dir : str
Root directory for dataset ``.npy`` files (default ``"./dataset"``).
Ignored when ``training_fn`` is provided.
"""

__epoch_list = np.array([])
__time_list = np.array([])
Expand All @@ -25,24 +48,34 @@ def __init__(
left_bound: int,
right_bound: int,
exploration_factor: int = 1,
training_fn: Callable[[int], tuple[float, float]] | None = None,
dataset_dir: str = "./dataset",
) -> None:
self.__left_bound = left_bound
self.__right_bound = right_bound
self.__exploration_factor = exploration_factor
self.__dataset = dataset
dataset_loader = DataLoader(self.__dataset)
self.__train_ds, self.__val_ds, self.__test_ds = dataset_loader.load_dataset()
if training_fn is not None:
self.__training_fn = training_fn
self.__train_ds = self.__val_ds = self.__test_ds = None
else:
dataset_loader = DataLoader(self.__dataset, dataset_dir=dataset_dir)
self.__train_ds, self.__val_ds, self.__test_ds = dataset_loader.load_dataset()
self.__training_fn = None

def epoch_run(self, epoch: int) -> None:
# run the epoch
if epoch not in self.__epoch_list:
time, acc = Searcher(
dataset=self.__dataset,
train_ds=self.__train_ds,
val_ds=self.__val_ds,
test_ds=self.__test_ds,
verbose=1,
).training(epoch=epoch)
if self.__training_fn is not None:
time, acc = self.__training_fn(epoch)
else:
time, acc = Searcher(
dataset=self.__dataset,
train_ds=self.__train_ds,
val_ds=self.__val_ds,
test_ds=self.__test_ds,
verbose=1,
).training(epoch=epoch)

self.__time_list = np.append(self.__time_list, time)
self.__accuracy_list = np.append(self.__accuracy_list, acc)
Expand All @@ -56,7 +89,14 @@ def epoch_run(self, epoch: int) -> None:
self.__time_normalized = min_max_scalar(self.__time_list)
self.__accuracy_normalized = min_max_scalar(self.__accuracy_list)

def binary_search_efficient_epoch(self) -> Tuple[int, float, float]:
def binary_search_efficient_epoch(self) -> tuple[int, float, float]:
"""Search for the most efficient epoch count.

Returns
-------
Tuple[int, float, float]
``(best_epoch, accuracy_at_best_epoch, time_at_best_epoch)``
"""
left = self.__left_bound
right = self.__right_bound
best_epoch = self.__left_bound
Expand All @@ -66,6 +106,7 @@ def binary_search_efficient_epoch(self) -> Tuple[int, float, float]:
left_efficiency = self.weighted_avg_from_epoch(left)
right_efficiency = self.weighted_avg_from_epoch(right)

mid = left # sentinel: mid is unused on the first pass when interval is already small
mid_efficiency = best_efficiency = math.inf

while left <= right:
Expand Down Expand Up @@ -102,9 +143,9 @@ def binary_search_efficient_epoch(self) -> Tuple[int, float, float]:
right = mid

return (
best_epoch[0],
self.__accuracy_list[self.__epoch_list == best_epoch][0],
self.__time_list[self.__epoch_list == best_epoch][0],
int(best_epoch),
float(self.__accuracy_list[self.__epoch_list == best_epoch][0]),
float(self.__time_list[self.__epoch_list == best_epoch][0]),
)

def weighted_avg_from_epoch(self, epoch: int) -> float:
Expand Down
Loading
Loading