Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Runs the repository's pre-commit hooks on every push to, and every pull
# request against, main or develop.
name: Pre-commit checks

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

# This workflow only reads the repository, so limit GITHUB_TOKEN to
# read-only instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Submodules are checked out as well so hooks see the full tree.
          submodules: recursive

      # v5 runs on the Node 20 runtime; v4 targeted the deprecated Node 16.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      # Install the project together with its dev dependency group.
      - name: Install dependencies
        run: |
          uv sync --group dev

      # Run inside the uv-managed environment prepared by the previous step.
      - name: Run pre-commit
        run: |
          uv run pre-commit run --all-files
33 changes: 33 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Runs the pytest suite on every push to, and every pull request against,
# main or develop.
name: Tests

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

# This workflow only reads the repository, so limit GITHUB_TOKEN to
# read-only instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Submodules are checked out as well so tests see the full tree.
          submodules: recursive

      # v5 runs on the Node 20 runtime; v4 targeted the deprecated Node 16.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      # Install the project together with its dev dependency group.
      - name: Install dependencies
        run: |
          uv sync --group dev

      # Run inside the uv-managed environment prepared by the previous step.
      - name: Run tests
        run: |
          uv run pytest
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ repos:
# Type checking
- id: ty
name: ty
entry: uvx ty check
entry: uv run ty check
language: system
types: [python]
# args: [] # Add any args if needed
Expand Down
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ A lightweight job queue for shared GPU servers without SLURM.
1. **Start the Scheduler**:
```bash
# Run in foreground (for testing/debugging)
gpu-queue serve --min-free 2
gpu-queue serve --min-free 2 --max-use 6

# OR Start background daemon
gpu-queue start --min-free 2
gpu-queue start --min-free 2 --max-use 6
```
`--min-free` preserves that many physically idle GPUs. GPUs occupied by
other users do not count toward this reserve. `--max-use` caps how many
GPUs gpu-queue jobs may occupy at once.

2. **Submit Jobs**:
```bash
Expand All @@ -42,10 +45,12 @@ A lightweight job queue for shared GPU servers without SLURM.
gpu-queue watch
```
**Keybindings**:
- `d`: **Duplicate** selected job into **Staging** (enters Edit Mode)
- `v`: Enter/leave select mode for bulk actions
- `d`: **Duplicate** selected job into **Staging**
- `n`: Create a **new staged** job
- `e`: **Edit** selected staged job / **Save** staged changes
- `s` or `Enter` (in Staging): Send staged job to **Pending** (with confirmation)
- `b` (in Pending): Move selected pending job back to the top of **Staging**
- `c`: **Discard** staged job, or cancel pending/running job
- `J` / `K` (in Pending): Move selected job down/up in queue order
- `Space`: View logs (internal viewer)
Expand All @@ -54,8 +59,10 @@ A lightweight job queue for shared GPU servers without SLURM.
- `r`: Retry completed job into **Staging**
- `x`: Remove completed job

In select mode, `j`/`k` extend the selected rows as you move. `Esc` clears the selection. Batch-safe commands apply to all selected rows in the active panel: `b`, `c`, `s`, `d`, `p`, `r`, `x`, and pending `J`/`K` reorder. Edit and logs remain cursor-only.

**Interactive Editing**:
- **Enter Edit Mode**: Press `e` on a staged job, or create one via `n` / `d` / `r`.
- **Enter Edit Mode**: Press `e` on a staged job, or create one via `n`.
- **Navigation**: Use `h`/`l` to switch between GPUs and Command fields.
- **Modify Values**: Use `j`/`k` to decrease/increase GPU count.
- **Edit Command**: Select the Command field and press `Enter` to open your system editor.
Expand Down
139 changes: 139 additions & 0 deletions src/gpu_queue/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Command-line interface for gpu-queue."""

import argparse

from gpu_queue.commands import (
cmd_add,
cmd_cancel,
cmd_clear,
cmd_logs,
cmd_pause,
cmd_retry,
cmd_serve,
cmd_start,
cmd_stop,
)
from gpu_queue.tui.app import GPUQueueTUI


def cmd_watch(args: argparse.Namespace) -> None:
    """Run the interactive queue-monitoring TUI.

    Builds a ``GPUQueueTUI`` from ``args.interval`` and passes its ``main``
    callable to ``curses.wrapper``. A ``KeyboardInterrupt`` raised while the
    TUI is running is swallowed, so the closing message is still printed;
    any other exception propagates to the caller.

    Args:
        args: Parsed CLI namespace; only ``interval`` is read here.
    """
    import contextlib
    import curses

    screen_app = GPUQueueTUI(args.interval)
    # Only the curses session itself is guarded; constructing the TUI is not.
    with contextlib.suppress(KeyboardInterrupt):
        curses.wrapper(screen_app.main)
    print("Exited TUI.")


def _add_capacity_options(subparser: argparse.ArgumentParser) -> None:
    """Attach the ``--min-free`` / ``--max-use`` flags used by start and serve."""
    subparser.add_argument(
        "--min-free",
        type=int,
        default=2,
        help="Number of physically idle GPUs to always keep free",
    )
    subparser.add_argument(
        "--max-use",
        type=int,
        default=None,
        help="Maximum GPUs this queue may occupy at once",
    )


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level ``argparse`` parser with every sub-command.

    Each sub-parser records its handler under ``func`` (via
    ``set_defaults``) so the caller can dispatch with ``args.func(args)``.
    A sub-command is mandatory.

    Returns:
        The fully configured parser.
    """
    root = argparse.ArgumentParser(description="GPU Job Queue Scheduler")
    commands = root.add_subparsers(dest="command", required=True)

    def register(name, handler, help_text):
        """Create sub-command *name* and bind *handler* as its ``func``."""
        sub = commands.add_parser(name, help=help_text)
        sub.set_defaults(func=handler)
        return sub

    # --- add ---------------------------------------------------------------
    add_cmd = register("add", cmd_add, "Add a job to the queue")
    # NOTE(review): this positional shares the dest ``command`` with the
    # sub-command selector above, so for ``add`` the parsed ``args.command``
    # presumably ends up holding the job's command string rather than
    # "add" -- confirm before renaming either one.
    add_cmd.add_argument("command", help="Command to run")
    add_cmd.add_argument(
        "--gpus", "-g", type=int, default=2, help="Number of GPUs required"
    )
    add_cmd.add_argument(
        "--priority",
        "-p",
        choices=["low", "medium", "high"],
        default="medium",
        help="Job priority",
    )
    add_cmd.add_argument(
        "--front",
        "-f",
        action="store_true",
        help="Add to front of queue (Urgent priority)",
    )

    # --- start / stop (background scheduler) -------------------------------
    _add_capacity_options(
        register("start", cmd_start, "Start the queue scheduler (background)")
    )
    register("stop", cmd_stop, "Stop the background scheduler")

    # --- serve (foreground scheduler) --------------------------------------
    serve_cmd = register(
        "serve", cmd_serve, "Run the queue scheduler (foreground)"
    )
    _add_capacity_options(serve_cmd)
    serve_cmd.add_argument(
        "--exclude-gpus",
        type=str,
        default="",
        help="Comma-separated list of GPU indices to ignore (e.g. '0,1')",
    )

    # --- per-job commands --------------------------------------------------
    cancel_cmd = register("cancel", cmd_cancel, "Cancel a pending job")
    cancel_cmd.add_argument("job_id", help="Job ID to cancel")

    logs_cmd = register("logs", cmd_logs, "Show job logs")
    logs_cmd.add_argument("job_id", help="Job ID")
    logs_cmd.add_argument(
        "--lines", "-n", type=int, default=50, help="Number of lines"
    )

    register("clear", cmd_clear, "Clear completed jobs")

    retry_cmd = register("retry", cmd_retry, "Re-queue a completed job")
    retry_cmd.add_argument("job_id", help="Job ID to retry")
    retry_cmd.add_argument(
        "--front", "-f", action="store_true", help="Add to front of queue"
    )

    pause_cmd = register(
        "pause", cmd_pause, "Pause a running job (re-queue at front)"
    )
    pause_cmd.add_argument("job_id", help="Job ID to pause")

    # --- watch (TUI) -------------------------------------------------------
    watch_cmd = register("watch", cmd_watch, "Watch queue status continuously")
    watch_cmd.add_argument(
        "--interval", "-n", type=float, default=2.0, help="Update interval in seconds"
    )

    return root


def main(argv: list[str] | None = None) -> None:
    """Parse the command line and run the selected sub-command.

    Args:
        argv: Argument strings to parse, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which keeps
            the previous no-argument behaviour. Passing a list lets tests
            and other Python callers drive the CLI directly.
    """
    args = build_parser().parse_args(argv)
    # Every sub-parser stores its handler under ``func`` via ``set_defaults``.
    args.func(args)
Loading
Loading