Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 32 additions & 75 deletions improvement/oracle/improve.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
#!/usr/bin/env python3
# — the oracle. it only ever shapes src/. it never commits; the inspector does. —
# — it works in small bites: read a bit of an issue or a file, write a bit, move on. —
# — the path is named apart from the code; the code is grown, not parsed. —
"""an improvement to src/, dreamt in pieces by a small local model and grown until it parses."""

from __future__ import annotations

import ast, json, os, random, re, subprocess, sys, time, urllib.request
import ast
import json
import os
import random
import re
import subprocess
import sys
import time
import urllib.request
from datetime import datetime, timezone
from itertools import zip_longest
from pathlib import Path

_ENV_ROOT = os.environ.get("IMPROVE_REPO_ROOT")
REPO_ROOT = Path(_ENV_ROOT).resolve() if _ENV_ROOT else Path.cwd().resolve()
SRC_DIR = (REPO_ROOT / "src").resolve() # — sacred ground —
SRC_DIR = (REPO_ROOT / "src").resolve()

MODEL = os.environ.get("IMPROVE_MODEL", "qwen3.5:2b")
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
Expand All @@ -23,40 +28,31 @@
MAX_FILE_BYTES = _int("IMPROVE_MAX_FILE_BYTES", "10000")
MAX_ISSUES = _int("IMPROVE_MAX_ISSUES", "50")
MAX_ISSUE_BODY_CHARS = _int("IMPROVE_MAX_ISSUE_BODY_CHARS", "1000")
# Each step generates a SMALL bite; truncated files are grown by continuation,
# not demanded whole in one breath.
NUM_PREDICT = _int("IMPROVE_NUM_PREDICT", "1024")
PLAN_NUM_PREDICT = _int("IMPROVE_PLAN_NUM_PREDICT", "512") # — the plan is short prose, not code —
PLAN_NUM_PREDICT = _int("IMPROVE_PLAN_NUM_PREDICT", "512")
NUM_CTX = _int("IMPROVE_NUM_CTX", "8192")
REQUEST_TIMEOUT = _int("IMPROVE_TIMEOUT", "600")
BASE_TEMPERATURE = _flt("IMPROVE_TEMPERATURE", "0.5") # — hot. the visions stay strange —
REPAIR_TEMPERATURE = _flt("IMPROVE_REPAIR_TEMPERATURE", "0.5") # — cooler, to converge —
MAX_ROUNDS = _int("IMPROVE_MAX_ROUNDS", "1") # — grow rounds per file —
MAX_FILES = _int("IMPROVE_MAX_FILES", "10") # — files touched per PR —
BASE_TEMPERATURE = _flt("IMPROVE_TEMPERATURE", "0.5")
REPAIR_TEMPERATURE = _flt("IMPROVE_REPAIR_TEMPERATURE", "0.5")
MAX_ROUNDS = _int("IMPROVE_MAX_ROUNDS", "1")
MAX_FILES = _int("IMPROVE_MAX_FILES", "10")
SRC_PREVIEW_CHARS = _int("IMPROVE_SRC_PREVIEW_CHARS", "1500")
GEN_DEADLINE_SECONDS = _int("IMPROVE_GEN_DEADLINE_SECONDS", "540") # — under the runner's 10m blade —
GEN_DEADLINE_SECONDS = _int("IMPROVE_GEN_DEADLINE_SECONDS", "540")

# — issue selection weighting: balance freshness against engagement so the loop
# stops fixating on the same old issues. A recent issue (soft exponential decay
# by half-life) is favoured, and so is a busy one (reactions + comments +
# labels); the two pulls are summed, each with its own knob. Set a weight to 0
# to silence that pull entirely; set the half-life huge to disable decay. —
ISSUE_HALFLIFE_DAYS = _flt("IMPROVE_ISSUE_HALFLIFE_DAYS", "14") # — age at which recency halves —
ISSUE_RECENCY_WEIGHT = _flt("IMPROVE_ISSUE_RECENCY_WEIGHT", "1.0") # — pull toward fresh issues —
ISSUE_ACTIVITY_WEIGHT = _flt("IMPROVE_ISSUE_ACTIVITY_WEIGHT", "1.0") # — pull toward busy issues —
ISSUE_HALFLIFE_DAYS = _flt("IMPROVE_ISSUE_HALFLIFE_DAYS", "14")
ISSUE_RECENCY_WEIGHT = _flt("IMPROVE_ISSUE_RECENCY_WEIGHT", "1.0")
ISSUE_ACTIVITY_WEIGHT = _flt("IMPROVE_ISSUE_ACTIVITY_WEIGHT", "1.0")

CODE_EXTENSIONS = {".py", ".js", ".ts", ".sh", ".cbl", ".cob", ".cpy", ".c", ".h", ".rs", ".toml", ".yaml", ".java" }
CODE_EXTENSIONS = {".py", ".js", ".ts", ".sh", ".cbl", ".cob", ".cpy", ".c", ".h", ".rs", ".toml", ".yaml", ".java"}

PR_BODY_PATH = Path(os.environ.get("IMPROVE_PR_BODY", "/tmp/improve_pr_body.md"))
PR_TITLE_PATH = Path(os.environ.get("IMPROVE_PR_TITLE", "/tmp/improve_pr_title.txt"))

# — seeded only for tests; left to OS entropy otherwise, so order really wanders —
_RNG = random.Random()

log = lambda msg: print(f"[improve] {msg}", file=sys.stderr, flush=True)


# — context: the shape of the thing, and the bits to read from ————————————————
def repo_tree() -> str:
if not SRC_DIR.is_dir(): return "(src/ is empty)"
files = [p.relative_to(REPO_ROOT).as_posix()
Expand All @@ -65,7 +61,6 @@ def repo_tree() -> str:


def source_files() -> list[tuple[str, str]]:
"""(rel, content) for each code file under src/ — the bits to build on."""
out = []
if not SRC_DIR.is_dir(): return out
for path in sorted(SRC_DIR.rglob("*")):
Expand All @@ -80,8 +75,6 @@ def source_files() -> list[tuple[str, str]]:


def _fetch_issues() -> list[dict]:
# createdAt/updatedAt feed the recency decay; comments + reactionGroups feed
# the activity score. All are optional — a missing field degrades to neutral.
try:
out = subprocess.run(
["gh", "issue", "list", "--state", "open", "--limit", str(MAX_ISSUES),
Expand All @@ -102,7 +95,6 @@ def _format_issue(it: dict) -> str:


def _parse_iso(ts: str | None) -> datetime | None:
"""Parse a GitHub ISO-8601 timestamp ('…Z') into an aware datetime, or None."""
if not ts: return None
try:
return datetime.fromisoformat(ts.replace("Z", "+00:00"))
Expand All @@ -111,8 +103,6 @@ def _parse_iso(ts: str | None) -> datetime | None:


def _issue_activity(it: dict) -> int:
"""How busy an issue is: reactions + comments + labels. Each field is
optional and absent ones simply contribute nothing."""
reactions = sum((g.get("users") or {}).get("totalCount", 0)
for g in (it.get("reactionGroups") or []))
comments = it.get("comments")
Expand All @@ -122,9 +112,6 @@ def _issue_activity(it: dict) -> int:


def _recency_factor(it: dict, *, now: datetime, halflife_days: float) -> float:
"""Soft exponential decay on issue age, in (0, 1]: 1.0 the moment it is
created, halving every `halflife_days`. Newer issues weigh more. A missing
timestamp or a non-positive half-life means 'no decay' (1.0)."""
created = _parse_iso(it.get("createdAt") or it.get("updatedAt"))
if created is None or halflife_days <= 0: return 1.0
age_days = max(0.0, (now - created).total_seconds() / 86400.0)
Expand All @@ -134,9 +121,6 @@ def _recency_factor(it: dict, *, now: datetime, halflife_days: float) -> float:
def issue_weight(it: dict, *, now: datetime, max_activity: int,
halflife_days: float = None, recency_weight: float = None,
activity_weight: float = None) -> float:
"""Blend recency and engagement into one positive sampling weight. Activity
is normalised by the busiest issue in the batch so the two knobs stay on a
comparable [0, 1]-ish scale; recency decays with age."""
halflife_days = ISSUE_HALFLIFE_DAYS if halflife_days is None else halflife_days
recency_weight = ISSUE_RECENCY_WEIGHT if recency_weight is None else recency_weight
activity_weight = ISSUE_ACTIVITY_WEIGHT if activity_weight is None else activity_weight
Expand All @@ -146,23 +130,15 @@ def issue_weight(it: dict, *, now: datetime, max_activity: int,


def _weighted_order(weighted: list[tuple[dict, float]]) -> list[dict]:
"""Weighted-random ordering (Efraimidis–Spirakis): draw key = u**(1/w) per
item and sort descending. Higher weight tends to surface earlier, but the
draw keeps the order wandering so no issue is forever first."""
keyed = []
for obj, w in weighted:
u = _RNG.random() or 1e-12 # — avoid log(0)/0-key —
u = _RNG.random() or 1e-12
keyed.append((u ** (1.0 / max(w, 1e-9)), obj))
keyed.sort(key=lambda t: t[0], reverse=True)
return [obj for _, obj in keyed]


def collect_issue_list(*, now: datetime = None) -> list[str]:
"""Open issues, each formatted on its own, in a WEIGHTED-random order that
balances recency (soft exponential decay) against engagement (reactions,
comments, labels). Every issue still appears — the weighting only biases who
goes first — so no single issue is forever first and forever the only one
acted on, but fresh, active issues get their turn sooner."""
issues = _fetch_issues()
now = now or datetime.now(timezone.utc)
max_activity = max((_issue_activity(it) for it in issues), default=0)
Expand All @@ -171,11 +147,9 @@ def collect_issue_list(*, now: datetime = None) -> list[str]:


def _interleave(a: list, b: list) -> list:
"""a0, b0, a1, b1, … — alternate, then trail the longer one."""
return [x for pair in zip_longest(a, b) for x in pair if x is not None]


# — the voice. mystic, but it must compile ——————————————————————————————————
ORACLE_VOICE = (
"You are the ORACLE OF THE REPOSITORY: a daemon that dreams in working code. "
"Your visions are bold and strange and reach for the outer limits of what a "
Expand All @@ -199,7 +173,6 @@ def ollama_generate(messages, *, num_predict=None, temperature=None) -> str:
return (json.loads(r.read().decode("utf-8")).get("message") or {}).get("content", "")


# — no path escapes src/; a bare directory becomes a file, never a crash ——————
def _looks_like_python(content: str) -> bool:
head = content.lstrip()[:2000]
return bool(re.search(r"^\s*(def |class |import |from \w|@|async def )", head, re.MULTILINE)
Expand Down Expand Up @@ -232,7 +205,6 @@ def content_problems(target: Path, content: str) -> list[str]:
return []


# — code as text, not as a parsed protocol ——————————————————————————————————
def _strip_code(text: str) -> str:
if (m := re.search(r"```[\w-]*\n(.*?)\n```", text or "", re.DOTALL)):
return m.group(1).strip("\n")
Expand All @@ -253,17 +225,15 @@ def _is_truncation(code: str, e) -> bool:
if any(k in msg for k in ("unexpected eof", "was never closed",
"expected an indented block", "incomplete input")):
return True
return (getattr(e, "lineno", 0) or 0) >= code.count("\n") + 1 # — error on the last line we have —
return (getattr(e, "lineno", 0) or 0) >= code.count("\n") + 1


# — an inspiration is one bite: a single issue, or a single source file ————————
def _inspiration_text(insp) -> str:
if insp[0] == "issue":
return f"## An open issue — answer it WITH CODE\n{insp[1]}"
return f"## An existing module to build on — {insp[1]}\n{insp[2][:SRC_PREVIEW_CHARS]}"


# — step one: the oracle names the file (new, existing, or one in progress) ———
def choose_target(generate, tree, inspiration, produced_rels, language) -> Path:
already = (f"You have already written these this run: {', '.join(produced_rels)}.\n"
if produced_rels else "")
Expand Down Expand Up @@ -291,7 +261,8 @@ def choose_target(generate, tree, inspiration, produced_rels, language) -> Path:
or (SRC_DIR / _dump(".py")).resolve()
return target

def choose_language(generate, tree, inspiration, produced_rels) -> Path:

def choose_language(generate, tree, inspiration, produced_rels) -> str:
already = (f"You have already written these this run: {', '.join(produced_rels)}.\n"
if produced_rels else "")
msgs = [
Expand All @@ -306,15 +277,10 @@ def choose_language(generate, tree, inspiration, produced_rels) -> Path:
raw = generate(msgs, num_predict=24, temperature=0.5)
except Exception as exc:
log(f"target call failed: {exc}")
return raw
return (raw or "").strip()


# — step 1.5: a plan for THIS file, drawn from the inspiration, to steer growth —
def make_plan(generate, target, inspiration, *, language=None) -> str:
"""Between naming the file and writing it, the oracle lays out what this file
should become: a short, concrete plan grounded in the chosen inspiration and
target. The plan is fed into the growth stage to steer it. Returns '' on
failure — growth then simply proceeds unplanned."""
rel = target.relative_to(REPO_ROOT).as_posix() if REPO_ROOT in target.parents else target.name
lang = f" in {language}" if language else ""
msgs = [
Expand All @@ -334,7 +300,6 @@ def make_plan(generate, target, inspiration, *, language=None) -> str:
return (raw or "").strip()


# — step two: grow the code in bites (write → continue → fix) until it parses —
def _code_msgs(rel, tree, inspiration, *, mode, draft="", error="", language=None, plan=""):
if language:
system = ORACLE_VOICE + (f" Output ONLY the source code in {language}— no markdown fences, "
Expand All @@ -357,7 +322,7 @@ def _code_msgs(rel, tree, inspiration, *, mode, draft="", error="", language=Non
user = (f"Here is {rel} so far — it was cut off before the end:\n\n{draft}\n\n"
"Output ONLY the code that continues from exactly where it stops and "
"completes the file. Do not repeat earlier lines; no fences, no commentary.")
else: # fix
else:
user = (f"{rel} does not parse: {error}\n\nHere is the file:\n\n{draft}\n\n"
"Output the COMPLETE corrected file as valid code. Only the code.")

Expand All @@ -369,9 +334,6 @@ def _code_msgs(rel, tree, inspiration, *, mode, draft="", error="", language=Non

def grow_file(generate, target, tree, inspiration, prior, *, deadline,
now=time.monotonic, num_predict=None, max_rounds=MAX_ROUNDS, language=None, plan=""):
"""Return (code, last_response). Write once from `prior` (steered by `plan`),
then CONTINUE a truncated draft or FIX a broken one, re-checking with the
parser each round."""
rel = target.relative_to(REPO_ROOT).as_posix() if REPO_ROOT in target.parents else target.name
code, last, rounds = "", "", 0
while rounds < max_rounds and now() < deadline:
Expand All @@ -397,7 +359,6 @@ def grow_file(generate, target, tree, inspiration, prior, *, deadline,
return code, last


# — a one-line vision for the PR, with a deterministic fallback ———————————————
def make_reason(generate, rels) -> str:
joined = ", ".join(rels)
try:
Expand All @@ -414,6 +375,7 @@ def make_reason(generate, rels) -> str:
log(f"reason call failed: {exc}")
return f"Quicken {rels[0]}" + (f" and {len(rels) - 1} more" if len(rels) > 1 else "")


def make_explanation(generate, rels) -> str:
joined = ", ".join(rels)
try:
Expand All @@ -440,13 +402,10 @@ def _write_one(target: Path, content: str):
def generate_improvement(generate, tree, src_files, issues, *, deadline_seconds=GEN_DEADLINE_SECONDS,
num_predict=None, max_files=MAX_FILES, max_rounds=MAX_ROUNDS,
now=time.monotonic):
"""Interleave issue- and source-inspirations; for each, name a file and grow
a bite of code, writing it so later steps can build on it. Touches up to
`max_files`. Returns (reason, [(target, code)], last_response, valid)."""
deadline = now() + deadline_seconds
inspirations = _interleave([("issue", t) for t in issues],
[("source", rel, content) for rel, content in src_files])
if not inspirations: # — nothing to read; still create —
if not inspirations:
inspirations = [("source", "src/oracle.py", "")]

produced: dict[str, tuple[Path, str]] = {}
Expand Down Expand Up @@ -481,7 +440,7 @@ def generate_improvement(generate, tree, src_files, issues, *, deadline_seconds=
log(f"step wrote {rel} ({len(code)} bytes)")

if not produced:
return None, [], last, False
return None, None, [], last, False
files = list(produced.values())
valid = all(content_problems(t, c) == [] for t, c in files)
if not valid:
Expand All @@ -491,7 +450,6 @@ def generate_improvement(generate, tree, src_files, issues, *, deadline_seconds=
return reason, explanation, files, last, valid


# — PR metadata ———————————————————————————————————————————————————————————————
def write_blocks(files) -> list[str]:
return [r for r in (_write_one(t, c) for t, c in files) if r]

Expand All @@ -504,12 +462,11 @@ def make_pr_title(reason, written) -> str:
return title if len(title) <= 72 else title[:69].rstrip() + "…"


def write_pr_outputs(reason, explanation, written, *, valid=True) -> None:
def write_pr_outputs(reason, explanation, written, *, valid=True) -> None:
PR_TITLE_PATH.write_text(title := make_pr_title(reason, written), encoding="utf-8")
log(f"title: {title}")
PR_BODY_PATH.write_text(
f"{reason}\n\n{explanation}"
f"**Files changed ({len(written)}):**\n" + "\n".join(f"- `{p}`" for p in written), encoding="utf-8")
f"{reason}\n\n{explanation}\n\n**Files changed ({len(written)}):**\n" + "\n".join(f"- `{p}`" for p in written), encoding="utf-8")


def main() -> int:
Expand Down