From d66dc10a8dd3b576fc93a0484d81efabbd56330e Mon Sep 17 00:00:00 2001
From: iyeanur6-cyber <iyeanur6@gmail.com>
Date: Sat, 27 Jun 2026 00:44:42 +0530
Subject: [PATCH] Update improve.py

---
 improvement/oracle/improve.py | 107 ++++++++++------------------------
 1 file changed, 32 insertions(+), 75 deletions(-)

diff --git a/improvement/oracle/improve.py b/improvement/oracle/improve.py
index 041795c9..38bab2d5 100644
--- a/improvement/oracle/improve.py
+++ b/improvement/oracle/improve.py
@@ -1,18 +1,23 @@
 #!/usr/bin/env python3
-# — the oracle. it only ever shapes src/. it never commits; the inspector does. —
-# — it works in small bites: read a bit of an issue or a file, write a bit, move on. —
-# — the path is named apart from the code; the code is grown, not parsed. —
-"""an improvement to src/, dreamt in pieces by a small local model and grown until it parses."""
+
 from __future__ import annotations
 
-import ast, json, os, random, re, subprocess, sys, time, urllib.request
+import ast
+import json
+import os
+import random
+import re
+import subprocess
+import sys
+import time
+import urllib.request
 from datetime import datetime, timezone
 from itertools import zip_longest
 from pathlib import Path
 
 _ENV_ROOT = os.environ.get("IMPROVE_REPO_ROOT")
 REPO_ROOT = Path(_ENV_ROOT).resolve() if _ENV_ROOT else Path.cwd().resolve()
-SRC_DIR = (REPO_ROOT / "src").resolve()          # — sacred ground —
+SRC_DIR = (REPO_ROOT / "src").resolve()
 
 MODEL = os.environ.get("IMPROVE_MODEL", "qwen3.5:2b")
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
@@ -23,40 +28,31 @@
 MAX_FILE_BYTES        = _int("IMPROVE_MAX_FILE_BYTES", "10000")
 MAX_ISSUES            = _int("IMPROVE_MAX_ISSUES", "50")
 MAX_ISSUE_BODY_CHARS  = _int("IMPROVE_MAX_ISSUE_BODY_CHARS", "1000")
-# Each step generates a SMALL bite; truncated files are grown by continuation,
-# not demanded whole in one breath.
 NUM_PREDICT           = _int("IMPROVE_NUM_PREDICT", "1024")
-PLAN_NUM_PREDICT      = _int("IMPROVE_PLAN_NUM_PREDICT", "512")  # — the plan is short prose, not code —
+PLAN_NUM_PREDICT      = _int("IMPROVE_PLAN_NUM_PREDICT", "512")
 NUM_CTX               = _int("IMPROVE_NUM_CTX", "8192")
 REQUEST_TIMEOUT       = _int("IMPROVE_TIMEOUT", "600")
-BASE_TEMPERATURE      = _flt("IMPROVE_TEMPERATURE", "0.5")    # — hot. the visions stay strange —
-REPAIR_TEMPERATURE    = _flt("IMPROVE_REPAIR_TEMPERATURE", "0.5")  # — cooler, to converge —
-MAX_ROUNDS            = _int("IMPROVE_MAX_ROUNDS", "1")   # — grow rounds per file —
-MAX_FILES             = _int("IMPROVE_MAX_FILES", "10")   # — files touched per PR —
+BASE_TEMPERATURE      = _flt("IMPROVE_TEMPERATURE", "0.5")
+REPAIR_TEMPERATURE    = _flt("IMPROVE_REPAIR_TEMPERATURE", "0.5")
+MAX_ROUNDS            = _int("IMPROVE_MAX_ROUNDS", "1")
+MAX_FILES             = _int("IMPROVE_MAX_FILES", "10")
 SRC_PREVIEW_CHARS     = _int("IMPROVE_SRC_PREVIEW_CHARS", "1500")
-GEN_DEADLINE_SECONDS  = _int("IMPROVE_GEN_DEADLINE_SECONDS", "540")  # — under the runner's 10m blade —
+GEN_DEADLINE_SECONDS  = _int("IMPROVE_GEN_DEADLINE_SECONDS", "540")
 
-# — issue selection weighting: balance freshness against engagement so the loop
-#   stops fixating on the same old issues. A recent issue (soft exponential decay
-#   by half-life) is favoured, and so is a busy one (reactions + comments +
-#   labels); the two pulls are summed, each with its own knob. Set a weight to 0
-#   to silence that pull entirely; set the half-life huge to disable decay. —
-ISSUE_HALFLIFE_DAYS   = _flt("IMPROVE_ISSUE_HALFLIFE_DAYS", "14")    # — age at which recency halves —
-ISSUE_RECENCY_WEIGHT  = _flt("IMPROVE_ISSUE_RECENCY_WEIGHT", "1.0")  # — pull toward fresh issues —
-ISSUE_ACTIVITY_WEIGHT = _flt("IMPROVE_ISSUE_ACTIVITY_WEIGHT", "1.0") # — pull toward busy issues —
+ISSUE_HALFLIFE_DAYS   = _flt("IMPROVE_ISSUE_HALFLIFE_DAYS", "14")
+ISSUE_RECENCY_WEIGHT  = _flt("IMPROVE_ISSUE_RECENCY_WEIGHT", "1.0")
+ISSUE_ACTIVITY_WEIGHT = _flt("IMPROVE_ISSUE_ACTIVITY_WEIGHT", "1.0")
 
-CODE_EXTENSIONS = {".py", ".js", ".ts", ".sh", ".cbl", ".cob", ".cpy", ".c", ".h", ".rs", ".toml", ".yaml", ".java" }
+CODE_EXTENSIONS = {".py", ".js", ".ts", ".sh", ".cbl", ".cob", ".cpy", ".c", ".h", ".rs", ".toml", ".yaml", ".java"}
 
 PR_BODY_PATH  = Path(os.environ.get("IMPROVE_PR_BODY", "/tmp/improve_pr_body.md"))
 PR_TITLE_PATH = Path(os.environ.get("IMPROVE_PR_TITLE", "/tmp/improve_pr_title.txt"))
 
-# — seeded only for tests; left to OS entropy otherwise, so order really wanders —
 _RNG = random.Random()
 
 log = lambda msg: print(f"[improve] {msg}", file=sys.stderr, flush=True)
 
 
-# — context: the shape of the thing, and the bits to read from ————————————————
 def repo_tree() -> str:
     if not SRC_DIR.is_dir(): return "(src/ is empty)"
     files = [p.relative_to(REPO_ROOT).as_posix()
@@ -65,7 +61,6 @@ def repo_tree() -> str:
 
 
 def source_files() -> list[tuple[str, str]]:
-    """(rel, content) for each code file under src/ — the bits to build on."""
     out = []
     if not SRC_DIR.is_dir(): return out
     for path in sorted(SRC_DIR.rglob("*")):
@@ -80,8 +75,6 @@ def source_files() -> list[tuple[str, str]]:
 
 
 def _fetch_issues() -> list[dict]:
-    # createdAt/updatedAt feed the recency decay; comments + reactionGroups feed
-    # the activity score. All are optional — a missing field degrades to neutral.
     try:
         out = subprocess.run(
             ["gh", "issue", "list", "--state", "open", "--limit", str(MAX_ISSUES),
@@ -102,7 +95,6 @@ def _format_issue(it: dict) -> str:
 
 
 def _parse_iso(ts: str | None) -> datetime | None:
-    """Parse a GitHub ISO-8601 timestamp ('…Z') into an aware datetime, or None."""
     if not ts: return None
     try:
         return datetime.fromisoformat(ts.replace("Z", "+00:00"))
@@ -111,8 +103,6 @@ def _parse_iso(ts: str | None) -> datetime | None:
 
 
 def _issue_activity(it: dict) -> int:
-    """How busy an issue is: reactions + comments + labels. Each field is
-    optional and absent ones simply contribute nothing."""
     reactions = sum((g.get("users") or {}).get("totalCount", 0)
                     for g in (it.get("reactionGroups") or []))
     comments = it.get("comments")
@@ -122,9 +112,6 @@ def _issue_activity(it: dict) -> int:
 
 
 def _recency_factor(it: dict, *, now: datetime, halflife_days: float) -> float:
-    """Soft exponential decay on issue age, in (0, 1]: 1.0 the moment it is
-    created, halving every `halflife_days`. Newer issues weigh more. A missing
-    timestamp or a non-positive half-life means 'no decay' (1.0)."""
     created = _parse_iso(it.get("createdAt") or it.get("updatedAt"))
     if created is None or halflife_days <= 0: return 1.0
     age_days = max(0.0, (now - created).total_seconds() / 86400.0)
@@ -134,9 +121,6 @@ def _recency_factor(it: dict, *, now: datetime, halflife_days: float) -> float:
 def issue_weight(it: dict, *, now: datetime, max_activity: int,
                  halflife_days: float = None, recency_weight: float = None,
                  activity_weight: float = None) -> float:
-    """Blend recency and engagement into one positive sampling weight. Activity
-    is normalised by the busiest issue in the batch so the two knobs stay on a
-    comparable [0, 1]-ish scale; recency decays with age."""
     halflife_days = ISSUE_HALFLIFE_DAYS if halflife_days is None else halflife_days
     recency_weight = ISSUE_RECENCY_WEIGHT if recency_weight is None else recency_weight
     activity_weight = ISSUE_ACTIVITY_WEIGHT if activity_weight is None else activity_weight
@@ -146,23 +130,15 @@ def issue_weight(it: dict, *, now: datetime, max_activity: int,
 
 
 def _weighted_order(weighted: list[tuple[dict, float]]) -> list[dict]:
-    """Weighted-random ordering (Efraimidis–Spirakis): draw key = u**(1/w) per
-    item and sort descending. Higher weight tends to surface earlier, but the
-    draw keeps the order wandering so no issue is forever first."""
     keyed = []
     for obj, w in weighted:
-        u = _RNG.random() or 1e-12              # — avoid log(0)/0-key —
+        u = _RNG.random() or 1e-12
         keyed.append((u ** (1.0 / max(w, 1e-9)), obj))
     keyed.sort(key=lambda t: t[0], reverse=True)
     return [obj for _, obj in keyed]
 
 
 def collect_issue_list(*, now: datetime = None) -> list[str]:
-    """Open issues, each formatted on its own, in a WEIGHTED-random order that
-    balances recency (soft exponential decay) against engagement (reactions,
-    comments, labels). Every issue still appears — the weighting only biases who
-    goes first — so no single issue is forever first and forever the only one
-    acted on, but fresh, active issues get their turn sooner."""
     issues = _fetch_issues()
     now = now or datetime.now(timezone.utc)
     max_activity = max((_issue_activity(it) for it in issues), default=0)
@@ -171,11 +147,9 @@ def collect_issue_list(*, now: datetime = None) -> list[str]:
 
 
 def _interleave(a: list, b: list) -> list:
-    """a0, b0, a1, b1, … — alternate, then trail the longer one."""
     return [x for pair in zip_longest(a, b) for x in pair if x is not None]
 
 
-# — the voice. mystic, but it must compile ——————————————————————————————————
 ORACLE_VOICE = (
     "You are the ORACLE OF THE REPOSITORY: a daemon that dreams in working code. "
     "Your visions are bold and strange and reach for the outer limits of what a "
@@ -199,7 +173,6 @@ def ollama_generate(messages, *, num_predict=None, temperature=None) -> str:
         return (json.loads(r.read().decode("utf-8")).get("message") or {}).get("content", "")
 
 
-# — no path escapes src/; a bare directory becomes a file, never a crash ——————
 def _looks_like_python(content: str) -> bool:
     head = content.lstrip()[:2000]
     return bool(re.search(r"^\s*(def |class |import |from \w|@|async def )", head, re.MULTILINE)
@@ -232,7 +205,6 @@ def content_problems(target: Path, content: str) -> list[str]:
     return []
 
 
-# — code as text, not as a parsed protocol ——————————————————————————————————
 def _strip_code(text: str) -> str:
     if (m := re.search(r"```[\w-]*\n(.*?)\n```", text or "", re.DOTALL)):
         return m.group(1).strip("\n")
@@ -253,17 +225,15 @@ def _is_truncation(code: str, e) -> bool:
     if any(k in msg for k in ("unexpected eof", "was never closed",
                               "expected an indented block", "incomplete input")):
         return True
-    return (getattr(e, "lineno", 0) or 0) >= code.count("\n") + 1  # — error on the last line we have —
+    return (getattr(e, "lineno", 0) or 0) >= code.count("\n") + 1
 
 
-# — an inspiration is one bite: a single issue, or a single source file ————————
 def _inspiration_text(insp) -> str:
     if insp[0] == "issue":
         return f"## An open issue — answer it WITH CODE\n{insp[1]}"
     return f"## An existing module to build on — {insp[1]}\n{insp[2][:SRC_PREVIEW_CHARS]}"
 
 
-# — step one: the oracle names the file (new, existing, or one in progress) ———
 def choose_target(generate, tree, inspiration, produced_rels, language) -> Path:
     already = (f"You have already written these this run: {', '.join(produced_rels)}.\n"
                if produced_rels else "")
@@ -291,7 +261,8 @@ def choose_target(generate, tree, inspiration, produced_rels, language) -> Path:
         or (SRC_DIR / _dump(".py")).resolve()
     return target
 
-def choose_language(generate, tree, inspiration, produced_rels) -> Path:
+
+def choose_language(generate, tree, inspiration, produced_rels) -> str:
     already = (f"You have already written these this run: {', '.join(produced_rels)}.\n"
                if produced_rels else "")
     msgs = [
@@ -306,15 +277,10 @@ def choose_language(generate, tree, inspiration, produced_rels) -> Path:
         raw = generate(msgs, num_predict=24, temperature=0.5)
     except Exception as exc:
         log(f"target call failed: {exc}")
-    return raw
+    return (raw or "").strip()
 
 
-# — step 1.5: a plan for THIS file, drawn from the inspiration, to steer growth —
 def make_plan(generate, target, inspiration, *, language=None) -> str:
-    """Between naming the file and writing it, the oracle lays out what this file
-    should become: a short, concrete plan grounded in the chosen inspiration and
-    target. The plan is fed into the growth stage to steer it. Returns '' on
-    failure — growth then simply proceeds unplanned."""
     rel = target.relative_to(REPO_ROOT).as_posix() if REPO_ROOT in target.parents else target.name
     lang = f" in {language}" if language else ""
     msgs = [
@@ -334,7 +300,6 @@ def make_plan(generate, target, inspiration, *, language=None) -> str:
     return (raw or "").strip()
 
 
-# — step two: grow the code in bites (write → continue → fix) until it parses —
 def _code_msgs(rel, tree, inspiration, *, mode, draft="", error="", language=None, plan=""):
     if language:
         system = ORACLE_VOICE + (f" Output ONLY the source code in {language}— no markdown fences, "
@@ -357,7 +322,7 @@ def _code_msgs(rel, tree, inspiration, *, mode, draft="", error="", language=Non
         user = (f"Here is {rel} so far — it was cut off before the end:\n\n{draft}\n\n"
                 "Output ONLY the code that continues from exactly where it stops and "
                 "completes the file. Do not repeat earlier lines; no fences, no commentary.")
-    else:  # fix
+    else:
         user = (f"{rel} does not parse: {error}\n\nHere is the file:\n\n{draft}\n\n"
                 "Output the COMPLETE corrected file as valid code. Only the code.")
 
@@ -369,9 +334,6 @@ def _code_msgs(rel, tree, inspiration, *, mode, draft="", error="", language=Non
 
 def grow_file(generate, target, tree, inspiration, prior, *, deadline,
               now=time.monotonic, num_predict=None, max_rounds=MAX_ROUNDS, language=None, plan=""):
-    """Return (code, last_response). Write once from `prior` (steered by `plan`),
-    then CONTINUE a truncated draft or FIX a broken one, re-checking with the
-    parser each round."""
     rel = target.relative_to(REPO_ROOT).as_posix() if REPO_ROOT in target.parents else target.name
     code, last, rounds = "", "", 0
     while rounds < max_rounds and now() < deadline:
@@ -397,7 +359,6 @@ def grow_file(generate, target, tree, inspiration, prior, *, deadline,
     return code, last
 
 
-# — a one-line vision for the PR, with a deterministic fallback ———————————————
 def make_reason(generate, rels) -> str:
     joined = ", ".join(rels)
     try:
@@ -414,6 +375,7 @@ def make_reason(generate, rels) -> str:
         log(f"reason call failed: {exc}")
     return f"Quicken {rels[0]}" + (f" and {len(rels) - 1} more" if len(rels) > 1 else "")
 
+
 def make_explanation(generate, rels) -> str:
     joined = ", ".join(rels)
     try:
@@ -440,13 +402,10 @@ def _write_one(target: Path, content: str):
 def generate_improvement(generate, tree, src_files, issues, *, deadline_seconds=GEN_DEADLINE_SECONDS,
                          num_predict=None, max_files=MAX_FILES, max_rounds=MAX_ROUNDS,
                          now=time.monotonic):
-    """Interleave issue- and source-inspirations; for each, name a file and grow
-    a bite of code, writing it so later steps can build on it. Touches up to
-    `max_files`. Returns (reason, [(target, code)], last_response, valid)."""
     deadline = now() + deadline_seconds
     inspirations = _interleave([("issue", t) for t in issues],
                                [("source", rel, content) for rel, content in src_files])
-    if not inspirations:                                  # — nothing to read; still create —
+    if not inspirations:
         inspirations = [("source", "src/oracle.py", "")]
 
     produced: dict[str, tuple[Path, str]] = {}
@@ -481,7 +440,7 @@ def generate_improvement(generate, tree, src_files, issues, *, deadline_seconds=
             log(f"step wrote {rel} ({len(code)} bytes)")
 
     if not produced:
-        return None, [], last, False
+        return None, None, [], last, False
     files = list(produced.values())
     valid = all(content_problems(t, c) == [] for t, c in files)
     if not valid:
@@ -491,7 +450,6 @@ def generate_improvement(generate, tree, src_files, issues, *, deadline_seconds=
     return reason, explanation, files, last, valid
 
 
-# — PR metadata ———————————————————————————————————————————————————————————————
 def write_blocks(files) -> list[str]:
     return [r for r in (_write_one(t, c) for t, c in files) if r]
 
@@ -504,12 +462,11 @@ def make_pr_title(reason, written) -> str:
     return title if len(title) <= 72 else title[:69].rstrip() + "…"
 
 
-def write_pr_outputs(reason, explanation,  written, *, valid=True) -> None:
+def write_pr_outputs(reason, explanation, written, *, valid=True) -> None:
     PR_TITLE_PATH.write_text(title := make_pr_title(reason, written), encoding="utf-8")
     log(f"title: {title}")
     PR_BODY_PATH.write_text(
-        f"{reason}\n\n{explanation}"
-        f"**Files changed ({len(written)}):**\n" + "\n".join(f"- `{p}`" for p in written), encoding="utf-8")
+        f"{reason}\n\n{explanation}\n\n**Files changed ({len(written)}):**\n" + "\n".join(f"- `{p}`" for p in written), encoding="utf-8")
 
 
 def main() -> int: