{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://wildestai.com/schemas/diffgraph/v2.0/schema.json",
  "title": "DiffGraph v2.0",
  "description": "Canonical output schema for the `wild diff` CLI. Every claim carries its analysis_source (structural | inferred | derived) and an evidence pointer. Consumers MUST reject unknown major schema_version values.",
  "type": "object",
  "required": ["schema_version", "generated_at", "wild_version", "diff_ref", "files", "symbols", "relationships", "metadata"],
  "additionalProperties": false,

  "properties": {

    "schema_version": {
      "type": "string",
      "description": "Semver-style MAJOR.MINOR. Consumers MUST reject unknown MAJOR. MINOR bumps are additive only.",
      "pattern": "^\\d+\\.\\d+$",
      "examples": ["2.0"]
    },

    "generated_at": {
      "type": "string",
      "format": "date-time",
      "description": "ISO 8601 UTC timestamp of the analysis run."
    },

    "wild_version": {
      "type": "string",
      "description": "Semver of the `wild` CLI that produced this artifact.",
      "examples": ["2.0.0", "2.1.0-dev"]
    },

    "diff_ref": {
      "type": "object",
      "description": "Describes what was diffed.",
      "required": ["kind"],
      "additionalProperties": false,
      "properties": {
        "kind": {
          "type": "string",
          "enum": ["unstaged", "staged", "commit_range", "file_scope"],
          "description": "Maps directly to the `wild diff` variant used."
        },
        "base_ref": {
          "type": ["string", "null"],
          "description": "Commit SHA or ref for the base side. null for working-tree diffs."
        },
        "head_ref": {
          "type": ["string", "null"],
          "description": "Commit SHA or ref for the head side. null for working-tree diffs."
        },
        "pathspecs": {
          "type": "array",
          "items": { "type": "string" },
          "description": "File or glob filters passed to `wild diff`. Empty = all files."
        },
        "repo_root": {
          "type": "string",
          "description": "Absolute path to the git repo root. Used to resolve relative file paths."
        }
      }
    },

    "files": {
      "type": "array",
      "description": "One entry per file that appeared in the diff.",
      "items": { "$ref": "#/$defs/FileEntry" }
    },

    "symbols": {
      "type": "array",
      "description": "Named code entities in changed files. Extracted by static analysis (structural) or LLM (inferred).",
      "items": { "$ref": "#/$defs/SymbolEntry" }
    },

    "relationships": {
      "type": "array",
      "description": "Edges between symbols or files. analysis_source is mandatory on every edge.",
      "items": { "$ref": "#/$defs/RelationshipEntry" }
    },

    "summary": {
      "oneOf": [
        { "$ref": "#/$defs/SummaryEntry" },
        { "type": "null" }
      ],
      "description": "Top-level LLM summary of the change. null when running in local-only mode."
    },

    "metadata": {
      "$ref": "#/$defs/Metadata"
    }
  },

  "$defs": {

    "AnalysisSource": {
      "type": "string",
      "enum": ["structural", "inferred", "derived"],
      "description": "structural = deterministic static analysis; inferred = LLM interpretation; derived = aggregated from structural + inferred."
    },

    "Evidence": {
      "type": "object",
      "description": "Pointer to what produced a claim. kind determines which fields are present.",
      "required": ["kind"],
      "properties": {
        "kind": {
          "type": "string",
          "enum": [
            "git_diff_stat",
            "git_diff_name_status",
            "path_pattern",
            "ast_parse",
            "import_statement",
            "call_site",
            "llm_inference",
            "structural_basis"
          ]
        },
        "file": { "type": "string", "description": "Relevant for ast_parse, import_statement, call_site." },
        "line_start": { "type": "integer", "minimum": 1, "description": "1-indexed line number." },
        "line_end": { "type": "integer", "minimum": 1, "description": "1-indexed line number." },
        "snippet": { "type": "string", "description": "Short source excerpt (signature line or import statement)." },
        "pattern": { "type": "string", "description": "Glob/regex pattern (kind=path_pattern)." },
        "detail": { "type": "string", "description": "Free-text detail (kind=git_diff_stat/name_status)." },
        "model": { "type": "string", "description": "LLM model id (kind=llm_inference)." },
        "prompt_ref": { "type": "string", "description": "Internal prompt template reference (kind=llm_inference)." },
        "temperature": { "type": "number", "minimum": 0, "maximum": 2, "description": "(kind=llm_inference)." },
        "symbol_ids": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Symbol IDs that grounded this inferred claim (kind=structural_basis)."
        },
        "file_ids": {
          "type": "array",
          "items": { "type": "string" },
          "description": "File IDs that grounded this inferred claim (kind=structural_basis)."
        }
      }
    },

    "Classification": {
      "type": "object",
      "required": ["is_test", "analysis_source"],
      "additionalProperties": false,
      "properties": {
        "is_test": { "type": "boolean" },
        "analysis_source": { "$ref": "#/$defs/AnalysisSource" },
        "evidence": {
          "type": "array",
          "items": { "$ref": "#/$defs/Evidence" }
        }
      }
    },

    "FileEntry": {
      "type": "object",
      "required": ["id", "path", "change_kind", "analysis_source"],
      "additionalProperties": false,
      "properties": {
        "id": {
          "type": "string",
          "description": "Stable ID: 'file::<path>' (normalized, forward slashes, relative to repo_root).",
          "pattern": "^file::.+"
        },
        "path": {
          "type": "string",
          "description": "Current path relative to repo root."
        },
        "old_path": {
          "type": ["string", "null"],
          "description": "Pre-rename path. null if not a rename."
        },
        "language": {
          "type": ["string", "null"],
          "description": "Detected language. null = unknown (static analysis not available for this file)."
        },
        "change_kind": {
          "type": "string",
          "enum": ["added", "modified", "deleted", "renamed", "renamed_modified"]
        },
        "lines_added": {
          "type": ["integer", "null"],
          "minimum": 0
        },
        "lines_removed": {
          "type": ["integer", "null"],
          "minimum": 0
        },
        "analysis_source": {
          "type": "string",
          "const": "structural",
          "description": "File entries are always structural (git metadata)."
        },
        "evidence": {
          "type": "array",
          "items": { "$ref": "#/$defs/Evidence" }
        },
        "classification": {
          "oneOf": [
            { "$ref": "#/$defs/Classification" },
            { "type": "null" }
          ]
        }
      }
    },

    "SymbolEntry": {
      "type": "object",
      "required": ["id", "name", "file_id", "kind", "change_kind", "analysis_source"],
      "additionalProperties": false,
      "properties": {
        "id": {
          "type": "string",
          "description": "Stable ID: 'sym::<file>::<symbol>'. Deterministic for the same diff + repo state.",
          "pattern": "^sym::.+::.+"
        },
        "name": {
          "type": "string",
          "description": "Short name as it appears in source."
        },
        "qualified_name": {
          "type": ["string", "null"],
          "description": "Dotted path where resolvable (e.g. 'auth.validator.validate_token'). null if cannot determine."
        },
        "file_id": {
          "type": "string",
          "description": "Refers to files[].id.",
          "pattern": "^file::.+"
        },
        "kind": {
          "type": "string",
          "enum": ["function", "class", "method", "import", "constant", "type_alias", "module"],
          "description": "Symbol category."
        },
        "parent_id": {
          "type": ["string", "null"],
          "description": "Symbol ID of containing class/function. null for top-level symbols.",
          "pattern": "^sym::.+::.+"
        },
        "change_kind": {
          "type": "string",
          "enum": ["added", "modified", "deleted", "unchanged"],
          "description": "Derived by diffing tree-sitter AST outputs pre- and post-change."
        },
        "analysis_source": {
          "$ref": "#/$defs/AnalysisSource"
        },
        "location": {
          "oneOf": [
            {
              "type": "object",
              "required": ["file", "line_start", "line_end"],
              "additionalProperties": false,
              "properties": {
                "file": { "type": "string" },
                "line_start": { "type": "integer", "minimum": 0 },
                "line_end": { "type": "integer", "minimum": 0 }
              }
            },
            { "type": "null" }
          ],
          "description": "Line range in post-change file. null for deleted symbols."
        },
        "evidence": {
          "type": "array",
          "items": { "$ref": "#/$defs/Evidence" },
          "description": "Required for inferred; strongly recommended for structural. Structural symbols should include at least one ast_parse entry."
        }
      },
      "if": {
        "properties": { "analysis_source": { "const": "inferred" } },
        "required": ["analysis_source"]
      },
      "then": {
        "required": ["evidence"],
        "properties": {
          "evidence": { "minItems": 1 }
        }
      }
    },

    "RelationshipEntry": {
      "type": "object",
      "required": ["id", "kind", "source_id", "target_id", "analysis_source"],
      "additionalProperties": false,
      "properties": {
        "id": {
          "type": "string",
          "description": "Stable ID: 'rel::<source>-><target>'. Append '#N' for multi-edges.",
          "pattern": "^rel::.+->.+"
        },
        "kind": {
          "type": "string",
          "enum": ["imports", "calls", "inherits", "implements", "defines", "contains", "semantic_related", "co_changed"],
          "description": "See design/JSON-SCHEMA.md for semantics and allowed analysis_source per kind."
        },
        "source_id": {
          "type": "string",
          "description": "symbols[].id or files[].id."
        },
        "target_id": {
          "type": "string",
          "description": "symbols[].id or files[].id."
        },
        "analysis_source": {
          "$ref": "#/$defs/AnalysisSource"
        },
        "confidence": {
          "type": ["number", "null"],
          "minimum": 0,
          "maximum": 1,
          "description": "Required when analysis_source == 'inferred'. null for structural relationships."
        },
        "resolution_method": {
          "type": ["string", "null"],
          "enum": ["import_grounded", "resolved", "heuristic", null],
          "description": "For 'calls' relationships: how the target was resolved. 'import_grounded' = explicit import + call site, not full project indexing."
        },
        "evidence": {
          "type": "array",
          "items": { "$ref": "#/$defs/Evidence" }
        },
        "label": {
          "type": ["string", "null"],
          "description": "Human-readable edge description. From LLM for inferred edges."
        }
      },
      "if": {
        "properties": { "analysis_source": { "const": "inferred" } },
        "required": ["analysis_source"]
      },
      "then": {
        "required": ["confidence", "evidence"],
        "properties": {
          "evidence": { "minItems": 1 },
          "confidence": { "type": "number" }
        }
      }
    },

    "SummaryEntry": {
      "type": "object",
      "required": ["text", "analysis_source"],
      "additionalProperties": false,
      "properties": {
        "text": {
          "type": "string",
          "description": "Human-readable summary of the change."
        },
        "analysis_source": {
          "type": "string",
          "const": "inferred",
          "description": "Summaries are always inferred (require LLM interpretation)."
        },
        "confidence": {
          "type": ["number", "null"],
          "minimum": 0,
          "maximum": 1
        },
        "evidence": {
          "type": "array",
          "items": { "$ref": "#/$defs/Evidence" },
          "description": "Must include at least one llm_inference entry and one structural_basis entry."
        }
      }
    },

    "Warning": {
      "type": "object",
      "required": ["code"],
      "additionalProperties": false,
      "properties": {
        "code": {
          "type": "string",
          "enum": ["PARSE_FAILURE", "UNSUPPORTED_LANGUAGE", "PARTIAL_ANALYSIS", "LLM_TIMEOUT", "LLM_ERROR", "UNKNOWN"],
          "description": "Machine-readable warning code. Consumers can surface these to the user."
        },
        "file": { "type": "string" },
        "detail": { "type": "string" }
      }
    },

    "Metadata": {
      "type": "object",
      "required": ["privacy_tier"],
      "additionalProperties": false,
      "properties": {
        "privacy_tier": {
          "type": "string",
          "enum": ["local", "cloud_llm", "cloud_backend"],
          "description": "local = no data left the machine; cloud_llm = diff sent to LLM API; cloud_backend = data sent to WildestAI backend."
        },
        "cloud_providers_used": {
          "type": "array",
          "items": { "type": "string" },
          "description": "LLM provider IDs used (e.g. 'openai', 'anthropic'). Empty for local-only runs."
        },
        "analysis_duration_ms": {
          "type": ["integer", "null"],
          "minimum": 0
        },
        "languages_detected": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Languages found in the diffed files."
        },
        "files_analyzed": {
          "type": ["integer", "null"],
          "minimum": 0
        },
        "files_skipped": {
          "type": ["integer", "null"],
          "minimum": 0
        },
        "llm_calls": {
          "type": ["integer", "null"],
          "minimum": 0,
          "description": "Number of LLM API calls made. 0 for local-only runs."
        },
        "llm_model": {
          "type": ["string", "null"],
          "description": "Primary LLM model used, if any."
        },
        "tiers_used": {
          "type": "array",
          "items": { "$ref": "#/$defs/AnalysisSource" },
          "description": "Which analysis tiers contributed to this output."
        },
        "warnings": {
          "type": "array",
          "items": { "$ref": "#/$defs/Warning" }
        }
      }
    }
  }
}
#!/usr/bin/env python3
"""
WildestAI MCP Server

Exposes DiffGraph-CLI functionality and documentation to AI agents via the
Model Context Protocol (MCP). Agents can run `wild diff` on a repo and retrieve
docs programmatically.

Usage:
    python mcp_server.py

Requirements:
    pip install mcp
"""

import json
import os
import shlex
import subprocess
import sys
from pathlib import Path
from typing import Optional

from mcp.server.fastmcp import FastMCP

# ---------------------------------------------------------------------------
# Setup
# ---------------------------------------------------------------------------

# Resolved once so that containment checks against other resolved paths are
# reliable even when the checkout is reached through a symlink.
REPO_ROOT = Path(__file__).resolve().parent
DOCS_DIR = REPO_ROOT / "docs"

# Reference docs that live next to (not inside) this checkout.
_EXTERNAL_DOCS_DIR = REPO_ROOT.parent / "docs" / "DiffGraph-CLI"

# Only files with these suffixes may be served through a path lookup, so the
# docs tools cannot be used to read source files, `.env`, or other secrets.
_DOC_SUFFIXES = frozenset({".md", ".markdown", ".rst", ".txt"})

_WILD_DIFF_TIMEOUT_S = 120

# Single source of truth for the well-known pages: (slug, title, path).
# list_docs, get_docs and search_docs all read this table so they cannot
# disagree about where a page lives.
_BUILT_IN_DOCS = (
    ("readme", "README — Overview & Quick Start", REPO_ROOT / "README.md"),
    ("changelog", "Changelog", REPO_ROOT / "CHANGELOG.md"),
    ("skill", "Agent Skill — how to use wild from an AI agent", REPO_ROOT / "skill.md"),
    ("context", "Project Context — deep reference", _EXTERNAL_DOCS_DIR / "CONTEXT.md"),
    (
        "vision",
        "WildestAI Vision — 22-section strategic context",
        _EXTERNAL_DOCS_DIR / "WildestAI-vision.md",
    ),
)

mcp = FastMCP(
    name="wildestai",
    instructions=(
        "WildestAI helps developers understand code changes. "
        "Use run_wild_diff to analyse a git repository's current diff and get a "
        "semantic understanding of what changed. Use list_docs / get_docs / search_docs "
        "to explore the project's documentation."
    ),
)


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _slugify(path: Path) -> str:
    """Return the short lookup name for a docs/ page (lower-cased stem, spaces -> '-')."""
    return path.stem.lower().replace(" ", "-")


def _docs_dir_pages() -> list:
    """Return (slug, path) for every Markdown file under docs/, in sorted path order."""
    if not DOCS_DIR.is_dir():
        return []
    return [(_slugify(f), f) for f in sorted(DOCS_DIR.glob("**/*.md")) if f.is_file()]


def _display_path(path: Path) -> str:
    """Return `path` relative to the repo root when it is inside it, else unchanged."""
    try:
        return str(path.relative_to(REPO_ROOT))
    except ValueError:
        return str(path)


def _is_servable_doc(path: Path) -> bool:
    """Tell whether an already-resolved path may be returned by get_docs.

    A path qualifies when it is an existing file and is either one of the
    built-in pages or a documentation-type file located inside the repo.
    """
    if not path.is_file():
        return False
    if any(path == doc.resolve() for _, _, doc in _BUILT_IN_DOCS):
        return True
    if path.suffix.lower() not in _DOC_SUFFIXES:
        return False
    try:
        path.relative_to(REPO_ROOT)
    except ValueError:
        # Resolved outside the repo (e.g. "../../etc/passwd" or an absolute path).
        return False
    return True


def _resolve_doc(name: str) -> Optional[Path]:
    """Map a slug or repo-relative path to a readable documentation file, or None."""
    key = name.strip()
    if not key:
        return None

    # 1. Well-known slugs.
    for slug, _, path in _BUILT_IN_DOCS:
        if slug == key:
            return path if path.is_file() else None

    # 2. Path relative to the repo root, then relative to docs/.
    for base in (REPO_ROOT, DOCS_DIR):
        candidate = (base / key).resolve()
        if _is_servable_doc(candidate):
            return candidate

    # 3. Slug of a page under docs/, exactly as reported by list_docs.
    wanted = key.lower()
    for slug, path in _docs_dir_pages():
        if slug == wanted:
            return path

    return None


def _diff_failure(message: str) -> dict:
    """Build a run_wild_diff error result that carries every documented key."""
    return {
        "success": False,
        "stdout": "",
        "stderr": "",
        "output_file": "",
        "returncode": -1,
        "error": message,
    }


# ---------------------------------------------------------------------------
# Tool: run_wild_diff
# ---------------------------------------------------------------------------

@mcp.tool()
def run_wild_diff(
    repo_path: str,
    args: str = "",
    output_file: str = "",
) -> dict:
    """
    Run `wild diff` on a git repository and return the result.

    Args:
        repo_path: Absolute path to the git repository root.
        args: Optional extra arguments, e.g. "--staged", "<commit>",
            "<commit1> <commit2>". Parsed with shell-style quoting, so a
            path containing spaces can be passed in quotes.
        output_file: Optional path for the HTML output file. A relative path
            is taken relative to repo_path. Defaults to
            <repo_path>/diffgraph.html.

    Returns:
        A dict with keys:
            success (bool): Whether the command succeeded.
            stdout (str): Standard output from wild diff.
            stderr (str): Standard error output.
            output_file (str): Path to the generated HTML report (if any).
            returncode (int): Process exit code (-1 if the command never ran
                to completion).
            error (str): Present only when the command could not be run.
    """
    repo = Path(repo_path).expanduser().resolve()
    if not repo.is_dir():
        return _diff_failure(f"Directory not found: {repo_path}")

    if not (repo / ".git").exists():
        return _diff_failure(f"Not a git repository: {repo_path}")

    try:
        # shlex keeps quoted arguments (e.g. "my file.py") as one token.
        extra_args = shlex.split(args) if args else []
    except ValueError as exc:
        return _diff_failure(f"Could not parse args {args!r}: {exc}")

    # The child process runs with cwd=repo, so a relative output path has to be
    # anchored there; otherwise the existence check below would look in this
    # server's own working directory.
    if output_file:
        report = Path(output_file).expanduser()
        if not report.is_absolute():
            report = repo / report
    else:
        report = repo / "diffgraph.html"

    cmd = ["wild", "diff", "--no-open"]
    if output_file:
        cmd += ["--output", str(report)]
    cmd += extra_args

    try:
        result = subprocess.run(
            cmd,
            cwd=str(repo),
            capture_output=True,
            text=True,
            timeout=_WILD_DIFF_TIMEOUT_S,
        )
    except subprocess.TimeoutExpired:
        return _diff_failure(f"wild diff timed out after {_WILD_DIFF_TIMEOUT_S} seconds")
    except FileNotFoundError:
        return _diff_failure(
            "`wild` command not found. Install with: pip install wildest-ai "
            "or `pip install -e .` from the DiffGraph-CLI repo."
        )

    # NOTE: if `args` carries its own --output flag, the report may have been
    # written elsewhere and this field will be empty.
    return {
        "success": result.returncode == 0,
        "stdout": result.stdout,
        "stderr": result.stderr,
        "output_file": str(report) if report.exists() else "",
        "returncode": result.returncode,
    }


# ---------------------------------------------------------------------------
# Tool: list_docs
# ---------------------------------------------------------------------------

@mcp.tool()
def list_docs() -> list[dict]:
    """
    List available documentation pages for DiffGraph-CLI / WildestAI.

    Returns:
        A list of dicts, each with keys:
            name (str): Short name / slug of the document; accepted by get_docs.
            title (str): Human-readable title.
            path (str): Path relative to the repo root, or an absolute path
                for pages stored outside the repo.
    """
    pages = [
        {"name": slug, "title": title, "path": _display_path(path)}
        for slug, title, path in _BUILT_IN_DOCS
        if path.is_file()
    ]

    # docs/ subdirectory
    pages.extend(
        {
            "name": slug,
            "title": path.stem.replace("-", " ").replace("_", " ").title(),
            "path": _display_path(path),
        }
        for slug, path in _docs_dir_pages()
    )

    return pages


# ---------------------------------------------------------------------------
# Tool: get_docs
# ---------------------------------------------------------------------------

@mcp.tool()
def get_docs(name: str) -> dict:
    """
    Retrieve the content of a documentation page by name or path.

    Only documentation files are served: the built-in pages, and text/Markdown
    files located inside the repository. Paths that resolve outside the
    repository are rejected.

    Args:
        name: The short name from list_docs (e.g. "readme", "context") or a
              relative path from the repo root (e.g. "docs/Roadmap-v1-git-wrapper.md").

    Returns:
        A dict with keys:
            found (bool): Whether the document was located.
            name (str): The name used to look up.
            content (str): Full text of the document (if found).
            error (str): Error message (if not found).
    """
    target = _resolve_doc(name)

    if target is None:
        return {
            "found": False,
            "name": name,
            "content": "",
            "error": f"Document '{name}' not found. Call list_docs() to see available pages.",
        }

    try:
        content = target.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        return {"found": False, "name": name, "content": "", "error": str(exc)}

    return {"found": True, "name": name, "content": content, "error": ""}


# ---------------------------------------------------------------------------
# Tool: search_docs
# ---------------------------------------------------------------------------

@mcp.tool()
def search_docs(query: str, max_results: int = 5) -> list[dict]:
    """
    Search across all documentation for a query string (case-insensitive).

    Args:
        query: The search term or phrase. A blank query returns no results.
        max_results: Maximum number of matching excerpts to return (default 5).
            Values below 1 return no results.

    Returns:
        A list of dicts, each with:
            document (str): Document name.
            path (str): Relative path.
            excerpt (str): The matching line + a few lines of context.
            line (int): Line number of the match.
    """
    # An empty needle is a substring of every line, so it would "match"
    # everything; treat it as "nothing to search for".
    if max_results <= 0 or not query.strip():
        return []

    needle = query.lower()
    sources = [(path, slug) for slug, _, path in _BUILT_IN_DOCS]
    sources += [(path, slug) for slug, path in _docs_dir_pages()]

    results: list = []
    for path, doc_name in sources:
        if not path.is_file():
            continue
        try:
            lines = path.read_text(encoding="utf-8").splitlines()
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable page must not abort the whole search.
            continue

        for idx, line in enumerate(lines):
            if needle not in line.lower():
                continue
            # Two lines of context on either side of the hit.
            first = max(0, idx - 2)
            last = min(len(lines), idx + 3)
            results.append({
                "document": doc_name,
                "path": _display_path(path),
                "excerpt": "\n".join(lines[first:last]),
                "line": idx + 1,
            })
            if len(results) >= max_results:
                return results

    return results


# ---------------------------------------------------------------------------
# Resource: llms.txt
# ---------------------------------------------------------------------------

@mcp.resource("wildestai://llms.txt")
def llms_txt() -> str:
    """The llms.txt for WildestAI — compact AI discoverability summary."""
    # Prefer the website's copy when a sibling checkout exists, then the copy
    # shipped in this repo, then a built-in summary.
    candidates = (
        REPO_ROOT.parent / "wildest-ai-website" / "public" / "llms.txt",
        REPO_ROOT / "llms.txt",
    )
    for candidate in candidates:
        try:
            if candidate.is_file():
                return candidate.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue

    return (
        "# WildestAI\n\n"
        "WildestAI turns code changes from raw diffs into understandable, "
        "evidence-linked software evolution.\n\n"
        "CLI: wild diff — analyses git diffs with AI, generates dependency graph HTML report.\n"
        "Website: https://wildest.ai\n"
        "GitHub: https://github.com/WildestAI/DiffGraph-CLI\n"
    )


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    mcp.run()
b/skill.md new file mode 100644 index 0000000..02d7ceb --- /dev/null +++ b/skill.md @@ -0,0 +1,86 @@ +# skill: wild-diff + +Use this skill to analyse git diffs with WildestAI's `wild` CLI and get semantic understanding of code changes. + +## What it does + +`wild diff` analyses your current git diff using AI and produces: +- A dependency graph of changed components and their relationships +- An HTML report (`diffgraph.html`) with Mermaid.js visualisation +- Syntax-highlighted code blocks with AI-generated summaries +- Answers to: what changed, how components relate, what needs attention + +## Prerequisites + +```bash +# Install +pip install wildest-ai +# or from source: +git clone https://github.com/WildestAI/DiffGraph-CLI && cd DiffGraph-CLI && pip install -e . + +# Set your OpenAI API key +export OPENAI_API_KEY=sk-... +``` + +## Commands + +```bash +# Analyse unstaged + untracked changes (most common) +wild diff + +# Analyse only staged changes +wild diff --staged + +# Diff between HEAD and a specific commit +wild diff <commit> + +# Diff between two commits +wild diff <commit1> <commit2> + +# Diff for a specific file +wild diff path/to/file.py + +# Write output to a custom file, don't auto-open +wild diff --output report.html --no-open + +# All other git commands pass through transparently +wild log +wild blame path/to/file.py +wild status +``` + +## Output + +By default, generates `diffgraph.html` in the current directory and opens it in your browser. + +Use `--no-open` to suppress auto-open (useful in CI or headless environments). +Use `--output <path>` to write to a specific location. 
+ +## MCP Server + +If you need programmatic access (agent-to-agent), start the MCP server: + +```bash +cd path/to/DiffGraph-CLI +python mcp_server.py +``` + +The MCP server exposes tools: +- `run_wild_diff(repo_path, args, output_file)` — run wild diff and return its stdout, stderr, exit code and the path of the HTML report +- `get_docs(name)` — retrieve a documentation page +- `list_docs()` — list available documentation pages +- `search_docs(query)` — search across docs + +## Configuration + +Environment variables: +- `OPENAI_API_KEY` — required for AI analysis +- Copy `.env.example` to `.env` to set it locally + +## Notes + +- Works with Python 3.8+ +- Tested on macOS and Linux +- The CLI wraps `git` — it must be run inside a git repository +- Large diffs may be slow; consider scoping with file paths or commit ranges +- The `.env` file is git-ignored — never commit API keys