|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Source-scraped API doc generator for the ARCP Java SDK. |
| 3 | +
|
| 4 | +Walks library modules (skipping examples, recipes, tests, generated sources), |
| 5 | +extracts package declarations, top-level Javadoc comments and signatures for |
| 6 | +public classes/interfaces/enums/records/methods, and emits one Markdown file |
| 7 | +per package under docs/api/, plus a docs/api/index.md. |
| 8 | +
|
| 9 | +Output is intended to be ingested by the arpc.dev site, which globs |
| 10 | +<lang>-sdk/docs/**/*.md at build time. |
| 11 | +""" |
| 12 | +from __future__ import annotations |
| 13 | + |
| 14 | +import re |
| 15 | +import sys |
| 16 | +from collections import defaultdict |
| 17 | +from dataclasses import dataclass, field |
| 18 | +from pathlib import Path |
| 19 | + |
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Directory that receives every generated Markdown file.
OUT = ROOT / "docs" / "api"

# Module directories (relative to ROOT) whose src/main/java trees are scanned.
LIBRARY_MODULES = [
    "arcp", "arcp-client", "arcp-core", "arcp-runtime", "arcp-runtime-jetty",
    "arcp-otel", "arcp-tck", "arcp-middleware-jakarta",
    "arcp-middleware-spring-boot", "arcp-middleware-vertx",
]

# ``package a.b.c;`` declaration; group 1 is the dotted package name.
PKG_RE = re.compile(r"^\s*package\s+([\w.]+)\s*;", re.MULTILINE)
# Public type declaration; group 1 is the kind keyword, group 2 the type name.
# Modifiers are accepted in any order and any number, so declarations written
# as ``public abstract static class Builder`` or ``public final static class X``
# are matched as well as ``public static final class X``.
TYPE_RE = re.compile(
    r"public\s+(?:(?:static|final|abstract|sealed|non-sealed|strictfp)\s+)*"
    r"(class|interface|enum|record|@interface)\s+(\w+)"
)
# Public method declaration; groups are (return type, name, parameter list).
# A leading generic parameter section such as ``<T>`` is matched but not captured.
METHOD_RE = re.compile(
    r"public\s+(?:static\s+|final\s+|abstract\s+|default\s+|synchronized\s+|native\s+)*"
    r"(?:<[^>]+>\s+)?"
    r"([\w.<>\[\],\s?]+?)\s+(\w+)\s*\(([^)]*)\)"
)
# Java keywords that can land in METHOD_RE's name group; such matches are not methods.
KW = {"if", "for", "while", "switch", "catch", "return", "synchronized", "new"}
# Display labels per declaration keyword as (singular, plural).
LABEL = {
    "class": ("class", "classes"),
    "interface": ("interface", "interfaces"),
    "enum": ("enum", "enums"),
    "record": ("record", "records"),
    "@interface": ("annotation type", "annotation types"),
}
| 47 | + |
| 48 | + |
@dataclass
class TypeInfo:
    """One public Java type found in a source file, plus its public methods."""

    # Type name as written in the declaration.
    name: str
    # Declaration keyword: class, interface, enum, record or @interface.
    kind: str
    # Cleaned Javadoc text for the type; empty string when none was found.
    doc: str
    # Java source file the declaration was read from.
    file: Path
    # Public methods as (name, signature, doc) tuples; each instance gets its
    # own list through the default factory.
    methods: list[tuple[str, str, str]] = field(default_factory=list)
| 57 | + |
| 58 | + |
# Reference part of an inline {@link ...} tag: either a name followed by a
# parenthesised parameter list (which may contain spaces), or a run of
# non-whitespace characters.
_LINK_TARGET_RE = re.compile(r"[^\s(]*\([^)]*\)|\S+")


def _render_link(match: "re.Match[str]") -> str:
    """Render one ``{@link ...}`` match as inline code, dropping any label."""
    target = _LINK_TARGET_RE.match(match.group(1).strip())
    if target is None:
        # Tag holds only whitespace: leave it as written instead of failing.
        return match.group(0)
    return f"`{target.group(0)}`"


def clean_javadoc(raw: str) -> str:
    """Turn the inside of a ``/** ... */`` block into plain Markdown-friendly text.

    Args:
        raw: Comment text found between the opening ``/**`` and closing ``*/``.

    Returns:
        The text with each line's leading ``*`` removed, surrounding whitespace
        trimmed, runs of three or more newlines collapsed to one blank line,
        ``{@link}``/``{@linkplain}``/``{@code}`` rendered as inline code and
        ``{@literal}`` unwrapped. A link reference keeps its full parameter
        list, e.g. ``{@link Foo#bar(int, String) label}`` becomes
        ```Foo#bar(int, String)```.
    """
    cleaned = []
    for line in raw.splitlines():
        line = line.strip()
        if line.startswith("*"):
            # Drop the comment gutter and the padding that follows it.
            line = line[1:].lstrip()
        cleaned.append(line)
    text = re.sub(r"\n{3,}", "\n\n", "\n".join(cleaned).strip())
    text = re.sub(r"\{@link(?:plain)?\s+([^}]+)\}", _render_link, text)
    text = re.sub(r"\{@code\s+([^}]+)\}", r"`\1`", text)
    text = re.sub(r"\{@literal\s+([^}]+)\}", r"\1", text)
    return text
| 73 | + |
| 74 | + |
# A Javadoc block that ends the text, followed only by whitespace and
# annotations (optionally with an argument list). The comment body is written
# so that it cannot contain "*/": a match therefore never starts at an earlier
# "/**" and runs across other comments and code to reach the final "*/".
_DOC_BEFORE_RE = re.compile(
    r"/\*\*([^*]*(?:\*(?!/)[^*]*)*)\*/\s*(?:@\w+(?:\([^)]*\))?\s*)*\Z"
)


def preceding_doc(src: str, start: int) -> str:
    """Return the cleaned Javadoc block that sits directly before ``start``.

    Args:
        src: Full text of a Java source file.
        start: Offset in ``src`` where the documented declaration begins.

    Returns:
        The comment text passed through ``clean_javadoc`` when a ``/** ... */``
        block ends just before ``start`` with nothing but whitespace and
        annotations in between; otherwise an empty string.
    """
    m = _DOC_BEFORE_RE.search(src[:start])
    return clean_javadoc(m.group(1)) if m else ""
| 80 | + |
| 81 | + |
def _skip_literal(src: str, i: int) -> int:
    """Return the index just past the string or character literal opening at ``i``."""
    quote = src[i]
    if quote == '"' and src.startswith('"""', i):
        # Text block: runs to the next triple quote.
        close = src.find('"""', i + 3)
        return len(src) if close == -1 else close + 3
    j = i + 1
    n = len(src)
    while j < n:
        ch = src[j]
        if ch == "\\":
            j += 2  # skip the escaped character
            continue
        if ch == quote or ch == "\n":
            # Stopping at a newline keeps one stray quote from hiding the
            # rest of the file.
            return j + 1
        j += 1
    return n


def _matching_brace(src: str, open_idx: int) -> int:
    """Return the index of the ``}`` that closes the ``{`` at ``open_idx``.

    Braces inside comments, string literals and character literals are not
    counted. When no matching brace is found, ``open_idx`` is returned, which
    yields an empty body range.
    """
    depth = 0
    i = open_idx
    n = len(src)
    while i < n:
        c = src[i]
        if c == "/" and src.startswith("//", i):
            nl = src.find("\n", i)
            if nl == -1:
                break
            i = nl + 1
            continue
        if c == "/" and src.startswith("/*", i):
            close = src.find("*/", i + 2)
            if close == -1:
                break
            i = close + 2
            continue
        if c == '"' or c == "'":
            i = _skip_literal(src, i)
            continue
        if c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return open_idx


def parse_file(path: Path) -> tuple[str, list[TypeInfo]]:
    """Extract the package name and public types of one Java source file.

    Args:
        path: Java source file; read as UTF-8 with undecodable bytes replaced.

    Returns:
        ``(package, types)``. ``("", [])`` when the file has no package
        declaration, and ``(package, [])`` when it declares no public type.
        Each returned ``TypeInfo`` carries the public methods whose
        declaration lies inside that type's braces; a method inside a nested
        type is attributed to the nested type only.
    """
    src = path.read_text(encoding="utf-8", errors="replace")
    pkg_m = PKG_RE.search(src)
    if not pkg_m:
        return "", []
    pkg = pkg_m.group(1)

    types: list[TypeInfo] = []
    # Body range of each type as (type, index of "{", index of matching "}").
    ranges: list[tuple[TypeInfo, int, int]] = []
    for m in TYPE_RE.finditer(src):
        ti = TypeInfo(name=m.group(2), kind=m.group(1),
                      doc=preceding_doc(src, m.start()), file=path)
        types.append(ti)
        # Search from the end of this declaration's own match, so that an
        # earlier mention of the keyword or the name (Javadoc, another type)
        # cannot be mistaken for the declaration.
        brace = src.find("{", m.end())
        if brace != -1:
            ranges.append((ti, brace, _matching_brace(src, brace)))
    if not types:
        return pkg, []

    for m in METHOD_RE.finditer(src):
        ret, name, params = m.group(1).strip(), m.group(2), m.group(3).strip()
        # ``public record Point(int x)`` has the shape of a method whose
        # return type is ``record``; it is a type header, not a method.
        if name in KW or ret == "record":
            continue
        owner = None
        # Types are in source order, so the last enclosing range is the innermost.
        for ti, b, e in ranges:
            if b <= m.start() <= e:
                owner = ti
        if owner is None:
            continue
        owner.methods.append(
            (name, f"public {ret} {name}({params})", preceding_doc(src, m.start()))
        )

    return pkg, types
| 133 | + |
| 134 | + |
def write_package_md(pkg: str, type_infos: list[TypeInfo]) -> Path:
    """Write the Markdown page for one package and return its path.

    The page is written to ``OUT/<package with dots as slashes>.md`` and
    contains a per-kind count line, a summary table and one section per type
    (sorted by name) listing its source file, Javadoc and public methods.

    Args:
        pkg: Dotted Java package name.
        type_infos: Public types belonging to the package.

    Returns:
        Path of the file that was written.
    """
    out_path = OUT / f"{pkg.replace('.', '/')}.md"
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Sort once; the counts, the table and the sections all use this order.
    ordered = sorted(type_infos, key=lambda t: t.name)
    by_kind: dict[str, list[TypeInfo]] = defaultdict(list)
    for ti in ordered:
        by_kind[ti.kind].append(ti)

    def fmt(k: str, n: int) -> str:
        sing, plur = LABEL.get(k, (k, k + "s"))
        return f"{n} {sing if n == 1 else plur}"

    lines = [
        f"# Package `{pkg}`",
        "",
        "_" + ", ".join(fmt(k, len(v)) for k, v in sorted(by_kind.items())) + "_",
        "",
        "| Type | Name | Summary |",
        "| --- | --- | --- |",
    ]
    for ti in ordered:
        first = ti.doc.split("\n\n", 1)[0].replace("\n", " ").strip() or "_(undocumented)_"
        # Escape pipes so the summary cannot break the table row. This is done
        # outside the f-string because a backslash inside an f-string
        # expression is a syntax error before Python 3.12.
        summary = first.replace("|", "\\|")
        lines.append(f"| {ti.kind} | [`{ti.name}`](#{ti.name.lower()}) | {summary} |")
    lines.append("")

    for ti in ordered:
        # Forward slashes on every platform keep the generated page identical.
        rel_src = ti.file.relative_to(ROOT).as_posix()
        lines += [
            f"## {ti.name}", "",
            f"`{ti.kind} {pkg}.{ti.name}`", "",
            f"Source: `{rel_src}`", "",
        ]
        if ti.doc:
            lines += [ti.doc, ""]
        if ti.methods:
            lines += ["### Public methods", ""]
            for name, sig, doc in sorted(ti.methods):
                lines += [f"#### `{name}`", "", "```java", sig, "```", ""]
                if doc:
                    lines += [doc, ""]

    out_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
    return out_path
| 177 | + |
| 178 | + |
def clean_out() -> None:
    """Delete previously generated Markdown under ``OUT`` and prune empty directories.

    Does nothing when ``OUT`` does not exist. Every ``*.md`` file below ``OUT``
    is removed. Sub-directories are then removed deepest first; a directory
    that still contains something (for example a non-Markdown file) is left in
    place instead of aborting the run.
    """
    if not OUT.exists():
        return
    # Materialize the listing before deleting so the walk is not disturbed.
    for md_file in list(OUT.rglob("*.md")):
        md_file.unlink()
    # Reverse-sorted paths put children before their parents.
    for directory in sorted((p for p in OUT.rglob("*") if p.is_dir()), reverse=True):
        if any(directory.iterdir()):
            continue
        directory.rmdir()
| 186 | + |
| 187 | + |
def main() -> int:
    """Regenerate the Markdown API reference under ``OUT``.

    Clears earlier output, parses every ``*.java`` file below each library
    module's ``src/main/java`` directory, writes one page per package and then
    ``index.md``, and prints a one-line summary.

    Returns:
        Always 0, for use as the process exit status.
    """
    clean_out()
    OUT.mkdir(parents=True, exist_ok=True)

    pkg_to_types: dict[str, list[TypeInfo]] = defaultdict(list)
    file_count = 0
    skipped_dirs = {"generated-sources", "target"}
    for module in LIBRARY_MODULES:
        src_root = ROOT / module / "src" / "main" / "java"
        if not src_root.exists():
            continue
        # Sorted so the output does not depend on filesystem listing order.
        for jf in sorted(src_root.rglob("*.java")):
            # Only directories below src_root are inspected: the location of
            # the checkout itself must not cause files to be skipped, and
            # path components are independent of the platform separator.
            if skipped_dirs.intersection(jf.relative_to(src_root).parts[:-1]):
                continue
            pkg, types = parse_file(jf)
            if not pkg or not types:
                continue
            pkg_to_types[pkg].extend(types)
            file_count += 1

    for pkg, type_infos in sorted(pkg_to_types.items()):
        write_package_md(pkg, type_infos)

    total_types = sum(len(v) for v in pkg_to_types.values())
    lines = [
        "# Java SDK API reference",
        "",
        "Source-scraped API index for the ARCP Java SDK. Each package below",
        "links to a Markdown file listing its public types and methods with",
        "their Javadoc summaries.",
        "",
        f"_{len(pkg_to_types)} packages, {total_types} types, scraped from {file_count} source files._",
        "",
        "## Packages",
        "",
    ]
    for pkg in sorted(pkg_to_types):
        ntypes = len(pkg_to_types[pkg])
        noun = "type" if ntypes == 1 else "types"
        lines.append(f"- [`{pkg}`]({pkg.replace('.', '/')}.md) — {ntypes} {noun}")
    lines.append("")
    (OUT / "index.md").write_text("\n".join(lines), encoding="utf-8")

    # The extra file in the count is index.md.
    print(f"Wrote {len(pkg_to_types) + 1} Markdown files under {OUT.relative_to(ROOT)}/")
    return 0
| 232 | + |
| 233 | + |
# Run the generator only when executed as a script, and report main()'s
# return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
0 commit comments