From b962b4423c1f32a0e43cc9332bee59d84f9d3038 Mon Sep 17 00:00:00 2001 From: AlonePenguin <187998801+AlonePenguin@users.noreply.github.com> Date: Mon, 1 Jun 2026 07:33:24 -0400 Subject: [PATCH] Add manuscript terminology definition assistant --- .../README.md | 37 ++ .../demo.js | 50 +++ .../index.js | 305 ++++++++++++++++ .../make-demo-video.js | 88 +++++ .../package.json | 21 ++ .../reports/clean-terminology-packet.json | 123 +++++++ .../reports/demo-script.txt | 10 + .../reports/demo.mp4 | Bin 0 -> 9516 bytes .../reports/risky-terminology-packet.json | 339 ++++++++++++++++++ .../reports/summary.svg | 16 + .../reports/terminology-review-report.md | 32 ++ .../sample-data.js | 141 ++++++++ .../test.js | 42 +++ 13 files changed, 1204 insertions(+) create mode 100644 manuscript-terminology-definition-assistant/README.md create mode 100644 manuscript-terminology-definition-assistant/demo.js create mode 100644 manuscript-terminology-definition-assistant/index.js create mode 100644 manuscript-terminology-definition-assistant/make-demo-video.js create mode 100644 manuscript-terminology-definition-assistant/package.json create mode 100644 manuscript-terminology-definition-assistant/reports/clean-terminology-packet.json create mode 100644 manuscript-terminology-definition-assistant/reports/demo-script.txt create mode 100644 manuscript-terminology-definition-assistant/reports/demo.mp4 create mode 100644 manuscript-terminology-definition-assistant/reports/risky-terminology-packet.json create mode 100644 manuscript-terminology-definition-assistant/reports/summary.svg create mode 100644 manuscript-terminology-definition-assistant/reports/terminology-review-report.md create mode 100644 manuscript-terminology-definition-assistant/sample-data.js create mode 100644 manuscript-terminology-definition-assistant/test.js diff --git a/manuscript-terminology-definition-assistant/README.md b/manuscript-terminology-definition-assistant/README.md new file mode 100644 index 
00000000..8d87788f --- /dev/null +++ b/manuscript-terminology-definition-assistant/README.md @@ -0,0 +1,37 @@ +# Manuscript Terminology Definition Assistant + +This module is a focused AI-Assisted Research Tools slice for SCIBASE issue #13. It reviews manuscript terminology, acronym expansion, nomenclature style, citation term bindings, and lay-summary jargon before AI-generated peer-review packets, summaries, or citation recommendations are released. + +The assistant checks: + +- acronym expansion at first use, with stricter handling in title, abstract, summary, and lay-summary sections +- conflicting acronym expansions +- required reviewer-facing definitions for technical terms +- missing lay definitions for public-facing summaries +- preferred nomenclature and domain style drift +- citation recommendations that reference unknown or undefined terms +- generated summaries that include unexplained high-jargon terms + +It is intentionally separate from broad research-tool suites, evidence-grounded summarizers, citation context or metadata guards, statistical review, protocol deviation review, lay-summary safety, and collaborative editor glossary/export checks. This slice focuses on terminology readiness inside AI peer-review and summary packets. + +## Reviewer Path + +```bash +npm run check +npm test +npm run demo +npm run verify-video +``` + +Generated reviewer artifacts: + +- `reports/clean-terminology-packet.json` +- `reports/risky-terminology-packet.json` +- `reports/terminology-review-report.md` +- `reports/summary.svg` +- `reports/demo-script.txt` +- `reports/demo.mp4` + +## Safety + +All fixtures are synthetic. The module does not call uploaded manuscript stores, private corpora, citation indexes, external AI APIs, credential stores, payment systems, or external services. 
diff --git a/manuscript-terminology-definition-assistant/demo.js b/manuscript-terminology-definition-assistant/demo.js new file mode 100644 index 00000000..4e58003b --- /dev/null +++ b/manuscript-terminology-definition-assistant/demo.js @@ -0,0 +1,50 @@ +const fs = require("node:fs"); +const path = require("node:path"); +const { evaluateTerminologyPacket, renderMarkdownReport, renderSvgSummary } = require("./index"); +const { cleanPacket, riskyPacket } = require("./sample-data"); + +const reportsDir = path.join(__dirname, "reports"); +fs.mkdirSync(reportsDir, { recursive: true }); + +const cleanEvaluation = evaluateTerminologyPacket(cleanPacket); +const riskyEvaluation = evaluateTerminologyPacket(riskyPacket); + +fs.writeFileSync( + path.join(reportsDir, "clean-terminology-packet.json"), + `${JSON.stringify({ input: cleanPacket, evaluation: cleanEvaluation }, null, 2)}\n` +); +fs.writeFileSync( + path.join(reportsDir, "risky-terminology-packet.json"), + `${JSON.stringify({ input: riskyPacket, evaluation: riskyEvaluation }, null, 2)}\n` +); +fs.writeFileSync( + path.join(reportsDir, "terminology-review-report.md"), + renderMarkdownReport(riskyPacket, riskyEvaluation) +); +fs.writeFileSync( + path.join(reportsDir, "summary.svg"), + renderSvgSummary(riskyEvaluation) +); +fs.writeFileSync( + path.join(reportsDir, "demo-script.txt"), + [ + "Manuscript terminology definition assistant demo", + "", + `Clean packet decision: ${cleanEvaluation.summary.decision}`, + `Clean audit digest: ${cleanEvaluation.summary.auditDigest}`, + "", + `Risky packet decision: ${riskyEvaluation.summary.decision}`, + `Risky finding count: ${riskyEvaluation.summary.findingCount}`, + `Risky audit digest: ${riskyEvaluation.summary.auditDigest}`, + "", + "The risky packet demonstrates acronym expansion conflicts, missing first-use definitions, unknown citation term bindings, nomenclature style drift, and unexplained lay-summary jargon.", + "" + ].join("\n") +); + +console.log(JSON.stringify({ + 
cleanDecision: cleanEvaluation.summary.decision, + riskyDecision: riskyEvaluation.summary.decision, + riskyFindings: riskyEvaluation.summary.findingCount, + report: "reports/terminology-review-report.md" +}, null, 2)); diff --git a/manuscript-terminology-definition-assistant/index.js b/manuscript-terminology-definition-assistant/index.js new file mode 100644 index 00000000..f1a58874 --- /dev/null +++ b/manuscript-terminology-definition-assistant/index.js @@ -0,0 +1,305 @@ +const crypto = require("node:crypto"); + +const ABSTRACT_SECTIONS = new Set(["title", "abstract", "summary", "lay_summary"]); + +function asArray(value) { + return Array.isArray(value) ? value : []; +} + +function stableJson(value) { + if (Array.isArray(value)) { + return `[${value.map(stableJson).join(",")}]`; + } + if (value && typeof value === "object") { + return `{${Object.keys(value).sort().map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`).join(",")}}`; + } + return JSON.stringify(value); +} + +function sha256(value) { + return crypto.createHash("sha256").update(stableJson(value)).digest("hex"); +} + +function normalize(value) { + return String(value || "").trim().toLowerCase().replace(/\s+/g, " "); +} + +function severityRank(severity) { + return { critical: 4, high: 3, medium: 2, low: 1 }[severity] || 0; +} + +function addFinding(findings, severity, code, message, refs, action) { + findings.push({ + severity, + code, + message, + refs: asArray(refs), + action + }); +} + +function evaluateTerminologyPacket(packet) { + const findings = []; + const terms = asArray(packet.terms); + const citations = asArray(packet.citationRecommendations); + const summaries = asArray(packet.generatedSummaries); + const termById = new Map(terms.map((term) => [term.id, term])); + const acronymGroups = new Map(); + + for (const term of terms) { + if (!term.id) { + addFinding(findings, "high", "TERM_MISSING_ID", "A terminology entry is missing a stable id.", [], "assign_term_id"); + continue; + } + 
+ const short = String(term.short || "").trim(); + if (short) { + if (!acronymGroups.has(short.toUpperCase())) { + acronymGroups.set(short.toUpperCase(), []); + } + acronymGroups.get(short.toUpperCase()).push(term); + } + + if (short && term.expandedAtFirstUse !== true) { + const severity = ABSTRACT_SECTIONS.has(term.firstUseSection) ? "high" : "medium"; + addFinding( + findings, + severity, + "ACRONYM_NOT_EXPANDED_AT_FIRST_USE", + `${short} is not expanded at first use in ${term.firstUseSection || "an unknown section"}.`, + [term.id], + "expand_acronym_at_first_use" + ); + } + + if (term.requiresDefinition && !String(term.definition || "").trim()) { + addFinding( + findings, + ABSTRACT_SECTIONS.has(term.firstUseSection) ? "high" : "medium", + "TERM_DEFINITION_MISSING", + `${term.label || term.id} requires a reviewer-facing definition before AI summaries or citation suggestions use it.`, + [term.id], + "add_reviewer_facing_definition" + ); + } + + if (term.audience === "lay" && term.jargonLevel === "high" && !term.layDefinition) { + addFinding( + findings, + "medium", + "LAY_DEFINITION_MISSING", + `${term.label || term.id} appears in a lay-facing packet without a lay definition.`, + [term.id], + "add_lay_definition_or_remove_from_lay_summary" + ); + } + + const observedForms = new Set(asArray(term.observedForms).map(normalize).filter(Boolean)); + const preferred = normalize(term.preferredForm || term.label); + if (preferred && observedForms.size > 1 && !observedForms.has(preferred)) { + addFinding( + findings, + "medium", + "PREFERRED_TERM_FORM_ABSENT", + `${term.label || term.id} has multiple observed forms but none match the preferred nomenclature.`, + [term.id], + "normalize_term_to_preferred_form" + ); + } + + if (term.expectedStyle && term.observedStyle && term.expectedStyle !== term.observedStyle) { + addFinding( + findings, + "medium", + "NOMENCLATURE_STYLE_MISMATCH", + `${term.label || term.id} uses ${term.observedStyle} style where ${term.expectedStyle} 
style is expected.`, + [term.id], + "fix_domain_nomenclature_style" + ); + } + } + + for (const [short, group] of acronymGroups.entries()) { + const expansions = new Set(group.map((term) => normalize(term.expansion)).filter(Boolean)); + if (expansions.size > 1) { + addFinding( + findings, + "high", + "ACRONYM_EXPANSION_CONFLICT", + `${short} maps to ${expansions.size} different expansions in the manuscript packet.`, + group.map((term) => term.id), + "resolve_acronym_expansion_conflict" + ); + } + } + + for (const citation of citations) { + const term = termById.get(citation.termId); + if (!term) { + addFinding( + findings, + "medium", + "CITATION_TERM_UNKNOWN", + `Citation recommendation ${citation.id || "unknown"} references an unknown terminology id.`, + [citation.id], + "bind_citation_to_known_term" + ); + continue; + } + if (term.requiresDefinition && !String(term.definition || "").trim()) { + addFinding( + findings, + "medium", + "CITATION_BEFORE_TERM_DEFINITION", + `Citation recommendation ${citation.id || "unknown"} uses ${term.label || term.id} before the term is defined.`, + [citation.id, term.id], + "define_term_before_citation_insertion" + ); + } + } + + for (const summary of summaries) { + for (const termId of asArray(summary.termIds)) { + const term = termById.get(termId); + if (!term) { + continue; + } + if (summary.mode === "layperson" && (term.jargonLevel === "high" || term.audience === "expert") && !term.layDefinition) { + addFinding( + findings, + "high", + "LAY_SUMMARY_JARGON_UNEXPLAINED", + `Generated layperson summary ${summary.id || "unknown"} includes ${term.label || term.id} without an accessible definition.`, + [summary.id, term.id], + "rewrite_lay_summary_with_definition" + ); + } + } + } + + findings.sort((a, b) => severityRank(b.severity) - severityRank(a.severity) || a.code.localeCompare(b.code)); + const decision = findings.some((finding) => severityRank(finding.severity) >= 3) + ? 
"hold_ai_research_packet" + : findings.some((finding) => finding.severity === "medium") + ? "revise_terminology_packet" + : "release_ai_research_packet"; + + const coverage = terms.map((term) => ({ + id: term.id, + label: term.label, + short: term.short || null, + firstUseSection: term.firstUseSection || null, + hasDefinition: Boolean(String(term.definition || "").trim()), + hasLayDefinition: Boolean(term.layDefinition), + observedForms: asArray(term.observedForms) + })); + + const summary = { + manuscriptId: packet.manuscriptId, + decision, + termsReviewed: terms.length, + citationRecommendationsReviewed: citations.length, + generatedSummariesReviewed: summaries.length, + findingCount: findings.length, + highOrCriticalFindings: findings.filter((finding) => severityRank(finding.severity) >= 3).length + }; + const auditDigest = `sha256:${sha256({ summary, findings, coverage }).slice(0, 16)}`; + + return { + summary: { + ...summary, + auditDigest + }, + coverage, + findings, + actions: buildActions(findings) + }; +} + +function buildActions(findings) { + const seen = new Set(); + const actions = []; + for (const finding of findings) { + if (!finding.action || seen.has(finding.action)) { + continue; + } + seen.add(finding.action); + actions.push({ + id: finding.action, + severity: finding.severity, + refs: finding.refs + }); + } + return actions; +} + +function renderMarkdownReport(packet, evaluation) { + const lines = []; + lines.push(`# Manuscript Terminology Definition Review: ${packet.manuscriptId}`); + lines.push(""); + lines.push(`Decision: **${evaluation.summary.decision}**`); + lines.push(`Audit digest: \`${evaluation.summary.auditDigest}\``); + lines.push(""); + lines.push("## Findings"); + lines.push(""); + if (evaluation.findings.length === 0) { + lines.push("No terminology blockers were detected."); + } else { + lines.push("| Severity | Code | Message | Action |"); + lines.push("| --- | --- | --- | --- |"); + for (const finding of evaluation.findings) { + 
lines.push(`| ${finding.severity} | \`${finding.code}\` | ${escapeMarkdown(finding.message)} | \`${finding.action}\` |`); + } + } + lines.push(""); + lines.push("## Term Coverage"); + lines.push(""); + lines.push("| Term | Short | Section | Definition | Lay definition | Observed forms |"); + lines.push("| --- | --- | --- | --- | --- | --- |"); + for (const item of evaluation.coverage) { + lines.push(`| ${item.label || item.id} | ${item.short || ""} | ${item.firstUseSection || ""} | ${item.hasDefinition ? "yes" : "no"} | ${item.hasLayDefinition ? "yes" : "no"} | ${item.observedForms.join(", ")} |`); + } + lines.push(""); + lines.push("Synthetic data only. No uploaded manuscripts, private corpora, citation indexes, external AI APIs, credentials, or payment systems are used."); + return `${lines.join("\n")}\n`; +} + +function renderSvgSummary(evaluation) { + const color = evaluation.summary.decision === "hold_ai_research_packet" ? "#b91c1c" : evaluation.summary.decision === "revise_terminology_packet" ? 
"#b45309" : "#047857"; + const rows = evaluation.findings.slice(0, 5).map((finding, index) => { + const y = 304 + index * 42; + return `${escapeXml(finding.severity.toUpperCase())} ${escapeXml(finding.code)}`; + }).join("\n"); + return ` + + + + Terminology Definition Assistant + + ${escapeXml(evaluation.summary.decision)} + Terms reviewed: ${evaluation.summary.termsReviewed} + Findings: ${evaluation.summary.findingCount} +${rows || 'No findings detected.'} + ${escapeXml(evaluation.summary.auditDigest)} + +`; +} + +function escapeMarkdown(value) { + return String(value).replace(/\|/g, "\\|").replace(/\n/g, " "); +} + +function escapeXml(value) { + return String(value) + .replace(/&/g, "&amp;") + .replace(/</g, "&lt;") + .replace(/>/g, "&gt;") + .replace(/"/g, "&quot;"); +} + +module.exports = { + evaluateTerminologyPacket, + renderMarkdownReport, + renderSvgSummary, + sha256 +}; diff --git a/manuscript-terminology-definition-assistant/make-demo-video.js b/manuscript-terminology-definition-assistant/make-demo-video.js new file mode 100644 index 00000000..571e994c --- /dev/null +++ b/manuscript-terminology-definition-assistant/make-demo-video.js @@ -0,0 +1,88 @@ +const fs = require("node:fs"); +const path = require("node:path"); +const { spawnSync } = require("node:child_process"); +const { evaluateTerminologyPacket } = require("./index"); +const { cleanPacket, riskyPacket } = require("./sample-data"); + +const reportsDir = path.join(__dirname, "reports"); +const framesDir = path.join(reportsDir, "frames"); +fs.mkdirSync(framesDir, { recursive: true }); + +const clean = evaluateTerminologyPacket(cleanPacket); +const risky = evaluateTerminologyPacket(riskyPacket); +const width = 960; +const height = 540; +const frames = 72; +const fps = 18; + +function setPixel(buffer, x, y, r, g, b) { + if (x < 0 || y < 0 || x >= width || y >= height) { + return; + } + const offset = (y * width + x) * 3; + buffer[offset] = r; + buffer[offset + 1] = g; + buffer[offset + 2] = b; +} + 
+function fillRect(buffer, x, y, w, h, 
r, g, b) { + for (let row = y; row < y + h; row += 1) { + for (let col = x; col < x + w; col += 1) { + setPixel(buffer, col, row, r, g, b); + } + } +} + +function writeFrame(index, progress) { + const buffer = Buffer.alloc(width * height * 3, 248); + fillRect(buffer, 0, 0, width, height, 248, 250, 252); + fillRect(buffer, 54, 48, 852, 444, 255, 255, 255); + fillRect(buffer, 54, 48, 852, 8, 17, 24, 39); + + const left = Math.floor(330 * Math.min(1, progress * 1.8)); + const right = Math.floor(330 * Math.max(0, (progress - 0.3) * 1.55)); + fillRect(buffer, 104, 112, 330, 78, 229, 231, 235); + fillRect(buffer, 104, 112, left, 78, 5, 150, 105); + fillRect(buffer, 526, 112, 330, 78, 229, 231, 235); + fillRect(buffer, 526, 112, right, 78, 185, 28, 28); + + for (let i = 0; i < clean.summary.termsReviewed; i += 1) { + fillRect(buffer, 116 + i * 58, 246, 40, 124, 16, 185, 129); + } + + for (let i = 0; i < Math.min(9, risky.summary.findingCount); i += 1) { + const barHeight = 38 + (i % 4) * 26; + fillRect(buffer, 548 + i * 30, 378 - barHeight, 22, barHeight, 220, 38, 38); + } + + fillRect(buffer, 104, 430, Math.floor(752 * progress), 18, 37, 99, 235); + const header = Buffer.from(`P6\n${width} ${height}\n255\n`, "ascii"); + fs.writeFileSync(path.join(framesDir, `frame-${String(index).padStart(3, "0")}.ppm`), Buffer.concat([header, buffer])); +} + +for (let index = 0; index < frames; index += 1) { + writeFrame(index, index / (frames - 1)); +} + +const output = path.join(reportsDir, "demo.mp4"); +const ffmpeg = process.env.FFMPEG_PATH || "ffmpeg"; +const result = spawnSync(ffmpeg, [ + "-y", + "-framerate", + String(fps), + "-i", + path.join(framesDir, "frame-%03d.ppm"), + "-pix_fmt", + "yuv420p", + "-movflags", + "+faststart", + output +], { stdio: "inherit" }); + +fs.rmSync(framesDir, { recursive: true, force: true }); + +if (result.status !== 0) { + process.exit(result.status || 1); +} + +console.log(`Wrote ${output}`); diff --git 
a/manuscript-terminology-definition-assistant/package.json b/manuscript-terminology-definition-assistant/package.json new file mode 100644 index 00000000..f8c72cb0 --- /dev/null +++ b/manuscript-terminology-definition-assistant/package.json @@ -0,0 +1,21 @@ +{ + "name": "manuscript-terminology-definition-assistant", + "version": "1.0.0", + "private": true, + "description": "Synthetic manuscript terminology and acronym definition assistant for SCIBASE AI research tools.", + "main": "index.js", + "scripts": { + "check": "node --check index.js && node --check sample-data.js && node --check test.js && node --check demo.js && node --check make-demo-video.js", + "test": "node test.js", + "demo": "node demo.js && node make-demo-video.js", + "verify-video": "ffprobe -v error -select_streams v:0 -show_entries stream=codec_name,width,height,duration,avg_frame_rate -show_entries format=duration,size -of default=noprint_wrappers=1 reports/demo.mp4" + }, + "keywords": [ + "scibase", + "ai-assisted-research", + "terminology", + "acronyms", + "peer-review" + ], + "license": "MIT" +} diff --git a/manuscript-terminology-definition-assistant/reports/clean-terminology-packet.json b/manuscript-terminology-definition-assistant/reports/clean-terminology-packet.json new file mode 100644 index 00000000..1a8214bf --- /dev/null +++ b/manuscript-terminology-definition-assistant/reports/clean-terminology-packet.json @@ -0,0 +1,123 @@ +{ + "input": { + "manuscriptId": "astrocyte-calcium-preprint-v3", + "terms": [ + { + "id": "term-gcamp", + "label": "genetically encoded calcium indicator", + "short": "GECI", + "expansion": "genetically encoded calcium indicator", + "firstUseSection": "abstract", + "expandedAtFirstUse": true, + "requiresDefinition": true, + "definition": "A fluorescent protein sensor used to report intracellular calcium dynamics.", + "layDefinition": "A lab-made sensor that glows when cell calcium changes.", + "jargonLevel": "medium", + "observedForms": [ + "genetically encoded 
calcium indicator", + "GECI" + ], + "preferredForm": "genetically encoded calcium indicator" + }, + { + "id": "term-gfap", + "label": "glial fibrillary acidic protein", + "short": "GFAP", + "expansion": "glial fibrillary acidic protein", + "firstUseSection": "methods", + "expandedAtFirstUse": true, + "requiresDefinition": true, + "definition": "An astrocyte marker used to identify glial cells in the imaging cohort.", + "layDefinition": "A marker that helps identify support cells in the brain.", + "jargonLevel": "medium", + "observedForms": [ + "glial fibrillary acidic protein", + "GFAP" + ], + "preferredForm": "glial fibrillary acidic protein" + }, + { + "id": "term-stat-model", + "label": "mixed-effects model", + "firstUseSection": "results", + "expandedAtFirstUse": true, + "requiresDefinition": true, + "definition": "A statistical model that accounts for repeated observations from the same animal.", + "layDefinition": "A model that avoids counting repeated measurements as unrelated.", + "jargonLevel": "medium", + "observedForms": [ + "mixed-effects model" + ], + "preferredForm": "mixed-effects model" + } + ], + "citationRecommendations": [ + { + "id": "cite-geci-method", + "termId": "term-gcamp", + "reason": "Supports calcium indicator methodology." 
+ } + ], + "generatedSummaries": [ + { + "id": "summary-lay", + "mode": "layperson", + "termIds": [ + "term-gcamp", + "term-stat-model" + ] + } + ] + }, + "evaluation": { + "summary": { + "manuscriptId": "astrocyte-calcium-preprint-v3", + "decision": "release_ai_research_packet", + "termsReviewed": 3, + "citationRecommendationsReviewed": 1, + "generatedSummariesReviewed": 1, + "findingCount": 0, + "highOrCriticalFindings": 0, + "auditDigest": "sha256:c0ce9eb80921f467" + }, + "coverage": [ + { + "id": "term-gcamp", + "label": "genetically encoded calcium indicator", + "short": "GECI", + "firstUseSection": "abstract", + "hasDefinition": true, + "hasLayDefinition": true, + "observedForms": [ + "genetically encoded calcium indicator", + "GECI" + ] + }, + { + "id": "term-gfap", + "label": "glial fibrillary acidic protein", + "short": "GFAP", + "firstUseSection": "methods", + "hasDefinition": true, + "hasLayDefinition": true, + "observedForms": [ + "glial fibrillary acidic protein", + "GFAP" + ] + }, + { + "id": "term-stat-model", + "label": "mixed-effects model", + "short": null, + "firstUseSection": "results", + "hasDefinition": true, + "hasLayDefinition": true, + "observedForms": [ + "mixed-effects model" + ] + } + ], + "findings": [], + "actions": [] + } +} diff --git a/manuscript-terminology-definition-assistant/reports/demo-script.txt b/manuscript-terminology-definition-assistant/reports/demo-script.txt new file mode 100644 index 00000000..6a90dfab --- /dev/null +++ b/manuscript-terminology-definition-assistant/reports/demo-script.txt @@ -0,0 +1,10 @@ +Manuscript terminology definition assistant demo + +Clean packet decision: release_ai_research_packet +Clean audit digest: sha256:c0ce9eb80921f467 + +Risky packet decision: hold_ai_research_packet +Risky finding count: 12 +Risky audit digest: sha256:525040b780da7400 + +The risky packet demonstrates acronym expansion conflicts, missing first-use definitions, unknown citation term bindings, nomenclature style drift, 
and unexplained lay-summary jargon. diff --git a/manuscript-terminology-definition-assistant/reports/demo.mp4 b/manuscript-terminology-definition-assistant/reports/demo.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..667a48406ee970f9f9d87b297d8a90723bf88fb3 GIT binary patch literal 9516 zcmbt)2{@Er+xRoamaJJK+enMu*wIxD~d=F7gW zGxIlJH%~kmL;1S>ZMzx3K|u~rz$gSMdbs&eR%_rZz|)QRyAGF*piMHv`?>iLVI1ak zUr#@GP!S05U1j=DwYRygjfuHYh%gTPz~h|$6d%-Do!N;>bM*oBA}WpgdqH5F9C*<{ z2|xejx!QrO17-uAS78-H&=TNlsw$~!E2*GWP(GfnL8_YStH^6#!YeDFq6x5!5pIy= zN;VAtJp=%j`x|#RsKdIzZ5IHrOjrk477l>ida1Bp!hB(wSOBoTa6gQ%3}8C| zD*&*aux>CfxF75}0tuAx1Llwww!zikjfMy1L0{r(36DwrGq8bRNWqiIur2?36g}6X z=k|OW31%qnhF>KHOo65N0Cx~!RKN!fTg2{8A%f5-QQ-@)z^1&*%CfRj&}}mMaJa*) zELGtHszdtwI2?vhiYNk!LPV)(YM>};s;a6eS2Ybyv<4mwm?;4SWh+|~Q$=+Y&e0fP zx)BLrz?ei1_942{P^xIOnxZONRReT-&}d{`W#z!YKqWXZlYH=gN+ilLWmt-m2hG<9 z;E>2PPm&*17e&Ck;t3cP6bQZ;HIy6C)rUmz#;E9`bAXP!pC`2D0Pb#Qs2Wh(zXrMwk?u$VK74U8$BtIfXRYgHX1?7&X(p<p zLEY000~X5#?}jJCX1Kbzdg7_D5Kjtm)v!RK=P?hOD;Oh@iGD7}NMta&)=37W-o#+Q z8>6a$Uaz`XMo|ewKO(`OhEYeuYEtmKxctjpc$`AO5Gd{_Uyy}} zuvwsiQB_kyqmIMIV9-jMph|`rua@{AjHV9gpwftBj5^Ab3|0wv1Xx7C3x6Cq0KA+K z9fH`biKn9>y_MyGf;oYX`!Rj0g}WKqvPvC;JN$H97P;rW>foHX{>d9AsRs-dcidd< zWKM@{yvnEp5mI%`of3XGN2X<$d`|7OXgPAAF(Z78E&fJ}Fiff2zm27&q{Fy@o=M3-PVV-9PK?{25KL_9hUy~tCQ z+YGQw4UKQVR5A>64+C3~j}qy8M=*jo+or;#2s;CrYitgfW6CTKP?@f^2aQvQ0&ha` z8y0^I2L)ixHX};Au~w0xIC3Lr)J3CFWMF*yfMMSk-J{n-82)$D6{newHHxY%RUGo% zE8jm^*3fSGnyp znWIi1amYO5z3jK5esI%MQQfvfmYj^>V*p+5)Dm)8;;z z@Xl+uG8bP5R-O>*>?fn&BYR1dJYT2~cb|OqL`?;D=JGNze#YafTl|NxB15?i{@d~b zZA7jdC#HJrwF+ID4-y_2mlv$)zQpuM<2zI86R(nqMa8KW@t%ld8)fSkKUYZ+9twC_ zD$XDNa`e0o=Yo@cjC*wOV!5Bh_Yr|4u4KsziJmvvHl;nk;&`=w2T=-LVQ7@5v0^8& zH@^5_>;{g9+ip&ki)LjH9eMk1w#TA<XzeI5ru{b*HN8Do!02fQ#D#6 zX2~YC4dGWud!FrXN%Pof!r}e0@h;z6!ljF4eN67kE}IvS1K(fE2?tR8URUp80fGw(Z- zRHS9ZHtt}0LB3Yfd5fu3z9H%TO6dN#P0!2Dkay^RlblM=yYc=>+M|oj!N$`=)7*|< zxlW5ppMI}}Z@8e$aci?oWGdRS_*S?2JUTxoyU|EI`|P((2e>mb985cRsEU8krYKYi 
z)N*SJH)yNOIzRJ!$~Vy#lXx@vsedbn6LXSsU}9n`TaL2<%L&%Y*2QGGc=eD7?8uxm zt?TE$JN6cro|}y0G!?g}5IVW5KiTxg?PwLBd??)1I=*~3OO3!HP&PNEWRiC*ec#rc z$1G|mTlp~DGahPH-oqANEbJD=l@(@na_^h1yB_AfJM?C3qVL((3-RVj`4`(~!>%ab zsTP^UHa6nfA1OpwYUJ{Yo2p#NbY>$zxSKJ3V$`TYz<2hi3LisWjgvZMvWNHlmK^NP z9mZo57B8(6GIrW1l~<{2D2FBt)AaT$WphUU+&wtqC1EuU4+G!x`_YD8}DNaUqg zABWCu*F~{ewS9U2RPUl-Eia?u>;{XA%nI#YPlp#ODmQ2Siu-izps-Qn zlySLCk)ON2K$Zy--Ad7Tmh-9%N3ojO##|izAb|B(=8^?=S1DSt*YIhB-DQS>bZ_V0 zw733={vUd>Pd(sB_nLdsx-nQqD%plM*|)9 z=FLk#+oZqQUT%H+B5Kp2{R3wXu|8}z79%!~o5f#G>LYEhX)yP9+~ixn)w4QwD)ioF zA*NHiqQ5LOkc*9fgor^ddiCX{L)NA>eu#(wp2QI|>5&#JX;iXi9?S3&({Ac8d~hc@ z@1ayKk|=~=_h`xSFFCYD&eq}EZpSO7vM>22mKG(>J#SpTzVP}qzi{rS-CB2IqfSbb zjUO7M^|mcV3*X88@oI(2Q^jZIMbFThesPW6xCFoBE=ycR5a;`mE5pwVb;ERrF<*NP zG2-!?seSa>dd1Hzh{{{Wk1)AQ;l}gIeq3Q)*@ZnM#|MOe!4DEJ>l9H zxeFbiz5Or8rM7p9cF5NE?GeCewV(Ie-`l$LyIn}jynbk{q3<0Dt`Jf7F8V<1VH+_w zWAArNM{O#n2zw;m$o^BY_OaX$-}FXnJi7My=g4L(o73d?B7XPn16faywjcB2eoEH! zwM|_OY78uI7BAl-Q5R9DVq73UbtLPBV3^2rFT{Ay>xolBC z*Yo1Yv{fm%C4&3o4R9ztkIR(uF|J+CPX%`!d{_@w^6APX>)Cr0qtulNJwfRh*87O|pBw=5OPM#C;pw4?bCZYtR&vY;#U}l!>T47OqGe z2&{?nplMUOYSZOBul~Z6YUDJP#P5$iF=#>y*t*x5>6g;2^pZ!mB!Y#j|EHLXUP(2^ z(YzWcpwbOorm?$BXr*5ey*1U`arv;g-+EG)c(4ixy(OYI&;p*&6G(D>Yi&lzgw~ln2unz z4Sk?{a?e~q_r%KC!?(^sAhpsE}q+ttfx1-vui)HB-hF4gkt4apx>V)Z*(ZXX5L4z(Kw z3hs`{B4#hs=m6Uqmq|45=h?BBMStBienx`ikM~_SgE?=tNlgBHUA)|T>edAbuUp3+ zs(iJ>LQweo?YY$@Gn3bwhd&opw3aRfe7N0rov!Xo+g}ejsvxqXSLHMRQj`7}b!#%^ z^yoJQRiok<8A>OK^kX2uRbM_|^?2o~?9c<)y(y{Kl*7k}4?%{Y=XaI8}u1wluwH++d`OdHJeJQq;g!YgShds|-e zlAvhAW8Z2IWQIQRdeB>G-W8L1ldT7NolGCM*j_ts4g@PAvahdc`%YiMYxso7^Q&9^ zG<1%Bm{IwLksASbqe!%t5%(QcMeYMqFo6p$^VX`SOyX;;H7zQ^DI(yhR$-1#)R}7AcjR*Jo7yps-IrNe} zPYU0k70Wzx(1sg`K;yEXnegc=gEeF>6W{UF{*H~#q`1MR>lT-K3$xH<6a--gB$oDy zrFi5&kvM#w4rmk**`sTAlv0*%KiJUzGQY`*d8A$Z8sRtUv=#8a+8laEwTdGpE;8G!Y^x7RIJTnobdO=KYg^+= zOZ5d_lWExyQTWcOhhuCs;Zs;OE|s$K-AY7K-r3mSqm{G4@y-5-Z?)EEgR{Z0>p|tXvXr^B%NBZYJXFfI z7i9u_r4=sA*1Vs?o`|-yf4mlt_}1KV_y 
z)ZxhcG0)P6=zz=`mqjt}=Z;IM-?gp=FJpDFxNaPJR0pN_MK^Zzes-+<@jk<{X+>M5 zy1>78gc(qJye|l0HA@LP^UxxUzqjVfm7#VtH#pQnvIs`vnj)J2K@nA5(WK@L6+D@! zt^EL550{nl=jPqsGFF!)5HJkO6Zx|H!i*O*!-;c+ecy{y6sqB!XG#T3yT>?iDm#qd#AN#-?0OJC`2haVj5%9Z@T3lA? zUjp~T0-uvP62rx|GTU;UM6P^Z+%V2w^79i$k}l@p3LXL6;;9(kL@@Nrx{gsV~Zbk zf>*GoX26cH&FS5G=ZbXzgz`B0fz94vejh!C4mcFxvOfGd3EV3s$;;4538rn4+r(uL z5xaMhwDmFtBvC1wj$T^94LK$ez9yzwOxC*4L;0c{{EEbDAN{DVuXwEAzU(_2_+)J0 z`(!n8;lo3-z0%l1TkodbV;@)&3A%#|xnbu8Z){D!9Ibn8x=?Q=?D`Mgv=3GJ56E{# z&Tb5)h0%XJIuojPJJ6Ox_d0kf-$>paJ~Ng_8-0w z=im|k^MZ5t@VoDqa-`i>tWz&yS7=Oc&!BT9$xOjUm2{Swbbvxi3ZDXP#bb{p~pbErgimhvExE4RT zZAw#E?tByd^^D7j4m~qXYv8g~L?s;mC9{X+ zm_4I1P)u-#U2e{V1B2ei!xU7eB-=I)^y6L$9 z5f(NKc;m)1Q61X0#JIx?K^xK=-<@6#qCxgpz-A93 zJAB>7xN$jNYlts3(9&Tip*_CdWN>GLD0ZZ|u|?l;roVS=PXk}Qzm|HOw;dnKG{J+_ z-_+nJ1o7&m>>*5Z(>dX2N5f?j{!DgUH%iXW{F1Y;YUwX65Q&V)crb=>65y3qYFQ#? zojp_FXET};zb%>lgWn?TrWc*Msk59H@(@Jf@hSY=;SR}CC&|lM5=WBWH+Ry{Z8cVH zNO>A{biTiMms9J5Pgui!U2knJ@oyhNwQ|)^Vu~iIjAhJrc3?6XWs|uAZIX1jEd~L1Y$TS_f6$_PIizZ+gpJ zzgdn{ZspF^>yW$SRgg1>T3^+a-J|+~ojgJs@Wxj|WItI~&~jD5&i_Zj-g|2bVnn)7JOE_AxXh<(3Ho<_<}3pXJK#{DDCq<7&-R}%?{c~tai8DkR|~H9hhHmOX^I!a zzIU0nvP=E8$LYBG!alEz6OoPTWr!k+lEBkY8t>;1V^yIE-bK)h@{yRItog^PgxW8k?;%O8rVG`!UZGcB4xK+f^nH&R$tH_xVZTz1#;k&+Ykm_ln{|k;UV!H}+H;4H4cv zy1x1Jm9wvQ5`$?h0y32l4_U_zVy3pugm|iy7sooDt}i-#N~Vz$2;Yrku$u_3o-vWg zZJSqiAK5Zn4JDR0#N3p3c%d#TdV~2D)~58E+$XK3A=L+;x{fSw`PDl42v~!VuGtr^ zpmXXK6XGG@1k$_g+LSPfN(gCC4H5=l@o87f_I%Bf2d;wdvJ1+2Ak42$*CC6a05IeQL}h4v4%e%|VvT}~35 zyLF~+dW+?kx1WK7BM$hMn%BmtKF-yC#RJYQV0$rq)(zRH6K;R-;@K|s3!9Rmh03GM z1x}YhplfJmEsA*OVwDM@43~83-d*0U#0B38RB%~-f1WYf-glTSjBr+DkGTwMlQ)N7 zhxb#-+%&RqD9}!2gs+cRC3bhuPi1%*@q*gB~J# z(^`B)4BKjo(s|Ne-mm8X$v5T?wA zb$}ck1O(Qnvde%<$lt&7(%d=5hP~TgklxvXpZ<_Yy&I{ReC#V|!>%E@ZJNxI@B(0P znVZ*~Szuy(bsMwo>@XMToNl`5@num?U;EZEcV|%oRIiwcz$#uQrN=5>+3E5Mg8Rc)sp>yfea$sXF)uqT zUim6ObZ7!CRty+`kRI||*lY?(3=({>&%Yk zV;_0XfCjV&ml^Sgm$ANC_cGL_xBjyU8#1cSN9@|aa#N9Y&&wAhuyEiMXY$zI71SuU 
z&pdnnNLW$t-b*%quOYxqAD4akUruFe>)_MwcJkV;wuzo8M@v+fRxP`oXbuQr77z5# z5z7rM`;Zf;cR^!k=gz+ldV&G;vWV>Jb(g9bH~LW;SNOcX`x;42oH6UKM~9#!;m4`g zea6`6Fn|oEw`SeqqK$*T(a~Q<_6;d8(Y(&>YwH6)geH!BY?Yc9IvoUX^>CS2|BQjG zQEPTi@BCh?m1a0IZSKgbjM@fUm#r<&`I0QMcp%|Sypr!XKrWBVuKOM55Z5^m`DLY% z?rRH%9$t(hf){CQhRFUCuioTq3LG7x19T}wcK^C&X=;-VpYmmUW8J{OgOXnz`kkG( z`6dC9IWBW>Es62H#HvV}gy{zB!G}t-+#r0f#Y8X4I({ zbr7^EvCVFC5?ehY;l82KKK z96lRx^W==`vEByw#<(oDHQ$zsSWYMc?_DI`h4q%xK*# bBBx5@MO^qTEO^s2&^vhKgVLS6IGz6is{HD2 literal 0 HcmV?d00001 diff --git a/manuscript-terminology-definition-assistant/reports/risky-terminology-packet.json b/manuscript-terminology-definition-assistant/reports/risky-terminology-packet.json new file mode 100644 index 00000000..88d6e479 --- /dev/null +++ b/manuscript-terminology-definition-assistant/reports/risky-terminology-packet.json @@ -0,0 +1,339 @@ +{ + "input": { + "manuscriptId": "oncology-organoid-draft-v1", + "terms": [ + { + "id": "term-pdx-a", + "label": "patient-derived xenograft", + "short": "PDX", + "expansion": "patient-derived xenograft", + "firstUseSection": "abstract", + "expandedAtFirstUse": false, + "requiresDefinition": true, + "definition": "", + "jargonLevel": "high", + "observedForms": [ + "PDX", + "patient derived xenograft" + ], + "preferredForm": "patient-derived xenograft", + "audience": "expert" + }, + { + "id": "term-pdx-b", + "label": "pharmacodynamic index", + "short": "PDX", + "expansion": "pharmacodynamic index", + "firstUseSection": "results", + "expandedAtFirstUse": true, + "requiresDefinition": true, + "definition": "A composite drug-response index used in the exploratory analysis.", + "jargonLevel": "high", + "observedForms": [ + "PDX", + "pharmacodynamic index" + ], + "preferredForm": "pharmacodynamic index" + }, + { + "id": "term-erbb2", + "label": "ERBB2", + "firstUseSection": "title", + "expandedAtFirstUse": true, + "requiresDefinition": true, + "definition": "", + "jargonLevel": "high", + "expectedStyle": "HGNC gene symbol", + 
"observedStyle": "protein alias", + "observedForms": [ + "HER2", + "ERBB2-positive", + "Her-2" + ], + "preferredForm": "ERBB2" + }, + { + "id": "term-organoid", + "label": "organoid viability score", + "firstUseSection": "lay_summary", + "expandedAtFirstUse": true, + "requiresDefinition": true, + "definition": "A normalized assay readout for live tumor organoid cells after treatment.", + "jargonLevel": "high", + "audience": "lay", + "observedForms": [ + "organoid viability score", + "OVS" + ], + "preferredForm": "organoid viability score" + } + ], + "citationRecommendations": [ + { + "id": "cite-pdx-model", + "termId": "term-pdx-a", + "reason": "Recommended for first use of PDX model." + }, + { + "id": "cite-unknown", + "termId": "term-nonexistent", + "reason": "Unbound recommendation from retrieval layer." + } + ], + "generatedSummaries": [ + { + "id": "summary-lay", + "mode": "layperson", + "termIds": [ + "term-pdx-a", + "term-organoid" + ] + } + ] + }, + "evaluation": { + "summary": { + "manuscriptId": "oncology-organoid-draft-v1", + "decision": "hold_ai_research_packet", + "termsReviewed": 4, + "citationRecommendationsReviewed": 2, + "generatedSummariesReviewed": 1, + "findingCount": 12, + "highOrCriticalFindings": 6, + "auditDigest": "sha256:525040b780da7400" + }, + "coverage": [ + { + "id": "term-pdx-a", + "label": "patient-derived xenograft", + "short": "PDX", + "firstUseSection": "abstract", + "hasDefinition": false, + "hasLayDefinition": false, + "observedForms": [ + "PDX", + "patient derived xenograft" + ] + }, + { + "id": "term-pdx-b", + "label": "pharmacodynamic index", + "short": "PDX", + "firstUseSection": "results", + "hasDefinition": true, + "hasLayDefinition": false, + "observedForms": [ + "PDX", + "pharmacodynamic index" + ] + }, + { + "id": "term-erbb2", + "label": "ERBB2", + "short": null, + "firstUseSection": "title", + "hasDefinition": false, + "hasLayDefinition": false, + "observedForms": [ + "HER2", + "ERBB2-positive", + "Her-2" + ] + }, + { 
+ "id": "term-organoid", + "label": "organoid viability score", + "short": null, + "firstUseSection": "lay_summary", + "hasDefinition": true, + "hasLayDefinition": false, + "observedForms": [ + "organoid viability score", + "OVS" + ] + } + ], + "findings": [ + { + "severity": "high", + "code": "ACRONYM_EXPANSION_CONFLICT", + "message": "PDX maps to 2 different expansions in the manuscript packet.", + "refs": [ + "term-pdx-a", + "term-pdx-b" + ], + "action": "resolve_acronym_expansion_conflict" + }, + { + "severity": "high", + "code": "ACRONYM_NOT_EXPANDED_AT_FIRST_USE", + "message": "PDX is not expanded at first use in abstract.", + "refs": [ + "term-pdx-a" + ], + "action": "expand_acronym_at_first_use" + }, + { + "severity": "high", + "code": "LAY_SUMMARY_JARGON_UNEXPLAINED", + "message": "Generated layperson summary summary-lay includes patient-derived xenograft without an accessible definition.", + "refs": [ + "summary-lay", + "term-pdx-a" + ], + "action": "rewrite_lay_summary_with_definition" + }, + { + "severity": "high", + "code": "LAY_SUMMARY_JARGON_UNEXPLAINED", + "message": "Generated layperson summary summary-lay includes organoid viability score without an accessible definition.", + "refs": [ + "summary-lay", + "term-organoid" + ], + "action": "rewrite_lay_summary_with_definition" + }, + { + "severity": "high", + "code": "TERM_DEFINITION_MISSING", + "message": "patient-derived xenograft requires a reviewer-facing definition before AI summaries or citation suggestions use it.", + "refs": [ + "term-pdx-a" + ], + "action": "add_reviewer_facing_definition" + }, + { + "severity": "high", + "code": "TERM_DEFINITION_MISSING", + "message": "ERBB2 requires a reviewer-facing definition before AI summaries or citation suggestions use it.", + "refs": [ + "term-erbb2" + ], + "action": "add_reviewer_facing_definition" + }, + { + "severity": "medium", + "code": "CITATION_BEFORE_TERM_DEFINITION", + "message": "Citation recommendation cite-pdx-model uses patient-derived 
xenograft before the term is defined.", + "refs": [ + "cite-pdx-model", + "term-pdx-a" + ], + "action": "define_term_before_citation_insertion" + }, + { + "severity": "medium", + "code": "CITATION_TERM_UNKNOWN", + "message": "Citation recommendation cite-unknown references an unknown terminology id.", + "refs": [ + "cite-unknown" + ], + "action": "bind_citation_to_known_term" + }, + { + "severity": "medium", + "code": "LAY_DEFINITION_MISSING", + "message": "organoid viability score appears in a lay-facing packet without a lay definition.", + "refs": [ + "term-organoid" + ], + "action": "add_lay_definition_or_remove_from_lay_summary" + }, + { + "severity": "medium", + "code": "NOMENCLATURE_STYLE_MISMATCH", + "message": "ERBB2 uses protein alias style where HGNC gene symbol style is expected.", + "refs": [ + "term-erbb2" + ], + "action": "fix_domain_nomenclature_style" + }, + { + "severity": "medium", + "code": "PREFERRED_TERM_FORM_ABSENT", + "message": "patient-derived xenograft has multiple observed forms but none match the preferred nomenclature.", + "refs": [ + "term-pdx-a" + ], + "action": "normalize_term_to_preferred_form" + }, + { + "severity": "medium", + "code": "PREFERRED_TERM_FORM_ABSENT", + "message": "ERBB2 has multiple observed forms but none match the preferred nomenclature.", + "refs": [ + "term-erbb2" + ], + "action": "normalize_term_to_preferred_form" + } + ], + "actions": [ + { + "id": "resolve_acronym_expansion_conflict", + "severity": "high", + "refs": [ + "term-pdx-a", + "term-pdx-b" + ] + }, + { + "id": "expand_acronym_at_first_use", + "severity": "high", + "refs": [ + "term-pdx-a" + ] + }, + { + "id": "rewrite_lay_summary_with_definition", + "severity": "high", + "refs": [ + "summary-lay", + "term-pdx-a" + ] + }, + { + "id": "add_reviewer_facing_definition", + "severity": "high", + "refs": [ + "term-pdx-a" + ] + }, + { + "id": "define_term_before_citation_insertion", + "severity": "medium", + "refs": [ + "cite-pdx-model", + "term-pdx-a" + ] + 
}, + { + "id": "bind_citation_to_known_term", + "severity": "medium", + "refs": [ + "cite-unknown" + ] + }, + { + "id": "add_lay_definition_or_remove_from_lay_summary", + "severity": "medium", + "refs": [ + "term-organoid" + ] + }, + { + "id": "fix_domain_nomenclature_style", + "severity": "medium", + "refs": [ + "term-erbb2" + ] + }, + { + "id": "normalize_term_to_preferred_form", + "severity": "medium", + "refs": [ + "term-pdx-a" + ] + } + ] + } +} diff --git a/manuscript-terminology-definition-assistant/reports/summary.svg b/manuscript-terminology-definition-assistant/reports/summary.svg new file mode 100644 index 00000000..c620d4a0 --- /dev/null +++ b/manuscript-terminology-definition-assistant/reports/summary.svg @@ -0,0 +1,16 @@ + + + + + Terminology Definition Assistant + + hold_ai_research_packet + Terms reviewed: 4 + Findings: 12 +HIGH ACRONYM_EXPANSION_CONFLICT +HIGH ACRONYM_NOT_EXPANDED_AT_FIRST_USE +HIGH LAY_SUMMARY_JARGON_UNEXPLAINED +HIGH LAY_SUMMARY_JARGON_UNEXPLAINED +HIGH TERM_DEFINITION_MISSING + sha256:525040b780da7400 + diff --git a/manuscript-terminology-definition-assistant/reports/terminology-review-report.md b/manuscript-terminology-definition-assistant/reports/terminology-review-report.md new file mode 100644 index 00000000..c53a6b10 --- /dev/null +++ b/manuscript-terminology-definition-assistant/reports/terminology-review-report.md @@ -0,0 +1,32 @@ +# Manuscript Terminology Definition Review: oncology-organoid-draft-v1 + +Decision: **hold_ai_research_packet** +Audit digest: `sha256:525040b780da7400` + +## Findings + +| Severity | Code | Message | Action | +| --- | --- | --- | --- | +| high | `ACRONYM_EXPANSION_CONFLICT` | PDX maps to 2 different expansions in the manuscript packet. | `resolve_acronym_expansion_conflict` | +| high | `ACRONYM_NOT_EXPANDED_AT_FIRST_USE` | PDX is not expanded at first use in abstract. 
| `expand_acronym_at_first_use` | +| high | `LAY_SUMMARY_JARGON_UNEXPLAINED` | Generated layperson summary summary-lay includes patient-derived xenograft without an accessible definition. | `rewrite_lay_summary_with_definition` | +| high | `LAY_SUMMARY_JARGON_UNEXPLAINED` | Generated layperson summary summary-lay includes organoid viability score without an accessible definition. | `rewrite_lay_summary_with_definition` | +| high | `TERM_DEFINITION_MISSING` | patient-derived xenograft requires a reviewer-facing definition before AI summaries or citation suggestions use it. | `add_reviewer_facing_definition` | +| high | `TERM_DEFINITION_MISSING` | ERBB2 requires a reviewer-facing definition before AI summaries or citation suggestions use it. | `add_reviewer_facing_definition` | +| medium | `CITATION_BEFORE_TERM_DEFINITION` | Citation recommendation cite-pdx-model uses patient-derived xenograft before the term is defined. | `define_term_before_citation_insertion` | +| medium | `CITATION_TERM_UNKNOWN` | Citation recommendation cite-unknown references an unknown terminology id. | `bind_citation_to_known_term` | +| medium | `LAY_DEFINITION_MISSING` | organoid viability score appears in a lay-facing packet without a lay definition. | `add_lay_definition_or_remove_from_lay_summary` | +| medium | `NOMENCLATURE_STYLE_MISMATCH` | ERBB2 uses protein alias style where HGNC gene symbol style is expected. | `fix_domain_nomenclature_style` | +| medium | `PREFERRED_TERM_FORM_ABSENT` | patient-derived xenograft has multiple observed forms but none match the preferred nomenclature. | `normalize_term_to_preferred_form` | +| medium | `PREFERRED_TERM_FORM_ABSENT` | ERBB2 has multiple observed forms but none match the preferred nomenclature. 
| `normalize_term_to_preferred_form` | + +## Term Coverage + +| Term | Short | Section | Definition | Lay definition | Observed forms | +| --- | --- | --- | --- | --- | --- | +| patient-derived xenograft | PDX | abstract | no | no | PDX, patient derived xenograft | +| pharmacodynamic index | PDX | results | yes | no | PDX, pharmacodynamic index | +| ERBB2 | | title | no | no | HER2, ERBB2-positive, Her-2 | +| organoid viability score | | lay_summary | yes | no | organoid viability score, OVS | + +Synthetic data only. No uploaded manuscripts, private corpora, citation indexes, external AI APIs, credentials, or payment systems are used. diff --git a/manuscript-terminology-definition-assistant/sample-data.js b/manuscript-terminology-definition-assistant/sample-data.js new file mode 100644 index 00000000..1684b911 --- /dev/null +++ b/manuscript-terminology-definition-assistant/sample-data.js @@ -0,0 +1,141 @@ +const cleanPacket = { + manuscriptId: "astrocyte-calcium-preprint-v3", + terms: [ + { + id: "term-gcamp", + label: "genetically encoded calcium indicator", + short: "GECI", + expansion: "genetically encoded calcium indicator", + firstUseSection: "abstract", + expandedAtFirstUse: true, + requiresDefinition: true, + definition: "A fluorescent protein sensor used to report intracellular calcium dynamics.", + layDefinition: "A lab-made sensor that glows when cell calcium changes.", + jargonLevel: "medium", + observedForms: ["genetically encoded calcium indicator", "GECI"], + preferredForm: "genetically encoded calcium indicator" + }, + { + id: "term-gfap", + label: "glial fibrillary acidic protein", + short: "GFAP", + expansion: "glial fibrillary acidic protein", + firstUseSection: "methods", + expandedAtFirstUse: true, + requiresDefinition: true, + definition: "An astrocyte marker used to identify glial cells in the imaging cohort.", + layDefinition: "A marker that helps identify support cells in the brain.", + jargonLevel: "medium", + observedForms: ["glial 
fibrillary acidic protein", "GFAP"], + preferredForm: "glial fibrillary acidic protein" + }, + { + id: "term-stat-model", + label: "mixed-effects model", + firstUseSection: "results", + expandedAtFirstUse: true, + requiresDefinition: true, + definition: "A statistical model that accounts for repeated observations from the same animal.", + layDefinition: "A model that avoids counting repeated measurements as unrelated.", + jargonLevel: "medium", + observedForms: ["mixed-effects model"], + preferredForm: "mixed-effects model" + } + ], + citationRecommendations: [ + { + id: "cite-geci-method", + termId: "term-gcamp", + reason: "Supports calcium indicator methodology." + } + ], + generatedSummaries: [ + { + id: "summary-lay", + mode: "layperson", + termIds: ["term-gcamp", "term-stat-model"] + } + ] +}; + +const riskyPacket = { + manuscriptId: "oncology-organoid-draft-v1", + terms: [ + { + id: "term-pdx-a", + label: "patient-derived xenograft", + short: "PDX", + expansion: "patient-derived xenograft", + firstUseSection: "abstract", + expandedAtFirstUse: false, + requiresDefinition: true, + definition: "", + jargonLevel: "high", + observedForms: ["PDX", "patient derived xenograft"], + preferredForm: "patient-derived xenograft", + audience: "expert" + }, + { + id: "term-pdx-b", + label: "pharmacodynamic index", + short: "PDX", + expansion: "pharmacodynamic index", + firstUseSection: "results", + expandedAtFirstUse: true, + requiresDefinition: true, + definition: "A composite drug-response index used in the exploratory analysis.", + jargonLevel: "high", + observedForms: ["PDX", "pharmacodynamic index"], + preferredForm: "pharmacodynamic index" + }, + { + id: "term-erbb2", + label: "ERBB2", + firstUseSection: "title", + expandedAtFirstUse: true, + requiresDefinition: true, + definition: "", + jargonLevel: "high", + expectedStyle: "HGNC gene symbol", + observedStyle: "protein alias", + observedForms: ["HER2", "ERBB2-positive", "Her-2"], + preferredForm: "ERBB2" + }, + { + 
id: "term-organoid", + label: "organoid viability score", + firstUseSection: "lay_summary", + expandedAtFirstUse: true, + requiresDefinition: true, + definition: "A normalized assay readout for live tumor organoid cells after treatment.", + jargonLevel: "high", + audience: "lay", + observedForms: ["organoid viability score", "OVS"], + preferredForm: "organoid viability score" + } + ], + citationRecommendations: [ + { + id: "cite-pdx-model", + termId: "term-pdx-a", + reason: "Recommended for first use of PDX model." + }, + { + id: "cite-unknown", + termId: "term-nonexistent", + reason: "Unbound recommendation from retrieval layer." + } + ], + generatedSummaries: [ + { + id: "summary-lay", + mode: "layperson", + termIds: ["term-pdx-a", "term-organoid"] + } + ] +}; + +module.exports = { + cleanPacket, + riskyPacket +}; diff --git a/manuscript-terminology-definition-assistant/test.js b/manuscript-terminology-definition-assistant/test.js new file mode 100644 index 00000000..948ca97e --- /dev/null +++ b/manuscript-terminology-definition-assistant/test.js @@ -0,0 +1,42 @@ +const assert = require("node:assert/strict"); +const { evaluateTerminologyPacket, renderMarkdownReport, renderSvgSummary } = require("./index"); +const { cleanPacket, riskyPacket } = require("./sample-data"); + +function clone(value) { + return JSON.parse(JSON.stringify(value)); +} + +function codes(evaluation) { + return new Set(evaluation.findings.map((finding) => finding.code)); +} + +const clean = evaluateTerminologyPacket(cleanPacket); +assert.equal(clean.summary.decision, "release_ai_research_packet"); +assert.equal(clean.findings.length, 0); + +const risky = evaluateTerminologyPacket(riskyPacket); +assert.equal(risky.summary.decision, "hold_ai_research_packet"); +assert.equal(codes(risky).has("ACRONYM_NOT_EXPANDED_AT_FIRST_USE"), true); +assert.equal(codes(risky).has("TERM_DEFINITION_MISSING"), true); +assert.equal(codes(risky).has("ACRONYM_EXPANSION_CONFLICT"), true); 
+assert.equal(codes(risky).has("NOMENCLATURE_STYLE_MISMATCH"), true); +assert.equal(codes(risky).has("CITATION_BEFORE_TERM_DEFINITION"), true); +assert.equal(codes(risky).has("CITATION_TERM_UNKNOWN"), true); +assert.equal(codes(risky).has("LAY_SUMMARY_JARGON_UNEXPLAINED"), true); + +const reviseOnly = clone(cleanPacket); +reviseOnly.terms[1].observedForms = ["GFAP", "Gfap marker"]; +reviseOnly.terms[1].preferredForm = "glial fibrillary acidic protein"; +const revise = evaluateTerminologyPacket(reviseOnly); +assert.equal(revise.summary.decision, "revise_terminology_packet"); +assert.equal(codes(revise).has("PREFERRED_TERM_FORM_ABSENT"), true); + +const markdown = renderMarkdownReport(riskyPacket, risky); +assert.match(markdown, /Manuscript Terminology Definition Review/); +assert.match(markdown, /hold_ai_research_packet/); + +const svg = renderSvgSummary(risky); +assert.match(svg, /