diff --git a/model-assumption-diagnostics-assistant/README.md b/model-assumption-diagnostics-assistant/README.md new file mode 100644 index 00000000..f65363f4 --- /dev/null +++ b/model-assumption-diagnostics-assistant/README.md @@ -0,0 +1,41 @@ +# Model Assumption Diagnostics Assistant + +This slice adds a synthetic, dependency-free assistant for the SCIBASE AI-Powered Research Assistant Suite. It reviews analysis packets before an automated peer-review summary is trusted and flags model-assumption failures that can make otherwise polished AI review output misleading. + +The assistant focuses on modeling diagnostics that are separate from existing same-issue submissions covering power feasibility, Bayesian prior sensitivity, missing data, causal adjustment, uncertainty calibration, figure-claim consistency, external validity, and sample chain-of-custody. + +## What It Checks + +- Binary outcomes modeled with an identity-link linear model. +- Low events per predictor. +- Complete or quasi-separation risk. +- Heteroscedasticity without robust uncertainty. +- High multicollinearity. +- Residual skew and influential outliers. +- Residual autocorrelation. +- Too few clusters for clustered claims. +- Missing calibration evidence for binary prediction claims. +- Missing sensitivity models or reviewer-facing diagnostic handoff links. + +## Reviewer Output + +Running the demo generates: + +- `reports/model-assumption-report.json` +- `reports/model-assumption-report.md` +- `reports/summary.svg` +- `reports/demo-script.txt` +- `reports/demo.gif` +- `reports/demo.mp4` + +The generated reviewer packet is synthetic only. It does not call external APIs, run live models, use credentials, or include private research data. + +## Commands + +```bash +npm test +npm run demo +npm run demo:video +``` + +`demo:video` renders small local GIF and MP4 demos (`reports/demo.gif`, `reports/demo.mp4`) from generated synthetic frames. Unlike the Node.js commands, it requires Python with Pillow, NumPy, and imageio installed. 
import { mkdir, writeFile } from "node:fs/promises";
import { riskyAnalysisPacket } from "./sample-data.js";
import { buildReviewerMarkdown, buildSummarySvg, reviewModelAssumptions } from "./index.js";

// Demo entry point: reviews the bundled risky packet and writes the reviewer
// artifacts into ./reports/ next to this script.
const outputDir = new URL("./reports/", import.meta.url);
await mkdir(outputDir, { recursive: true });

const report = reviewModelAssumptions(riskyAnalysisPacket);
const { summary } = report;

// Plain-text narration written to demo-script.txt (joined without a trailing newline).
const renderDemoScript = () =>
  [
    "Demo: Model Assumption Diagnostics Assistant",
    `Manuscript: ${report.manuscriptId}`,
    `Decision: ${report.decision}`,
    `Risk score: ${report.riskScore}/100`,
    `Blockers: ${summary.blockCount}`,
    `Warnings: ${summary.warnCount}`,
    "Reviewer action: hold AI review until model family, separation, sensitivity, and diagnostic handoff blockers are fixed.",
  ].join("\n");

// Each artifact is rendered lazily, immediately before it is written, so a
// failing renderer cannot prevent the artifacts listed before it from landing.
const artifacts = [
  ["model-assumption-report.json", () => `${JSON.stringify(report, null, 2)}\n`],
  ["model-assumption-report.md", () => buildReviewerMarkdown(report)],
  ["summary.svg", () => buildSummarySvg(report)],
  ["demo-script.txt", renderDemoScript],
];

// Written one at a time, in the order listed above.
for (const [fileName, render] of artifacts) {
  await writeFile(new URL(fileName, outputDir), render());
}

console.log(JSON.stringify(summary, null, 2));
/** Sort rank per severity; findings with a larger rank are listed first. */
const SEVERITY_ORDER = { block: 3, warn: 2, info: 1 };

/**
 * Build one finding record.
 *
 * @param {string} code - Stable machine-readable identifier of the check.
 * @param {"block"|"warn"|"info"} severity - Severity used for counting and ordering.
 * @param {string} message - Reviewer-facing description of the problem.
 * @param {object} evidence - The packet values that triggered the finding.
 * @param {string} remediation - Suggested corrective action.
 * @returns {{code: string, severity: string, message: string, evidence: object, remediation: string}}
 */
function issue(code, severity, message, evidence, remediation) {
  return { code, severity, message, evidence, remediation };
}

/**
 * True only for finite numbers. Used where a missing value must not be
 * mistaken for a real statistic: `null < 0.05` is `true` in JavaScript, and a
 * legitimate `0` is falsy.
 */
function isMeasured(value) {
  return typeof value === "number" && Number.isFinite(value);
}

/** True when the declared primary model has a binary outcome. */
function isBinaryOutcome(packet) {
  return packet.declaredPrimaryModel?.outcomeType === "binary";
}

/** True when the declared model is a linear regression or uses an identity link. */
function usesLinearIdentityModel(packet) {
  const model = packet.declaredPrimaryModel || {};
  return model.family === "linear-regression" || model.linkFunction === "identity";
}

/** True when any reviewer claim mentions assumptions, diagnostics, or limitations. */
function assumptionsMentioned(packet) {
  const claims = packet.reviewerClaims || [];
  return claims.some((claim) => /assumption|diagnostic|limitation/i.test(claim));
}

// ---------------------------------------------------------------------------
// Individual checks. Each takes (packet, diagnostics, remediation) and returns
// a finding, or null when the check passes or does not apply.
// ---------------------------------------------------------------------------

/** Binary outcome declared together with a linear / identity-link model. */
function checkModelFamily(packet) {
  if (!isBinaryOutcome(packet) || !usesLinearIdentityModel(packet)) {
    return null;
  }
  return issue(
    "MODEL_FAMILY_OUTCOME_MISMATCH",
    "block",
    "Binary outcomes should not be released through an identity-link linear model without an explicit sensitivity justification.",
    {
      outcomeType: packet.declaredPrimaryModel?.outcomeType,
      family: packet.declaredPrimaryModel?.family,
      linkFunction: packet.declaredPrimaryModel?.linkFunction,
    },
    "Refit with a logistic or mixed-effects logistic model, or add a documented sensitivity analysis before AI review conclusions are trusted.",
  );
}

/** Fewer than 5 events per predictor. Zero events is evaluated, not skipped. */
function checkEventsPerPredictor(packet, diagnostics) {
  const { eventCount, predictors } = diagnostics;
  // The ratio is undefined without both numbers and at least one predictor.
  if (!isMeasured(eventCount) || !isMeasured(predictors) || predictors <= 0) {
    return null;
  }
  const ratio = eventCount / predictors;
  if (ratio >= 5) {
    return null;
  }
  return issue(
    "LOW_EVENTS_PER_PREDICTOR",
    "block",
    "The event-per-predictor ratio is too low for stable adjusted effects.",
    {
      eventCount,
      predictors,
      eventsPerPredictor: Number(ratio.toFixed(2)),
    },
    "Reduce predictors, use penalization, or present the model as exploratory until stability is demonstrated.",
  );
}

/** At least one complete-separation indicator was reported. */
function checkSeparation(packet, diagnostics) {
  if (!(diagnostics.completeSeparationIndicators > 0)) {
    return null;
  }
  return issue(
    "SEPARATION_RISK",
    "block",
    "Separation indicators suggest coefficient estimates may be unstable or infinite.",
    { completeSeparationIndicators: diagnostics.completeSeparationIndicators },
    "Use penalized logistic regression or exact methods and report the separation diagnostic in the reviewer packet.",
  );
}

/** Breusch-Pagan p-value below 0.05 with no robust standard errors declared. */
function checkHeteroscedasticity(packet, diagnostics) {
  const pValue = diagnostics.breuschPaganPValue;
  // A missing (null/undefined) p-value is "not reported", not "significant".
  if (!isMeasured(pValue) || pValue >= 0.05 || diagnostics.robustStandardErrorsDeclared) {
    return null;
  }
  return issue(
    "HETEROSCEDASTICITY_UNHANDLED",
    "warn",
    "Residual variance is not constant and no robust standard errors were declared.",
    { breuschPaganPValue: pValue },
    "Report robust or clustered standard errors and flag affected inference in the limitations section.",
  );
}

/** Maximum variance inflation factor above 8. */
function checkMulticollinearity(packet, diagnostics) {
  if (!(diagnostics.maxVarianceInflationFactor > 8)) {
    return null;
  }
  return issue(
    "MULTICOLLINEARITY_HIGH",
    "warn",
    "Predictors show high variance inflation, so independent-effect claims may be overstated.",
    { maxVarianceInflationFactor: diagnostics.maxVarianceInflationFactor },
    "Collapse correlated predictors, use regularization, or downgrade independent-effect language.",
  );
}

/** Residual skew above 1.25 or a studentized residual above 4. */
function checkResidualPressure(packet, diagnostics) {
  const skewed = diagnostics.residualSkew > 1.25;
  const extremeOutlier = diagnostics.outlierStudentizedResidualMax > 4;
  if (!skewed && !extremeOutlier) {
    return null;
  }
  return issue(
    "RESIDUAL_OUTLIER_PRESSURE",
    "warn",
    "Residual skew or influential outliers are large enough to require sensitivity reporting.",
    {
      residualSkew: diagnostics.residualSkew,
      outlierStudentizedResidualMax: diagnostics.outlierStudentizedResidualMax,
    },
    "Add influence diagnostics, robust fit sensitivity, and a reviewer-facing note describing impacted claims.",
  );
}

/** Durbin-Watson statistic outside [1.4, 2.6]. A statistic of 0 is evaluated. */
function checkAutocorrelation(packet, diagnostics) {
  const statistic = diagnostics.durbinWatson;
  if (!isMeasured(statistic) || (statistic >= 1.4 && statistic <= 2.6)) {
    return null;
  }
  return issue(
    "AUTOCORRELATION_RISK",
    "warn",
    "Residual autocorrelation may invalidate nominal uncertainty estimates.",
    { durbinWatson: statistic },
    "Model temporal or batch correlation explicitly, or use clustered uncertainty estimates.",
  );
}

/** A cluster count was reported and it is below 5. */
function checkClusterCount(packet, diagnostics) {
  // A missing or zero cluster count is skipped rather than flagged.
  if (!diagnostics.clusterCount || diagnostics.clusterCount >= 5) {
    return null;
  }
  return issue(
    "TOO_FEW_CLUSTERS",
    "warn",
    "Cluster-adjusted claims are fragile with very few clusters.",
    { clusterCount: diagnostics.clusterCount, smallestClusterSize: diagnostics.smallestClusterSize },
    "Report cluster limitations and avoid generalizing beyond the observed sites or batches.",
  );
}

/** Binary outcome without reported calibration evidence. */
function checkCalibration(packet, diagnostics) {
  if (diagnostics.calibrationReported || !isBinaryOutcome(packet)) {
    return null;
  }
  return issue(
    "CALIBRATION_MISSING",
    "warn",
    "Binary prediction claims need calibration evidence before reviewer summaries treat them as reliable.",
    { calibrationReported: diagnostics.calibrationReported },
    "Add calibration curve, Brier score, or held-out calibration summary.",
  );
}

/** Handoff packet lacks a sensitivity model or links to diagnostics. */
function checkReviewPacket(packet, diagnostics, remediation) {
  if (remediation.sensitivityModelIncluded && remediation.reviewerPacketLinksDiagnostics) {
    return null;
  }
  return issue(
    "REVIEW_PACKET_INCOMPLETE",
    "block",
    "The reviewer handoff is missing sensitivity evidence or links to diagnostics.",
    {
      sensitivityModelIncluded: remediation.sensitivityModelIncluded,
      reviewerPacketLinksDiagnostics: remediation.reviewerPacketLinksDiagnostics,
    },
    "Hold automated peer-review approval until the handoff packet includes diagnostics and sensitivity results.",
  );
}

/**
 * Follow-up check: runs after the primary checks because it only fires when at
 * least one of them produced a finding.
 */
function checkOverconfidentClaim(packet, remediation, priorFindingCount) {
  if (
    priorFindingCount === 0 ||
    !assumptionsMentioned(packet) ||
    remediation.limitationTextMentionsAssumptionRisk
  ) {
    return null;
  }
  return issue(
    "OVERCONFIDENT_ASSUMPTION_CLAIM",
    "warn",
    "The manuscript claims diagnostics were considered but does not disclose assumption risk in limitations.",
    { limitationTextMentionsAssumptionRisk: remediation.limitationTextMentionsAssumptionRisk },
    "Add limitation language that names the specific modeling assumptions still under review.",
  );
}

/** Checks that are independent of one another. */
const PRIMARY_CHECKS = [
  checkModelFamily,
  checkEventsPerPredictor,
  checkSeparation,
  checkHeteroscedasticity,
  checkMulticollinearity,
  checkResidualPressure,
  checkAutocorrelation,
  checkClusterCount,
  checkCalibration,
  checkReviewPacket,
];

/** Number of distinct findings a packet can produce (primary checks + follow-up). */
const TOTAL_CHECKS = PRIMARY_CHECKS.length + 1;

/**
 * Review an analysis packet and report model-assumption findings.
 *
 * @param {object} packet - Analysis packet. Reads `manuscriptId`, `title`,
 *   `declaredPrimaryModel`, `reviewerClaims`, `diagnostics`, and
 *   `remediationEvidence`; missing sections are treated as empty.
 * @returns {{manuscriptId: *, title: *, decision: string, riskScore: number,
 *   summary: {blockCount: number, warnCount: number, findingCount: number},
 *   findings: object[]}} Findings are ordered by severity (blockers first) and
 *   then by code. `riskScore` is 28 per blocker plus 11 per warning, capped at 100.
 * @throws {TypeError} When `packet` is not a non-array object.
 */
export function reviewModelAssumptions(packet) {
  if (!packet || typeof packet !== "object" || Array.isArray(packet)) {
    throw new TypeError("analysis packet must be an object");
  }

  const diagnostics = packet.diagnostics || {};
  const remediation = packet.remediationEvidence || {};

  const findings = PRIMARY_CHECKS.map((check) => check(packet, diagnostics, remediation)).filter(Boolean);
  const followUp = checkOverconfidentClaim(packet, remediation, findings.length);
  if (followUp) {
    findings.push(followUp);
  }

  const blockCount = findings.filter((finding) => finding.severity === "block").length;
  const warnCount = findings.filter((finding) => finding.severity === "warn").length;
  const riskScore = Math.min(100, blockCount * 28 + warnCount * 11);

  let decision = "ready-for-ai-review";
  if (blockCount > 0) {
    decision = "hold-ai-review";
  } else if (warnCount > 0) {
    decision = "needs-reviewer-note";
  }

  // `findings` is local to this call, so sorting it in place is safe.
  findings.sort(
    (a, b) => SEVERITY_ORDER[b.severity] - SEVERITY_ORDER[a.severity] || a.code.localeCompare(b.code),
  );

  return {
    manuscriptId: packet.manuscriptId,
    title: packet.title,
    decision,
    riskScore,
    summary: {
      blockCount,
      warnCount,
      findingCount: findings.length,
    },
    findings,
  };
}

/**
 * Render a report as reviewer-facing Markdown.
 *
 * @param {object} report - A report returned by `reviewModelAssumptions`.
 * @returns {string} Markdown with one `##` section per finding.
 */
export function buildReviewerMarkdown(report) {
  const lines = [
    `# Model Assumption Diagnostics: ${report.manuscriptId}`,
    "",
    `Decision: **${report.decision}**`,
    `Risk score: **${report.riskScore}/100**`,
    "",
    `Findings: ${report.summary.blockCount} blockers, ${report.summary.warnCount} warnings.`,
    "",
  ];

  for (const finding of report.findings) {
    lines.push(`## ${finding.severity.toUpperCase()}: ${finding.code}`);
    lines.push(finding.message);
    lines.push("");
    lines.push(`Evidence: \`${JSON.stringify(finding.evidence)}\``);
    lines.push("");
    lines.push(`Remediation: ${finding.remediation}`);
    lines.push("");
  }

  if (report.findings.length === 0) {
    lines.push("No assumption blockers or warnings were detected in the synthetic packet.");
    lines.push("");
  }

  return lines.join("\n");
}

/**
 * Render a small standalone SVG summary card for a report.
 *
 * @param {object} report - A report returned by `reviewModelAssumptions`.
 * @returns {string} SVG markup; every interpolated value is XML-escaped.
 */
export function buildSummarySvg(report) {
  const blockers = report.summary.blockCount;
  const warnings = report.summary.warnCount;
  const safe = Math.max(0, TOTAL_CHECKS - blockers - warnings);
  // Risk bar is 680 units wide at a score of 100; non-numeric scores draw an empty bar.
  const clampedScore = Math.min(100, Math.max(0, Number(report.riskScore) || 0));
  const riskWidth = Math.round((clampedScore / 100) * 680);
  return `<svg xmlns="http://www.w3.org/2000/svg" width="760" height="360" viewBox="0 0 760 360" role="img" aria-label="Model assumption diagnostics summary">
  <rect width="760" height="360" fill="#0e1726"/>
  <text x="40" y="56" fill="#f8fafc" font-family="Arial, sans-serif" font-size="26" font-weight="700">Model Assumption Diagnostics</text>
  <text x="40" y="88" fill="#cbd5e1" font-family="Arial, sans-serif" font-size="15">${escapeXml(report.manuscriptId)} · ${escapeXml(report.decision)}</text>
  <rect x="40" y="112" width="680" height="28" rx="6" fill="#1e293b"/>
  <rect x="40" y="112" width="${riskWidth}" height="28" rx="6" fill="#ef4444"/>
  <text x="52" y="132" fill="#f8fafc" font-family="Arial, sans-serif" font-size="15" font-weight="700">${escapeXml(report.riskScore)}/100</text>
  <g transform="translate(40 172)">
    <rect width="210" height="140" rx="10" fill="#7f1d1d"/>
    <text x="24" y="72" fill="#fecaca" font-family="Arial, sans-serif" font-size="48" font-weight="700">${escapeXml(blockers)}</text>
    <text x="24" y="108" fill="#fecaca" font-family="Arial, sans-serif" font-size="16">blockers</text>
  </g>
  <g transform="translate(275 172)">
    <rect width="210" height="140" rx="10" fill="#78350f"/>
    <text x="24" y="72" fill="#fde68a" font-family="Arial, sans-serif" font-size="48" font-weight="700">${escapeXml(warnings)}</text>
    <text x="24" y="108" fill="#fde68a" font-family="Arial, sans-serif" font-size="16">warnings</text>
  </g>
  <g transform="translate(510 172)">
    <rect width="210" height="140" rx="10" fill="#14532d"/>
    <text x="24" y="72" fill="#bbf7d0" font-family="Arial, sans-serif" font-size="48" font-weight="700">${escapeXml(safe)}</text>
    <text x="24" y="108" fill="#bbf7d0" font-family="Arial, sans-serif" font-size="16">assumption checks clear</text>
  </g>
</svg>
`;
}

/**
 * Escape a value for use in XML text content or a double-quoted attribute.
 * `&` is replaced first so the entities produced by the later replacements are
 * not escaped a second time.
 */
function escapeXml(value) {
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;");
}
b/model-assumption-diagnostics-assistant/package.json @@ -0,0 +1,12 @@ +{ + "name": "model-assumption-diagnostics-assistant", + "version": "1.0.0", + "description": "Synthetic model-assumption diagnostics assistant for SCIBASE AI review workflows.", + "type": "module", + "private": true, + "scripts": { + "test": "node test.js", + "demo": "node demo.js", + "demo:video": "python demo_video.py" + } +} diff --git a/model-assumption-diagnostics-assistant/reports/demo-script.txt b/model-assumption-diagnostics-assistant/reports/demo-script.txt new file mode 100644 index 00000000..5a7452c1 --- /dev/null +++ b/model-assumption-diagnostics-assistant/reports/demo-script.txt @@ -0,0 +1,7 @@ +Demo: Model Assumption Diagnostics Assistant +Manuscript: SCI-MODEL-ASSUMPTION-001 +Decision: hold-ai-review +Risk score: 100/100 +Blockers: 4 +Warnings: 7 +Reviewer action: hold AI review until model family, separation, sensitivity, and diagnostic handoff blockers are fixed. \ No newline at end of file diff --git a/model-assumption-diagnostics-assistant/reports/demo.gif b/model-assumption-diagnostics-assistant/reports/demo.gif new file mode 100644 index 00000000..15efea98 Binary files /dev/null and b/model-assumption-diagnostics-assistant/reports/demo.gif differ diff --git a/model-assumption-diagnostics-assistant/reports/demo.mp4 b/model-assumption-diagnostics-assistant/reports/demo.mp4 new file mode 100644 index 00000000..c0528199 Binary files /dev/null and b/model-assumption-diagnostics-assistant/reports/demo.mp4 differ diff --git a/model-assumption-diagnostics-assistant/reports/model-assumption-report.json b/model-assumption-diagnostics-assistant/reports/model-assumption-report.json new file mode 100644 index 00000000..d5aa5b4a --- /dev/null +++ b/model-assumption-diagnostics-assistant/reports/model-assumption-report.json @@ -0,0 +1,119 @@ +{ + "manuscriptId": "SCI-MODEL-ASSUMPTION-001", + "title": "Predicting wet-lab replication yield from multi-site assay metadata", + "decision": 
"hold-ai-review", + "riskScore": 100, + "summary": { + "blockCount": 4, + "warnCount": 7, + "findingCount": 11 + }, + "findings": [ + { + "code": "LOW_EVENTS_PER_PREDICTOR", + "severity": "block", + "message": "The event-per-predictor ratio is too low for stable adjusted effects.", + "evidence": { + "eventCount": 18, + "predictors": 14, + "eventsPerPredictor": 1.29 + }, + "remediation": "Reduce predictors, use penalization, or present the model as exploratory until stability is demonstrated." + }, + { + "code": "MODEL_FAMILY_OUTCOME_MISMATCH", + "severity": "block", + "message": "Binary outcomes should not be released through an identity-link linear model without an explicit sensitivity justification.", + "evidence": { + "outcomeType": "binary", + "family": "linear-regression", + "linkFunction": "identity" + }, + "remediation": "Refit with a logistic or mixed-effects logistic model, or add a documented sensitivity analysis before AI review conclusions are trusted." + }, + { + "code": "REVIEW_PACKET_INCOMPLETE", + "severity": "block", + "message": "The reviewer handoff is missing sensitivity evidence or links to diagnostics.", + "evidence": { + "sensitivityModelIncluded": false, + "reviewerPacketLinksDiagnostics": false + }, + "remediation": "Hold automated peer-review approval until the handoff packet includes diagnostics and sensitivity results." + }, + { + "code": "SEPARATION_RISK", + "severity": "block", + "message": "Separation indicators suggest coefficient estimates may be unstable or infinite.", + "evidence": { + "completeSeparationIndicators": 2 + }, + "remediation": "Use penalized logistic regression or exact methods and report the separation diagnostic in the reviewer packet." 
+ }, + { + "code": "AUTOCORRELATION_RISK", + "severity": "warn", + "message": "Residual autocorrelation may invalidate nominal uncertainty estimates.", + "evidence": { + "durbinWatson": 1.12 + }, + "remediation": "Model temporal or batch correlation explicitly, or use clustered uncertainty estimates." + }, + { + "code": "CALIBRATION_MISSING", + "severity": "warn", + "message": "Binary prediction claims need calibration evidence before reviewer summaries treat them as reliable.", + "evidence": { + "calibrationReported": false + }, + "remediation": "Add calibration curve, Brier score, or held-out calibration summary." + }, + { + "code": "HETEROSCEDASTICITY_UNHANDLED", + "severity": "warn", + "message": "Residual variance is not constant and no robust standard errors were declared.", + "evidence": { + "breuschPaganPValue": 0.004 + }, + "remediation": "Report robust or clustered standard errors and flag affected inference in the limitations section." + }, + { + "code": "MULTICOLLINEARITY_HIGH", + "severity": "warn", + "message": "Predictors show high variance inflation, so independent-effect claims may be overstated.", + "evidence": { + "maxVarianceInflationFactor": 12.6 + }, + "remediation": "Collapse correlated predictors, use regularization, or downgrade independent-effect language." + }, + { + "code": "OVERCONFIDENT_ASSUMPTION_CLAIM", + "severity": "warn", + "message": "The manuscript claims diagnostics were considered but does not disclose assumption risk in limitations.", + "evidence": { + "limitationTextMentionsAssumptionRisk": false + }, + "remediation": "Add limitation language that names the specific modeling assumptions still under review." 
+ }, + { + "code": "RESIDUAL_OUTLIER_PRESSURE", + "severity": "warn", + "message": "Residual skew or influential outliers are large enough to require sensitivity reporting.", + "evidence": { + "residualSkew": 1.8, + "outlierStudentizedResidualMax": 4.9 + }, + "remediation": "Add influence diagnostics, robust fit sensitivity, and a reviewer-facing note describing impacted claims." + }, + { + "code": "TOO_FEW_CLUSTERS", + "severity": "warn", + "message": "Cluster-adjusted claims are fragile with very few clusters.", + "evidence": { + "clusterCount": 3, + "smallestClusterSize": 6 + }, + "remediation": "Report cluster limitations and avoid generalizing beyond the observed sites or batches." + } + ] +} diff --git a/model-assumption-diagnostics-assistant/reports/model-assumption-report.md b/model-assumption-diagnostics-assistant/reports/model-assumption-report.md new file mode 100644 index 00000000..14d86c57 --- /dev/null +++ b/model-assumption-diagnostics-assistant/reports/model-assumption-report.md @@ -0,0 +1,83 @@ +# Model Assumption Diagnostics: SCI-MODEL-ASSUMPTION-001 + +Decision: **hold-ai-review** +Risk score: **100/100** + +Findings: 4 blockers, 7 warnings. + +## BLOCK: LOW_EVENTS_PER_PREDICTOR +The event-per-predictor ratio is too low for stable adjusted effects. + +Evidence: `{"eventCount":18,"predictors":14,"eventsPerPredictor":1.29}` + +Remediation: Reduce predictors, use penalization, or present the model as exploratory until stability is demonstrated. + +## BLOCK: MODEL_FAMILY_OUTCOME_MISMATCH +Binary outcomes should not be released through an identity-link linear model without an explicit sensitivity justification. + +Evidence: `{"outcomeType":"binary","family":"linear-regression","linkFunction":"identity"}` + +Remediation: Refit with a logistic or mixed-effects logistic model, or add a documented sensitivity analysis before AI review conclusions are trusted. 
+ +## BLOCK: REVIEW_PACKET_INCOMPLETE +The reviewer handoff is missing sensitivity evidence or links to diagnostics. + +Evidence: `{"sensitivityModelIncluded":false,"reviewerPacketLinksDiagnostics":false}` + +Remediation: Hold automated peer-review approval until the handoff packet includes diagnostics and sensitivity results. + +## BLOCK: SEPARATION_RISK +Separation indicators suggest coefficient estimates may be unstable or infinite. + +Evidence: `{"completeSeparationIndicators":2}` + +Remediation: Use penalized logistic regression or exact methods and report the separation diagnostic in the reviewer packet. + +## WARN: AUTOCORRELATION_RISK +Residual autocorrelation may invalidate nominal uncertainty estimates. + +Evidence: `{"durbinWatson":1.12}` + +Remediation: Model temporal or batch correlation explicitly, or use clustered uncertainty estimates. + +## WARN: CALIBRATION_MISSING +Binary prediction claims need calibration evidence before reviewer summaries treat them as reliable. + +Evidence: `{"calibrationReported":false}` + +Remediation: Add calibration curve, Brier score, or held-out calibration summary. + +## WARN: HETEROSCEDASTICITY_UNHANDLED +Residual variance is not constant and no robust standard errors were declared. + +Evidence: `{"breuschPaganPValue":0.004}` + +Remediation: Report robust or clustered standard errors and flag affected inference in the limitations section. + +## WARN: MULTICOLLINEARITY_HIGH +Predictors show high variance inflation, so independent-effect claims may be overstated. + +Evidence: `{"maxVarianceInflationFactor":12.6}` + +Remediation: Collapse correlated predictors, use regularization, or downgrade independent-effect language. + +## WARN: OVERCONFIDENT_ASSUMPTION_CLAIM +The manuscript claims diagnostics were considered but does not disclose assumption risk in limitations. 
+ +Evidence: `{"limitationTextMentionsAssumptionRisk":false}` + +Remediation: Add limitation language that names the specific modeling assumptions still under review. + +## WARN: RESIDUAL_OUTLIER_PRESSURE +Residual skew or influential outliers are large enough to require sensitivity reporting. + +Evidence: `{"residualSkew":1.8,"outlierStudentizedResidualMax":4.9}` + +Remediation: Add influence diagnostics, robust fit sensitivity, and a reviewer-facing note describing impacted claims. + +## WARN: TOO_FEW_CLUSTERS +Cluster-adjusted claims are fragile with very few clusters. + +Evidence: `{"clusterCount":3,"smallestClusterSize":6}` + +Remediation: Report cluster limitations and avoid generalizing beyond the observed sites or batches. diff --git a/model-assumption-diagnostics-assistant/reports/summary.svg b/model-assumption-diagnostics-assistant/reports/summary.svg new file mode 100644 index 00000000..661c4b97 --- /dev/null +++ b/model-assumption-diagnostics-assistant/reports/summary.svg @@ -0,0 +1,23 @@ + + + Model Assumption Diagnostics + SCI-MODEL-ASSUMPTION-001 · hold-ai-review + + + 100/100 + + + 4 + blockers + + + + 7 + warnings + + + + 0 + assumption checks clear + + diff --git a/model-assumption-diagnostics-assistant/requirements-map.md b/model-assumption-diagnostics-assistant/requirements-map.md new file mode 100644 index 00000000..a4af0fe3 --- /dev/null +++ b/model-assumption-diagnostics-assistant/requirements-map.md @@ -0,0 +1,27 @@ +# Requirements Map + +Issue #16 asks for an AI-powered research assistant suite that can detect methodology red flags, check claims against evidence, and provide useful review suggestions before public release. + +This contribution covers a distinct review capability: model-assumption diagnostics before an AI peer-review report is trusted. 
+ +| Issue capability | Implementation | +| --- | --- | +| Methodological red flags | `reviewModelAssumptions()` flags model-family mismatch, low events per predictor, separation, heteroscedasticity, multicollinearity, outliers, autocorrelation, cluster fragility, and missing calibration. | +| Claims vs. evidence alignment | The assistant compares reviewer claims and remediation evidence against actual diagnostics. | +| Internal team review output | `buildReviewerMarkdown()` emits a reviewer-facing packet with evidence and remediation. | +| Reproducible synthetic demo | `demo.js` creates deterministic JSON, Markdown, SVG, and demo script artifacts from `sample-data.js`. | +| No private data or credentials | All inputs are synthetic and local; there are no network calls, secrets, patient records, payment data, or external services. | + +## Non-Overlap + +This is not: + +- A study-power or sample-size feasibility assistant. +- A missing-data sensitivity assistant. +- A causal adjustment or DAG review assistant. +- A Bayesian prior sensitivity assistant. +- An uncertainty calibration assistant. +- A figure or citation claim consistency assistant. +- A sample chain-of-custody assistant. + +The slice is specifically about whether the primary statistical or ML model can support the manuscript claims under its declared assumptions. 
diff --git a/model-assumption-diagnostics-assistant/sample-data.js b/model-assumption-diagnostics-assistant/sample-data.js new file mode 100644 index 00000000..108a4b4e --- /dev/null +++ b/model-assumption-diagnostics-assistant/sample-data.js @@ -0,0 +1,78 @@ +export const riskyAnalysisPacket = { + manuscriptId: "SCI-MODEL-ASSUMPTION-001", + title: "Predicting wet-lab replication yield from multi-site assay metadata", + domain: "computational biology", + declaredPrimaryModel: { + family: "linear-regression", + outcomeType: "binary", + outcomeDescription: "successful replication on follow-up assay", + linkFunction: "identity", + }, + reviewerClaims: [ + "The model explains assay replication success across partner labs.", + "No material modeling limitations were detected by the automated review assistant.", + "The top three features have independent effects after adjustment.", + ], + diagnostics: { + sampleSize: 84, + eventCount: 18, + predictors: 14, + residualSkew: 1.8, + outlierStudentizedResidualMax: 4.9, + breuschPaganPValue: 0.004, + maxVarianceInflationFactor: 12.6, + durbinWatson: 1.12, + completeSeparationIndicators: 2, + clusterCount: 3, + smallestClusterSize: 6, + transformationDeclared: false, + robustStandardErrorsDeclared: false, + crossValidationFolds: 3, + calibrationReported: false, + }, + remediationEvidence: { + sensitivityModelIncluded: false, + reviewerPacketLinksDiagnostics: false, + limitationTextMentionsAssumptionRisk: false, + plannedRefitFamily: "none", + }, +}; + +export const cleanAnalysisPacket = { + manuscriptId: "SCI-MODEL-ASSUMPTION-002", + title: "Estimating spectroscopy drift with pre-registered mixed-effects models", + domain: "instrumentation", + declaredPrimaryModel: { + family: "mixed-effects-logistic", + outcomeType: "binary", + outcomeDescription: "instrument drift outside tolerance", + linkFunction: "logit", + }, + reviewerClaims: [ + "The model uses an outcome-appropriate link function.", + "Diagnostics and limitations were 
included in the review packet.", + ], + diagnostics: { + sampleSize: 640, + eventCount: 214, + predictors: 8, + residualSkew: 0.42, + outlierStudentizedResidualMax: 2.4, + breuschPaganPValue: 0.21, + maxVarianceInflationFactor: 3.2, + durbinWatson: 1.96, + completeSeparationIndicators: 0, + clusterCount: 16, + smallestClusterSize: 28, + transformationDeclared: true, + robustStandardErrorsDeclared: true, + crossValidationFolds: 10, + calibrationReported: true, + }, + remediationEvidence: { + sensitivityModelIncluded: true, + reviewerPacketLinksDiagnostics: true, + limitationTextMentionsAssumptionRisk: true, + plannedRefitFamily: "mixed-effects-logistic", + }, +}; diff --git a/model-assumption-diagnostics-assistant/test.js b/model-assumption-diagnostics-assistant/test.js new file mode 100644 index 00000000..32d4b4d0 --- /dev/null +++ b/model-assumption-diagnostics-assistant/test.js @@ -0,0 +1,29 @@ +import assert from "node:assert/strict"; +import { cleanAnalysisPacket, riskyAnalysisPacket } from "./sample-data.js"; +import { buildReviewerMarkdown, buildSummarySvg, reviewModelAssumptions } from "./index.js"; + +const risky = reviewModelAssumptions(riskyAnalysisPacket); +assert.equal(risky.decision, "hold-ai-review"); +assert.ok(risky.riskScore >= 90); +assert.ok(risky.findings.some((finding) => finding.code === "MODEL_FAMILY_OUTCOME_MISMATCH")); +assert.ok(risky.findings.some((finding) => finding.code === "LOW_EVENTS_PER_PREDICTOR")); +assert.ok(risky.findings.some((finding) => finding.code === "SEPARATION_RISK")); +assert.ok(risky.findings.some((finding) => finding.code === "REVIEW_PACKET_INCOMPLETE")); +assert.ok(risky.findings.some((finding) => finding.code === "MULTICOLLINEARITY_HIGH")); + +const clean = reviewModelAssumptions(cleanAnalysisPacket); +assert.equal(clean.decision, "ready-for-ai-review"); +assert.equal(clean.summary.findingCount, 0); +assert.equal(clean.riskScore, 0); + +assert.throws(() => reviewModelAssumptions(null), /analysis packet must be an 
object/); + +const markdown = buildReviewerMarkdown(risky); +assert.match(markdown, /MODEL_FAMILY_OUTCOME_MISMATCH/); +assert.match(markdown, /hold-ai-review/); + +const svg = buildSummarySvg(risky); +assert.match(svg, /