Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions model-assumption-diagnostics-assistant/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Model Assumption Diagnostics Assistant

This slice adds a synthetic, dependency-free assistant for the SCIBASE AI-Powered Research Assistant Suite. It reviews analysis packets before an automated peer-review summary is trusted and flags model-assumption failures that can make otherwise polished AI review output misleading.

The assistant focuses on modeling diagnostics that are not already covered by the other submissions for the same issue, which address power feasibility, Bayesian prior sensitivity, missing data, causal adjustment, uncertainty calibration, figure-claim consistency, external validity, and sample chain-of-custody.

## What It Checks

- Binary outcomes modeled with an identity-link linear model.
- Low events per predictor.
- Complete or quasi-separation risk.
- Heteroscedasticity without robust uncertainty.
- High multicollinearity.
- Residual skew and influential outliers.
- Residual autocorrelation.
- Too few clusters for clustered claims.
- Missing calibration evidence for binary prediction claims.
- Missing sensitivity models or reviewer-facing diagnostic handoff links.

## Reviewer Output

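Running the demos generates the following files (`npm run demo` writes the JSON, Markdown, SVG, and text files; `npm run demo:video` writes the GIF and MP4):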

- `reports/model-assumption-report.json`
- `reports/model-assumption-report.md`
- `reports/summary.svg`
- `reports/demo-script.txt`
- `reports/demo.gif`
- `reports/demo.mp4`

The generated reviewer packet is synthetic only. It does not call external APIs, run live models, use credentials, or include private research data.

## Commands

```bash
npm test
npm run demo
npm run demo:video
```

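`demo:video` renders small local GIF and MP4 demos from generated synthetic frames. Unlike the Node scripts, it requires Python with the `imageio`, `numpy`, and `Pillow` packages installed.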
25 changes: 25 additions & 0 deletions model-assumption-diagnostics-assistant/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { mkdir, writeFile } from "node:fs/promises";
import { riskyAnalysisPacket } from "./sample-data.js";
import { buildReviewerMarkdown, buildSummarySvg, reviewModelAssumptions } from "./index.js";

// Demo entry point: reviews the bundled risky sample packet and writes every
// reviewer artifact into ./reports/ next to this script.
const outputDir = new URL("./reports/", import.meta.url);
await mkdir(outputDir, { recursive: true });

// Resolve a file name against the reports directory and write it.
const writeArtifact = (fileName, contents) => writeFile(new URL(fileName, outputDir), contents);

const review = reviewModelAssumptions(riskyAnalysisPacket);

await writeArtifact("model-assumption-report.json", `${JSON.stringify(review, null, 2)}\n`);
await writeArtifact("model-assumption-report.md", buildReviewerMarkdown(review));
await writeArtifact("summary.svg", buildSummarySvg(review));

// Plain-text walkthrough of the headline numbers from the review.
const scriptLines = [
  "Demo: Model Assumption Diagnostics Assistant",
  `Manuscript: ${review.manuscriptId}`,
  `Decision: ${review.decision}`,
  `Risk score: ${review.riskScore}/100`,
  `Blockers: ${review.summary.blockCount}`,
  `Warnings: ${review.summary.warnCount}`,
  "Reviewer action: hold AI review until model family, separation, sensitivity, and diagnostic handoff blockers are fixed.",
];
await writeArtifact("demo-script.txt", scriptLines.join("\n"));

// Echo the summary counts so the command gives immediate feedback.
console.log(JSON.stringify(review.summary, null, 2));
46 changes: 46 additions & 0 deletions model-assumption-diagnostics-assistant/demo_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pathlib import Path

import imageio.v3 as iio
import numpy as np
from PIL import Image, ImageDraw, ImageFont


# Output goes to a "reports" directory that sits next to this script; the
# directory is created on import/run if it does not exist yet.
ROOT = Path(__file__).resolve().parent
REPORTS = ROOT.joinpath("reports")
REPORTS.mkdir(exist_ok=True)


def font(size):
    """Return a font for drawing slide text.

    Tries "arial.ttf" and then "segoeui.ttf" at the requested ``size``. If
    neither can be loaded (``OSError``), falls back to
    ``ImageFont.load_default()``, which is called without ``size``.
    """
    candidates = ("arial.ttf", "segoeui.ttf")
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, size)
        except OSError:
            # This font file is unavailable here; try the next candidate.
            continue
        else:
            return loaded
    return ImageFont.load_default()


# (title, subtitle) pairs, one per slide, in display order.
slides = [
    ("Model Assumption Diagnostics", "Synthetic reviewer packet for SCIBASE #16"),
    ("Decision", "hold-ai-review · risk score 100/100"),
    ("Blockers", "model-family mismatch · low events per predictor · separation"),
    ("Reviewer Action", "Refit, add sensitivity evidence, and link diagnostics before AI review release"),
]


def _render_slide(title, subtitle):
    """Draw a single 960x540 slide with a framed title, subtitle, bar and footer."""
    canvas = Image.new("RGB", (960, 540), "#0e1726")
    pen = ImageDraw.Draw(canvas)
    pen.rectangle((48, 58, 912, 482), outline="#334155", width=3)
    pen.text((82, 132), title, fill="#f8fafc", font=font(44))
    pen.text((82, 214), subtitle, fill="#cbd5e1", font=font(24))
    pen.rectangle((82, 340, 690, 380), fill="#ef4444")
    pen.text(
        (82, 410),
        "No external services, credentials, or private research data.",
        fill="#94a3b8",
        font=font(20),
    )
    return canvas


frames = []
for heading, caption in slides:
    # Repeat each slide 14 times so it stays on screen for several frames.
    frames += [_render_slide(heading, caption)] * 14

# Animated GIF: the first frame is the base image, the rest are appended.
gif_path = REPORTS / "demo.gif"
first_frame, *remaining_frames = frames
first_frame.save(gif_path, save_all=True, append_images=remaining_frames, duration=120, loop=0)

# MP4: the same frames converted to arrays and encoded with libx264.
mp4_path = REPORTS / "demo.mp4"
iio.imwrite(mp4_path, [np.asarray(frame) for frame in frames], fps=8, codec="libx264")

print(f"wrote {gif_path}")
print(f"wrote {mp4_path}")
260 changes: 260 additions & 0 deletions model-assumption-diagnostics-assistant/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
// Sort weight for each finding severity; findings with a larger weight are
// listed earlier when the report's findings are sorted.
const SEVERITY_ORDER = { block: 3, warn: 2, info: 1 };

/**
 * Bundle the parts of a single diagnostic finding into one record.
 *
 * @param {string} code - Identifier of the check that fired.
 * @param {string} severity - Severity label for the finding.
 * @param {string} message - Explanation of the problem.
 * @param {object} evidence - Values that triggered the finding.
 * @param {string} remediation - Suggested fix.
 * @returns {{code, severity, message, evidence, remediation}} The finding record.
 */
function issue(code, severity, message, evidence, remediation) {
  const finding = {
    code,
    severity,
    message,
    evidence,
    remediation,
  };
  return finding;
}

/**
 * Report whether the packet's declared primary model has a binary outcome.
 *
 * @param {object} packet - Analysis packet; `declaredPrimaryModel` may be absent.
 * @returns {boolean} True only when `declaredPrimaryModel.outcomeType` is "binary".
 */
function isBinaryOutcome(packet) {
  const declared = packet.declaredPrimaryModel;
  if (declared === undefined || declared === null) {
    return false;
  }
  return declared.outcomeType === "binary";
}

/**
 * Report whether the declared primary model is a linear regression or uses
 * an identity link. Either condition alone is enough.
 *
 * @param {object} packet - Analysis packet; `declaredPrimaryModel` may be absent.
 * @returns {boolean} True when `family` is "linear-regression" or
 *   `linkFunction` is "identity".
 */
function usesLinearIdentityModel(packet) {
  const declared = packet.declaredPrimaryModel || {};
  if (declared.family === "linear-regression") {
    return true;
  }
  return declared.linkFunction === "identity";
}

/**
 * Report whether any reviewer claim mentions assumptions, diagnostics or
 * limitations (case-insensitive substring match).
 *
 * @param {object} packet - Analysis packet; `reviewerClaims` may be absent.
 * @returns {boolean} True when at least one claim matches.
 */
function assumptionsMentioned(packet) {
  const mentionsAssumptionTopic = (claim) => /assumption|diagnostic|limitation/i.test(claim);
  const reviewerClaims = packet.reviewerClaims || [];
  return reviewerClaims.some(mentionsAssumptionTopic);
}

/**
 * Review an analysis packet for model-assumption problems and produce a
 * reviewer-facing report.
 *
 * Each check reads values from `packet.diagnostics`,
 * `packet.remediationEvidence` or `packet.declaredPrimaryModel` and, when it
 * fires, adds a finding with severity "block" or "warn". Numeric diagnostics
 * that must distinguish "reported as 0" from "not reported" (event count,
 * Durbin-Watson statistic, Breusch-Pagan p-value) are only evaluated when they
 * are finite numbers, so a legitimate zero is still checked and a `null`
 * placeholder is never coerced into a number.
 *
 * @param {object} packet - Analysis packet to review.
 * @returns {{manuscriptId: *, title: *, decision: string, riskScore: number,
 *   summary: {blockCount: number, warnCount: number, findingCount: number},
 *   findings: object[]}} Report whose findings are sorted by severity
 *   (blockers first) and then by code.
 * @throws {TypeError} When `packet` is not an object.
 */
export function reviewModelAssumptions(packet) {
  if (!packet || typeof packet !== "object") {
    throw new TypeError("analysis packet must be an object");
  }

  const diagnostics = packet.diagnostics || {};
  const remediation = packet.remediationEvidence || {};
  const findings = [];

  // A diagnostic counts as "reported" only when it is a finite number. This
  // keeps 0 as a valid value and rejects null/undefined/NaN placeholders.
  const isReported = (value) => typeof value === "number" && Number.isFinite(value);

  if (isBinaryOutcome(packet) && usesLinearIdentityModel(packet)) {
    findings.push(
      issue(
        "MODEL_FAMILY_OUTCOME_MISMATCH",
        "block",
        "Binary outcomes should not be released through an identity-link linear model without an explicit sensitivity justification.",
        {
          outcomeType: packet.declaredPrimaryModel?.outcomeType,
          family: packet.declaredPrimaryModel?.family,
          linkFunction: packet.declaredPrimaryModel?.linkFunction,
        },
        "Refit with a logistic or mixed-effects logistic model, or add a documented sensitivity analysis before AI review conclusions are trusted.",
      ),
    );
  }

  const { eventCount, predictors } = diagnostics;
  // Zero events is the worst possible ratio and must be checked, not skipped.
  // The ratio is undefined without at least one predictor, so that case is skipped.
  if (isReported(eventCount) && isReported(predictors) && predictors > 0) {
    const eventsPerPredictor = eventCount / predictors;
    if (eventsPerPredictor < 5) {
      findings.push(
        issue(
          "LOW_EVENTS_PER_PREDICTOR",
          "block",
          "The event-per-predictor ratio is too low for stable adjusted effects.",
          {
            eventCount,
            predictors,
            eventsPerPredictor: Number(eventsPerPredictor.toFixed(2)),
          },
          "Reduce predictors, use penalization, or present the model as exploratory until stability is demonstrated.",
        ),
      );
    }
  }

  if (diagnostics.completeSeparationIndicators > 0) {
    findings.push(
      issue(
        "SEPARATION_RISK",
        "block",
        "Separation indicators suggest coefficient estimates may be unstable or infinite.",
        { completeSeparationIndicators: diagnostics.completeSeparationIndicators },
        "Use penalized logistic regression or exact methods and report the separation diagnostic in the reviewer packet.",
      ),
    );
  }

  const { breuschPaganPValue } = diagnostics;
  // Guard against `null < 0.05`, which is true in JavaScript because null
  // coerces to 0 and would raise a warning for a p-value that was never reported.
  if (isReported(breuschPaganPValue) && breuschPaganPValue < 0.05 && !diagnostics.robustStandardErrorsDeclared) {
    findings.push(
      issue(
        "HETEROSCEDASTICITY_UNHANDLED",
        "warn",
        "Residual variance is not constant and no robust standard errors were declared.",
        { breuschPaganPValue },
        "Report robust or clustered standard errors and flag affected inference in the limitations section.",
      ),
    );
  }

  if (diagnostics.maxVarianceInflationFactor > 8) {
    findings.push(
      issue(
        "MULTICOLLINEARITY_HIGH",
        "warn",
        "Predictors show high variance inflation, so independent-effect claims may be overstated.",
        { maxVarianceInflationFactor: diagnostics.maxVarianceInflationFactor },
        "Collapse correlated predictors, use regularization, or downgrade independent-effect language.",
      ),
    );
  }

  if (diagnostics.residualSkew > 1.25 || diagnostics.outlierStudentizedResidualMax > 4) {
    findings.push(
      issue(
        "RESIDUAL_OUTLIER_PRESSURE",
        "warn",
        "Residual skew or influential outliers are large enough to require sensitivity reporting.",
        {
          residualSkew: diagnostics.residualSkew,
          outlierStudentizedResidualMax: diagnostics.outlierStudentizedResidualMax,
        },
        "Add influence diagnostics, robust fit sensitivity, and a reviewer-facing note describing impacted claims.",
      ),
    );
  }

  const { durbinWatson } = diagnostics;
  // A reported statistic of exactly 0 is below the 1.4 bound and must be
  // flagged; a truthiness guard would silently skip it.
  if (isReported(durbinWatson) && (durbinWatson < 1.4 || durbinWatson > 2.6)) {
    findings.push(
      issue(
        "AUTOCORRELATION_RISK",
        "warn",
        "Residual autocorrelation may invalidate nominal uncertainty estimates.",
        { durbinWatson },
        "Model temporal or batch correlation explicitly, or use clustered uncertainty estimates.",
      ),
    );
  }

  // A missing or zero cluster count is deliberately left unflagged here,
  // matching the original behavior (presumably "no clustering declared").
  if (diagnostics.clusterCount && diagnostics.clusterCount < 5) {
    findings.push(
      issue(
        "TOO_FEW_CLUSTERS",
        "warn",
        "Cluster-adjusted claims are fragile with very few clusters.",
        { clusterCount: diagnostics.clusterCount, smallestClusterSize: diagnostics.smallestClusterSize },
        "Report cluster limitations and avoid generalizing beyond the observed sites or batches.",
      ),
    );
  }

  if (!diagnostics.calibrationReported && isBinaryOutcome(packet)) {
    findings.push(
      issue(
        "CALIBRATION_MISSING",
        "warn",
        "Binary prediction claims need calibration evidence before reviewer summaries treat them as reliable.",
        { calibrationReported: diagnostics.calibrationReported },
        "Add calibration curve, Brier score, or held-out calibration summary.",
      ),
    );
  }

  if (!remediation.sensitivityModelIncluded || !remediation.reviewerPacketLinksDiagnostics) {
    findings.push(
      issue(
        "REVIEW_PACKET_INCOMPLETE",
        "block",
        "The reviewer handoff is missing sensitivity evidence or links to diagnostics.",
        {
          sensitivityModelIncluded: remediation.sensitivityModelIncluded,
          reviewerPacketLinksDiagnostics: remediation.reviewerPacketLinksDiagnostics,
        },
        "Hold automated peer-review approval until the handoff packet includes diagnostics and sensitivity results.",
      ),
    );
  }

  // Only meaningful when something else fired: the manuscript talks about
  // assumptions/diagnostics but its limitations do not disclose the risk.
  if (findings.length > 0 && assumptionsMentioned(packet) && !remediation.limitationTextMentionsAssumptionRisk) {
    findings.push(
      issue(
        "OVERCONFIDENT_ASSUMPTION_CLAIM",
        "warn",
        "The manuscript claims diagnostics were considered but does not disclose assumption risk in limitations.",
        { limitationTextMentionsAssumptionRisk: remediation.limitationTextMentionsAssumptionRisk },
        "Add limitation language that names the specific modeling assumptions still under review.",
      ),
    );
  }

  const blockCount = findings.filter((finding) => finding.severity === "block").length;
  const warnCount = findings.filter((finding) => finding.severity === "warn").length;
  // Each blocker adds 28 points and each warning 11, capped at 100.
  const riskScore = Math.min(100, blockCount * 28 + warnCount * 11);
  const decision = blockCount > 0 ? "hold-ai-review" : warnCount > 0 ? "needs-reviewer-note" : "ready-for-ai-review";

  return {
    manuscriptId: packet.manuscriptId,
    title: packet.title,
    decision,
    riskScore,
    summary: {
      blockCount,
      warnCount,
      findingCount: findings.length,
    },
    // Highest severity first; ties are ordered by finding code.
    findings: findings.sort(
      (a, b) => SEVERITY_ORDER[b.severity] - SEVERITY_ORDER[a.severity] || a.code.localeCompare(b.code),
    ),
  };
}

/**
 * Render a review report as Markdown for human reviewers.
 *
 * The output has a header (manuscript id, decision, risk score, counts),
 * then one section per finding with its message, JSON-encoded evidence and
 * remediation. When there are no findings a single "nothing detected" line
 * is written instead.
 *
 * @param {object} report - Report with `manuscriptId`, `decision`,
 *   `riskScore`, `summary.blockCount`, `summary.warnCount` and `findings`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function buildReviewerMarkdown(report) {
  const header = [
    `# Model Assumption Diagnostics: ${report.manuscriptId}`,
    "",
    `Decision: **${report.decision}**`,
    `Risk score: **${report.riskScore}/100**`,
    "",
    `Findings: ${report.summary.blockCount} blockers, ${report.summary.warnCount} warnings.`,
    "",
  ];

  // Each finding becomes a heading followed by three blank-line-separated paragraphs.
  const renderFinding = ({ severity, code, message, evidence, remediation }) => [
    `## ${severity.toUpperCase()}: ${code}`,
    message,
    "",
    `Evidence: \`${JSON.stringify(evidence)}\``,
    "",
    `Remediation: ${remediation}`,
    "",
  ];

  const body = [];
  for (const finding of report.findings) {
    body.push(...renderFinding(finding));
  }

  const footer =
    report.findings.length === 0
      ? ["No assumption blockers or warnings were detected in the synthetic packet.", ""]
      : [];

  return [...header, ...body, ...footer].join("\n");
}

/**
 * Render a 920x420 SVG summary card for a review report.
 *
 * The card shows the manuscript id and decision (XML-escaped), a risk bar
 * whose width is `riskScore * 7.6` capped at 760, and three counters:
 * blockers, warnings and "assumption checks clear".
 *
 * @param {object} report - Report with `manuscriptId`, `decision`,
 *   `riskScore`, `summary.blockCount` and `summary.warnCount`.
 * @returns {string} SVG markup ending with a newline.
 */
export function buildSummarySvg(report) {
  const { blockCount, warnCount } = report.summary;
  // NOTE(review): 10 is a hard-coded total of checks; confirm it matches the
  // number of checks the review actually performs.
  const clearCount = Math.max(0, 10 - blockCount - warnCount);
  const barWidth = Math.min(760, report.riskScore * 7.6);
  // Red when the review is held; amber for every other decision.
  const barColor = report.decision === "hold-ai-review" ? "#ef4444" : "#f59e0b";

  // One counter tile: a rounded panel with a large number and a caption.
  const counterTile = (offsetX, numberColor, value, caption) => [
    `<g transform="translate(${offsetX} 230)">`,
    `<rect width="210" height="106" rx="8" fill="#1e293b"/>`,
    `<text x="24" y="42" fill="${numberColor}" font-family="Arial, sans-serif" font-size="28" font-weight="700">${value}</text>`,
    `<text x="24" y="76" fill="#cbd5e1" font-family="Arial, sans-serif" font-size="16">${caption}</text>`,
    `</g>`,
  ];

  const markup = [
    `<svg xmlns="http://www.w3.org/2000/svg" width="920" height="420" viewBox="0 0 920 420" role="img" aria-label="Model assumption diagnostics summary">`,
    `<rect width="920" height="420" fill="#0e1726"/>`,
    `<text x="48" y="66" fill="#f8fafc" font-family="Arial, sans-serif" font-size="30" font-weight="700">Model Assumption Diagnostics</text>`,
    `<text x="48" y="106" fill="#cbd5e1" font-family="Arial, sans-serif" font-size="17">${escapeXml(report.manuscriptId)} · ${escapeXml(report.decision)}</text>`,
    `<rect x="48" y="148" width="${barWidth}" height="34" rx="5" fill="${barColor}"/>`,
    `<rect x="48" y="148" width="760" height="34" rx="5" fill="none" stroke="#334155"/>`,
    `<text x="824" y="173" fill="#f8fafc" font-family="Arial, sans-serif" font-size="20" text-anchor="end">${report.riskScore}/100</text>`,
    ...counterTile(48, "#fecaca", blockCount, "blockers"),
    ...counterTile(288, "#fde68a", warnCount, "warnings"),
    ...counterTile(528, "#bbf7d0", clearCount, "assumption checks clear"),
    `</svg>`,
    // Trailing empty entry yields the final newline after </svg>.
    "",
  ];

  return markup.join("\n");
}

/**
 * Escape the characters `&`, `<`, `>` and `"` so a value can be embedded in
 * XML/SVG markup. The value is converted with `String()` first, so
 * non-string inputs are accepted.
 *
 * @param {*} value - Value to escape.
 * @returns {string} Escaped text.
 */
function escapeXml(value) {
  const entityFor = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
  };
  // Single pass: each special character is replaced by its entity exactly once.
  return String(value).replace(/[&<>"]/g, (character) => entityFor[character]);
}
12 changes: 12 additions & 0 deletions model-assumption-diagnostics-assistant/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"name": "model-assumption-diagnostics-assistant",
"version": "1.0.0",
"description": "Synthetic model-assumption diagnostics assistant for SCIBASE AI review workflows.",
"type": "module",
"private": true,
"scripts": {
"test": "node test.js",
"demo": "node demo.js",
"demo:video": "python demo_video.py"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Demo: Model Assumption Diagnostics Assistant
Manuscript: SCI-MODEL-ASSUMPTION-001
Decision: hold-ai-review
Risk score: 100/100
Blockers: 4
Warnings: 7
Reviewer action: hold AI review until model family, separation, sensitivity, and diagnostic handoff blockers are fixed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading