diff --git a/systematic-review-screening-drift-assistant/README.md b/systematic-review-screening-drift-assistant/README.md new file mode 100644 index 00000000..39caa93a --- /dev/null +++ b/systematic-review-screening-drift-assistant/README.md @@ -0,0 +1,43 @@ +# Systematic Review Screening Drift Assistant + +This self-contained module adds a deterministic AI research-assistant guard for systematic review screening packets. It is scoped to SCIBASE issue #16, the AI-Powered Research Assistant Suite, and focuses on whether screening decisions, exclusion rationales, and research-gap prompts are safe to surface to researchers. + +The assistant does not call external APIs, payment systems, live review platforms, or private data stores. All fixtures are synthetic and all checks run with Node built-ins. + +## What It Checks + +- Locked eligibility criteria versions and complete PICO plus study-design fields. +- Dual independent review at title/abstract and full-text stages. +- Unresolved reviewer conflicts before assistant output release. +- Structured exclusion reasons from an approved taxonomy. +- Full-text retrieval evidence, locators, and content hashes. +- Duplicate-cluster canonical record selection. +- AI recommendations limited to assist-only authority with human approval gates. +- Private reviewer notes kept out of assistant context and generated gap prompts. +- Research-gap prompts backed by enough screened-study evidence and limitation signals. 
+ +## Local Validation + +```sh +npm --prefix systematic-review-screening-drift-assistant run check +npm --prefix systematic-review-screening-drift-assistant test +npm --prefix systematic-review-screening-drift-assistant run demo +npm --prefix systematic-review-screening-drift-assistant run make-demo-video +npm --prefix systematic-review-screening-drift-assistant run verify-video +``` + +## Generated Artifacts + +Running the `demo` script writes the JSON, Markdown, and SVG reports; running `make-demo-video` writes `reports/demo.mp4`: + +- `reports/clean-screening-report.json` +- `reports/risky-screening-report.json` +- `reports/risky-screening-handoff.md` +- `reports/screening-dashboard.svg` +- `reports/demo.mp4` + +The risky packet intentionally demonstrates release blockers: criteria-version drift, missing criteria fields, stale search evidence, broad AI action authority, missing human approval, missing dual review, unresolved full-text conflict, missing full-text retrieval, invalid exclusion reason, missing exclusion evidence, private note leakage, and under-evidenced gap prompts. + +## Issue Fit + +This is a distinct AI-powered research assistant slice. It complements the broad assistant suite, evidence binder, structured abstract checker, external-validity transfer assistant, geospatial assistant, prompt-safety guard, omics review assistants, and generic peer-review generators by focusing specifically on systematic review screening integrity and exclusion-rationale drift. 
/**
 * Write a value into the reports directory as pretty-printed JSON.
 *
 * @param {string} name - File name, relative to the reports directory.
 * @param {*} value - JSON-serialisable value; emitted with two-space
 *   indentation and a trailing newline.
 */
function writeJson(name, value) {
  const target = path.join(reportsDir, name);
  fs.writeFileSync(target, `${JSON.stringify(value, null, 2)}\n`);
}

/**
 * Escape a value for safe interpolation into XML/SVG text or attribute content.
 *
 * The ampersand is replaced first so that the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} value - Any value; it is stringified with `String(value)` first.
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeXml(value) {
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll("\"", "&quot;");
}

/**
 * Render the Markdown handoff for a screening report.
 *
 * @param {object} report - Report returned by the screening evaluation; reads
 *   `summary` (reviewId, decision, findingCount, highOrCriticalFindings,
 *   auditDigest), `recommendations` and `findings`.
 * @returns {string} Markdown document terminated by a newline.
 */
function makeMarkdownReport(report) {
  // Only the first few (already sorted) findings are listed to keep the handoff short.
  const maxTopFindings = 8;
  const { summary, recommendations, findings } = report;

  const actionLines = recommendations.length === 0
    ? ["- No remediation required."]
    : recommendations.map((recommendation) => `- ${recommendation}`);

  const findingLines = findings
    .slice(0, maxTopFindings)
    .map((finding) => `- ${finding.severity.toUpperCase()} ${finding.code}: ${finding.message}`);

  const lines = [
    `# Screening Assistant Handoff: ${summary.reviewId}`,
    "",
    `Decision: ${summary.decision}`,
    `Findings: ${summary.findingCount}`,
    `High or critical findings: ${summary.highOrCriticalFindings}`,
    `Audit digest: ${summary.auditDigest}`,
    "",
    "## Required Actions",
    ...actionLines,
    "",
    "## Top Findings",
    ...findingLines
  ];

  return `${lines.join("\n")}\n`;
}
/** Eligibility fields (PICO plus study design) that every criteria set must define. */
const REQUIRED_CRITERIA_FIELDS = Object.freeze(["population", "intervention", "comparator", "outcome", "studyDesign"]);

/** Numeric rank per severity; unknown severities rank 0. */
const SEVERITY_RANKS = new Map([
  ["critical", 4],
  ["high", 3],
  ["medium", 2],
  ["low", 1]
]);

/** Reader-facing names of the known screening stages, used in finding messages. */
const STAGE_LABELS = new Map([
  ["title_abstract", "title/abstract"],
  ["full_text", "full-text"]
]);

/**
 * Coerce a possibly-missing value to an array.
 *
 * @param {*} value - Anything.
 * @returns {Array} `value` itself when it is an array, otherwise `[]`.
 */
function asArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Serialise a value with object keys sorted, so that structurally equal values
 * always produce the same string regardless of key insertion order.
 *
 * @param {*} value - Value to serialise.
 * @returns {string|undefined} Canonical text; primitives fall through to
 *   `JSON.stringify`, which yields `undefined` for `undefined` input.
 */
function stableJson(value) {
  if (Array.isArray(value)) {
    return `[${value.map((item) => stableJson(item)).join(",")}]`;
  }
  if (value && typeof value === "object") {
    const members = Object.keys(value)
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`);
    return `{${members.join(",")}}`;
  }
  return JSON.stringify(value);
}

/**
 * Hex SHA-256 digest of the canonical (`stableJson`) form of a value.
 *
 * @param {*} value - Value to hash.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(value) {
  return crypto.createHash("sha256").update(stableJson(value)).digest("hex");
}

/**
 * Parse a date-like value.
 *
 * @param {*} value - Date string or timestamp; falsy values are treated as missing.
 * @returns {Date|null} Parsed date, or `null` when missing or unparseable.
 */
function toDate(value) {
  const parsed = new Date(value || "");
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}

/**
 * Whole days elapsed from `earlierValue` to `laterValue`.
 *
 * @param {*} laterValue - Later date.
 * @param {*} earlierValue - Earlier date.
 * @returns {number|null} Floored day difference, or `null` if either date is invalid.
 */
function daysBetween(laterValue, earlierValue) {
  const later = toDate(laterValue);
  const earlier = toDate(earlierValue);
  if (!later || !earlier) {
    return null;
  }
  const millisecondsPerDay = 24 * 60 * 60 * 1000;
  return Math.floor((later.getTime() - earlier.getTime()) / millisecondsPerDay);
}

/**
 * Rank a severity label for sorting and thresholding.
 *
 * @param {string} severity - "critical", "high", "medium" or "low".
 * @returns {number} 4..1 for the known labels, 0 otherwise.
 */
function severityRank(severity) {
  return SEVERITY_RANKS.get(severity) || 0;
}

/**
 * Append a finding record to the accumulator.
 *
 * @param {Array<object>} findings - Accumulator, mutated in place.
 * @param {string} severity - Severity label.
 * @param {string} code - Stable machine-readable finding code.
 * @param {string} message - Human-readable explanation.
 * @param {Array<string>} refs - Identifiers of the records the finding concerns.
 * @param {string} action - Remediation action identifier.
 */
function addFinding(findings, severity, code, message, refs, action) {
  findings.push({
    severity,
    code,
    message,
    // Copied so findings raised for the same record do not share one mutable array.
    refs: [...asArray(refs)],
    action
  });
}

/**
 * De-duplicate values, dropping falsy entries and keeping first-seen order.
 *
 * @param {Array} values - Input values (non-arrays are treated as empty).
 * @returns {Array} Unique truthy values.
 */
function unique(values) {
  return [...new Set(asArray(values).filter(Boolean))];
}

/**
 * Build a set of trimmed, lower-cased, non-empty strings.
 *
 * @param {Array} values - Input values (non-arrays are treated as empty).
 * @returns {Set<string>} Normalised set.
 */
function normalizedSet(values) {
  const normalized = asArray(values)
    .map((value) => String(value || "").trim().toLowerCase())
    .filter(Boolean);
  return new Set(normalized);
}

/**
 * List the required names that are absent from `actual` (case-insensitive).
 *
 * @param {Array<string>} required - Names that must be present.
 * @param {Array<string>} actual - Names that are present.
 * @returns {Array<string>} Missing names, in `required` order and original casing.
 */
function missingRequired(required, actual) {
  const present = normalizedSet(actual);
  return asArray(required).filter((value) => !present.has(String(value).toLowerCase()));
}

/**
 * Count distinct reviewers (case-insensitive) across a list of decisions.
 *
 * @param {Array<object>} decisions - Decision records carrying a `reviewer` field.
 * @returns {number} Number of distinct, non-empty reviewer identifiers.
 */
function reviewerCount(decisions) {
  return normalizedSet(asArray(decisions).map((decision) => decision.reviewer)).size;
}

/**
 * Select a study's decisions for one screening stage.
 *
 * @param {object} study - Study record.
 * @param {string} stage - Stage identifier such as "title_abstract" or "full_text".
 * @returns {Array<object>} Decisions whose `stage` equals `stage`.
 */
function stageDecisions(study, stage) {
  return asArray(study.decisions).filter((decision) => decision.stage === stage);
}

/**
 * Whether a study is expected to have gone through full-text screening.
 *
 * @param {object} study - Study record.
 * @returns {boolean} False when full text is explicitly not required or the study
 *   was excluded at title/abstract stage.
 */
function shouldReviewFullText(study) {
  return study.fullTextRequired !== false && study.screeningOutcome !== "title_abstract_excluded";
}

/**
 * Whether an AI recommendation goes beyond assist-only behaviour.
 *
 * @param {object} recommendation - AI recommendation record.
 * @returns {boolean} True when it was applied or its authority is not "assist_only".
 */
function isAiOverride(recommendation) {
  return recommendation.applied === true || recommendation.authority !== "assist_only";
}

/**
 * Read a numeric setting, falling back only when it is absent or not numeric.
 * An explicit `0` is honoured rather than being replaced by the default.
 *
 * @param {*} value - Configured value (number or numeric string).
 * @param {number} fallback - Default to use otherwise.
 * @returns {number} The configured number or the fallback.
 */
function numberOrDefault(value, fallback) {
  const isNumericString = typeof value === "string" && value.trim() !== "";
  if (typeof value !== "number" && !isNumericString) {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Human-readable name of a screening stage for use in messages.
 *
 * @param {string} stage - Stage key from a study's `consensus` object.
 * @returns {string} Known label, or the key with underscores turned into spaces.
 */
function stageLabel(stage) {
  return STAGE_LABELS.get(stage) ?? String(stage).replaceAll("_", " ");
}

/**
 * Run the packet-level checks: eligibility lock, criteria completeness, search
 * snapshot freshness, AI policy, and presence of studies.
 *
 * @param {object} packet - Screening packet.
 * @param {object} eligibility - `packet.eligibilityCriteria` or `{}`.
 * @param {number} studyCount - Number of studies supplied.
 * @param {Array<object>} findings - Accumulator, mutated in place.
 */
function collectPacketFindings(packet, eligibility, studyCount, findings) {
  const reviewRef = packet.reviewId || "review";

  if (!eligibility.version) {
    addFinding(
      findings,
      "critical",
      "ELIGIBILITY_VERSION_MISSING",
      "The systematic review packet has no locked eligibility-criteria version.",
      [reviewRef],
      "lock_eligibility_criteria_before_ai_screening"
    );
  }

  const missingCriteria = missingRequired(REQUIRED_CRITERIA_FIELDS, Object.keys(eligibility.fields || {}));
  if (missingCriteria.length > 0) {
    addFinding(
      findings,
      "high",
      "ELIGIBILITY_CRITERIA_INCOMPLETE",
      `Eligibility criteria are missing required fields: ${missingCriteria.join(", ")}.`,
      [eligibility.version || packet.reviewId || "eligibility"],
      "complete_pico_and_study_design_criteria"
    );
  }

  // Without an explicit review date the snapshot age is measured against today (UTC).
  const reviewDate = packet.reviewDate || new Date().toISOString().slice(0, 10);
  const searchAge = daysBetween(reviewDate, packet.searchSnapshot?.capturedAt);
  const maxSearchAge = numberOrDefault(packet.maxSearchSnapshotAgeDays, 30);
  if (searchAge === null || searchAge > maxSearchAge) {
    const ageText = searchAge === null ? "missing" : `${searchAge} days old`;
    addFinding(
      findings,
      "medium",
      "SEARCH_SNAPSHOT_STALE",
      `Search snapshot is ${ageText} for ${reviewRef}.`,
      [packet.searchSnapshot?.id || "search-snapshot"],
      "refresh_search_snapshot_before_gap_generation"
    );
  }

  const aiPolicy = packet.aiPolicy || {};
  if (aiPolicy.actionAuthority !== "assist_only") {
    addFinding(
      findings,
      "critical",
      "AI_POLICY_AUTHORITY_TOO_BROAD",
      "AI assistant policy allows actions beyond assist-only recommendations.",
      [reviewRef],
      "set_ai_policy_to_assist_only"
    );
  }
  if (aiPolicy.humanApprovalRequired !== true) {
    addFinding(
      findings,
      "high",
      "HUMAN_APPROVAL_GATE_MISSING",
      "AI screening output can be released without an explicit human approval gate.",
      [reviewRef],
      "require_human_approval_for_screening_release"
    );
  }

  if (studyCount === 0) {
    addFinding(
      findings,
      "critical",
      "NO_STUDIES_TO_SCREEN",
      "No studies were supplied to the screening assistant.",
      [reviewRef],
      "attach_screening_records_before_assistant_run"
    );
  }
}

/**
 * Run every per-study check and build the study's summary row.
 *
 * @param {object} study - Study record.
 * @param {object} eligibility - Locked eligibility criteria (or `{}`).
 * @param {Map<string, object>} taxonomy - Approved exclusion reasons keyed by id.
 * @param {Array<object>} findings - Accumulator, mutated in place.
 * @returns {object} Summary with id, outcome, requiredActions and reviewer counts.
 */
function evaluateStudy(study, eligibility, taxonomy, findings) {
  const label = study.id || "Study";
  const refs = [study.id || "study"];
  const requiredActions = [];
  const titleAbstractReviewers = reviewerCount(stageDecisions(study, "title_abstract"));
  const fullTextReviewers = reviewerCount(stageDecisions(study, "full_text"));
  const needsFullText = shouldReviewFullText(study);

  // Records a finding; `trackOnStudy` also lists the action on the study summary.
  const flag = (severity, code, message, action, trackOnStudy = false) => {
    addFinding(findings, severity, code, message, refs, action);
    if (trackOnStudy) {
      requiredActions.push(action);
    }
  };

  if (study.criteriaVersion !== eligibility.version) {
    flag(
      "high",
      "CRITERIA_VERSION_DRIFT",
      `${label} was screened against ${study.criteriaVersion || "no criteria version"} instead of ${eligibility.version || "the locked version"}.`,
      "rescreen_against_locked_eligibility_version",
      true
    );
  }

  if (titleAbstractReviewers < 2) {
    flag(
      "high",
      "TITLE_ABSTRACT_DUAL_REVIEW_GAP",
      `${label} does not have two independent title/abstract reviewers.`,
      "assign_second_title_abstract_reviewer",
      true
    );
  }

  if (needsFullText && fullTextReviewers < 2) {
    flag(
      "high",
      "FULL_TEXT_DUAL_REVIEW_GAP",
      `${label} does not have two independent full-text reviewers.`,
      "assign_second_full_text_reviewer",
      true
    );
  }

  for (const [stage, consensus] of Object.entries(study.consensus || {})) {
    if (consensus && consensus.state === "conflict" && !consensus.resolution) {
      flag(
        "critical",
        "UNRESOLVED_SCREENING_CONFLICT",
        `${label} has an unresolved ${stageLabel(stage)} screening conflict.`,
        "resolve_reviewer_conflict_before_ai_release",
        true
      );
    }
  }

  if (needsFullText) {
    if (!study.fullText || !study.fullText.retrievedAt) {
      flag(
        "high",
        "FULL_TEXT_RETRIEVAL_MISSING",
        `${label} reached full-text screening without retrieval evidence.`,
        "attach_full_text_retrieval_evidence",
        true
      );
    } else if (!study.fullText.hash || !study.fullText.locator) {
      flag(
        "medium",
        "FULL_TEXT_PROVENANCE_GAP",
        `${label} full-text evidence is missing a locator or content hash.`,
        "record_full_text_locator_and_hash"
      );
    }
  }

  if (study.duplicateGroup && !study.duplicateGroup.canonicalStudyId) {
    flag(
      "medium",
      "DUPLICATE_CLUSTER_CANONICAL_MISSING",
      `${label} is in duplicate cluster ${study.duplicateGroup.id || "unknown"} without a canonical record.`,
      "select_canonical_record_for_duplicate_cluster"
    );
  }

  if (study.screeningOutcome === "excluded") {
    const reasons = asArray(study.exclusionReasons);
    if (reasons.length === 0) {
      flag(
        "high",
        "EXCLUSION_REASON_MISSING",
        `${label} is excluded without a structured exclusion reason.`,
        "select_taxonomy_backed_exclusion_reason"
      );
    }
    for (const reason of reasons) {
      if (!taxonomy.get(reason.id)) {
        flag(
          "high",
          "EXCLUSION_REASON_NOT_IN_TAXONOMY",
          `${label} uses exclusion reason ${reason.id || "unknown"}, which is not in the approved taxonomy.`,
          "map_exclusion_to_approved_reason_taxonomy"
        );
      }
      if (!reason.evidenceLocator || !reason.evidenceQuote) {
        flag(
          "high",
          "EXCLUSION_EVIDENCE_MISSING",
          `${label} exclusion reason ${reason.id || "unknown"} lacks full-text evidence locator and quote.`,
          "attach_full_text_evidence_for_exclusion"
        );
      }
    }
  }

  if (asArray(study.aiRecommendations).some(isAiOverride) && study.humanApproved !== true) {
    flag(
      "critical",
      "AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL",
      `${label} has an AI screening recommendation that can override or apply without human approval.`,
      "downgrade_ai_recommendation_to_assist_only",
      true
    );
  }

  const leaksPrivateNote = asArray(study.reviewerNotes)
    .some((note) => note.private === true && note.exportedToAssistant === true);
  if (leaksPrivateNote) {
    flag(
      "critical",
      "PRIVATE_REVIEW_NOTE_EXPORTED",
      `${label} exports private reviewer notes into the AI assistant packet.`,
      "remove_private_notes_from_ai_context",
      true
    );
  }

  return {
    id: study.id,
    outcome: study.screeningOutcome || "unknown",
    requiredActions: unique(requiredActions),
    titleAbstractReviewers,
    fullTextReviewers
  };
}

/**
 * Check one research-gap prompt and build its summary row.
 *
 * @param {object} prompt - Gap prompt record.
 * @param {number} minGapEvidenceStudies - Minimum number of cited screened studies.
 * @param {Array<object>} findings - Accumulator, mutated in place.
 * @returns {object} Summary with id, evidence/limitation counts and readyForRelease.
 */
function evaluateGapPrompt(prompt, minGapEvidenceStudies, findings) {
  const label = prompt.id || "Gap prompt";
  const refs = [prompt.id || "gap-prompt"];
  const evidenceStudyCount = asArray(prompt.evidenceStudyIds).length;
  const limitationSignalCount = asArray(prompt.limitationSignals).length;
  const hasPrivateNote = prompt.includesPrivateReviewerNote === true;

  if (evidenceStudyCount < minGapEvidenceStudies) {
    addFinding(
      findings,
      "medium",
      "GAP_PROMPT_UNDER_EVIDENCED",
      `${label} cites fewer than ${minGapEvidenceStudies} screened studies.`,
      refs,
      "add_more_screened_study_evidence_before_surfacing_gap"
    );
  }
  if (limitationSignalCount === 0) {
    addFinding(
      findings,
      "medium",
      "GAP_PROMPT_LIMITATION_SIGNALS_MISSING",
      `${label} has no limitation signals from excluded or unresolved studies.`,
      refs,
      "attach_limitation_signals_to_gap_prompt"
    );
  }
  if (hasPrivateNote) {
    addFinding(
      findings,
      "critical",
      "PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT",
      `${label} includes private reviewer-note content.`,
      refs,
      "regenerate_gap_prompt_from_public_screening_fields"
    );
  }

  return {
    id: prompt.id,
    evidenceStudyCount,
    limitationSignalCount,
    readyForRelease: evidenceStudyCount >= minGapEvidenceStudies &&
      limitationSignalCount > 0 &&
      !hasPrivateNote
  };
}

/**
 * Evaluate a systematic-review screening packet and decide whether assistant
 * output may be released.
 *
 * Checks run in a fixed order (packet level, then each study, then each gap
 * prompt). Findings are then sorted by descending severity and by code; the
 * sort is stable, so equal findings keep that evaluation order.
 *
 * @param {object} packet - Screening packet. Reads reviewId, reviewDate,
 *   eligibilityCriteria, exclusionTaxonomy, searchSnapshot,
 *   maxSearchSnapshotAgeDays (default 30), aiPolicy, studies, gapPrompts and
 *   minGapEvidenceStudies (default 3). An explicit 0 threshold is honoured.
 * @returns {{summary: object, findings: Array<object>, studySummaries: Array<object>,
 *   gapPrompts: Array<object>, recommendations: Array<string>}} Screening report.
 *   `summary.decision` is "hold_systematic_review_assistant" when any critical
 *   finding exists, "revise_screening_packet" when any high finding exists, and
 *   "release_systematic_review_assistant" otherwise.
 * @throws {TypeError} When `packet` is null or undefined.
 */
function evaluateSystematicReviewScreening(packet) {
  if (packet === null || packet === undefined) {
    throw new TypeError("evaluateSystematicReviewScreening requires a screening packet object.");
  }

  const findings = [];
  const eligibility = packet.eligibilityCriteria || {};
  const taxonomy = new Map(asArray(packet.exclusionTaxonomy).map((reason) => [reason.id, reason]));
  const studies = asArray(packet.studies);

  collectPacketFindings(packet, eligibility, studies.length, findings);

  const studySummaries = studies.map((study) => evaluateStudy(study, eligibility, taxonomy, findings));

  const minGapEvidenceStudies = numberOrDefault(packet.minGapEvidenceStudies, 3);
  const gapPrompts = asArray(packet.gapPrompts)
    .map((prompt) => evaluateGapPrompt(prompt, minGapEvidenceStudies, findings));

  const highRank = severityRank("high");
  const highOrCriticalFindings = findings.filter((finding) => severityRank(finding.severity) >= highRank).length;
  const criticalFindings = findings.filter((finding) => finding.severity === "critical").length;

  let decision = "release_systematic_review_assistant";
  if (criticalFindings > 0) {
    decision = "hold_systematic_review_assistant";
  } else if (highOrCriticalFindings > 0) {
    decision = "revise_screening_packet";
  }

  const sortedFindings = findings.sort(
    (a, b) => severityRank(b.severity) - severityRank(a.severity) || a.code.localeCompare(b.code)
  );
  const recommendations = unique(sortedFindings.map((finding) => finding.action));

  // The digest binds the input, the decision and the finding codes; messages are
  // deliberately excluded so wording changes do not alter the digest.
  const auditDigest = `sha256:${sha256({
    reviewId: packet.reviewId,
    inputDigest: sha256(packet),
    decision,
    findingCodes: sortedFindings.map((finding) => finding.code),
    recommendations
  })}`;

  return {
    summary: {
      reviewId: packet.reviewId || "systematic-review",
      decision,
      findingCount: sortedFindings.length,
      highOrCriticalFindings,
      criticalFindings,
      studiesReviewed: studies.length,
      gapPromptsReviewed: gapPrompts.length,
      auditDigest
    },
    findings: sortedFindings,
    studySummaries,
    gapPrompts,
    recommendations
  };
}
/**
 * Set one RGB pixel in a packed 24-bit frame buffer.
 * Coordinates outside the frame are ignored, so callers may draw shapes that
 * partly leave the canvas.
 *
 * @param {Buffer} buffer - Frame buffer of `width * height * 3` bytes, row-major RGB.
 * @param {number} x - Column.
 * @param {number} y - Row.
 * @param {number} r - Red, 0-255.
 * @param {number} g - Green, 0-255.
 * @param {number} b - Blue, 0-255.
 */
function setPixel(buffer, x, y, r, g, b) {
  if (x < 0 || y < 0 || x >= width || y >= height) {
    return;
  }
  const offset = (y * width + x) * 3;
  buffer[offset] = r;
  buffer[offset + 1] = g;
  buffer[offset + 2] = b;
}

/**
 * Fill an axis-aligned rectangle with a solid colour (clipped to the frame).
 *
 * @param {Buffer} buffer - Frame buffer, see `setPixel`.
 * @param {number} x - Left edge.
 * @param {number} y - Top edge.
 * @param {number} w - Width in pixels; zero or negative draws nothing.
 * @param {number} h - Height in pixels; zero or negative draws nothing.
 * @param {number} r - Red, 0-255.
 * @param {number} g - Green, 0-255.
 * @param {number} b - Blue, 0-255.
 */
function fillRect(buffer, x, y, w, h, r, g, b) {
  for (let row = y; row < y + h; row += 1) {
    for (let col = x; col < x + w; col += 1) {
      setPixel(buffer, col, row, r, g, b);
    }
  }
}

/**
 * Render one animation frame and write it as a binary PPM (P6) file named
 * `frame-NNN.ppm` in the frames directory.
 *
 * @param {number} index - Frame number, used zero-padded to three digits in the file name.
 * @param {number} progress - Animation progress from 0 (start) to 1 (end).
 */
function writeFrame(index, progress) {
  // Width of the grey tracks behind the two animated bars; fills must not exceed it.
  const trackWidth = 302;

  const buffer = Buffer.alloc(width * height * 3, 248);
  fillRect(buffer, 0, 0, width, height, 248, 250, 252);
  fillRect(buffer, 56, 48, 848, 444, 255, 255, 255);
  fillRect(buffer, 56, 48, 848, 8, 15, 23, 42);

  const cleanWidth = Math.floor(trackWidth * Math.min(1, progress * 1.7));
  // Clamped on both sides: the risky bar starts late (progress 0.15) and would
  // otherwise reach 127% of the track by the final frame.
  const riskyWidth = Math.floor(trackWidth * Math.min(1, Math.max(0, (progress - 0.15) * 1.5)));
  fillRect(buffer, 96, 112, trackWidth, 58, 226, 232, 240);
  fillRect(buffer, 96, 112, cleanWidth, 58, 16, 185, 129);
  fillRect(buffer, 560, 112, trackWidth, 58, 226, 232, 240);
  fillRect(buffer, 560, 112, riskyWidth, 58, 239, 68, 68);

  // One card per study in the clean packet.
  const cleanStudies = clean.summary.studiesReviewed;
  for (let i = 0; i < cleanStudies; i += 1) {
    fillRect(buffer, 116 + i * 76, 234, 48, 96, 20, 184, 166);
    fillRect(buffer, 124 + i * 76, 248, 32, 12, 255, 255, 255);
    fillRect(buffer, 124 + i * 76, 276, 32, 12, 255, 255, 255);
  }

  // One bar per risky finding (at most 13 fit); the first bars, one per
  // critical finding, use the darker red.
  const findingBars = Math.min(13, risky.summary.findingCount);
  for (let i = 0; i < findingBars; i += 1) {
    const barHeight = 26 + (i % 7) * 15;
    const [red, green, blue] = i < risky.summary.criticalFindings ? [127, 29, 29] : [220, 38, 38];
    fillRect(buffer, 560 + i * 22, 370 - barHeight, 16, barHeight, red, green, blue);
  }

  fillRect(buffer, 96, 418, Math.floor(340 * progress), 18, 37, 99, 235);
  fillRect(buffer, 96, 448, Math.floor(602 * progress), 18, 217, 119, 6);

  const header = Buffer.from(`P6\n${width} ${height}\n255\n`, "ascii");
  const fileName = `frame-${String(index).padStart(3, "0")}.ppm`;
  fs.writeFileSync(path.join(framesDir, fileName), Buffer.concat([header, buffer]));
}
00000000..03dd4543 --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/clean-screening-report.json @@ -0,0 +1,45 @@ +{ + "summary": { + "reviewId": "prisma-living-review-cardiometabolic-ai", + "decision": "release_systematic_review_assistant", + "findingCount": 0, + "highOrCriticalFindings": 0, + "criticalFindings": 0, + "studiesReviewed": 3, + "gapPromptsReviewed": 1, + "auditDigest": "sha256:6d084a53e0ef678115431ea547a0eaa249f5544cf34f9d448ad70d32c6c0dafe" + }, + "findings": [], + "studySummaries": [ + { + "id": "SR-001", + "outcome": "included", + "requiredActions": [], + "titleAbstractReviewers": 2, + "fullTextReviewers": 2 + }, + { + "id": "SR-002", + "outcome": "excluded", + "requiredActions": [], + "titleAbstractReviewers": 2, + "fullTextReviewers": 2 + }, + { + "id": "SR-003", + "outcome": "included", + "requiredActions": [], + "titleAbstractReviewers": 2, + "fullTextReviewers": 2 + } + ], + "gapPrompts": [ + { + "id": "gap-underserved-rural-cohorts", + "evidenceStudyCount": 3, + "limitationSignalCount": 2, + "readyForRelease": true + } + ], + "recommendations": [] +} diff --git a/systematic-review-screening-drift-assistant/reports/demo.mp4 b/systematic-review-screening-drift-assistant/reports/demo.mp4 new file mode 100644 index 00000000..6d858d22 Binary files /dev/null and b/systematic-review-screening-drift-assistant/reports/demo.mp4 differ diff --git a/systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md b/systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md new file mode 100644 index 00000000..a9d89943 --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md @@ -0,0 +1,36 @@ +# Screening Assistant Handoff: prisma-living-review-cardiometabolic-ai + +Decision: hold_systematic_review_assistant +Findings: 18 +High or critical findings: 13 +Audit digest: sha256:0c9ae18fee18fd54d887b93c23b161376559b3d740fe538f9902fe916ee704d4 + +## Required 
Actions +- set_ai_policy_to_assist_only +- downgrade_ai_recommendation_to_assist_only +- remove_private_notes_from_ai_context +- regenerate_gap_prompt_from_public_screening_fields +- resolve_reviewer_conflict_before_ai_release +- rescreen_against_locked_eligibility_version +- complete_pico_and_study_design_criteria +- attach_full_text_evidence_for_exclusion +- map_exclusion_to_approved_reason_taxonomy +- assign_second_full_text_reviewer +- attach_full_text_retrieval_evidence +- require_human_approval_for_screening_release +- assign_second_title_abstract_reviewer +- select_canonical_record_for_duplicate_cluster +- record_full_text_locator_and_hash +- attach_limitation_signals_to_gap_prompt +- add_more_screened_study_evidence_before_surfacing_gap +- refresh_search_snapshot_before_gap_generation + +## Top Findings +- CRITICAL AI_POLICY_AUTHORITY_TOO_BROAD: AI assistant policy allows actions beyond assist-only recommendations. +- CRITICAL AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL: SR-100 has an AI screening recommendation that can override or apply without human approval. +- CRITICAL PRIVATE_REVIEW_NOTE_EXPORTED: SR-100 exports private reviewer notes into the AI assistant packet. +- CRITICAL PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT: gap-adolescent-ai-coaching includes private reviewer-note content. +- CRITICAL UNRESOLVED_SCREENING_CONFLICT: SR-100 has an unresolved full/text screening conflict. +- HIGH CRITERIA_VERSION_DRIFT: SR-100 was screened against eligibility-v2 instead of eligibility-v3. +- HIGH ELIGIBILITY_CRITERIA_INCOMPLETE: Eligibility criteria are missing required fields: outcome, studyDesign. +- HIGH EXCLUSION_EVIDENCE_MISSING: SR-100 exclusion reason low_quality lacks full-text evidence locator and quote. 
diff --git a/systematic-review-screening-drift-assistant/reports/risky-screening-report.json b/systematic-review-screening-drift-assistant/reports/risky-screening-report.json new file mode 100644 index 00000000..de1756cb --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/risky-screening-report.json @@ -0,0 +1,229 @@ +{ + "summary": { + "reviewId": "prisma-living-review-cardiometabolic-ai", + "decision": "hold_systematic_review_assistant", + "findingCount": 18, + "highOrCriticalFindings": 13, + "criticalFindings": 5, + "studiesReviewed": 2, + "gapPromptsReviewed": 1, + "auditDigest": "sha256:0c9ae18fee18fd54d887b93c23b161376559b3d740fe538f9902fe916ee704d4" + }, + "findings": [ + { + "severity": "critical", + "code": "AI_POLICY_AUTHORITY_TOO_BROAD", + "message": "AI assistant policy allows actions beyond assist-only recommendations.", + "refs": [ + "prisma-living-review-cardiometabolic-ai" + ], + "action": "set_ai_policy_to_assist_only" + }, + { + "severity": "critical", + "code": "AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL", + "message": "SR-100 has an AI screening recommendation that can override or apply without human approval.", + "refs": [ + "SR-100" + ], + "action": "downgrade_ai_recommendation_to_assist_only" + }, + { + "severity": "critical", + "code": "PRIVATE_REVIEW_NOTE_EXPORTED", + "message": "SR-100 exports private reviewer notes into the AI assistant packet.", + "refs": [ + "SR-100" + ], + "action": "remove_private_notes_from_ai_context" + }, + { + "severity": "critical", + "code": "PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT", + "message": "gap-adolescent-ai-coaching includes private reviewer-note content.", + "refs": [ + "gap-adolescent-ai-coaching" + ], + "action": "regenerate_gap_prompt_from_public_screening_fields" + }, + { + "severity": "critical", + "code": "UNRESOLVED_SCREENING_CONFLICT", + "message": "SR-100 has an unresolved full/text screening conflict.", + "refs": [ + "SR-100" + ], + "action": 
"resolve_reviewer_conflict_before_ai_release" + }, + { + "severity": "high", + "code": "CRITERIA_VERSION_DRIFT", + "message": "SR-100 was screened against eligibility-v2 instead of eligibility-v3.", + "refs": [ + "SR-100" + ], + "action": "rescreen_against_locked_eligibility_version" + }, + { + "severity": "high", + "code": "ELIGIBILITY_CRITERIA_INCOMPLETE", + "message": "Eligibility criteria are missing required fields: outcome, studyDesign.", + "refs": [ + "eligibility-v3" + ], + "action": "complete_pico_and_study_design_criteria" + }, + { + "severity": "high", + "code": "EXCLUSION_EVIDENCE_MISSING", + "message": "SR-100 exclusion reason low_quality lacks full-text evidence locator and quote.", + "refs": [ + "SR-100" + ], + "action": "attach_full_text_evidence_for_exclusion" + }, + { + "severity": "high", + "code": "EXCLUSION_REASON_NOT_IN_TAXONOMY", + "message": "SR-100 uses exclusion reason low_quality, which is not in the approved taxonomy.", + "refs": [ + "SR-100" + ], + "action": "map_exclusion_to_approved_reason_taxonomy" + }, + { + "severity": "high", + "code": "FULL_TEXT_DUAL_REVIEW_GAP", + "message": "SR-101 does not have two independent full-text reviewers.", + "refs": [ + "SR-101" + ], + "action": "assign_second_full_text_reviewer" + }, + { + "severity": "high", + "code": "FULL_TEXT_RETRIEVAL_MISSING", + "message": "SR-100 reached full-text screening without retrieval evidence.", + "refs": [ + "SR-100" + ], + "action": "attach_full_text_retrieval_evidence" + }, + { + "severity": "high", + "code": "HUMAN_APPROVAL_GATE_MISSING", + "message": "AI screening output can be released without an explicit human approval gate.", + "refs": [ + "prisma-living-review-cardiometabolic-ai" + ], + "action": "require_human_approval_for_screening_release" + }, + { + "severity": "high", + "code": "TITLE_ABSTRACT_DUAL_REVIEW_GAP", + "message": "SR-100 does not have two independent title/abstract reviewers.", + "refs": [ + "SR-100" + ], + "action": 
"assign_second_title_abstract_reviewer" + }, + { + "severity": "medium", + "code": "DUPLICATE_CLUSTER_CANONICAL_MISSING", + "message": "SR-101 is in duplicate cluster dup-cluster-77 without a canonical record.", + "refs": [ + "SR-101" + ], + "action": "select_canonical_record_for_duplicate_cluster" + }, + { + "severity": "medium", + "code": "FULL_TEXT_PROVENANCE_GAP", + "message": "SR-101 full-text evidence is missing a locator or content hash.", + "refs": [ + "SR-101" + ], + "action": "record_full_text_locator_and_hash" + }, + { + "severity": "medium", + "code": "GAP_PROMPT_LIMITATION_SIGNALS_MISSING", + "message": "gap-adolescent-ai-coaching has no limitation signals from excluded or unresolved studies.", + "refs": [ + "gap-adolescent-ai-coaching" + ], + "action": "attach_limitation_signals_to_gap_prompt" + }, + { + "severity": "medium", + "code": "GAP_PROMPT_UNDER_EVIDENCED", + "message": "gap-adolescent-ai-coaching cites fewer than 3 screened studies.", + "refs": [ + "gap-adolescent-ai-coaching" + ], + "action": "add_more_screened_study_evidence_before_surfacing_gap" + }, + { + "severity": "medium", + "code": "SEARCH_SNAPSHOT_STALE", + "message": "Search snapshot is 182 days old for prisma-living-review-cardiometabolic-ai.", + "refs": [ + "search-2025-12-01-medline-only" + ], + "action": "refresh_search_snapshot_before_gap_generation" + } + ], + "studySummaries": [ + { + "id": "SR-100", + "outcome": "excluded", + "requiredActions": [ + "rescreen_against_locked_eligibility_version", + "assign_second_title_abstract_reviewer", + "resolve_reviewer_conflict_before_ai_release", + "attach_full_text_retrieval_evidence", + "downgrade_ai_recommendation_to_assist_only", + "remove_private_notes_from_ai_context" + ], + "titleAbstractReviewers": 1, + "fullTextReviewers": 2 + }, + { + "id": "SR-101", + "outcome": "included", + "requiredActions": [ + "assign_second_full_text_reviewer" + ], + "titleAbstractReviewers": 2, + "fullTextReviewers": 1 + } + ], + "gapPrompts": [ + { + 
"id": "gap-adolescent-ai-coaching", + "evidenceStudyCount": 1, + "limitationSignalCount": 0, + "readyForRelease": false + } + ], + "recommendations": [ + "set_ai_policy_to_assist_only", + "downgrade_ai_recommendation_to_assist_only", + "remove_private_notes_from_ai_context", + "regenerate_gap_prompt_from_public_screening_fields", + "resolve_reviewer_conflict_before_ai_release", + "rescreen_against_locked_eligibility_version", + "complete_pico_and_study_design_criteria", + "attach_full_text_evidence_for_exclusion", + "map_exclusion_to_approved_reason_taxonomy", + "assign_second_full_text_reviewer", + "attach_full_text_retrieval_evidence", + "require_human_approval_for_screening_release", + "assign_second_title_abstract_reviewer", + "select_canonical_record_for_duplicate_cluster", + "record_full_text_locator_and_hash", + "attach_limitation_signals_to_gap_prompt", + "add_more_screened_study_evidence_before_surfacing_gap", + "refresh_search_snapshot_before_gap_generation" + ] +} diff --git a/systematic-review-screening-drift-assistant/reports/screening-dashboard.svg b/systematic-review-screening-drift-assistant/reports/screening-dashboard.svg new file mode 100644 index 00000000..573efde8 --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/screening-dashboard.svg @@ -0,0 +1,19 @@ + + + + + Systematic Review Screening Drift Assistant + Eligibility version locks, dual review, exclusion evidence, AI authority, and gap prompt safety. 
+ Clean packet + + + release_systematic_review_assistant + Risky packet findings + + + 18 findings + Critical release blockers + + + 5 critical + \ No newline at end of file diff --git a/systematic-review-screening-drift-assistant/sample-data.js b/systematic-review-screening-drift-assistant/sample-data.js new file mode 100644 index 00000000..1feaeacb --- /dev/null +++ b/systematic-review-screening-drift-assistant/sample-data.js @@ -0,0 +1,376 @@ +const cleanPacket = { + reviewId: "prisma-living-review-cardiometabolic-ai", + reviewDate: "2026-06-01", + maxSearchSnapshotAgeDays: 21, + minGapEvidenceStudies: 3, + eligibilityCriteria: { + version: "eligibility-v3", + fields: { + population: "Adults with cardiometabolic risk markers", + intervention: "AI-assisted remote coaching or monitoring", + comparator: "Usual care or non-AI digital support", + outcome: "HbA1c, systolic blood pressure, adherence, or adverse events", + studyDesign: "Randomized, quasi-experimental, or prospective cohort" + } + }, + searchSnapshot: { + id: "search-2026-05-29-medline-embase", + capturedAt: "2026-05-29", + databases: ["MEDLINE", "Embase", "CENTRAL"] + }, + aiPolicy: { + actionAuthority: "assist_only", + humanApprovalRequired: true + }, + exclusionTaxonomy: [ + { + id: "outside_population", + label: "Outside target population" + }, + { + id: "wrong_intervention", + label: "No AI-assisted intervention" + }, + { + id: "wrong_outcome", + label: "No eligible outcome" + } + ], + studies: [ + { + id: "SR-001", + title: "Remote AI coaching for cardiometabolic risk", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "included", + humanApproved: true, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-b", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-b", + 
decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "include" + } + }, + fullText: { + retrievedAt: "2026-05-30", + locator: "s3://review-evidence/sr-001.pdf", + hash: "sha256:1c9e6c0b1f39" + }, + aiRecommendations: [ + { + action: "include", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [ + { + private: false, + exportedToAssistant: true, + text: "Outcome and intervention match eligibility-v3." + } + ] + }, + { + id: "SR-002", + title: "Mobile reminders without AI personalization", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "excluded", + humanApproved: true, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-c", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-d", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-c", + decision: "exclude" + }, + { + stage: "full_text", + reviewer: "reviewer-d", + decision: "exclude" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "exclude" + } + }, + fullText: { + retrievedAt: "2026-05-30", + locator: "s3://review-evidence/sr-002.pdf", + hash: "sha256:2f58f225abcd" + }, + exclusionReasons: [ + { + id: "wrong_intervention", + evidenceLocator: "page 4, intervention section", + evidenceQuote: "The reminder cadence was rule-based and did not use AI personalization." 
+ } + ], + aiRecommendations: [ + { + action: "exclude", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [] + }, + { + id: "SR-003", + title: "AI adherence coach in hypertension clinics", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "included", + humanApproved: true, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-b", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-b", + decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "include" + } + }, + fullText: { + retrievedAt: "2026-05-31", + locator: "s3://review-evidence/sr-003.pdf", + hash: "sha256:3b51f77acdef" + }, + aiRecommendations: [ + { + action: "include", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [] + } + ], + gapPrompts: [ + { + id: "gap-underserved-rural-cohorts", + evidenceStudyIds: ["SR-001", "SR-002", "SR-003"], + limitationSignals: ["few rural cohorts", "short follow-up windows"], + includesPrivateReviewerNote: false + } + ] +}; + +const riskyPacket = { + reviewId: "prisma-living-review-cardiometabolic-ai", + reviewDate: "2026-06-01", + maxSearchSnapshotAgeDays: 21, + minGapEvidenceStudies: 3, + eligibilityCriteria: { + version: "eligibility-v3", + fields: { + population: "Adults with cardiometabolic risk markers", + intervention: "AI-assisted remote coaching or monitoring", + comparator: "Usual care or non-AI digital support" + } + }, + searchSnapshot: { + id: "search-2025-12-01-medline-only", + capturedAt: "2025-12-01", + databases: ["MEDLINE"] + }, + aiPolicy: { + actionAuthority: "auto_apply", + humanApprovalRequired: false + }, + exclusionTaxonomy: [ + { + id: "outside_population", + label: 
"Outside target population" + }, + { + id: "wrong_intervention", + label: "No AI-assisted intervention" + } + ], + studies: [ + { + id: "SR-100", + title: "AI lifestyle coaching in adolescent athletes", + criteriaVersion: "eligibility-v2", + fullTextRequired: true, + screeningOutcome: "excluded", + humanApproved: false, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-a", + decision: "exclude" + }, + { + stage: "full_text", + reviewer: "reviewer-b", + decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "conflict" + } + }, + exclusionReasons: [ + { + id: "low_quality", + evidenceLocator: "", + evidenceQuote: "" + } + ], + aiRecommendations: [ + { + action: "exclude", + authority: "auto_apply", + applied: true + } + ], + reviewerNotes: [ + { + private: true, + exportedToAssistant: true, + text: "Reviewer B disclosed a conflict in a private note." 
+ } + ] + }, + { + id: "SR-101", + title: "Duplicate conference abstract for remote monitoring trial", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "included", + humanApproved: false, + duplicateGroup: { + id: "dup-cluster-77" + }, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-c", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-d", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-c", + decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "include" + } + }, + fullText: { + retrievedAt: "2026-05-12", + locator: "", + hash: "" + }, + aiRecommendations: [ + { + action: "include", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [] + } + ], + gapPrompts: [ + { + id: "gap-adolescent-ai-coaching", + evidenceStudyIds: ["SR-100"], + limitationSignals: [], + includesPrivateReviewerNote: true + } + ] +}; + +module.exports = { + cleanPacket, + riskyPacket +}; diff --git a/systematic-review-screening-drift-assistant/test.js b/systematic-review-screening-drift-assistant/test.js new file mode 100644 index 00000000..600c9866 --- /dev/null +++ b/systematic-review-screening-drift-assistant/test.js @@ -0,0 +1,38 @@ +const assert = require("node:assert/strict"); +const { evaluateSystematicReviewScreening, sha256 } = require("./index"); +const { cleanPacket, riskyPacket } = require("./sample-data"); + +const clean = evaluateSystematicReviewScreening(cleanPacket); +assert.equal(clean.summary.decision, "release_systematic_review_assistant"); +assert.equal(clean.summary.findingCount, 0); +assert.equal(clean.summary.studiesReviewed, 3); +assert.equal(clean.summary.gapPromptsReviewed, 1); +assert.ok(clean.summary.auditDigest.startsWith("sha256:")); +assert.equal(clean.gapPrompts[0].readyForRelease, true); + +const risky = 
evaluateSystematicReviewScreening(riskyPacket); +assert.equal(risky.summary.decision, "hold_systematic_review_assistant"); +assert.equal(risky.summary.studiesReviewed, 2); +assert.ok(risky.summary.findingCount >= 13); +assert.ok(risky.summary.criticalFindings >= 4); +assert.ok(risky.summary.highOrCriticalFindings >= 10); + +const findingCodes = new Set(risky.findings.map((finding) => finding.code)); +assert.ok(findingCodes.has("AI_POLICY_AUTHORITY_TOO_BROAD")); +assert.ok(findingCodes.has("AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL")); +assert.ok(findingCodes.has("CRITERIA_VERSION_DRIFT")); +assert.ok(findingCodes.has("ELIGIBILITY_CRITERIA_INCOMPLETE")); +assert.ok(findingCodes.has("EXCLUSION_EVIDENCE_MISSING")); +assert.ok(findingCodes.has("EXCLUSION_REASON_NOT_IN_TAXONOMY")); +assert.ok(findingCodes.has("FULL_TEXT_DUAL_REVIEW_GAP")); +assert.ok(findingCodes.has("FULL_TEXT_RETRIEVAL_MISSING")); +assert.ok(findingCodes.has("PRIVATE_REVIEW_NOTE_EXPORTED")); +assert.ok(findingCodes.has("PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT")); +assert.ok(findingCodes.has("UNRESOLVED_SCREENING_CONFLICT")); + +const firstDigest = evaluateSystematicReviewScreening(riskyPacket).summary.auditDigest; +const secondDigest = evaluateSystematicReviewScreening(riskyPacket).summary.auditDigest; +assert.equal(firstDigest, secondDigest); +assert.equal(sha256({ b: 2, a: 1 }), sha256({ a: 1, b: 2 })); + +console.log("systematic review screening drift assistant tests passed"); diff --git a/systematic-review-screening-drift-assistant/verify-video.js b/systematic-review-screening-drift-assistant/verify-video.js new file mode 100644 index 00000000..39af983c --- /dev/null +++ b/systematic-review-screening-drift-assistant/verify-video.js @@ -0,0 +1,37 @@ +const assert = require("node:assert/strict"); +const fs = require("node:fs"); +const path = require("node:path"); +const { spawnSync } = require("node:child_process"); + +const videoPath = path.join(__dirname, "reports", "demo.mp4"); 
// Verifies the rendered demo video: the file must exist, be larger than a
// trivially small output, and match the expected codec, frame size, frame
// rate, and duration as reported by ffprobe.
assert.ok(fs.existsSync(videoPath), "reports/demo.mp4 must exist");
assert.ok(fs.statSync(videoPath).size > 5000, "reports/demo.mp4 should not be empty");

// Query only the first video stream (v:0) for codec, dimensions, and frame
// rate, plus the container duration, and request JSON so it can be parsed.
// The ffprobe binary can be overridden through FFPROBE_PATH.
const probe = spawnSync(
  process.env.FFPROBE_PATH || "ffprobe",
  [
    "-v",
    "error",
    "-select_streams",
    "v:0",
    "-show_entries",
    "stream=codec_name,width,height,r_frame_rate:format=duration",
    "-of",
    "json",
    videoPath
  ],
  { encoding: "utf8" }
);

// When the binary cannot be launched, spawnSync reports it through
// `probe.error` and there is no stderr text; surface the real cause instead of
// a generic failure line. The exit code stays 1, as before.
if (probe.error) {
  process.stderr.write(`ffprobe could not be started: ${probe.error.message}\n`);
  process.exit(1);
}

if (probe.status !== 0) {
  process.stderr.write(probe.stderr || "ffprobe failed\n");
  // `status` is null when the process was killed by a signal; still exit non-zero.
  process.exit(probe.status || 1);
}

const metadata = JSON.parse(probe.stdout);
const stream = metadata.streams && metadata.streams[0];
// Fail with a readable assertion instead of a TypeError on `stream.codec_name`
// when the file contains no video stream.
assert.ok(stream, "ffprobe reported no video stream in reports/demo.mp4");
assert.equal(stream.codec_name, "h264");
assert.equal(stream.width, 960);
assert.equal(stream.height, 540);
assert.equal(stream.r_frame_rate, "18/1");

// A missing duration becomes NaN here and fails the range check below.
const duration = Number(metadata.format && metadata.format.duration);
assert.ok(duration >= 3.9 && duration <= 4.2, `unexpected duration ${duration}`);

console.log(`demo.mp4 verified: ${stream.codec_name}, ${stream.width}x${stream.height}, ${duration.toFixed(3)}s, ${stream.r_frame_rate}`);