From a0ee67c30a371a163d906eb9df6a682003888062 Mon Sep 17 00:00:00 2001 From: AlonePenguin <187998801+AlonePenguin@users.noreply.github.com> Date: Mon, 1 Jun 2026 08:20:36 -0400 Subject: [PATCH] Add systematic review screening drift assistant --- .../README.md | 43 ++ .../demo.js | 85 ++++ .../index.js | 398 ++++++++++++++++++ .../make-demo-video.js | 93 ++++ .../package.json | 15 + .../reports/clean-screening-report.json | 45 ++ .../reports/demo.mp4 | Bin 0 -> 11087 bytes .../reports/risky-screening-handoff.md | 36 ++ .../reports/risky-screening-report.json | 229 ++++++++++ .../reports/screening-dashboard.svg | 19 + .../sample-data.js | 376 +++++++++++++++++ .../test.js | 38 ++ .../verify-video.js | 37 ++ 13 files changed, 1414 insertions(+) create mode 100644 systematic-review-screening-drift-assistant/README.md create mode 100644 systematic-review-screening-drift-assistant/demo.js create mode 100644 systematic-review-screening-drift-assistant/index.js create mode 100644 systematic-review-screening-drift-assistant/make-demo-video.js create mode 100644 systematic-review-screening-drift-assistant/package.json create mode 100644 systematic-review-screening-drift-assistant/reports/clean-screening-report.json create mode 100644 systematic-review-screening-drift-assistant/reports/demo.mp4 create mode 100644 systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md create mode 100644 systematic-review-screening-drift-assistant/reports/risky-screening-report.json create mode 100644 systematic-review-screening-drift-assistant/reports/screening-dashboard.svg create mode 100644 systematic-review-screening-drift-assistant/sample-data.js create mode 100644 systematic-review-screening-drift-assistant/test.js create mode 100644 systematic-review-screening-drift-assistant/verify-video.js diff --git a/systematic-review-screening-drift-assistant/README.md b/systematic-review-screening-drift-assistant/README.md new file mode 100644 index 00000000..39caa93a --- 
/dev/null +++ b/systematic-review-screening-drift-assistant/README.md @@ -0,0 +1,43 @@ +# Systematic Review Screening Drift Assistant + +This self-contained module adds a deterministic AI research-assistant guard for systematic review screening packets. It is scoped to SCIBASE issue #16, the AI-Powered Research Assistant Suite, and focuses on whether screening decisions, exclusion rationales, and research-gap prompts are safe to surface to researchers. + +The assistant does not call external APIs, payment systems, live review platforms, or private data stores. All fixtures are synthetic and all checks run with Node built-ins. + +## What It Checks + +- Locked eligibility criteria versions and complete PICO plus study-design fields. +- Dual independent review at title/abstract and full-text stages. +- Unresolved reviewer conflicts before assistant output release. +- Structured exclusion reasons from an approved taxonomy. +- Full-text retrieval evidence, locators, and content hashes. +- Duplicate-cluster canonical record selection. +- AI recommendations limited to assist-only authority with human approval gates. +- Private reviewer notes kept out of assistant context and generated gap prompts. +- Research-gap prompts backed by enough screened-study evidence and limitation signals. 
+ +## Local Validation + +```sh +npm --prefix systematic-review-screening-drift-assistant run check +npm --prefix systematic-review-screening-drift-assistant test +npm --prefix systematic-review-screening-drift-assistant run demo +npm --prefix systematic-review-screening-drift-assistant run make-demo-video +npm --prefix systematic-review-screening-drift-assistant run verify-video +``` + +## Generated Artifacts + +Running the demo writes: + +- `reports/clean-screening-report.json` +- `reports/risky-screening-report.json` +- `reports/risky-screening-handoff.md` +- `reports/screening-dashboard.svg` +- `reports/demo.mp4` + +The risky packet intentionally demonstrates release blockers: criteria-version drift, missing criteria fields, stale search evidence, broad AI action authority, missing human approval, missing dual review, unresolved full-text conflict, missing full-text retrieval, invalid exclusion reason, missing exclusion evidence, private note leakage, and under-evidenced gap prompts. + +## Issue Fit + +This is a distinct AI-powered research assistant slice. It complements the broad assistant suite, evidence binder, structured abstract checker, external-validity transfer assistant, geospatial assistant, prompt-safety guard, omics review assistants, and generic peer-review generators by focusing specifically on systematic review screening integrity and exclusion-rationale drift. 
/**
 * Escapes the characters that are significant in XML/SVG markup so that
 * arbitrary text can be embedded in element content or a double-quoted
 * attribute value.
 *
 * @param {*} value - Any value; it is converted with `String(value)` first.
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entity references.
 */
function escapeXml(value) {
  // The ampersand must be handled first, otherwise the entities produced by
  // the later replacements would themselves be escaped a second time.
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll("\"", "&quot;");
}
/** Returns `value` when it is an array, otherwise an empty array, so callers can iterate safely. */
function asArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Serializes a value to JSON-like text with object keys sorted, so two objects
 * that differ only in key order produce the same text. Primitives are
 * delegated to `JSON.stringify`.
 *
 * @param {*} value - Value to serialize.
 * @returns {string|undefined} Serialized text (`undefined` for a top-level `undefined`).
 */
function stableJson(value) {
  if (Array.isArray(value)) {
    return `[${value.map((item) => stableJson(item)).join(",")}]`;
  }
  if (value && typeof value === "object") {
    const members = Object.keys(value)
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`);
    return `{${members.join(",")}}`;
  }
  return JSON.stringify(value);
}

/**
 * Hex-encoded SHA-256 digest of the key-sorted serialization of `value`.
 *
 * @param {*} value - Value to hash (must serialize to a string via `stableJson`).
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(value) {
  return crypto.createHash("sha256").update(stableJson(value)).digest("hex");
}

/** Parses a date-like value; returns `null` when it is missing or unparseable. */
function toDate(value) {
  const parsed = new Date(value || "");
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}

/**
 * Whole days (rounded down) from `earlierValue` to `laterValue`.
 *
 * @returns {number|null} Day count, or `null` when either date cannot be parsed.
 */
function daysBetween(laterValue, earlierValue) {
  const later = toDate(laterValue);
  const earlier = toDate(earlierValue);
  if (!later || !earlier) {
    return null;
  }
  return Math.floor((later.getTime() - earlier.getTime()) / (24 * 60 * 60 * 1000));
}

/** Numeric rank used for sorting and thresholding; unknown severities rank 0. */
function severityRank(severity) {
  return { critical: 4, high: 3, medium: 2, low: 1 }[severity] || 0;
}

/** Appends one finding record to `findings` (mutates the array). */
function addFinding(findings, severity, code, message, refs, action) {
  findings.push({
    severity,
    code,
    message,
    refs: asArray(refs),
    action
  });
}

/** De-duplicates truthy values while preserving first-seen order. */
function unique(values) {
  return [...new Set(asArray(values).filter(Boolean))];
}

/** Builds a set of trimmed, lower-cased, non-empty string forms of `values`. */
function normalizedSet(values) {
  return new Set(asArray(values).map((value) => String(value || "").trim().toLowerCase()).filter(Boolean));
}

/** Returns the entries of `required` that have no case-insensitive match in `actual`. */
function missingRequired(required, actual) {
  const actualSet = normalizedSet(actual);
  return asArray(required).filter((value) => !actualSet.has(String(value).toLowerCase()));
}

/** Counts distinct reviewers (case-insensitive, trimmed) across `decisions`. */
function reviewerCount(decisions) {
  return normalizedSet(asArray(decisions).map((decision) => decision.reviewer)).size;
}

/** Returns the study's decisions recorded for the given screening stage. */
function stageDecisions(study, stage) {
  return asArray(study.decisions).filter((decision) => decision.stage === stage);
}

/** True unless full text is explicitly not required or the study was excluded at title/abstract. */
function shouldReviewFullText(study) {
  return study.fullTextRequired !== false && study.screeningOutcome !== "title_abstract_excluded";
}

/** True when an AI recommendation was applied or carries any authority other than "assist_only". */
function isAiOverride(recommendation) {
  return recommendation.applied === true || recommendation.authority !== "assist_only";
}

// Human-readable names for the known screening stages. A plain
// `stage.replace("_", "/")` would render "full_text" as "full/text".
const STAGE_LABELS = new Map([
  ["title_abstract", "title/abstract"],
  ["full_text", "full-text"]
]);

/** Label used in finding messages for a consensus stage key. */
function stageLabel(stage) {
  return STAGE_LABELS.get(stage) ?? String(stage).replaceAll("_", " ");
}

/**
 * Reads a non-negative numeric threshold from packet configuration.
 * Numbers and non-blank numeric strings are accepted (an explicit 0 is
 * honored); anything else, including NaN and negative values, yields
 * `fallback` so a malformed setting can never silently disable a check.
 */
function thresholdOrDefault(value, fallback) {
  const isNumericInput = typeof value === "number" || (typeof value === "string" && value.trim() !== "");
  const parsed = isNumericInput ? Number(value) : Number.NaN;
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Evaluates a systematic-review screening packet and decides whether the AI
 * assistant output may be released.
 *
 * Checks, in order: eligibility-criteria version and required fields, search
 * snapshot age, AI policy, per-study screening records (criteria version,
 * dual review, conflicts, full-text evidence, duplicate clusters, exclusion
 * reasons, AI overrides, private-note export) and gap prompts.
 *
 * @param {object} packet - Screening packet. A missing or non-object packet is
 *   evaluated as an empty packet and therefore results in a hold decision.
 *   When `packet.reviewDate` is absent, the current UTC date is used, so the
 *   snapshot-age finding then depends on the wall clock.
 * @returns {{summary: object, findings: object[], studySummaries: object[],
 *   gapPrompts: object[], recommendations: string[]}} Findings are sorted by
 *   descending severity, then by code. `summary.decision` is
 *   "hold_systematic_review_assistant" when any critical finding exists,
 *   "revise_screening_packet" when any high finding exists, otherwise
 *   "release_systematic_review_assistant".
 */
function evaluateSystematicReviewScreening(packet) {
  const reviewPacket = packet && typeof packet === "object" ? packet : {};
  const findings = [];
  const reviewRef = reviewPacket.reviewId || "review";
  const reviewDate = reviewPacket.reviewDate || new Date().toISOString().slice(0, 10);
  const eligibility = reviewPacket.eligibilityCriteria || {};
  const taxonomy = new Map(asArray(reviewPacket.exclusionTaxonomy).map((reason) => [reason.id, reason]));
  const studies = asArray(reviewPacket.studies);
  const requiredCriteriaFields = ["population", "intervention", "comparator", "outcome", "studyDesign"];
  const studySummaries = [];

  // --- Packet-level checks -------------------------------------------------
  if (!eligibility.version) {
    addFinding(findings, "critical", "ELIGIBILITY_VERSION_MISSING",
      "The systematic review packet has no locked eligibility-criteria version.",
      [reviewRef], "lock_eligibility_criteria_before_ai_screening");
  }

  // Only the presence of each field key is checked, not its content.
  const missingCriteria = missingRequired(requiredCriteriaFields, Object.keys(eligibility.fields || {}));
  if (missingCriteria.length > 0) {
    addFinding(findings, "high", "ELIGIBILITY_CRITERIA_INCOMPLETE",
      `Eligibility criteria are missing required fields: ${missingCriteria.join(", ")}.`,
      [eligibility.version || reviewPacket.reviewId || "eligibility"],
      "complete_pico_and_study_design_criteria");
  }

  const maxSnapshotAgeDays = thresholdOrDefault(reviewPacket.maxSearchSnapshotAgeDays, 30);
  const searchAge = daysBetween(reviewDate, reviewPacket.searchSnapshot?.capturedAt);
  if (searchAge === null || searchAge > maxSnapshotAgeDays) {
    addFinding(findings, "medium", "SEARCH_SNAPSHOT_STALE",
      `Search snapshot is ${searchAge === null ? "missing" : `${searchAge} days old`} for ${reviewRef}.`,
      [reviewPacket.searchSnapshot?.id || "search-snapshot"],
      "refresh_search_snapshot_before_gap_generation");
  }

  const aiPolicy = reviewPacket.aiPolicy || {};
  if (aiPolicy.actionAuthority !== "assist_only") {
    addFinding(findings, "critical", "AI_POLICY_AUTHORITY_TOO_BROAD",
      "AI assistant policy allows actions beyond assist-only recommendations.",
      [reviewRef], "set_ai_policy_to_assist_only");
  }
  if (aiPolicy.humanApprovalRequired !== true) {
    addFinding(findings, "high", "HUMAN_APPROVAL_GATE_MISSING",
      "AI screening output can be released without an explicit human approval gate.",
      [reviewRef], "require_human_approval_for_screening_release");
  }

  if (studies.length === 0) {
    addFinding(findings, "critical", "NO_STUDIES_TO_SCREEN",
      "No studies were supplied to the screening assistant.",
      [reviewRef], "attach_screening_records_before_assistant_run");
  }

  // --- Per-study checks ----------------------------------------------------
  for (const study of studies) {
    const refs = [study.id || "study"];
    const studyLabel = study.id || "Study";
    const needsFullText = shouldReviewFullText(study);
    const summary = {
      id: study.id,
      outcome: study.screeningOutcome || "unknown",
      requiredActions: [],
      titleAbstractReviewers: reviewerCount(stageDecisions(study, "title_abstract")),
      fullTextReviewers: reviewerCount(stageDecisions(study, "full_text"))
    };

    if (study.criteriaVersion !== eligibility.version) {
      addFinding(findings, "high", "CRITERIA_VERSION_DRIFT",
        `${studyLabel} was screened against ${study.criteriaVersion || "no criteria version"} instead of ${eligibility.version || "the locked version"}.`,
        refs, "rescreen_against_locked_eligibility_version");
      summary.requiredActions.push("rescreen_against_locked_eligibility_version");
    }

    if (summary.titleAbstractReviewers < 2) {
      addFinding(findings, "high", "TITLE_ABSTRACT_DUAL_REVIEW_GAP",
        `${studyLabel} does not have two independent title/abstract reviewers.`,
        refs, "assign_second_title_abstract_reviewer");
      summary.requiredActions.push("assign_second_title_abstract_reviewer");
    }

    if (needsFullText && summary.fullTextReviewers < 2) {
      addFinding(findings, "high", "FULL_TEXT_DUAL_REVIEW_GAP",
        `${studyLabel} does not have two independent full-text reviewers.`,
        refs, "assign_second_full_text_reviewer");
      summary.requiredActions.push("assign_second_full_text_reviewer");
    }

    for (const [stage, consensus] of Object.entries(study.consensus || {})) {
      if (consensus && consensus.state === "conflict" && !consensus.resolution) {
        addFinding(findings, "critical", "UNRESOLVED_SCREENING_CONFLICT",
          `${studyLabel} has an unresolved ${stageLabel(stage)} screening conflict.`,
          refs, "resolve_reviewer_conflict_before_ai_release");
        summary.requiredActions.push("resolve_reviewer_conflict_before_ai_release");
      }
    }

    if (needsFullText) {
      if (!study.fullText || !study.fullText.retrievedAt) {
        addFinding(findings, "high", "FULL_TEXT_RETRIEVAL_MISSING",
          `${studyLabel} reached full-text screening without retrieval evidence.`,
          refs, "attach_full_text_retrieval_evidence");
        summary.requiredActions.push("attach_full_text_retrieval_evidence");
      } else if (!study.fullText.hash || !study.fullText.locator) {
        addFinding(findings, "medium", "FULL_TEXT_PROVENANCE_GAP",
          `${studyLabel} full-text evidence is missing a locator or content hash.`,
          refs, "record_full_text_locator_and_hash");
      }
    }

    if (study.duplicateGroup && !study.duplicateGroup.canonicalStudyId) {
      addFinding(findings, "medium", "DUPLICATE_CLUSTER_CANONICAL_MISSING",
        `${studyLabel} is in duplicate cluster ${study.duplicateGroup.id || "unknown"} without a canonical record.`,
        refs, "select_canonical_record_for_duplicate_cluster");
    }

    if (study.screeningOutcome === "excluded") {
      const reasons = asArray(study.exclusionReasons);
      if (reasons.length === 0) {
        addFinding(findings, "high", "EXCLUSION_REASON_MISSING",
          `${studyLabel} is excluded without a structured exclusion reason.`,
          refs, "select_taxonomy_backed_exclusion_reason");
      }
      for (const reason of reasons) {
        if (!taxonomy.get(reason.id)) {
          addFinding(findings, "high", "EXCLUSION_REASON_NOT_IN_TAXONOMY",
            `${studyLabel} uses exclusion reason ${reason.id || "unknown"}, which is not in the approved taxonomy.`,
            refs, "map_exclusion_to_approved_reason_taxonomy");
        }
        if (!reason.evidenceLocator || !reason.evidenceQuote) {
          addFinding(findings, "high", "EXCLUSION_EVIDENCE_MISSING",
            `${studyLabel} exclusion reason ${reason.id || "unknown"} lacks full-text evidence locator and quote.`,
            refs, "attach_full_text_evidence_for_exclusion");
        }
      }
    }

    if (asArray(study.aiRecommendations).some(isAiOverride) && study.humanApproved !== true) {
      addFinding(findings, "critical", "AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL",
        `${studyLabel} has an AI screening recommendation that can override or apply without human approval.`,
        refs, "downgrade_ai_recommendation_to_assist_only");
      summary.requiredActions.push("downgrade_ai_recommendation_to_assist_only");
    }

    if (asArray(study.reviewerNotes).some((note) => note.private === true && note.exportedToAssistant === true)) {
      addFinding(findings, "critical", "PRIVATE_REVIEW_NOTE_EXPORTED",
        `${studyLabel} exports private reviewer notes into the AI assistant packet.`,
        refs, "remove_private_notes_from_ai_context");
      summary.requiredActions.push("remove_private_notes_from_ai_context");
    }

    studySummaries.push({
      ...summary,
      requiredActions: unique(summary.requiredActions)
    });
  }

  // --- Gap prompt checks ---------------------------------------------------
  const minGapEvidenceStudies = thresholdOrDefault(reviewPacket.minGapEvidenceStudies, 3);
  const gapPrompts = asArray(reviewPacket.gapPrompts).map((prompt) => {
    const refs = [prompt.id || "gap-prompt"];
    const promptLabel = prompt.id || "Gap prompt";
    const evidenceStudyCount = asArray(prompt.evidenceStudyIds).length;
    const limitationSignalCount = asArray(prompt.limitationSignals).length;
    const leaksPrivateNote = prompt.includesPrivateReviewerNote === true;

    if (evidenceStudyCount < minGapEvidenceStudies) {
      addFinding(findings, "medium", "GAP_PROMPT_UNDER_EVIDENCED",
        `${promptLabel} cites fewer than ${minGapEvidenceStudies} screened studies.`,
        refs, "add_more_screened_study_evidence_before_surfacing_gap");
    }
    if (limitationSignalCount === 0) {
      addFinding(findings, "medium", "GAP_PROMPT_LIMITATION_SIGNALS_MISSING",
        `${promptLabel} has no limitation signals from excluded or unresolved studies.`,
        refs, "attach_limitation_signals_to_gap_prompt");
    }
    if (leaksPrivateNote) {
      addFinding(findings, "critical", "PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT",
        `${promptLabel} includes private reviewer-note content.`,
        refs, "regenerate_gap_prompt_from_public_screening_fields");
    }
    return {
      id: prompt.id,
      evidenceStudyCount,
      limitationSignalCount,
      readyForRelease: evidenceStudyCount >= minGapEvidenceStudies &&
        limitationSignalCount > 0 &&
        !leaksPrivateNote
    };
  });

  // --- Decision and audit trail --------------------------------------------
  const highOrCriticalFindings = findings.filter((finding) => severityRank(finding.severity) >= severityRank("high")).length;
  const criticalFindings = findings.filter((finding) => finding.severity === "critical").length;
  let decision = "release_systematic_review_assistant";
  if (criticalFindings > 0) {
    decision = "hold_systematic_review_assistant";
  } else if (highOrCriticalFindings > 0) {
    decision = "revise_screening_packet";
  }

  // `findings` is local to this call, so sorting it in place is safe.
  const sortedFindings = findings.sort((a, b) => severityRank(b.severity) - severityRank(a.severity) || a.code.localeCompare(b.code));
  const recommendations = unique(sortedFindings.map((finding) => finding.action));
  const auditDigest = `sha256:${sha256({
    reviewId: reviewPacket.reviewId,
    inputDigest: sha256(reviewPacket),
    decision,
    findingCodes: sortedFindings.map((finding) => finding.code),
    recommendations
  })}`;

  return {
    summary: {
      reviewId: reviewPacket.reviewId || "systematic-review",
      decision,
      findingCount: sortedFindings.length,
      highOrCriticalFindings,
      criticalFindings,
      studiesReviewed: studies.length,
      gapPromptsReviewed: gapPrompts.length,
      auditDigest
    },
    findings: sortedFindings,
    studySummaries,
    gapPrompts,
    recommendations
  };
}
/**
 * Renders one animation frame as a binary PPM (P6) image and writes it to
 * `framesDir` as `frame-NNN.ppm`.
 *
 * Relies on the module-level `width`, `height`, `fillRect`, `framesDir`,
 * `clean` and `risky` values defined elsewhere in this script.
 *
 * @param {number} index - Zero-based frame number, used for the file name.
 * @param {number} progress - Animation progress; the caller passes values from 0 to 1.
 */
function writeFrame(index, progress) {
  const trackWidth = 302;
  const buffer = Buffer.alloc(width * height * 3, 248);

  // Page background, white card, and the dark strip along the card's top edge.
  fillRect(buffer, 0, 0, width, height, 248, 250, 252);
  fillRect(buffer, 56, 48, 848, 444, 255, 255, 255);
  fillRect(buffer, 56, 48, 848, 8, 15, 23, 42);

  // Both bars are clamped to [0, 1] of the track. Without the upper clamp the
  // risky bar reaches floor(302 * 1.275) = 385 px at progress 1 and spills
  // past its 302 px track and the right edge of the card.
  const cleanFraction = Math.min(1, Math.max(0, progress * 1.7));
  const riskyFraction = Math.min(1, Math.max(0, (progress - 0.15) * 1.5));
  const cleanWidth = Math.floor(trackWidth * cleanFraction);
  const riskyWidth = Math.floor(trackWidth * riskyFraction);
  fillRect(buffer, 96, 112, trackWidth, 58, 226, 232, 240);
  fillRect(buffer, 96, 112, cleanWidth, 58, 16, 185, 129);
  fillRect(buffer, 560, 112, trackWidth, 58, 226, 232, 240);
  fillRect(buffer, 560, 112, riskyWidth, 58, 239, 68, 68);

  // One tile per study reviewed in the clean packet.
  const cleanStudies = clean.summary.studiesReviewed;
  for (let i = 0; i < cleanStudies; i += 1) {
    fillRect(buffer, 116 + i * 76, 234, 48, 96, 20, 184, 166);
    fillRect(buffer, 124 + i * 76, 248, 32, 12, 255, 255, 255);
    fillRect(buffer, 124 + i * 76, 276, 32, 12, 255, 255, 255);
  }

  // One bar per risky finding (at most 13 are drawn); the first
  // `criticalFindings` bars use the darker red.
  const barCount = Math.min(13, risky.summary.findingCount);
  for (let i = 0; i < barCount; i += 1) {
    const barHeight = 26 + (i % 7) * 15;
    const [r, g, b] = i < risky.summary.criticalFindings ? [127, 29, 29] : [220, 38, 38];
    fillRect(buffer, 560 + i * 22, 370 - barHeight, 16, barHeight, r, g, b);
  }

  // Two progress strips along the bottom of the card.
  fillRect(buffer, 96, 418, Math.floor(340 * progress), 18, 37, 99, 235);
  fillRect(buffer, 96, 448, Math.floor(602 * progress), 18, 217, 119, 6);

  // PPM "P6" header followed by the raw 8-bit RGB samples.
  const header = Buffer.from(`P6\n${width} ${height}\n255\n`, "ascii");
  fs.writeFileSync(path.join(framesDir, `frame-${String(index).padStart(3, "0")}.ppm`), Buffer.concat([header, buffer]));
}
00000000..03dd4543 --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/clean-screening-report.json @@ -0,0 +1,45 @@ +{ + "summary": { + "reviewId": "prisma-living-review-cardiometabolic-ai", + "decision": "release_systematic_review_assistant", + "findingCount": 0, + "highOrCriticalFindings": 0, + "criticalFindings": 0, + "studiesReviewed": 3, + "gapPromptsReviewed": 1, + "auditDigest": "sha256:6d084a53e0ef678115431ea547a0eaa249f5544cf34f9d448ad70d32c6c0dafe" + }, + "findings": [], + "studySummaries": [ + { + "id": "SR-001", + "outcome": "included", + "requiredActions": [], + "titleAbstractReviewers": 2, + "fullTextReviewers": 2 + }, + { + "id": "SR-002", + "outcome": "excluded", + "requiredActions": [], + "titleAbstractReviewers": 2, + "fullTextReviewers": 2 + }, + { + "id": "SR-003", + "outcome": "included", + "requiredActions": [], + "titleAbstractReviewers": 2, + "fullTextReviewers": 2 + } + ], + "gapPrompts": [ + { + "id": "gap-underserved-rural-cohorts", + "evidenceStudyCount": 3, + "limitationSignalCount": 2, + "readyForRelease": true + } + ], + "recommendations": [] +} diff --git a/systematic-review-screening-drift-assistant/reports/demo.mp4 b/systematic-review-screening-drift-assistant/reports/demo.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6d858d22bd43968e5ef47f14ed655fd09d31d528 GIT binary patch literal 11087 zcma)i2RxPU-~WA_?2Lw$>+^YEpLKn%dmIRYP*$#9L|YQU5rW7d_zVbR?v6t{ z5=GGvM8V@oAh<&i^DtALe9cLaIymaZhwI*NB7x$Z#$1Q?fUPv?F0 zj+VCOU<~DG`ET3R01q#8scuJ7z{b+SWe)>#x3$FoX~XBs;F_Rj?qunJhk5o99c`Vg zK;y8x;|}OQ)#kO_9Xo32f`@tF10LV+=Hh_bZSNB%xmq}Yb}`A7^d}%NPbwTVaE6b6 zV(xSxtAN=+=N;PNVA2Hq;ut~9SwXZg8s%VX;fWC!-C^GS$GW`@8sdOU5n%~&ZjV50 z*5F*wC%(8o%0-ElZP z4(kWB*$1}|13TbAzz5t92r-HK5AV6%J`j|hvHg4J9a+H32w#Jw`E&h;@9-YqpKJIM z#{M_ozw!Q8Igp5d@cxnm^Z)7p|LOA=KhWb3|G)eHlK%(pzxn=s{eQyyAN=)ioWHLB zVekLymw(vvf7O3S-=iS*xSh!C#O6<4?Bswb81vp6gHLK;8O#ATOVCyZRKME?VZhk{ zkPjeLKr(zlFs~3Gc|dmn!I-cef5;lNVLf0Ou$FMJUGM>7LILY*;b!S-4$fB{@jGXD 
zjO(8T3lupQb0X0J9{6(z*zQ(EzC2d~49H<=z9S4kAqZ3sD^NMm;7=H?I8;_HcsvC0 z?Q;ZNU@;a^kZ*724=R6u{k2c+MGSv0s*h)G9xg8^0Thnlf=8jn#ZWFH7z_qwAtEL& zEM^V{)C2*8ke05ps(>g8dsz{1TH#LwPA)r>;A|buU13FRom}xQ4(5Of^jkQ%xtM#I;s}mJb60SU z13m&9uB{V*0E{l?Fprgsxg(wgyk%iZ^a5>LOBoO>Q*%pmB5a0*sfDdM37*8(1;1mM z2j14&#?=Ch5r}vvQ)>bdjP7<4fv7#+3t-D&#Dw>nreGVCL7zpDaCj#?&dpUuR2Wv% z#T@pj3!Y>H#9eTv|7|Wj?t+uSxmckb!R~^G%>oxP7!g5XlrwCMjIf|MXu|s|kOgOR zPZ@DZ&>*?ui87)nTOx=O$OsTbfMxCs5&#Y-WBI=i_m$or#uTWe zB%c!O8$a$OX}UqbW>*F8b$frAUkRe_6D57QJDvL;K{|xdoN|9Hxoz^#Ni$X)dAW*9 zIvbC#R|8}uvY_lSOY-_*@{O5yMHstar{VidmmXzuCR$V+N~g0lNQEF_k;hfmJxz5E z*zqY^I<{sAVng>mCRuA*uAX}!jM|x&#c)!Sb3wbvN739Q z&+2=--lFKMlUW_w{U3X%?Y`>jBD#pG0gzvgSok0!vzlA^YTY{}Hk%)|9$SHeu~XR| z`Afat>vbK!*dd`0mEz^jc-GJ59ugiBA_wl8c4*Tk<6KdJafbF-xw$pwFGiPMUK6_Q ztMlnu&UjWQ;aa*ur9VaT)!K~7@f$-t>PY96q;TyJT{Wk30X9E0Y)al$3%2}3o+vJc zAnKx#RQ7jgPq^_!#M8eDO{KQIRU%yeRW#vHOH{%G36k~G`J^bpmK>xFN+$e?0ih#< zc|2)=R=Hla`U-CmDpx*ObtpkTQkGcugepUU)g3#Jlp$MjoajQOzKYjUNi}Iy7 z-}$hs#gj|Ty>t24+c};}jM3Yzbs?E#EMKTEG33v#V9LHVDp1i?1#vejKD#}J(|7B; z{N#QW^A@+6O6&!6ZN1sudx~H29`ihhD8nIIf7_**;shSUKvCnW{CIKI(Gnd>d23cP zPIpneF!PW0t#g-*y>C_Z99ue-<>_FD%Xw^qW6#Qc+Hd}5^ui-gx3os3Fk{RXO<@0N zTei0Nx>BQ>fFHlQiJQR{oj#;&>Q@0cNBfQ~F<<4{I*~JPKIo^1et&o0|FV0R+~HLh z=jg$Pm?SE`8>$aZ3HRS_<|lPXj6U`A(EZie!f5kq<%D)ObOv%Vz7A5t9t z4T4qL9Iw(-Oy{Pip8Ko_45Uc>>~xA;+7xdY%dotc{CJH$`zyN6p7vRbNa=N#X`Tbe zV_#dZLX@Nv8*iM`o7V5q1yatm@jsJkR5ELGLzA~g==zlUT<2pO?2SCt}J1~%_du$67D#ipbn92NGm-w@6>uF$}o0} z!%p;Pd$@$sMV3A*y?Zg)QusSn5JH9Phi4Aw+IPWP`lWM|nqk!Qj=qxdOuRQwKe*?f z^}Pn-uu)EuHIc1LGq|`-zP}}T`|I|{fUJX8JRMYwG^vNC$s1zsPpJwBmiwvQI$j{^v%zSsLPOT{ zlN_az?{l0=S&JC!@2jO{VPBRMsN#y8_>c|VD(hGJto6EmZ_nLoh+d;N)S^-SLFINh z^_K6oBxbF+6mzXd;}62FtfcuXbqHFR#!Ye`75{W&u6JYGFfep3Vk5u0s*^`bxX51n zgsgq@Vhf@Brnr%Icgd?OCa1C=qIyi9E766~?GFjR-5l8#A9T60Af>6B$#T5Fwskb*T+435GXm^-^-m&Bgu8S00+^f2Y=~5H;%6y!$0X8}m+s{P|>Ar2l>t z%j;*Nchqhr4T@EihG9yIA@*Dk78+H~#I5Q})|NF+mlcnO^Lf5>K+o1crxG{WtfA;g zqJCpoAChJDY^?EyE!H*z7wcQjo}5M~uk2*u-OubzB_z*w*(w?T%2RK#C&2FL*K@1t 
zHP?xkE+4UOtSzfB!<^OUQvOyDz0s7V`t3(z`E*iGjEI8r^BGU2=;O*(2fr?{MJC)i zsWh(erLjPI)D=1OE|M7f+)NxFlW8G#J9Dh7A%|n^z+J86`el=VfO&cRB`y0(##5(i zUv*7?kLWG25<^V{yP4!)k3HO2?wz{2aDrNy(}Hru=}cG9kftfEf0v>LNm8yhB<*G) zXNy!w>;4eV$adjX(*x>ewKv|xyC@eswG$Vn*Q3cO3{pbc1=tHy!-RiITG-qws+H5F zOu8u_*;|&XXZflFZ>zJKai7BM*=eTn{&(tnvZ)lY>%VXY9_n`MHO^>qzE4wXu}1X5 zol7&E0@tb@+`htTlKElWb=R@pG^Trp z*~*bNPpG>8IAQX~R(5RA!l%T6;pf2umt?CjN6Ch+z3ArD;L75@*SJ#2+%`PKtH7B$ zAe^W<$t_!guE?aUQ7vYndREflJKOWEz1aJc2#Y6Ib91QUO2f$u^`C|+A;_95l|!=i z$31uBJ7py1Mkp^LbWWSJKys;4F~2CU#Gm|qSK|i${MjD*!iOiR zQ)6qj8@u+;Rglus@5U|E+)uTCr0Ons>}FkZhiv1$<+(s6KEK?%to#dX4$M3f+?%Z6 zE>}lvpznSrH9zr{j>6E0q7Czzhwe;bD)MNi`Wak-&Ih&8iLrq8R>o@Cd-PGWlD2mY zm(*j4&2xiR3ElnH>AKAoZjZ?N6U6XFj1fYsb(K&tlPW)JC|6v4#6v&0?eHB-J{cjLUf`$|{J=mX7eGzeMG{6IN(ptEHce zQzSKO{Q4{2D_^8?Zp$leuoP|i5Zi|A`Xpb*W-3F38w%~N19Q#ie13_#(GM)DL1#kW z4&UZ7j@JKTqjJ4-Las>Z2j;o;IT|$?6XiTZ72L{!7k(*dr4jT7-+6X)c*SS^E>}S< zu56hs8O4vPN#TAUmbaG?P^Z|M=jN1Nm1J;s{{7HWvFXdXE&gNizhAwZDqq$ujv_nM z=-u+wAwzT{Y>QXwdg>5+SivRPT-l&?>|kgKjiE!wW0&xwsr964 zP#P`UASP_6U4k3h;|vJ863cw_M*>x*jb^3qlsYfu$2DI*PJuNNrVN$+fnvxe?)!2a z@rw_^!mmHe+)(Tsad#!j*pYiGupg81y5oIh^LAD;PW81V=g$Y7*v!M1{lN_sVo-2- z{ESB$cq-hz7i0&|j0iJqJhy}5+pWA9h^i*(B=U=246CnmD1FfHcf#aR_O^Gvs@rcb zJh&_Evt*1p^R{-|Or1oNwEZdHu<6+hOQ%_QwQR(K!D z=ueRf((8jLet4^;(-M{2Ho>^xGiq%QMAC5;3>C-K}r2&<|f3A&LGeJP%)9VN(EzsbLKim)z|n< z?StcoFQ%XLIrh?E-rF&K^US-jCt$UAVK7GCJ~sT7fu zE+Oo=;7emtmh@c){hbq+WnXPgYi>M2CYjk>VH;~TnPMFNR?*NjP&+IfH`t>Wq+eTE z(A)np2BO<}CPpMGqJ_CcYfWPtBR_K0HQ0@Sp?X9fjCGc39B1gDt_ zHdJan{}Ul3S-kIKyg!l_f}m<_ys|V`)TvwGJ|6@NIxOVndc$zayym&YP1=W}@6okd zEjXKYO5WBh?>P1^Zo9LPzin(SkrVchl%DbNGhyeH7yEsOVxi)=Pp7t0!FUYW4VL+n zQDps*kC(fC%E?sH(%rLutHn^yMLqOvolhNkrV6M*W*`=SeoHr7GE#qRc2wblr%L<4 z@2IWt*eHIvXWN|8G?3pYN062n(Ycb{VXTi`i4 zSO(uGkYtY?TI+aV5Gn8^!oX>D=HdCQRc?{WQoX`QvggSO%VA}D^GS{C_$~9_8xOy?AXwt@7+0)hL*=5gV&Q%49dE<5C2C?*CKmFY}GcVK*-SR0s z8K(B(Wz$QjC29?3}c>Afgb?6XJHL_xx%r#EP|bw9o%vJ-pN`+P_!xETtzbVTVJJ`<4+%}F< 
zKjU@IqBGv>q@S1}>tO#2w#VhJ9LXISG?8V8Bl-@jW_@8W6Xh|vvJuc_$a^ASpC)i5 z!o@r|CE-UHh8k^rL&i$xnr3Z9Xa@Y^4TDJG-*x|Um(9YJ7tuqEO$?TgdlQH9HGb^x zNsx!b@MftZSnNc(&a&MCNLL`0i%meQkI)B4SKZm(%DDwW+(S$b44)y64n-=34B?;g%5+6N+DFSG8a(c|IlVb%5{Kgvv>qAO^?AmUqc&*9B`6f)mqumv*cM97 z!ymLzJ0(-bCw}TG^!|l?Z>Zg;u6kBa%boy%ngaI11RDNh64k%&=^59kr+0@nzK0ITt(#EEr zN@618)zhG@sppYhy=q16x}GRHvlO0ydX&Crlh!9Qs}}S|WcjB*Z}%|n zHw17Jh!m&YWRDS?Eqm-wQLZOCBWTyD%b~pgT{NxfjF3ef=_kCP_Vcc4kLoem{4z?0 z41pgZbwmnr*VpbK)z>5rm6kyVV!T3r*D+R-J{rYS(3tYY7Lw~74fexF+-u*o^@fhymxtwL7C?RwmW=Tt{mzDhAj% zBp2!bszwj^MiRI!LP~GHwK1OQ&bF9e4%g7BSF!sM zl(?#&hCS4Lq&~2g!vk+xq~rF@!2=QwZ%19e&zvtwGSSjLcc1Tc)Xm9g1C0nmW+9kC z29dJ78$f1h0otuCEp27D7ljdFDRmA;%+m>z+~8fgE+RVN`I#!pg`h8bpNBdRv0jK` zKO0{(OzjB(w6F=kc2p$e*xf;>J~S|TFTX}#^Xu{{1~z%{=3nGu0DZy*1{Bik{is5xAdjdUybyJ<@vMIVG88?1@IzJY*xKh$6?Ey@ zgyO~}GGi(UA7lB&XbNY39JWz2ruNX8Z*|YF(w>J{K9vsmN=vXlU|H*zyvRT7#04-D zvGM2r#x!^uWj>J4@lshq%yG*|EP>_Sko4iMYp>sGT(9}!Gt3jKm2TrQH#Nf7BmYEk zsS_f*YZs=|Jv1V<=v^3Wm>z3XKI!yyEc!)O<(v0icSIz^^AN%nZ~bQxAYFrzwmItHXo+&V-$(O><~fj`ZO>KBGimDx>9Q5R3x)X=0NNl@CY@ z?d)2==0|d`K}>CUWS$45K6gm5;>#l|;S!!+tGdC0h6mddCwUmrN8%&ER>NKX*6RWp zx!wlJ)_!vV^{@i%9m~i7FB~di`soangNLH)G9PX^)!|T@Fwjq5S?@F6d!5 zyrNN?{MTKf0Cl&5y7z5V(F>vva2s~es{f8s@z?*}ji_`Q7)d|R5mqmAlb@BK(sR$4 zyODpbA;*j_j+;@NX~6`Dv9U(Q$IHL{K0$Y8o9Ih1)Q@a;^|XTnYlw|c{?~Rwzx_1w zR8cbSz*ps?IaRjZaW)Fx9&LG(FY)P#jUou-bgx6-&&r$kQ!U||y>*IoFmLs4>9?Q+ zYE47d4M)6^kJYLJq62^{Ba&b5S^RGPY&(#>Aj$&y0k_)bkwQ_sV;L4_y z)rYum$y=||0HiP?d3evzlyY90J{7g4kJiVIPY0MV+JgN9i4c482J`f*@As*Lz4Mko zivS>W1sgxUoBr&(MTGq=Ttv1cTllf%>zr2P@9HO=vREsng$!091sLs2H)Q~iX@xOWU+Wt z;XDPgpUAl;L}#zEDKauz!IzV@Qo1%EZ4{#A$1(jJ1dbwQGMK#1xBMIPL|F0X!>++& z3f-6YCpRlr1K!vH1Gv8HKt{kM|!kq9WX0Bx$OF4|+%l zTbx#MX*J>nA!{xd8o#XG=zaT^XU4QnEeslqO|aR?t!k}9^t;uP&H$`{vSFfZOk!H= zb&c8=n`{kDdA4IG<~z@pDKQhz#b|jxbM4?h4V9^XrRg8~yc#z2jKAo@3J2*_#S!8G zN|p^dqeDRhY#Ub9*Nhg8d&P6P^nS~Il9D~A$r}6)<2ih8<;z`Zotv}^T{FwDh5^3v zN*|-I@^7x@ywF_x9>L#12~@)(Qi66Oz<@E|CwFjwS#lC;$G{U_y 
zai6pu#fx(6BkSq+ZAXTYH9lm-A7hw{*P|n)vLkKFNz`cEs%!3OSq`egR(}kG{od9cg#aeHTap#0GqJzmn9VIeX48Dk9&CK zmpmw&6w1#9km+$xlkZOjb|WEyF7vO>k%QtxDqW@C@5Ox2LTIDLs(UN*i5K7P3Ua08!T&CR55*GKXt_&ur8n#9EliEIe+nt!)Or%g}wLlA_?0_sc+W;#u*;rV&L zx>%qFWQ&b=1KK;pX&%=cI zq|h+9Ps34XM$O#fK4+_#LnI|(P2_&CMz>hnq>}E{Tdg9At6Oz>U5}%}m}5n{=5=#X zU^3v?=soqa)$GEfE0n%-e{cpr0YD<#WJ21N@;++^Q_|E5zO&&1+!wL&vAdN|uhwtp zZW&3|xF~u!rtSWG&F4a6`f05|HiaH;SR+-_ zu@N_8XYtzns$u4(J0Cng1KhKSk!XQQA|amLoNGza zR|gD2n54SrQ#-sT%-qI1*%Tafds*cX|V}A5K4y@UIYV%9)zj(hndu3mb z7+JFNia~%d?+5@9L8M&Uc@{(>9?ajMjK3ArtVExl9>Q8+28xqB{b&SS`ERVzPejy~ z7A6{J+)FM2VrLL3#(TlN3xcaS`eS)lJoAzjIUqR zOzlTSXeEDisj9r!p5||#MG`%Q5~z(-ecZGd4*R)$LZqQD+C*l;OAZk+a3x_LLZ;stP^C~I4{%S}< zsxWfmOG5Cei=cd5GZv8f^qHr`>ux~{kE#O88nJd*R~~?Yhw!vL54vjFvHLSbpUQ}P z<70ZMi19#8hktTz!~}PAD4d_OQa2{g@ordJrt6o+I>Vyagp56J?magn$*xLUQnja- zUED@<-bBDs3{S=diCheVpkuvfpN4(Di}3ob&v0)$GLE~MqC1Z>RDYk9 zTYXJ=ZliP7m}m?3Q}|^X`JP|w%_^Jk^WAUxhUIt->U070-+DR&nb%l1}iF(`w{-Ph$)LTyUkHzu#VJ`j?xKT!*N<|KBJ+JT7J b;QAHa9#v+MloZLIN_-vY*E&`i#cusy&YvSY literal 0 HcmV?d00001 diff --git a/systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md b/systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md new file mode 100644 index 00000000..a9d89943 --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/risky-screening-handoff.md @@ -0,0 +1,36 @@ +# Screening Assistant Handoff: prisma-living-review-cardiometabolic-ai + +Decision: hold_systematic_review_assistant +Findings: 18 +High or critical findings: 13 +Audit digest: sha256:0c9ae18fee18fd54d887b93c23b161376559b3d740fe538f9902fe916ee704d4 + +## Required Actions +- set_ai_policy_to_assist_only +- downgrade_ai_recommendation_to_assist_only +- remove_private_notes_from_ai_context +- regenerate_gap_prompt_from_public_screening_fields +- resolve_reviewer_conflict_before_ai_release +- rescreen_against_locked_eligibility_version +- complete_pico_and_study_design_criteria +- 
attach_full_text_evidence_for_exclusion +- map_exclusion_to_approved_reason_taxonomy +- assign_second_full_text_reviewer +- attach_full_text_retrieval_evidence +- require_human_approval_for_screening_release +- assign_second_title_abstract_reviewer +- select_canonical_record_for_duplicate_cluster +- record_full_text_locator_and_hash +- attach_limitation_signals_to_gap_prompt +- add_more_screened_study_evidence_before_surfacing_gap +- refresh_search_snapshot_before_gap_generation + +## Top Findings +- CRITICAL AI_POLICY_AUTHORITY_TOO_BROAD: AI assistant policy allows actions beyond assist-only recommendations. +- CRITICAL AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL: SR-100 has an AI screening recommendation that can override or apply without human approval. +- CRITICAL PRIVATE_REVIEW_NOTE_EXPORTED: SR-100 exports private reviewer notes into the AI assistant packet. +- CRITICAL PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT: gap-adolescent-ai-coaching includes private reviewer-note content. +- CRITICAL UNRESOLVED_SCREENING_CONFLICT: SR-100 has an unresolved full/text screening conflict. +- HIGH CRITERIA_VERSION_DRIFT: SR-100 was screened against eligibility-v2 instead of eligibility-v3. +- HIGH ELIGIBILITY_CRITERIA_INCOMPLETE: Eligibility criteria are missing required fields: outcome, studyDesign. +- HIGH EXCLUSION_EVIDENCE_MISSING: SR-100 exclusion reason low_quality lacks full-text evidence locator and quote. 
diff --git a/systematic-review-screening-drift-assistant/reports/risky-screening-report.json b/systematic-review-screening-drift-assistant/reports/risky-screening-report.json new file mode 100644 index 00000000..de1756cb --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/risky-screening-report.json @@ -0,0 +1,229 @@ +{ + "summary": { + "reviewId": "prisma-living-review-cardiometabolic-ai", + "decision": "hold_systematic_review_assistant", + "findingCount": 18, + "highOrCriticalFindings": 13, + "criticalFindings": 5, + "studiesReviewed": 2, + "gapPromptsReviewed": 1, + "auditDigest": "sha256:0c9ae18fee18fd54d887b93c23b161376559b3d740fe538f9902fe916ee704d4" + }, + "findings": [ + { + "severity": "critical", + "code": "AI_POLICY_AUTHORITY_TOO_BROAD", + "message": "AI assistant policy allows actions beyond assist-only recommendations.", + "refs": [ + "prisma-living-review-cardiometabolic-ai" + ], + "action": "set_ai_policy_to_assist_only" + }, + { + "severity": "critical", + "code": "AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL", + "message": "SR-100 has an AI screening recommendation that can override or apply without human approval.", + "refs": [ + "SR-100" + ], + "action": "downgrade_ai_recommendation_to_assist_only" + }, + { + "severity": "critical", + "code": "PRIVATE_REVIEW_NOTE_EXPORTED", + "message": "SR-100 exports private reviewer notes into the AI assistant packet.", + "refs": [ + "SR-100" + ], + "action": "remove_private_notes_from_ai_context" + }, + { + "severity": "critical", + "code": "PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT", + "message": "gap-adolescent-ai-coaching includes private reviewer-note content.", + "refs": [ + "gap-adolescent-ai-coaching" + ], + "action": "regenerate_gap_prompt_from_public_screening_fields" + }, + { + "severity": "critical", + "code": "UNRESOLVED_SCREENING_CONFLICT", + "message": "SR-100 has an unresolved full/text screening conflict.", + "refs": [ + "SR-100" + ], + "action": 
"resolve_reviewer_conflict_before_ai_release" + }, + { + "severity": "high", + "code": "CRITERIA_VERSION_DRIFT", + "message": "SR-100 was screened against eligibility-v2 instead of eligibility-v3.", + "refs": [ + "SR-100" + ], + "action": "rescreen_against_locked_eligibility_version" + }, + { + "severity": "high", + "code": "ELIGIBILITY_CRITERIA_INCOMPLETE", + "message": "Eligibility criteria are missing required fields: outcome, studyDesign.", + "refs": [ + "eligibility-v3" + ], + "action": "complete_pico_and_study_design_criteria" + }, + { + "severity": "high", + "code": "EXCLUSION_EVIDENCE_MISSING", + "message": "SR-100 exclusion reason low_quality lacks full-text evidence locator and quote.", + "refs": [ + "SR-100" + ], + "action": "attach_full_text_evidence_for_exclusion" + }, + { + "severity": "high", + "code": "EXCLUSION_REASON_NOT_IN_TAXONOMY", + "message": "SR-100 uses exclusion reason low_quality, which is not in the approved taxonomy.", + "refs": [ + "SR-100" + ], + "action": "map_exclusion_to_approved_reason_taxonomy" + }, + { + "severity": "high", + "code": "FULL_TEXT_DUAL_REVIEW_GAP", + "message": "SR-101 does not have two independent full-text reviewers.", + "refs": [ + "SR-101" + ], + "action": "assign_second_full_text_reviewer" + }, + { + "severity": "high", + "code": "FULL_TEXT_RETRIEVAL_MISSING", + "message": "SR-100 reached full-text screening without retrieval evidence.", + "refs": [ + "SR-100" + ], + "action": "attach_full_text_retrieval_evidence" + }, + { + "severity": "high", + "code": "HUMAN_APPROVAL_GATE_MISSING", + "message": "AI screening output can be released without an explicit human approval gate.", + "refs": [ + "prisma-living-review-cardiometabolic-ai" + ], + "action": "require_human_approval_for_screening_release" + }, + { + "severity": "high", + "code": "TITLE_ABSTRACT_DUAL_REVIEW_GAP", + "message": "SR-100 does not have two independent title/abstract reviewers.", + "refs": [ + "SR-100" + ], + "action": 
"assign_second_title_abstract_reviewer" + }, + { + "severity": "medium", + "code": "DUPLICATE_CLUSTER_CANONICAL_MISSING", + "message": "SR-101 is in duplicate cluster dup-cluster-77 without a canonical record.", + "refs": [ + "SR-101" + ], + "action": "select_canonical_record_for_duplicate_cluster" + }, + { + "severity": "medium", + "code": "FULL_TEXT_PROVENANCE_GAP", + "message": "SR-101 full-text evidence is missing a locator or content hash.", + "refs": [ + "SR-101" + ], + "action": "record_full_text_locator_and_hash" + }, + { + "severity": "medium", + "code": "GAP_PROMPT_LIMITATION_SIGNALS_MISSING", + "message": "gap-adolescent-ai-coaching has no limitation signals from excluded or unresolved studies.", + "refs": [ + "gap-adolescent-ai-coaching" + ], + "action": "attach_limitation_signals_to_gap_prompt" + }, + { + "severity": "medium", + "code": "GAP_PROMPT_UNDER_EVIDENCED", + "message": "gap-adolescent-ai-coaching cites fewer than 3 screened studies.", + "refs": [ + "gap-adolescent-ai-coaching" + ], + "action": "add_more_screened_study_evidence_before_surfacing_gap" + }, + { + "severity": "medium", + "code": "SEARCH_SNAPSHOT_STALE", + "message": "Search snapshot is 182 days old for prisma-living-review-cardiometabolic-ai.", + "refs": [ + "search-2025-12-01-medline-only" + ], + "action": "refresh_search_snapshot_before_gap_generation" + } + ], + "studySummaries": [ + { + "id": "SR-100", + "outcome": "excluded", + "requiredActions": [ + "rescreen_against_locked_eligibility_version", + "assign_second_title_abstract_reviewer", + "resolve_reviewer_conflict_before_ai_release", + "attach_full_text_retrieval_evidence", + "downgrade_ai_recommendation_to_assist_only", + "remove_private_notes_from_ai_context" + ], + "titleAbstractReviewers": 1, + "fullTextReviewers": 2 + }, + { + "id": "SR-101", + "outcome": "included", + "requiredActions": [ + "assign_second_full_text_reviewer" + ], + "titleAbstractReviewers": 2, + "fullTextReviewers": 1 + } + ], + "gapPrompts": [ + { + 
"id": "gap-adolescent-ai-coaching", + "evidenceStudyCount": 1, + "limitationSignalCount": 0, + "readyForRelease": false + } + ], + "recommendations": [ + "set_ai_policy_to_assist_only", + "downgrade_ai_recommendation_to_assist_only", + "remove_private_notes_from_ai_context", + "regenerate_gap_prompt_from_public_screening_fields", + "resolve_reviewer_conflict_before_ai_release", + "rescreen_against_locked_eligibility_version", + "complete_pico_and_study_design_criteria", + "attach_full_text_evidence_for_exclusion", + "map_exclusion_to_approved_reason_taxonomy", + "assign_second_full_text_reviewer", + "attach_full_text_retrieval_evidence", + "require_human_approval_for_screening_release", + "assign_second_title_abstract_reviewer", + "select_canonical_record_for_duplicate_cluster", + "record_full_text_locator_and_hash", + "attach_limitation_signals_to_gap_prompt", + "add_more_screened_study_evidence_before_surfacing_gap", + "refresh_search_snapshot_before_gap_generation" + ] +} diff --git a/systematic-review-screening-drift-assistant/reports/screening-dashboard.svg b/systematic-review-screening-drift-assistant/reports/screening-dashboard.svg new file mode 100644 index 00000000..573efde8 --- /dev/null +++ b/systematic-review-screening-drift-assistant/reports/screening-dashboard.svg @@ -0,0 +1,19 @@ + + + + + Systematic Review Screening Drift Assistant + Eligibility version locks, dual review, exclusion evidence, AI authority, and gap prompt safety. 
+ Clean packet + + + release_systematic_review_assistant + Risky packet findings + + + 18 findings + Critical release blockers + + + 5 critical + \ No newline at end of file diff --git a/systematic-review-screening-drift-assistant/sample-data.js b/systematic-review-screening-drift-assistant/sample-data.js new file mode 100644 index 00000000..1feaeacb --- /dev/null +++ b/systematic-review-screening-drift-assistant/sample-data.js @@ -0,0 +1,376 @@ +const cleanPacket = { + reviewId: "prisma-living-review-cardiometabolic-ai", + reviewDate: "2026-06-01", + maxSearchSnapshotAgeDays: 21, + minGapEvidenceStudies: 3, + eligibilityCriteria: { + version: "eligibility-v3", + fields: { + population: "Adults with cardiometabolic risk markers", + intervention: "AI-assisted remote coaching or monitoring", + comparator: "Usual care or non-AI digital support", + outcome: "HbA1c, systolic blood pressure, adherence, or adverse events", + studyDesign: "Randomized, quasi-experimental, or prospective cohort" + } + }, + searchSnapshot: { + id: "search-2026-05-29-medline-embase", + capturedAt: "2026-05-29", + databases: ["MEDLINE", "Embase", "CENTRAL"] + }, + aiPolicy: { + actionAuthority: "assist_only", + humanApprovalRequired: true + }, + exclusionTaxonomy: [ + { + id: "outside_population", + label: "Outside target population" + }, + { + id: "wrong_intervention", + label: "No AI-assisted intervention" + }, + { + id: "wrong_outcome", + label: "No eligible outcome" + } + ], + studies: [ + { + id: "SR-001", + title: "Remote AI coaching for cardiometabolic risk", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "included", + humanApproved: true, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-b", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-b", + 
decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "include" + } + }, + fullText: { + retrievedAt: "2026-05-30", + locator: "s3://review-evidence/sr-001.pdf", + hash: "sha256:1c9e6c0b1f39" + }, + aiRecommendations: [ + { + action: "include", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [ + { + private: false, + exportedToAssistant: true, + text: "Outcome and intervention match eligibility-v3." + } + ] + }, + { + id: "SR-002", + title: "Mobile reminders without AI personalization", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "excluded", + humanApproved: true, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-c", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-d", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-c", + decision: "exclude" + }, + { + stage: "full_text", + reviewer: "reviewer-d", + decision: "exclude" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "exclude" + } + }, + fullText: { + retrievedAt: "2026-05-30", + locator: "s3://review-evidence/sr-002.pdf", + hash: "sha256:2f58f225abcd" + }, + exclusionReasons: [ + { + id: "wrong_intervention", + evidenceLocator: "page 4, intervention section", + evidenceQuote: "The reminder cadence was rule-based and did not use AI personalization." 
+ } + ], + aiRecommendations: [ + { + action: "exclude", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [] + }, + { + id: "SR-003", + title: "AI adherence coach in hypertension clinics", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "included", + humanApproved: true, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-b", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-b", + decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "include" + } + }, + fullText: { + retrievedAt: "2026-05-31", + locator: "s3://review-evidence/sr-003.pdf", + hash: "sha256:3b51f77acdef" + }, + aiRecommendations: [ + { + action: "include", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [] + } + ], + gapPrompts: [ + { + id: "gap-underserved-rural-cohorts", + evidenceStudyIds: ["SR-001", "SR-002", "SR-003"], + limitationSignals: ["few rural cohorts", "short follow-up windows"], + includesPrivateReviewerNote: false + } + ] +}; + +const riskyPacket = { + reviewId: "prisma-living-review-cardiometabolic-ai", + reviewDate: "2026-06-01", + maxSearchSnapshotAgeDays: 21, + minGapEvidenceStudies: 3, + eligibilityCriteria: { + version: "eligibility-v3", + fields: { + population: "Adults with cardiometabolic risk markers", + intervention: "AI-assisted remote coaching or monitoring", + comparator: "Usual care or non-AI digital support" + } + }, + searchSnapshot: { + id: "search-2025-12-01-medline-only", + capturedAt: "2025-12-01", + databases: ["MEDLINE"] + }, + aiPolicy: { + actionAuthority: "auto_apply", + humanApprovalRequired: false + }, + exclusionTaxonomy: [ + { + id: "outside_population", + label: 
"Outside target population" + }, + { + id: "wrong_intervention", + label: "No AI-assisted intervention" + } + ], + studies: [ + { + id: "SR-100", + title: "AI lifestyle coaching in adolescent athletes", + criteriaVersion: "eligibility-v2", + fullTextRequired: true, + screeningOutcome: "excluded", + humanApproved: false, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-a", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-a", + decision: "exclude" + }, + { + stage: "full_text", + reviewer: "reviewer-b", + decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "conflict" + } + }, + exclusionReasons: [ + { + id: "low_quality", + evidenceLocator: "", + evidenceQuote: "" + } + ], + aiRecommendations: [ + { + action: "exclude", + authority: "auto_apply", + applied: true + } + ], + reviewerNotes: [ + { + private: true, + exportedToAssistant: true, + text: "Reviewer B disclosed a conflict in a private note." 
+ } + ] + }, + { + id: "SR-101", + title: "Duplicate conference abstract for remote monitoring trial", + criteriaVersion: "eligibility-v3", + fullTextRequired: true, + screeningOutcome: "included", + humanApproved: false, + duplicateGroup: { + id: "dup-cluster-77" + }, + decisions: [ + { + stage: "title_abstract", + reviewer: "reviewer-c", + decision: "include" + }, + { + stage: "title_abstract", + reviewer: "reviewer-d", + decision: "include" + }, + { + stage: "full_text", + reviewer: "reviewer-c", + decision: "include" + } + ], + consensus: { + title_abstract: { + state: "resolved", + resolution: "include" + }, + full_text: { + state: "resolved", + resolution: "include" + } + }, + fullText: { + retrievedAt: "2026-05-12", + locator: "", + hash: "" + }, + aiRecommendations: [ + { + action: "include", + authority: "assist_only", + applied: false + } + ], + reviewerNotes: [] + } + ], + gapPrompts: [ + { + id: "gap-adolescent-ai-coaching", + evidenceStudyIds: ["SR-100"], + limitationSignals: [], + includesPrivateReviewerNote: true + } + ] +}; + +module.exports = { + cleanPacket, + riskyPacket +}; diff --git a/systematic-review-screening-drift-assistant/test.js b/systematic-review-screening-drift-assistant/test.js new file mode 100644 index 00000000..600c9866 --- /dev/null +++ b/systematic-review-screening-drift-assistant/test.js @@ -0,0 +1,38 @@ +const assert = require("node:assert/strict"); +const { evaluateSystematicReviewScreening, sha256 } = require("./index"); +const { cleanPacket, riskyPacket } = require("./sample-data"); + +const clean = evaluateSystematicReviewScreening(cleanPacket); +assert.equal(clean.summary.decision, "release_systematic_review_assistant"); +assert.equal(clean.summary.findingCount, 0); +assert.equal(clean.summary.studiesReviewed, 3); +assert.equal(clean.summary.gapPromptsReviewed, 1); +assert.ok(clean.summary.auditDigest.startsWith("sha256:")); +assert.equal(clean.gapPrompts[0].readyForRelease, true); + +const risky = 
evaluateSystematicReviewScreening(riskyPacket); +assert.equal(risky.summary.decision, "hold_systematic_review_assistant"); +assert.equal(risky.summary.studiesReviewed, 2); +assert.ok(risky.summary.findingCount >= 13); +assert.ok(risky.summary.criticalFindings >= 4); +assert.ok(risky.summary.highOrCriticalFindings >= 10); + +const findingCodes = new Set(risky.findings.map((finding) => finding.code)); +assert.ok(findingCodes.has("AI_POLICY_AUTHORITY_TOO_BROAD")); +assert.ok(findingCodes.has("AI_SCREENING_OVERRIDE_WITHOUT_APPROVAL")); +assert.ok(findingCodes.has("CRITERIA_VERSION_DRIFT")); +assert.ok(findingCodes.has("ELIGIBILITY_CRITERIA_INCOMPLETE")); +assert.ok(findingCodes.has("EXCLUSION_EVIDENCE_MISSING")); +assert.ok(findingCodes.has("EXCLUSION_REASON_NOT_IN_TAXONOMY")); +assert.ok(findingCodes.has("FULL_TEXT_DUAL_REVIEW_GAP")); +assert.ok(findingCodes.has("FULL_TEXT_RETRIEVAL_MISSING")); +assert.ok(findingCodes.has("PRIVATE_REVIEW_NOTE_EXPORTED")); +assert.ok(findingCodes.has("PRIVATE_REVIEW_NOTE_IN_GAP_PROMPT")); +assert.ok(findingCodes.has("UNRESOLVED_SCREENING_CONFLICT")); + +const firstDigest = evaluateSystematicReviewScreening(riskyPacket).summary.auditDigest; +const secondDigest = evaluateSystematicReviewScreening(riskyPacket).summary.auditDigest; +assert.equal(firstDigest, secondDigest); +assert.equal(sha256({ b: 2, a: 1 }), sha256({ a: 1, b: 2 })); + +console.log("systematic review screening drift assistant tests passed"); diff --git a/systematic-review-screening-drift-assistant/verify-video.js b/systematic-review-screening-drift-assistant/verify-video.js new file mode 100644 index 00000000..39af983c --- /dev/null +++ b/systematic-review-screening-drift-assistant/verify-video.js @@ -0,0 +1,37 @@ +const assert = require("node:assert/strict"); +const fs = require("node:fs"); +const path = require("node:path"); +const { spawnSync } = require("node:child_process"); + +const videoPath = path.join(__dirname, "reports", "demo.mp4"); 
+assert.ok(fs.existsSync(videoPath), "reports/demo.mp4 must exist"); +assert.ok(fs.statSync(videoPath).size > 5000, "reports/demo.mp4 should not be empty"); + +const probe = spawnSync(process.env.FFPROBE_PATH || "ffprobe", [ + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name,width,height,r_frame_rate:format=duration", + "-of", + "json", + videoPath +], { encoding: "utf8" }); + +if (probe.status !== 0) { + process.stderr.write(probe.stderr || "ffprobe failed\n"); + process.exit(probe.status || 1); +} + +const metadata = JSON.parse(probe.stdout); +const stream = metadata.streams && metadata.streams[0]; +assert.equal(stream.codec_name, "h264"); +assert.equal(stream.width, 960); +assert.equal(stream.height, 540); +assert.equal(stream.r_frame_rate, "18/1"); + +const duration = Number(metadata.format && metadata.format.duration); +assert.ok(duration >= 3.9 && duration <= 4.2, `unexpected duration ${duration}`); + +console.log(`demo.mp4 verified: ${stream.codec_name}, ${stream.width}x${stream.height}, ${duration.toFixed(3)}s, ${stream.r_frame_rate}`);