diff --git a/geospatial-spatial-autocorrelation-assistant/.gitignore b/geospatial-spatial-autocorrelation-assistant/.gitignore
new file mode 100644
index 00000000..2bf074d6
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/.gitignore
@@ -0,0 +1 @@
+reports/frames/
diff --git a/geospatial-spatial-autocorrelation-assistant/README.md b/geospatial-spatial-autocorrelation-assistant/README.md
new file mode 100644
index 00000000..d04aa6b9
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/README.md
@@ -0,0 +1,47 @@
+# Geospatial Spatial-Autocorrelation Review Assistant
+
+Self-contained reviewer utility for SCIBASE issue #16, AI-Powered Research Assistant Suite. It reviews synthetic geospatial manuscript packets before AI peer-review, reproducibility, or research-gap recommendations are released to researchers.
+
+## What It Checks
+
+- Coordinate reference system and analysis projection evidence.
+- Invalid or over-precise coordinates for sensitive human-subject or protected-location studies.
+- Spatial train/test leakage from nearby train and holdout samples.
+- High Moran's I paired with random validation splits.
+- Preprocessing fitted on the full dataset for spatial covariates.
+- Test-set tuning, missing external spatial validation, stale covariate windows, and missing raster/vector source metadata.
+- Reproducibility artifacts: data manifest, code commit, environment spec, and spatial block map.
+- Research-gap prompts for under-sampled regions and missing spatial validation benchmarks.
+
+## Why This Is Distinct
+
+Existing #16 work covers broad assistant orchestration, evidence binding, structured abstracts, randomization/blinding, survival analysis, missing-data sensitivity, causal adjustment, genomic/proteomics/single-cell review, and related peer-review checks. Existing #17 geospatial work validates sample-provenance graph edges. This module is a separate #16 peer-review layer for manuscript-method validity: spatial autocorrelation, blocked validation, coordinate/projection evidence, and geography-aware reproducibility.
+
+## Usage
+
+```bash
+npm run check
+npm test
+npm run demo
+npm run verify-video
+```
+
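+`npm run demo` and `npm run verify-video` need `ffmpeg`/`ffprobe` on the `PATH`. Generated reviewer artifacts are written to `reports/`, together with a `manifest.json` that records the artifact list, packet statuses, and fingerprints: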
+
+- `risky-audit.json`
+- `clean-audit.json`
+- `risky-review.md`
+- `summary.svg`
+- `demo.mp4`
+
+## API
+
+```js
+const {
+ evaluateGeospatialReviewPacket,
+ renderMarkdownReport,
+ renderSvgSummary
+} = require("./index");
+```
+
+The evaluator returns a deterministic status (`READY`, `REVIEW`, or `HOLD`), finding counts, manuscript decisions, reproducibility scores, remediation actions, research-gap opportunities, and a stable fingerprint. Pass `options.now` to pin the `generatedAt` timestamp; it defaults to the current time.
diff --git a/geospatial-spatial-autocorrelation-assistant/demo.js b/geospatial-spatial-autocorrelation-assistant/demo.js
new file mode 100644
index 00000000..4521dbe4
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/demo.js
@@ -0,0 +1,47 @@
+"use strict";
+
+const fs = require("node:fs");
+const path = require("node:path");
+const {
+ evaluateGeospatialReviewPacket,
+ renderMarkdownReport,
+ renderSvgSummary
+} = require("./index");
+const { riskyPacket, cleanPacket } = require("./sample-data");
+
+const reportsDir = path.join(__dirname, "reports");
+fs.mkdirSync(reportsDir, { recursive: true });
+
+const now = "2026-06-01T10:30:00.000Z";
+const risky = evaluateGeospatialReviewPacket(riskyPacket, { now });
+const clean = evaluateGeospatialReviewPacket(cleanPacket, { now });
+
+fs.writeFileSync(path.join(reportsDir, "risky-audit.json"), `${JSON.stringify(risky, null, 2)}\n`);
+fs.writeFileSync(path.join(reportsDir, "clean-audit.json"), `${JSON.stringify(clean, null, 2)}\n`);
+fs.writeFileSync(path.join(reportsDir, "risky-review.md"), renderMarkdownReport(risky, riskyPacket));
+fs.writeFileSync(path.join(reportsDir, "summary.svg"), renderSvgSummary(risky));
+fs.writeFileSync(
+ path.join(reportsDir, "manifest.json"),
+ `${JSON.stringify(
+ {
+ generatedAt: now,
+ artifacts: [
+ "risky-audit.json",
+ "clean-audit.json",
+ "risky-review.md",
+ "summary.svg",
+ "demo.mp4"
+ ],
+ riskyStatus: risky.status,
+ cleanStatus: clean.status,
+ riskyFingerprint: risky.fingerprint,
+ cleanFingerprint: clean.fingerprint
+ },
+ null,
+ 2
+ )}\n`
+);
+
+console.log(`Risky packet: ${risky.status} (${risky.findings.length} findings)`);
+console.log(`Clean packet: ${clean.status} (${clean.findings.length} findings)`);
+console.log(`Wrote reports to ${reportsDir}`);
diff --git a/geospatial-spatial-autocorrelation-assistant/index.js b/geospatial-spatial-autocorrelation-assistant/index.js
new file mode 100644
index 00000000..6712ff47
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/index.js
@@ -0,0 +1,690 @@
+"use strict";
+
+const crypto = require("node:crypto");
+
+const SEVERITY_ORDER = ["critical", "high", "warning", "info"];
+
+const DEFAULT_POLICY = {
+ minSpatialHoldoutKm: 25,
+ highMoransI: 0.35,
+ maxSensitivePrecisionDecimals: 4,
+ minRegionsForBroadClaims: 3,
+ maxCovariateWindowDays: 365
+};
+
+function evaluateGeospatialReviewPacket(packet, options = {}) {
+ if (!isPlainObject(packet)) {
+ throw new TypeError("evaluateGeospatialReviewPacket expects a packet object");
+ }
+
+ const now = options.now ?? new Date().toISOString();
+ const policy = { ...DEFAULT_POLICY, ...(isPlainObject(packet.policy) ? packet.policy : {}) };
+ const manuscripts = asArray(packet.manuscripts);
+ const findings = [];
+
+ if (manuscripts.length === 0) {
+ findings.push(
+ finding(
+ "PACKET_SCHEMA_MISSING_MANUSCRIPTS",
+ "high",
+ "The geospatial review packet has no manuscripts to inspect.",
+ "AI peer review output needs at least one manuscript or study packet.",
+ "manuscripts",
+ "Attach manuscript metadata, spatial design, samples, model split evidence, and reproducibility artifacts.",
+ "research assistant owner"
+ )
+ );
+ }
+
+ manuscripts.forEach((manuscript, index) => inspectManuscript(manuscript, index, policy, findings));
+
+ const sortedFindings = sortFindings(findings);
+ const status = determineStatus(sortedFindings);
+ const reviewDecisions = manuscripts.map((manuscript, index) =>
+ buildReviewDecision(manuscript, index, sortedFindings)
+ );
+ const researchGapOpportunities = buildResearchGapOpportunities(manuscripts, sortedFindings, policy);
+ const remediationActions = sortedFindings.map((item) => ({
+ code: item.code,
+ manuscriptId: item.manuscriptId ?? null,
+ modelId: item.modelId ?? null,
+ owner: item.owner,
+ action: item.remediation
+ }));
+
+ const fingerprint = crypto
+ .createHash("sha256")
+ .update(
+ JSON.stringify({
+ policy,
+ manuscripts: manuscripts.map((manuscript) => ({
+ id: manuscript.id,
+ spatialDesign: manuscript.spatialDesign,
+ claims: manuscript.claims,
+ models: manuscript.models,
+ artifactKeys: Object.keys(isPlainObject(manuscript.reproducibilityArtifacts) ? manuscript.reproducibilityArtifacts : {})
+ })),
+ codes: sortedFindings.map((item) => item.code)
+ })
+ )
+ .digest("hex")
+ .slice(0, 16);
+
+ return {
+ generatedAt: now,
+ status,
+ summary: summarize(status, sortedFindings, manuscripts.length, researchGapOpportunities.length),
+ findingCounts: countBySeverity(sortedFindings),
+ findings: sortedFindings,
+ reviewDecisions,
+ researchGapOpportunities,
+ remediationActions,
+ fingerprint
+ };
+}
+
+function renderMarkdownReport(result, packet) {
+ const lines = [
+ "# Geospatial Spatial-Autocorrelation Review Assistant",
+ "",
+ `Packet: ${packet.id ?? "unknown"}`,
+ `Status: ${result.status}`,
+ `Fingerprint: ${result.fingerprint}`,
+ "",
+ "## Summary",
+ "",
+ result.summary,
+ "",
+ "## Manuscript Decisions",
+ ""
+ ];
+
+ result.reviewDecisions.forEach((decision) => {
+ lines.push(
+ `- ${decision.manuscriptId}: ${decision.decision}; reproducibility score ${decision.reproducibilityScore}/100; ${decision.reasonCodes.length} finding(s)`
+ );
+ });
+
+ lines.push("", "## Findings", "");
+ if (result.findings.length === 0) {
+ lines.push("- No geospatial peer-review blockers found.");
+ } else {
+ result.findings.forEach((item) => {
+ lines.push(`- ${item.severity.toUpperCase()} ${item.code}: ${item.message}`);
+ lines.push(` - Evidence: ${item.evidence}`);
+ lines.push(` - Remediation: ${item.remediation}`);
+ });
+ }
+
+ lines.push("", "## Research Gap Opportunities", "");
+ if (result.researchGapOpportunities.length === 0) {
+ lines.push("- No under-sampled geography or replication opportunities were generated.");
+ } else {
+ result.researchGapOpportunities.forEach((gap) => {
+ lines.push(`- ${gap.id}: ${gap.title}`);
+ lines.push(` - Rationale: ${gap.rationale}`);
+ lines.push(` - First action: ${gap.firstAction}`);
+ });
+ }
+
+ return `${lines.join("\n")}\n`;
+}
+
+function renderSvgSummary(result) {
+  // Renders a small status card: a headline plus hold / warning / ready bars. NOTE(review): the original markup was missing (the template was empty, so summary.svg was written blank); this layout is rebuilt from the computed widths - confirm it against the intended design.
+  const { critical = 0, high = 0, warning = 0 } = result.findingCounts;
+  const ready = result.status === "READY";
+  const statusColor = ready ? "#16794c" : result.status === "REVIEW" ? "#a15c00" : "#a11b32";
+  const holdWidth = Math.min(330, (critical + high) * 54);
+  const warningWidth = Math.min(220, warning * 42);
+  const readyWidth = ready ? 300 : Math.max(80, 300 - holdWidth);
+  return [
+    `<svg xmlns="http://www.w3.org/2000/svg" width="720" height="240" viewBox="0 0 720 240" role="img"><rect width="720" height="240" fill="#f8fafc"/>`,
+    `<text x="32" y="52" font-family="sans-serif" font-size="24" font-weight="700" fill="${statusColor}">${escapeXml(result.status)} - fingerprint ${escapeXml(result.fingerprint)}</text>`,
+    `<rect x="32" y="84" width="${holdWidth}" height="28" fill="#a11b32"/><rect x="32" y="128" width="${warningWidth}" height="28" fill="#a15c00"/><rect x="32" y="172" width="${readyWidth}" height="28" fill="#16794c"/>`,
+    `<text x="392" y="104" font-family="sans-serif" font-size="14" fill="#334155">${critical} critical / ${high} high</text><text x="392" y="148" font-family="sans-serif" font-size="14" fill="#334155">${warning} warning</text><text x="392" y="192" font-family="sans-serif" font-size="14" fill="#334155">release readiness</text></svg>`
+  ].join("\n");
+}
+
+function inspectManuscript(manuscript, index, policy, findings) {
+ const manuscriptId = manuscript.id ?? `manuscript-${index}`;
+ const path = `manuscripts[${index}]`;
+ const spatialDesign = isPlainObject(manuscript.spatialDesign) ? manuscript.spatialDesign : {};
+ const samples = asArray(manuscript.samples);
+ const models = asArray(manuscript.models);
+ const claims = asArray(manuscript.claims);
+ const artifacts = isPlainObject(manuscript.reproducibilityArtifacts) ? manuscript.reproducibilityArtifacts : {};
+
+ if (!manuscript.id) {
+ findings.push(
+ finding(
+ "MANUSCRIPT_MISSING_ID",
+ "high",
+ `Manuscript at index ${index} has no stable id.`,
+ "Reviewer packets need stable manuscript ids for traceability.",
+ `${path}.id`,
+ "Assign a stable manuscript id before releasing assistant output.",
+ "research assistant owner",
+ manuscriptId
+ )
+ );
+ }
+
+ if (!spatialDesign.crs && !spatialDesign.epsg) {
+ findings.push(
+ finding(
+ "MISSING_CRS_EVIDENCE",
+ "high",
+ `${manuscriptId} does not declare a coordinate reference system.`,
+ "Spatial distances, joins, and raster overlays cannot be reviewed without CRS evidence.",
+ `${path}.spatialDesign.crs`,
+ "Declare the source CRS/EPSG code and any analysis projection used for distance or area operations.",
+ "geospatial methods reviewer",
+ manuscriptId
+ )
+ );
+ }
+
+ if (samples.length === 0) {
+ findings.push(
+ finding(
+ "MISSING_SPATIAL_SAMPLE_TABLE",
+ "high",
+ `${manuscriptId} has no sample table with coordinates and split labels.`,
+ "Spatial leakage and regional coverage checks need sample-level geography.",
+ `${path}.samples`,
+ "Attach synthetic-safe sample coordinates, split labels, site ids, and region labels.",
+ "data steward",
+ manuscriptId
+ )
+ );
+ }
+
+ inspectCoordinates(samples, path, manuscriptId, findings);
+ inspectSensitivePrecision(manuscript, spatialDesign, path, manuscriptId, policy, findings);
+ inspectBroadClaims(manuscript, claims, samples, path, manuscriptId, policy, findings);
+
+ models.forEach((model, modelIndex) =>
+ inspectModel(model, modelIndex, manuscript, samples, path, manuscriptId, policy, findings)
+ );
+
+ inspectArtifacts(artifacts, models, path, manuscriptId, findings);
+}
+
+// Records a critical INVALID_COORDINATE finding for every sample whose lat/lon is missing, non-numeric, or outside [-90, 90] / [-180, 180].
+function inspectCoordinates(samples, path, manuscriptId, findings) {
+  // Number(null) and Number("") are both 0, so blank values are mapped to NaN; otherwise they would validate as the point (0, 0).
+  const toDegrees = (raw) => (typeof raw === "number" ? raw : typeof raw === "string" && raw.trim() !== "" ? Number(raw) : NaN);
+  samples.forEach((sample, sampleIndex) => {
+    const lat = toDegrees(sample.lat);
+    const lon = toDegrees(sample.lon);
+    if (Number.isFinite(lat) && Number.isFinite(lon) && Math.abs(lat) <= 90 && Math.abs(lon) <= 180) return;
+    findings.push(finding(
+      "INVALID_COORDINATE",
+      "critical",
+      `${manuscriptId} has an invalid coordinate at sample ${sample.id ?? sampleIndex}.`,
+      `Observed lat=${sample.lat}, lon=${sample.lon}.`,
+      `${path}.samples[${sampleIndex}]`,
+      "Correct or exclude invalid coordinates before AI peer review summarizes spatial findings.",
+      "data steward",
+      manuscriptId
+    ));
+  });
+}
+
+function inspectSensitivePrecision(manuscript, spatialDesign, path, manuscriptId, policy, findings) {
+ const sensitivity = String(manuscript.sensitivity ?? "").toLowerCase();
+ const sensitive = sensitivity.includes("human") || sensitivity.includes("protected") || sensitivity.includes("restricted");
+ const decimals = Number(spatialDesign.coordinatePrecisionDecimals);
+ if (sensitive && Number.isFinite(decimals) && decimals > policy.maxSensitivePrecisionDecimals) {
+ findings.push(
+ finding(
+ "SENSITIVE_COORDINATE_OVERPRECISION",
+ "high",
+ `${manuscriptId} exposes sensitive locations at ${decimals} decimal places.`,
+ "Human-subject or protected-species locations should be generalized before reviewer packets or public summaries.",
+ `${path}.spatialDesign.coordinatePrecisionDecimals`,
+ `Round or jitter coordinates to ${policy.maxSensitivePrecisionDecimals} decimals or provide an approved restricted-location access path.`,
+ "privacy reviewer",
+ manuscriptId
+ )
+ );
+ }
+}
+
+function inspectBroadClaims(manuscript, claims, samples, path, manuscriptId, policy, findings) {
+ const broadClaims = claims.filter(isBroadClaim);
+ if (broadClaims.length === 0) {
+ return;
+ }
+
+ const regions = new Set(samples.map((sample) => sample.region).filter(Boolean));
+ broadClaims.forEach((claim, claimIndex) => {
+ const claimedRegions = asArray(claim.claimedRegions).filter(Boolean);
+ const expectedRegions = Math.max(policy.minRegionsForBroadClaims, claimedRegions.length || 0);
+ if (regions.size < expectedRegions) {
+ findings.push(
+ finding(
+ "OVERBROAD_GEOGRAPHIC_CLAIM",
+ "high",
+ `${manuscriptId} makes a broad geographic claim with only ${regions.size} observed region(s).`,
+ claim.text ?? "Broad geographic claim without matching sampled-region coverage.",
+ `${path}.claims[${claimIndex}]`,
+ "Limit the claim to sampled regions or add external validation sites covering the claimed geography.",
+ "methods reviewer",
+ manuscriptId
+ )
+ );
+ }
+ });
+}
+
+function inspectModel(model, modelIndex, manuscript, samples, path, manuscriptId, policy, findings) {
+ const modelId = model.id ?? `model-${modelIndex}`;
+ const modelPath = `${path}.models[${modelIndex}]`;
+ const splitStrategy = String(model.splitStrategy ?? "").toLowerCase();
+ const moransI = Number(model.moransI);
+ const spatialSplit = isSpatialSplit(model);
+ const minDistance = minimumTrainTestDistanceKm(samples);
+
+ if (Number.isFinite(minDistance) && minDistance < policy.minSpatialHoldoutKm && !spatialSplit) {
+ findings.push(
+ finding(
+ "SPATIAL_SPLIT_LEAKAGE",
+ "critical",
+ `${manuscriptId}/${modelId} has train/test samples only ${minDistance.toFixed(1)} km apart without spatial blocking.`,
+ `Policy requires at least ${policy.minSpatialHoldoutKm} km or explicit spatial block validation.`,
+ `${modelPath}.splitStrategy`,
+ "Use spatial block, leave-site-out, or regional holdout validation and regenerate performance claims.",
+ "model reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ if (Number.isFinite(moransI) && moransI >= policy.highMoransI && !spatialSplit) {
+ findings.push(
+ finding(
+ "HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT",
+ "high",
+ `${manuscriptId}/${modelId} reports Moran's I ${moransI.toFixed(2)} with a ${model.splitStrategy ?? "missing"} split.`,
+ "High spatial autocorrelation inflates random train/test validation.",
+ `${modelPath}.moransI`,
+ "Run spatial block cross-validation or leave-region-out validation before presenting performance as reviewer-ready.",
+ "spatial statistics reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ const preprocessingFitScope = String(model.preprocessingFitScope ?? "").toLowerCase();
+ if (preprocessingFitScope.includes("full") && hasSpatialCovariates(model)) {
+ findings.push(
+ finding(
+ "FULL_DATASET_PREPROCESSING_LEAKAGE",
+ "high",
+ `${manuscriptId}/${modelId} fits spatial preprocessing on the full dataset.`,
+ "Raster normalization, imputation, or feature selection must be learned inside each training fold.",
+ `${modelPath}.preprocessingFitScope`,
+ "Refit preprocessing inside training folds and attach fold-specific transformation hashes.",
+ "reproducibility reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ const tunedOn = String(model.hyperparameterTunedOn ?? "").toLowerCase();
+ if (tunedOn.includes("test") || tunedOn.includes("holdout")) {
+ findings.push(
+ finding(
+ "TEST_SET_TUNING",
+ "critical",
+ `${manuscriptId}/${modelId} tunes model choices on the test/holdout set.`,
+ "Reviewer-facing performance claims require a locked final test set.",
+ `${modelPath}.hyperparameterTunedOn`,
+ "Move tuning to inner validation folds and rerun the locked test set once.",
+ "model reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ inspectCovariates(model, modelPath, manuscriptId, modelId, policy, findings);
+
+ const needsExternalValidation = asArray(manuscript.claims).some(isBroadClaim) || String(model.deploymentContext ?? "").length > 0;
+ if (needsExternalValidation && asArray(model.externalValidationSites).length === 0) {
+ findings.push(
+ finding(
+ "MISSING_EXTERNAL_SPATIAL_VALIDATION",
+ "high",
+ `${manuscriptId}/${modelId} lacks external spatial validation for broader deployment claims.`,
+ "Broad geographic or deployment claims should be checked outside the training geography.",
+ `${modelPath}.externalValidationSites`,
+ "Add an out-of-region validation site or limit the manuscript claim to the sampled geography.",
+ "methods reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ if (!splitStrategy) {
+ findings.push(
+ finding(
+ "MISSING_SPLIT_STRATEGY",
+ "warning",
+ `${manuscriptId}/${modelId} does not describe its spatial validation split strategy.`,
+ "Peer review needs the split design to reason about leakage and autocorrelation.",
+ `${modelPath}.splitStrategy`,
+ "Document random, blocked, leave-site-out, or external validation split evidence.",
+ "methods reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+}
+
+function inspectCovariates(model, modelPath, manuscriptId, modelId, policy, findings) {
+ asArray(model.covariates).forEach((covariate, covariateIndex) => {
+ if (!covariate.source) {
+ findings.push(
+ finding(
+ "COVARIATE_SOURCE_MISSING",
+ "warning",
+ `${manuscriptId}/${modelId} covariate ${covariate.name ?? covariateIndex} has no source citation or artifact id.`,
+ "Raster/vector covariates should be traceable for reproducibility and recency review.",
+ `${modelPath}.covariates[${covariateIndex}].source`,
+ "Attach a source DOI, artifact id, or repository path for each spatial covariate.",
+ "data steward",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ const windowDays = Number(covariate.acquisitionWindowDays);
+ if (Number.isFinite(windowDays) && windowDays > policy.maxCovariateWindowDays) {
+ findings.push(
+ finding(
+ "STALE_COVARIATE_WINDOW",
+ "warning",
+ `${manuscriptId}/${modelId} covariate ${covariate.name ?? covariateIndex} spans ${windowDays} acquisition days.`,
+ "Long covariate windows can hide temporal drift in geospatial models.",
+ `${modelPath}.covariates[${covariateIndex}].acquisitionWindowDays`,
+ "Use period-matched covariates or report temporal-drift sensitivity checks.",
+ "methods reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+
+ const resolutionMeters = Number(covariate.resolutionMeters);
+ if (!Number.isFinite(resolutionMeters) || resolutionMeters <= 0) {
+ findings.push(
+ finding(
+ "COVARIATE_RESOLUTION_MISSING",
+ "warning",
+ `${manuscriptId}/${modelId} covariate ${covariate.name ?? covariateIndex} lacks raster/vector resolution evidence.`,
+ "Spatial scale mismatch cannot be reviewed without resolution metadata.",
+ `${modelPath}.covariates[${covariateIndex}].resolutionMeters`,
+ "Attach spatial resolution, aggregation rules, and resampling method for each covariate.",
+ "geospatial methods reviewer",
+ manuscriptId,
+ modelId
+ )
+ );
+ }
+ });
+}
+
+function inspectArtifacts(artifacts, models, path, manuscriptId, findings) {
+ const required = [
+ ["dataManifest", "DATA_MANIFEST_MISSING", "Attach a data manifest with sample ids, coordinates, split labels, and hashes."],
+ ["codeCommit", "CODE_COMMIT_MISSING", "Attach the analysis code commit or immutable archive hash."],
+ ["environmentSpec", "ENVIRONMENT_SPEC_MISSING", "Attach a pinned environment or container digest for spatial libraries."]
+ ];
+
+ required.forEach(([key, code, remediation]) => {
+ if (!artifacts[key]) {
+ findings.push(
+ finding(
+ code,
+ "high",
+ `${manuscriptId} is missing reproducibility artifact ${key}.`,
+ "Geospatial results depend on data, code, and environment parity.",
+ `${path}.reproducibilityArtifacts.${key}`,
+ remediation,
+ "reproducibility reviewer",
+ manuscriptId
+ )
+ );
+ }
+ });
+
+ const hasSpatialModel = models.some((model) => isSpatialSplit(model) || Number.isFinite(Number(model.moransI)));
+ if (hasSpatialModel && !artifacts.spatialBlockMap) {
+ findings.push(
+ finding(
+ "SPATIAL_BLOCK_MAP_MISSING",
+ "warning",
+ `${manuscriptId} has spatial validation claims without a block map artifact.`,
+ "Reviewers need the held-out geometry or block map to audit leakage.",
+ `${path}.reproducibilityArtifacts.spatialBlockMap`,
+ "Attach a block-map artifact id, geometry hash, or leave-site-out manifest.",
+ "geospatial methods reviewer",
+ manuscriptId
+ )
+ );
+ }
+}
+
+// Derives one manuscript's release decision, reason codes, and reproducibility score from the findings attributed to it.
+function buildReviewDecision(manuscript, index, findings) {
+  // Must mirror the fallback id used when the findings were created, or unnamed manuscripts would match nothing.
+  const manuscriptId = manuscript.id ?? `manuscript-${index}`;
+  // Findings without a manuscriptId are packet-level and count against every manuscript.
+  const manuscriptFindings = findings.filter(
+    (item) => item.manuscriptId === manuscriptId || !item.manuscriptId
+  );
+  return {
+    manuscriptId,
+    // Reuse the packet-level HOLD / REVIEW / READY ladder so the two decisions cannot drift apart.
+    decision: determineStatus(manuscriptFindings),
+    reasonCodes: manuscriptFindings.map((item) => item.code),
+    reproducibilityScore: scoreFindings(manuscriptFindings)
+  };
+}
+
+function buildResearchGapOpportunities(manuscripts, findings, policy) {
+ const gaps = [];
+ manuscripts.forEach((manuscript, index) => {
+ const manuscriptId = manuscript.id ?? `manuscript-${index}`;
+ const samples = asArray(manuscript.samples);
+ const regions = new Set(samples.map((sample) => sample.region).filter(Boolean));
+ const broad = asArray(manuscript.claims).some(isBroadClaim);
+ const manuscriptFindings = findings.filter((item) => item.manuscriptId === manuscriptId);
+
+ if (broad && regions.size < policy.minRegionsForBroadClaims) {
+ gaps.push({
+ id: `${manuscriptId}-regional-replication`,
+ title: "Prioritize out-of-region replication before broad geographic claims",
+ rationale: `${manuscriptId} samples ${regions.size} region(s), below the ${policy.minRegionsForBroadClaims}-region policy for broad claims.`,
+ firstAction: "Recruit or simulate a holdout site in the least represented claimed region."
+ });
+ }
+
+ if (manuscriptFindings.some((item) => item.code === "HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT")) {
+ gaps.push({
+ id: `${manuscriptId}-spatial-validation-gap`,
+ title: "Add spatial block validation benchmark",
+ rationale: "High autocorrelation with random validation means reported accuracy may be optimistic.",
+ firstAction: "Create a leave-region-out benchmark and compare it to the random split baseline."
+ });
+ }
+ });
+ return gaps;
+}
+
+function summarize(status, findings, manuscriptCount, gapCount) {
+ if (findings.length === 0) {
+ return `${manuscriptCount} manuscript(s) are ready for geospatial peer-review release with no spatial leakage or reproducibility findings.`;
+ }
+
+ const counts = countBySeverity(findings);
+ return `${status}: ${manuscriptCount} manuscript(s) produced ${findings.length} finding(s): ${counts.critical ?? 0} critical, ${counts.high ?? 0} high, ${counts.warning ?? 0} warning, and ${gapCount} research gap prompt(s).`;
+}
+
+function finding(code, severity, message, evidence, path, remediation, owner, manuscriptId = null, modelId = null) {
+ return {
+ code,
+ severity,
+ message,
+ evidence,
+ path,
+ remediation,
+ owner,
+ manuscriptId,
+ modelId
+ };
+}
+
+function determineStatus(findings) {
+ if (findings.some((item) => item.severity === "critical" || item.severity === "high")) {
+ return "HOLD";
+ }
+ if (findings.some((item) => item.severity === "warning")) {
+ return "REVIEW";
+ }
+ return "READY";
+}
+
+function scoreFindings(findings) {
+ const counts = countBySeverity(findings);
+ return Math.max(
+ 0,
+ 100 - (counts.critical ?? 0) * 30 - (counts.high ?? 0) * 17 - (counts.warning ?? 0) * 7
+ );
+}
+
+function sortFindings(findings) {
+ return [...findings].sort((a, b) => {
+ const severityDiff = SEVERITY_ORDER.indexOf(a.severity) - SEVERITY_ORDER.indexOf(b.severity);
+ if (severityDiff !== 0) {
+ return severityDiff;
+ }
+ return a.code.localeCompare(b.code);
+ });
+}
+
+function countBySeverity(findings) {
+ return findings.reduce((counts, item) => {
+ counts[item.severity] = (counts[item.severity] ?? 0) + 1;
+ return counts;
+ }, {});
+}
+
+function minimumTrainTestDistanceKm(samples) {
+ const train = samples.filter((sample) => String(sample.split ?? "").toLowerCase() === "train");
+ const test = samples.filter((sample) => String(sample.split ?? "").toLowerCase() === "test");
+ if (train.length === 0 || test.length === 0) {
+ return Infinity;
+ }
+
+ let minimum = Infinity;
+ train.forEach((trainSample) => {
+ test.forEach((testSample) => {
+ const distance = haversineKm(trainSample.lat, trainSample.lon, testSample.lat, testSample.lon);
+ if (distance < minimum) {
+ minimum = distance;
+ }
+ });
+ });
+ return minimum;
+}
+
+// Great-circle distance in km between two lat/lon points given in degrees; returns Infinity when any coordinate is missing or non-numeric.
+function haversineKm(latA, lonA, latB, lonB) {
+  // Number(null) and Number("") are both 0, so blank inputs are mapped to NaN instead of being measured from (0, 0).
+  const toDegrees = (raw) => (typeof raw === "number" ? raw : typeof raw === "string" && raw.trim() !== "" ? Number(raw) : NaN);
+  const [aLat, aLon, bLat, bLon] = [latA, lonA, latB, lonB].map(toDegrees);
+  if (![aLat, aLon, bLat, bLon].every(Number.isFinite)) {
+    return Infinity;
+  }
+
+  const earthRadiusKm = 6371;
+  const dLat = radians(bLat - aLat);
+  const dLon = radians(bLon - aLon);
+  // Rounding can push the haversine term slightly above 1 for near-antipodal points; clamp so Math.sqrt(1 - a) never yields NaN.
+  const a = Math.min(
+    1,
+    Math.sin(dLat / 2) ** 2 + Math.cos(radians(aLat)) * Math.cos(radians(bLat)) * Math.sin(dLon / 2) ** 2
+  );
+  return 2 * earthRadiusKm * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
+}
+
+function radians(value) {
+ return (value * Math.PI) / 180;
+}
+
+// True when splitStrategy names a spatially blocked design; spaces/underscores are normalized to "-" so "leave_region_out" and "leave-region-out" both match.
+function isSpatialSplit(model) {
+  return /spatial|block|leave-site|leave-region/.test(String(model.splitStrategy ?? "").toLowerCase().replace(/[\s_]+/g, "-"));
+}
+
+function hasSpatialCovariates(model) {
+ return Boolean(model.spatialCovariates) || asArray(model.covariates).length > 0;
+}
+
+function isBroadClaim(claim) {
+ const scope = String(claim.scope ?? "").toLowerCase();
+ const text = String(claim.text ?? "").toLowerCase();
+ return (
+ ["global", "continental", "multi-region", "national", "deployment"].includes(scope) ||
+ text.includes("generalize") ||
+ text.includes("across regions") ||
+ text.includes("continent") ||
+ text.includes("nationwide") ||
+ text.includes("global")
+ );
+}
+
+function isPlainObject(value) {
+ return Boolean(value) && typeof value === "object" && !Array.isArray(value);
+}
+
+function asArray(value) {
+ return Array.isArray(value) ? value : [];
+}
+
+// Escapes &, <, > and " for XML/SVG text nodes and double-quoted attributes; & is replaced first so the other entities are not double-escaped.
+function escapeXml(value) {
+  return String(value).replaceAll("&", "&amp;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;")
+    .replaceAll('"', "&quot;");
+}
+
+module.exports = {
+ evaluateGeospatialReviewPacket,
+ renderMarkdownReport,
+ renderSvgSummary,
+ haversineKm
+};
diff --git a/geospatial-spatial-autocorrelation-assistant/make-demo-video.js b/geospatial-spatial-autocorrelation-assistant/make-demo-video.js
new file mode 100644
index 00000000..f690b872
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/make-demo-video.js
@@ -0,0 +1,130 @@
+"use strict";
+
+const { execFileSync } = require("node:child_process");
+const fs = require("node:fs");
+const path = require("node:path");
+
+const WIDTH = 960;
+const HEIGHT = 540;
+// 5x7 bitmap glyphs keyed by uppercase letter. drawText leaves a blank cell for any character without an entry, so every letter used in slide labels and captions must be listed here.
+const FONT = {
+  A: ["01110", "10001", "10001", "11111", "10001", "10001", "10001"],
+  B: ["11110", "10001", "10001", "11110", "10001", "10001", "11110"],
+  C: ["01111", "10000", "10000", "10000", "10000", "10000", "01111"],
+  D: ["11110", "10001", "10001", "10001", "10001", "10001", "11110"],
+  E: ["11111", "10000", "10000", "11110", "10000", "10000", "11111"],
+  G: ["01111", "10000", "10000", "10111", "10001", "10001", "01111"],
+  I: ["11111", "00100", "00100", "00100", "00100", "00100", "11111"],
+  K: ["10001", "10010", "10100", "11000", "10100", "10010", "10001"],
+  L: ["10000", "10000", "10000", "10000", "10000", "10000", "11111"],
+  M: ["10001", "11011", "10101", "10101", "10001", "10001", "10001"],
+  O: ["01110", "10001", "10001", "10001", "10001", "10001", "01110"],
+  P: ["11110", "10001", "10001", "11110", "10000", "10000", "10000"],
+  R: ["11110", "10001", "10001", "11110", "10100", "10010", "10001"],
+  S: ["01111", "10000", "10000", "01110", "00001", "00001", "11110"],
+  T: ["11111", "00100", "00100", "00100", "00100", "00100", "00100"],
+  V: ["10001", "10001", "10001", "10001", "01010", "01010", "00100"],
+  W: ["10001", "10001", "10001", "10101", "10101", "10101", "01010"],
+  Y: ["10001", "01010", "00100", "00100", "00100", "00100", "00100"]
+};
+
+const reportsDir = path.join(__dirname, "reports");
+const framesDir = path.join(reportsDir, "frames");
+fs.mkdirSync(framesDir, { recursive: true });
+
+for (const file of fs.readdirSync(framesDir)) {
+ fs.unlinkSync(path.join(framesDir, file));
+}
+
+const slides = [
+ { label: "CRS READY", color: [22, 121, 76], fill: 0.72 },
+ { label: "LEAKAGE", color: [161, 27, 50], fill: 0.88 },
+ { label: "BLOCK SPLIT", color: [22, 121, 76], fill: 0.78 },
+ { label: "GAP MAP", color: [161, 92, 0], fill: 0.64 }
+];
+
+let frameIndex = 0;
+for (const slide of slides) {
+ for (let i = 0; i < 8; i += 1) {
+ const progress = (i + 1) / 8;
+ const buffer = createFrame(slide, progress);
+ fs.writeFileSync(path.join(framesDir, `frame-${String(frameIndex).padStart(3, "0")}.ppm`), buffer);
+ frameIndex += 1;
+ }
+}
+
+const output = path.join(reportsDir, "demo.mp4");
+execFileSync(
+ "ffmpeg",
+ [
+ "-y",
+ "-framerate",
+ "8",
+ "-i",
+ path.join(framesDir, "frame-%03d.ppm"),
+ "-pix_fmt",
+ "yuv420p",
+ "-movflags",
+ "+faststart",
+ output
+ ],
+ { stdio: "ignore" }
+);
+
+const stats = fs.statSync(output);
+console.log(`Wrote ${output} (${stats.size} bytes)`);
+
+function createFrame(slide, progress) {
+ const pixels = Buffer.alloc(WIDTH * HEIGHT * 3);
+ fillRect(pixels, 0, 0, WIDTH, HEIGHT, [16, 24, 32]);
+ fillRect(pixels, 48, 48, 864, 444, [248, 250, 252]);
+ fillRect(pixels, 80, 190, 800, 88, [226, 232, 240]);
+ fillRect(pixels, 80, 190, Math.round(800 * slide.fill * progress), 88, slide.color);
+ fillRect(pixels, 80, 322, 220, 42, [226, 232, 240]);
+ fillRect(pixels, 332, 322, 220, 42, [226, 232, 240]);
+ fillRect(pixels, 584, 322, 220, 42, [226, 232, 240]);
+ fillRect(pixels, 80, 322, 130, 42, [161, 27, 50]);
+ fillRect(pixels, 332, 322, 150, 42, [161, 92, 0]);
+ fillRect(pixels, 584, 322, 200, 42, [22, 121, 76]);
+ drawText(pixels, "SPATIAL REVIEW", 82, 104, 5, [17, 24, 39]);
+ drawText(pixels, slide.label, 108, 214, 7, [255, 255, 255]);
+ drawText(pixels, "PEER READY", 82, 414, 4, [51, 65, 85]);
+ return Buffer.concat([Buffer.from(`P6\n${WIDTH} ${HEIGHT}\n255\n`, "ascii"), pixels]);
+}
+
+function fillRect(pixels, x, y, width, height, color) {
+ const x2 = Math.min(WIDTH, x + width);
+ const y2 = Math.min(HEIGHT, y + height);
+ for (let row = Math.max(0, y); row < y2; row += 1) {
+ for (let col = Math.max(0, x); col < x2; col += 1) {
+ const offset = (row * WIDTH + col) * 3;
+ pixels[offset] = color[0];
+ pixels[offset + 1] = color[1];
+ pixels[offset + 2] = color[2];
+ }
+ }
+}
+
+function drawText(pixels, text, x, y, scale, color) {
+ let cursor = x;
+ for (const rawChar of text) {
+ const char = rawChar.toUpperCase();
+ if (char === " ") {
+ cursor += 4 * scale;
+ continue;
+ }
+ const glyph = FONT[char];
+ if (!glyph) {
+ cursor += 6 * scale;
+ continue;
+ }
+ glyph.forEach((row, rowIndex) => {
+ for (let colIndex = 0; colIndex < row.length; colIndex += 1) {
+ if (row[colIndex] === "1") {
+ fillRect(pixels, cursor + colIndex * scale, y + rowIndex * scale, scale, scale, color);
+ }
+ }
+ });
+ cursor += 6 * scale;
+ }
+}
diff --git a/geospatial-spatial-autocorrelation-assistant/package.json b/geospatial-spatial-autocorrelation-assistant/package.json
new file mode 100644
index 00000000..a583ce99
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/package.json
@@ -0,0 +1,21 @@
+{
+ "name": "geospatial-spatial-autocorrelation-assistant",
+ "version": "1.0.0",
+ "private": true,
+ "description": "Synthetic geospatial peer-review assistant for spatial autocorrelation, split leakage, CRS, and reproducibility risk.",
+ "main": "index.js",
+ "scripts": {
+ "check": "node --check index.js && node --check sample-data.js && node --check test.js && node --check demo.js && node --check make-demo-video.js",
+ "test": "node test.js",
+ "demo": "node demo.js && node make-demo-video.js",
+ "verify-video": "ffprobe -v error -show_entries stream=codec_name,width,height,duration -of default=nokey=1:noprint_wrappers=1 reports/demo.mp4"
+ },
+ "keywords": [
+ "geospatial",
+ "peer-review",
+ "spatial-autocorrelation",
+ "reproducibility",
+ "synthetic"
+ ],
+ "license": "MIT"
+}
diff --git a/geospatial-spatial-autocorrelation-assistant/reports/clean-audit.json b/geospatial-spatial-autocorrelation-assistant/reports/clean-audit.json
new file mode 100644
index 00000000..0faadf2c
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/reports/clean-audit.json
@@ -0,0 +1,18 @@
+{
+ "generatedAt": "2026-06-01T10:30:00.000Z",
+ "status": "READY",
+ "summary": "1 manuscript(s) are ready for geospatial peer-review release with no spatial leakage or reproducibility findings.",
+ "findingCounts": {},
+ "findings": [],
+ "reviewDecisions": [
+ {
+ "manuscriptId": "rangeland-blocked-validation",
+ "decision": "READY",
+ "reasonCodes": [],
+ "reproducibilityScore": 100
+ }
+ ],
+ "researchGapOpportunities": [],
+ "remediationActions": [],
+ "fingerprint": "aa2c187bd4b36628"
+}
diff --git a/geospatial-spatial-autocorrelation-assistant/reports/demo.mp4 b/geospatial-spatial-autocorrelation-assistant/reports/demo.mp4
new file mode 100644
index 00000000..0e79ef6b
Binary files /dev/null and b/geospatial-spatial-autocorrelation-assistant/reports/demo.mp4 differ
diff --git a/geospatial-spatial-autocorrelation-assistant/reports/manifest.json b/geospatial-spatial-autocorrelation-assistant/reports/manifest.json
new file mode 100644
index 00000000..c0dd10c0
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/reports/manifest.json
@@ -0,0 +1,14 @@
+{
+ "generatedAt": "2026-06-01T10:30:00.000Z",
+ "artifacts": [
+ "risky-audit.json",
+ "clean-audit.json",
+ "risky-review.md",
+ "summary.svg",
+ "demo.mp4"
+ ],
+ "riskyStatus": "HOLD",
+ "cleanStatus": "READY",
+ "riskyFingerprint": "e036107e72f70a7e",
+ "cleanFingerprint": "aa2c187bd4b36628"
+}
diff --git a/geospatial-spatial-autocorrelation-assistant/reports/risky-audit.json b/geospatial-spatial-autocorrelation-assistant/reports/risky-audit.json
new file mode 100644
index 00000000..0ea41478
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/reports/risky-audit.json
@@ -0,0 +1,285 @@
+{
+ "generatedAt": "2026-06-01T10:30:00.000Z",
+ "status": "HOLD",
+ "summary": "HOLD: 1 manuscript(s) produced 13 finding(s): 2 critical, 8 high, 3 warning, and 2 research gap prompt(s).",
+ "findingCounts": {
+ "critical": 2,
+ "high": 8,
+ "warning": 3
+ },
+ "findings": [
+ {
+ "code": "SPATIAL_SPLIT_LEAKAGE",
+ "severity": "critical",
+ "message": "urban-heat-random-split/rf-heat-risk has train/test samples only 0.6 km apart without spatial blocking.",
+ "evidence": "Policy requires at least 35 km or explicit spatial block validation.",
+ "path": "manuscripts[0].models[0].splitStrategy",
+ "remediation": "Use spatial block, leave-site-out, or regional holdout validation and regenerate performance claims.",
+ "owner": "model reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ },
+ {
+ "code": "TEST_SET_TUNING",
+ "severity": "critical",
+ "message": "urban-heat-random-split/rf-heat-risk tunes model choices on the test/holdout set.",
+ "evidence": "Reviewer-facing performance claims require a locked final test set.",
+ "path": "manuscripts[0].models[0].hyperparameterTunedOn",
+ "remediation": "Move tuning to inner validation folds and rerun the locked test set once.",
+ "owner": "model reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ },
+ {
+ "code": "DATA_MANIFEST_MISSING",
+ "severity": "high",
+ "message": "urban-heat-random-split is missing reproducibility artifact dataManifest.",
+ "evidence": "Geospatial results depend on data, code, and environment parity.",
+ "path": "manuscripts[0].reproducibilityArtifacts.dataManifest",
+ "remediation": "Attach a data manifest with sample ids, coordinates, split labels, and hashes.",
+ "owner": "reproducibility reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null
+ },
+ {
+ "code": "ENVIRONMENT_SPEC_MISSING",
+ "severity": "high",
+ "message": "urban-heat-random-split is missing reproducibility artifact environmentSpec.",
+ "evidence": "Geospatial results depend on data, code, and environment parity.",
+ "path": "manuscripts[0].reproducibilityArtifacts.environmentSpec",
+ "remediation": "Attach a pinned environment or container digest for spatial libraries.",
+ "owner": "reproducibility reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null
+ },
+ {
+ "code": "FULL_DATASET_PREPROCESSING_LEAKAGE",
+ "severity": "high",
+ "message": "urban-heat-random-split/rf-heat-risk fits spatial preprocessing on the full dataset.",
+ "evidence": "Raster normalization, imputation, or feature selection must be learned inside each training fold.",
+ "path": "manuscripts[0].models[0].preprocessingFitScope",
+ "remediation": "Refit preprocessing inside training folds and attach fold-specific transformation hashes.",
+ "owner": "reproducibility reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ },
+ {
+ "code": "HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT",
+ "severity": "high",
+ "message": "urban-heat-random-split/rf-heat-risk reports Moran's I 0.62 with a random split.",
+ "evidence": "High spatial autocorrelation inflates random train/test validation.",
+ "path": "manuscripts[0].models[0].moransI",
+ "remediation": "Run spatial block cross-validation or leave-region-out validation before presenting performance as reviewer-ready.",
+ "owner": "spatial statistics reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ },
+ {
+ "code": "MISSING_CRS_EVIDENCE",
+ "severity": "high",
+ "message": "urban-heat-random-split does not declare a coordinate reference system.",
+ "evidence": "Spatial distances, joins, and raster overlays cannot be reviewed without CRS evidence.",
+ "path": "manuscripts[0].spatialDesign.crs",
+ "remediation": "Declare the source CRS/EPSG code and any analysis projection used for distance or area operations.",
+ "owner": "geospatial methods reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null
+ },
+ {
+ "code": "MISSING_EXTERNAL_SPATIAL_VALIDATION",
+ "severity": "high",
+ "message": "urban-heat-random-split/rf-heat-risk lacks external spatial validation for broader deployment claims.",
+ "evidence": "Broad geographic or deployment claims should be checked outside the training geography.",
+ "path": "manuscripts[0].models[0].externalValidationSites",
+ "remediation": "Add an out-of-region validation site or limit the manuscript claim to the sampled geography.",
+ "owner": "methods reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ },
+ {
+ "code": "OVERBROAD_GEOGRAPHIC_CLAIM",
+ "severity": "high",
+ "message": "urban-heat-random-split makes a broad geographic claim with only 1 observed region(s).",
+ "evidence": "The model generalizes across continental urban heat islands.",
+ "path": "manuscripts[0].claims[0]",
+ "remediation": "Limit the claim to sampled regions or add external validation sites covering the claimed geography.",
+ "owner": "methods reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null
+ },
+ {
+ "code": "SENSITIVE_COORDINATE_OVERPRECISION",
+ "severity": "high",
+ "message": "urban-heat-random-split exposes sensitive locations at 6 decimal places.",
+ "evidence": "Human-subject or protected-species locations should be generalized before reviewer packets or public summaries.",
+ "path": "manuscripts[0].spatialDesign.coordinatePrecisionDecimals",
+ "remediation": "Round or jitter coordinates to 4 decimals or provide an approved restricted-location access path.",
+ "owner": "privacy reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null
+ },
+ {
+ "code": "COVARIATE_SOURCE_MISSING",
+ "severity": "warning",
+ "message": "urban-heat-random-split/rf-heat-risk covariate NDVI has no source citation or artifact id.",
+ "evidence": "Raster/vector covariates should be traceable for reproducibility and recency review.",
+ "path": "manuscripts[0].models[0].covariates[0].source",
+ "remediation": "Attach a source DOI, artifact id, or repository path for each spatial covariate.",
+ "owner": "data steward",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ },
+ {
+ "code": "SPATIAL_BLOCK_MAP_MISSING",
+ "severity": "warning",
+ "message": "urban-heat-random-split has spatial validation claims without a block map artifact.",
+ "evidence": "Reviewers need the held-out geometry or block map to audit leakage.",
+ "path": "manuscripts[0].reproducibilityArtifacts.spatialBlockMap",
+ "remediation": "Attach a block-map artifact id, geometry hash, or leave-site-out manifest.",
+ "owner": "geospatial methods reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null
+ },
+ {
+ "code": "STALE_COVARIATE_WINDOW",
+ "severity": "warning",
+ "message": "urban-heat-random-split/rf-heat-risk covariate NDVI spans 540 acquisition days.",
+ "evidence": "Long covariate windows can hide temporal drift in geospatial models.",
+ "path": "manuscripts[0].models[0].covariates[0].acquisitionWindowDays",
+ "remediation": "Use period-matched covariates or report temporal-drift sensitivity checks.",
+ "owner": "methods reviewer",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk"
+ }
+ ],
+ "reviewDecisions": [
+ {
+ "manuscriptId": "urban-heat-random-split",
+ "decision": "HOLD",
+ "reasonCodes": [
+ "SPATIAL_SPLIT_LEAKAGE",
+ "TEST_SET_TUNING",
+ "DATA_MANIFEST_MISSING",
+ "ENVIRONMENT_SPEC_MISSING",
+ "FULL_DATASET_PREPROCESSING_LEAKAGE",
+ "HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT",
+ "MISSING_CRS_EVIDENCE",
+ "MISSING_EXTERNAL_SPATIAL_VALIDATION",
+ "OVERBROAD_GEOGRAPHIC_CLAIM",
+ "SENSITIVE_COORDINATE_OVERPRECISION",
+ "COVARIATE_SOURCE_MISSING",
+ "SPATIAL_BLOCK_MAP_MISSING",
+ "STALE_COVARIATE_WINDOW"
+ ],
+ "reproducibilityScore": 0
+ }
+ ],
+ "researchGapOpportunities": [
+ {
+ "id": "urban-heat-random-split-regional-replication",
+ "title": "Prioritize out-of-region replication before broad geographic claims",
+ "rationale": "urban-heat-random-split samples 1 region(s), below the 3-region policy for broad claims.",
+ "firstAction": "Recruit or simulate a holdout site in the least represented claimed region."
+ },
+ {
+ "id": "urban-heat-random-split-spatial-validation-gap",
+ "title": "Add spatial block validation benchmark",
+ "rationale": "High autocorrelation with random validation means reported accuracy may be optimistic.",
+ "firstAction": "Create a leave-region-out benchmark and compare it to the random split baseline."
+ }
+ ],
+ "remediationActions": [
+ {
+ "code": "SPATIAL_SPLIT_LEAKAGE",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "model reviewer",
+ "action": "Use spatial block, leave-site-out, or regional holdout validation and regenerate performance claims."
+ },
+ {
+ "code": "TEST_SET_TUNING",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "model reviewer",
+ "action": "Move tuning to inner validation folds and rerun the locked test set once."
+ },
+ {
+ "code": "DATA_MANIFEST_MISSING",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null,
+ "owner": "reproducibility reviewer",
+ "action": "Attach a data manifest with sample ids, coordinates, split labels, and hashes."
+ },
+ {
+ "code": "ENVIRONMENT_SPEC_MISSING",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null,
+ "owner": "reproducibility reviewer",
+ "action": "Attach a pinned environment or container digest for spatial libraries."
+ },
+ {
+ "code": "FULL_DATASET_PREPROCESSING_LEAKAGE",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "reproducibility reviewer",
+ "action": "Refit preprocessing inside training folds and attach fold-specific transformation hashes."
+ },
+ {
+ "code": "HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "spatial statistics reviewer",
+ "action": "Run spatial block cross-validation or leave-region-out validation before presenting performance as reviewer-ready."
+ },
+ {
+ "code": "MISSING_CRS_EVIDENCE",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null,
+ "owner": "geospatial methods reviewer",
+ "action": "Declare the source CRS/EPSG code and any analysis projection used for distance or area operations."
+ },
+ {
+ "code": "MISSING_EXTERNAL_SPATIAL_VALIDATION",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "methods reviewer",
+ "action": "Add an out-of-region validation site or limit the manuscript claim to the sampled geography."
+ },
+ {
+ "code": "OVERBROAD_GEOGRAPHIC_CLAIM",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null,
+ "owner": "methods reviewer",
+ "action": "Limit the claim to sampled regions or add external validation sites covering the claimed geography."
+ },
+ {
+ "code": "SENSITIVE_COORDINATE_OVERPRECISION",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null,
+ "owner": "privacy reviewer",
+ "action": "Round or jitter coordinates to 4 decimals or provide an approved restricted-location access path."
+ },
+ {
+ "code": "COVARIATE_SOURCE_MISSING",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "data steward",
+ "action": "Attach a source DOI, artifact id, or repository path for each spatial covariate."
+ },
+ {
+ "code": "SPATIAL_BLOCK_MAP_MISSING",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": null,
+ "owner": "geospatial methods reviewer",
+ "action": "Attach a block-map artifact id, geometry hash, or leave-site-out manifest."
+ },
+ {
+ "code": "STALE_COVARIATE_WINDOW",
+ "manuscriptId": "urban-heat-random-split",
+ "modelId": "rf-heat-risk",
+ "owner": "methods reviewer",
+ "action": "Use period-matched covariates or report temporal-drift sensitivity checks."
+ }
+ ],
+ "fingerprint": "e036107e72f70a7e"
+}
diff --git a/geospatial-spatial-autocorrelation-assistant/reports/risky-review.md b/geospatial-spatial-autocorrelation-assistant/reports/risky-review.md
new file mode 100644
index 00000000..e4579b20
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/reports/risky-review.md
@@ -0,0 +1,64 @@
+# Geospatial Spatial-Autocorrelation Review Assistant
+
+Packet: geo-review-risky-2026-06
+Status: HOLD
+Fingerprint: e036107e72f70a7e
+
+## Summary
+
+HOLD: 1 manuscript(s) produced 13 finding(s): 2 critical, 8 high, 3 warning, and 2 research gap prompt(s).
+
+## Manuscript Decisions
+
+- urban-heat-random-split: HOLD; reproducibility score 0/100; 13 finding(s)
+
+## Findings
+
+- CRITICAL SPATIAL_SPLIT_LEAKAGE: urban-heat-random-split/rf-heat-risk has train/test samples only 0.6 km apart without spatial blocking.
+ - Evidence: Policy requires at least 35 km or explicit spatial block validation.
+ - Remediation: Use spatial block, leave-site-out, or regional holdout validation and regenerate performance claims.
+- CRITICAL TEST_SET_TUNING: urban-heat-random-split/rf-heat-risk tunes model choices on the test/holdout set.
+ - Evidence: Reviewer-facing performance claims require a locked final test set.
+ - Remediation: Move tuning to inner validation folds and rerun the locked test set once.
+- HIGH DATA_MANIFEST_MISSING: urban-heat-random-split is missing reproducibility artifact dataManifest.
+ - Evidence: Geospatial results depend on data, code, and environment parity.
+ - Remediation: Attach a data manifest with sample ids, coordinates, split labels, and hashes.
+- HIGH ENVIRONMENT_SPEC_MISSING: urban-heat-random-split is missing reproducibility artifact environmentSpec.
+ - Evidence: Geospatial results depend on data, code, and environment parity.
+ - Remediation: Attach a pinned environment or container digest for spatial libraries.
+- HIGH FULL_DATASET_PREPROCESSING_LEAKAGE: urban-heat-random-split/rf-heat-risk fits spatial preprocessing on the full dataset.
+ - Evidence: Raster normalization, imputation, or feature selection must be learned inside each training fold.
+ - Remediation: Refit preprocessing inside training folds and attach fold-specific transformation hashes.
+- HIGH HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT: urban-heat-random-split/rf-heat-risk reports Moran's I 0.62 with a random split.
+ - Evidence: High spatial autocorrelation inflates random train/test validation.
+ - Remediation: Run spatial block cross-validation or leave-region-out validation before presenting performance as reviewer-ready.
+- HIGH MISSING_CRS_EVIDENCE: urban-heat-random-split does not declare a coordinate reference system.
+ - Evidence: Spatial distances, joins, and raster overlays cannot be reviewed without CRS evidence.
+ - Remediation: Declare the source CRS/EPSG code and any analysis projection used for distance or area operations.
+- HIGH MISSING_EXTERNAL_SPATIAL_VALIDATION: urban-heat-random-split/rf-heat-risk lacks external spatial validation for broader deployment claims.
+ - Evidence: Broad geographic or deployment claims should be checked outside the training geography.
+ - Remediation: Add an out-of-region validation site or limit the manuscript claim to the sampled geography.
+- HIGH OVERBROAD_GEOGRAPHIC_CLAIM: urban-heat-random-split makes a broad geographic claim with only 1 observed region(s).
+ - Evidence: The model generalizes across continental urban heat islands.
+ - Remediation: Limit the claim to sampled regions or add external validation sites covering the claimed geography.
+- HIGH SENSITIVE_COORDINATE_OVERPRECISION: urban-heat-random-split exposes sensitive locations at 6 decimal places.
+ - Evidence: Human-subject or protected-species locations should be generalized before reviewer packets or public summaries.
+ - Remediation: Round or jitter coordinates to 4 decimals or provide an approved restricted-location access path.
+- WARNING COVARIATE_SOURCE_MISSING: urban-heat-random-split/rf-heat-risk covariate NDVI has no source citation or artifact id.
+ - Evidence: Raster/vector covariates should be traceable for reproducibility and recency review.
+ - Remediation: Attach a source DOI, artifact id, or repository path for each spatial covariate.
+- WARNING SPATIAL_BLOCK_MAP_MISSING: urban-heat-random-split has spatial validation claims without a block map artifact.
+ - Evidence: Reviewers need the held-out geometry or block map to audit leakage.
+ - Remediation: Attach a block-map artifact id, geometry hash, or leave-site-out manifest.
+- WARNING STALE_COVARIATE_WINDOW: urban-heat-random-split/rf-heat-risk covariate NDVI spans 540 acquisition days.
+ - Evidence: Long covariate windows can hide temporal drift in geospatial models.
+ - Remediation: Use period-matched covariates or report temporal-drift sensitivity checks.
+
+## Research Gap Opportunities
+
+- urban-heat-random-split-regional-replication: Prioritize out-of-region replication before broad geographic claims
+ - Rationale: urban-heat-random-split samples 1 region(s), below the 3-region policy for broad claims.
+ - First action: Recruit or simulate a holdout site in the least represented claimed region.
+- urban-heat-random-split-spatial-validation-gap: Add spatial block validation benchmark
+ - Rationale: High autocorrelation with random validation means reported accuracy may be optimistic.
+ - First action: Create a leave-region-out benchmark and compare it to the random split baseline.
diff --git a/geospatial-spatial-autocorrelation-assistant/reports/summary.svg b/geospatial-spatial-autocorrelation-assistant/reports/summary.svg
new file mode 100644
index 00000000..1e389e92
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/reports/summary.svg
@@ -0,0 +1,13 @@
+
\ No newline at end of file
diff --git a/geospatial-spatial-autocorrelation-assistant/sample-data.js b/geospatial-spatial-autocorrelation-assistant/sample-data.js
new file mode 100644
index 00000000..60fd69d9
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/sample-data.js
@@ -0,0 +1,122 @@
+"use strict";
+
+ // Synthetic review packet whose single manuscript is filled with deficient
+ // values (empty CRS, random split, tuning on the test set, empty artifacts).
+ // test.js asserts that evaluating this packet yields status "HOLD".
+ const riskyPacket = {
+ id: "geo-review-risky-2026-06",
+ // Review thresholds. cleanPacket below reuses this same policy.
+ policy: {
+ minSpatialHoldoutKm: 35,
+ highMoransI: 0.35,
+ maxSensitivePrecisionDecimals: 4,
+ minRegionsForBroadClaims: 3,
+ maxCovariateWindowDays: 365
+ },
+ manuscripts: [
+ {
+ id: "urban-heat-random-split",
+ title: "Continental urban heat risk from neighborhood satellite features",
+ field: "environmental epidemiology",
+ sensitivity: "human-subjects",
+ spatialDesign: {
+ // Empty CRS combined with 6-decimal coordinates, above the 4-decimal policy limit.
+ crs: "",
+ projection: "web map tiles",
+ coordinatePrecisionDecimals: 6,
+ samplingFrame: "three volunteer neighborhoods"
+ },
+ claims: [
+ {
+ // Claims four regions while every sample below is in "Northeast".
+ id: "claim-generalization",
+ scope: "continental",
+ claimedRegions: ["Northeast", "Midwest", "South", "West"],
+ text: "The model generalizes across continental urban heat islands."
+ }
+ ],
+ // Four samples in one region, alternating between the train and test splits.
+ samples: [
+ { id: "s-001", lat: 40.712776, lon: -74.005974, split: "train", region: "Northeast", site: "NYC-A" },
+ { id: "s-002", lat: 40.734112, lon: -73.98742, split: "test", region: "Northeast", site: "NYC-B" },
+ { id: "s-003", lat: 40.75891, lon: -73.98513, split: "train", region: "Northeast", site: "NYC-C" },
+ { id: "s-004", lat: 40.76172, lon: -73.97864, split: "test", region: "Northeast", site: "NYC-D" }
+ ],
+ models: [
+ {
+ id: "rf-heat-risk",
+ // Random split with Moran's I above policy.highMoransI (0.35).
+ splitStrategy: "random",
+ moransI: 0.62,
+ preprocessingFitScope: "full_dataset",
+ hyperparameterTunedOn: "test",
+ deploymentContext: "national heat-risk triage",
+ spatialCovariates: true,
+ externalValidationSites: [],
+ covariates: [
+ // NDVI has no source and a 540-day window, above policy.maxCovariateWindowDays (365).
+ { name: "NDVI", source: "", resolutionMeters: 1000, acquisitionWindowDays: 540 },
+ { name: "impervious_surface", source: "city-open-data:impervious-v1", resolutionMeters: 30, acquisitionWindowDays: 90 }
+ ]
+ }
+ ],
+ // Only codeCommit is populated; the other artifact references are empty strings.
+ reproducibilityArtifacts: {
+ dataManifest: "",
+ codeCommit: "2f7c91e",
+ environmentSpec: "",
+ spatialBlockMap: ""
+ }
+ }
+ ]
+ };
+
+const cleanPacket = {
+ id: "geo-review-clean-2026-06",
+ policy: riskyPacket.policy,
+ manuscripts: [
+ {
+ id: "rangeland-blocked-validation",
+ title: "Regional rangeland recovery forecasts with blocked spatial validation",
+ field: "ecology",
+ sensitivity: "public-environmental",
+ spatialDesign: {
+ crs: "EPSG:4326 WGS84 source coordinates; EPSG:5070 equal-area analysis projection",
+ projection: "EPSG:5070",
+ coordinatePrecisionDecimals: 3,
+ samplingFrame: "blocked stratified ecological sites"
+ },
+ claims: [
+ {
+ id: "claim-regional",
+ scope: "regional",
+ claimedRegions: ["Colorado Front Range", "New Mexico Plateau", "Utah Basin"],
+ text: "The blocked model supports regional recovery forecasts for sampled western rangeland systems."
+ }
+ ],
+ samples: [
+ { id: "co-001", lat: 39.739, lon: -104.99, split: "train", region: "Colorado Front Range", site: "CO-A" },
+ { id: "co-002", lat: 39.231, lon: -105.02, split: "train", region: "Colorado Front Range", site: "CO-B" },
+ { id: "nm-001", lat: 35.084, lon: -106.65, split: "test", region: "New Mexico Plateau", site: "NM-A" },
+ { id: "ut-001", lat: 40.760, lon: -111.89, split: "test", region: "Utah Basin", site: "UT-A" }
+ ],
+ models: [
+ {
+ id: "blocked-gbm-recovery",
+ splitStrategy: "spatial_block_leave_region_out",
+ moransI: 0.18,
+ preprocessingFitScope: "training_fold",
+ hyperparameterTunedOn: "inner_validation",
+ deploymentContext: "",
+ spatialCovariates: true,
+ externalValidationSites: ["New Mexico Plateau", "Utah Basin"],
+ covariates: [
+ { name: "soil_moisture", source: "doi:10.1234/soil-moisture-v3", resolutionMeters: 250, acquisitionWindowDays: 30 },
+ { name: "burn_severity", source: "artifact:burn-severity-2026-05", resolutionMeters: 30, acquisitionWindowDays: 12 }
+ ]
+ }
+ ],
+ reproducibilityArtifacts: {
+ dataManifest: "artifact:geo-sample-manifest-v2",
+ codeCommit: "d6b0e3c",
+ environmentSpec: "container:ghcr.io/scibase/geo-review@sha256:abc123",
+ spatialBlockMap: "artifact:block-map-v2"
+ }
+ }
+ ]
+};
+
+module.exports = {
+ riskyPacket,
+ cleanPacket
+};
diff --git a/geospatial-spatial-autocorrelation-assistant/test.js b/geospatial-spatial-autocorrelation-assistant/test.js
new file mode 100644
index 00000000..9628888f
--- /dev/null
+++ b/geospatial-spatial-autocorrelation-assistant/test.js
@@ -0,0 +1,60 @@
+"use strict";
+
+const assert = require("node:assert/strict");
+const {
+ evaluateGeospatialReviewPacket,
+ renderMarkdownReport,
+ renderSvgSummary,
+ haversineKm
+} = require("./index");
+const { riskyPacket, cleanPacket } = require("./sample-data");
+
+ // A non-object packet must be rejected with a descriptive error.
+ assert.throws(() => evaluateGeospatialReviewPacket(null), /expects a packet object/);
+
+ // Risky fixture: must be held, carry the key leakage/CRS/tuning findings,
+ // surface at least two research-gap prompts, and score below 50.
+ const risky = evaluateGeospatialReviewPacket(riskyPacket, { now: "2026-06-01T10:30:00.000Z" });
+ assert.equal(risky.status, "HOLD");
+ assert.equal(risky.reviewDecisions[0].decision, "HOLD");
+ assert.ok(risky.findings.some((item) => item.code === "SPATIAL_SPLIT_LEAKAGE"));
+ assert.ok(risky.findings.some((item) => item.code === "MISSING_CRS_EVIDENCE"));
+ assert.ok(risky.findings.some((item) => item.code === "HIGH_SPATIAL_AUTOCORRELATION_RANDOM_SPLIT"));
+ assert.ok(risky.findings.some((item) => item.code === "TEST_SET_TUNING"));
+ assert.ok(risky.researchGapOpportunities.length >= 2);
+ assert.ok(risky.reviewDecisions[0].reproducibilityScore < 50);
+
+ // Same packet evaluated with a different `now`: the fingerprint must not
+ // change with the supplied timestamp.
+ const riskyRepeat = evaluateGeospatialReviewPacket(riskyPacket, { now: "2026-06-01T10:31:00.000Z" });
+ assert.equal(risky.fingerprint, riskyRepeat.fingerprint);
+
+ // Clean fixture: must be released with zero findings and a perfect score.
+ const clean = evaluateGeospatialReviewPacket(cleanPacket, { now: "2026-06-01T10:30:00.000Z" });
+ assert.equal(clean.status, "READY");
+ assert.equal(clean.findings.length, 0);
+ assert.equal(clean.reviewDecisions[0].reproducibilityScore, 100);
+
+ // Minimal packet with an out-of-range latitude (110). It supplies no `policy`,
+ // so the evaluator presumably falls back to defaults - TODO confirm in index.js.
+ const invalidCoordinatePacket = {
+ id: "invalid-coordinate",
+ manuscripts: [
+ {
+ id: "bad-coordinate",
+ spatialDesign: { crs: "EPSG:4326", coordinatePrecisionDecimals: 2 },
+ claims: [],
+ samples: [{ id: "bad", lat: 110, lon: -74, split: "train", region: "X" }],
+ models: [],
+ reproducibilityArtifacts: { dataManifest: "m", codeCommit: "c", environmentSpec: "e" }
+ }
+ ]
+ };
+ const invalid = evaluateGeospatialReviewPacket(invalidCoordinatePacket, { now: "2026-06-01T10:30:00.000Z" });
+ assert.ok(invalid.findings.some((item) => item.code === "INVALID_COORDINATE"));
+
+ // The Markdown report must include the title, a finding code, and the gap section.
+ const markdown = renderMarkdownReport(risky, riskyPacket);
+ assert.ok(markdown.includes("Spatial-Autocorrelation"));
+ assert.ok(markdown.includes("SPATIAL_SPLIT_LEAKAGE"));
+ assert.ok(markdown.includes("Research Gap Opportunities"));
+
+ // SVG summary rendering of the risky result.
+ const svg = renderSvgSummary(risky);
+assert.ok(svg.includes("