Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions repository-reproducibility-bundle-guard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Repository Reproducibility Bundle Guard

Self-contained SCIBASE Project Repository & Version Control slice for issue #10. The guard checks whether a scientific repository release candidate is reproducible before a tag, DOI export, or public publication packet is treated as ready.

## Why this slice is distinct

Existing #10 submissions cover broad repository ledgers, branch protection, component-owner approval, semantic tags, external reference pinning, notebook-output diffs, fork provenance, release signatures, restore rehearsal, automation credential rotation, artifact pruning, Git LFS pointer integrity, review-decision provenance, and release-note claim evidence. This module focuses only on reproducibility bundle readiness: required manifest components, SHA-256 integrity, executable lockfile evidence, pinned runtime images, data/result provenance, release metadata, and export-bundle completeness.

## Run

```bash
npm test
npm run demo
npm run demo:video
```

Demo artifacts are written to `reports/`, including JSON, Markdown, SVG, GIF, and MP4 files.
61 changes: 61 additions & 0 deletions repository-reproducibility-bundle-guard/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
const fs = require("fs");
const path = require("path");

const { assessReproducibilityBundle } = require("./index");
const { releaseCandidate, riskyCandidate } = require("./sample-data");

// Demo artifacts are written to a "reports" folder next to this script.
const reportsDir = path.join(__dirname, "reports");
// `recursive: true` makes this a no-op when the folder already exists.
fs.mkdirSync(reportsDir, { recursive: true });

/**
 * Render an assessment report as a Markdown document.
 *
 * @param {string} name - Scenario label shown on the "Scenario:" line.
 * @param {object} report - Assessment report; reads repositoryId, releaseTag,
 *   decision, summary.componentsReviewed, summary.bundleEntries, findings and
 *   releaseCriteria.
 * @returns {string} Markdown text terminated by a single newline.
 */
function markdownReport(name, report) {
  let findingsSection = "- No reproducibility bundle findings.";
  if (report.findings.length) {
    const bullets = [];
    for (const entry of report.findings) {
      bullets.push(`- ${entry.severity.toUpperCase()} ${entry.code}: ${entry.message}`);
    }
    findingsSection = bullets.join("\n");
  }

  const documentLines = [
    "# Repository Reproducibility Bundle Guard",
    "",
    `Scenario: ${name}`,
    "",
    `Repository: ${report.repositoryId}`,
    `Release tag: ${report.releaseTag}`,
    `Decision: ${report.decision.toUpperCase()}`,
    "",
    `Reviewed ${report.summary.componentsReviewed} manifest components and ${report.summary.bundleEntries} bundle entries.`,
    "",
    "## Findings",
    "",
    findingsSection,
    "",
    "## Release Criteria",
    "",
    report.releaseCriteria.map((criterion) => `- ${criterion}`).join("\n"),
    // The trailing empty entry yields the final newline of the document.
    "",
  ];
  return documentLines.join("\n");
}

/**
 * Escape the five XML special characters so a value can be embedded safely in
 * SVG text content or attribute values.
 *
 * @param {*} value - Value to embed; coerced with String().
 * @returns {string} Escaped text.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}

/**
 * Render a summary card for an assessment report as an SVG document.
 *
 * The badge colour follows the decision: red for "hold", orange for "revise",
 * green for anything else. Free-text fields (repository id, release tag,
 * decision) are XML-escaped so characters such as `&` or `<` cannot produce a
 * malformed SVG file.
 *
 * @param {object} report - Assessment report; reads repositoryId, releaseTag,
 *   decision and summary.{componentsReviewed, high, medium, low}.
 * @returns {string} SVG markup.
 */
function svgReport(report) {
  const color = report.decision === "hold" ? "#b91c1c" : report.decision === "revise" ? "#c2410c" : "#15803d";
  const repositoryId = escapeXml(report.repositoryId);
  const releaseTag = escapeXml(report.releaseTag);
  const decision = escapeXml(report.decision.toUpperCase());
  return `<svg xmlns="http://www.w3.org/2000/svg" width="920" height="430" viewBox="0 0 920 430">
<rect width="920" height="430" fill="#111827"/>
<text x="42" y="66" fill="#f8fafc" font-family="Arial" font-size="32">Repository Reproducibility Bundle Guard</text>
<text x="42" y="112" fill="#cbd5e1" font-family="Arial" font-size="20">${repositoryId} / ${releaseTag}</text>
<rect x="42" y="150" width="210" height="80" rx="8" fill="${color}"/>
<text x="68" y="201" fill="#fff" font-family="Arial" font-size="30">${decision}</text>
<text x="42" y="280" fill="#e5e7eb" font-family="Arial" font-size="22">Components: ${report.summary.componentsReviewed}</text>
<text x="42" y="320" fill="#fecaca" font-family="Arial" font-size="20">High: ${report.summary.high}</text>
<text x="172" y="320" fill="#fed7aa" font-family="Arial" font-size="20">Medium: ${report.summary.medium}</text>
<text x="342" y="320" fill="#bfdbfe" font-family="Arial" font-size="20">Low: ${report.summary.low}</text>
<text x="42" y="374" fill="#94a3b8" font-family="Arial" font-size="18">Synthetic release packets only. No private data or external services.</text>
</svg>`;
}

// Assess each sample candidate and write its JSON, Markdown and SVG reports,
// then print a one-line summary of the decision.
const scenarios = [
  { name: "release-candidate", candidate: releaseCandidate },
  { name: "risky-candidate", candidate: riskyCandidate },
];

scenarios.forEach(({ name, candidate }) => {
  const report = assessReproducibilityBundle(candidate);
  // Renderers are invoked lazily so each artifact is produced immediately
  // before it is written, in JSON -> Markdown -> SVG order.
  const renderers = [
    ["json", () => JSON.stringify(report, null, 2)],
    ["md", () => markdownReport(name, report)],
    ["svg", () => svgReport(report)],
  ];
  renderers.forEach(([extension, render]) => {
    const target = path.join(reportsDir, `${name}.${extension}`);
    fs.writeFileSync(target, render());
  });
  console.log(`${name}: ${report.decision} (${report.summary.findings} findings)`);
});
46 changes: 46 additions & 0 deletions repository-reproducibility-bundle-guard/demo_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pathlib import Path

import imageio.v3 as iio
import numpy as np
from PIL import Image, ImageDraw, ImageFont


# Directory containing this script; output paths are resolved relative to it.
ROOT = Path(__file__).resolve().parent
# Folder that receives the generated GIF and MP4.
REPORTS = ROOT / "reports"
# exist_ok=True: do not fail when the folder is already present.
REPORTS.mkdir(exist_ok=True)


def font(size):
    """Return a font of the requested size for slide text.

    Tries "arial.ttf" and then "segoeui.ttf"; when neither can be opened
    (``ImageFont.truetype`` raises ``OSError``), falls back to
    ``ImageFont.load_default()``.
    """
    candidates = ("arial.ttf", "segoeui.ttf")
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, size)
        except OSError:
            # This font file is not available here; try the next candidate.
            continue
        return loaded
    return ImageFont.load_default()


# Slide deck content: one (title, subtitle) pair per slide, rendered in order.
slides = [
("Reproducibility Bundle Guard", "Project Repository & Version Control #10"),
("Manifest", "manuscript, data, code, notebooks, results, protocols, metadata"),
("Integrity", "SHA-256 hashes, lockfiles, pinned runtimes, provenance refs"),
("Decision", "hold release until the export bundle is reproducible"),
]

# Load each font size once instead of re-opening the font file for every slide.
title_font = font(40)
subtitle_font = font(24)
banner_font = font(21)
footer_font = font(20)

# The footer counter is derived from the deck so it stays correct when slides
# are added or removed.
total_slides = len(slides)

frames = []
for index, (title, subtitle) in enumerate(slides, start=1):
    image = Image.new("RGB", (960, 544), "#101827")
    draw = ImageDraw.Draw(image)
    draw.rectangle((44, 52, 916, 492), outline="#22c55e", width=3)
    draw.text((80, 124), title, fill="#f8fafc", font=title_font)
    draw.text((80, 206), subtitle, fill="#dcfce7", font=subtitle_font)
    draw.rectangle((80, 326, 818, 382), fill="#166534")
    draw.text(
        (104, 342),
        "release only when reviewers can reproduce the exact packet",
        fill="#f0fdf4",
        font=banner_font,
    )
    draw.text(
        (80, 438),
        f"Slide {index}/{total_slides} - synthetic reviewer artifact",
        fill="#cbd5e1",
        font=footer_font,
    )
    # Repeat the same image 14 times so each slide is held on screen.
    frames.extend([image] * 14)

gif_path = REPORTS / "demo.gif"
mp4_path = REPORTS / "demo.mp4"
# GIF: the first frame carries the animation, the rest are appended to it.
frames[0].save(gif_path, save_all=True, append_images=frames[1:], duration=120, loop=0)
# MP4: imageio receives the frames as numpy arrays.
iio.imwrite(mp4_path, [np.asarray(frame) for frame in frames], fps=8, codec="libx264")
print(f"wrote {gif_path}")
print(f"wrote {mp4_path}")
272 changes: 272 additions & 0 deletions repository-reproducibility-bundle-guard/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
// Severity labels attached to findings. Any HIGH finding yields a "hold"
// decision; otherwise any MEDIUM finding yields "revise".
const HIGH = "high";
const MEDIUM = "medium";
const LOW = "low";

// Component types that must each appear at least once in the release manifest;
// every missing type produces a MISSING_REQUIRED_COMPONENT finding.
const REQUIRED_COMPONENT_TYPES = [
"manuscript",
"data",
"code",
"notebook",
"results",
"protocol",
"metadata",
];

/**
 * Validate that a value is a string with non-whitespace content.
 *
 * @param {*} value - Value to validate.
 * @param {string} field - Field label used in the error message.
 * @returns {string} The value with surrounding whitespace trimmed.
 * @throws {TypeError} When the value is not a string or is blank.
 */
function requiredString(value, field) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  if (trimmed === "") {
    throw new TypeError(`${field} must be a non-empty string`);
  }
  return trimmed;
}

/**
 * Validate that a value is an array.
 *
 * @param {*} value - Value to validate.
 * @param {string} field - Field label used in the error message.
 * @returns {Array} The same array instance that was passed in.
 * @throws {TypeError} When the value is not an array.
 */
function array(value, field) {
  if (Array.isArray(value)) {
    return value;
  }
  throw new TypeError(`${field} must be an array`);
}

/**
 * Convert every entry to a string and drop repeats, keeping first-seen order.
 *
 * @param {Array} values - Entries to de-duplicate.
 * @returns {string[]} Distinct string forms of the entries.
 */
function unique(values) {
  const seen = new Set();
  for (const text of values.map((value) => String(value))) {
    seen.add(text);
  }
  return Array.from(seen);
}

/**
 * Check whether a value is a SHA-256 digest written as exactly 64 lowercase
 * hexadecimal characters.
 *
 * @param {*} value - Value to check.
 * @returns {boolean} True only for a string matching that format.
 */
function isSha256(value) {
  if (typeof value !== "string") {
    return false;
  }
  const digestPattern = /^[a-f0-9]{64}$/;
  return digestPattern.test(value);
}

/**
 * Check whether a runtime image reference ends with an "@sha256:<digest>"
 * suffix of 64 lowercase hexadecimal characters.
 *
 * @param {*} value - Image reference to check.
 * @returns {boolean} True only for a string ending in such a digest suffix.
 */
function isPinnedRuntime(value) {
  if (typeof value !== "string") {
    return false;
  }
  const digestSuffix = /@sha256:[a-f0-9]{64}$/;
  return digestSuffix.test(value);
}

/**
 * Normalize one raw manifest component into a predictable shape.
 *
 * id, type, path and sha256 are mandatory non-empty strings (sha256 is
 * lower-cased). The remaining fields fall back to defaults: bytes to 0,
 * versioned to true unless explicitly false, runtimeImage to "" and the list
 * fields to empty arrays of distinct strings.
 *
 * @param {object} raw - Raw component record.
 * @param {number} index - Position in the components array, used in error labels.
 * @returns {object} Normalized component.
 * @throws {TypeError} When a mandatory string field is missing or blank.
 */
function normalizeComponent(raw, index) {
  const label = (key) => `components[${index}].${key}`;

  const id = requiredString(raw.id, label("id"));
  const type = requiredString(raw.type, label("type"));
  const path = requiredString(raw.path, label("path"));
  const sha256 = requiredString(raw.sha256, label("sha256")).toLowerCase();
  const bytes = Number(raw.bytes || 0);
  const versioned = raw.versioned !== false;
  const provenanceRefs = unique(raw.provenanceRefs || []);
  let runtimeImage = "";
  if (raw.runtimeImage) {
    runtimeImage = String(raw.runtimeImage);
  }
  const lockfiles = unique(raw.lockfiles || []);
  const exportTargets = unique(raw.exportTargets || []);

  return {
    id,
    type,
    path,
    sha256,
    bytes,
    versioned,
    provenanceRefs,
    runtimeImage,
    lockfiles,
    exportTargets,
  };
}

/**
 * Normalize a raw release candidate into the shape used by the assessment.
 *
 * repositoryId, releaseTag and commit are mandatory non-empty strings.
 * Missing metadata or bundle sections are treated as empty: text fields
 * become "", flags become false and lists become empty arrays.
 *
 * @param {object} raw - Raw release candidate.
 * @returns {object} Normalized candidate with metadata, components and bundle.
 * @throws {TypeError} When a mandatory string is missing/blank, when
 *   components is not an array, or when a component fails normalization.
 */
function normalizeCandidate(raw) {
  // Fall back to empty objects so the field reads below need no extra guards.
  const metadata = raw.metadata || {};
  const bundle = raw.bundle || {};
  const textOrEmpty = (value) => (value ? String(value) : "");

  return {
    repositoryId: requiredString(raw.repositoryId, "repositoryId"),
    releaseTag: requiredString(raw.releaseTag, "releaseTag"),
    commit: requiredString(raw.commit, "commit"),
    metadata: {
      doi: textOrEmpty(metadata.doi),
      schemaOrg: Boolean(metadata.schemaOrg),
      dataCite: Boolean(metadata.dataCite),
      license: textOrEmpty(metadata.license),
      authors: unique(metadata.authors || []),
    },
    components: array(raw.components || [], "components").map(normalizeComponent),
    bundle: {
      archivePath: textOrEmpty(bundle.archivePath),
      sha256: textOrEmpty(bundle.sha256).toLowerCase(),
      includes: unique(bundle.includes || []),
      generatedAt: textOrEmpty(bundle.generatedAt),
    },
  };
}

/**
 * Build a finding record.
 *
 * @param {string} code - Finding code.
 * @param {string} severity - Severity label.
 * @param {string} sourceId - Identifier of the item the finding refers to.
 * @param {string} message - Description of the problem.
 * @param {string} remediation - Suggested fix.
 * @returns {{code: string, severity: string, sourceId: string, message: string, remediation: string}}
 */
function finding(code, severity, sourceId, message, remediation) {
  const entry = {};
  entry.code = code;
  entry.severity = severity;
  entry.sourceId = sourceId;
  entry.message = message;
  entry.remediation = remediation;
  return entry;
}

/**
 * Assess whether a release candidate's reproducibility bundle is ready.
 *
 * The candidate is normalized first, then checked for: required component
 * types, duplicate manifest paths, per-component digest / versioning /
 * lockfile / runtime / provenance / export-target evidence, release metadata,
 * author attribution, the export bundle's own path and digest, and manifest
 * paths missing from the bundle.
 *
 * The decision is "hold" when any high-severity finding exists, otherwise
 * "revise" when any medium-severity finding exists, otherwise "release".
 *
 * Path bookkeeping uses Sets so duplicate detection and bundle membership are
 * linear in the number of components rather than quadratic.
 *
 * @param {object} rawCandidate - Raw release candidate (see normalizeCandidate).
 * @returns {object} Report with repositoryId, releaseTag, commit, decision,
 *   summary counts, the findings list and the static release criteria.
 * @throws {TypeError} When normalization of the candidate fails.
 */
function assessReproducibilityBundle(rawCandidate) {
  const candidate = normalizeCandidate(rawCandidate);
  const findings = [];
  const componentTypes = new Set(candidate.components.map((item) => item.type));

  // One pass over the manifest: remember every path, and note a path as a
  // duplicate the first time it is seen again. Set insertion order keeps the
  // findings in manifest order.
  const manifestPaths = new Set();
  const duplicatePaths = new Set();
  for (const component of candidate.components) {
    if (manifestPaths.has(component.path)) {
      duplicatePaths.add(component.path);
    } else {
      manifestPaths.add(component.path);
    }
  }

  for (const type of REQUIRED_COMPONENT_TYPES) {
    if (!componentTypes.has(type)) {
      findings.push(
        finding(
          "MISSING_REQUIRED_COMPONENT",
          HIGH,
          type,
          `${type} component is absent from the release manifest.`,
          "Add the required component or explicitly hold the release until the research packet is complete."
        )
      );
    }
  }

  for (const path of duplicatePaths) {
    findings.push(
      finding(
        "DUPLICATE_MANIFEST_PATH",
        HIGH,
        path,
        `${path} appears more than once in the release manifest.`,
        "Keep one canonical manifest row per path so rollback and integrity checks are deterministic."
      )
    );
  }

  for (const component of candidate.components) {
    // Code and notebooks are the executable components that need lockfile
    // and runtime evidence; data and results need provenance.
    const isExecutable = ["code", "notebook"].includes(component.type);
    const isDataLike = ["data", "results"].includes(component.type);

    if (!isSha256(component.sha256)) {
      findings.push(
        finding(
          "INVALID_COMPONENT_DIGEST",
          HIGH,
          component.id,
          `${component.path} does not carry a valid SHA-256 digest.`,
          "Record the lowercase SHA-256 digest before tagging the release."
        )
      );
    }

    if (!component.versioned) {
      findings.push(
        finding(
          "UNVERSIONED_COMPONENT",
          HIGH,
          component.id,
          `${component.path} is not marked as version-controlled.`,
          "Commit, tag, or archive the component under repository version control before release."
        )
      );
    }

    if (isExecutable && component.lockfiles.length === 0) {
      findings.push(
        finding(
          "MISSING_RUNTIME_LOCKFILE",
          HIGH,
          component.id,
          `${component.path} lacks lockfile evidence for executable reproducibility.`,
          "Attach package-lock, requirements lock, renv, Manifest.toml, or equivalent runtime lock evidence."
        )
      );
    }

    if (isExecutable && !isPinnedRuntime(component.runtimeImage)) {
      findings.push(
        finding(
          "UNPINNED_RUNTIME_IMAGE",
          MEDIUM,
          component.id,
          `${component.path} runtime image is not digest-pinned.`,
          "Pin the container/runtime image by immutable digest instead of a mutable tag."
        )
      );
    }

    if (isDataLike && component.provenanceRefs.length === 0) {
      findings.push(
        finding(
          "MISSING_DATA_PROVENANCE",
          MEDIUM,
          component.id,
          `${component.path} lacks provenance references to inputs, instruments, or analysis commits.`,
          "Link the dataset/result to source instruments, upstream datasets, notebooks, or analysis commits."
        )
      );
    }

    if (component.type === "metadata" && component.exportTargets.length === 0) {
      findings.push(
        finding(
          "MISSING_METADATA_EXPORT_TARGETS",
          LOW,
          component.id,
          `${component.path} does not declare export targets.`,
          "Declare DOI, schema.org, DataCite, or repository export targets for discovery."
        )
      );
    }
  }

  if (!candidate.metadata.doi || !candidate.metadata.schemaOrg || !candidate.metadata.dataCite || !candidate.metadata.license) {
    findings.push(
      finding(
        "INCOMPLETE_RELEASE_METADATA",
        HIGH,
        "metadata",
        "Release metadata is missing DOI, schema.org, DataCite, or license evidence.",
        "Complete persistent identifier, discovery metadata, citation metadata, and license fields before publication."
      )
    );
  }

  if (candidate.metadata.authors.length === 0) {
    findings.push(
      finding(
        "MISSING_AUTHOR_ATTRIBUTION",
        MEDIUM,
        "metadata.authors",
        "Release metadata does not include author attribution.",
        "Attach author identifiers before the repository release is exported or assigned a DOI."
      )
    );
  }

  if (!candidate.bundle.archivePath || !isSha256(candidate.bundle.sha256)) {
    findings.push(
      finding(
        "INVALID_EXPORT_BUNDLE",
        HIGH,
        "bundle",
        "Release bundle archive path or digest is missing/invalid.",
        "Generate a release archive with a stable SHA-256 digest before tagging."
      )
    );
  }

  // manifestPaths is already distinct and in manifest order, so filtering it
  // against the bundle yields each omitted path exactly once.
  const bundledPaths = new Set(candidate.bundle.includes);
  const missingFromBundle = [...manifestPaths].filter((path) => !bundledPaths.has(path));
  if (missingFromBundle.length > 0) {
    findings.push(
      finding(
        "BUNDLE_OMITS_MANIFEST_COMPONENTS",
        HIGH,
        "bundle.includes",
        `Release bundle omits manifest paths: ${missingFromBundle.join(", ")}.`,
        "Regenerate the archive so every manifest component is present in the export bundle."
      )
    );
  }

  const high = findings.filter((item) => item.severity === HIGH).length;
  const medium = findings.filter((item) => item.severity === MEDIUM).length;
  return {
    repositoryId: candidate.repositoryId,
    releaseTag: candidate.releaseTag,
    commit: candidate.commit,
    decision: high > 0 ? "hold" : medium > 0 ? "revise" : "release",
    summary: {
      componentsReviewed: candidate.components.length,
      requiredTypesCovered: REQUIRED_COMPONENT_TYPES.filter((type) => componentTypes.has(type)).length,
      bundleEntries: candidate.bundle.includes.length,
      findings: findings.length,
      high,
      medium,
      low: findings.filter((item) => item.severity === LOW).length,
    },
    findings,
    releaseCriteria: [
      "Every required scientific repository component is present in the manifest.",
      "Every manifest entry has a stable digest and version-control status.",
      "Executable code and notebooks include lockfile evidence and pinned runtimes.",
      "Datasets and results link back to provenance inputs.",
      "The export bundle includes every manifest path and has its own SHA-256 digest.",
    ],
  };
}

// Public API: the assessment entry point plus the candidate normalizer.
module.exports = {
assessReproducibilityBundle,
normalizeCandidate,
};
Loading