Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,11 @@ bash scripts/run_corpus.sh
```

It builds every `corpus/validation/<probe>/generated.cpp` into a
`strategy.dylib` / `strategy.so`, runs each against
`corpus/data/ohlcv_ETH-USDT-USDT_15m_warmup6m.csv` when present
(falling back to `corpus/data/ohlcv_ETH-USDT-USDT_15m.csv`), and
`strategy.dylib` / `strategy.so`, runs each against the 15m chart feed
derived from the corpus's single committed 1m feed
(`corpus/data/ohlcv_ETH-USDT-USDT_1m.csv`, Git LFS; the harness
materializes `corpus/data/derived/ohlcv_ETH-USDT-USDT_15m.csv` via
`scripts/derive_corpus_feeds.py`), and
rewrites the regenerated `engine_trades.csv` files. It also prints a
canonical `scripts/verify_corpus.py --all --quiet` summary with the five
parity labels (`excellent`, `strong`, `moderate`, `weak`, `minimal`).
Expand Down
6 changes: 4 additions & 2 deletions benchmarks/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,13 @@
# OHLCV resolution order (first existing wins):
# 1. DATA/ETHUSDT_15.csv — snapshot (paths: benchmarks/assets/data or benchmarks/data)
# 2. benchmarks/_workdir/data/ETHUSDT_15.csv — working copy from run_all.sh
# 3. corpus/data/ohlcv_ETH-USDT-USDT_15m.csv — fallback
# 3. corpus/data/derived/ohlcv_ETH-USDT-USDT_15m_window.csv — fallback
# (derived from the single committed 1m feed by
# scripts/derive_corpus_feeds.py)
_CANDIDATE_OHLCV = [
DATA / "ETHUSDT_15.csv",
BENCH_DIR / "_workdir" / "data" / "ETHUSDT_15.csv",
REPO_ROOT / "corpus" / "data" / "ohlcv_ETH-USDT-USDT_15m.csv",
REPO_ROOT / "corpus" / "data" / "derived" / "ohlcv_ETH-USDT-USDT_15m_window.csv",
]
OHLCV_PATH = next((p for p in _CANDIDATE_OHLCV if p.exists()), _CANDIDATE_OHLCV[-1])

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/runners/run_pineforge_canonical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ static std::string fmt(double v) {

int main(int argc, char** argv) {
const char* in_path = argc > 1 ? argv[1]
: "../corpus/data/ohlcv_ETH-USDT-USDT_15m.csv";
: "../corpus/data/derived/ohlcv_ETH-USDT-USDT_15m_window.csv";
const char* out_path = argc > 2 ? argv[2]
: "strategies/_indicators/canonical_pineforge.csv";

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/runners/run_pinets_canonical.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const candidates = [
resolve(REPO, 'benchmarks/assets/data/ETHUSDT_15.csv'),
resolve(REPO, 'benchmarks/data/ETHUSDT_15.csv'),
resolve(REPO, 'benchmarks/_workdir/data/ETHUSDT_15.csv'),
resolve(REPO, 'corpus/data/ohlcv_ETH-USDT-USDT_15m.csv'),
resolve(REPO, 'corpus/data/derived/ohlcv_ETH-USDT-USDT_15m_window.csv'),
];
const csvPath = candidates.find(existsSync) ?? candidates.at(-1);
console.log(`pinets: using OHLCV ${csvPath.replace(REPO + '/', '')}`);
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/runners/run_pynecore.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
{strategy_dir}/pynecore_stats.csv — strategy stats (verbatim from pyne)

The CLI invokes the locally-installed `pyne run` against the corpus
OHLCV (`corpus/data/ohlcv_ETH-USDT-USDT_15m.csv`, pre-converted to
OHLCV (`corpus/data/derived/ohlcv_ETH-USDT-USDT_15m_window.csv`,
derived from the committed 1m feed, pre-converted to
PyneCore's `.ohlcv` format under `benchmarks/_workdir/data/`). It then
re-emits the resulting trade list in PineForge's TV-mirror schema —
same column names, same exit-then-entry row order, same reverse-
Expand Down
2 changes: 1 addition & 1 deletion corpus
Submodule corpus updated 38 files
+3 −2 .gitignore
+1 −2 LEGAL.md
+26 −21 README.md
+0 −220 data/build_deep_warmup_feed.py
+0 −3 data/ohlcv_ETH-USDT-USDT_15m.csv
+0 −3 data/ohlcv_ETH-USDT-USDT_15m_warmup6m.csv
+2 −2 data/ohlcv_ETH-USDT-USDT_1m.csv
+0 −3 data/ohlcv_ETH-USDT-USDT_1m_warmup6m.csv
+1 −1 draft-probes/README.md
+1 −1 special-validation/crypto-htf/mtf-htf-monthly-ema-cross-01/inputs.json
+2,804 −2,804 validation/bracket-tp-sl-oca-reduce-isolate-01/engine_trades.csv
+1 −1 validation/composite-trendmaster-three-tier-ema-state-01/inputs.json
+14,964 −14,964 validation/composite-vcp-fvg-active-zones-01/engine_trades.csv
+20,398 −20,398 validation/input-source-subscript-hl2-01/engine_trades.csv
+1 −1 validation/ltf-bool-array-bull-majority-01/inputs.json
+860 −860 validation/order-close-all-cancel-all-01/engine_trades.csv
+858 −858 validation/order-cross-exit-close-same-pass-01/engine_trades.csv
+814 −814 validation/order-dual-side-same-id-stop-no-cancel-01/engine_trades.csv
+428 −428 validation/order-dual-stop-near-only-01/engine_trades.csv
+262 −262 validation/order-dual-stop-open-high-first-path-01/engine_trades.csv
+428 −428 validation/order-dual-stop-source-order-long-first-01/engine_trades.csv
+428 −428 validation/order-dual-stop-source-order-short-first-01/engine_trades.csv
+408 −408 validation/order-one-side-four-bar-far-opposite-01/engine_trades.csv
+3,568 −3,568 validation/order-range-expansion-pending-stop-01/engine_trades.csv
+820 −820 validation/order-same-id-stop-after-flat-01/engine_trades.csv
+860 −860 validation/order-same-id-stop-cross-before-modify-01/engine_trades.csv
+690 −690 validation/order-same-id-stop-minute-zero-01/engine_trades.csv
+814 −814 validation/order-same-id-stop-modification-01/engine_trades.csv
+776 −776 validation/order-same-id-stop-raise-only-01/engine_trades.csv
+408 −408 validation/order-same-id-stop-window-four-bars-01/engine_trades.csv
+666 −666 validation/order-stale-stop-after-close-no-cancel-01/engine_trades.csv
+654 −654 validation/order-stop-entry-touch-boundary-01/engine_trades.csv
+3,392 −3,392 validation/ta-median-vs-ema-cross-01/engine_trades.csv
+1 −1 validation/ta-nvi-pvi-cross-01/inputs.json
+2,692 −2,692 validation/ta-sma-dual-cross-01/engine_trades.csv
+1,708 −1,708 validation/ta-triple-sma-stack-latch-01/engine_trades.csv
+3,236 −3,236 validation/ta-vwma-vs-sma-divergence-01/engine_trades.csv
+1 −1 validation/vwap-bands-mean-reversion-2sigma-01/inputs.json
1 change: 1 addition & 0 deletions scripts/crossvalidate_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ def main() -> int:
"generated.cpp, corpus convention 1e6; pf_report_t "
"does not expose it)")
args = ap.parse_args()
rs.ensure_derived()
if args.all:
return crossvalidate_all(args.corpus_root.resolve(), args.ohlcv.resolve())
if args.strategy_dir is None:
Expand Down
114 changes: 114 additions & 0 deletions scripts/derive_corpus_feeds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""Materialize the derived corpus feeds from the single committed 1m feed.

The corpus ships exactly ONE reference feed:
corpus/data/ohlcv_ETH-USDT-USDT_1m.csv (Git LFS)
1-minute Binance ETH-USDT-USDT perp bars, full exchange history
(2020-01-01 onward) through the end of the comparison window.

Everything else the harnesses consume is derived deterministically from
it into corpus/data/derived/ (gitignored):

ohlcv_ETH-USDT-USDT_15m.csv 900s resample, full history —
the default chart feed
ohlcv_ETH-USDT-USDT_15m_window.csv comparison-window slice of the
above — cold-start probes and
benchmark runners (bench inputs
must stay historically
comparable), and the harness's
window-bounds fallback

Resample rule: open=first, high=max, low=min, close=last, volume=sum
(rounded to 6dp), timestamp=bucket start; a trailing partial bucket is
kept. Idempotent: both files are rewritten together when either one is
missing or older (by mtime) than the source feed; otherwise nothing is
touched. Import ensure_derived() or run as a script.
"""
from pathlib import Path

# Two levels up from this file, i.e. the parent of the directory that
# contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# The single committed input feed (1-minute bars).
SOURCE_1M = REPO_ROOT / "corpus" / "data" / "ohlcv_ETH-USDT-USDT_1m.csv"
# Output directory and the two feeds ensure_derived() writes into it.
DERIVED_DIR = REPO_ROOT / "corpus" / "data" / "derived"
DERIVED_15M = DERIVED_DIR / "ohlcv_ETH-USDT-USDT_15m.csv"
DERIVED_15M_WINDOW = DERIVED_DIR / "ohlcv_ETH-USDT-USDT_15m_window.csv"

# Comparison window (epoch ms, inclusive on both ends), pinned from the
# historical window-only 15m feed the corpus used to ship: first bar
# 2025-04-20 21:00 UTC, last bar in the 2026-05-04 15:00 UTC bucket.
# NOTE(review): this comment previously gave 06:00 UTC for the last bar,
# but WINDOW_END_MS (1777906800000) decodes to 2026-05-04 15:00 UTC —
# confirm which of the two matches the historical feed.
WINDOW_START_MS = 1745182800000
WINDOW_END_MS = 1777906800000

# Header row written as the first line of both derived CSVs.
HEADER = "timestamp,open,high,low,close,volume"


def _fmt(v: float) -> str:
"""Shortest-repr float, integral values written bare (2382 not 2382.0)
— matches the committed feed's formatting."""
if v == int(v) and abs(v) < 1e15:
return str(int(v))
return repr(v)


def _stale(target: Path, source: Path) -> bool:
return (not target.exists()
or target.stat().st_mtime < source.stat().st_mtime)


def ensure_derived(verbose: bool = False) -> None:
    """Create/refresh the derived feeds. Cheap no-op when up to date.

    Streams SOURCE_1M once and folds consecutive rows that share a
    900-second bucket into one 15m bar (open=first, high=max, low=min,
    close=last, volume=sum rounded to 6dp, timestamp=bucket start). It
    then writes the full 15m feed to DERIVED_15M and the rows whose bucket
    start lies in [WINDOW_START_MS, WINDOW_END_MS] to DERIVED_15M_WINDOW.
    Each file is first written to a sibling ``.csv.new`` file and then
    renamed over the final path.

    Args:
        verbose: when True, print a progress line before resampling and
            one line per file written.

    Raises:
        FileNotFoundError: SOURCE_1M does not exist.
        RuntimeError: SOURCE_1M is smaller than 1,000,000 bytes, which is
            treated as an unsmudged Git LFS pointer.
    """
    if not SOURCE_1M.exists():
        raise FileNotFoundError(
            f"{SOURCE_1M} missing — is the corpus submodule checked out "
            "with git-lfs installed? (file should be ~176 MB, not a "
            "small LFS pointer)")
    if SOURCE_1M.stat().st_size < 1_000_000:
        raise RuntimeError(
            f"{SOURCE_1M} is suspiciously small — likely an unsmudged "
            "Git LFS pointer. Run: git lfs install && git lfs pull "
            "(inside the corpus submodule).")
    # Both outputs are rebuilt together; skip only when neither is stale.
    if not (_stale(DERIVED_15M, SOURCE_1M)
            or _stale(DERIVED_15M_WINDOW, SOURCE_1M)):
        return

    if verbose:
        print(f"[derive] resampling {SOURCE_1M.name} -> 15m ...")
    bucket_ms = 900_000  # 900 s bucket width; timestamps are epoch ms
    buckets = []  # [ts, o, h, l, c, v] per 900s bucket, in order
    cur_key = None
    with SOURCE_1M.open() as fh:
        next(fh)  # header
        for line in fh:
            # A blank line (e.g. a stray trailing newline) carries no bar;
            # skip it instead of failing the six-way unpack below.
            if not line.strip():
                continue
            ts_s, o, h, l, c, v = line.rstrip("\n").split(",")
            ts = int(ts_s)
            key = ts - ts % bucket_ms
            if key != cur_key:
                # First row of a new bucket. Open and close stay as the
                # source strings so they are written back verbatim.
                buckets.append([key, o, float(h), float(l), c, float(v)])
                cur_key = key
            else:
                b = buckets[-1]
                hf, lf = float(h), float(l)
                if hf > b[2]:
                    b[2] = hf
                if lf < b[3]:
                    b[3] = lf
                b[4] = c  # close = last row seen in the bucket
                b[5] += float(v)

    DERIVED_DIR.mkdir(parents=True, exist_ok=True)
    full_lines = [HEADER]
    window_lines = [HEADER]
    for ts, o, h, l, c, v in buckets:
        row = f"{ts},{o},{_fmt(h)},{_fmt(l)},{c},{_fmt(round(v, 6))}"
        full_lines.append(row)
        if WINDOW_START_MS <= ts <= WINDOW_END_MS:
            window_lines.append(row)
    for path, lines in ((DERIVED_15M, full_lines),
                        (DERIVED_15M_WINDOW, window_lines)):
        # Write next to the target, then rename over it, so the final
        # path is only replaced once the new content is fully written.
        tmp = path.with_suffix(".csv.new")
        tmp.write_text("\n".join(lines) + "\n")
        tmp.replace(path)
        if verbose:
            print(f"[derive] wrote {path.relative_to(REPO_ROOT)} "
                  f"({len(lines) - 1} bars)")


# Script entry point: rebuild the derived feeds with progress output.
if __name__ == "__main__":
    ensure_derived(verbose=True)
9 changes: 8 additions & 1 deletion scripts/run_corpus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,14 @@ Run: git submodule update --init corpus
(the TV validation corpus is a PUBLIC submodule: https://github.com/pineforge-4pass/pineforge-corpus)"
fi

# --- 0) (optional) regenerate generated.cpp from strategy.pine --------
# --- 0) derive chart feeds from the single committed 1m feed ----------
# The corpus ships one Git-LFS feed (full-history 1m); the 15m chart
# feeds live in corpus/data/derived/ and are rebuilt here when stale.

log "materializing derived corpus feeds"
"$PY" scripts/derive_corpus_feeds.py

# --- 0b) (optional) regenerate generated.cpp from strategy.pine -------
# REGEN=1 re-derives every corpus/*/*/generated.cpp from its strategy.pine
# through the pineforge-release Docker image (engine + bundled transpiler), so
# the build below compiles freshly-transpiled C++ instead of the committed copy.
Expand Down
16 changes: 12 additions & 4 deletions scripts/run_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

# Custom OHLCV input
python scripts/run_strategy.py corpus/basic/greedy \\
--ohlcv corpus/data/ohlcv_ETH-USDT-USDT_15m.csv
--ohlcv corpus/data/derived/ohlcv_ETH-USDT-USDT_15m_window.csv

# Don't overwrite engine_trades.csv if it already exists
python scripts/run_strategy.py corpus/basic/greedy --no-overwrite
Expand Down Expand Up @@ -52,9 +52,15 @@
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
REFERENCE_OHLCV = REPO_ROOT / "corpus" / "data" / "ohlcv_ETH-USDT-USDT_15m.csv"
WARMUP_OHLCV = REPO_ROOT / "corpus" / "data" / "ohlcv_ETH-USDT-USDT_15m_warmup6m.csv"
DEFAULT_OHLCV = WARMUP_OHLCV if WARMUP_OHLCV.exists() else REFERENCE_OHLCV
# The corpus ships a single committed feed (full-history 1m, Git LFS);
# the 15m chart feeds are derived from it locally. ensure_derived() is
# called from main() — importing this module stays side-effect free for
# consumers that only want the ABI mirrors.
from derive_corpus_feeds import ( # noqa: E402
DERIVED_15M, DERIVED_15M_WINDOW, ensure_derived)
REFERENCE_OHLCV = DERIVED_15M_WINDOW
WARMUP_OHLCV = DERIVED_15M
DEFAULT_OHLCV = WARMUP_OHLCV

# Keys in inputs.json that are validator/harness metadata, not Pine input()
# values. Mirrors the canonical validator's VALIDATION_INPUT_META_KEYS so
Expand Down Expand Up @@ -1168,6 +1174,8 @@ def main() -> int:
"$PINEFORGE_RELEASE_IMAGE or ghcr .../pineforge-release:latest).")
args = ap.parse_args()

ensure_derived()

strategy_dir = args.strategy_dir.resolve()
out_path = (args.output.resolve() if args.output
else strategy_dir / "engine_trades.csv")
Expand Down
Loading