From a95ce02549e32b1234b7c77d3a4dc93302b35708 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Mon, 15 Jun 2026 15:28:17 -0700 Subject: [PATCH 01/12] test: add regression for multi-rule shared-prerequisite cycle (fixed v1.3.0) --- tests/cases.json | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/cases.json b/tests/cases.json index c16e641..7741bca 100644 --- a/tests/cases.json +++ b/tests/cases.json @@ -4898,6 +4898,57 @@ "ruleId": "" } ], + [ + "Multiple rules with divergent conditions on same prerequisite", + { + "attributes": { + "id": "u1", + "public_user_id": "pid_eci:other" + }, + "features": { + "probeFeature": { + "defaultValue": "ui_default", + "rules": [ + { + "condition": { "public_user_id": "pid_eci:does_not_match" }, + "force": "ui_default" + } + ] + }, + "childFlag": { + "defaultValue": "hello", + "rules": [ + { + "parentConditions": [ + { + "id": "probeFeature", + "condition": { "value": "ON" } + } + ], + "force": "prereqon" + }, + { + "parentConditions": [ + { + "id": "probeFeature", + "condition": { "value": { "$ne": "ON" } } + } + ], + "force": "prereqoff" + } + ] + } + } + }, + "childFlag", + { + "value": "prereqoff", + "on": true, + "off": false, + "source": "force", + "ruleId": "" + } + ], [ "SavedGroups correctly pulled from context for force rule", { From 672136a629cb3dd191825ceb6250b890c3fab626 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Mon, 15 Jun 2026 16:47:00 -0700 Subject: [PATCH 02/12] fix: raise on NaN in compare() so IEEE 754 semantics propagate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit compare(NaN, NaN) returned 0 because both > and < evaluate to false on NaN, wrongly satisfying $eq. Raise instead — the existing exception handlers in evalOperatorCondition give the correct truth values ($eq=False, $ne=True, $lt/$lte/$gt/$gte=False), matching JS and the IEEE 754 spec. 
JSON can't represent NaN so the regression case lives in test_growthbook.py rather than cases.json. --- growthbook/core.py | 10 ++++++++++ tests/test_growthbook.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/growthbook/core.py b/growthbook/core.py index 1b4e72d..a5820ee 100644 --- a/growthbook/core.py +++ b/growthbook/core.py @@ -1,4 +1,5 @@ import logging +import math import re import json from functools import lru_cache @@ -106,6 +107,15 @@ def elemMatch(condition, attributeValue, savedGroups) -> bool: return False def compare(val1, val2) -> int: + # IEEE 754: NaN is unordered with everything (including itself), so the + # "0 if neither > nor <" fallthrough below would wrongly report equal. + # Raise instead — callers' existing exception handling gives the right + # truth value: $eq=False, $ne=True, $lt/$lte/$gt/$gte=False. + if isinstance(val1, float) and math.isnan(val1): + raise ValueError("NaN") + if isinstance(val2, float) and math.isnan(val2): + raise ValueError("NaN") + if _is_numeric(val1) and not _is_numeric(val2): if (val2 is None): val2 = 0 diff --git a/tests/test_growthbook.py b/tests/test_growthbook.py index 809b08b..b51f6a6 100644 --- a/tests/test_growthbook.py +++ b/tests/test_growthbook.py @@ -106,6 +106,29 @@ def test_conditions(evalCondition_data): assert evalCondition(attributes, condition, savedGroups) == expected +def test_nan_attribute_obeys_ieee_754(): + # JSON can't represent NaN, so this can't live in cases.json. Without + # the explicit NaN guard in compare(), compare(NaN, NaN) returns 0 + # because both > and < are False — wrongly reporting $eq match. + # IEEE 754: every comparison involving NaN is False except !=. 
+ nan = float("nan") + cond_nan = {"x": nan} + + assert evalCondition({"x": nan}, {"x": {"$eq": nan}}) is False + assert evalCondition({"x": nan}, {"x": {"$ne": nan}}) is True + assert evalCondition({"x": nan}, {"x": {"$lt": nan}}) is False + assert evalCondition({"x": nan}, {"x": {"$lte": nan}}) is False + assert evalCondition({"x": nan}, {"x": {"$gt": nan}}) is False + assert evalCondition({"x": nan}, {"x": {"$gte": nan}}) is False + # NaN attribute vs concrete number: every comparison is False. + assert evalCondition({"x": nan}, {"x": {"$eq": 5}}) is False + assert evalCondition({"x": nan}, {"x": {"$lt": 5}}) is False + assert evalCondition({"x": nan}, {"x": {"$gt": 5}}) is False + # NaN condition vs concrete attribute: same. + assert evalCondition({"x": 5}, {"x": {"$eq": nan}}) is False + assert evalCondition({"x": 5}, {"x": {"$ne": nan}}) is True + + def test_version_comparison_normalizes_unhashable_values(): assert paddedVersionString(["1.2.3"]) == paddedVersionString("0") assert paddedVersionString({"version": "1.2.3"}) == paddedVersionString("0") From 505c8e13739a3b5913a0163083d784124b7e79ee Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Mon, 15 Jun 2026 16:47:09 -0700 Subject: [PATCH 03/12] test: catch up tests/cases.json with 30 JS spec-0.7.1 cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JS sdk-js/test/cases.json carries 30 cases that Python's copy was missing: 24 evalCondition cases (case-sensitive failure + granular case-insensitive variants), 2 hashVersion 2 feature cases, and 4 sticky-bucket minBucketVersion/bucketVersion boundary cases. All 30 pass against current Python — pure defensive coverage, no behavioral change. Source: docs/sdk_parity/tier1.md (B2 catch-up section). 
--- tests/cases.json | 804 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 791 insertions(+), 13 deletions(-) diff --git a/tests/cases.json b/tests/cases.json index 7741bca..62d2aa3 100644 --- a/tests/cases.json +++ b/tests/cases.json @@ -3118,6 +3118,401 @@ { "id": "3" }, false, { "group_id": [1, "2", 3] } + ], + [ + "$in - fail (case sensitive mismatch)", + { + "country": { + "$in": [ + "us", + "uk" + ] + } + }, + { + "country": "US" + }, + false + ], + [ + "$nin - pass (case sensitive mismatch)", + { + "country": { + "$nin": [ + "us", + "uk" + ] + } + }, + { + "country": "US" + }, + true + ], + [ + "$ini - pass (case insensitive match)", + { + "country": { + "$ini": [ + "us", + "uk" + ] + } + }, + { + "country": "US" + }, + true + ], + [ + "$ini - pass (uppercase pattern, lowercase value)", + { + "country": { + "$ini": [ + "US", + "UK" + ] + } + }, + { + "country": "us" + }, + true + ], + [ + "$ini - pass (mixed case)", + { + "country": { + "$ini": [ + "Us", + "Uk" + ] + } + }, + { + "country": "US" + }, + true + ], + [ + "$ini - fail (no match)", + { + "country": { + "$ini": [ + "us", + "uk" + ] + } + }, + { + "country": "CA" + }, + false + ], + [ + "$ini - array pass 1", + { + "tags": { + "$ini": [ + "a", + "b" + ] + } + }, + { + "tags": [ + "d", + "e", + "A" + ] + }, + true + ], + [ + "$ini - array pass 2", + { + "tags": { + "$ini": [ + "A", + "B" + ] + } + }, + { + "tags": [ + "d", + "b", + "f" + ] + }, + true + ], + [ + "$ini - array fail", + { + "tags": { + "$ini": [ + "a", + "b" + ] + } + }, + { + "tags": [ + "d", + "e", + "f" + ] + }, + false + ], + [ + "$ini - not array", + { + "num": { + "$ini": 1 + } + }, + { + "num": 1 + }, + false + ], + [ + "$nini - pass (case insensitive match)", + { + "country": { + "$nini": [ + "us", + "uk" + ] + } + }, + { + "country": "CA" + }, + true + ], + [ + "$nini - fail (case insensitive match)", + { + "country": { + "$nini": [ + "us", + "uk" + ] + } + }, + { + "country": "US" + }, + false + ], + [ + 
"$nini - fail (uppercase pattern, lowercase value)", + { + "country": { + "$nini": [ + "US", + "UK" + ] + } + }, + { + "country": "us" + }, + false + ], + [ + "$nini - array pass", + { + "tags": { + "$nini": [ + "a", + "b" + ] + } + }, + { + "tags": [ + "d", + "e", + "f" + ] + }, + true + ], + [ + "$nini - array fail 1", + { + "tags": { + "$nini": [ + "a", + "b" + ] + } + }, + { + "tags": [ + "d", + "e", + "A" + ] + }, + false + ], + [ + "$nini - array fail 2", + { + "tags": { + "$nini": [ + "A", + "B" + ] + } + }, + { + "tags": [ + "d", + "b", + "f" + ] + }, + false + ], + [ + "$nini - not array", + { + "num": { + "$nini": 1 + } + }, + { + "num": 1 + }, + false + ], + [ + "$regex - fail (case sensitive mismatch)", + { + "userAgent": { + "$regex": "(mobile|tablet)" + } + }, + { + "userAgent": "Android Mobile Browser" + }, + false + ], + [ + "$all - fail (case sensitive mismatch)", + { + "tags": { + "$all": [ + "one", + "three" + ] + } + }, + { + "tags": [ + "ONE", + "two", + "THREE" + ] + }, + false + ], + [ + "$alli - pass (case insensitive match)", + { + "tags": { + "$alli": [ + "one", + "three" + ] + } + }, + { + "tags": [ + "ONE", + "two", + "THREE" + ] + }, + true + ], + [ + "$alli - pass (uppercase pattern, lowercase value)", + { + "tags": { + "$alli": [ + "ONE", + "THREE" + ] + } + }, + { + "tags": [ + "one", + "two", + "three" + ] + }, + true + ], + [ + "$alli - pass (mixed case)", + { + "tags": { + "$alli": [ + "One", + "Three" + ] + } + }, + { + "tags": [ + "ONE", + "two", + "three" + ] + }, + true + ], + [ + "$alli - fail (case insensitive, missing value)", + { + "tags": { + "$alli": [ + "one", + "three" + ] + } + }, + { + "tags": [ + "ONE", + "two", + "four" + ] + }, + false + ], + [ + "$alli - fail not array", + { + "tags": { + "$alli": [ + "one", + "three" + ] + } + }, + { + "tags": "hello" + }, + false ] ], "hash": [ @@ -5036,21 +5431,77 @@ }, "ruleId": "" } - ] - ], - "run": [ - [ - "default weights - 1", - { "attributes": { "id": "1" } }, - { 
"key": "my-test", "variations": [0, 1] }, - 1, - true, - true ], [ - "default weights - 2", - { "attributes": { "id": "2" } }, - { "key": "my-test", "variations": [0, 1] }, + "force rules - hashVersion 2 includes user", + { + "attributes": { + "id": "user2" + }, + "features": { + "feature": { + "defaultValue": 0, + "rules": [ + { + "force": 1, + "coverage": 0.5, + "hashVersion": 2 + } + ] + } + } + }, + "feature", + { + "value": 1, + "on": true, + "off": false, + "source": "force", + "ruleId": "" + } + ], + [ + "force rules - hashVersion 2 excludes user that v1 would include", + { + "attributes": { + "id": "user3" + }, + "features": { + "feature": { + "defaultValue": 0, + "rules": [ + { + "force": 1, + "coverage": 0.5, + "hashVersion": 2 + } + ] + } + } + }, + "feature", + { + "value": 0, + "on": false, + "off": true, + "source": "defaultValue", + "ruleId": "" + } + ] + ], + "run": [ + [ + "default weights - 1", + { "attributes": { "id": "1" } }, + { "key": "my-test", "variations": [0, 1] }, + 1, + true, + true + ], + [ + "default weights - 2", + { "attributes": { "id": "2" } }, + { "key": "my-test", "variations": [0, 1] }, 0, true, true @@ -6711,6 +7162,333 @@ "assignments": { "feature-exp__0": "1" } } } + ], + [ + "uses a sticky bucket when sticky bucket version == experiment.minBucketVersion === experiment.bucketVersion", + { + "attributes": { + "deviceId": "d123", + "anonymousId": "ses123", + "foo": "bar", + "country": "USA" + }, + "features": { + "exp1": { + "defaultValue": "control", + "rules": [ + { + "key": "feature-exp", + "seed": "feature-exp", + "hashAttribute": "id", + "fallbackAttribute": "deviceId", + "hashVersion": 2, + "bucketVersion": 3, + "minBucketVersion": 3, + "condition": { + "country": "USA" + }, + "variations": [ + "control", + "red", + "blue" + ], + "meta": [ + { + "key": "0" + }, + { + "key": "1" + }, + { + "key": "2" + } + ], + "coverage": 1, + "weights": [ + 0.3334, + 0.3333, + 0.3333 + ], + "phase": "0" + } + ] + } + } + }, + [ + { + 
"attributeName": "deviceId", + "attributeValue": "d123", + "assignments": { + "feature-exp__3": "2" + } + } + ], + "exp1", + { + "bucket": 0.6468, + "featureId": "exp1", + "hashAttribute": "deviceId", + "hashUsed": true, + "hashValue": "d123", + "inExperiment": true, + "key": "2", + "stickyBucketUsed": true, + "value": "blue", + "variationId": 2 + }, + { + "deviceId||d123": { + "assignments": { + "feature-exp__3": "2" + }, + "attributeName": "deviceId", + "attributeValue": "d123" + } + } + ], + [ + "skips assignment when sticky bucket version < experiment.minBucketVersion", + { + "attributes": { + "deviceId": "d123", + "anonymousId": "ses123", + "foo": "bar", + "country": "USA" + }, + "features": { + "exp1": { + "defaultValue": "control", + "rules": [ + { + "key": "feature-exp", + "seed": "feature-exp", + "hashAttribute": "id", + "fallbackAttribute": "deviceId", + "hashVersion": 2, + "bucketVersion": 3, + "minBucketVersion": 3, + "condition": { + "country": "USA" + }, + "variations": [ + "control", + "red", + "blue" + ], + "meta": [ + { + "key": "0" + }, + { + "key": "1" + }, + { + "key": "2" + } + ], + "coverage": 1, + "weights": [ + 0.3334, + 0.3333, + 0.3333 + ], + "phase": "0" + } + ] + } + } + }, + [ + { + "attributeName": "deviceId", + "attributeValue": "d123", + "assignments": { + "feature-exp__2": "2" + } + } + ], + "exp1", + null, + { + "deviceId||d123": { + "assignments": { + "feature-exp__2": "2" + }, + "attributeName": "deviceId", + "attributeValue": "d123" + } + } + ], + [ + "resets sticky bucketing when bucket version > experiment.bucketVersion (invalid version)", + { + "attributes": { + "deviceId": "d123", + "anonymousId": "ses123", + "foo": "bar", + "country": "USA" + }, + "features": { + "exp1": { + "defaultValue": "control", + "rules": [ + { + "key": "feature-exp", + "seed": "feature-exp", + "hashAttribute": "id", + "fallbackAttribute": "deviceId", + "hashVersion": 2, + "bucketVersion": 3, + "minBucketVersion": 3, + "condition": { + "country": 
"USA" + }, + "variations": [ + "control", + "red", + "blue" + ], + "meta": [ + { + "key": "0" + }, + { + "key": "1" + }, + { + "key": "2" + } + ], + "coverage": 1, + "weights": [ + 0.3334, + 0.3333, + 0.3333 + ], + "phase": "0" + } + ] + } + } + }, + [ + { + "attributeName": "deviceId", + "attributeValue": "d123", + "assignments": { + "feature-exp__4": "2" + } + } + ], + "exp1", + { + "bucket": 0.6468, + "featureId": "exp1", + "hashAttribute": "deviceId", + "hashUsed": true, + "hashValue": "d123", + "inExperiment": true, + "key": "1", + "stickyBucketUsed": false, + "value": "red", + "variationId": 1 + }, + { + "deviceId||d123": { + "assignments": { + "feature-exp__3": "1", + "feature-exp__4": "2" + }, + "attributeName": "deviceId", + "attributeValue": "d123" + } + } + ], + [ + "resets sticky bucketing when bucket version < experiment.bucketVersion (invalid version)", + { + "attributes": { + "deviceId": "d123", + "anonymousId": "ses123", + "foo": "bar", + "country": "USA" + }, + "features": { + "exp1": { + "defaultValue": "control", + "rules": [ + { + "key": "feature-exp", + "seed": "feature-exp", + "hashAttribute": "id", + "fallbackAttribute": "deviceId", + "hashVersion": 2, + "bucketVersion": 4, + "minBucketVersion": 3, + "condition": { + "country": "USA" + }, + "variations": [ + "control", + "red", + "blue" + ], + "meta": [ + { + "key": "0" + }, + { + "key": "1" + }, + { + "key": "2" + } + ], + "coverage": 1, + "weights": [ + 0.3334, + 0.3333, + 0.3333 + ], + "phase": "0" + } + ] + } + } + }, + [ + { + "attributeName": "deviceId", + "attributeValue": "d123", + "assignments": { + "feature-exp__3": "2" + } + } + ], + "exp1", + { + "bucket": 0.6468, + "featureId": "exp1", + "hashAttribute": "deviceId", + "hashUsed": true, + "hashValue": "d123", + "inExperiment": true, + "key": "1", + "stickyBucketUsed": false, + "value": "red", + "variationId": 1 + }, + { + "deviceId||d123": { + "assignments": { + "feature-exp__3": "2", + "feature-exp__4": "1" + }, + 
"attributeName": "deviceId", + "attributeValue": "d123" + } + } ] ], "urlRedirect": [ From e428acbb3cf6e22d62a4248d691a5f0a169e4da0 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Mon, 15 Jun 2026 19:05:54 -0700 Subject: [PATCH 04/12] ci: add corpus-freshness check vs JS sdk-js cases.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JS is source of truth for the conformance corpus. specVersion is a label, not a contract — Python's copy fell 30 cases behind under 0.7.1 without any signal. The new check fetches the JS cases.json and diffs case names per top-level key: - 'missing' (JS has, Python doesn't): fails CI unless listed in tests/scripts/corpus_skiplist.json - 'extra' (Python has, JS doesn't): reported but never fails; Python carries intentional extensions ($notRegex regression cases, locally-authored regressions) Wired as a separate job in the build workflow so it surfaces clearly in the PR UI and doesn't run 5x across the python-version matrix. Source URL is configurable via --js-source or GB_JS_CASES_URL; default is the JS SDK main-branch raw URL. --- .github/workflows/main.yml | 16 ++ tests/scripts/check_corpus_freshness.py | 263 ++++++++++++++++++++++++ tests/scripts/corpus_skiplist.json | 7 + 3 files changed, 286 insertions(+) create mode 100644 tests/scripts/check_corpus_freshness.py create mode 100644 tests/scripts/corpus_skiplist.json diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1dc03ec..3ba76ac 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,3 +37,19 @@ jobs: - name: Test with pytest run: | pytest + + corpus-freshness: + # Drift check between tests/cases.json and the JS SDK's cases.json. + # JS is source of truth; bumping Python's specVersion without pulling + # in JS's new cases is how we lost track of 30 cases under 0.7.1. + # Fails the build on any "missing" case not listed in + # tests/scripts/corpus_skiplist.json. 
+ # Extras (Python's local additions) are reported but never fail. + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + - name: Check corpus freshness vs JS SDK + run: python3 tests/scripts/check_corpus_freshness.py diff --git a/tests/scripts/check_corpus_freshness.py b/tests/scripts/check_corpus_freshness.py new file mode 100644 index 0000000..349bc3d --- /dev/null +++ b/tests/scripts/check_corpus_freshness.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python3 +"""Check Python's tests/cases.json against the JS SDK's cases.json. + +The two corpora are maintained by hand; `specVersion` is a label, not a +contract. This script makes drift visible: + + - "missing" — case present in JS, absent in Python. Treated as an + error (fail CI) so corpus catch-up is an active decision + rather than a silent omission. The only escape is the + skiplist (see corpus_skiplist.json) for cases Python + deliberately can't or shouldn't carry. + - "extra" — case present in Python, absent in JS. Reported as + informational only — Python carries documented + extensions (e.g., $notRegex regression cases) plus + locally-authored regressions. Extras NEVER fail CI. + +Source-of-truth URL is configurable via --js-source or env GB_JS_CASES_URL. +Defaults to the JS SDK's main-branch raw URL. + +Exit codes: + 0 — no drift, or all drift is in the skiplist + 1 — at least one case missing from Python that isn't in the skiplist + 2 — fetch / parse / IO error (treated as build infra failure, not drift) + +Run locally: + python3 tests/scripts/check_corpus_freshness.py + python3 tests/scripts/check_corpus_freshness.py --js-source /path/to/local/cases.json + GB_JS_CASES_URL=https://... python3 tests/scripts/check_corpus_freshness.py + +In CI, this runs on every push as a separate job in the build workflow. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import urllib.error +import urllib.request +from pathlib import Path +from typing import Dict, List, Set, Tuple + +REPO_ROOT = Path(__file__).resolve().parents[2] +LOCAL_CASES = REPO_ROOT / "tests" / "cases.json" +SKIPLIST = REPO_ROOT / "tests" / "scripts" / "corpus_skiplist.json" + +DEFAULT_JS_URL = ( + "https://raw.githubusercontent.com/growthbook/growthbook/main/" + "packages/sdk-js/test/cases.json" +) + +# Top-level keys to diff. Other keys in cases.json (specVersion, decrypt +# binary blobs, urlRedirect which Python doesn't yet wire) are skipped +# either because they're scalar metadata or because the divergence is +# tracked separately. +KEYS_TO_DIFF = ( + "evalCondition", + "feature", + "run", + "hash", + "getBucketRange", + "chooseVariation", + "getQueryStringOverride", + "inNamespace", + "getEqualWeights", + "stickyBucket", +) + + +def _fetch_js_cases(source: str) -> dict: + """Fetch JS cases.json from a URL or local path. + + Raises RuntimeError with a human-readable message on failure. 
+ """ + if source.startswith(("http://", "https://")): + try: + req = urllib.request.Request( + source, headers={"User-Agent": "growthbook-python-corpus-check"} + ) + with urllib.request.urlopen(req, timeout=20) as resp: # noqa: S310 + return json.loads(resp.read().decode("utf-8")) + except urllib.error.URLError as e: + raise RuntimeError(f"fetch failed: {source}: {e}") from e + except json.JSONDecodeError as e: + raise RuntimeError(f"JS source did not return valid JSON: {e}") from e + path = Path(source) + if not path.is_file(): + raise RuntimeError(f"local source not found: {source}") + try: + return json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as e: + raise RuntimeError(f"local source invalid JSON: {e}") from e + + +def _load_local_cases() -> dict: + if not LOCAL_CASES.is_file(): + raise RuntimeError(f"local cases.json not found: {LOCAL_CASES}") + return json.loads(LOCAL_CASES.read_text(encoding="utf-8")) + + +def _load_skiplist() -> Dict[str, Set[str]]: + """Load skiplist. File format: + + { + "missing": { + "": ["case name 1", "case name 2"] + } + } + + `missing` entries are case names Python deliberately doesn't carry — + drift that won't fail CI. `extra` is reported but never fails so it's + not configured here. The file is optional. 
+ """ + if not SKIPLIST.is_file(): + return {} + try: + data = json.loads(SKIPLIST.read_text(encoding="utf-8")) + except json.JSONDecodeError as e: + raise RuntimeError(f"skiplist invalid JSON: {e}") from e + missing = data.get("missing", {}) or {} + return {k: set(v) for k, v in missing.items()} + + +def _case_names(cases: list) -> List[str]: + """First element of each case is the human-readable name.""" + out = [] + for c in cases: + if isinstance(c, list) and c and isinstance(c[0], str): + out.append(c[0]) + return out + + +def _diff( + js_cases: dict, py_cases: dict, skip: Dict[str, Set[str]] +) -> Tuple[Dict[str, List[str]], Dict[str, List[str]], Dict[str, List[str]]]: + """Return (actionable_missing, skipped_missing, extras) per key.""" + actionable: Dict[str, List[str]] = {} + skipped: Dict[str, List[str]] = {} + extras: Dict[str, List[str]] = {} + + for key in KEYS_TO_DIFF: + js_list = js_cases.get(key, []) + py_list = py_cases.get(key, []) + if not isinstance(js_list, list) or not isinstance(py_list, list): + continue + js_names = _case_names(js_list) + py_names_set = set(_case_names(py_list)) + js_names_set = set(js_names) + + # Order missing by JS's order so the report reads naturally. 
+ missing = [n for n in js_names if n not in py_names_set] + extra = [n for n in _case_names(py_list) if n not in js_names_set] + + key_skip = skip.get(key, set()) + actionable[key] = [n for n in missing if n not in key_skip] + skipped[key] = [n for n in missing if n in key_skip] + extras[key] = extra + + return actionable, skipped, extras + + +def _spec_versions(js_cases: dict, py_cases: dict) -> Tuple[str, str]: + return ( + str(js_cases.get("specVersion", "")), + str(py_cases.get("specVersion", "")), + ) + + +def _format_report( + js_spec: str, + py_spec: str, + actionable: Dict[str, List[str]], + skipped: Dict[str, List[str]], + extras: Dict[str, List[str]], +) -> str: + lines = [] + lines.append("=== Corpus freshness check (Python vs JS SDK) ===") + lines.append(f" JS specVersion: {js_spec}") + lines.append(f" Python specVersion: {py_spec}") + if js_spec != py_spec: + lines.append( + " ⚠ specVersion mismatch — bump Python's value when you " + "catch up to JS's." + ) + lines.append("") + + total_actionable = sum(len(v) for v in actionable.values()) + total_skipped = sum(len(v) for v in skipped.values()) + total_extra = sum(len(v) for v in extras.values()) + + if total_actionable == 0: + lines.append(f"OK: no missing cases (skipped: {total_skipped}, extras: {total_extra})") + else: + lines.append( + f"DRIFT: {total_actionable} case(s) in JS but missing from Python " + f"(plus {total_skipped} on skiplist, {total_extra} Python extras)" + ) + lines.append("") + + def _section(title: str, data: Dict[str, List[str]]) -> None: + if not any(data.values()): + return + lines.append(f"--- {title} ---") + for key, names in data.items(): + if not names: + continue + lines.append(f" [{key}] ({len(names)})") + for n in names: + lines.append(f" - {n}") + lines.append("") + + _section("Missing in Python (FAILS CI)", actionable) + _section("Missing in Python but skipped via corpus_skiplist.json", skipped) + _section("Extra in Python (informational; never fails)", extras) + 
return "\n".join(lines) + + +def main(argv: List[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0]) + parser.add_argument( + "--js-source", + default=os.environ.get("GB_JS_CASES_URL", DEFAULT_JS_URL), + help="URL or local path to JS cases.json (default: JS SDK main branch)", + ) + parser.add_argument( + "--json", action="store_true", help="output machine-readable JSON instead of text" + ) + args = parser.parse_args(argv) + + try: + js_cases = _fetch_js_cases(args.js_source) + py_cases = _load_local_cases() + skip = _load_skiplist() + except RuntimeError as e: + print(f"corpus check infra error: {e}", file=sys.stderr) + return 2 + + actionable, skipped, extras = _diff(js_cases, py_cases, skip) + js_spec, py_spec = _spec_versions(js_cases, py_cases) + + if args.json: + print( + json.dumps( + { + "js_specVersion": js_spec, + "py_specVersion": py_spec, + "missing_actionable": actionable, + "missing_skipped": skipped, + "extras": extras, + }, + indent=2, + sort_keys=True, + ) + ) + else: + print(_format_report(js_spec, py_spec, actionable, skipped, extras)) + + return 1 if any(actionable.values()) else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/scripts/corpus_skiplist.json b/tests/scripts/corpus_skiplist.json new file mode 100644 index 0000000..6845cd6 --- /dev/null +++ b/tests/scripts/corpus_skiplist.json @@ -0,0 +1,7 @@ +{ + "_doc": "Cases that the corpus freshness check should NOT treat as drift. Used only for the `missing` direction (JS has, Python deliberately doesn't). Extras (Python has, JS doesn't) are reported but never fail CI, so they don't need entries here. Add a comment in `_reasons` per entry so future maintainers know why something is excluded.", + "missing": {}, + "_reasons": { + "_example": "evalCondition::case name -> short reason. Populate when we deliberately omit JS cases (e.g., they test a feature Python doesn't yet support and we don't intend to backport)." 
+ } +} From 7f9d2d24e6df1b863a1dc269933e4868105a1ac5 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Tue, 16 Jun 2026 15:18:26 -0700 Subject: [PATCH 05/12] fix: align $eq/$ne with JS strict semantics and $ini/$nini/$alli with ASCII-only case-fold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two canonical behaviors per the tier-1 cross-SDK audit: D6a (strict equality). JS's `$eq` is `===` (strict); only `<`/`>` coerce. Python previously used compare() for $eq too, which coerced string→float making `{$eq: 5}` against "5" return True. Real production divergence: a customer with int feature rules and string frontend attributes saw different variations between JS and Python clients. Routed $eq/$ne through a new _js_strict_equal helper that requires getType() match. Comparison operators ($lt/$lte/$gt/$gte) still call compare() and keep JS coercion semantics (null→0, string→float). D10 (ASCII-only case fold). Python's str.lower() is Unicode-aware ("İ".lower() → "i̇", "ß".lower() → "ß"), Rust's to_lowercase() is Unicode-aware (differently), Go's strings.EqualFold is Unicode-aware (differently still), JS's String.prototype.toLowerCase is naive ASCII. Three SDKs producing three different non-ASCII behaviors is worse than all four being naive. Locked the canonical to ASCII-only via a new _ascii_lower translate table. ASCII still folds (US matches us); non-ASCII chars pass through unchanged (Turkish İ no longer matches i). 
tests/cases.json adds 16 cases: - 5 for D6 (strict equality across number/string, bool/int, no-op equality) - 2 for D10 (Turkish İ, German ß) - 7 defensive pins for D1-D5, D7, D8 (Python already passed these but no case-locked them; pinning so JS-aligned behavior can't silently drift on future refactors) - 2 for D14/D15 (parentCondition gate semantics — Python passes today, Rust will need fixes before it can land these upstream) --- growthbook/core.py | 49 ++++++--- tests/cases.json | 253 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 286 insertions(+), 16 deletions(-) diff --git a/growthbook/core.py b/growthbook/core.py index a5820ee..5cb9bb7 100644 --- a/growthbook/core.py +++ b/growthbook/core.py @@ -55,6 +55,19 @@ def isOperatorObject(obj: Any) -> bool: def _is_numeric(v: Any) -> bool: return isinstance(v, (int, float)) and not isinstance(v, bool) + +# ASCII-only A-Z → a-z mapping. Case-insensitive operators ($ini/$nini/$alli) +# fold via this rather than str.lower(): Python's str.lower() is Unicode-aware +# (Turkish "İ" → "i̇" with combining mark; German "ß" stays "ß") while JS's +# String.prototype.toLowerCase() is naive ASCII. Locking ASCII as canonical +# avoids 3-way divergence between Python/Rust/Go's various Unicode folds. 
+_ASCII_LOWER = str.maketrans("ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "abcdefghijklmnopqrstuvwxyz") + + +def _ascii_lower(s: str) -> str: + return s.translate(_ASCII_LOWER) + def getType(attributeValue) -> str: if attributeValue is None: return "null" @@ -88,7 +101,7 @@ def evalConditionValue(conditionValue, attributeValue, savedGroups, insensitive: # Simple equality comparison with optional case-insensitivity if insensitive and isinstance(conditionValue, str) and isinstance(attributeValue, str): - return conditionValue.lower() == attributeValue.lower() + return _ascii_lower(conditionValue) == _ascii_lower(attributeValue) return bool(conditionValue == attributeValue) @@ -134,22 +147,26 @@ def compare(val1, val2) -> int: return -1 return 0 +def _js_strict_equal(a, b) -> bool: + """JS === semantics: same JS-level type AND same value. + + `compare()` coerces (string→float, None→0) which is correct for `<`/`>` + in JS but wrong for `===`. $eq and $ne route through here instead so + `{$eq: 5}` against `"5"` is False (matches JS strict), while $lt/$gt + keep using compare() and match JS's coercing `<`/`>` semantics. + + NaN handling falls out of Python's IEEE 754-compliant `==` (NaN != NaN). + """ + if getType(a) != getType(b): + return False + return a == b + + def evalOperatorCondition(operator, attributeValue, conditionValue, savedGroups) -> bool: if operator == "$eq": - try: - return compare(attributeValue, conditionValue) == 0 - except Exception: - return False + return _js_strict_equal(attributeValue, conditionValue) elif operator == "$ne": - try: - return compare(attributeValue, conditionValue) != 0 - except Exception: - # Incomparable values (e.g. missing attribute → None vs a string — - # compare() raises TypeError on the None > str comparison) are by - # definition NOT equal. Matches Mongo/JS/Go/Rust SDKs: $ne on a - # missing attribute returns True. The False default that's correct - # for $eq is inverted for $ne. 
- return True + return not _js_strict_equal(attributeValue, conditionValue) elif operator == "$lt": try: return compare(attributeValue, conditionValue) < 0 @@ -295,9 +312,9 @@ def _paddedVersionString(input: str) -> str: def isIn(conditionValue, attributeValue, insensitive: bool = False) -> bool: if insensitive: - # Helper function to case-fold values (lowercase for strings) + # ASCII-only case fold; see _ascii_lower for rationale. def case_fold(val): - return val.lower() if isinstance(val, str) else val + return _ascii_lower(val) if isinstance(val, str) else val # Do an intersection if attribute is an array (insensitive) if isinstance(attributeValue, list): diff --git a/tests/cases.json b/tests/cases.json index 62d2aa3..37a53fd 100644 --- a/tests/cases.json +++ b/tests/cases.json @@ -3513,6 +3513,180 @@ "tags": "hello" }, false + ], + [ + "[D1] $lt against missing attribute coerces missing to 0 (JS spec)", + { + "x": { + "$lt": 5 + } + }, + {}, + true + ], + [ + "[D2a] $gte against missing attribute returns false (JS spec)", + { + "x": { + "$gte": 5 + } + }, + {}, + false + ], + [ + "[D2b] $lte against missing attribute returns true (JS spec)", + { + "x": { + "$lte": 5 + } + }, + {}, + true + ], + [ + "[D3] $gt against missing attribute returns false (JS spec)", + { + "x": { + "$gt": 5 + } + }, + {}, + false + ], + [ + "[D5] $elemMatch tests all array elements including falsy", + { + "x": { + "$elemMatch": { + "$eq": 0 + } + } + }, + { + "x": [ + 0 + ] + }, + true + ], + [ + "[D5] $elemMatch tests falsy false", + { + "x": { + "$elemMatch": { + "$eq": false + } + } + }, + { + "x": [ + false + ] + }, + true + ], + [ + "[D5] $elemMatch tests falsy empty string", + { + "x": { + "$elemMatch": { + "$eq": "" + } + } + }, + { + "x": [ + "" + ] + }, + true + ], + [ + "[D6a] $eq integer vs string is strict (no coercion)", + { + "x": { + "$eq": 5 + } + }, + { + "x": "5" + }, + false + ], + [ + "[D6b] equality (no operator) integer vs string is strict", + { + "x": 5 + }, + { 
+ "x": "5" + }, + false + ], + [ + "[D6] $eq boolean vs integer is strict (different types)", + { + "x": { + "$eq": true + } + }, + { + "x": 1 + }, + false + ], + [ + "[D7] $type on empty object returns true for type 'object'", + { + "x": { + "$type": "object" + } + }, + { + "x": {} + }, + true + ], + [ + "[D8] $regex on integer attribute returns false (not true)", + { + "x": { + "$regex": "^5$" + } + }, + { + "x": 5 + }, + false + ], + [ + "[D10] $ini ASCII case-fold only \u2014 non-ASCII chars unchanged (Turkish dotted I)", + { + "x": { + "$ini": [ + "\u0130" + ] + } + }, + { + "x": "i" + }, + false + ], + [ + "[D10] $ini ASCII case-fold only \u2014 German \u00df does not unfold to SS", + { + "x": { + "$ini": [ + "STRA\u00dfE" + ] + } + }, + { + "x": "STRASSE" + }, + false ] ], "hash": [ @@ -5487,6 +5661,85 @@ "source": "defaultValue", "ruleId": "" } + ], + [ + "[D15] parentCondition gate=false fail skips rule, continues to next", + { + "attributes": { + "id": "u1" + }, + "features": { + "B": { + "defaultValue": "off" + }, + "A": { + "defaultValue": "hello", + "rules": [ + { + "parentConditions": [ + { + "id": "B", + "condition": { + "value": "on" + }, + "gate": false + } + ], + "force": "rule1_value" + }, + { + "force": "fallback_rule" + } + ] + } + } + }, + "A", + { + "value": "fallback_rule", + "on": true, + "off": false, + "source": "force", + "ruleId": "" + } + ], + [ + "[D14] Rule combining parentConditions + force \u2014 both must pass (gate=true)", + { + "attributes": { + "id": "u1" + }, + "features": { + "B": { + "defaultValue": "on" + }, + "A": { + "defaultValue": "hello", + "rules": [ + { + "parentConditions": [ + { + "id": "B", + "condition": { + "value": "on" + }, + "gate": true + } + ], + "force": "combined_rule" + } + ] + } + } + }, + "A", + { + "value": "combined_rule", + "on": true, + "off": false, + "source": "force", + "ruleId": "" + } ] ], "run": [ From 6b5afb10a809ca834b37594521a4a878aa354d1b Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: 
Tue, 16 Jun 2026 15:18:30 -0700 Subject: [PATCH 06/12] ci: trim corpus-freshness job comment --- .github/workflows/main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3ba76ac..467eb78 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -40,8 +40,6 @@ jobs: corpus-freshness: # Drift check between tests/cases.json and the JS SDK's cases.json. - # JS is source of truth; bumping Python's specVersion without pulling - # in JS's new cases is how we lost track of 30 cases under 0.7.1. # Fails the build on any "missing" case not listed in # tests/scripts/corpus_skiplist.json. # Extras (Python's local additions) are reported but never fail. From b011fee86fadad6f3db58bdf234c9dc6fe099285 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Tue, 16 Jun 2026 19:09:56 -0700 Subject: [PATCH 07/12] fix: cast _js_strict_equal result to bool to satisfy mypy --- growthbook/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/growthbook/core.py b/growthbook/core.py index 5cb9bb7..28a4ef8 100644 --- a/growthbook/core.py +++ b/growthbook/core.py @@ -159,7 +159,7 @@ def _js_strict_equal(a, b) -> bool: """ if getType(a) != getType(b): return False - return a == b + return bool(a == b) def evalOperatorCondition(operator, attributeValue, conditionValue, savedGroups) -> bool: From 5f45087a75a68896a0efdd43107a55da1600f85f Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Wed, 17 Jun 2026 11:40:50 -0700 Subject: [PATCH 08/12] =?UTF-8?q?fix:=20revert=20phantom=20D10=20"ASCII-on?= =?UTF-8?q?ly=20fold"=20=E2=80=94=20JS=20toLowerCase=20is=20Unicode-aware?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empirical verification via Node showed JS's String.prototype.toLowerCase() is Unicode-aware ("Σ" → "σ", "А" → "а"), byte-identical to Python's str.lower() on the test inputs. 
The earlier "ASCII-only" canonical decision was based on three Explore-agent reports that mischaracterized JS as "naive ASCII" — a common misconception that survived to the deliverable. Effect of the wrong fix: Python had been switched to a translate-table ASCII-only lower, which made $ini/$nini/$alli return false for Cyrillic/Greek pairs ("А" vs "а", "Σ" vs "σ") that JS treats as matches. So the "fix" actually broke JS-alignment for those characters. Restored str.lower() in core.py. Removed the misleading "ASCII case-fold only" framing in the cases.json case names; the case bodies ("Turkish İ vs i → false", "German ß vs SS → false") are still correct regression cases because both JS toLowerCase and Python str.lower produce "i̇" (with combining mark) for "İ" and don't fold ß. Added two positive cases pinning the Unicode-aware path: "Σ" vs "σ" → true and Cyrillic "А" vs "а" → true. --- growthbook/core.py | 23 +++++++---------------- tests/cases.json | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/growthbook/core.py b/growthbook/core.py index 28a4ef8..027eee1 100644 --- a/growthbook/core.py +++ b/growthbook/core.py @@ -55,19 +55,6 @@ def isOperatorObject(obj: Any) -> bool: def _is_numeric(v: Any) -> bool: return isinstance(v, (int, float)) and not isinstance(v, bool) - -# ASCII-only A-Z → a-z mapping. Case-insensitive operators ($ini/$nini/$alli) -# fold via this rather than str.lower(): Python's str.lower() is Unicode-aware -# (Turkish "İ" → "i̇" with combining mark; German "ß" stays "ß") while JS's -# String.prototype.toLowerCase() is naive ASCII. Locking ASCII as canonical -# avoids 3-way divergence between Python/Rust/Go's various Unicode folds. 
-_ASCII_LOWER = str.maketrans("ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "abcdefghijklmnopqrstuvwxyz") - - -def _ascii_lower(s: str) -> str: - return s.translate(_ASCII_LOWER) - def getType(attributeValue) -> str: if attributeValue is None: return "null" @@ -101,7 +88,7 @@ def evalConditionValue(conditionValue, attributeValue, savedGroups, insensitive: # Simple equality comparison with optional case-insensitivity if insensitive and isinstance(conditionValue, str) and isinstance(attributeValue, str): - return _ascii_lower(conditionValue) == _ascii_lower(attributeValue) + return conditionValue.lower() == attributeValue.lower() return bool(conditionValue == attributeValue) @@ -312,9 +299,13 @@ def _paddedVersionString(input: str) -> str: def isIn(conditionValue, attributeValue, insensitive: bool = False) -> bool: if insensitive: - # ASCII-only case fold; see _ascii_lower for rationale. + # Helper function to lowercase values (strings only; others pass through). + # Uses Python str.lower(), which is byte-identical to JS toLowerCase() + # for the relevant inputs: both apply Unicode lowercase mapping, not + # full case folding (e.g., "İ".lower() == "i̇" in both; + # "ß".lower() == "ß" in both; "Σ".lower() == "σ" in both).
def case_fold(val): - return _ascii_lower(val) if isinstance(val, str) else val + return val.lower() if isinstance(val, str) else val # Do an intersection if attribute is an array (insensitive) if isinstance(attributeValue, list): diff --git a/tests/cases.json b/tests/cases.json index 37a53fd..5c04fb6 100644 --- a/tests/cases.json +++ b/tests/cases.json @@ -3661,7 +3661,7 @@ false ], [ - "[D10] $ini ASCII case-fold only \u2014 non-ASCII chars unchanged (Turkish dotted I)", + "[D10] $ini Turkish dotted-I vs ASCII i \u2014 toLowerCase produces 'i\u0307' (combining mark), not 'i'", { "x": { "$ini": [ @@ -3675,7 +3675,7 @@ false ], [ - "[D10] $ini ASCII case-fold only \u2014 German \u00df does not unfold to SS", + "[D10] $ini German \u00df vs SS \u2014 toLowerCase does NOT fold \u00df to ss", { "x": { "$ini": [ @@ -3687,6 +3687,34 @@ "x": "STRASSE" }, false + ], + [ + "[D10] $ini Greek capital sigma vs small sigma \u2014 simple Unicode fold applies", + { + "x": { + "$ini": [ + "\u03a3" + ] + } + }, + { + "x": "\u03c3" + }, + true + ], + [ + "[D10] $ini Cyrillic capital \u0410 vs small \u0430 \u2014 simple Unicode fold applies", + { + "x": { + "$ini": [ + "\u0410" + ] + } + }, + { + "x": "\u0430" + }, + true ] ], "hash": [ From 076eab84771ecb3293c129d064519eb1e250e42c Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Wed, 17 Jun 2026 12:05:12 -0700 Subject: [PATCH 09/12] test: drop wrong D8 $regex case that pinned Python's broken behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empirical verification against JS via Node and against Rust via a cargo-built test harness: new RegExp("^5$").test(5) → true (JS .test() coerces non-string) Rust regex_comparison.rs:20 → true (it.to_string() coerces) Python re.search(pat, 5) → false (TypeError caught) So Python is the diverger; the case I had committed claimed Rust was wrong (matching the agent's misread of regex_comparison.rs:28). The case is removed. 
The real Python divergence is tracked in tier1.md D8 (revised) but the fix is non-trivial (Python str(None) ≠ JS String(null), so coercion would need a custom table) — deferred. --- tests/cases.json | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/cases.json b/tests/cases.json index 5c04fb6..7b67894 100644 --- a/tests/cases.json +++ b/tests/cases.json @@ -3648,18 +3648,6 @@ }, true ], - [ - "[D8] $regex on integer attribute returns false (not true)", - { - "x": { - "$regex": "^5$" - } - }, - { - "x": 5 - }, - false - ], [ "[D10] $ini Turkish dotted-I vs ASCII i \u2014 toLowerCase produces 'i\u0307' (combining mark), not 'i'", { From a8ff302a985f123ebea77a4d624814a376632979 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Wed, 17 Jun 2026 12:23:08 -0700 Subject: [PATCH 10/12] fix: _js_strict_equal returns False for array/object operands (JS === reference identity) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JS mongrule.ts:218 uses `actual === expected` for $eq/$ne. For arrays and objects, JS === is reference identity — two distinct literals never match (verified empirically: `[1,2] === [1,2]` and `{a:1} === {a:1}` both return false). Python's previous fallthrough was `a == b` which does deep value equality, so distinct-but-equal containers DID match — diverging from JS. The simple (non-operator) equality path at mongrule.ts:100 uses JSON.stringify deep comparison; Python's `==` on parsed-JSON dicts and lists generally agrees with that, so it's left untouched. Within feature evaluation we can't observe Python object identity in any meaningful way — condition and attribute come from independent JSON parses, never the same object. So container $eq is effectively always-false in practice, matching JS. The fix encodes that directly. Three new cases.json entries pin the behavior under tag [D6c]. 
--- growthbook/core.py | 29 ++++++++++++++++++++--------- tests/cases.json | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/growthbook/core.py b/growthbook/core.py index 027eee1..e7f5657 100644 --- a/growthbook/core.py +++ b/growthbook/core.py @@ -135,16 +135,27 @@ def compare(val1, val2) -> int: return 0 def _js_strict_equal(a, b) -> bool: - """JS === semantics: same JS-level type AND same value. - - `compare()` coerces (string→float, None→0) which is correct for `<`/`>` - in JS but wrong for `===`. $eq and $ne route through here instead so - `{$eq: 5}` against `"5"` is False (matches JS strict), while $lt/$gt - keep using compare() and match JS's coercing `<`/`>` semantics. - - NaN handling falls out of Python's IEEE 754-compliant `==` (NaN != NaN). + """JS === semantics for $eq/$ne. Routed here instead of compare() so + $lt/$gt keep their JS-aligned coercion via compare() while $eq stays + strict. + + Three buckets: + + * **Different types** → False (e.g. number 5 vs string "5", + number 1 vs boolean true). + * **Container types (array, object)** → False unconditionally. + JS `===` is reference equality for arrays/objects, and within + feature evaluation the operator's two operands always come from + separate JSON parses — different references, never `===`. So in + the only context this code observes, container $eq must be False. + * **Primitive same type** (number, string, boolean, null) → + Python `a == b`. Matches `===` for ints/floats and strings; + NaN handled correctly because `NaN == NaN` is False in Python. 
""" - if getType(a) != getType(b): + ta = getType(a) + if ta != getType(b): + return False + if ta == "array" or ta == "object": return False return bool(a == b) diff --git a/tests/cases.json b/tests/cases.json index 7b67894..b85b911 100644 --- a/tests/cases.json +++ b/tests/cases.json @@ -3703,6 +3703,42 @@ "x": "\u0430" }, true + ], + [ + "[D6c] $eq array literal vs equal array literal is strict-identity false (JS === for containers)", + { + "x": { + "$eq": [1, 2, 3] + } + }, + { + "x": [1, 2, 3] + }, + false + ], + [ + "[D6c] $eq object literal vs equal object literal is strict-identity false (JS === for containers)", + { + "x": { + "$eq": {"a": 1, "b": 2} + } + }, + { + "x": {"a": 1, "b": 2} + }, + false + ], + [ + "[D6c] $ne array literal vs equal array literal is true (negation of strict-identity)", + { + "x": { + "$ne": [1, 2, 3] + } + }, + { + "x": [1, 2, 3] + }, + true ] ], "hash": [ From 460f581423d3ddd13a99d047a56495d1913fbe90 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Wed, 17 Jun 2026 12:23:18 -0700 Subject: [PATCH 11/12] ci: corpus freshness compares full case bodies, not just names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Name-only diff missed real drift: if JS changed an expected outcome under an existing case name (no rename), the old script exited 0 with no signal. Verified the gap by flipping one expected boolean in a temp JS corpus copy — old script said OK, new script flags it as body-drift. Changes: - _case_signatures(): canonical-JSON (sort_keys, compact separators) hash of each case body (everything after the name). SHA1, truncated to 16 hex chars — stable across dict insertion order. - _diff() returns 5 categories now: actionable_missing, skipped_missing, extras, actionable_drift, skipped_drift. Same-name + same-hash → pass. Different hash → drift, fails CI. - corpus_skiplist.json gains a "drift" bucket for the rare case where Python deliberately keeps a different body from JS. 
- Report adds a "Body-drift" section; JSON output adds drift_actionable and drift_skipped keys. Exit codes unchanged: 0 = clean, 1 = drift, 2 = infra error. --- tests/scripts/check_corpus_freshness.py | 192 ++++++++++++++++-------- tests/scripts/corpus_skiplist.json | 6 +- 2 files changed, 137 insertions(+), 61 deletions(-) diff --git a/tests/scripts/check_corpus_freshness.py b/tests/scripts/check_corpus_freshness.py index 349bc3d..2ca36c1 100644 --- a/tests/scripts/check_corpus_freshness.py +++ b/tests/scripts/check_corpus_freshness.py @@ -2,25 +2,28 @@ """Check Python's tests/cases.json against the JS SDK's cases.json. The two corpora are maintained by hand; `specVersion` is a label, not a -contract. This script makes drift visible: - - - "missing" — case present in JS, absent in Python. Treated as an - error (fail CI) so corpus catch-up is an active decision - rather than a silent omission. The only escape is the - skiplist (see corpus_skiplist.json) for cases Python - deliberately can't or shouldn't carry. - - "extra" — case present in Python, absent in JS. Reported as - informational only — Python carries documented - extensions (e.g., $notRegex regression cases) plus - locally-authored regressions. Extras NEVER fail CI. +contract. This script diffs the corpora and makes drift visible. + +Diff categories: + + - "missing" — JS has a case name Python doesn't. + Fails CI unless the name is in skiplist["missing"][key]. + - "drift" — Both sides have the case name, but the bodies differ + (canonical-JSON SHA1 mismatch). Fails CI unless the name + is in skiplist["drift"][key]. Catches the silent + case-body update that pure name-matching misses. + - "extra" — Python has a case name JS doesn't. Reported as + informational only — Python carries documented + extensions plus locally-authored regressions. Never + fails CI. Source-of-truth URL is configurable via --js-source or env GB_JS_CASES_URL. Defaults to the JS SDK's main-branch raw URL. 
Exit codes: - 0 — no drift, or all drift is in the skiplist - 1 — at least one case missing from Python that isn't in the skiplist - 2 — fetch / parse / IO error (treated as build infra failure, not drift) + 0 — no actionable findings (or all on skiplist) + 1 — at least one missing or drifted case isn't on the skiplist + 2 — fetch / parse / IO error (treated as build infra failure) Run locally: python3 tests/scripts/check_corpus_freshness.py @@ -33,6 +36,7 @@ from __future__ import annotations import argparse +import hashlib import json import os import sys @@ -99,65 +103,114 @@ def _load_local_cases() -> dict: return json.loads(LOCAL_CASES.read_text(encoding="utf-8")) -def _load_skiplist() -> Dict[str, Set[str]]: +def _load_skiplist() -> Dict[str, Dict[str, Set[str]]]: """Load skiplist. File format: { - "missing": { - "": ["case name 1", "case name 2"] - } + "missing": { "": ["case name", ...] }, + "drift": { "": ["case name", ...] } } - `missing` entries are case names Python deliberately doesn't carry — - drift that won't fail CI. `extra` is reported but never fails so it's - not configured here. The file is optional. + `missing` — case names Python deliberately doesn't carry from JS. + `drift` — case names where Python deliberately diverges from JS's + body (rare; reserved for cases that test a Python-only + extension behavior). + + Extras (Python has, JS doesn't) are reported but never fail, so they + don't need a skiplist entry. The file is optional. 
""" if not SKIPLIST.is_file(): - return {} + return {"missing": {}, "drift": {}} try: data = json.loads(SKIPLIST.read_text(encoding="utf-8")) except json.JSONDecodeError as e: raise RuntimeError(f"skiplist invalid JSON: {e}") from e - missing = data.get("missing", {}) or {} - return {k: set(v) for k, v in missing.items()} + return { + "missing": {k: set(v) for k, v in (data.get("missing") or {}).items()}, + "drift": {k: set(v) for k, v in (data.get("drift") or {}).items()}, + } + + +def _case_signatures(cases: list) -> Dict[str, str]: + """Return {case_name: short_hash_of_body} for every well-formed case. + Body = everything after the name (case[1:]), serialized via canonical + JSON (sorted keys + compact separators) so logically-equal cases hash + the same regardless of dict key insertion order or whitespace. -def _case_names(cases: list) -> List[str]: - """First element of each case is the human-readable name.""" - out = [] + Same-named cases collapse to the *last* occurrence — that's the same + convention pytest_generate_tests uses when it parametrizes, so the + hash here mirrors what the suite would actually exercise. 
+ """ + out: Dict[str, str] = {} for c in cases: - if isinstance(c, list) and c and isinstance(c[0], str): - out.append(c[0]) + if not (isinstance(c, list) and c and isinstance(c[0], str)): + continue + name = c[0] + body = json.dumps(c[1:], sort_keys=True, separators=(",", ":")) + out[name] = hashlib.sha1(body.encode("utf-8")).hexdigest()[:16] return out def _diff( js_cases: dict, py_cases: dict, skip: Dict[str, Set[str]] -) -> Tuple[Dict[str, List[str]], Dict[str, List[str]], Dict[str, List[str]]]: - """Return (actionable_missing, skipped_missing, extras) per key.""" - actionable: Dict[str, List[str]] = {} - skipped: Dict[str, List[str]] = {} +) -> Tuple[ + Dict[str, List[str]], # actionable_missing + Dict[str, List[str]], # skipped_missing + Dict[str, List[str]], # extras + Dict[str, List[str]], # actionable_drift (same name, different body) + Dict[str, List[str]], # skipped_drift +]: + """Diff (name, body-hash) pairs. + + Returns five per-key dictionaries: + * actionable_missing — JS has the name, Python doesn't (fails CI) + * skipped_missing — same, but the name is on the skiplist + * extras — Python has the name, JS doesn't (never fails) + * actionable_drift — same name, hash differs (fails CI) + * skipped_drift — same name, hash differs, name on drift-skiplist + """ + actionable_missing: Dict[str, List[str]] = {} + skipped_missing: Dict[str, List[str]] = {} extras: Dict[str, List[str]] = {} + actionable_drift: Dict[str, List[str]] = {} + skipped_drift: Dict[str, List[str]] = {} + + missing_skip = skip.get("missing", {}) + drift_skip = skip.get("drift", {}) for key in KEYS_TO_DIFF: js_list = js_cases.get(key, []) py_list = py_cases.get(key, []) if not isinstance(js_list, list) or not isinstance(py_list, list): continue - js_names = _case_names(js_list) - py_names_set = set(_case_names(py_list)) - js_names_set = set(js_names) - # Order missing by JS's order so the report reads naturally. 
- missing = [n for n in js_names if n not in py_names_set] - extra = [n for n in _case_names(py_list) if n not in js_names_set] + js_sigs = _case_signatures(js_list) + py_sigs = _case_signatures(py_list) + + js_names = [c[0] for c in js_list if isinstance(c, list) and c and isinstance(c[0], str)] + py_names = [c[0] for c in py_list if isinstance(c, list) and c and isinstance(c[0], str)] - key_skip = skip.get(key, set()) - actionable[key] = [n for n in missing if n not in key_skip] - skipped[key] = [n for n in missing if n in key_skip] + missing = [n for n in js_names if n not in py_sigs] + extra = [n for n in py_names if n not in js_sigs] + drift = [n for n in js_names if n in py_sigs and js_sigs[n] != py_sigs[n]] + + # Preserve JS ordering, drop duplicates (signatures dict collapsed them already). + seen: Set[str] = set() + missing = [n for n in missing if not (n in seen or seen.add(n))] + seen.clear() + drift = [n for n in drift if not (n in seen or seen.add(n))] + + key_missing_skip = missing_skip.get(key, set()) + key_drift_skip = drift_skip.get(key, set()) + + actionable_missing[key] = [n for n in missing if n not in key_missing_skip] + skipped_missing[key] = [n for n in missing if n in key_missing_skip] extras[key] = extra + actionable_drift[key] = [n for n in drift if n not in key_drift_skip] + skipped_drift[key] = [n for n in drift if n in key_drift_skip] - return actionable, skipped, extras + return actionable_missing, skipped_missing, extras, actionable_drift, skipped_drift def _spec_versions(js_cases: dict, py_cases: dict) -> Tuple[str, str]: @@ -170,9 +223,11 @@ def _spec_versions(js_cases: dict, py_cases: dict) -> Tuple[str, str]: def _format_report( js_spec: str, py_spec: str, - actionable: Dict[str, List[str]], - skipped: Dict[str, List[str]], + actionable_missing: Dict[str, List[str]], + skipped_missing: Dict[str, List[str]], extras: Dict[str, List[str]], + actionable_drift: Dict[str, List[str]], + skipped_drift: Dict[str, List[str]], ) -> str: 
lines = [] lines.append("=== Corpus freshness check (Python vs JS SDK) ===") @@ -185,16 +240,23 @@ def _format_report( ) lines.append("") - total_actionable = sum(len(v) for v in actionable.values()) - total_skipped = sum(len(v) for v in skipped.values()) - total_extra = sum(len(v) for v in extras.values()) + n_missing = sum(len(v) for v in actionable_missing.values()) + n_skip_missing = sum(len(v) for v in skipped_missing.values()) + n_drift = sum(len(v) for v in actionable_drift.values()) + n_skip_drift = sum(len(v) for v in skipped_drift.values()) + n_extra = sum(len(v) for v in extras.values()) - if total_actionable == 0: - lines.append(f"OK: no missing cases (skipped: {total_skipped}, extras: {total_extra})") + n_fail = n_missing + n_drift + if n_fail == 0: + lines.append( + f"OK: no missing/drifted cases " + f"(skipped-missing: {n_skip_missing}, skipped-drift: {n_skip_drift}, extras: {n_extra})" + ) else: lines.append( - f"DRIFT: {total_actionable} case(s) in JS but missing from Python " - f"(plus {total_skipped} on skiplist, {total_extra} Python extras)" + f"DRIFT: {n_missing} missing + {n_drift} body-drift " + f"(skipped: {n_skip_missing} missing, {n_skip_drift} drift; " + f"{n_extra} Python extras)" ) lines.append("") @@ -210,8 +272,10 @@ def _section(title: str, data: Dict[str, List[str]]) -> None: lines.append(f" - {n}") lines.append("") - _section("Missing in Python (FAILS CI)", actionable) - _section("Missing in Python but skipped via corpus_skiplist.json", skipped) + _section("Missing in Python (FAILS CI)", actionable_missing) + _section("Body-drift: same name, different case body (FAILS CI)", actionable_drift) + _section("Missing in Python — skipped via corpus_skiplist.json", skipped_missing) + _section("Body-drift — skipped via corpus_skiplist.json", skipped_drift) _section("Extra in Python (informational; never fails)", extras) return "\n".join(lines) @@ -236,7 +300,9 @@ def main(argv: List[str] | None = None) -> int: print(f"corpus check infra 
error: {e}", file=sys.stderr) return 2 - actionable, skipped, extras = _diff(js_cases, py_cases, skip) + actionable_missing, skipped_missing, extras, actionable_drift, skipped_drift = _diff( + js_cases, py_cases, skip + ) js_spec, py_spec = _spec_versions(js_cases, py_cases) if args.json: @@ -245,8 +311,10 @@ def main(argv: List[str] | None = None) -> int: { "js_specVersion": js_spec, "py_specVersion": py_spec, - "missing_actionable": actionable, - "missing_skipped": skipped, + "missing_actionable": actionable_missing, + "missing_skipped": skipped_missing, + "drift_actionable": actionable_drift, + "drift_skipped": skipped_drift, "extras": extras, }, indent=2, @@ -254,9 +322,15 @@ def main(argv: List[str] | None = None) -> int: ) ) else: - print(_format_report(js_spec, py_spec, actionable, skipped, extras)) + print( + _format_report( + js_spec, py_spec, actionable_missing, skipped_missing, + extras, actionable_drift, skipped_drift, + ) + ) - return 1 if any(actionable.values()) else 0 + fail = any(actionable_missing.values()) or any(actionable_drift.values()) + return 1 if fail else 0 if __name__ == "__main__": diff --git a/tests/scripts/corpus_skiplist.json b/tests/scripts/corpus_skiplist.json index 6845cd6..49a54ce 100644 --- a/tests/scripts/corpus_skiplist.json +++ b/tests/scripts/corpus_skiplist.json @@ -1,7 +1,9 @@ { - "_doc": "Cases that the corpus freshness check should NOT treat as drift. Used only for the `missing` direction (JS has, Python deliberately doesn't). Extras (Python has, JS doesn't) are reported but never fail CI, so they don't need entries here. Add a comment in `_reasons` per entry so future maintainers know why something is excluded.", + "_doc": "Skiplist for the corpus freshness check. Two buckets:\n * \"missing\" — case names JS has and Python deliberately doesn't. Use when JS adds a case for a feature Python doesn't (yet) support.\n * \"drift\" — case names where Python deliberately keeps a different body from JS. 
Use sparingly — body-drift on a shared case is usually a bug.\nExtras (Python has, JS doesn't) are reported but never fail CI, so they don't need an entry here.\nAdd a brief reason in `_reasons` whenever you add an entry so future maintainers know why.", "missing": {}, + "drift": {}, "_reasons": { - "_example": "evalCondition::case name -> short reason. Populate when we deliberately omit JS cases (e.g., they test a feature Python doesn't yet support and we don't intend to backport)." + "_example_missing": "evalCondition::case name -> short reason (e.g. 'tests $someOperator that Python doesn't implement yet').", + "_example_drift": "evalCondition::case name -> short reason (e.g. 'JS shipped a behavioral change in case X; Python intentionally kept the prior expectation pending review')." } } From a030b669a41a2cff2831f98195898dff4e00ebe3 Mon Sep 17 00:00:00 2001 From: Madhu Chavva Date: Wed, 17 Jun 2026 14:29:34 -0700 Subject: [PATCH 12/12] ci: corpus drift checker groups by name and compares hash multisets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous version stored one body-hash per name in a dict, so duplicates collapsed to the last occurrence. cases.json carries duplicate names (e.g. $not - pass twice, $not - fail twice) and pytest's pytest_generate_tests parametrizes the full list — so all duplicates DO run. Drift on any earlier duplicate was silently missed. Reproduced empirically: flipped an expected value on the FIRST '$not - pass' case in a temp JS corpus copy; old checker exited 0. Fix: - _case_signatures_grouped() returns {name: [hash, hash, ...]} keeping every occurrence (renamed from _case_signatures). - Drift compares Counter(js_hashes) != Counter(py_hashes) per name, catching: (a) single-occurrence body change, (b) one-of-many duplicates' body change, (c) duplicate-count mismatch (JS has 3, Python has 2). - Multiset comparison so duplicate order within a name doesn't matter. 
Verified across four scenarios — clean, drift-of-first-duplicate, single-occurrence drift, duplicate count mismatch. All produce correct exit codes and report sections. --- tests/scripts/check_corpus_freshness.py | 65 ++++++++++++++++--------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/tests/scripts/check_corpus_freshness.py b/tests/scripts/check_corpus_freshness.py index 2ca36c1..0661fd0 100644 --- a/tests/scripts/check_corpus_freshness.py +++ b/tests/scripts/check_corpus_freshness.py @@ -42,6 +42,7 @@ import sys import urllib.error import urllib.request +from collections import Counter from pathlib import Path from typing import Dict, List, Set, Tuple @@ -131,24 +132,30 @@ def _load_skiplist() -> Dict[str, Dict[str, Set[str]]]: } -def _case_signatures(cases: list) -> Dict[str, str]: - """Return {case_name: short_hash_of_body} for every well-formed case. +def _case_signatures_grouped(cases: list) -> Dict[str, List[str]]: + """Return {case_name: [body_hash, body_hash, ...]} preserving order. Body = everything after the name (case[1:]), serialized via canonical JSON (sorted keys + compact separators) so logically-equal cases hash the same regardless of dict key insertion order or whitespace. - Same-named cases collapse to the *last* occurrence — that's the same - convention pytest_generate_tests uses when it parametrizes, so the - hash here mirrors what the suite would actually exercise. + Same-named cases keep every occurrence in the list because pytest's + `pytest_generate_tests` parametrizes the FULL case list — so all + duplicates run. Collapsing them to a single entry would silently hide + body drift in any occurrence except the last (the original bug here). + + Drift detection compares per-name multisets, so the order of + duplicates within a name doesn't matter; only the multiset of body + hashes (values and their counts) does.
""" - out: Dict[str, str] = {} + out: Dict[str, List[str]] = {} for c in cases: if not (isinstance(c, list) and c and isinstance(c[0], str)): continue name = c[0] body = json.dumps(c[1:], sort_keys=True, separators=(",", ":")) - out[name] = hashlib.sha1(body.encode("utf-8")).hexdigest()[:16] + h = hashlib.sha1(body.encode("utf-8")).hexdigest()[:16] + out.setdefault(name, []).append(h) return out @@ -185,21 +192,35 @@ def _diff( if not isinstance(js_list, list) or not isinstance(py_list, list): continue - js_sigs = _case_signatures(js_list) - py_sigs = _case_signatures(py_list) - - js_names = [c[0] for c in js_list if isinstance(c, list) and c and isinstance(c[0], str)] - py_names = [c[0] for c in py_list if isinstance(c, list) and c and isinstance(c[0], str)] - - missing = [n for n in js_names if n not in py_sigs] - extra = [n for n in py_names if n not in js_sigs] - drift = [n for n in js_names if n in py_sigs and js_sigs[n] != py_sigs[n]] - - # Preserve JS ordering, drop duplicates (signatures dict collapsed them already). - seen: Set[str] = set() - missing = [n for n in missing if not (n in seen or seen.add(n))] - seen.clear() - drift = [n for n in drift if not (n in seen or seen.add(n))] + js_grouped = _case_signatures_grouped(js_list) + py_grouped = _case_signatures_grouped(py_list) + + # Preserve JS file ordering for the missing/drift reports; iterate + # the original list and dedupe so each unique name appears once. 
+ js_names_ordered: List[str] = [] + seen_js: Set[str] = set() + for c in js_list: + if isinstance(c, list) and c and isinstance(c[0], str) and c[0] not in seen_js: + js_names_ordered.append(c[0]) + seen_js.add(c[0]) + + py_names_ordered: List[str] = [] + seen_py: Set[str] = set() + for c in py_list: + if isinstance(c, list) and c and isinstance(c[0], str) and c[0] not in seen_py: + py_names_ordered.append(c[0]) + seen_py.add(c[0]) + + missing = [n for n in js_names_ordered if n not in py_grouped] + extra = [n for n in py_names_ordered if n not in js_grouped] + + # Drift = same name on both sides, but the multiset of body hashes + # differs. Catches single-occurrence drift AND drift where only + # ONE of several duplicates changed (the original bug). + drift = [ + n for n in js_names_ordered + if n in py_grouped and Counter(js_grouped[n]) != Counter(py_grouped[n]) + ] key_missing_skip = missing_skip.get(key, set()) key_drift_skip = drift_skip.get(key, set())