From 6de406df7116eda17e125a045757474fa28598aa Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 29 Jun 2026 19:22:11 +0100 Subject: [PATCH 1/5] fix(translator): Hash identity-key percentage splits without a context identity A PERCENTAGE_SPLIT keyed on the identity (implicit `$.identity.key`, `$.identity.key`, or `$.identity.identifier`) compiled to FALSE when the evaluation context carried no identity. The translated predicate runs per-row over IDENTITIES, where the identity-key and identifier columns are always present, so it should hash the row column regardless of whether the row-oriented context supplies an identity. The guard made row-oriented callers (segment-membership counts/members) see every percentage-split segment as empty. Drop the eval-context identity guard for those three cases and emit the per-row hash. Trait-keyed splits are unchanged. This intentionally diverges from flag_engine's single-eval "no identity -> False" for the identity-less context, which is correct for the row-oriented SQL engine where every row is an identity; the one engine-test-data case that exercises it (test_percentage_split__no_identity_key__should_match) is added to the ClickHouse harness's _XFAIL_CASE_NAMES. The translator unit tests assert the full emitted SQL. beep boop Claude-Session: https://claude.ai/code/session_01EgZ5iHpDASZzCapiHRxHLB --- src/flagsmith_sql_flag_engine/translator.py | 13 ++--- tests/harnesses/clickhouse.py | 6 ++ tests/test_translator_unit.py | 62 ++++++++++++++++----- 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/src/flagsmith_sql_flag_engine/translator.py b/src/flagsmith_sql_flag_engine/translator.py index c836120..c3d8f16 100644 --- a/src/flagsmith_sql_flag_engine/translator.py +++ b/src/flagsmith_sql_flag_engine/translator.py @@ -435,19 +435,14 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str | identity: dict[str, object] = ctx.evaluation_context.get("identity") or {} # type: ignore[assignment] kind = classification.kind if not prop: - # Implicit `$.identity.key` — engine returns False when no - # identity, or when the identity lacks `key`. The engine - # never synthesises one from env+identifier. - if not identity.get("key"): - return "FALSE" + # Implicit `$.identity.key`. The key is always present in the store, + # so the split is translatable whether or not the evaluation context + # carries an identity. This intentionally diverges from the engine's + # "no identity → False" verdict. value_expr = ctx.dialect.cast_string(ctx.identity_key_expr) elif kind == "key": - if not identity.get("key"): - return "FALSE" value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.key")) elif kind == "identifier": - if not identity.get("identifier"): - return "FALSE" value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.identifier")) elif kind == "identity_object": # PERCENTAGE_SPLIT on `$.identity` — the whole dict. Engine diff --git a/tests/harnesses/clickhouse.py b/tests/harnesses/clickhouse.py index 2378a3c..6f9966f 100644 --- a/tests/harnesses/clickhouse.py +++ b/tests/harnesses/clickhouse.py @@ -39,6 +39,12 @@ # condition, so we accept the divergence on this niche shape # (`$.`-prefixed trait names) and let callers fall back to the engine. "test_jsonpath_like_trait__existing_jsonpath__should_match_trait", + # Engine returns False for a PERCENTAGE_SPLIT when the eval context has + # no identity key. The SQL engine intentionally diverges: it runs over + # IDENTITIES rows where the identity key always exists, so it hashes the + # row's key rather than folding to FALSE — the behaviour row-oriented + # callers (segment-membership counts/members) need. + "test_percentage_split__no_identity_key__should_match", } diff --git a/tests/test_translator_unit.py b/tests/test_translator_unit.py index 946b5c5..8cea084 100644 --- a/tests/test_translator_unit.py +++ b/tests/test_translator_unit.py @@ -542,9 +542,10 @@ def _ctx_identity_without(field: str) -> TranslateContext: ) -def test_translate_segment__percentage_split_implicit_key_no_identity__compiles_to_false() -> None: - # Given a PERCENTAGE_SPLIT with no property (implicit `$.identity.key`) and an eval - # context with no identity +def test_translate_segment__percentage_split_implicit_key_no_identity__inlines_md5() -> None: + # Given a PERCENTAGE_SPLIT with no property (implicit `$.identity.key`) and an + # eval context with no identity — the row-oriented case (e.g. segment-membership + # counting over the whole IDENTITIES table) seg: SegmentContext = { "key": "ps2", "name": "s", @@ -556,12 +557,25 @@ def test_translate_segment__percentage_split_implicit_key_no_identity__compiles_ ], } - # When / Then the predicate collapses to FALSE — engine returns False without identity - assert translate_segment(seg, _ctx_no_identity()) == "((FALSE))" + # When translated + sql = translate_segment(seg, _ctx_no_identity()) + + # Then it hashes the per-row identity-key column rather than folding to + # FALSE — the row supplies the subject, so a context identity is not needed + assert sql == ( + "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" + " (toString(i.identity_key))), 1, 4))) * 7291 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" + " (toString(i.identity_key))), 5, 4))) * 1897 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" + " (toString(i.identity_key))), 9, 4))) * 6835 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" + " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))" + ) -def test_translate_segment__percentage_split_identity_key_missing__compiles_to_false() -> None: - # Given the eval context's identity has no `key` +def test_translate_segment__percentage_split_key_no_context_identity__inlines_md5() -> None: + # Given a PERCENTAGE_SPLIT on `$.identity.key` and a context identity without `key` seg: SegmentContext = { "key": "ps3", "name": "s", @@ -575,14 +589,23 @@ def test_translate_segment__percentage_split_identity_key_missing__compiles_to_f ], } - # When / Then the predicate collapses to FALSE - assert translate_segment(seg, _ctx_identity_without("key")) == "((FALSE))" + # When / Then it still hashes the per-row identity-key column + sql = translate_segment(seg, _ctx_identity_without("key")) + assert sql == ( + "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" + " (toString(i.identity_key))), 1, 4))) * 7291 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" + " (toString(i.identity_key))), 5, 4))) * 1897 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" + " (toString(i.identity_key))), 9, 4))) * 6835 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" + " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))" + ) -def test_translate_segment__percentage_split_identity_identifier_missing__compiles_to_false() -> ( - None -): - # Given the eval context's identity has no `identifier` +def test_translate_segment__percentage_split_identifier_no_context_identity__inlines_md5() -> None: + # Given a PERCENTAGE_SPLIT on `$.identity.identifier` and a context identity + # without `identifier` seg: SegmentContext = { "key": "ps4", "name": "s", @@ -600,8 +623,17 @@ def test_translate_segment__percentage_split_identity_identifier_missing__compil ], } - # When / Then the predicate collapses to FALSE - assert translate_segment(seg, _ctx_identity_without("identifier")) == "((FALSE))" + # When / Then it still hashes the per-row identifier column + sql = translate_segment(seg, _ctx_identity_without("identifier")) + assert sql == ( + "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||" + " (toString(i.identifier))), 1, 4))) * 7291 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier)))," + " 5, 4))) * 1897 + reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||" + " (toString(i.identifier))), 9, 4))) * 6835 +" + " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier)))," + " 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))" + ) def test_translate_segment__percentage_split_unknown_jsonpath__returns_none() -> None: From e5011a3e12962aeff8c72b26a48d6e608bf89ab4 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 29 Jun 2026 21:16:53 +0100 Subject: [PATCH 2/5] deslop identity key comment --- src/flagsmith_sql_flag_engine/translator.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/flagsmith_sql_flag_engine/translator.py b/src/flagsmith_sql_flag_engine/translator.py index c3d8f16..0f6c122 100644 --- a/src/flagsmith_sql_flag_engine/translator.py +++ b/src/flagsmith_sql_flag_engine/translator.py @@ -435,10 +435,9 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str | identity: dict[str, object] = ctx.evaluation_context.get("identity") or {} # type: ignore[assignment] kind = classification.kind if not prop: - # Implicit `$.identity.key`. The key is always present in the store, - # so the split is translatable whether or not the evaluation context - # carries an identity. This intentionally diverges from the engine's - # "no identity → False" verdict. + # In tranditional engine implementations, this branch implies + # an identity-less context, which makes no sense for the SQL engine. + # Assume identity key. value_expr = ctx.dialect.cast_string(ctx.identity_key_expr) elif kind == "key": value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.key")) From e7e6af45a32f599a62f57f3dc53971fc64688413 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 29 Jun 2026 21:19:45 +0100 Subject: [PATCH 3/5] Deslop XFAIL exclusion comment --- tests/harnesses/clickhouse.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/harnesses/clickhouse.py b/tests/harnesses/clickhouse.py index 6f9966f..0753299 100644 --- a/tests/harnesses/clickhouse.py +++ b/tests/harnesses/clickhouse.py @@ -39,11 +39,8 @@ # condition, so we accept the divergence on this niche shape # (`$.`-prefixed trait names) and let callers fall back to the engine. "test_jsonpath_like_trait__existing_jsonpath__should_match_trait", - # Engine returns False for a PERCENTAGE_SPLIT when the eval context has - # no identity key. The SQL engine intentionally diverges: it runs over - # IDENTITIES rows where the identity key always exists, so it hashes the - # row's key rather than folding to FALSE — the behaviour row-oriented - # callers (segment-membership counts/members) need. + # Does not apply to SQL engine implementation; we always have access to + # identity key. "test_percentage_split__no_identity_key__should_match", } From 4196d631dd45c26ff8c76114be910758d40042c9 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Tue, 30 Jun 2026 11:04:55 +0100 Subject: [PATCH 4/5] fix typo Co-authored-by: Matthew Elwell --- src/flagsmith_sql_flag_engine/translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/flagsmith_sql_flag_engine/translator.py b/src/flagsmith_sql_flag_engine/translator.py index 0f6c122..a49ff87 100644 --- a/src/flagsmith_sql_flag_engine/translator.py +++ b/src/flagsmith_sql_flag_engine/translator.py @@ -435,7 +435,7 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str | identity: dict[str, object] = ctx.evaluation_context.get("identity") or {} # type: ignore[assignment] kind = classification.kind if not prop: - # In tranditional engine implementations, this branch implies + # In traditional engine implementations, this branch implies # an identity-less context, which makes no sense for the SQL engine. # Assume identity key. value_expr = ctx.dialect.cast_string(ctx.identity_key_expr) From 0c7ec9a07e3348f9e2bcf83788dd0ec60fa90005 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Tue, 30 Jun 2026 11:08:11 +0100 Subject: [PATCH 5/5] test: assert % split does not fold to FALSE instead of pinning full MD5 SQL beep boop --- tests/test_translator_unit.py | 44 +++++------------------------------ 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/tests/test_translator_unit.py b/tests/test_translator_unit.py index 8cea084..df12a6c 100644 --- a/tests/test_translator_unit.py +++ b/tests/test_translator_unit.py @@ -557,21 +557,8 @@ def test_translate_segment__percentage_split_implicit_key_no_identity__inlines_m ], } - # When translated - sql = translate_segment(seg, _ctx_no_identity()) - - # Then it hashes the per-row identity-key column rather than folding to - # FALSE — the row supplies the subject, so a context identity is not needed - assert sql == ( - "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" - " (toString(i.identity_key))), 1, 4))) * 7291 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" - " (toString(i.identity_key))), 5, 4))) * 1897 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" - " (toString(i.identity_key))), 9, 4))) * 6835 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||" - " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))" - ) + # When / Then it hashes the per-row identity key rather than folding to FALSE + assert translate_segment(seg, _ctx_no_identity()) != "((FALSE))" def test_translate_segment__percentage_split_key_no_context_identity__inlines_md5() -> None: @@ -589,18 +576,8 @@ def test_translate_segment__percentage_split_key_no_context_identity__inlines_md ], } - # When / Then it still hashes the per-row identity-key column - sql = translate_segment(seg, _ctx_identity_without("key")) - assert sql == ( - "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" - " (toString(i.identity_key))), 1, 4))) * 7291 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" - " (toString(i.identity_key))), 5, 4))) * 1897 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" - " (toString(i.identity_key))), 9, 4))) * 6835 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||" - " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))" - ) + # When / Then it still hashes the per-row identity key rather than folding to FALSE + assert translate_segment(seg, _ctx_identity_without("key")) != "((FALSE))" def test_translate_segment__percentage_split_identifier_no_context_identity__inlines_md5() -> None: @@ -623,17 +600,8 @@ def test_translate_segment__percentage_split_identifier_no_context_identity__inl ], } - # When / Then it still hashes the per-row identifier column - sql = translate_segment(seg, _ctx_identity_without("identifier")) - assert sql == ( - "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||" - " (toString(i.identifier))), 1, 4))) * 7291 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier)))," - " 5, 4))) * 1897 + reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||" - " (toString(i.identifier))), 9, 4))) * 6835 +" - " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier)))," - " 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))" - ) + # When / Then it still hashes the per-row identifier rather than folding to FALSE + assert translate_segment(seg, _ctx_identity_without("identifier")) != "((FALSE))" def test_translate_segment__percentage_split_unknown_jsonpath__returns_none() -> None: