From 6de406df7116eda17e125a045757474fa28598aa Mon Sep 17 00:00:00 2001
From: Kim Gustyr <kim.gustyr@flagsmith.com>
Date: Mon, 29 Jun 2026 19:22:11 +0100
Subject: [PATCH 1/5] fix(translator): Hash identity-key percentage splits
 without a context identity

A PERCENTAGE_SPLIT keyed on the identity (implicit `$.identity.key`,
`$.identity.key`, or `$.identity.identifier`) compiled to FALSE when the
evaluation context carried no identity. The translated predicate runs
per-row over IDENTITIES, where the identity-key and identifier columns are
always present, so it should hash the row column regardless of whether the
row-oriented context supplies an identity. The guard made row-oriented
callers (segment-membership counts/members) see every percentage-split
segment as empty.

Drop the eval-context identity guard for those three cases and emit the
per-row hash. Trait-keyed splits are unchanged.

This intentionally diverges from flag_engine's single-eval "no identity ->
False" for the identity-less context, which is correct for the row-oriented
SQL engine where every row is an identity; the one engine-test-data case
that exercises it (test_percentage_split__no_identity_key__should_match) is
added to the ClickHouse harness's _XFAIL_CASE_NAMES. The translator unit
tests assert the full emitted SQL.

beep boop
Claude-Session: https://claude.ai/code/session_01EgZ5iHpDASZzCapiHRxHLB
---
 src/flagsmith_sql_flag_engine/translator.py | 13 ++---
 tests/harnesses/clickhouse.py               |  6 ++
 tests/test_translator_unit.py               | 62 ++++++++++++++++-----
 3 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/src/flagsmith_sql_flag_engine/translator.py b/src/flagsmith_sql_flag_engine/translator.py
index c836120..c3d8f16 100644
--- a/src/flagsmith_sql_flag_engine/translator.py
+++ b/src/flagsmith_sql_flag_engine/translator.py
@@ -435,19 +435,14 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str |
         identity: dict[str, object] = ctx.evaluation_context.get("identity") or {}  # type: ignore[assignment]
         kind = classification.kind
         if not prop:
-            # Implicit `$.identity.key` — engine returns False when no
-            # identity, or when the identity lacks `key`. The engine
-            # never synthesises one from env+identifier.
-            if not identity.get("key"):
-                return "FALSE"
+            # Implicit `$.identity.key`. The key is always present in the store,
+            # so the split is translatable whether or not the evaluation context
+            # carries an identity. This intentionally diverges from the engine's
+            # "no identity → False" verdict.
             value_expr = ctx.dialect.cast_string(ctx.identity_key_expr)
         elif kind == "key":
-            if not identity.get("key"):
-                return "FALSE"
             value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.key"))
         elif kind == "identifier":
-            if not identity.get("identifier"):
-                return "FALSE"
             value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.identifier"))
         elif kind == "identity_object":
             # PERCENTAGE_SPLIT on `$.identity` — the whole dict. Engine
diff --git a/tests/harnesses/clickhouse.py b/tests/harnesses/clickhouse.py
index 2378a3c..6f9966f 100644
--- a/tests/harnesses/clickhouse.py
+++ b/tests/harnesses/clickhouse.py
@@ -39,6 +39,12 @@
     # condition, so we accept the divergence on this niche shape
     # (`$.`-prefixed trait names) and let callers fall back to the engine.
     "test_jsonpath_like_trait__existing_jsonpath__should_match_trait",
+    # Engine returns False for a PERCENTAGE_SPLIT when the eval context has
+    # no identity key. The SQL engine intentionally diverges: it runs over
+    # IDENTITIES rows where the identity key always exists, so it hashes the
+    # row's key rather than folding to FALSE — the behaviour row-oriented
+    # callers (segment-membership counts/members) need.
+    "test_percentage_split__no_identity_key__should_match",
 }
 
 
diff --git a/tests/test_translator_unit.py b/tests/test_translator_unit.py
index 946b5c5..8cea084 100644
--- a/tests/test_translator_unit.py
+++ b/tests/test_translator_unit.py
@@ -542,9 +542,10 @@ def _ctx_identity_without(field: str) -> TranslateContext:
     )
 
 
-def test_translate_segment__percentage_split_implicit_key_no_identity__compiles_to_false() -> None:
-    # Given a PERCENTAGE_SPLIT with no property (implicit `$.identity.key`) and an eval
-    # context with no identity
+def test_translate_segment__percentage_split_implicit_key_no_identity__inlines_md5() -> None:
+    # Given a PERCENTAGE_SPLIT with no property (implicit `$.identity.key`) and an
+    # eval context with no identity — the row-oriented case (e.g. segment-membership
+    # counting over the whole IDENTITIES table)
     seg: SegmentContext = {
         "key": "ps2",
         "name": "s",
@@ -556,12 +557,25 @@ def test_translate_segment__percentage_split_implicit_key_no_identity__compiles_
         ],
     }
 
-    # When / Then the predicate collapses to FALSE — engine returns False without identity
-    assert translate_segment(seg, _ctx_no_identity()) == "((FALSE))"
+    # When translated
+    sql = translate_segment(seg, _ctx_no_identity())
+
+    # Then it hashes the per-row identity-key column rather than folding to
+    # FALSE — the row supplies the subject, so a context identity is not needed
+    assert sql == (
+        "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
+        " (toString(i.identity_key))), 1, 4))) * 7291 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
+        " (toString(i.identity_key))), 5, 4))) * 1897 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
+        " (toString(i.identity_key))), 9, 4))) * 6835 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
+        " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
+    )
 
 
-def test_translate_segment__percentage_split_identity_key_missing__compiles_to_false() -> None:
-    # Given the eval context's identity has no `key`
+def test_translate_segment__percentage_split_key_no_context_identity__inlines_md5() -> None:
+    # Given a PERCENTAGE_SPLIT on `$.identity.key` and a context identity without `key`
     seg: SegmentContext = {
         "key": "ps3",
         "name": "s",
@@ -575,14 +589,23 @@ def test_translate_segment__percentage_split_identity_key_missing__compiles_to_f
         ],
     }
 
-    # When / Then the predicate collapses to FALSE
-    assert translate_segment(seg, _ctx_identity_without("key")) == "((FALSE))"
+    # When / Then it still hashes the per-row identity-key column
+    sql = translate_segment(seg, _ctx_identity_without("key"))
+    assert sql == (
+        "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
+        " (toString(i.identity_key))), 1, 4))) * 7291 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
+        " (toString(i.identity_key))), 5, 4))) * 1897 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
+        " (toString(i.identity_key))), 9, 4))) * 6835 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
+        " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
+    )
 
 
-def test_translate_segment__percentage_split_identity_identifier_missing__compiles_to_false() -> (
-    None
-):
-    # Given the eval context's identity has no `identifier`
+def test_translate_segment__percentage_split_identifier_no_context_identity__inlines_md5() -> None:
+    # Given a PERCENTAGE_SPLIT on `$.identity.identifier` and a context identity
+    # without `identifier`
     seg: SegmentContext = {
         "key": "ps4",
         "name": "s",
@@ -600,8 +623,17 @@ def test_translate_segment__percentage_split_identity_identifier_missing__compil
         ],
     }
 
-    # When / Then the predicate collapses to FALSE
-    assert translate_segment(seg, _ctx_identity_without("identifier")) == "((FALSE))"
+    # When / Then it still hashes the per-row identifier column
+    sql = translate_segment(seg, _ctx_identity_without("identifier"))
+    assert sql == (
+        "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||"
+        " (toString(i.identifier))), 1, 4))) * 7291 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier))),"
+        " 5, 4))) * 1897 + reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||"
+        " (toString(i.identifier))), 9, 4))) * 6835 +"
+        " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier))),"
+        " 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
+    )
 
 
 def test_translate_segment__percentage_split_unknown_jsonpath__returns_none() -> None:

From e5011a3e12962aeff8c72b26a48d6e608bf89ab4 Mon Sep 17 00:00:00 2001
From: Kim Gustyr <kim.gustyr@flagsmith.com>
Date: Mon, 29 Jun 2026 21:16:53 +0100
Subject: [PATCH 2/5] deslop identity key comment

---
 src/flagsmith_sql_flag_engine/translator.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/flagsmith_sql_flag_engine/translator.py b/src/flagsmith_sql_flag_engine/translator.py
index c3d8f16..0f6c122 100644
--- a/src/flagsmith_sql_flag_engine/translator.py
+++ b/src/flagsmith_sql_flag_engine/translator.py
@@ -435,10 +435,9 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str |
         identity: dict[str, object] = ctx.evaluation_context.get("identity") or {}  # type: ignore[assignment]
         kind = classification.kind
         if not prop:
-            # Implicit `$.identity.key`. The key is always present in the store,
-            # so the split is translatable whether or not the evaluation context
-            # carries an identity. This intentionally diverges from the engine's
-            # "no identity → False" verdict.
+            # In tranditional engine implementations, this branch implies
+            # an identity-less context, which makes no sense for the SQL engine.
+            # Assume identity key.
             value_expr = ctx.dialect.cast_string(ctx.identity_key_expr)
         elif kind == "key":
             value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.key"))

From e7e6af45a32f599a62f57f3dc53971fc64688413 Mon Sep 17 00:00:00 2001
From: Kim Gustyr <kim.gustyr@flagsmith.com>
Date: Mon, 29 Jun 2026 21:19:45 +0100
Subject: [PATCH 3/5] Deslop XFAIL exclusion comment

---
 tests/harnesses/clickhouse.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/harnesses/clickhouse.py b/tests/harnesses/clickhouse.py
index 6f9966f..0753299 100644
--- a/tests/harnesses/clickhouse.py
+++ b/tests/harnesses/clickhouse.py
@@ -39,11 +39,8 @@
     # condition, so we accept the divergence on this niche shape
     # (`$.`-prefixed trait names) and let callers fall back to the engine.
     "test_jsonpath_like_trait__existing_jsonpath__should_match_trait",
-    # Engine returns False for a PERCENTAGE_SPLIT when the eval context has
-    # no identity key. The SQL engine intentionally diverges: it runs over
-    # IDENTITIES rows where the identity key always exists, so it hashes the
-    # row's key rather than folding to FALSE — the behaviour row-oriented
-    # callers (segment-membership counts/members) need.
+    # Does not apply to SQL engine implementation; we always have access to
+    # identity key.
     "test_percentage_split__no_identity_key__should_match",
 }
 

From 4196d631dd45c26ff8c76114be910758d40042c9 Mon Sep 17 00:00:00 2001
From: Kim Gustyr <kim.gustyr@flagsmith.com>
Date: Tue, 30 Jun 2026 11:04:55 +0100
Subject: [PATCH 4/5] fix typo

Co-authored-by: Matthew Elwell <mjelwell89@gmail.com>
---
 src/flagsmith_sql_flag_engine/translator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/flagsmith_sql_flag_engine/translator.py b/src/flagsmith_sql_flag_engine/translator.py
index 0f6c122..a49ff87 100644
--- a/src/flagsmith_sql_flag_engine/translator.py
+++ b/src/flagsmith_sql_flag_engine/translator.py
@@ -435,7 +435,7 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str |
         identity: dict[str, object] = ctx.evaluation_context.get("identity") or {}  # type: ignore[assignment]
         kind = classification.kind
         if not prop:
-            # In tranditional engine implementations, this branch implies
+            # In traditional engine implementations, this branch implies
             # an identity-less context, which makes no sense for the SQL engine.
             # Assume identity key.
             value_expr = ctx.dialect.cast_string(ctx.identity_key_expr)

From 0c7ec9a07e3348f9e2bcf83788dd0ec60fa90005 Mon Sep 17 00:00:00 2001
From: Kim Gustyr <kim.gustyr@flagsmith.com>
Date: Tue, 30 Jun 2026 11:08:11 +0100
Subject: [PATCH 5/5] test: assert % split does not fold to FALSE instead of
 pinning full MD5 SQL

beep boop
---
 tests/test_translator_unit.py | 44 +++++------------------------------
 1 file changed, 6 insertions(+), 38 deletions(-)

diff --git a/tests/test_translator_unit.py b/tests/test_translator_unit.py
index 8cea084..df12a6c 100644
--- a/tests/test_translator_unit.py
+++ b/tests/test_translator_unit.py
@@ -557,21 +557,8 @@ def test_translate_segment__percentage_split_implicit_key_no_identity__inlines_m
         ],
     }
 
-    # When translated
-    sql = translate_segment(seg, _ctx_no_identity())
-
-    # Then it hashes the per-row identity-key column rather than folding to
-    # FALSE — the row supplies the subject, so a context identity is not needed
-    assert sql == (
-        "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
-        " (toString(i.identity_key))), 1, 4))) * 7291 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
-        " (toString(i.identity_key))), 5, 4))) * 1897 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
-        " (toString(i.identity_key))), 9, 4))) * 6835 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
-        " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
-    )
+    # When / Then it hashes the per-row identity key rather than folding to FALSE
+    assert translate_segment(seg, _ctx_no_identity()) != "((FALSE))"
 
 
 def test_translate_segment__percentage_split_key_no_context_identity__inlines_md5() -> None:
@@ -589,18 +576,8 @@ def test_translate_segment__percentage_split_key_no_context_identity__inlines_md
         ],
     }
 
-    # When / Then it still hashes the per-row identity-key column
-    sql = translate_segment(seg, _ctx_identity_without("key"))
-    assert sql == (
-        "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
-        " (toString(i.identity_key))), 1, 4))) * 7291 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
-        " (toString(i.identity_key))), 5, 4))) * 1897 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
-        " (toString(i.identity_key))), 9, 4))) * 6835 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
-        " (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
-    )
+    # When / Then it still hashes the per-row identity key rather than folding to FALSE
+    assert translate_segment(seg, _ctx_identity_without("key")) != "((FALSE))"
 
 
 def test_translate_segment__percentage_split_identifier_no_context_identity__inlines_md5() -> None:
@@ -623,17 +600,8 @@ def test_translate_segment__percentage_split_identifier_no_context_identity__inl
         ],
     }
 
-    # When / Then it still hashes the per-row identifier column
-    sql = translate_segment(seg, _ctx_identity_without("identifier"))
-    assert sql == (
-        "(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||"
-        " (toString(i.identifier))), 1, 4))) * 7291 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier))),"
-        " 5, 4))) * 1897 + reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||"
-        " (toString(i.identifier))), 9, 4))) * 6835 +"
-        " reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier))),"
-        " 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
-    )
+    # When / Then it still hashes the per-row identifier rather than folding to FALSE
+    assert translate_segment(seg, _ctx_identity_without("identifier")) != "((FALSE))"
 
 
 def test_translate_segment__percentage_split_unknown_jsonpath__returns_none() -> None: