diff --git a/dql/parser.go b/dql/parser.go index 0dd6e1db7ac..186b6a235dd 100644 --- a/dql/parser.go +++ b/dql/parser.go @@ -177,6 +177,9 @@ type Function struct { IsCount bool // gt(count(friends),0) IsValueVar bool // eq(val(s), 5) IsLenVar bool // eq(len(s), 5) + // VectorFilterVar names a uid variable that restricts a similar_to ANN search to + // its uid set (pre-filtered ANN), set by the `filter: ` option. + VectorFilterVar string } // filterOpPrecedence is a map from filterOp (a string) to its precedence. @@ -1917,7 +1920,7 @@ L: if ok && next.Typ == itemColon { key := strings.ToLower(collectName(it, itemInFunc.Val)) switch key { - case "ef", "distance_threshold": + case "ef", "distance_threshold", "filter": default: return nil, itemInFunc.Errorf("Unknown option %q in similar_to", key) } @@ -1933,6 +1936,32 @@ L: return nil, it.Errorf("Expected value for %s", key) } valItem := it.Item() + + // `filter: ` restricts the ANN search to a uid variable's + // set (pre-filtered ANN). The value is a variable name, not a + // literal: register it as a uid dependency and record it on the + // function. It is resolved at the query layer and shipped to the + // worker as the search's allow-set, so it is NOT appended to Args. + if key == "filter" { + if valItem.Typ != itemName { + return nil, valItem.Errorf("filter option expects a uid variable name") + } + varName := strings.TrimSpace(collectName(it, valItem.Val)) + if varName == "" { + return nil, valItem.Errorf("filter option expects a non-empty uid variable name") + } + function.NeedsVar = append(function.NeedsVar, + VarContext{Name: varName, Typ: UidVar}) + function.VectorFilterVar = varName + // Emit a marker arg (not the var name) so the worker knows a + // filter is active even when the resolved allow-set is empty — + // an empty scope must reject all, not fall back to a global + // search. The allow-set itself travels via the task UidList. + function.Args = append(function.Args, + Arg{Value: "filter"}, Arg{Value: "1"}) + expectArg = false + continue + } switch valItem.Typ { case itemDollar: varName, err := parseVarName(it) diff --git a/dql/prefilter_parser_test.go b/dql/prefilter_parser_test.go new file mode 100644 index 00000000000..c93e36c8ba5 --- /dev/null +++ b/dql/prefilter_parser_test.go @@ -0,0 +1,158 @@ +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package dql + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseSimilarTo_FilterOption(t *testing.T) { + query := ` + { + allowed as var(func: type(Chunk)) + q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: allowed)) { + uid + } + }` + res, err := Parse(Request{Str: query}) + require.NoError(t, err) + + var sim *GraphQuery + for _, b := range res.Query { + if b.Func != nil && b.Func.Name == "similar_to" { + sim = b + } + } + require.NotNil(t, sim) + + // The filter var is recorded and registered as a uid dependency. + require.Equal(t, "allowed", sim.Func.VectorFilterVar) + found := false + for _, nv := range sim.Func.NeedsVar { + if nv.Name == "allowed" { + require.Equal(t, UidVar, nv.Typ) + found = true + } + } + require.True(t, found, "filter var must be a NeedsVar (uid)") + + // A `filter` marker arg is present (so the worker can tell an empty scope from no + // filter), but the var NAME itself must never be an arg — the allow-set travels + // via the task UidList, and the var name lives only in NeedsVar/VectorFilterVar. + hasMarker := false + for i := 0; i+1 < len(sim.Func.Args); i += 2 { + if sim.Func.Args[i].Value == "filter" { + hasMarker = true + } + require.NotEqual(t, "allowed", sim.Func.Args[i].Value) + require.NotEqual(t, "allowed", sim.Func.Args[i+1].Value) + } + require.True(t, hasMarker, "a filter marker arg must be present") +} + +func TestParseSimilarTo_FilterWithEf(t *testing.T) { + query := ` + { + allowed as var(func: type(Chunk)) + q(func: similar_to(emb, 10, "[0.1, 0.2]", ef: 64, filter: allowed)) { + uid + } + }` + res, err := Parse(Request{Str: query}) + require.NoError(t, err) + for _, b := range res.Query { + if b.Func != nil && b.Func.Name == "similar_to" { + require.Equal(t, "allowed", b.Func.VectorFilterVar) + } + } +} + +func TestParseSimilarTo_FilterUndefinedVarErrors(t *testing.T) { + // A filter var that no block defines is a parse-time dependency error. + query := ` + { + q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: ghost)) { + uid + } + }` + _, err := Parse(Request{Str: query}) + require.Error(t, err) + require.Contains(t, err.Error(), "not defined") +} + +// TestParseSimilarTo_FilterPropagatesToGraphQueryNeedsVar guards the dependency +// edge the whole feature relies on. The parser records the filter var on +// Func.NeedsVar, but the var-dependency scheduler (collectVars) and newGraph both +// read the GraphQuery-level NeedsVar — populated for the root func at parse time. +// If that propagation ever regresses, scheduling would not resolve the var first +// and the filter would silently see an empty/garbage allow-set, so assert it here. +func TestParseSimilarTo_FilterPropagatesToGraphQueryNeedsVar(t *testing.T) { + query := ` + { + allowed as var(func: type(Chunk)) + q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: allowed)) { + uid + } + }` + res, err := Parse(Request{Str: query}) + require.NoError(t, err) + + var sim *GraphQuery + for _, b := range res.Query { + if b.Func != nil && b.Func.Name == "similar_to" { + sim = b + } + } + require.NotNil(t, sim) + + found := false + for _, nv := range sim.NeedsVar { + if nv.Name == "allowed" { + require.Equal(t, UidVar, nv.Typ) + found = true + } + } + require.True(t, found, + "filter var must surface on GraphQuery.NeedsVar so the scheduler resolves it first") +} + +func TestParseSimilarTo_DuplicateFilterErrors(t *testing.T) { + query := ` + { + allowed as var(func: type(Chunk)) + q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: allowed, filter: allowed)) { + uid + } + }` + _, err := Parse(Request{Str: query}) + require.Error(t, err) + require.Contains(t, err.Error(), "Duplicate key") +} + +func TestParseSimilarTo_FilterNonNameValueErrors(t *testing.T) { + // The filter option's value must be a bare uid-variable name. Non-name tokens + // ($var, lists, parens) are rejected at the option parser with an explicit + // message; string/number literals lex as names and are rejected downstream as + // undefined vars. Either way they must not parse into a valid filter. + explicit := []string{`$x`, `[1, 2]`, `()`} + for _, val := range explicit { + query := `{ q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: ` + val + `)) { uid } }` + _, err := Parse(Request{Str: query}) + require.Error(t, err, "filter: %s should be rejected", val) + require.Contains(t, err.Error(), "filter option expects a uid variable name", + "filter: %s", val) + } + + downstream := []string{`"foo"`, `5`} + for _, val := range downstream { + query := `{ q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: ` + val + `)) { uid } }` + _, err := Parse(Request{Str: query}) + require.Error(t, err, "filter: %s should be rejected", val) + require.Contains(t, err.Error(), "not defined", "filter: %s", val) + } +} diff --git a/query/query.go b/query/query.go index 6926e2ac6ed..05ba65810b7 100644 --- a/query/query.go +++ b/query/query.go @@ -130,6 +130,10 @@ type params struct { // NeedsVar is the list of variables required by this SubGraph along with their type. NeedsVar []dql.VarContext + // VectorFilterVar names a uid variable that pre-filters a similar_to ANN search + // (the `filter: ` option). Its resolved uids become the search allow-set. + VectorFilterVar string + // ParentVars is a map of variables passed down recursively to children of a SubGraph in a query // block. These are used to filter uids defined in a parent using a variable. // TODO (pawan) - This can potentially be simplified to a map[string]*pb.List since we don't @@ -278,6 +282,10 @@ type SubGraph struct { // SrcUIDs is a list of unique source UIDs. They are always copies of destUIDs // of parent nodes in GraphQL structure. SrcUIDs *pb.List + // vectorFilterUids is the resolved allow-set for a pre-filtered similar_to ANN + // search (from the `filter: ` option). It is carried to the worker via the + // task's UidList and does not participate in the normal SrcUIDs/DestUIDs flow. + vectorFilterUids *pb.List // SrcFunc specified using func. Should only be non-nil at root. At other levels, // filters are used. SrcFunc *Function @@ -329,6 +337,7 @@ func (sg *SubGraph) createSrcFunction(gf *dql.Function) { IsValueVar: gf.IsValueVar, IsLenVar: gf.IsLenVar, } + sg.Params.VectorFilterVar = gf.VectorFilterVar // type function is just an alias for eq(type, "dgraph.type"). if gf.Name == "type" { @@ -981,6 +990,11 @@ func createTaskQuery(ctx context.Context, sg *SubGraph) (*pb.Query, error) { if sg.SrcUIDs != nil { out.UidList = sg.SrcUIDs } + // A pre-filtered similar_to search ships its allow-set to the worker via UidList. + // For a root similar_to, SrcUIDs is nil, so this does not conflict. + if sg.vectorFilterUids != nil { + out.UidList = sg.vectorFilterUids + } return out, nil } @@ -1836,6 +1850,13 @@ func (sg *SubGraph) fillVars(mp map[string]varValue) error { } sg.SrcFunc.Args = srcFuncArgs + case v.Name == sg.Params.VectorFilterVar && v.Typ == dql.UidVar && + sg.SrcFunc != nil && sg.SrcFunc.Name == "similar_to": + // Pre-filtered ANN: the filter var's uids scope the similar_to search. + // Capture them as the allow-set; do NOT merge into DestUIDs (this var + // restricts the search, it is not part of the result set). + sg.vectorFilterUids = l.Uids + case (v.Typ == dql.AnyVar || v.Typ == dql.UidVar) && l.Uids != nil: lists = append(lists, l.Uids) diff --git a/query/vector/prefilter_test.go b/query/vector/prefilter_test.go new file mode 100644 index 00000000000..ee92939c747 --- /dev/null +++ b/query/vector/prefilter_test.go @@ -0,0 +1,176 @@ +//go:build integration + +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package query + +import ( + "encoding/json" + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +// Pre-filtered ANN: similar_to(..., filter: ) restricts the search to a uid +// set DURING the HNSW traversal. The data below is arranged so the globally-nearest +// vectors are out of scope (group B, clustered at the origin) while the in-scope +// vectors (group A) are farther away — so post-filtering a fixed top-k would return +// nothing, but pre-filtering returns k in-scope neighbors. + +const ( + pfVec = "pfvec" + pfGroup = "pfgroup" +) + +type pfRow struct { + UID string `json:"uid"` + Group string `json:"pfgroup"` +} + +func pfSetup(t *testing.T) { + setSchema(fmt.Sprintf(` + %s: float32vector @index(hnsw(metric: "euclidean")) . + %s: string @index(exact) . + `, pfVec, pfGroup)) + + var sb strings.Builder + // Use small explicit UIDs (well under the zero's default initial lease, as the + // sibling vector tests do): higher UIDs would be rejected with + // "Uid: [...] cannot be greater than lease". + // + // Group B: 8 "distractor" vectors hugging the origin (closest to a [0,0] query). + for i := 1; i <= 8; i++ { + uid := 0x10 + i + fmt.Fprintf(&sb, "<%d> <%s> \"[%g, 0.0]\" .\n", uid, pfVec, float64(i)*0.01) + fmt.Fprintf(&sb, "<%d> <%s> \"B\" .\n", uid, pfGroup) + } + // Group A: 8 in-scope vectors farther from the origin. + for i := 0; i < 8; i++ { + uid := 0x30 + i + fmt.Fprintf(&sb, "<%d> <%s> \"[%g, 0.0]\" .\n", uid, pfVec, 0.5+float64(i)*0.1) + fmt.Fprintf(&sb, "<%d> <%s> \"A\" .\n", uid, pfGroup) + } + require.NoError(t, addTriplesToCluster(sb.String())) +} + +func pfTeardown() { + dropPredicate(pfVec) + dropPredicate(pfGroup) +} + +func pfQuery(t *testing.T, query string) []pfRow { + t.Helper() + resp, err := client.Query(query) + require.NoError(t, err) + var data struct { + Result []pfRow `json:"result"` + } + require.NoError(t, json.Unmarshal(resp.Json, &data)) + return data.Result +} + +// TestPreFilterANN_ScopesToFilterVar is the core test: a filtered search returns k +// in-scope (group A) neighbors even though the nearest vectors are out of scope. +func TestPreFilterANN_ScopesToFilterVar(t *testing.T) { + pfSetup(t) + defer pfTeardown() + + // Sanity: the unfiltered top-5 are all group B (the distractors near the origin). + unfiltered := pfQuery(t, fmt.Sprintf(` + { + result(func: similar_to(%s, 5, "[0.0, 0.0]")) { + uid + %s + } + }`, pfVec, pfGroup)) + require.Len(t, unfiltered, 5) + for _, r := range unfiltered { + require.Equal(t, "B", r.Group, "unfiltered nearest should be group B") + } + + // Pre-filtered to group A: must return 5 group-A neighbors despite the closer + // group-B vectors. Post-filtering the unfiltered top-5 would have yielded zero. + filtered := pfQuery(t, fmt.Sprintf(` + { + allowed as var(func: eq(%s, "A")) + result(func: similar_to(%s, 5, "[0.0, 0.0]", ef: 100, filter: allowed)) { + uid + %s + } + }`, pfGroup, pfVec, pfGroup)) + require.Len(t, filtered, 5, "pre-filter must still return k in-scope results") + for _, r := range filtered { + require.Equal(t, "A", r.Group, "every result must be in scope (group A)") + } +} + +// TestPreFilterANN_ScopesWithoutEfOverride is the same scope restriction as the core +// test but WITHOUT an explicit `ef:` override. A filtered search must route through +// the wider max(k, efSearch) candidate budget on its own, so it still returns k +// in-scope (group A) neighbors even though the nearest vectors are out of scope. With +// the narrow k-only budget this under-returned. +func TestPreFilterANN_ScopesWithoutEfOverride(t *testing.T) { + pfSetup(t) + defer pfTeardown() + + filtered := pfQuery(t, fmt.Sprintf(` + { + allowed as var(func: eq(%s, "A")) + result(func: similar_to(%s, 5, "[0.0, 0.0]", filter: allowed)) { + uid + %s + } + }`, pfGroup, pfVec, pfGroup)) + require.Len(t, filtered, 5, "pre-filter must return k in-scope results without an ef override") + for _, r := range filtered { + require.Equal(t, "A", r.Group, "every result must be in scope (group A)") + } +} + +// TestPreFilterANN_EmptyScope: a filter var resolving to no uids yields no results. +func TestPreFilterANN_EmptyScope(t *testing.T) { + pfSetup(t) + defer pfTeardown() + + res := pfQuery(t, fmt.Sprintf(` + { + allowed as var(func: eq(%s, "NOPE")) + result(func: similar_to(%s, 5, "[0.0, 0.0]", filter: allowed)) { + uid + %s + } + }`, pfGroup, pfVec, pfGroup)) + require.Empty(t, res, "empty scope returns no results") +} + +// TestPreFilterANN_ScopeSupersetEqualsUnfiltered: when the filter admits every uid, +// the result matches the unfiltered top-k. +func TestPreFilterANN_ScopeSupersetEqualsUnfiltered(t *testing.T) { + pfSetup(t) + defer pfTeardown() + + unfiltered := pfQuery(t, fmt.Sprintf(` + { + result(func: similar_to(%s, 5, "[0.0, 0.0]")) { uid %s } + }`, pfVec, pfGroup)) + + filtered := pfQuery(t, fmt.Sprintf(` + { + allowed as var(func: has(%s)) + result(func: similar_to(%s, 5, "[0.0, 0.0]", ef: 100, filter: allowed)) { uid %s } + }`, pfVec, pfVec, pfGroup)) + + require.Len(t, filtered, len(unfiltered)) + got := map[string]bool{} + for _, r := range filtered { + got[r.UID] = true + } + for _, r := range unfiltered { + require.True(t, got[r.UID], "uid %s from unfiltered top-k missing under all-admitting filter", r.UID) + } +} diff --git a/worker/task.go b/worker/task.go index 409ec3f0fc4..362f32908e0 100644 --- a/worker/task.go +++ b/worker/task.go @@ -375,8 +375,17 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er return err } var nnUids []uint64 - // Build optional search options if provided + // Pre-filtered ANN: when `similar_to(..., filter: var)` is used, restrict the + // search to the allow-set (carried in q.UidList) by applying a membership + // filter DURING the HNSW traversal. The search layer grows its candidate + // budget to still return k in-scope neighbors, instead of post-filtering a + // fixed k and under-returning. An empty scope rejects everything (nothing is in + // scope) — it must NOT fall back to a global search. Absent a filter option, + // accept all (unchanged behavior, zero overhead). filter := index.AcceptAll[float32] + if srcFn.vsHasFilter { + filter = uidMembershipFilter(q.UidList.GetUids()) + } opts := index.VectorIndexOptions[float32]{Filter: filter} if srcFn.vsEfOverride > 0 { opts.EfOverride = srcFn.vsEfOverride @@ -384,7 +393,12 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er if srcFn.vsDistanceThreshold != nil { opts.DistanceThreshold = srcFn.vsDistanceThreshold } - hasOptions := opts.EfOverride > 0 || opts.DistanceThreshold != nil + // Route through the options path when a filter is active, even without an ef + // override: SearchWithOptions uses a max(k, efSearch) bottom-layer candidate + // budget, whereas the legacy Search path uses just k. Pre-filtering needs the + // wider budget to traverse past out-of-scope nodes and still return k in-scope + // neighbors — with the narrow budget a scoped search under-returns. + hasOptions := opts.EfOverride > 0 || opts.DistanceThreshold != nil || srcFn.vsHasFilter if o, ok := indexer.(index.OptionalSearchOptions[float32]); ok && hasOptions { if srcFn.vectorInfo != nil { nnUids, err = o.SearchWithOptions(ctx, qc, srcFn.vectorInfo, int(numNeighbors), opts) @@ -394,10 +408,10 @@ func (qs *queryState) handleValuePostings(ctx context.Context, args funcArgs) er } else { if srcFn.vectorInfo != nil { nnUids, err = indexer.Search(ctx, qc, srcFn.vectorInfo, - int(numNeighbors), index.AcceptAll[float32]) + int(numNeighbors), filter) } else { nnUids, err = indexer.SearchWithUid(ctx, qc, srcFn.vectorUid, - int(numNeighbors), index.AcceptAll[float32]) + int(numNeighbors), filter) } } @@ -1835,6 +1849,10 @@ type functionContext struct { // Optional vector search options parsed from a 3rd arg on similar_to vsEfOverride int vsDistanceThreshold *float64 + // vsHasFilter is set when similar_to was given a `filter: ` option. The + // allow-set arrives via the task UidList; this flag distinguishes a requested + // filter whose scope is empty (reject all) from no filter at all (accept all). + vsHasFilter bool } const ( @@ -2814,6 +2832,10 @@ func parseSimilarToOptions(args []string, fc *functionContext) error { } fc.vsDistanceThreshold = new(float64) *fc.vsDistanceThreshold = f + case "filter": + // Marker that a pre-filter allow-set (uid variable) is in effect. The set + // itself is carried in the task UidList, not here. + fc.vsHasFilter = true default: return errors.Errorf("Unknown option in similar_to: %q", k) } diff --git a/worker/vector_filter.go b/worker/vector_filter.go new file mode 100644 index 00000000000..bf2ccefde8c --- /dev/null +++ b/worker/vector_filter.go @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package worker + +import ( + "sort" + + "github.com/dgraph-io/dgraph/v25/tok/index" +) + +// uidMembershipFilter builds a SearchFilter that accepts only uids present in the +// given allow-set. It powers pre-filtered ANN: the filter is applied during the HNSW +// traversal (not after it), so a scoped similar_to search explores enough of the +// graph to return k in-scope neighbors instead of post-filtering a fixed k and +// returning fewer. The query and candidate vectors are irrelevant to membership, so +// only the uid is examined. +// +// Membership uses a sorted copy of the uids plus binary search rather than a hash +// set: the allow-set can be large (whole-tenant scopes), and a compact 8-bytes/uid +// slice avoids the per-query allocation and GC pressure of a map with millions of +// entries while keeping lookups at O(log n). An empty allow-set rejects everything. +func uidMembershipFilter(uids []uint64) index.SearchFilter[float32] { + // Copy before sorting: the input may alias a shared var list that other goroutines + // read concurrently; we must not mutate it. uid variables reach the worker already + // sorted ascending (algo.MergeSorted), so the common case skips the O(n log n) sort + // — a meaningful saving for whole-tenant scopes (millions of uids per query). We + // still sort defensively when the invariant does not hold, since the binary-search + // lookup below requires ascending order for correctness. + sorted := make([]uint64, len(uids)) + copy(sorted, uids) + less := func(i, j int) bool { return sorted[i] < sorted[j] } + if !sort.SliceIsSorted(sorted, less) { + sort.Slice(sorted, less) + } + + return func(_, _ []float32, uid uint64) bool { + i := sort.Search(len(sorted), func(i int) bool { return sorted[i] >= uid }) + return i < len(sorted) && sorted[i] == uid + } +} diff --git a/worker/vector_filter_test.go b/worker/vector_filter_test.go new file mode 100644 index 00000000000..4a0775f65fd --- /dev/null +++ b/worker/vector_filter_test.go @@ -0,0 +1,91 @@ +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package worker + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUidMembershipFilter(t *testing.T) { + f := uidMembershipFilter([]uint64{2, 5, 9}) + + // In-set uids are accepted; out-of-set rejected. Query/result vectors are + // irrelevant to a membership filter, so nil is fine. + require.True(t, f(nil, nil, 2)) + require.True(t, f(nil, nil, 5)) + require.True(t, f(nil, nil, 9)) + require.False(t, f(nil, nil, 1)) + require.False(t, f(nil, nil, 0)) + require.False(t, f(nil, nil, 100)) +} + +func TestUidMembershipFilter_Empty(t *testing.T) { + // An empty allow-set rejects everything (nothing is in scope). + f := uidMembershipFilter(nil) + require.False(t, f(nil, nil, 1)) + require.False(t, f(nil, nil, 0)) +} + +func TestUidMembershipFilter_Duplicates(t *testing.T) { + // Duplicate uids in the input collapse to a single membership entry. + f := uidMembershipFilter([]uint64{7, 7, 7}) + require.True(t, f(nil, nil, 7)) + require.False(t, f(nil, nil, 8)) +} + +// uidMembershipFilter documents that it sorts a copy because the input may arrive +// unsorted (and may alias a shared var list other goroutines read). The other tests +// only feed already-sorted slices, so neither property is actually exercised. These +// two close that gap. + +func TestUidMembershipFilter_UnsortedInput(t *testing.T) { + // Membership must be correct even when the allow-set arrives unsorted with dups. + f := uidMembershipFilter([]uint64{9, 2, 5, 2, 100}) + for _, in := range []uint64{2, 5, 9, 100} { + require.True(t, f(nil, nil, in), "expected %d in set", in) + } + for _, out := range []uint64{0, 1, 3, 6, 99, 101} { + require.False(t, f(nil, nil, out), "expected %d not in set", out) + } +} + +func TestUidMembershipFilter_DoesNotMutateInput(t *testing.T) { + // The filter must not sort/reorder its input in place: the caller's slice may be + // a shared, concurrently-read var list. Build the filter from an unsorted slice + // and assert the original ordering is preserved. + shared := []uint64{9, 2, 5, 2, 100} + before := append([]uint64(nil), shared...) + + f := uidMembershipFilter(shared) + require.True(t, f(nil, nil, 5)) // force use of the sorted copy + + require.Equal(t, before, shared, "input slice must not be mutated") +} + +func TestUidMembershipFilter_ConcurrentInputReadsRaceFree(t *testing.T) { + // Building filters from a shared slice while other goroutines read it must be + // race-free (meaningful under `go test -race`): the implementation copies before + // sorting precisely so it never writes through the shared backing array. + shared := []uint64{1, 2, 3, 4, 5, 6, 7, 8} + var wg sync.WaitGroup + for g := 0; g < 8; g++ { + wg.Add(1) + go func() { + defer wg.Done() + f := uidMembershipFilter(shared) + _ = f(nil, nil, 4) + var sum uint64 + for _, u := range shared { // concurrent read of the shared input + sum += u + } + _ = sum + }() + } + wg.Wait() +}