Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion dql/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ type Function struct {
IsCount bool // gt(count(friends),0)
IsValueVar bool // eq(val(s), 5)
IsLenVar bool // eq(len(s), 5)
// VectorFilterVar names a uid variable that restricts a similar_to ANN search to
// its uid set (pre-filtered ANN), set by the `filter: <var>` option.
VectorFilterVar string
}

// filterOpPrecedence is a map from filterOp (a string) to its precedence.
Expand Down Expand Up @@ -1917,7 +1920,7 @@ L:
if ok && next.Typ == itemColon {
key := strings.ToLower(collectName(it, itemInFunc.Val))
switch key {
case "ef", "distance_threshold":
case "ef", "distance_threshold", "filter":
default:
return nil, itemInFunc.Errorf("Unknown option %q in similar_to", key)
}
Expand All @@ -1933,6 +1936,32 @@ L:
return nil, it.Errorf("Expected value for %s", key)
}
valItem := it.Item()

// `filter: <var>` restricts the ANN search to a uid variable's
// set (pre-filtered ANN). The value is a variable name, not a
// literal: register it as a uid dependency and record it on the
// function. The name is resolved at the query layer and its uids are
// shipped to the worker as the search's allow-set, so the variable
// name itself is NOT appended to Args; only a fixed marker pair is.
if key == "filter" {
if valItem.Typ != itemName {
return nil, valItem.Errorf("filter option expects a uid variable name")
}
varName := strings.TrimSpace(collectName(it, valItem.Val))
if varName == "" {
return nil, valItem.Errorf("filter option expects a non-empty uid variable name")
}
function.NeedsVar = append(function.NeedsVar,
VarContext{Name: varName, Typ: UidVar})
function.VectorFilterVar = varName
// Emit a marker arg (not the var name) so the worker knows a
// filter is active even when the resolved allow-set is empty —
// an empty scope must reject all, not fall back to a global
// search. The allow-set itself travels via the task UidList.
function.Args = append(function.Args,
Arg{Value: "filter"}, Arg{Value: "1"})
expectArg = false
continue
}
switch valItem.Typ {
case itemDollar:
varName, err := parseVarName(it)
Expand Down
158 changes: 158 additions & 0 deletions dql/prefilter_parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
* SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package dql

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestParseSimilarTo_FilterOption checks the parse result of a similar_to call
// that carries the `filter: <var>` option: the variable is recorded on
// Func.VectorFilterVar, registered as a uid dependency on Func.NeedsVar, and
// represented in Func.Args only by a "filter" marker, never by its own name.
func TestParseSimilarTo_FilterOption(t *testing.T) {
	query := `
{
allowed as var(func: type(Chunk))
q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: allowed)) {
uid
}
}`
	res, err := Parse(Request{Str: query})
	require.NoError(t, err)

	var sim *GraphQuery
	for _, b := range res.Query {
		if b.Func != nil && b.Func.Name == "similar_to" {
			sim = b
		}
	}
	require.NotNil(t, sim)

	// The filter var is recorded and registered as a uid dependency.
	require.Equal(t, "allowed", sim.Func.VectorFilterVar)
	found := false
	for _, nv := range sim.Func.NeedsVar {
		if nv.Name == "allowed" {
			require.Equal(t, UidVar, nv.Typ)
			found = true
		}
	}
	require.True(t, found, "filter var must be a NeedsVar (uid)")

	// A `filter` marker arg is present (so the worker can tell an empty scope from no
	// filter), but the var NAME itself must never be an arg — the allow-set travels
	// via the task UidList, and the var name lives only in NeedsVar/VectorFilterVar.
	//
	// Every arg is inspected individually: stepping through Args in pairs would
	// depend on the options starting at an even index and would leave the final
	// element unchecked whenever Args has odd length.
	hasMarker := false
	for _, arg := range sim.Func.Args {
		require.NotEqual(t, "allowed", arg.Value)
		if arg.Value == "filter" {
			hasMarker = true
		}
	}
	require.True(t, hasMarker, "a filter marker arg must be present")
}

// TestParseSimilarTo_FilterWithEf checks that `filter: <var>` is still recorded
// on the function when it follows another option (`ef`) in the same call.
func TestParseSimilarTo_FilterWithEf(t *testing.T) {
	query := `
{
allowed as var(func: type(Chunk))
q(func: similar_to(emb, 10, "[0.1, 0.2]", ef: 64, filter: allowed)) {
uid
}
}`
	res, err := Parse(Request{Str: query})
	require.NoError(t, err)

	// Track whether a similar_to block was actually seen; without this the
	// test would pass vacuously if the loop body never ran.
	sawSimilarTo := false
	for _, b := range res.Query {
		if b.Func != nil && b.Func.Name == "similar_to" {
			sawSimilarTo = true
			require.Equal(t, "allowed", b.Func.VectorFilterVar)
		}
	}
	require.True(t, sawSimilarTo, "parsed query must contain a similar_to block")
}

func TestParseSimilarTo_FilterUndefinedVarErrors(t *testing.T) {
// A filter var that no block defines is a parse-time dependency error.
query := `
{
q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: ghost)) {
uid
}
}`
_, err := Parse(Request{Str: query})
require.Error(t, err)
require.Contains(t, err.Error(), "not defined")
}

// TestParseSimilarTo_FilterPropagatesToGraphQueryNeedsVar protects the
// dependency edge that the pre-filter feature is built on. The option parser
// stores the filter var on Func.NeedsVar, whereas the var-dependency scheduler
// (collectVars) and newGraph consult the GraphQuery-level NeedsVar, which is
// filled in for the root func at parse time. Should that propagation regress,
// the var would not be resolved ahead of the search and the filter would
// silently operate on an empty/garbage allow-set; this test asserts the edge.
func TestParseSimilarTo_FilterPropagatesToGraphQueryNeedsVar(t *testing.T) {
	const query = `
{
allowed as var(func: type(Chunk))
q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: allowed)) {
uid
}
}`
	parsed, err := Parse(Request{Str: query})
	require.NoError(t, err)

	// Keep the last similar_to block encountered, if any.
	var sim *GraphQuery
	for _, blk := range parsed.Query {
		if blk.Func == nil || blk.Func.Name != "similar_to" {
			continue
		}
		sim = blk
	}
	require.NotNil(t, sim)

	// The dependency must appear on the block itself, typed as a uid var.
	sawAllowed := false
	for _, dep := range sim.NeedsVar {
		if dep.Name != "allowed" {
			continue
		}
		require.Equal(t, UidVar, dep.Typ)
		sawAllowed = true
	}
	require.True(t, sawAllowed,
		"filter var must surface on GraphQuery.NeedsVar so the scheduler resolves it first")
}

func TestParseSimilarTo_DuplicateFilterErrors(t *testing.T) {
query := `
{
allowed as var(func: type(Chunk))
q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: allowed, filter: allowed)) {
uid
}
}`
_, err := Parse(Request{Str: query})
require.Error(t, err)
require.Contains(t, err.Error(), "Duplicate key")
}

// TestParseSimilarTo_FilterNonNameValueErrors checks that the `filter` option
// accepts nothing but a bare uid-variable name. Non-name tokens ($var, lists,
// parens) are expected to fail in the option parser with its explicit message;
// string/number literals lex as names and are expected to fail later as
// undefined vars. In every case the query must not parse successfully.
func TestParseSimilarTo_FilterNonNameValueErrors(t *testing.T) {
	const (
		queryHead = `{ q(func: similar_to(emb, 10, "[0.1, 0.2]", filter: `
		queryTail = `)) { uid } }`

		explicitMsg   = "filter option expects a uid variable name"
		downstreamMsg = "not defined"
	)

	// Explicit rejections come first, then the downstream ones.
	cases := []struct {
		val     string
		wantMsg string
	}{
		{val: `$x`, wantMsg: explicitMsg},
		{val: `[1, 2]`, wantMsg: explicitMsg},
		{val: `()`, wantMsg: explicitMsg},
		{val: `"foo"`, wantMsg: downstreamMsg},
		{val: `5`, wantMsg: downstreamMsg},
	}

	for _, tc := range cases {
		_, err := Parse(Request{Str: queryHead + tc.val + queryTail})
		require.Error(t, err, "filter: %s should be rejected", tc.val)
		require.Contains(t, err.Error(), tc.wantMsg, "filter: %s", tc.val)
	}
}
21 changes: 21 additions & 0 deletions query/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ type params struct {
// NeedsVar is the list of variables required by this SubGraph along with their type.
NeedsVar []dql.VarContext

// VectorFilterVar names a uid variable that pre-filters a similar_to ANN search
// (the `filter: <var>` option). Its resolved uids become the search allow-set.
VectorFilterVar string

// ParentVars is a map of variables passed down recursively to children of a SubGraph in a query
// block. These are used to filter uids defined in a parent using a variable.
// TODO (pawan) - This can potentially be simplified to a map[string]*pb.List since we don't
Expand Down Expand Up @@ -278,6 +282,10 @@ type SubGraph struct {
// SrcUIDs is a list of unique source UIDs. They are always copies of destUIDs
// of parent nodes in GraphQL structure.
SrcUIDs *pb.List
// vectorFilterUids is the resolved allow-set for a pre-filtered similar_to ANN
// search (from the `filter: <var>` option). It is carried to the worker via the
// task's UidList and does not participate in the normal SrcUIDs/DestUIDs flow.
vectorFilterUids *pb.List
// SrcFunc specified using func. Should only be non-nil at root. At other levels,
// filters are used.
SrcFunc *Function
Expand Down Expand Up @@ -329,6 +337,7 @@ func (sg *SubGraph) createSrcFunction(gf *dql.Function) {
IsValueVar: gf.IsValueVar,
IsLenVar: gf.IsLenVar,
}
sg.Params.VectorFilterVar = gf.VectorFilterVar

// type function is just an alias for eq(type, "dgraph.type").
if gf.Name == "type" {
Expand Down Expand Up @@ -981,6 +990,11 @@ func createTaskQuery(ctx context.Context, sg *SubGraph) (*pb.Query, error) {
if sg.SrcUIDs != nil {
out.UidList = sg.SrcUIDs
}
// A pre-filtered similar_to search ships its allow-set to the worker via UidList.
// For a root similar_to, SrcUIDs is nil, so this does not conflict.
if sg.vectorFilterUids != nil {
out.UidList = sg.vectorFilterUids
}
return out, nil
}

Expand Down Expand Up @@ -1836,6 +1850,13 @@ func (sg *SubGraph) fillVars(mp map[string]varValue) error {
}
sg.SrcFunc.Args = srcFuncArgs

case v.Name == sg.Params.VectorFilterVar && v.Typ == dql.UidVar &&
sg.SrcFunc != nil && sg.SrcFunc.Name == "similar_to":
// Pre-filtered ANN: the filter var's uids scope the similar_to search.
// Capture them as the allow-set; do NOT merge into DestUIDs (this var
// restricts the search, it is not part of the result set).
sg.vectorFilterUids = l.Uids

case (v.Typ == dql.AnyVar || v.Typ == dql.UidVar) && l.Uids != nil:
lists = append(lists, l.Uids)

Expand Down
Loading
Loading