From a2b1b163e5d291049f9a9877278da54a96ee9c10 Mon Sep 17 00:00:00 2001
From: Shaun Patterson
Date: Thu, 28 May 2026 14:16:52 -0400
Subject: [PATCH] perf(query): pool encoder scaffolding across requests

Reuse the encoder struct + attrMap + idSlice via a sync.Pool, cleared by
free() in toFastJSON's defer.

buf is deliberately NOT pooled and is allocated fresh per request:
toFastJSON returns enc.buf.Bytes() (a slice aliasing buf's backing array)
to its caller after the encoder is freed, so pooling buf would let a
concurrent request overwrite an in-flight response.

free() zeroes every idSlice slot before truncating, so the pooled backing
array does not keep the previous request's attribute strings reachable.

BenchmarkEncoderLifecycle: allocs/op 10 -> 4 (-60%), time -14%.
---
 query/outputnode.go      | 56 +++++++++++++++++++++++++++++++++-------
 query/outputnode_test.go | 15 ++++++++++++
 2 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/query/outputnode.go b/query/outputnode.go
index 88c6d9a36ec..ac7da0c11ec 100644
--- a/query/outputnode.go
+++ b/query/outputnode.go
@@ -122,23 +122,59 @@ type node struct {
 
 var nodeSize = int(unsafe.Sizeof(node{}))
 
+// encoderPool reuses the per-request scaffolding (attrMap and idSlice) across
+// requests. buf is deliberately NOT pooled: toFastJSON returns enc.buf.Bytes(),
+// a slice aliasing buf's backing array, to its caller after the encoder is
+// freed. Pooling buf would let a concurrent request overwrite that array and
+// corrupt an in-flight response, so each request gets a fresh buf. arena comes
+// from arenaPool and alloc is fresh per request (z.Allocator.Release
+// invalidates the underlying memory).
+var encoderPool = sync.Pool{
+	New: func() interface{} {
+		return &encoder{
+			attrMap: make(map[string]uint16),
+			idSlice: make([]string, 1),
+		}
+	},
+}
+
 func newEncoder() *encoder {
-	idSlice := make([]string, 1)
+	e := encoderPool.Get().(*encoder)
 
 	a := (arenaPool.Get()).(*arena)
 	a.reset()
-
-	e := &encoder{
-		attrMap: make(map[string]uint16),
-		idSlice: idSlice,
-		arena:   a,
-		alloc:   z.NewAllocator(4<<10, "OutputNode.Encoder"),
-		buf:     &bytes.Buffer{},
-	}
+	e.arena = a
+	e.alloc = z.NewAllocator(4<<10, "OutputNode.Encoder")
+	e.buf = &bytes.Buffer{}
+	e.curSize = 0
+	e.uidAttr = 0
 	e.uidAttr = e.idForAttr("uid")
 	return e
 }
 
+// free resets the reusable encoder scaffolding (attrMap, idSlice) and returns
+// it to the pool. arena is returned to arenaPool and alloc is released by the
+// caller (existing defer in toFastJSON); buf is dropped (not pooled — see
+// encoderPool) so the returned response bytes remain valid.
+func (e *encoder) free() {
+	for k := range e.attrMap {
+		delete(e.attrMap, k)
+	}
+	// Zero every slot, not just index 0: truncating alone would leave the
+	// attribute strings of the finished request reachable through the pooled
+	// backing array.
+	for i := range e.idSlice {
+		e.idSlice[i] = ""
+	}
+	e.idSlice = e.idSlice[:1]
+	e.arena = nil
+	e.alloc = nil
+	e.buf = nil
+	e.curSize = 0
+	e.uidAttr = 0
+	encoderPool.Put(e)
+}
+
 // Sort the given fastJson list
 func (enc *encoder) MergeSort(headRef *fastJsonNode) {
 	if headRef == nil || (*headRef).next == nil {
@@ -1189,6 +1225,8 @@ func (sg *SubGraph) toFastJSON(ctx context.Context, l *Latency, field gqlSchema.
 		// Put encoder's arena back to arena pool.
 		arenaPool.Put(enc.arena)
 		enc.alloc.Release()
+		// Return the encoder itself for reuse (clears attrMap, idSlice, buf).
+		enc.free()
 	}()
 
 	var err error
diff --git a/query/outputnode_test.go b/query/outputnode_test.go
index 729c234e7d0..cdfac4e5358 100644
--- a/query/outputnode_test.go
+++ b/query/outputnode_test.go
@@ -212,6 +212,21 @@ func BenchmarkFastJsonNode2Chilren(b *testing.B) {
 	}
 }
 
+// BenchmarkEncoderLifecycle measures the per-request cost of acquiring and
+// releasing an encoder. This is the path /query → toFastJSON walks for every
+// query response, so it is a direct proxy for response allocation overhead.
+func BenchmarkEncoderLifecycle(b *testing.B) {
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		enc := newEncoder()
+		// Touch the encoder so it is not optimized away.
+		_ = enc.newNode(enc.idForAttr(testAttr))
+		arenaPool.Put(enc.arena)
+		enc.alloc.Release()
+		enc.free()
+	}
+}
+
 func TestChildrenOrder(t *testing.T) {
 	enc := newEncoder()
 	root := enc.newNode(1)