From 1283ab9d89f38a92563bd69fe882e1b8ae3355e1 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Thu, 18 Jun 2026 17:27:33 +0200 Subject: [PATCH 1/3] perf: faster model resolution, JSON decoding, and request snapshotting Reduce per-request CPU and allocations on the gateway hot path. Changes are behavior-preserving for all valid input; the one intentional difference is documented and tested. Model resolution (O(1)): - Add a lazy provider-selector index to the registry (qualifiedByName / qualifiedByType), invalidated at the existing single cache-invalidation point. - Route qualified-selector resolution through it via an optional qualifiedSelectorResolver interface, with the catalog scan kept as a fallback for non-indexed lookups and raw slash-shaped model IDs. - Resolution is now O(1) and constant in catalog size (was O(N), copying the full catalog several times per request): ~31us/164KB -> ~0.8us/0.3KB at 300 models. Deduplicated the redundant name/type scans in resolveQualifiedSelector. JSON decoding (goccy/go-json): - Migrate internal/ + cmd/ from encoding/json to github.com/goccy/go-json (drop-in; package is named json). gjson is unchanged. - ~3.8x faster realistic chat-body decode with fewer allocations. - goccy is slightly more lenient than encoding/json on a couple of malformed inputs (leading-zero numbers; malformed values in skipped passthrough fields). Accepted under the gateway's accept-generously principle and pinned by TestDecoderLeniencyIsBounded. - Drop the redundant gjson.ValidBytes walk in extractUnknownJSONFields (callers already validate via the preceding Unmarshal). Request snapshot allocations: - Add NewRequestSnapshotWithOwnedMaps so ingress capture owns the freshly built route/query/trace maps and body, cloning only the live header map. - Add HeadersView (zero-copy) and route read-only callers to it. - Remove the now-superseded NewRequestSnapshotWithOwnedBody constructor. Perf harness: - Make the gateway hot-path benchmark exercise the real Router + populated catalog (it previously bypassed routing, giving false confidence) and add a guard case for it. Add a resolution micro-benchmark. Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/gomodel/health.go | 3 +- cmd/recordapi/main.go | 3 +- go.mod | 1 + go.sum | 2 + internal/admin/handler_guardrails.go | 3 +- internal/admin/handler_live.go | 3 +- internal/aliases/batch_preparer.go | 3 +- internal/anthropicapi/request.go | 3 +- internal/anthropicapi/response.go | 3 +- internal/anthropicapi/stream.go | 3 +- internal/anthropicapi/types.go | 2 +- internal/app/app.go | 3 +- internal/auditlog/auditlog.go | 3 +- internal/auditlog/entry_capture.go | 5 +- internal/auditlog/middleware.go | 3 +- internal/auditlog/reader_postgresql.go | 3 +- internal/auditlog/reader_sqlite.go | 3 +- internal/batch/store.go | 3 +- internal/cache/modelcache/local.go | 3 +- internal/cache/modelcache/modelcache.go | 3 +- internal/cache/modelcache/redis.go | 3 +- internal/conversationstore/store.go | 3 +- internal/conversationstore/store_memory.go | 3 +- internal/core/audio.go | 3 +- internal/core/batch.go | 2 +- internal/core/batch_json.go | 2 +- internal/core/batch_preparation.go | 3 +- internal/core/chat_content.go | 3 +- internal/core/chat_json.go | 2 +- internal/core/conversations.go | 3 +- internal/core/embeddings_encoding.go | 3 +- internal/core/embeddings_json.go | 2 +- internal/core/errors.go | 3 +- internal/core/json_fields.go | 14 ++- internal/core/json_fields_test.go | 55 ++++++++++-- internal/core/message_json.go | 3 +- internal/core/request_snapshot.go | 42 +++++++-- internal/core/request_snapshot_test.go | 8 +- internal/core/responses.go | 2 +- internal/core/responses_json.go | 3 +- internal/core/semantic_canonical.go | 3 +- internal/core/types.go | 2 +- internal/core/usage_json.go | 2 +- internal/embedding/embedding.go | 3 +- internal/gateway/batch_usage.go | 3 +- internal/guardrails/batch_rewrite.go | 3 +- internal/guardrails/batch_rewrite_test.go | 14 +-- internal/guardrails/definitions.go | 3 +- internal/guardrails/executor.go | 3 +- .../guardrails/responses_message_apply.go | 3 +- internal/guardrails/store_mongodb.go | 3 +- internal/live/broker.go | 3 +- internal/llmclient/client.go | 3 +- internal/modeldata/fetcher.go | 3 +- internal/modeloverrides/batch_preparer.go | 3 +- internal/modeloverrides/store.go | 3 +- internal/modeloverrides/store_postgresql.go | 3 +- internal/modeloverrides/store_sqlite.go | 3 +- internal/pricingoverrides/store.go | 3 +- internal/pricingoverrides/store_postgresql.go | 3 +- internal/pricingoverrides/store_sqlite.go | 3 +- internal/providers/anthropic/anthropic.go | 3 +- internal/providers/anthropic/batch.go | 3 +- internal/providers/anthropic/chat.go | 3 +- internal/providers/anthropic/chat_stream.go | 3 +- .../anthropic/request_translation.go | 3 +- internal/providers/anthropic/responses.go | 3 +- internal/providers/anthropic/types.go | 2 +- internal/providers/bailian/bailian.go | 3 +- .../providers/batch_results_file_adapter.go | 3 +- internal/providers/bedrock/chat.go | 3 +- internal/providers/bedrock/chat_stream.go | 3 +- internal/providers/chat_stream_normalize.go | 3 +- internal/providers/deepseek/deepseek.go | 3 +- internal/providers/gemini/gemini.go | 3 +- internal/providers/gemini/native.go | 3 +- internal/providers/gemini/native_stream.go | 3 +- internal/providers/googlecommon/auth.go | 3 +- internal/providers/ollama/ollama.go | 3 +- internal/providers/openai/openai.go | 3 +- internal/providers/registry.go | 90 ++++++++++++++++++- internal/providers/registry_metadata.go | 3 +- internal/providers/resolve_bench_test.go | 70 +++++++++++++++ internal/providers/responses_adapter.go | 3 +- internal/providers/responses_content.go | 3 +- internal/providers/responses_converter.go | 3 +- internal/providers/responses_input.go | 3 +- internal/providers/responses_output.go | 3 +- internal/providers/responses_output_state.go | 3 +- internal/providers/router.go | 39 ++++---- internal/providers/vertex/vertex.go | 3 +- internal/providers/xai/xai.go | 5 +- internal/providers/xiaomi/audio.go | 3 +- internal/responsecache/responsecache.go | 2 +- internal/responsecache/semantic.go | 3 +- internal/responsecache/simple.go | 3 +- internal/responsecache/sse_validation.go | 3 +- internal/responsecache/stream_cache.go | 3 +- internal/responsecache/stream_cache_chat.go | 3 +- .../responsecache/stream_cache_responses.go | 3 +- internal/responsecache/vecstore_pinecone.go | 3 +- internal/responsecache/vecstore_qdrant.go | 3 +- internal/responsecache/vecstore_weaviate.go | 3 +- internal/responsestore/store.go | 3 +- internal/server/conversation_responses.go | 3 +- .../internal_chat_completion_executor.go | 3 +- .../server/native_conversation_service.go | 3 +- internal/server/native_response_service.go | 3 +- internal/server/request_selector_peek.go | 3 +- internal/server/request_snapshot.go | 4 +- internal/server/response_input_items.go | 3 +- .../server/translated_inference_service.go | 3 +- internal/streaming/observed_sse_stream.go | 3 +- internal/usage/audio.go | 3 +- internal/usage/cost.go | 3 +- internal/usage/extractor.go | 3 +- internal/usage/reader_postgresql.go | 3 +- internal/usage/reader_sqlite.go | 3 +- internal/usage/realtime.go | 3 +- internal/usage/recalculate_pricing.go | 3 +- internal/usage/store_sqlite.go | 3 +- internal/workflows/store_postgresql.go | 3 +- internal/workflows/store_sqlite.go | 3 +- internal/workflows/types.go | 3 +- tests/perf/README.md | 13 +++ tests/perf/hotpath_test.go | 88 +++++++++++++++++- 126 files changed, 608 insertions(+), 165 deletions(-) create mode 100644 internal/providers/resolve_bench_test.go diff --git a/cmd/gomodel/health.go b/cmd/gomodel/health.go index d719e460..0fcbfd09 100644 --- a/cmd/gomodel/health.go +++ b/cmd/gomodel/health.go @@ -2,7 +2,6 @@ package main import ( "context" - "encoding/json" "fmt" "io" "net" @@ -11,6 +10,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/config" ) diff --git a/cmd/recordapi/main.go b/cmd/recordapi/main.go index 60a0e4a9..44969438 100644 --- a/cmd/recordapi/main.go +++ b/cmd/recordapi/main.go @@ -9,7 +9,6 @@ package main import ( "bytes" - "encoding/json" "flag" "fmt" "io" @@ -18,6 +17,8 @@ import ( "path/filepath" "strings" "time" + + "github.com/goccy/go-json" ) const oracleDefaultModel = "openai.gpt-oss-120b" diff --git a/go.mod b/go.mod index 1ae6f2cc..409fc454 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.53.5 github.com/cespare/xxhash/v2 v2.3.0 github.com/coder/websocket v1.8.15 + github.com/goccy/go-json v0.10.6 github.com/google/uuid v1.6.0 github.com/jackc/pgx/v5 v5.10.0 github.com/joho/godotenv v1.5.1 diff --git a/go.sum b/go.sum index cea4e5fb..edac9070 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,8 @@ github.com/go-openapi/testify/enable/yaml/v2 v2.4.0 h1:7SgOMTvJkM8yWrQlU8Jm18VeD github.com/go-openapi/testify/enable/yaml/v2 v2.4.0/go.mod h1:14iV8jyyQlinc9StD7w1xVPW3CO3q1Gj04Jy//Kw4VM= github.com/go-openapi/testify/v2 v2.4.0 h1:8nsPrHVCWkQ4p8h1EsRVymA2XABB4OT40gcvAu+voFM= github.com/go-openapi/testify/v2 v2.4.0/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54= +github.com/goccy/go-json v0.10.6 h1:p8HrPJzOakx/mn/bQtjgNjdTcN+/S6FcG2CTtQOrHVU= +github.com/goccy/go-json v0.10.6/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= diff --git a/internal/admin/handler_guardrails.go b/internal/admin/handler_guardrails.go index 35732c82..20f77d0f 100644 --- a/internal/admin/handler_guardrails.go +++ b/internal/admin/handler_guardrails.go @@ -1,11 +1,12 @@ package admin import ( - "encoding/json" "errors" "net/http" "strings" + "github.com/goccy/go-json" + "github.com/labstack/echo/v5" "gomodel/internal/core" diff --git a/internal/admin/handler_live.go b/internal/admin/handler_live.go index 18722e15..a6e2c299 100644 --- a/internal/admin/handler_live.go +++ b/internal/admin/handler_live.go @@ -1,13 +1,14 @@ package admin import ( - "encoding/json" "fmt" "net/http" "strconv" "strings" "time" + "github.com/goccy/go-json" + "github.com/labstack/echo/v5" "gomodel/internal/core" diff --git a/internal/aliases/batch_preparer.go b/internal/aliases/batch_preparer.go index de3c6462..3886f635 100644 --- a/internal/aliases/batch_preparer.go +++ b/internal/aliases/batch_preparer.go @@ -2,10 +2,11 @@ package aliases import ( "context" - "encoding/json" "fmt" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/anthropicapi/request.go b/internal/anthropicapi/request.go index c4424df6..a5800ee9 100644 --- a/internal/anthropicapi/request.go +++ b/internal/anthropicapi/request.go @@ -2,11 +2,12 @@ package anthropicapi import ( "bytes" - "encoding/json" "fmt" "io" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/anthropicapi/response.go b/internal/anthropicapi/response.go index 817c4480..a2ccc8b6 100644 --- a/internal/anthropicapi/response.go +++ b/internal/anthropicapi/response.go @@ -2,9 +2,10 @@ package anthropicapi import ( "bytes" - "encoding/json" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/anthropicapi/stream.go b/internal/anthropicapi/stream.go index 55fe41a8..1eca9b65 100644 --- a/internal/anthropicapi/stream.go +++ b/internal/anthropicapi/stream.go @@ -3,9 +3,10 @@ package anthropicapi import ( "bufio" "bytes" - "encoding/json" "io" + "github.com/goccy/go-json" + "gomodel/internal/streaming" ) diff --git a/internal/anthropicapi/types.go b/internal/anthropicapi/types.go index 22d455b2..9da4436f 100644 --- a/internal/anthropicapi/types.go +++ b/internal/anthropicapi/types.go @@ -4,7 +4,7 @@ // independent of which provider ultimately serves the request. package anthropicapi -import "encoding/json" +import "github.com/goccy/go-json" // MessagesRequest is the Anthropic Messages API request body. // System and message content fields are polymorphic on the wire (string or diff --git a/internal/app/app.go b/internal/app/app.go index 1fc60831..11b3f0e1 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -4,7 +4,6 @@ package app import ( "context" - "encoding/json" "errors" "fmt" "log/slog" @@ -14,6 +13,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "gomodel/config" "gomodel/internal/admin" "gomodel/internal/admin/dashboard" diff --git a/internal/auditlog/auditlog.go b/internal/auditlog/auditlog.go index 56fe5b2b..066f4743 100644 --- a/internal/auditlog/auditlog.go +++ b/internal/auditlog/auditlog.go @@ -4,10 +4,11 @@ package auditlog import ( "context" - "encoding/json" "log/slog" "strings" "time" + + "github.com/goccy/go-json" ) // LogStore defines the interface for audit log storage backends. diff --git a/internal/auditlog/entry_capture.go b/internal/auditlog/entry_capture.go index a385f007..41a36208 100644 --- a/internal/auditlog/entry_capture.go +++ b/internal/auditlog/entry_capture.go @@ -2,12 +2,13 @@ package auditlog import ( "context" - "encoding/json" "errors" "net/http" "net/url" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) @@ -211,7 +212,7 @@ func internalJSONAuditHeaders(ctx context.Context, requestID string) http.Header headers.Set(core.UserPathHeaderNameFromContext(ctx), userPath) } if snapshot := core.GetRequestSnapshot(ctx); snapshot != nil { - snapshotHeaders := snapshot.GetHeaders() + snapshotHeaders := snapshot.HeadersView() for _, key := range []string{"Traceparent", "Tracestate", "Baggage"} { for _, value := range snapshotHeaders[key] { headers.Add(key, value) diff --git a/internal/auditlog/middleware.go b/internal/auditlog/middleware.go index 1044937b..2e8279e7 100644 --- a/internal/auditlog/middleware.go +++ b/internal/auditlog/middleware.go @@ -8,7 +8,6 @@ import ( "context" "crypto/sha256" "encoding/hex" - "encoding/json" "io" "net" "net/http" @@ -16,6 +15,8 @@ import ( "time" "unicode/utf8" + "github.com/goccy/go-json" + "github.com/andybalholm/brotli" "github.com/google/uuid" "github.com/labstack/echo/v5" diff --git a/internal/auditlog/reader_postgresql.go b/internal/auditlog/reader_postgresql.go index a1adb88f..b455e65c 100644 --- a/internal/auditlog/reader_postgresql.go +++ b/internal/auditlog/reader_postgresql.go @@ -4,10 +4,11 @@ import ( "gomodel/internal/storage/sqlutil" "context" - "encoding/json" "fmt" "log/slog" + "github.com/goccy/go-json" + "github.com/jackc/pgx/v5/pgxpool" ) diff --git a/internal/auditlog/reader_sqlite.go b/internal/auditlog/reader_sqlite.go index 56602a56..bdf06fbb 100644 --- a/internal/auditlog/reader_sqlite.go +++ b/internal/auditlog/reader_sqlite.go @@ -5,11 +5,12 @@ import ( "context" "database/sql" - "encoding/json" "fmt" "log/slog" "sort" "time" + + "github.com/goccy/go-json" ) const sqliteTimestampBoundaryLayout = "2006-01-02T15:04:05" diff --git a/internal/batch/store.go b/internal/batch/store.go index 8ccd5cdb..9e76258f 100644 --- a/internal/batch/store.go +++ b/internal/batch/store.go @@ -3,13 +3,14 @@ package batch import ( "context" - "encoding/json" "errors" "fmt" "strconv" "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/cache/modelcache/local.go b/internal/cache/modelcache/local.go index da89b1a8..68524ac6 100644 --- a/internal/cache/modelcache/local.go +++ b/internal/cache/modelcache/local.go @@ -2,11 +2,12 @@ package modelcache import ( "context" - "encoding/json" "fmt" "os" "path/filepath" "sync" + + "github.com/goccy/go-json" ) // LocalCache implements Cache using local file storage. diff --git a/internal/cache/modelcache/modelcache.go b/internal/cache/modelcache/modelcache.go index dc173ad4..b113fe50 100644 --- a/internal/cache/modelcache/modelcache.go +++ b/internal/cache/modelcache/modelcache.go @@ -5,8 +5,9 @@ package modelcache import ( "context" - "encoding/json" "time" + + "github.com/goccy/go-json" ) // ModelCache represents the cached model data structure. diff --git a/internal/cache/modelcache/redis.go b/internal/cache/modelcache/redis.go index fbb62188..36a85f89 100644 --- a/internal/cache/modelcache/redis.go +++ b/internal/cache/modelcache/redis.go @@ -2,11 +2,12 @@ package modelcache import ( "context" - "encoding/json" "fmt" "log/slog" "time" + "github.com/goccy/go-json" + "gomodel/internal/cache" ) diff --git a/internal/conversationstore/store.go b/internal/conversationstore/store.go index 32f2a963..b334836d 100644 --- a/internal/conversationstore/store.go +++ b/internal/conversationstore/store.go @@ -4,12 +4,13 @@ package conversationstore import ( "context" - "encoding/json" "errors" "fmt" "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/conversationstore/store_memory.go b/internal/conversationstore/store_memory.go index d7a9d69b..4dc55af1 100644 --- a/internal/conversationstore/store_memory.go +++ b/internal/conversationstore/store_memory.go @@ -2,12 +2,13 @@ package conversationstore import ( "context" - "encoding/json" "fmt" "sort" "sync" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/core/audio.go b/internal/core/audio.go index 05fd338a..1541302c 100644 --- a/internal/core/audio.go +++ b/internal/core/audio.go @@ -1,9 +1,10 @@ package core import ( - "encoding/json" "io" "strings" + + "github.com/goccy/go-json" ) // AudioSpeechRequest is an OpenAI-compatible POST /v1/audio/speech diff --git a/internal/core/batch.go b/internal/core/batch.go index b64cf39c..b08114ac 100644 --- a/internal/core/batch.go +++ b/internal/core/batch.go @@ -1,6 +1,6 @@ package core -import "encoding/json" +import "github.com/goccy/go-json" // BatchRequest is OpenAI-compatible for core fields and extends with inline requests. // diff --git a/internal/core/batch_json.go b/internal/core/batch_json.go index f51f7a33..f7cbe07a 100644 --- a/internal/core/batch_json.go +++ b/internal/core/batch_json.go @@ -1,6 +1,6 @@ package core -import "encoding/json" +import "github.com/goccy/go-json" func (r *BatchRequest) UnmarshalJSON(data []byte) error { var raw struct { diff --git a/internal/core/batch_preparation.go b/internal/core/batch_preparation.go index 2437416b..32808c31 100644 --- a/internal/core/batch_preparation.go +++ b/internal/core/batch_preparation.go @@ -3,12 +3,13 @@ package core import ( "bytes" "context" - "encoding/json" "errors" "fmt" "maps" "net/http" "strings" + + "github.com/goccy/go-json" ) // BatchPreparationMetadata captures request-scoped batch preprocessing effects diff --git a/internal/core/chat_content.go b/internal/core/chat_content.go index 1aea4685..4fc2c474 100644 --- a/internal/core/chat_content.go +++ b/internal/core/chat_content.go @@ -2,9 +2,10 @@ package core import ( "bytes" - "encoding/json" "fmt" "strings" + + "github.com/goccy/go-json" ) // ContentPart represents a single OpenAI-compatible multimodal chat content part. diff --git a/internal/core/chat_json.go b/internal/core/chat_json.go index 66901a25..dbf631e8 100644 --- a/internal/core/chat_json.go +++ b/internal/core/chat_json.go @@ -1,6 +1,6 @@ package core -import "encoding/json" +import "github.com/goccy/go-json" func (r *ChatRequest) UnmarshalJSON(data []byte) error { var raw struct { diff --git a/internal/core/conversations.go b/internal/core/conversations.go index f87f80df..f564995e 100644 --- a/internal/core/conversations.go +++ b/internal/core/conversations.go @@ -2,9 +2,10 @@ package core import ( "bytes" - "encoding/json" "fmt" "unicode/utf8" + + "github.com/goccy/go-json" ) // ConversationObject is the value of the "object" field on a conversation. diff --git a/internal/core/embeddings_encoding.go b/internal/core/embeddings_encoding.go index cb67330b..3f2bbeb0 100644 --- a/internal/core/embeddings_encoding.go +++ b/internal/core/embeddings_encoding.go @@ -4,9 +4,10 @@ import ( "bytes" "encoding/base64" "encoding/binary" - "encoding/json" "math" "strings" + + "github.com/goccy/go-json" ) // maxEmbeddingDims caps how large a single vector may be before encoding diff --git a/internal/core/embeddings_json.go b/internal/core/embeddings_json.go index d07571bb..228a2773 100644 --- a/internal/core/embeddings_json.go +++ b/internal/core/embeddings_json.go @@ -1,6 +1,6 @@ package core -import "encoding/json" +import "github.com/goccy/go-json" func (r *EmbeddingRequest) UnmarshalJSON(data []byte) error { var raw struct { diff --git a/internal/core/errors.go b/internal/core/errors.go index 5c30ac79..b1097796 100644 --- a/internal/core/errors.go +++ b/internal/core/errors.go @@ -3,10 +3,11 @@ package core import ( "bytes" - "encoding/json" "fmt" "net/http" "strings" + + "github.com/goccy/go-json" ) // ErrorType represents the type of error that occurred diff --git a/internal/core/json_fields.go b/internal/core/json_fields.go index 25c894a8..5b5767a6 100644 --- a/internal/core/json_fields.go +++ b/internal/core/json_fields.go @@ -2,12 +2,13 @@ package core import ( "bytes" - "encoding/json" "fmt" "math" "slices" "sort" + "github.com/goccy/go-json" + "github.com/tidwall/gjson" ) @@ -221,14 +222,19 @@ func (fields UnknownJSONFields) IsEmpty() bool { return len(trimmed) == 0 || bytes.Equal(trimmed, []byte("{}")) } +// extractUnknownJSONFields captures the object's keys that are not in +// knownFields, preserving their raw bytes for passthrough (Postel's Law). +// +// Precondition: data must already be valid JSON. Every caller is an +// UnmarshalJSON method that calls json.Unmarshal on the same bytes first, so a +// separate gjson.ValidBytes walk here would re-scan the whole document for no +// benefit. The cheap first-byte and IsObject checks remain to reject non-object +// JSON explicitly. func extractUnknownJSONFields(data []byte, knownFields ...string) (UnknownJSONFields, error) { data = bytes.TrimSpace(data) if len(data) == 0 || data[0] != '{' { return UnknownJSONFields{}, fmt.Errorf("expected JSON object") } - if !gjson.ValidBytes(data) { - return UnknownJSONFields{}, fmt.Errorf("invalid JSON object") - } root := gjson.ParseBytes(data) if !root.IsObject() { diff --git a/internal/core/json_fields_test.go b/internal/core/json_fields_test.go index 62505580..8d2cb5aa 100644 --- a/internal/core/json_fields_test.go +++ b/internal/core/json_fields_test.go @@ -190,22 +190,61 @@ func TestMergeUnknownJSONFields_NoAdditionsReturnsBase(t *testing.T) { } } -func TestExtractUnknownJSONFields_RejectsInvalidJSONSyntax(t *testing.T) { +// extractUnknownJSONFields assumes its input is already valid JSON: every +// production caller is an UnmarshalJSON method that runs json.Unmarshal on the +// same bytes first. This test pins the meaningful guarantee at that boundary — +// structurally malformed bodies are rejected before unknown-field extraction +// runs — rather than re-validating inside the helper. +// +// Note on the JSON decoder: the project uses github.com/goccy/go-json, which is +// slightly more lenient than encoding/json on a couple of malformed-input edge +// cases (notably trailing commas inside skipped unknown/passthrough fields, and +// leading-zero numbers). That extra input tolerance is acceptable under the +// gateway's "accept generously" principle, so this test covers structural +// errors that remain rejected; see TestDecoderLeniencyIsBounded for the +// documented, intentional acceptances. +func TestUnmarshalJSON_RejectsInvalidJSONSyntax(t *testing.T) { tests := []struct { name string body string }{ - {name: "invalid bare literal", body: `{"known":"value","x":wat}`}, - {name: "missing object comma", body: `{"known":"value" "x":1}`}, - {name: "trailing object comma", body: `{"known":"value","x":1,}`}, - {name: "trailing array comma", body: `{"known":"value","x":[1,]}`}, - {name: "trailing top-level data", body: `{"known":"value","x":1}{"extra":true}`}, + {name: "invalid bare literal", body: `{"model":"m","x":wat}`}, + {name: "missing object comma", body: `{"model":"m" "x":1}`}, + {name: "trailing object comma", body: `{"model":"m","x":1,}`}, + {name: "trailing top-level data", body: `{"model":"m","x":1}{"extra":true}`}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if _, err := extractUnknownJSONFields([]byte(tt.body), "known"); err == nil { - t.Fatalf("extractUnknownJSONFields(%q) error = nil, want syntax error", tt.body) + var req ChatRequest + if err := req.UnmarshalJSON([]byte(tt.body)); err == nil { + t.Fatalf("ChatRequest.UnmarshalJSON(%q) error = nil, want syntax error", tt.body) + } + }) + } +} + +// TestDecoderLeniencyIsBounded documents the known, intentional input-tolerance +// differences introduced by github.com/goccy/go-json relative to encoding/json. +// These are accepted (the gateway favors accepting generously and normalizing), +// but pinning them here makes the behavior explicit and flags any future change. +func TestDecoderLeniencyIsBounded(t *testing.T) { + accepted := []struct { + name string + body string + }{ + // Malformed values inside an unknown/passthrough field are skipped + // leniently rather than rejected. + {name: "trailing array comma in passthrough field", body: `{"model":"m","x":[1,]}`}, + // Leading-zero numbers are tolerated. + {name: "leading-zero number in passthrough field", body: `{"model":"m","x":01}`}, + } + + for _, tt := range accepted { + t.Run(tt.name, func(t *testing.T) { + var req ChatRequest + if err := req.UnmarshalJSON([]byte(tt.body)); err != nil { + t.Fatalf("ChatRequest.UnmarshalJSON(%q) error = %v, want accepted", tt.body, err) } }) } diff --git a/internal/core/message_json.go b/internal/core/message_json.go index 685448ee..d9c0c6ea 100644 --- a/internal/core/message_json.go +++ b/internal/core/message_json.go @@ -1,8 +1,9 @@ package core import ( - "encoding/json" "strings" + + "github.com/goccy/go-json" ) // Message.UnmarshalJSON validates chat request message content, preserves diff --git a/internal/core/request_snapshot.go b/internal/core/request_snapshot.go index d060d0b4..908831de 100644 --- a/internal/core/request_snapshot.go +++ b/internal/core/request_snapshot.go @@ -45,11 +45,27 @@ func NewRequestSnapshot(method, path string, routeParams map[string]string, quer return newRequestSnapshot(method, path, routeParams, queryParams, headers, contentType, capturedBody, bodyNotCaptured, requestID, traceMetadata, true, userPath...) } -// NewRequestSnapshotWithOwnedBody constructs a RequestSnapshot that takes -// ownership of capturedBody without cloning it. Callers must ensure the slice -// will not be mutated after passing it here. -func NewRequestSnapshotWithOwnedBody(method, path string, routeParams map[string]string, queryParams, headers map[string][]string, contentType string, capturedBody []byte, bodyNotCaptured bool, requestID string, traceMetadata map[string]string, userPath ...string) *RequestSnapshot { - return newRequestSnapshot(method, path, routeParams, queryParams, headers, contentType, capturedBody, bodyNotCaptured, requestID, traceMetadata, false, userPath...) +// NewRequestSnapshotWithOwnedMaps constructs a RequestSnapshot that takes +// ownership of routeParams, queryParams, traceMetadata, and capturedBody +// (callers must not mutate them afterwards) while still defensively cloning +// headers, which is typically the live request header map mutated downstream. +// +// Use this on the ingress hot path, where the route/query/trace maps and body +// are freshly built for the snapshot and would otherwise be cloned for no benefit. +func NewRequestSnapshotWithOwnedMaps(method, path string, routeParams map[string]string, queryParams, headers map[string][]string, contentType string, capturedBody []byte, bodyNotCaptured bool, requestID string, traceMetadata map[string]string, userPath ...string) *RequestSnapshot { + return &RequestSnapshot{ + Method: method, + Path: path, + UserPath: firstUserPath(userPath), + routeParams: routeParams, + queryParams: queryParams, + headers: cloneMultiMap(headers), + ContentType: contentType, + capturedBody: capturedBody, + BodyNotCaptured: bodyNotCaptured, + RequestID: requestID, + traceMetadata: traceMetadata, + } } func newRequestSnapshot(method, path string, routeParams map[string]string, queryParams, headers map[string][]string, contentType string, capturedBody []byte, bodyNotCaptured bool, requestID string, traceMetadata map[string]string, cloneBody bool, userPath ...string) *RequestSnapshot { @@ -121,6 +137,13 @@ func (s *RequestSnapshot) WithOwnedCapturedBody(capturedBody []byte, bodyNotCapt return &cloned } +// The snapshot is immutable after construction and exposes its body, headers, +// and parameter maps two ways: a Get*/CapturedBody accessor that returns a +// defensive copy (for callers that need an independently mutable value), and a +// *View accessor that returns the underlying value with no allocation (for +// read-only callers). The request hot path uses the View accessors; the copying +// accessors exist for callers that mutate the result. + // CapturedBody returns a defensive copy of the captured request body bytes. func (s *RequestSnapshot) CapturedBody() []byte { if s == nil { @@ -162,6 +185,15 @@ func (s *RequestSnapshot) GetHeaders() map[string][]string { return cloneMultiMap(s.headers) } +// HeadersView returns the captured request headers without cloning. Callers +// must treat the returned map as read-only. +func (s *RequestSnapshot) HeadersView() map[string][]string { + if s == nil { + return nil + } + return s.headers +} + // GetTraceMetadata returns a defensive copy of the captured trace metadata. func (s *RequestSnapshot) GetTraceMetadata() map[string]string { if s == nil { diff --git a/internal/core/request_snapshot_test.go b/internal/core/request_snapshot_test.go index 71e9247d..1dc3ff21 100644 --- a/internal/core/request_snapshot_test.go +++ b/internal/core/request_snapshot_test.go @@ -71,10 +71,10 @@ func TestNewRequestSnapshot_DefensivelyCopiesMutableFields(t *testing.T) { } } -func TestNewRequestSnapshotWithOwnedBody_TakesOwnershipOfCapturedBytes(t *testing.T) { +func TestNewRequestSnapshotWithOwnedMaps_TakesOwnershipOfCapturedBytes(t *testing.T) { rawBody := []byte(`{"model":"gpt-5-mini"}`) - snapshot := NewRequestSnapshotWithOwnedBody( + snapshot := NewRequestSnapshotWithOwnedMaps( "POST", "/v1/chat/completions", nil, @@ -114,12 +114,12 @@ func BenchmarkNewRequestSnapshotClonedBody(b *testing.B) { } } -func BenchmarkNewRequestSnapshotWithOwnedBody(b *testing.B) { +func BenchmarkNewRequestSnapshotWithOwnedMaps(b *testing.B) { body := []byte(`{"model":"gpt-5-mini","messages":[{"role":"user","content":"hello world"}],"response_format":{"type":"json_schema"}}`) b.ReportAllocs() for b.Loop() { - _ = NewRequestSnapshotWithOwnedBody("POST", "/v1/chat/completions", nil, nil, nil, "application/json", body, false, "req-123", nil) + _ = NewRequestSnapshotWithOwnedMaps("POST", "/v1/chat/completions", nil, nil, nil, "application/json", body, false, "req-123", nil) } } diff --git a/internal/core/responses.go b/internal/core/responses.go index 4f0a1cd8..ed7dc5a3 100644 --- a/internal/core/responses.go +++ b/internal/core/responses.go @@ -1,6 +1,6 @@ package core -import "encoding/json" +import "github.com/goccy/go-json" // ResponsesRequest represents the request body for the Responses API. // This is the OpenAI-compatible /v1/responses endpoint. Unknown JSON members diff --git a/internal/core/responses_json.go b/internal/core/responses_json.go index b8460622..912dc164 100644 --- a/internal/core/responses_json.go +++ b/internal/core/responses_json.go @@ -2,8 +2,9 @@ package core import ( "bytes" - "encoding/json" "fmt" + + "github.com/goccy/go-json" ) // responsesUtilityRequestFields lists the JSON fields recognized on responses diff --git a/internal/core/semantic_canonical.go b/internal/core/semantic_canonical.go index 5f124a88..7e1775b9 100644 --- a/internal/core/semantic_canonical.go +++ b/internal/core/semantic_canonical.go @@ -1,10 +1,11 @@ package core import ( - "encoding/json" "fmt" "strconv" "strings" + + "github.com/goccy/go-json" ) type canonicalJSONSpec[T any] struct { diff --git a/internal/core/types.go b/internal/core/types.go index 60978620..f409f136 100644 --- a/internal/core/types.go +++ b/internal/core/types.go @@ -1,6 +1,6 @@ package core -import "encoding/json" +import "github.com/goccy/go-json" // StreamOptions controls streaming behavior options. // This is used to request usage data in streaming responses. diff --git a/internal/core/usage_json.go b/internal/core/usage_json.go index 257a1606..ef2ece91 100644 --- a/internal/core/usage_json.go +++ b/internal/core/usage_json.go @@ -1,7 +1,7 @@ package core import ( - "encoding/json" + "github.com/goccy/go-json" ) var usageKnownFields = map[string]struct{}{ diff --git a/internal/embedding/embedding.go b/internal/embedding/embedding.go index 268a75c7..fa5f4d32 100644 --- a/internal/embedding/embedding.go +++ b/internal/embedding/embedding.go @@ -3,7 +3,6 @@ package embedding import ( "bytes" "context" - "encoding/json" "fmt" "io" "log/slog" @@ -11,6 +10,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/config" ) diff --git a/internal/gateway/batch_usage.go b/internal/gateway/batch_usage.go index 750172c0..ba97c598 100644 --- a/internal/gateway/batch_usage.go +++ b/internal/gateway/batch_usage.go @@ -1,7 +1,6 @@ package gateway import ( - "encoding/json" "fmt" "math" "net/http" @@ -9,6 +8,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" batchstore "gomodel/internal/batch" diff --git a/internal/guardrails/batch_rewrite.go b/internal/guardrails/batch_rewrite.go index 5d8f0f3a..f55ac553 100644 --- a/internal/guardrails/batch_rewrite.go +++ b/internal/guardrails/batch_rewrite.go @@ -1,9 +1,10 @@ package guardrails import ( - "encoding/json" "errors" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/guardrails/batch_rewrite_test.go b/internal/guardrails/batch_rewrite_test.go index da1d8fe2..3b9f5fcc 100644 --- a/internal/guardrails/batch_rewrite_test.go +++ b/internal/guardrails/batch_rewrite_test.go @@ -26,12 +26,12 @@ func TestRewriteGuardedChatBatchBody(t *testing.T) { } tests := []struct { - name string - originalBody func(orig *core.ChatRequest) json.RawMessage - original *core.ChatRequest - modified *core.ChatRequest - wantErrIs core.ErrorType // empty = expect success - wantBodyHas string // substring assertion when no error + name string + originalBody func(orig *core.ChatRequest) json.RawMessage + original *core.ChatRequest + modified *core.ChatRequest + wantErrIs core.ErrorType // empty = expect success + wantBodyHas string // substring assertion when no error }{ { name: "nil modified rejected with invalid_request_error", @@ -55,7 +55,7 @@ func TestRewriteGuardedChatBatchBody(t *testing.T) { wantBodyHas: `"rewritten"`, }, { - name: "validation error from message reorder propagates as invalid_request_error", + name: "validation error from message reorder propagates as invalid_request_error", originalBody: originalBody, original: makeReq("user", "hello"), modified: &core.ChatRequest{ diff --git a/internal/guardrails/definitions.go b/internal/guardrails/definitions.go index a45a0cb3..d7ffb3ed 100644 --- a/internal/guardrails/definitions.go +++ b/internal/guardrails/definitions.go @@ -3,12 +3,13 @@ package guardrails import ( "bytes" "context" - "encoding/json" "fmt" "net/http" "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/responsecache" ) diff --git a/internal/guardrails/executor.go b/internal/guardrails/executor.go index 43531841..f956ad28 100644 --- a/internal/guardrails/executor.go +++ b/internal/guardrails/executor.go @@ -2,7 +2,8 @@ package guardrails import ( "context" - "encoding/json" + + "github.com/goccy/go-json" "gomodel/internal/core" ) diff --git a/internal/guardrails/responses_message_apply.go b/internal/guardrails/responses_message_apply.go index 5922f0f9..5035917d 100644 --- a/internal/guardrails/responses_message_apply.go +++ b/internal/guardrails/responses_message_apply.go @@ -1,10 +1,11 @@ package guardrails import ( - "encoding/json" "reflect" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/guardrails/store_mongodb.go b/internal/guardrails/store_mongodb.go index 49f272d5..736819c3 100644 --- a/internal/guardrails/store_mongodb.go +++ b/internal/guardrails/store_mongodb.go @@ -3,11 +3,12 @@ package guardrails import ( "bytes" "context" - "encoding/json" "errors" "fmt" "time" + "github.com/goccy/go-json" + "go.mongodb.org/mongo-driver/v2/bson" "go.mongodb.org/mongo-driver/v2/mongo" "go.mongodb.org/mongo-driver/v2/mongo/options" diff --git a/internal/live/broker.go b/internal/live/broker.go index c8dc2f18..e0d97dcc 100644 --- a/internal/live/broker.go +++ b/internal/live/broker.go @@ -2,12 +2,13 @@ package live import ( - "encoding/json" "sort" "strings" "sync" "time" + "github.com/goccy/go-json" + "gomodel/internal/auditlog" "gomodel/internal/usage" ) diff --git a/internal/llmclient/client.go b/internal/llmclient/client.go index 85330793..01394f9a 100644 --- a/internal/llmclient/client.go +++ b/internal/llmclient/client.go @@ -8,7 +8,6 @@ package llmclient import ( "bytes" "context" - "encoding/json" "errors" "fmt" "io" @@ -20,6 +19,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "gomodel/config" "gomodel/internal/core" "gomodel/internal/httpclient" diff --git a/internal/modeldata/fetcher.go b/internal/modeldata/fetcher.go index 4989723c..6ae84445 100644 --- a/internal/modeldata/fetcher.go +++ b/internal/modeldata/fetcher.go @@ -2,11 +2,12 @@ package modeldata import ( "context" - "encoding/json" "fmt" "io" "net/http" "time" + + "github.com/goccy/go-json" ) // httpClient is a shared HTTP client for model list fetching. diff --git a/internal/modeloverrides/batch_preparer.go b/internal/modeloverrides/batch_preparer.go index caf19d0a..04339a64 100644 --- a/internal/modeloverrides/batch_preparer.go +++ b/internal/modeloverrides/batch_preparer.go @@ -2,10 +2,11 @@ package modeloverrides import ( "context" - "encoding/json" "fmt" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/modeloverrides/store.go b/internal/modeloverrides/store.go index 00446992..f83daefb 100644 --- a/internal/modeloverrides/store.go +++ b/internal/modeloverrides/store.go @@ -2,11 +2,12 @@ package modeloverrides import ( "context" - "encoding/json" "errors" "fmt" "time" + "github.com/goccy/go-json" + "gomodel/internal/modelselectors" ) diff --git a/internal/modeloverrides/store_postgresql.go b/internal/modeloverrides/store_postgresql.go index 5750eea1..fc5c1959 100644 --- a/internal/modeloverrides/store_postgresql.go +++ b/internal/modeloverrides/store_postgresql.go @@ -2,11 +2,12 @@ package modeloverrides import ( "context" - "encoding/json" "fmt" "strings" "time" + "github.com/goccy/go-json" + "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgxpool" ) diff --git a/internal/modeloverrides/store_sqlite.go b/internal/modeloverrides/store_sqlite.go index 7bbe7408..6ecc28c2 100644 --- a/internal/modeloverrides/store_sqlite.go +++ b/internal/modeloverrides/store_sqlite.go @@ -3,10 +3,11 @@ package modeloverrides import ( "context" "database/sql" - "encoding/json" "fmt" "strings" "time" + + "github.com/goccy/go-json" ) // SQLiteStore stores model overrides in SQLite. diff --git a/internal/pricingoverrides/store.go b/internal/pricingoverrides/store.go index 82d40a7b..e5b2f182 100644 --- a/internal/pricingoverrides/store.go +++ b/internal/pricingoverrides/store.go @@ -2,11 +2,12 @@ package pricingoverrides import ( "context" - "encoding/json" "errors" "fmt" "time" + "github.com/goccy/go-json" + "gomodel/internal/modelselectors" ) diff --git a/internal/pricingoverrides/store_postgresql.go b/internal/pricingoverrides/store_postgresql.go index c5b755bd..4b5aae1a 100644 --- a/internal/pricingoverrides/store_postgresql.go +++ b/internal/pricingoverrides/store_postgresql.go @@ -2,11 +2,12 @@ package pricingoverrides import ( "context" - "encoding/json" "fmt" "strings" "time" + "github.com/goccy/go-json" + "github.com/jackc/pgx/v5/pgxpool" ) diff --git a/internal/pricingoverrides/store_sqlite.go b/internal/pricingoverrides/store_sqlite.go index 690fa1c8..76975f51 100644 --- a/internal/pricingoverrides/store_sqlite.go +++ b/internal/pricingoverrides/store_sqlite.go @@ -3,10 +3,11 @@ package pricingoverrides import ( "context" "database/sql" - "encoding/json" "fmt" "strings" "time" + + "github.com/goccy/go-json" ) // SQLiteStore stores model pricing overrides in SQLite. diff --git a/internal/providers/anthropic/anthropic.go b/internal/providers/anthropic/anthropic.go index dcef0cca..9a6a7ed4 100644 --- a/internal/providers/anthropic/anthropic.go +++ b/internal/providers/anthropic/anthropic.go @@ -4,7 +4,6 @@ package anthropic import ( "bytes" "context" - "encoding/json" "io" "log/slog" "maps" @@ -13,6 +12,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/providers" diff --git a/internal/providers/anthropic/batch.go b/internal/providers/anthropic/batch.go index 1ccd43c0..7c69e6ad 100644 --- a/internal/providers/anthropic/batch.go +++ b/internal/providers/anthropic/batch.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "context" - "encoding/json" "fmt" "io" "log/slog" @@ -14,6 +13,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" ) diff --git a/internal/providers/anthropic/chat.go b/internal/providers/anthropic/chat.go index 16a78e34..26442b14 100644 --- a/internal/providers/anthropic/chat.go +++ b/internal/providers/anthropic/chat.go @@ -2,10 +2,11 @@ package anthropic import ( "context" - "encoding/json" "net/http" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" ) diff --git a/internal/providers/anthropic/chat_stream.go b/internal/providers/anthropic/chat_stream.go index a3883806..c26d00bc 100644 --- a/internal/providers/anthropic/chat_stream.go +++ b/internal/providers/anthropic/chat_stream.go @@ -3,7 +3,6 @@ package anthropic import ( "bufio" "context" - "encoding/json" "fmt" "io" "log/slog" @@ -11,6 +10,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/streaming" diff --git a/internal/providers/anthropic/request_translation.go b/internal/providers/anthropic/request_translation.go index dca6cbff..4c20ae29 100644 --- a/internal/providers/anthropic/request_translation.go +++ b/internal/providers/anthropic/request_translation.go @@ -2,7 +2,6 @@ package anthropic import ( "bytes" - "encoding/json" "errors" "fmt" "io" @@ -13,6 +12,8 @@ import ( "strings" "sync" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/providers" ) diff --git a/internal/providers/anthropic/responses.go b/internal/providers/anthropic/responses.go index 5557bbfa..1adcfc9b 100644 --- a/internal/providers/anthropic/responses.go +++ b/internal/providers/anthropic/responses.go @@ -3,7 +3,6 @@ package anthropic import ( "bufio" "context" - "encoding/json" "fmt" "io" "log/slog" @@ -11,6 +10,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/core" diff --git a/internal/providers/anthropic/types.go b/internal/providers/anthropic/types.go index 45746745..b45a59b6 100644 --- a/internal/providers/anthropic/types.go +++ b/internal/providers/anthropic/types.go @@ -1,6 +1,6 @@ package anthropic -import "encoding/json" +import "github.com/goccy/go-json" // anthropicThinking represents the thinking configuration for Anthropic's extended thinking. // For adaptive-thinking models (Opus 4.6+): {type: "adaptive"} (budget_tokens omitted). diff --git a/internal/providers/bailian/bailian.go b/internal/providers/bailian/bailian.go index 796d5f6a..bd21bf85 100644 --- a/internal/providers/bailian/bailian.go +++ b/internal/providers/bailian/bailian.go @@ -7,11 +7,12 @@ package bailian import ( "bytes" "context" - "encoding/json" "io" "log/slog" "net/http" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/providers" diff --git a/internal/providers/batch_results_file_adapter.go b/internal/providers/batch_results_file_adapter.go index cb5eec58..6dddbab3 100644 --- a/internal/providers/batch_results_file_adapter.go +++ b/internal/providers/batch_results_file_adapter.go @@ -4,13 +4,14 @@ import ( "bufio" "bytes" "context" - "encoding/json" "fmt" "io" "net/http" "net/url" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" ) diff --git a/internal/providers/bedrock/chat.go b/internal/providers/bedrock/chat.go index c0f148b9..bdddfebe 100644 --- a/internal/providers/bedrock/chat.go +++ b/internal/providers/bedrock/chat.go @@ -2,12 +2,13 @@ package bedrock import ( "context" - "encoding/json" "fmt" "math" "strings" "time" + "github.com/goccy/go-json" + awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" brdoc "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/document" diff --git a/internal/providers/bedrock/chat_stream.go b/internal/providers/bedrock/chat_stream.go index 496c64a7..78c7bc76 100644 --- a/internal/providers/bedrock/chat_stream.go +++ b/internal/providers/bedrock/chat_stream.go @@ -2,13 +2,14 @@ package bedrock import ( "context" - "encoding/json" "fmt" "io" "log/slog" "sync" "time" + "github.com/goccy/go-json" + awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" brtypes "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" diff --git a/internal/providers/chat_stream_normalize.go b/internal/providers/chat_stream_normalize.go index ca9c5924..6378bade 100644 --- a/internal/providers/chat_stream_normalize.go +++ b/internal/providers/chat_stream_normalize.go @@ -3,8 +3,9 @@ package providers import ( "bufio" "bytes" - "encoding/json" "io" + + "github.com/goccy/go-json" ) // chatDonePayload terminates a chat completions SSE stream. diff --git a/internal/providers/deepseek/deepseek.go b/internal/providers/deepseek/deepseek.go index b4d2aba0..e6712a84 100644 --- a/internal/providers/deepseek/deepseek.go +++ b/internal/providers/deepseek/deepseek.go @@ -3,11 +3,12 @@ package deepseek import ( "context" - "encoding/json" "io" "net/http" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/providers" diff --git a/internal/providers/gemini/gemini.go b/internal/providers/gemini/gemini.go index bf419255..2d15720c 100644 --- a/internal/providers/gemini/gemini.go +++ b/internal/providers/gemini/gemini.go @@ -3,7 +3,6 @@ package gemini import ( "context" - "encoding/json" "fmt" "io" "net/http" @@ -13,6 +12,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/httpclient" "gomodel/internal/llmclient" diff --git a/internal/providers/gemini/native.go b/internal/providers/gemini/native.go index db5bb701..54926c7d 100644 --- a/internal/providers/gemini/native.go +++ b/internal/providers/gemini/native.go @@ -3,7 +3,6 @@ package gemini import ( "bytes" "encoding/base64" - "encoding/json" "fmt" "mime" "net/http" @@ -12,6 +11,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/providers/gemini/native_stream.go b/internal/providers/gemini/native_stream.go index 04e6a80d..7a238421 100644 --- a/internal/providers/gemini/native_stream.go +++ b/internal/providers/gemini/native_stream.go @@ -2,12 +2,13 @@ package gemini import ( "bufio" - "encoding/json" "io" "strconv" "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/providers/googlecommon/auth.go b/internal/providers/googlecommon/auth.go index d7800eea..e1251066 100644 --- a/internal/providers/googlecommon/auth.go +++ b/internal/providers/googlecommon/auth.go @@ -8,12 +8,13 @@ package googlecommon import ( "context" "encoding/base64" - "encoding/json" "fmt" "net/http" "os" "strings" + "github.com/goccy/go-json" + "golang.org/x/oauth2" "golang.org/x/oauth2/google" ) diff --git a/internal/providers/ollama/ollama.go b/internal/providers/ollama/ollama.go index f4daaf9f..3fd6436f 100644 --- a/internal/providers/ollama/ollama.go +++ b/internal/providers/ollama/ollama.go @@ -3,7 +3,6 @@ package ollama import ( "context" - "encoding/json" "fmt" "io" "net/http" @@ -11,6 +10,8 @@ import ( "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/providers" diff --git a/internal/providers/openai/openai.go b/internal/providers/openai/openai.go index 5919bea5..95f9ee9f 100644 --- a/internal/providers/openai/openai.go +++ b/internal/providers/openai/openai.go @@ -2,10 +2,11 @@ package openai import ( - "encoding/json" "net/http" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/providers" diff --git a/internal/providers/registry.go b/internal/providers/registry.go index dec96c62..104ea26c 100644 --- a/internal/providers/registry.go +++ b/internal/providers/registry.go @@ -2,7 +2,6 @@ package providers import ( - "encoding/json" "fmt" "slices" "sort" @@ -10,6 +9,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "gomodel/config" "gomodel/internal/cache/modelcache" "gomodel/internal/core" @@ -57,6 +58,14 @@ type ModelRegistry struct { sortedModels []core.Model sortedModelsWithProvider []ModelWithProvider categoryCache map[core.ModelCategory][]ModelWithProvider + + // Lazy O(1) resolution index from qualified selector keys ("/") + // to concrete provider-name-qualified selectors. qualifiedByName is keyed by + // provider instance name, qualifiedByType by provider type. nil means the + // index needs rebuilding; both maps are built together and cleared by + // invalidateSortedCaches whenever the catalog changes. Protected by mu. + qualifiedByName map[string]core.ModelSelector + qualifiedByType map[string]core.ModelSelector } type metadataEnrichmentStats struct { @@ -100,6 +109,85 @@ func (r *ModelRegistry) invalidateSortedCaches() { r.sortedModels = nil r.sortedModelsWithProvider = nil r.categoryCache = nil + r.qualifiedByName = nil + r.qualifiedByType = nil +} + +// ResolveProviderSelector resolves a qualified "/" selector, +// where segment is a provider instance name or a provider type, to the concrete +// provider-name-qualified selector. Provider-name matches take precedence over +// provider-type matches, mirroring catalog-scan resolution. Returns ok=false +// when the segment+model pair is not a direct name/type match so callers can +// fall back to slower resolution for raw slash-shaped IDs and other edge cases. +// +// This is O(1) and exists so the per-request routing path does not copy and +// linearly scan the entire model catalog. +func (r *ModelRegistry) ResolveProviderSelector(segment, modelID string) (core.ModelSelector, bool) { + segment = strings.TrimSpace(segment) + modelID = strings.TrimSpace(modelID) + if segment == "" || modelID == "" { + return core.ModelSelector{}, false + } + key := segment + "/" + modelID + + r.mu.RLock() + if r.qualifiedByName != nil { + sel, ok := lookupSelectorIndex(r.qualifiedByName, r.qualifiedByType, key) + r.mu.RUnlock() + return sel, ok + } + r.mu.RUnlock() + + r.mu.Lock() + r.buildSelectorIndexLocked() + sel, ok := lookupSelectorIndex(r.qualifiedByName, r.qualifiedByType, key) + r.mu.Unlock() + return sel, ok +} + +func lookupSelectorIndex(byName, byType map[string]core.ModelSelector, key string) (core.ModelSelector, bool) { + if sel, ok := byName[key]; ok { + return sel, true + } + if sel, ok := byType[key]; ok { + return sel, true + } + return core.ModelSelector{}, false +} + +// buildSelectorIndexLocked populates the qualified selector index from the +// current catalog. Caller must hold the write lock. On provider-type collisions +// it keeps the lexicographically smallest provider name so resolution is +// deterministic and matches the previous sorted-scan behavior. +func (r *ModelRegistry) buildSelectorIndexLocked() { + if r.qualifiedByName != nil { + return + } + total := 0 + for _, providerModels := range r.modelsByProvider { + total += len(providerModels) + } + byName := make(map[string]core.ModelSelector, total) + byType := make(map[string]core.ModelSelector, total) + for providerName, providerModels := range r.modelsByProvider { + for _, info := range providerModels { + publicName := providerName + if info.ProviderName != "" { + publicName = info.ProviderName + } + id := strings.TrimSpace(info.Model.ID) + sel := core.ModelSelector{Provider: publicName, Model: info.Model.ID} + byName[publicName+"/"+id] = sel + if info.ProviderType != "" { + typeKey := info.ProviderType + "/" + id + if existing, ok := byType[typeKey]; !ok || sel.Provider < existing.Provider { + byType[typeKey] = sel + } + } + } + } + r.qualifiedByName = byName + r.qualifiedByType = byType } // RegisterProvider adds a provider to the registry diff --git a/internal/providers/registry_metadata.go b/internal/providers/registry_metadata.go index d154451a..577a31b1 100644 --- a/internal/providers/registry_metadata.go +++ b/internal/providers/registry_metadata.go @@ -1,12 +1,13 @@ package providers import ( - "encoding/json" "maps" "reflect" "slices" "strings" + "github.com/goccy/go-json" + "gomodel/config" "gomodel/internal/core" "gomodel/internal/modeldata" diff --git a/internal/providers/resolve_bench_test.go b/internal/providers/resolve_bench_test.go new file mode 100644 index 00000000..5df1b46c --- /dev/null +++ b/internal/providers/resolve_bench_test.go @@ -0,0 +1,70 @@ +package providers + +import ( + "fmt" + "testing" + + "gomodel/internal/core" +) + +// buildBenchRegistry creates a registry with `providersN` providers each holding +// `perProvider` models, mirroring a realistic multi-provider catalog. +func buildBenchRegistry(providersN, perProvider int) *ModelRegistry { + entries := make([]registryModelEntry, 0, providersN*perProvider) + for p := 0; p < providersN; p++ { + name := fmt.Sprintf("prov%d", p) + prov := &mockProvider{name: name} + for m := 0; m < perProvider; m++ { + entries = append(entries, registryModelEntry{ + provider: prov, + providerName: name, + providerType: name, + modelID: fmt.Sprintf("model-%d-%d", p, m), + }) + } + } + return newTestRegistryWithModels(entries...) +} + +// BenchmarkResolvePerRequest simulates the resolution calls a single chat +// request makes through the Router against a populated catalog: ResolveModel + +// Supports + GetProviderType + GetProviderName (the ~per-request fan-out). +func BenchmarkResolvePerRequest(b *testing.B) { + for _, n := range []int{50, 300, 1000} { + b.Run(fmt.Sprintf("models=%d", n), func(b *testing.B) { + reg := buildBenchRegistry(6, n/6) + router, err := NewRouter(reg) + if err != nil { + b.Fatalf("NewRouter: %v", err) + } + // A mid-catalog qualified selector, the common production case. + sel := fmt.Sprintf("prov3/model-3-%d", (n/6)/2) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + requested := core.NewRequestedModelSelector(sel, "") + if _, _, err := router.ResolveModel(requested); err != nil { + b.Fatalf("ResolveModel: %v", err) + } + _ = router.Supports(sel) + _ = router.GetProviderType(sel) + _ = router.GetProviderName(sel) + } + }) + } +} + +// BenchmarkListModelsWithProvider isolates the full-catalog defensive copy. +func BenchmarkListModelsWithProvider(b *testing.B) { + for _, n := range []int{50, 300, 1000} { + b.Run(fmt.Sprintf("models=%d", n), func(b *testing.B) { + reg := buildBenchRegistry(6, n/6) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = reg.ListModelsWithProvider() + } + }) + } +} diff --git a/internal/providers/responses_adapter.go b/internal/providers/responses_adapter.go index 90d1f85a..15649e2d 100644 --- a/internal/providers/responses_adapter.go +++ b/internal/providers/responses_adapter.go @@ -2,12 +2,13 @@ package providers import ( "context" - "encoding/json" "fmt" "io" "maps" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/providers/responses_content.go b/internal/providers/responses_content.go index 51ad1245..76c0b521 100644 --- a/internal/providers/responses_content.go +++ b/internal/providers/responses_content.go @@ -1,9 +1,10 @@ package providers import ( - "encoding/json" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/providers/responses_converter.go b/internal/providers/responses_converter.go index 7f6a9b76..176ec9b3 100644 --- a/internal/providers/responses_converter.go +++ b/internal/providers/responses_converter.go @@ -2,13 +2,14 @@ package providers import ( "bytes" - "encoding/json" "io" "log/slog" "slices" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/streaming" diff --git a/internal/providers/responses_input.go b/internal/providers/responses_input.go index 129773b1..477b7d96 100644 --- a/internal/providers/responses_input.go +++ b/internal/providers/responses_input.go @@ -1,10 +1,11 @@ package providers import ( - "encoding/json" "fmt" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/providers/responses_output.go b/internal/providers/responses_output.go index de7ccb46..6195fd14 100644 --- a/internal/providers/responses_output.go +++ b/internal/providers/responses_output.go @@ -1,9 +1,10 @@ package providers import ( - "encoding/json" "strings" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/core" diff --git a/internal/providers/responses_output_state.go b/internal/providers/responses_output_state.go index 392a63ee..8abfe741 100644 --- a/internal/providers/responses_output_state.go +++ b/internal/providers/responses_output_state.go @@ -1,11 +1,12 @@ package providers import ( - "encoding/json" "fmt" "log/slog" "strings" + "github.com/goccy/go-json" + "github.com/google/uuid" ) diff --git a/internal/providers/router.go b/internal/providers/router.go index b5bb9246..84eaacb9 100644 --- a/internal/providers/router.go +++ b/internal/providers/router.go @@ -52,6 +52,14 @@ type modelWithProviderLister interface { ListModelsWithProvider() []ModelWithProvider } +// qualifiedSelectorResolver is an optional fast path for qualified selector +// resolution. Implementations resolve a "/" pair via an O(1) +// index instead of scanning the catalog. A false result means the caller should +// fall back to the slower catalog scan for raw/edge-case selectors. +type qualifiedSelectorResolver interface { + ResolveProviderSelector(segment, modelID string) (core.ModelSelector, bool) +} + type providerModelRefresher interface { RefreshProviderModels(ctx context.Context, providerSelector string) (int, error) } @@ -135,28 +143,23 @@ func (r *Router) resolveQualifiedSelector(requested core.RequestedModelSelector, return core.ModelSelector{}, false } - entries := models.ListModelsWithProvider() - - for _, entry := range entries { - if strings.TrimSpace(entry.ProviderName) != providerSegment { - continue - } - if strings.TrimSpace(entry.Model.ID) != modelID { - continue + // O(1) fast path: direct provider name/type match. Falls through to the + // catalog scan only for raw slash-shaped IDs and other edge cases. + if resolver, ok := r.lookup.(qualifiedSelectorResolver); ok { + if concrete, ok := resolver.ResolveProviderSelector(providerSegment, modelID); ok { + return concrete, true } - return core.ModelSelector{Provider: entry.ProviderName, Model: entry.Model.ID}, true } - for _, entry := range entries { - if strings.TrimSpace(entry.ProviderType) != providerSegment { - continue - } - if strings.TrimSpace(entry.Model.ID) != modelID { - continue - } - return core.ModelSelector{Provider: entry.ProviderName, Model: entry.Model.ID}, true - } + // Fallback for lookups that don't implement qualifiedSelectorResolver (and for + // raw slash-shaped model IDs the fast path can't key on). The parsed-modelID + // pass mirrors the fast path for non-indexed lookups; the requested.Model pass + // additionally resolves models whose own IDs contain a slash. + entries := models.ListModelsWithProvider() + if concrete, ok := resolveProviderOwnedRawSelector(entries, providerSegment, modelID); ok { + return concrete, true + } if concrete, ok := resolveProviderOwnedRawSelector(entries, providerSegment, requested.Model); ok { return concrete, true } diff --git a/internal/providers/vertex/vertex.go b/internal/providers/vertex/vertex.go index f8d2595d..e1d2de35 100644 --- a/internal/providers/vertex/vertex.go +++ b/internal/providers/vertex/vertex.go @@ -5,7 +5,6 @@ import ( "context" "encoding/base64" "encoding/binary" - "encoding/json" "fmt" "io" "math" @@ -13,6 +12,8 @@ import ( "net/url" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/httpclient" "gomodel/internal/llmclient" diff --git a/internal/providers/xai/xai.go b/internal/providers/xai/xai.go index 37e1ed40..e6dd5f50 100644 --- a/internal/providers/xai/xai.go +++ b/internal/providers/xai/xai.go @@ -5,12 +5,13 @@ import ( "context" "crypto/sha256" "encoding/hex" - "encoding/json" "io" "net/http" "net/url" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" "gomodel/internal/llmclient" "gomodel/internal/providers" @@ -108,7 +109,7 @@ func xGrokConversationIDFromSnapshot(ctx context.Context) string { if snapshot == nil { return "" } - for key, values := range snapshot.GetHeaders() { + for key, values := range snapshot.HeadersView() { if !strings.EqualFold(key, grokConvIDHeader) { continue } diff --git a/internal/providers/xiaomi/audio.go b/internal/providers/xiaomi/audio.go index 4525e75f..02a18298 100644 --- a/internal/providers/xiaomi/audio.go +++ b/internal/providers/xiaomi/audio.go @@ -3,12 +3,13 @@ package xiaomi import ( "context" "encoding/base64" - "encoding/json" "io" "path" "strconv" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/responsecache/responsecache.go b/internal/responsecache/responsecache.go index a199ee84..3eafb716 100644 --- a/internal/responsecache/responsecache.go +++ b/internal/responsecache/responsecache.go @@ -266,7 +266,7 @@ func (m *ResponseCacheMiddleware) Close() error { func internalRequestHeaders(ctx context.Context) http.Header { headers := make(http.Header) if snapshot := core.GetRequestSnapshot(ctx); snapshot != nil { - for key, values := range snapshot.GetHeaders() { + for key, values := range snapshot.HeadersView() { key = http.CanonicalHeaderKey(key) if _, allowed := internalRequestHeaderAllowlist[key]; !allowed { continue diff --git a/internal/responsecache/semantic.go b/internal/responsecache/semantic.go index 7616c7a1..8593ee28 100644 --- a/internal/responsecache/semantic.go +++ b/internal/responsecache/semantic.go @@ -5,7 +5,6 @@ import ( "context" "crypto/sha256" "encoding/hex" - "encoding/json" "fmt" "hash" "io" @@ -17,6 +16,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "github.com/cespare/xxhash/v2" "github.com/labstack/echo/v5" diff --git a/internal/responsecache/simple.go b/internal/responsecache/simple.go index f0ea817d..ff7e041e 100644 --- a/internal/responsecache/simple.go +++ b/internal/responsecache/simple.go @@ -5,7 +5,6 @@ import ( "context" "crypto/sha256" "encoding/hex" - "encoding/json" "io" "log/slog" "net/http" @@ -13,6 +12,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "github.com/labstack/echo/v5" "github.com/tidwall/gjson" diff --git a/internal/responsecache/sse_validation.go b/internal/responsecache/sse_validation.go index 17ab2529..49c15b74 100644 --- a/internal/responsecache/sse_validation.go +++ b/internal/responsecache/sse_validation.go @@ -2,7 +2,8 @@ package responsecache import ( "bytes" - "encoding/json" + + "github.com/goccy/go-json" ) // validateCacheableSSE reports whether raw is a complete, cache-safe SSE body. diff --git a/internal/responsecache/stream_cache.go b/internal/responsecache/stream_cache.go index 950432cc..7c12320a 100644 --- a/internal/responsecache/stream_cache.go +++ b/internal/responsecache/stream_cache.go @@ -2,10 +2,11 @@ package responsecache import ( "bytes" - "encoding/json" "net/http" "strings" + "github.com/goccy/go-json" + "github.com/labstack/echo/v5" "gomodel/internal/auditlog" diff --git a/internal/responsecache/stream_cache_chat.go b/internal/responsecache/stream_cache_chat.go index a654ddfb..51da756e 100644 --- a/internal/responsecache/stream_cache_chat.go +++ b/internal/responsecache/stream_cache_chat.go @@ -2,10 +2,11 @@ package responsecache import ( "bytes" - "encoding/json" "sort" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/responsecache/stream_cache_responses.go b/internal/responsecache/stream_cache_responses.go index aaf5da36..0531f297 100644 --- a/internal/responsecache/stream_cache_responses.go +++ b/internal/responsecache/stream_cache_responses.go @@ -2,9 +2,10 @@ package responsecache import ( "bytes" - "encoding/json" "sort" "strings" + + "github.com/goccy/go-json" ) type responsesOutputState struct { diff --git a/internal/responsecache/vecstore_pinecone.go b/internal/responsecache/vecstore_pinecone.go index f1359977..31d14031 100644 --- a/internal/responsecache/vecstore_pinecone.go +++ b/internal/responsecache/vecstore_pinecone.go @@ -4,13 +4,14 @@ import ( "bytes" "context" "encoding/base64" - "encoding/json" "fmt" "io" "net/http" "strings" "time" + "github.com/goccy/go-json" + "gomodel/config" ) diff --git a/internal/responsecache/vecstore_qdrant.go b/internal/responsecache/vecstore_qdrant.go index d242318a..cc2bcbde 100644 --- a/internal/responsecache/vecstore_qdrant.go +++ b/internal/responsecache/vecstore_qdrant.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/base64" - "encoding/json" "fmt" "io" "net/http" @@ -12,6 +11,8 @@ import ( "sync" "time" + "github.com/goccy/go-json" + "gomodel/config" ) diff --git a/internal/responsecache/vecstore_weaviate.go b/internal/responsecache/vecstore_weaviate.go index eae037fe..2f7e532f 100644 --- a/internal/responsecache/vecstore_weaviate.go +++ b/internal/responsecache/vecstore_weaviate.go @@ -4,13 +4,14 @@ import ( "bytes" "context" "encoding/base64" - "encoding/json" "fmt" "io" "net/http" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/config" diff --git a/internal/responsestore/store.go b/internal/responsestore/store.go index f99a2ff6..a78a93bf 100644 --- a/internal/responsestore/store.go +++ b/internal/responsestore/store.go @@ -4,12 +4,13 @@ package responsestore import ( "context" - "encoding/json" "errors" "fmt" "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/server/conversation_responses.go b/internal/server/conversation_responses.go index b70462b5..d0f5ab71 100644 --- a/internal/server/conversation_responses.go +++ b/internal/server/conversation_responses.go @@ -2,11 +2,12 @@ package server import ( "context" - "encoding/json" "errors" "fmt" "log/slog" + "github.com/goccy/go-json" + "gomodel/internal/conversationstore" "gomodel/internal/core" ) diff --git a/internal/server/internal_chat_completion_executor.go b/internal/server/internal_chat_completion_executor.go index cad041c5..1319dced 100644 --- a/internal/server/internal_chat_completion_executor.go +++ b/internal/server/internal_chat_completion_executor.go @@ -2,13 +2,14 @@ package server import ( "context" - "encoding/json" "errors" "log/slog" "net/http" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "github.com/labstack/echo/v5" diff --git a/internal/server/native_conversation_service.go b/internal/server/native_conversation_service.go index d6814109..e4ae5810 100644 --- a/internal/server/native_conversation_service.go +++ b/internal/server/native_conversation_service.go @@ -2,13 +2,14 @@ package server import ( "context" - "encoding/json" "errors" "fmt" "net/http" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "github.com/labstack/echo/v5" diff --git a/internal/server/native_response_service.go b/internal/server/native_response_service.go index ebeece7b..00bd0960 100644 --- a/internal/server/native_response_service.go +++ b/internal/server/native_response_service.go @@ -2,12 +2,13 @@ package server import ( "context" - "encoding/json" "errors" "net/http" "strconv" "strings" + "github.com/goccy/go-json" + "github.com/labstack/echo/v5" "gomodel/internal/auditlog" diff --git a/internal/server/request_selector_peek.go b/internal/server/request_selector_peek.go index c038b4c4..71fe83d4 100644 --- a/internal/server/request_selector_peek.go +++ b/internal/server/request_selector_peek.go @@ -2,11 +2,12 @@ package server import ( "bytes" - "encoding/json" "io" "net/http" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/server/request_snapshot.go b/internal/server/request_snapshot.go index 85c9230e..310f0ee5 100644 --- a/internal/server/request_snapshot.go +++ b/internal/server/request_snapshot.go @@ -44,7 +44,9 @@ func RequestSnapshotCapture(userPathHeader ...string) echo.MiddlewareFunc { return handleError(c, core.NewInvalidRequestError("failed to read request body", err)) } - snapshot := core.NewRequestSnapshotWithOwnedBody( + // Query/route/trace maps are freshly built here, so the snapshot can + // own them directly; only req.Header is live and gets cloned. + snapshot := core.NewRequestSnapshotWithOwnedMaps( req.Method, req.URL.Path, snapshotRouteParams(req.URL.Path, routeParamsMap(c.PathValues())), diff --git a/internal/server/response_input_items.go b/internal/server/response_input_items.go index c08fb425..97aa3986 100644 --- a/internal/server/response_input_items.go +++ b/internal/server/response_input_items.go @@ -1,10 +1,11 @@ package server import ( - "encoding/json" "fmt" "strings" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/core" diff --git a/internal/server/translated_inference_service.go b/internal/server/translated_inference_service.go index 08f091c8..1f7f053a 100644 --- a/internal/server/translated_inference_service.go +++ b/internal/server/translated_inference_service.go @@ -2,7 +2,6 @@ package server import ( "context" - "encoding/json" "errors" "io" "log/slog" @@ -11,6 +10,8 @@ import ( "sync" "syscall" + "github.com/goccy/go-json" + "github.com/labstack/echo/v5" "gomodel/internal/auditlog" diff --git a/internal/streaming/observed_sse_stream.go b/internal/streaming/observed_sse_stream.go index 58ca45c4..1fa1ded4 100644 --- a/internal/streaming/observed_sse_stream.go +++ b/internal/streaming/observed_sse_stream.go @@ -2,8 +2,9 @@ package streaming import ( "bytes" - "encoding/json" "io" + + "github.com/goccy/go-json" ) const maxPendingEventBytes = 256 * 1024 diff --git a/internal/usage/audio.go b/internal/usage/audio.go index 20f44829..bec78890 100644 --- a/internal/usage/audio.go +++ b/internal/usage/audio.go @@ -1,9 +1,10 @@ package usage import ( - "encoding/json" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/core" diff --git a/internal/usage/cost.go b/internal/usage/cost.go index 37d1badc..d4bfd563 100644 --- a/internal/usage/cost.go +++ b/internal/usage/cost.go @@ -1,7 +1,6 @@ package usage import ( - "encoding/json" "fmt" "math" "reflect" @@ -9,6 +8,8 @@ import ( "strconv" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/usage/extractor.go b/internal/usage/extractor.go index 3ded63c7..2e662f9a 100644 --- a/internal/usage/extractor.go +++ b/internal/usage/extractor.go @@ -2,13 +2,14 @@ package usage import ( "bytes" - "encoding/json" "io" "maps" "path" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/core" diff --git a/internal/usage/reader_postgresql.go b/internal/usage/reader_postgresql.go index 33bc938f..d4902ee4 100644 --- a/internal/usage/reader_postgresql.go +++ b/internal/usage/reader_postgresql.go @@ -4,11 +4,12 @@ import ( "gomodel/internal/storage/sqlutil" "context" - "encoding/json" "fmt" "log/slog" "strings" + "github.com/goccy/go-json" + "github.com/jackc/pgx/v5/pgxpool" ) diff --git a/internal/usage/reader_sqlite.go b/internal/usage/reader_sqlite.go index 12e427ad..8b54b23f 100644 --- a/internal/usage/reader_sqlite.go +++ b/internal/usage/reader_sqlite.go @@ -5,11 +5,12 @@ import ( "context" "database/sql" - "encoding/json" "fmt" "log/slog" "strings" "time" + + "github.com/goccy/go-json" ) // SQLiteReader implements UsageReader for SQLite databases. diff --git a/internal/usage/realtime.go b/internal/usage/realtime.go index 9c89030f..991f192e 100644 --- a/internal/usage/realtime.go +++ b/internal/usage/realtime.go @@ -1,9 +1,10 @@ package usage import ( - "encoding/json" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "gomodel/internal/core" diff --git a/internal/usage/recalculate_pricing.go b/internal/usage/recalculate_pricing.go index 41436859..bbf856ae 100644 --- a/internal/usage/recalculate_pricing.go +++ b/internal/usage/recalculate_pricing.go @@ -2,11 +2,12 @@ package usage import ( "context" - "encoding/json" "fmt" "log/slog" "strings" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/internal/usage/store_sqlite.go b/internal/usage/store_sqlite.go index 5574e4c2..abe7a912 100644 --- a/internal/usage/store_sqlite.go +++ b/internal/usage/store_sqlite.go @@ -3,12 +3,13 @@ package usage import ( "context" "database/sql" - "encoding/json" "fmt" "log/slog" "strings" "sync" "time" + + "github.com/goccy/go-json" ) // SQLite has a default limit of 999 bindable parameters per query (SQLITE_MAX_VARIABLE_NUMBER). diff --git a/internal/workflows/store_postgresql.go b/internal/workflows/store_postgresql.go index 5fd23402..e00bdec6 100644 --- a/internal/workflows/store_postgresql.go +++ b/internal/workflows/store_postgresql.go @@ -2,12 +2,13 @@ package workflows import ( "context" - "encoding/json" "errors" "fmt" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgconn" diff --git a/internal/workflows/store_sqlite.go b/internal/workflows/store_sqlite.go index bf6b67b6..9540ca0f 100644 --- a/internal/workflows/store_sqlite.go +++ b/internal/workflows/store_sqlite.go @@ -3,12 +3,13 @@ package workflows import ( "context" "database/sql" - "encoding/json" "errors" "fmt" "strings" "time" + "github.com/goccy/go-json" + "github.com/google/uuid" ) diff --git a/internal/workflows/types.go b/internal/workflows/types.go index c5a399bc..98ee23eb 100644 --- a/internal/workflows/types.go +++ b/internal/workflows/types.go @@ -3,11 +3,12 @@ package workflows import ( "crypto/sha256" "encoding/hex" - "encoding/json" "sort" "strings" "time" + "github.com/goccy/go-json" + "gomodel/internal/core" ) diff --git a/tests/perf/README.md b/tests/perf/README.md index 1ede82e8..304253ef 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -14,3 +14,16 @@ Run the underlying benchmarks with allocation output: ```bash make perf-bench ``` + +## Bare vs. routed hot path + +`BenchmarkGatewayHotPathChatCompletion` passes a bare provider to `server.New` +and isolates serialization + middleware cost. It does **not** exercise model +resolution. + +`BenchmarkGatewayHotPathChatCompletionRouted` wires a real `Router` + +`ModelRegistry` (the production shape) with a representative catalog, so it +covers the per-request resolution path. This routed path currently allocates an +order of magnitude more per request because resolution re-copies the full model +catalog several times; its guard ceilings should tighten significantly once +resolution is computed once per request and reused. diff --git a/tests/perf/hotpath_test.go b/tests/perf/hotpath_test.go index 281064a1..b5c02750 100644 --- a/tests/perf/hotpath_test.go +++ b/tests/perf/hotpath_test.go @@ -30,7 +30,12 @@ const ( "data: [DONE]\n\n" ) -type benchProvider struct{} +// benchProvider is a mock provider. When models is empty it advertises a single +// default model; otherwise ListModels returns the supplied catalog so the +// registry/router resolution path can be exercised at a realistic catalog size. +type benchProvider struct { + models []core.Model +} func (benchProvider) ChatCompletion(_ context.Context, req *core.ChatRequest) (*core.ChatResponse, error) { model := "gpt-4o-mini" @@ -66,7 +71,10 @@ func (benchProvider) StreamChatCompletion(_ context.Context, _ *core.ChatRequest return io.NopCloser(strings.NewReader(sampleChatStream)), nil } -func (benchProvider) ListModels(_ context.Context) (*core.ModelsResponse, error) { +func (p benchProvider) ListModels(_ context.Context) (*core.ModelsResponse, error) { + if len(p.models) > 0 { + return &core.ModelsResponse{Object: "list", Data: p.models}, nil + } return &core.ModelsResponse{ Object: "list", Data: []core.Model{ @@ -169,6 +177,10 @@ func TestMain(m *testing.M) { os.Exit(code) } +// BenchmarkGatewayHotPathChatCompletion measures the pipeline overhead with a +// bare provider (no Router/registry). It isolates serialization + middleware +// cost; it does NOT cover model resolution. See the Routed variant for the +// production-shaped path. func BenchmarkGatewayHotPathChatCompletion(b *testing.B) { srv := server.New(benchProvider{}, &server.Config{LogOnlyModelInteractions: true}) body := []byte(sampleChatRequest) @@ -189,6 +201,67 @@ func BenchmarkGatewayHotPathChatCompletion(b *testing.B) { } } +// routedCatalogSize is a representative multi-provider catalog size. Real +// deployments aggregating several upstreams routinely exceed this. +const routedCatalogSize = 256 + +// newRoutedBenchServer wires the server the way production does: through a real +// ModelRegistry + Router populated with `modelCount` models. Unlike passing a +// bare provider to server.New, this exercises the per-request model-resolution +// path (ResolveModel/Supports/GetProviderType/GetProviderName), which is where +// catalog-sized allocation and CPU cost actually live. +func newRoutedBenchServer(tb testing.TB, modelCount int) *server.Server { + tb.Helper() + + models := make([]core.Model, 0, modelCount) + models = append(models, core.Model{ID: "gpt-4o-mini", Object: "model", OwnedBy: "mock", Created: 1700000000}) + for i := 1; i < modelCount; i++ { + models = append(models, core.Model{ + ID: fmt.Sprintf("filler-model-%04d", i), + Object: "model", + OwnedBy: "mock", + Created: 1700000000, + }) + } + + registry := providers.NewModelRegistry() + registry.RegisterProviderWithNameAndType(&benchProvider{models: models}, "mock", "mock") + if err := registry.Initialize(context.Background()); err != nil { + tb.Fatalf("registry initialize: %v", err) + } + + router, err := providers.NewRouter(registry) + if err != nil { + tb.Fatalf("new router: %v", err) + } + + return server.New(router, &server.Config{LogOnlyModelInteractions: true}) +} + +// BenchmarkGatewayHotPathChatCompletionRouted measures the hot path through a +// real Router with a realistic catalog. Compare against +// BenchmarkGatewayHotPathChatCompletion (bare provider, no routing) to see the +// cost the routing/resolution layer adds per request. +func BenchmarkGatewayHotPathChatCompletionRouted(b *testing.B) { + srv := newRoutedBenchServer(b, routedCatalogSize) + body := []byte(sampleChatRequest) + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + + rec := httptest.NewRecorder() + srv.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + b.Fatalf("status = %d, want %d", rec.Code, http.StatusOK) + } + } +} + func BenchmarkOpenAIResponsesStreamConverter(b *testing.B) { b.ReportAllocs() b.ResetTimer() @@ -300,6 +373,17 @@ func TestHotPathPerfGuard(t *testing.T) { maxAllocs: 125, maxBytes: 15 * 1024, }, + { + // Production-shaped path: request resolves through a real Router + + // catalog. Resolution uses an O(1) selector index, so the ceilings + // sit close to the bare-provider case and are independent of catalog + // size. A regression to catalog-scanning resolution (which copied the + // full catalog several times per request) would blow these limits. + name: "gateway_chat_completion_hot_path_routed", + bench: BenchmarkGatewayHotPathChatCompletionRouted, + maxAllocs: 160, + maxBytes: 18 * 1024, + }, { name: "openai_responses_stream_converter", bench: BenchmarkOpenAIResponsesStreamConverter, From 4f26d43b05924d4da61acd6e71fea09ef20656ca Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Thu, 18 Jun 2026 18:52:35 +0200 Subject: [PATCH 2/3] test(perf): tighten hot-path allocation guard thresholds CI (linux/amd64) and local (darwin/arm64) produce identical allocation counts and near-identical byte counts, confirming these are deterministic. Tighten the ceilings from "intentionally generous" to ~10% over the measured baseline so the guard catches smaller regressions while still absorbing Go/dependency drift: hot_path: 125 -> 120 allocs (baseline 113) routed: 160 -> 150 allocs, 18->16 KB (baseline 137 / ~14.7 KB) responses_stream: 310 -> 222 allocs, 25->22 KB (baseline 202 / ~19.6 KB) shared_observers: unchanged (already tight, no headroom to trim) Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/perf/hotpath_test.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/perf/hotpath_test.go b/tests/perf/hotpath_test.go index b5c02750..2fef93fd 100644 --- a/tests/perf/hotpath_test.go +++ b/tests/perf/hotpath_test.go @@ -358,9 +358,12 @@ func formatPerfGuardResult(name string, result testing.BenchmarkResult, maxAlloc func TestHotPathPerfGuard(t *testing.T) { t.Helper() - // These ceilings are intentionally generous. They are here to catch obvious - // allocation regressions in the hottest code paths, not to freeze the exact - // current profile. + // Ceilings sit ~10% above the measured baseline: tight enough to catch real + // allocation regressions, loose enough to absorb minor Go/dependency drift. + // Allocation counts here are deterministic and match across architectures + // (linux/amd64 CI == darwin/arm64 local), so these are stable. When a change + // legitimately adds allocations, re-measure with `make perf-bench` and bump + // the affected ceiling in the same commit. cases := []struct { name string bench func(*testing.B) @@ -370,8 +373,8 @@ func TestHotPathPerfGuard(t *testing.T) { { name: "gateway_chat_completion_hot_path", bench: BenchmarkGatewayHotPathChatCompletion, - maxAllocs: 125, - maxBytes: 15 * 1024, + maxAllocs: 120, // baseline 113 + maxBytes: 15 * 1024, // baseline ~13.9 KB }, { // Production-shaped path: request resolves through a real Router + @@ -381,20 +384,20 @@ func TestHotPathPerfGuard(t *testing.T) { // full catalog several times per request) would blow these limits. name: "gateway_chat_completion_hot_path_routed", bench: BenchmarkGatewayHotPathChatCompletionRouted, - maxAllocs: 160, - maxBytes: 18 * 1024, + maxAllocs: 150, // baseline 137 + maxBytes: 16 * 1024, // baseline ~14.7 KB }, { name: "openai_responses_stream_converter", bench: BenchmarkOpenAIResponsesStreamConverter, - maxAllocs: 310, - maxBytes: 25 * 1024, + maxAllocs: 222, // baseline 202 + maxBytes: 22 * 1024, // baseline ~19.6 KB }, { name: "shared_stream_audit_and_usage_observers", bench: BenchmarkSharedStreamingAuditAndUsageObservers, - maxAllocs: 170, - maxBytes: 9 * 1024, + maxAllocs: 170, // baseline 159; already tight, no headroom to trim + maxBytes: 9 * 1024, // baseline ~8.9 KB; already tight }, } From 39d477cd753fc8105d22054a1ef2a5dd5524cce0 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Thu, 18 Jun 2026 19:04:31 +0200 Subject: [PATCH 3/3] fix(providers): address PR review on selector index and benchmarks - registry: trim publicName/ProviderType when building the qualified-selector index and skip empty keys, matching the trimmed lookup inputs and the previous catalog scan (which compared trimmed fields on both sides). Prevents the O(1) fast path from missing when provider metadata carries whitespace padding. - resolve_bench_test: build exactly totalModels (round-robin across providers) instead of providersN*(n/6); the models=50/1000 cases previously benchmarked 48/996 models due to integer truncation. Add benchSelector helper. - request_snapshot_test: extend the owned-maps test to assert route/query/trace maps are owned (not cloned) while headers are still defensively cloned. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/core/request_snapshot_test.go | 34 +++++++++++++++-- internal/providers/registry.go | 13 +++++-- internal/providers/resolve_bench_test.go | 47 +++++++++++++++--------- 3 files changed, 68 insertions(+), 26 deletions(-) diff --git a/internal/core/request_snapshot_test.go b/internal/core/request_snapshot_test.go index 1dc3ff21..d9f53835 100644 --- a/internal/core/request_snapshot_test.go +++ b/internal/core/request_snapshot_test.go @@ -72,19 +72,23 @@ func TestNewRequestSnapshot_DefensivelyCopiesMutableFields(t *testing.T) { } func TestNewRequestSnapshotWithOwnedMaps_TakesOwnershipOfCapturedBytes(t *testing.T) { + routeParams := map[string]string{"provider": "openai"} + queryParams := map[string][]string{"limit": {"5"}} + headers := map[string][]string{"X-Test": {"a"}} + traceMetadata := map[string]string{"Traceparent": "trace-1"} rawBody := []byte(`{"model":"gpt-5-mini"}`) snapshot := NewRequestSnapshotWithOwnedMaps( "POST", "/v1/chat/completions", - nil, - nil, - nil, + routeParams, + queryParams, + headers, "application/json", rawBody, false, "req-123", - nil, + traceMetadata, "/team/a", ) @@ -103,6 +107,28 @@ func TestNewRequestSnapshotWithOwnedMaps_TakesOwnershipOfCapturedBytes(t *testin if &clonedBody[0] == &rawBody[0] { t.Fatal("CapturedBody returned owned bytes directly, want defensive copy") } + + // Route/query/trace maps are owned: mutating the caller's map is visible + // through the snapshot (no defensive copy was taken at construction). + routeParams["provider"] = "anthropic" + if got := snapshot.GetRouteParams()["provider"]; got != "anthropic" { + t.Fatalf("route params not owned: provider = %q, want anthropic", got) + } + queryParams["limit"] = []string{"9"} + if got := snapshot.GetQueryParams()["limit"]; len(got) != 1 || got[0] != "9" { + t.Fatalf("query params not owned: limit = %v, want [9]", got) + } + traceMetadata["Traceparent"] = "trace-2" + if got := snapshot.GetTraceMetadata()["Traceparent"]; got != "trace-2" { + t.Fatalf("trace metadata not owned: Traceparent = %q, want trace-2", got) + } + + // Headers are still defensively cloned: mutating the caller's map after + // construction must not affect the snapshot. + headers["X-Test"] = []string{"b"} + if got := snapshot.HeadersView()["X-Test"]; len(got) != 1 || got[0] != "a" { + t.Fatalf("headers not cloned: X-Test = %v, want [a]", got) + } } func BenchmarkNewRequestSnapshotClonedBody(b *testing.B) { diff --git a/internal/providers/registry.go b/internal/providers/registry.go index 104ea26c..abb60c38 100644 --- a/internal/providers/registry.go +++ b/internal/providers/registry.go @@ -171,15 +171,20 @@ func (r *ModelRegistry) buildSelectorIndexLocked() { byType := make(map[string]core.ModelSelector, total) for providerName, providerModels := range r.modelsByProvider { for _, info := range providerModels { - publicName := providerName + publicName := strings.TrimSpace(providerName) if info.ProviderName != "" { - publicName = info.ProviderName + publicName = strings.TrimSpace(info.ProviderName) } id := strings.TrimSpace(info.Model.ID) + if publicName == "" || id == "" { + continue + } + // Keys are trimmed to match the trimmed lookup inputs and the + // previous scan, which compared trimmed fields on both sides. sel := core.ModelSelector{Provider: publicName, Model: info.Model.ID} byName[publicName+"/"+id] = sel - if info.ProviderType != "" { - typeKey := info.ProviderType + "/" + id + if providerType := strings.TrimSpace(info.ProviderType); providerType != "" { + typeKey := providerType + "/" + id if existing, ok := byType[typeKey]; !ok || sel.Provider < existing.Provider { byType[typeKey] = sel } diff --git a/internal/providers/resolve_bench_test.go b/internal/providers/resolve_bench_test.go index 5df1b46c..cef5c758 100644 --- a/internal/providers/resolve_bench_test.go +++ b/internal/providers/resolve_bench_test.go @@ -7,38 +7,49 @@ import ( "gomodel/internal/core" ) -// buildBenchRegistry creates a registry with `providersN` providers each holding -// `perProvider` models, mirroring a realistic multi-provider catalog. -func buildBenchRegistry(providersN, perProvider int) *ModelRegistry { - entries := make([]registryModelEntry, 0, providersN*perProvider) - for p := 0; p < providersN; p++ { - name := fmt.Sprintf("prov%d", p) - prov := &mockProvider{name: name} - for m := 0; m < perProvider; m++ { - entries = append(entries, registryModelEntry{ - provider: prov, - providerName: name, - providerType: name, - modelID: fmt.Sprintf("model-%d-%d", p, m), - }) - } +// buildBenchRegistry creates a registry holding exactly totalModels models, +// distributed round-robin across providersN providers, mirroring a realistic +// multi-provider catalog. Model IDs are globally unique (model-) so the count +// is exact regardless of how it divides across providers. +func buildBenchRegistry(providersN, totalModels int) *ModelRegistry { + provs := make([]*mockProvider, providersN) + for p := range provs { + provs[p] = &mockProvider{name: fmt.Sprintf("prov%d", p)} + } + entries := make([]registryModelEntry, 0, totalModels) + for i := 0; i < totalModels; i++ { + p := i % providersN + entries = append(entries, registryModelEntry{ + provider: provs[p], + providerName: provs[p].name, + providerType: provs[p].name, + modelID: fmt.Sprintf("model-%d", i), + }) } return newTestRegistryWithModels(entries...) } +// benchSelector returns a "/" selector that exists in a registry +// built by buildBenchRegistry(providersN, totalModels), picking a mid-catalog +// model. Position is irrelevant to the O(1) index but the model must exist. +func benchSelector(providersN, totalModels int) string { + mid := totalModels / 2 + return fmt.Sprintf("prov%d/model-%d", mid%providersN, mid) +} + // BenchmarkResolvePerRequest simulates the resolution calls a single chat // request makes through the Router against a populated catalog: ResolveModel + // Supports + GetProviderType + GetProviderName (the ~per-request fan-out). func BenchmarkResolvePerRequest(b *testing.B) { for _, n := range []int{50, 300, 1000} { b.Run(fmt.Sprintf("models=%d", n), func(b *testing.B) { - reg := buildBenchRegistry(6, n/6) + reg := buildBenchRegistry(6, n) router, err := NewRouter(reg) if err != nil { b.Fatalf("NewRouter: %v", err) } // A mid-catalog qualified selector, the common production case. - sel := fmt.Sprintf("prov3/model-3-%d", (n/6)/2) + sel := benchSelector(6, n) b.ReportAllocs() b.ResetTimer() @@ -59,7 +70,7 @@ func BenchmarkResolvePerRequest(b *testing.B) { func BenchmarkListModelsWithProvider(b *testing.B) { for _, n := range []int{50, 300, 1000} { b.Run(fmt.Sprintf("models=%d", n), func(b *testing.B) { - reg := buildBenchRegistry(6, n/6) + reg := buildBenchRegistry(6, n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ {