Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cmd/engine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ func run() error {
LLM: llmClient,
Parsers: ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Ingest.Tables)),
Logger: logger,
Mode: cfg.Ingest.Mode,
HyDEEnabled: cfg.Ingest.HyDE.Enabled,
HyDEModel: cfg.Ingest.HyDE.Model,
HyDENumQuestions: cfg.Ingest.HyDE.NumQuestions,
Expand All @@ -184,7 +185,9 @@ func run() error {
SummaryAxesMaxNumbers: cfg.Ingest.SummaryAxes.MaxNumbers,
GlobalLLMConcurrency: cfg.Ingest.GlobalLLMConcurrency,
})
if cfg.Ingest.Tables.Enabled {
if cfg.Ingest.Mode == ingest.ModeMinimal {
logger.Info("ingest: MINIMAL mode — parse→persist→ready; skipping summarize/HyDE/multi-axis/TOC + table extraction")
} else if cfg.Ingest.Tables.Enabled {
logger.Info("ingest: pdf table extraction enabled",
"vertical_strategy", cfg.Ingest.Tables.VerticalStrategy,
"horizontal_strategy", cfg.Ingest.Tables.HorizontalStrategy,
Expand Down
5 changes: 4 additions & 1 deletion cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ func run() error {
LLM: llmClient,
Parsers: ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Engine.Ingest.Tables)),
Logger: logger,
Mode: cfg.Engine.Ingest.Mode,
HyDEEnabled: cfg.Engine.Ingest.HyDE.Enabled,
HyDEModel: cfg.Engine.Ingest.HyDE.Model,
HyDENumQuestions: cfg.Engine.Ingest.HyDE.NumQuestions,
Expand All @@ -214,7 +215,9 @@ func run() error {
TOCCheckPages: cfg.Engine.Ingest.TOC.TOCCheckPages,
GlobalLLMConcurrency: cfg.Engine.Ingest.GlobalLLMConcurrency,
})
if cfg.Engine.Ingest.Tables.Enabled {
if cfg.Engine.Ingest.Mode == ingest.ModeMinimal {
logger.Info("ingest: MINIMAL mode — parse→persist→ready; skipping summarize/HyDE/multi-axis/TOC + table extraction")
} else if cfg.Engine.Ingest.Tables.Enabled {
logger.Info("ingest: pdf table extraction enabled",
"vertical_strategy", cfg.Engine.Ingest.Tables.VerticalStrategy,
"horizontal_strategy", cfg.Engine.Ingest.Tables.HorizontalStrategy,
Expand Down
27 changes: 27 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,37 @@ retrieval:
model: ""

ingest:
# Ingest mode — how much work the pipeline does before a document is
# marked `ready` (queryable).
#
# full (default) parse -> build tree -> persist -> summarize ->
# HyDE -> multi-axis summaries -> TOC build. Maximises
# retrieval quality but costs ~1,000-3,000 LLM calls plus a
# pdftable table-finding pass on a large filing — minutes of
# wall time for a 90-page 10-K.
#
# minimal parse -> build tree -> persist -> ready. Skips ALL
# per-section LLM enrichment (summarize, HyDE, multi-axis,
# TOC build) AND the pdftable table-extraction pass, so a
# document becomes queryable at roughly parse speed (seconds).
# The page-based strategy (/v1/answer/pageindex) needs none
# of the skipped work: it navigates a TOC synthesised from
# the section tree (documents.toc_tree is left NULL) and
# reads raw section/page text at query time — and that raw
# page text still contains the tables' text, so dropping
# table *sections* loses nothing for it. The
# summary-dependent strategies (chunked-tree, agentic)
# degrade to titles + raw content with no summaries.
#
# Override per-process with VLE_INGEST_MODE; on the deployed
# vectorless-server use VLS_INGEST_MODE=minimal (no secret edit needed).
mode: "full"

# The summarize and HyDE stages run concurrently. This caps the total
# number of LLM calls in flight across both stages combined, so the
# provider's per-tenant concurrency limit isn't exceeded. 0 disables
# the global cap; default applied by the engine is 12.
# (Ignored when mode: minimal — no LLM stages run.)
global_llm_concurrency: 12

# HyDE candidate-question stage. For each leaf section the pipeline asks
Expand Down
12 changes: 12 additions & 0 deletions config.server.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,21 @@ engine:
include_sibling_breadcrumbs: true

ingest:
# Ingest mode: full (default) | minimal.
# full parse -> persist -> summarize -> HyDE -> multi-axis ->
# TOC build. Maximum retrieval quality; minutes on a large
# filing.
# minimal parse -> persist -> ready. Skips every LLM enrichment
# stage AND table extraction — queryable in seconds. The
# page-based strategy (/v1/answer/pageindex) works on it
# unchanged (synthesised TOC + raw page reads).
# Flip the live service without a secret edit: VLS_INGEST_MODE=minimal.
mode: "full"

# The summarize and HyDE stages run concurrently. This caps the total
# number of LLM calls in flight across both stages combined.
# 0 disables the global cap; default is 12.
# (Ignored when mode: minimal — no LLM stages run.)
global_llm_concurrency: 12

# HyDE candidate-question generation per leaf section. Folded into
Expand Down
6 changes: 6 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,12 @@ func applyEnvOverrides(c *Config) {
if v := firstEnv("VLS_LLM_DRIVER", "VLE_LLM_DRIVER"); v != "" {
c.Engine.LLM.Driver = v
}
// Ingest mode (full | minimal). Forwarded so the live
// vectorless-server can be flipped to minimal ingest with a single
// env var, no secret/config edit. VLS_-prefixed wins over VLE_.
if v := firstEnv("VLS_INGEST_MODE", "VLE_INGEST_MODE"); v != "" {
c.Engine.Ingest.Mode = v
}
// Anthropic-compatible gateway overrides (e.g. GLM/Zhipu via
// https://api.z.ai/api/anthropic): base URL + model, so the
// anthropic driver can run a non-Anthropic model without a secret
Expand Down
41 changes: 41 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,35 @@ type Config struct {
// IngestConfig configures retrieval-quality boosters that run during
// the ingest pipeline (between summarize and StatusReady).
type IngestConfig struct {
// Mode selects how much work the ingest pipeline does before a
// document is marked ready.
//
// "full" (default) — parse → build tree → persist → summarize
// → HyDE → multi-axis summaries → TOC build.
// Maximises retrieval quality at the cost of
// ~1,000-3,000 LLM calls + a table-extraction
// pass on a large filing (minutes of wall time).
//
// "minimal" — parse → build tree → persist → ready.
// Skips ALL per-section LLM enrichment
// (summarize, HyDE, multi-axis, TOC build)
// AND the pdftable table-finding pass, so a
// document becomes queryable in ~parse-speed
// (seconds). The page-based retrieval strategy
// (/v1/answer/pageindex) needs none of the
// skipped enrichment: it navigates a TOC tree
// (synthesised from the section tree when
// documents.toc_tree is NULL) and reads raw
// section/page text at query time — and the raw
// page text still contains the tables' text, so
// dropping table *sections* loses nothing for
// it. The summary-dependent strategies
// (chunked-tree, agentic) degrade to using
// titles + raw content with no summaries.
//
// Empty defaults to "full". Engine env override: VLE_INGEST_MODE.
Mode string `yaml:"mode"`

HyDE HyDEConfig `yaml:"hyde"`

// Tables configures pdftable's table-finding pass over PDF inputs.
Expand Down Expand Up @@ -695,6 +724,7 @@ func Default() Config {
},
},
Ingest: IngestConfig{
Mode: "full",
GlobalLLMConcurrency: 12,
LLMCallTimeoutSeconds: 90,
MaxSections: 400,
Expand Down Expand Up @@ -838,6 +868,11 @@ func applyEnvOverrides(c *Config) {
if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MODEL"); v != "" {
c.Retrieval.Agentic.Model = v
}
// Ingest mode switch (full | minimal). A single env var flips the
// engine into fast/minimal ingest with no secret edit.
if v := os.Getenv("VLE_INGEST_MODE"); v != "" {
c.Ingest.Mode = v
}
// Ingest / HyDE knobs. Booleans accept the usual truthy strings —
// kept narrow so a typo doesn't silently flip the flag.
if v := os.Getenv("VLE_INGEST_HYDE_ENABLED"); v != "" {
Expand Down Expand Up @@ -1144,6 +1179,12 @@ func (c Config) Validate() error {
return fmt.Errorf("server.tls.min_version must be 1.2 or 1.3, got %q", v)
}

switch c.Ingest.Mode {
case "", "full", "minimal":
default:
return fmt.Errorf("ingest.mode must be one of full|minimal, got %q", c.Ingest.Mode)
}

if c.Ingest.HyDE.NumQuestions < 0 {
return fmt.Errorf("ingest.hyde.num_questions must be >= 0, got %d", c.Ingest.HyDE.NumQuestions)
}
Expand Down
46 changes: 46 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,52 @@ func TestDefaultValues(t *testing.T) {
}
}

// TestIngestModeDefault pins Default().Ingest.Mode to "full", so the
// full-enrichment pipeline stays the out-of-the-box behaviour unless a
// caller opts out explicitly.
func TestIngestModeDefault(t *testing.T) {
	t.Parallel()
	if got := Default().Ingest.Mode; got != "full" {
		t.Errorf("ingest.mode = %q, want full (default)", got)
	}
}

// TestIngestModeEnvOverride covers the VLE_INGEST_MODE override — the
// single env var that flips the engine into fast/minimal ingest.
func TestIngestModeEnvOverride(t *testing.T) {
prev := os.Getenv("VLE_INGEST_MODE")
defer os.Setenv("VLE_INGEST_MODE", prev)

os.Setenv("VLE_INGEST_MODE", "minimal")
cfg := Default()
applyEnvOverrides(&cfg)
if cfg.Ingest.Mode != "minimal" {
t.Errorf("VLE_INGEST_MODE=minimal not applied, got %q", cfg.Ingest.Mode)
}
}

// TestIngestModeValidate checks that Validate accepts every documented
// ingest mode — "full", "minimal", and the empty string (documented on
// the config field as defaulting to full) — and rejects an unknown one.
func TestIngestModeValidate(t *testing.T) {
	t.Parallel()

	// validate runs Validate on a default config in which only the
	// database URL and the ingest mode have been changed. The URL is
	// set presumably because Validate requires one — confirm against
	// Validate if this ever changes.
	validate := func(mode string) error {
		c := Default()
		c.Database.URL = "postgres://localhost/test"
		c.Ingest.Mode = mode
		return c.Validate()
	}

	accepted := [...]string{"", "full", "minimal"}
	for i := range accepted {
		if err := validate(accepted[i]); err != nil {
			t.Errorf("ingest.mode=%q should pass validation, got %v", accepted[i], err)
		}
	}

	if validate("turbo") == nil {
		t.Error("ingest.mode=turbo should fail validation")
	}
}

func TestTOCEnvOverride(t *testing.T) {
// Mutates env — restore on exit. Not parallel.
keys := []string{
Expand Down
Loading
Loading