From 249d4b21294236dff0a0a1cff4330f0d57157813 Mon Sep 17 00:00:00 2001
From: Halleluyah Oludele <halleluyaholudele@gmail.com>
Date: Thu, 28 May 2026 23:40:15 +0100
Subject: [PATCH 1/3] feat(config): add ingest mode switch (full|minimal) with
 env forwarding

Add IngestConfig.Mode (yaml `mode`, values full|minimal, default full)
to the engine config, with VLE_INGEST_MODE env override and Validate
rejecting unknown values. Forward it from the deployed server's config
wrapper via firstEnv("VLS_INGEST_MODE", "VLE_INGEST_MODE") so the live
vectorless-server can be flipped to minimal ingest with a single env
var, no secret edit.
---
 internal/config/config.go |  6 ++++++
 pkg/config/config.go      | 41 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/internal/config/config.go b/internal/config/config.go
index 82b8118..1646af0 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -320,6 +320,12 @@ func applyEnvOverrides(c *Config) {
 	if v := firstEnv("VLS_LLM_DRIVER", "VLE_LLM_DRIVER"); v != "" {
 		c.Engine.LLM.Driver = v
 	}
+	// Ingest mode (full | minimal). Forwarded so the live
+	// vectorless-server can be flipped to minimal ingest with a single
+	// env var, no secret/config edit. VLS_-prefixed wins over VLE_.
+	if v := firstEnv("VLS_INGEST_MODE", "VLE_INGEST_MODE"); v != "" {
+		c.Engine.Ingest.Mode = v
+	}
 	// Anthropic-compatible gateway overrides (e.g. GLM/Zhipu via
 	// https://api.z.ai/api/anthropic): base URL + model, so the
 	// anthropic driver can run a non-Anthropic model without a secret
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 3add5b0..8c7c327 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -33,6 +33,35 @@ type Config struct {
 // IngestConfig configures retrieval-quality boosters that run during
 // the ingest pipeline (between summarize and StatusReady).
 type IngestConfig struct {
+	// Mode selects how much work the ingest pipeline does before a
+	// document is marked ready.
+	//
+	//   "full"    (default) — parse → build tree → persist → summarize
+	//                          → HyDE → multi-axis summaries → TOC build.
+	//                          Maximises retrieval quality at the cost of
+	//                          ~1,000-3,000 LLM calls + a table-extraction
+	//                          pass on a large filing (minutes of wall time).
+	//
+	//   "minimal"           — parse → build tree → persist → ready.
+	//                          Skips ALL per-section LLM enrichment
+	//                          (summarize, HyDE, multi-axis, TOC build)
+	//                          AND the pdftable table-finding pass, so a
+	//                          document becomes queryable in ~parse-speed
+	//                          (seconds). The page-based retrieval strategy
+	//                          (/v1/answer/pageindex) needs none of the
+	//                          skipped enrichment: it navigates a TOC tree
+	//                          (synthesised from the section tree when
+	//                          documents.toc_tree is NULL) and reads raw
+	//                          section/page text at query time — and the raw
+	//                          page text still contains the tables' text, so
+	//                          dropping table *sections* loses nothing for
+	//                          it. The summary-dependent strategies
+	//                          (chunked-tree, agentic) degrade to using
+	//                          titles + raw content with no summaries.
+	//
+	// Empty defaults to "full". Engine env override: VLE_INGEST_MODE.
+	Mode string `yaml:"mode"`
+
 	HyDE HyDEConfig `yaml:"hyde"`
 
 	// Tables configures pdftable's table-finding pass over PDF inputs.
@@ -695,6 +724,7 @@ func Default() Config {
 			},
 		},
 		Ingest: IngestConfig{
+			Mode:                  "full",
 			GlobalLLMConcurrency:  12,
 			LLMCallTimeoutSeconds: 90,
 			MaxSections:           400,
@@ -838,6 +868,11 @@ func applyEnvOverrides(c *Config) {
 	if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MODEL"); v != "" {
 		c.Retrieval.Agentic.Model = v
 	}
+	// Ingest mode switch (full | minimal). A single env var flips the
+	// engine into fast/minimal ingest with no secret edit.
+	if v := os.Getenv("VLE_INGEST_MODE"); v != "" {
+		c.Ingest.Mode = v
+	}
 	// Ingest / HyDE knobs. Booleans accept the usual truthy strings —
 	// kept narrow so a typo doesn't silently flip the flag.
 	if v := os.Getenv("VLE_INGEST_HYDE_ENABLED"); v != "" {
@@ -1144,6 +1179,12 @@ func (c Config) Validate() error {
 		return fmt.Errorf("server.tls.min_version must be 1.2 or 1.3, got %q", v)
 	}
 
+	switch c.Ingest.Mode {
+	case "", "full", "minimal":
+	default:
+		return fmt.Errorf("ingest.mode must be one of full|minimal, got %q", c.Ingest.Mode)
+	}
+
 	if c.Ingest.HyDE.NumQuestions < 0 {
 		return fmt.Errorf("ingest.hyde.num_questions must be >= 0, got %d", c.Ingest.HyDE.NumQuestions)
 	}

From 6444532f6184ec52b65435a0679d5c75b24dfb36 Mon Sep 17 00:00:00 2001
From: Halleluyah Oludele <halleluyaholudele@gmail.com>
Date: Thu, 28 May 2026 23:49:01 +0100
Subject: [PATCH 2/3] =?UTF-8?q?feat(ingest):=20minimal-mode=20pipeline=20p?=
 =?UTF-8?q?ath=20=E2=80=94=20parse=E2=86=92persist=E2=86=92ready,=20no=20L?=
 =?UTF-8?q?LM/tables?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Pipeline.Mode; when "minimal", Run dispatches to runMinimal which
does parse → build tree → persist → ready and skips every per-section
LLM stage (summarize, HyDE, multi-axis summaries, TOC build). The
parser registry is rebuilt with table extraction DISABLED (nil opts)
regardless of ingest.tables.enabled, since the pdftable table-finding
pass is the slow/hang-prone part of parse and the page-based strategy
reads raw page text (which still contains the table's text).

persistTree/parse/fail now take the persistence target through a narrow
docPersister interface (*db.Pool satisfies it) so the minimal path is
exercisable without a live Postgres. Both cmd/engine and cmd/server set
Mode from cfg.Ingest.Mode and log when minimal mode is active.
---
 cmd/engine/main.go   |   5 +-
 cmd/server/main.go   |   5 +-
 pkg/ingest/ingest.go | 116 ++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 112 insertions(+), 14 deletions(-)

diff --git a/cmd/engine/main.go b/cmd/engine/main.go
index 3f2742b..8da6abf 100644
--- a/cmd/engine/main.go
+++ b/cmd/engine/main.go
@@ -174,6 +174,7 @@ func run() error {
 		LLM:                    llmClient,
 		Parsers:                ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Ingest.Tables)),
 		Logger:                 logger,
+		Mode:                   cfg.Ingest.Mode,
 		HyDEEnabled:            cfg.Ingest.HyDE.Enabled,
 		HyDEModel:              cfg.Ingest.HyDE.Model,
 		HyDENumQuestions:       cfg.Ingest.HyDE.NumQuestions,
@@ -184,7 +185,9 @@ func run() error {
 		SummaryAxesMaxNumbers:  cfg.Ingest.SummaryAxes.MaxNumbers,
 		GlobalLLMConcurrency:   cfg.Ingest.GlobalLLMConcurrency,
 	})
-	if cfg.Ingest.Tables.Enabled {
+	if cfg.Ingest.Mode == ingest.ModeMinimal {
+		logger.Info("ingest: MINIMAL mode — parse→persist→ready; skipping summarize/HyDE/multi-axis/TOC + table extraction")
+	} else if cfg.Ingest.Tables.Enabled {
 		logger.Info("ingest: pdf table extraction enabled",
 			"vertical_strategy", cfg.Ingest.Tables.VerticalStrategy,
 			"horizontal_strategy", cfg.Ingest.Tables.HorizontalStrategy,
diff --git a/cmd/server/main.go b/cmd/server/main.go
index a241eda..c14b608 100644
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -200,6 +200,7 @@ func run() error {
 		LLM:                    llmClient,
 		Parsers:                ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Engine.Ingest.Tables)),
 		Logger:                 logger,
+		Mode:                   cfg.Engine.Ingest.Mode,
 		HyDEEnabled:            cfg.Engine.Ingest.HyDE.Enabled,
 		HyDEModel:              cfg.Engine.Ingest.HyDE.Model,
 		HyDENumQuestions:       cfg.Engine.Ingest.HyDE.NumQuestions,
@@ -214,7 +215,9 @@ func run() error {
 		TOCCheckPages:          cfg.Engine.Ingest.TOC.TOCCheckPages,
 		GlobalLLMConcurrency:   cfg.Engine.Ingest.GlobalLLMConcurrency,
 	})
-	if cfg.Engine.Ingest.Tables.Enabled {
+	if cfg.Engine.Ingest.Mode == ingest.ModeMinimal {
+		logger.Info("ingest: MINIMAL mode — parse→persist→ready; skipping summarize/HyDE/multi-axis/TOC + table extraction")
+	} else if cfg.Engine.Ingest.Tables.Enabled {
 		logger.Info("ingest: pdf table extraction enabled",
 			"vertical_strategy", cfg.Engine.Ingest.Tables.VerticalStrategy,
 			"horizontal_strategy", cfg.Engine.Ingest.Tables.HorizontalStrategy,
diff --git a/pkg/ingest/ingest.go b/pkg/ingest/ingest.go
index 2adf859..a840acc 100644
--- a/pkg/ingest/ingest.go
+++ b/pkg/ingest/ingest.go
@@ -45,6 +45,22 @@ import (
 	"github.com/hallelx2/vectorless-engine/pkg/tree"
 )
 
+// ModeMinimal is the Pipeline.Mode value that collapses ingest to
+// parse → build tree → persist → ready, skipping all LLM enrichment
+// and table extraction. Any other value runs the full pipeline.
+const ModeMinimal = "minimal"
+
+// docPersister is the narrow slice of *db.Pool the parse → persist →
+// ready path depends on. Declaring it here (rather than threading the
+// concrete *db.Pool) lets the minimal-mode runner be exercised with a
+// fake store, so the "zero LLM calls, still reaches ready" guarantee is
+// provable without a live Postgres. *db.Pool satisfies it.
+type docPersister interface {
+	SetDocumentStatus(ctx context.Context, id tree.DocumentID, s db.DocumentStatus, errMsg string) error
+	SetDocumentTitle(ctx context.Context, id tree.DocumentID, title string) error
+	UpsertSection(ctx context.Context, s db.Section) error
+}
+
 // Payload is the JSON body attached to an ingest job.
 type Payload struct {
 	DocumentID  tree.DocumentID `json:"document_id"`
@@ -65,6 +81,19 @@ type Pipeline struct {
 	Parsers *parser.Registry
 	Logger  *slog.Logger
 
+	// Mode selects how much work Run does before marking a document
+	// ready. "minimal" collapses ingest to parse → build tree → persist
+	// → ready, skipping every per-section LLM stage (summarize, HyDE,
+	// multi-axis summaries, TOC build) AND the pdftable table-finding
+	// pass. Anything else (including the empty Go zero value used by
+	// Pipeline literals in tests) runs the full enrichment pipeline.
+	//
+	// The page-based retrieval strategy (/v1/answer/pageindex) needs none
+	// of the skipped enrichment — it navigates a synthesised-from-sections
+	// TOC and reads raw section/page text at query time — so a
+	// minimal-ingested document is immediately queryable through it.
+	Mode string
+
 	// SummaryMaxChars caps the content window sent to the LLM per section.
 	// Sections longer than this are truncated — we're generating a short
 	// summary, not reproducing the text.
@@ -301,8 +330,16 @@ func (p *Pipeline) Handler() queue.Handler {
 	}
 }
 
-// Run executes the full pipeline for one document. Safe to retry.
+// Run executes the pipeline for one document. Safe to retry.
+//
+// When Mode == ModeMinimal it dispatches to runMinimal — parse → build
+// tree → persist → ready, with no LLM enrichment and no table
+// extraction. Otherwise it runs the full enrichment pipeline below.
 func (p *Pipeline) Run(ctx context.Context, pl Payload) error {
+	if p.Mode == ModeMinimal {
+		return p.runMinimal(ctx, p.DB, pl)
+	}
+
 	log := p.Logger.With("document_id", string(pl.DocumentID))
 	log.Info("ingest: start", "source_ref", pl.SourceRef)
 
@@ -310,15 +347,15 @@ func (p *Pipeline) Run(ctx context.Context, pl Payload) error {
 		return err
 	}
 
-	parsed, err := p.parse(ctx, pl)
+	parsed, err := p.parse(ctx, p.Parsers, pl)
 	if err != nil {
-		p.fail(ctx, pl.DocumentID, "parse", err)
+		p.fail(ctx, p.DB, pl.DocumentID, "parse", err)
 		return err
 	}
 	log.Info("ingest: parsed", "sections", len(parsed.Flatten()), "title", parsed.Title)
 
-	if err := p.persistTree(ctx, pl.DocumentID, parsed); err != nil {
-		p.fail(ctx, pl.DocumentID, "persist tree", err)
+	if err := p.persistTree(ctx, p.DB, pl.DocumentID, parsed); err != nil {
+		p.fail(ctx, p.DB, pl.DocumentID, "persist tree", err)
 		return err
 	}
 
@@ -504,25 +541,80 @@ func runParallelStages(ctx context.Context, summarizeFn, hydeFn func(context.Con
 	return summarizeErr, hydeErr
 }
 
-func (p *Pipeline) parse(ctx context.Context, pl Payload) (*parser.ParsedDoc, error) {
+func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Payload) (*parser.ParsedDoc, error) {
 	rc, _, err := p.Storage.Get(ctx, pl.SourceRef)
 	if err != nil {
 		return nil, fmt.Errorf("fetch source: %w", err)
 	}
 	defer rc.Close()
-	return p.Parsers.Parse(ctx, pl.ContentType, pl.Filename, rc)
+	return parsers.Parse(ctx, pl.ContentType, pl.Filename, rc)
+}
+
+// runMinimal is the fast/minimal ingest path: parse → build tree →
+// persist → ready. It does ZERO LLM work — no summarize, no HyDE, no
+// multi-axis summaries, no TOC build — and parses with table extraction
+// DISABLED (the pdftable table-finding pass is the slow/hang-prone part
+// of parse, and the page-based strategy reads raw page text which still
+// contains the table's text, so dropping table *sections* loses nothing
+// for it).
+//
+// The doc reaches StatusReady the moment the section tree is persisted,
+// which is what "ready" means for the page-based strategy: it
+// synthesises its TOC from the section tree (titles + page ranges) when
+// documents.toc_tree is NULL — and minimal mode leaves it NULL — and
+// reads section bodies from storage at query time.
+//
+// store is the persistence target; production passes p.DB. The DB seam
+// is an interface so this path is testable without a live Postgres.
+func (p *Pipeline) runMinimal(ctx context.Context, store docPersister, pl Payload) error {
+	log := p.Logger.With("document_id", string(pl.DocumentID))
+	log.Info("ingest: start (minimal mode)", "source_ref", pl.SourceRef)
+
+	if err := store.SetDocumentStatus(ctx, pl.DocumentID, db.StatusParsing, ""); err != nil {
+		return err
+	}
+
+	// Table extraction is disabled unconditionally in minimal mode,
+	// regardless of ingest.tables.enabled: a nil-opts registry makes the
+	// PDF parser skip the table-finding pass entirely. All other parsers
+	// are unaffected.
+	parsers := RegistryFromTableOpts(nil)
+	parsed, err := p.parse(ctx, parsers, pl)
+	if err != nil {
+		p.fail(ctx, store, pl.DocumentID, "parse", err)
+		return err
+	}
+	log.Info("ingest: parsed", "sections", len(parsed.Flatten()), "title", parsed.Title)
+
+	if err := p.persistTree(ctx, store, pl.DocumentID, parsed); err != nil {
+		p.fail(ctx, store, pl.DocumentID, "persist tree", err)
+		return err
+	}
+
+	// Skip summarize / HyDE / multi-axis / TOC entirely — flip straight
+	// to ready. The document is now queryable via the page-based
+	// strategy (synthesised TOC + raw page reads).
+	if err := store.SetDocumentStatus(ctx, pl.DocumentID, db.StatusReady, ""); err != nil {
+		return err
+	}
+	log.Info("ingest: ready (minimal mode)")
+	return nil
 }
 
 // persistTree writes sections + full content in document order. Parents
 // are written before children so the FK on sections.parent_id holds.
-func (p *Pipeline) persistTree(ctx context.Context, docID tree.DocumentID, doc *parser.ParsedDoc) error {
+//
+// The DB operations go through the narrow docPersister interface so the
+// persist path can be exercised (e.g. by the minimal-mode test) without
+// a live Postgres; production callers pass p.DB, which satisfies it.
+func (p *Pipeline) persistTree(ctx context.Context, store docPersister, docID tree.DocumentID, doc *parser.ParsedDoc) error {
 	// Only overwrite the row's title (which was seeded with the
 	// filename at upload time) when the parsed title looks usable.
 	// Watermarked PDFs whose overlay text shares a Y coordinate with
 	// the real title produce mojibake like "GGlloobbaall SSttrraatteeggyy"
 	// — we'd rather keep the original filename than show that to a user.
 	if doc.Title != "" && !isLikelyMojibakeTitle(doc.Title) {
-		if err := p.DB.SetDocumentTitle(ctx, docID, doc.Title); err != nil {
+		if err := store.SetDocumentTitle(ctx, docID, doc.Title); err != nil {
 			return err
 		}
 	}
@@ -550,7 +642,7 @@ func (p *Pipeline) persistTree(ctx context.Context, docID tree.DocumentID, doc *
 				}
 			}
 
-			if err := p.DB.UpsertSection(ctx, db.Section{
+			if err := store.UpsertSection(ctx, db.Section{
 				ID:         id,
 				DocumentID: docID,
 				ParentID:   parent,
@@ -870,14 +962,14 @@ func fallbackSummary(title, body string) string {
 	return strings.Join(strings.Fields(body), " ")
 }
 
-func (p *Pipeline) fail(ctx context.Context, id tree.DocumentID, stage string, cause error) {
+func (p *Pipeline) fail(ctx context.Context, store docPersister, id tree.DocumentID, stage string, cause error) {
 	msg := fmt.Sprintf("%s: %s", stage, cause.Error())
 	// Use a FRESH context for the failure write — the inbound one is
 	// almost certainly the reason we're failing (timeout/cancel) and
 	// reusing it would leave the doc stuck on "parsing" forever.
 	failCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
-	if err := p.DB.SetDocumentStatus(failCtx, id, db.StatusFailed, msg); err != nil {
+	if err := store.SetDocumentStatus(failCtx, id, db.StatusFailed, msg); err != nil {
 		p.Logger.Error("ingest: failed to mark document failed", "err", err, "cause", cause)
 	}
 }

From 0f9a2cf65d38ebee53182f7acaf40cffd54659f0 Mon Sep 17 00:00:00 2001
From: Halleluyah Oludele <halleluyaholudele@gmail.com>
Date: Fri, 29 May 2026 00:07:13 +0100
Subject: [PATCH 3/3] test(ingest): prove minimal mode does zero LLM work and
 stays queryable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- pkg/ingest/minimal_mode_test.go: a minimal-mode pipeline run with an
  LLM client that fails the test on any call reaches StatusReady with
  sections persisted and a call counter of 0 — proving minimal ingest is
  pure-Go. A second test reconstructs the persisted tree and confirms it
  carries the titles the synthesised-TOC fallback needs and that section
  bodies load back from storage.
- pkg/retrieval: TestPageIndexMinimalIngestedDoc drives the page-based
  strategy end-to-end against a minimal-ingested doc shape (page ranges +
  content refs, NO summaries, nil TOC) and asserts it produces a cited
  answer from the synthesised TOC + raw page reads.
- pkg/config: default mode is "full"; VLE_INGEST_MODE=minimal override
  and Validate accept/reject coverage.
- Document ingest.mode in both example configs.
---
 config.example.yaml                      |  27 ++
 config.server.example.yaml               |  12 +
 pkg/config/config_test.go                |  46 ++++
 pkg/ingest/minimal_mode_test.go          | 317 +++++++++++++++++++++++
 pkg/retrieval/pageindex_strategy_test.go |  81 ++++++
 5 files changed, 483 insertions(+)
 create mode 100644 pkg/ingest/minimal_mode_test.go

diff --git a/config.example.yaml b/config.example.yaml
index 13a40da..31262d7 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -294,10 +294,37 @@ retrieval:
     model: ""
 
 ingest:
+  # Ingest mode — how much work the pipeline does before a document is
+  # marked `ready` (queryable).
+  #
+  #   full     (default) parse -> build tree -> persist -> summarize ->
+  #            HyDE -> multi-axis summaries -> TOC build. Maximises
+  #            retrieval quality but costs ~1,000-3,000 LLM calls plus a
+  #            pdftable table-finding pass on a large filing — minutes of
+  #            wall time for a 90-page 10-K.
+  #
+  #   minimal  parse -> build tree -> persist -> ready. Skips ALL
+  #            per-section LLM enrichment (summarize, HyDE, multi-axis,
+  #            TOC build) AND the pdftable table-extraction pass, so a
+  #            document becomes queryable in ~parse-speed (seconds).
+  #            The page-based strategy (/v1/answer/pageindex) needs none
+  #            of the skipped work: it navigates a TOC synthesised from
+  #            the section tree (documents.toc_tree is left NULL) and
+  #            reads raw section/page text at query time — and that raw
+  #            page text still contains the tables' text, so dropping
+  #            table *sections* loses nothing for it. The
+  #            summary-dependent strategies (chunked-tree, agentic)
+  #            degrade to titles + raw content with no summaries.
+  #
+  # Override per-process with VLE_INGEST_MODE; on the deployed
+  # vectorless-server use VLS_INGEST_MODE=minimal (no secret edit needed).
+  mode: "full"
+
   # The summarize and HyDE stages run concurrently. This caps the total
   # number of LLM calls in flight across both stages combined, so the
   # provider's per-tenant concurrency limit isn't exceeded. 0 disables
   # the global cap; default applied by the engine is 12.
+  # (Ignored when mode: minimal — no LLM stages run.)
   global_llm_concurrency: 12
 
   # HyDE candidate-question stage. For each leaf section the pipeline asks
diff --git a/config.server.example.yaml b/config.server.example.yaml
index 76bc29c..9e56a7b 100644
--- a/config.server.example.yaml
+++ b/config.server.example.yaml
@@ -99,9 +99,21 @@ engine:
       include_sibling_breadcrumbs: true
 
   ingest:
+    # Ingest mode: full (default) | minimal.
+    #   full     parse -> persist -> summarize -> HyDE -> multi-axis ->
+    #            TOC build. Maximum retrieval quality; minutes on a large
+    #            filing.
+    #   minimal  parse -> persist -> ready. Skips every LLM enrichment
+    #            stage AND table extraction — queryable in seconds. The
+    #            page-based strategy (/v1/answer/pageindex) works on it
+    #            unchanged (synthesised TOC + raw page reads).
+    # Flip the live service without a secret edit: VLS_INGEST_MODE=minimal.
+    mode: "full"
+
     # The summarize and HyDE stages run concurrently. This caps the total
     # number of LLM calls in flight across both stages combined.
     # 0 disables the global cap; default is 12.
+    # (Ignored when mode: minimal — no LLM stages run.)
     global_llm_concurrency: 12
 
     # HyDE candidate-question generation per leaf section. Folded into
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index 6da4b92..d8172b2 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -84,6 +84,52 @@ func TestDefaultValues(t *testing.T) {
 	}
 }
 
+// TestIngestModeDefault locks the default ingest mode to "full" so the
+// current full-enrichment behaviour is preserved unless explicitly
+// switched.
+func TestIngestModeDefault(t *testing.T) {
+	t.Parallel()
+	cfg := Default()
+	if cfg.Ingest.Mode != "full" {
+		t.Errorf("ingest.mode = %q, want full (default)", cfg.Ingest.Mode)
+	}
+}
+
+// TestIngestModeEnvOverride covers the VLE_INGEST_MODE override — the
+// single env var that flips the engine into fast/minimal ingest.
+func TestIngestModeEnvOverride(t *testing.T) {
+	// t.Setenv restores the prior state on cleanup — including leaving
+	// the variable unset if it was unset — and refuses to run in a
+	// parallel test, so the mutation cannot leak into other tests.
+	t.Setenv("VLE_INGEST_MODE", "minimal")
+	cfg := Default()
+	applyEnvOverrides(&cfg)
+	if cfg.Ingest.Mode != "minimal" {
+		t.Errorf("VLE_INGEST_MODE=minimal not applied, got %q", cfg.Ingest.Mode)
+	}
+}
+
+// TestIngestModeValidate asserts Validate accepts the documented values
+// (and empty, which the pipeline treats as full) and rejects garbage.
+func TestIngestModeValidate(t *testing.T) {
+	t.Parallel()
+	for _, m := range []string{"", "full", "minimal"} {
+		cfg := Default()
+		cfg.Database.URL = "postgres://localhost/test"
+		cfg.Ingest.Mode = m
+		if err := cfg.Validate(); err != nil {
+			t.Errorf("ingest.mode=%q should pass validation, got %v", m, err)
+		}
+	}
+
+	cfg := Default()
+	cfg.Database.URL = "postgres://localhost/test"
+	cfg.Ingest.Mode = "turbo"
+	if err := cfg.Validate(); err == nil {
+		t.Error("ingest.mode=turbo should fail validation")
+	}
+}
+
 func TestTOCEnvOverride(t *testing.T) {
 	// Mutates env — restore on exit. Not parallel.
 	keys := []string{
diff --git a/pkg/ingest/minimal_mode_test.go b/pkg/ingest/minimal_mode_test.go
new file mode 100644
index 0000000..34a96c1
--- /dev/null
+++ b/pkg/ingest/minimal_mode_test.go
@@ -0,0 +1,317 @@
+package ingest
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"log/slog"
+	"os"
+	"sync"
+	"testing"
+
+	"github.com/hallelx2/llmgate"
+
+	"github.com/hallelx2/vectorless-engine/pkg/db"
+	"github.com/hallelx2/vectorless-engine/pkg/storage"
+	"github.com/hallelx2/vectorless-engine/pkg/tree"
+)
+
+// fakeDocStore is an in-memory docPersister. It captures the status
+// transitions and section upserts the minimal pipeline performs so the
+// "reaches ready, sections persisted" guarantee can be asserted without
+// a live Postgres. Safe for the pipeline's concurrent use (minimal mode
+// is sequential, but the mutex keeps the race detector quiet regardless).
+type fakeDocStore struct {
+	mu       sync.Mutex
+	status   db.DocumentStatus
+	errMsg   string
+	title    string
+	sections []db.Section
+}
+
+func (f *fakeDocStore) SetDocumentStatus(_ context.Context, _ tree.DocumentID, s db.DocumentStatus, errMsg string) error {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	f.status = s
+	f.errMsg = errMsg
+	return nil
+}
+
+func (f *fakeDocStore) SetDocumentTitle(_ context.Context, _ tree.DocumentID, title string) error {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	f.title = title
+	return nil
+}
+
+func (f *fakeDocStore) UpsertSection(_ context.Context, s db.Section) error {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	f.sections = append(f.sections, s)
+	return nil
+}
+
+func (f *fakeDocStore) snapshot() (db.DocumentStatus, string, []db.Section) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	out := make([]db.Section, len(f.sections))
+	copy(out, f.sections)
+	return f.status, f.errMsg, out
+}
+
+// failIfCalledLLM is an llmgate.Client that fails the test the instant
+// any LLM call is issued. It is the proof harness for minimal mode:
+// minimal ingest must do ZERO LLM work, so a single Complete or
+// CountTokens call fails the test. callCount() exposes the counter.
+type failIfCalledLLM struct {
+	t     *testing.T
+	calls int
+	mu    sync.Mutex
+}
+
+func (l *failIfCalledLLM) Complete(_ context.Context, _ llmgate.Request) (*llmgate.Response, error) {
+	l.mu.Lock()
+	l.calls++
+	l.mu.Unlock()
+	l.t.Helper()
+	l.t.Errorf("minimal mode issued an LLM Complete call; it must do zero LLM work")
+	return nil, llmgate.ErrNotImplemented
+}
+
+func (l *failIfCalledLLM) CountTokens(_ context.Context, text string) (int, error) {
+	l.mu.Lock()
+	l.calls++
+	l.mu.Unlock()
+	l.t.Helper()
+	l.t.Errorf("minimal mode issued an LLM CountTokens call; it must do zero LLM work")
+	return len(text) / 4, nil
+}
+
+func (l *failIfCalledLLM) callCount() int {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	return l.calls
+}
+
+// TestMinimalModeZeroLLMCalls is the headline guarantee: a minimal-mode
+// pipeline run reaches StatusReady with sections persisted while making
+// ZERO LLM calls. The LLM client fails the test on any call, and we also
+// assert its call counter stayed at 0 — together proving minimal ingest
+// is pure-Go (parse → persist → ready), no summarize / HyDE / multi-axis
+// / TOC.
+func TestMinimalModeZeroLLMCalls(t *testing.T) {
+	t.Parallel()
+
+	ctx := context.Background()
+
+	store, err := storage.NewLocal(t.TempDir())
+	if err != nil {
+		t.Fatalf("init local storage: %v", err)
+	}
+
+	fixture, err := os.ReadFile("../../testdata/rust-ownership.md")
+	if err != nil {
+		t.Fatalf("read fixture: %v", err)
+	}
+	docID := NewDocumentID()
+	srcKey := SourceKey(docID, "rust-ownership.md")
+	if err := store.Put(ctx, srcKey, bytes.NewReader(fixture), storage.Metadata{
+		ContentType: "text/markdown",
+		Size:        int64(len(fixture)),
+	}); err != nil {
+		t.Fatalf("stage source: %v", err)
+	}
+
+	llm := &failIfCalledLLM{t: t}
+
+	// Construct the pipeline through NewPipeline (the production path) in
+	// minimal mode. The HyDE, SummaryAxes and TOC flags are deliberately
+	// switched ON to prove the minimal switch — not a pile of disabled
+	// sub-flags — is what suppresses the LLM work.
+	p := NewPipeline(Pipeline{
+		DB:                 nil, // never touched: runMinimal takes the store explicitly
+		Storage:            store,
+		LLM:                llm,
+		Parsers:            DefaultRegistry(),
+		Logger:             slog.New(slog.NewTextHandler(io.Discard, nil)),
+		Mode:               ModeMinimal,
+		HyDEEnabled:        true,
+		SummaryAxesEnabled: true,
+		TOCEnabled:         true,
+	})
+
+	fake := &fakeDocStore{}
+	if err := p.runMinimal(ctx, fake, Payload{
+		DocumentID:  docID,
+		ContentType: "text/markdown",
+		Filename:    "rust-ownership.md",
+		SourceRef:   srcKey,
+	}); err != nil {
+		t.Fatalf("runMinimal: %v", err)
+	}
+
+	status, errMsg, sections := fake.snapshot()
+	if status != db.StatusReady {
+		t.Fatalf("doc status = %q (err=%q); minimal mode did not reach ready", status, errMsg)
+	}
+	if len(sections) == 0 {
+		t.Fatal("minimal mode persisted zero sections")
+	}
+	if n := llm.callCount(); n != 0 {
+		t.Fatalf("minimal mode made %d LLM calls; want 0", n)
+	}
+
+	// No summaries / axes / candidate-questions were written — minimal
+	// mode skips every enrichment stage, so every persisted section is
+	// bare (title + content ref only).
+	for _, s := range sections {
+		if s.Summary != "" {
+			t.Errorf("section %s carries a summary in minimal mode: %q", s.ID, s.Summary)
+		}
+		if s.SummaryAxes != nil {
+			t.Errorf("section %s carries summary_axes in minimal mode", s.ID)
+		}
+		if len(s.CandidateQuestions) != 0 {
+			t.Errorf("section %s carries HyDE questions in minimal mode", s.ID)
+		}
+	}
+}
+
+// TestMinimalModeReadyIsQueryable proves a minimal-ingested document is
+// usable by the page-based retrieval strategy's two run-time inputs:
+//
+//  1. the synthesised TOC (documents.toc_tree is NULL after minimal
+//     ingest, so the strategy falls back to synthesiseTOC over the
+//     section tree) — must be a non-empty, title-bearing structure; and
+//  2. raw section bodies read from storage via the section ContentRef —
+//     must return the persisted text.
+//
+// It reconstructs the tree from exactly what runMinimal persisted, so it
+// exercises the real post-ingest shape. The end-to-end PageIndexStrategy
+// loop is covered in pkg/retrieval (TestPageIndexMinimalIngestedDoc).
+func TestMinimalModeReadyIsQueryable(t *testing.T) {
+	t.Parallel()
+
+	ctx := context.Background()
+	store, err := storage.NewLocal(t.TempDir())
+	if err != nil {
+		t.Fatalf("init local storage: %v", err)
+	}
+
+	fixture, err := os.ReadFile("../../testdata/rust-ownership.md")
+	if err != nil {
+		t.Fatalf("read fixture: %v", err)
+	}
+	docID := NewDocumentID()
+	srcKey := SourceKey(docID, "rust-ownership.md")
+	if err := store.Put(ctx, srcKey, bytes.NewReader(fixture), storage.Metadata{
+		ContentType: "text/markdown",
+		Size:        int64(len(fixture)),
+	}); err != nil {
+		t.Fatalf("stage source: %v", err)
+	}
+
+	p := NewPipeline(Pipeline{
+		Storage: store,
+		LLM:     &failIfCalledLLM{t: t},
+		Parsers: DefaultRegistry(),
+		Logger:  slog.New(slog.NewTextHandler(io.Discard, nil)),
+		Mode:    ModeMinimal,
+	})
+	fake := &fakeDocStore{}
+	if err := p.runMinimal(ctx, fake, Payload{
+		DocumentID:  docID,
+		ContentType: "text/markdown",
+		Filename:    "rust-ownership.md",
+		SourceRef:   srcKey,
+	}); err != nil {
+		t.Fatalf("runMinimal: %v", err)
+	}
+	_, _, sections := fake.snapshot()
+
+	// Reconstruct the tree from persisted rows (mirrors db.buildTree's
+	// parent→children wiring) and confirm it is non-trivial.
+	root := reconstructTree(docID, fake.title, sections)
+	if root == nil {
+		t.Fatal("reconstructed tree root is nil; minimal mode persisted nothing usable")
+	}
+
+	// (1) The section tree the synthesised-TOC fallback walks carries titles.
+	titleSeen := false
+	var walk func(*tree.Section)
+	walk = func(s *tree.Section) {
+		if s == nil {
+			return
+		}
+		if s.Title != "" {
+			titleSeen = true
+		}
+		for _, c := range s.Children {
+			walk(c)
+		}
+	}
+	walk(root)
+	if !titleSeen {
+		t.Error("reconstructed section tree carries no titles; synthesised TOC would be empty")
+	}
+
+	// (2) At least one persisted section has a ContentRef whose bytes
+	// load back from storage — the raw text the page strategy reads at
+	// query time.
+	loadedSomeBody := false
+	for _, s := range sections {
+		if s.ContentRef == "" {
+			continue
+		}
+		rc, _, err := store.Get(ctx, s.ContentRef)
+		if err != nil {
+			t.Fatalf("load section %s content: %v", s.ID, err)
+		}
+		body, _ := io.ReadAll(rc)
+		rc.Close()
+		if len(bytes.TrimSpace(body)) > 0 {
+			loadedSomeBody = true
+		}
+	}
+	if !loadedSomeBody {
+		t.Error("no section body loaded from storage; page strategy would have no raw text to read")
+	}
+}
+
+// reconstructTree wires a flat db.Section list into a tree.Section root,
+// matching db.buildTree's behaviour (which is unexported): a single
+// top-level section becomes the root; multiple are wrapped in a
+// synthetic empty-ID root carrying the document title.
+func reconstructTree(_ tree.DocumentID, title string, rows []db.Section) *tree.Section {
+	byID := make(map[tree.SectionID]*tree.Section, len(rows))
+	for _, r := range rows {
+		byID[r.ID] = &tree.Section{
+			ID:         r.ID,
+			ParentID:   r.ParentID,
+			Ordinal:    r.Ordinal,
+			Title:      r.Title,
+			ContentRef: r.ContentRef,
+			PageStart:  r.PageStart,
+			PageEnd:    r.PageEnd,
+		}
+	}
+	var topLevel []*tree.Section
+	for _, r := range rows {
+		s := byID[r.ID]
+		if s.ParentID == "" {
+			topLevel = append(topLevel, s)
+			continue
+		}
+		if parent, ok := byID[s.ParentID]; ok {
+			parent.Children = append(parent.Children, s)
+		}
+	}
+	switch len(topLevel) {
+	case 0:
+		return nil
+	case 1:
+		return topLevel[0]
+	default:
+		return &tree.Section{Title: title, Children: topLevel}
+	}
+}
diff --git a/pkg/retrieval/pageindex_strategy_test.go b/pkg/retrieval/pageindex_strategy_test.go
index e6b9e0d..4d50b37 100644
--- a/pkg/retrieval/pageindex_strategy_test.go
+++ b/pkg/retrieval/pageindex_strategy_test.go
@@ -190,6 +190,87 @@ func TestPageIndexHappyPath(t *testing.T) {
 	}
 }
 
+// buildMinimalIngestedTree returns a fixture shaped like a document that
+// went through MINIMAL ingest mode: every section has a title and a page
+// range, only the three leaves carry a ContentRef, and no summary or HyDE
+// data is set anywhere. Layout: Rust (pages 1-7) > Memory (1-4) >
+// {Ownership 1-2, Borrowing 3-4} and Advanced (5-7) > {Lifetimes 5-7}.
+// Callers model the NULL documents.toc_tree by leaving the TOC nil.
+func buildMinimalIngestedTree() *tree.Tree {
+	a1 := &tree.Section{ID: "sec_a1", ParentID: "sec_a", Title: "Ownership", ContentRef: "a1_ref", PageStart: 1, PageEnd: 2}
+	a2 := &tree.Section{ID: "sec_a2", ParentID: "sec_a", Title: "Borrowing", ContentRef: "a2_ref", PageStart: 3, PageEnd: 4}
+	b1 := &tree.Section{ID: "sec_b1", ParentID: "sec_b", Title: "Lifetimes", ContentRef: "b1_ref", PageStart: 5, PageEnd: 7}
+	a := &tree.Section{ID: "sec_a", ParentID: "sec_root", Title: "Memory", Children: []*tree.Section{a1, a2}, PageStart: 1, PageEnd: 4}
+	b := &tree.Section{ID: "sec_b", ParentID: "sec_root", Title: "Advanced", Children: []*tree.Section{b1}, PageStart: 5, PageEnd: 7}
+	root := &tree.Section{ID: "sec_root", Title: "Rust", Children: []*tree.Section{a, b}, PageStart: 1, PageEnd: 7}
+	return &tree.Tree{DocumentID: "doc_minimal", Title: "Rust", Root: root}
+}
+
+// TestPageIndexMinimalIngestedDoc checks that the page-based strategy can
+// answer over a document that carries no LLM enrichment: the fixture has
+// no summaries or HyDE data, and the strategy's TOC is left nil to stand
+// in for the NULL toc_tree that minimal ingest leaves behind. A scripted
+// LLM drives the canonical get_document_structure → get_pages → done
+// loop, and the test asserts that:
+//
+//   - the prompt following get_document_structure contains a section
+//     title taken from the tree, i.e. a TOC was synthesised; and
+//   - the prompt following get_pages contains the raw section text that
+//     the page loader returned for the requested pages.
+//
+// Because nothing in the tree is summarised, a pass also shows the
+// strategy can navigate and answer without any summary.
+func TestPageIndexMinimalIngestedDoc(t *testing.T) {
+	t.Parallel()
+
+	doc := buildMinimalIngestedTree()
+	budget := retrieval.ContextBudget{MaxTokens: 100000}
+	scripted := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"get_document_structure","reasoning":"orient by titles"}`,
+			`{"tool":"get_pages","start_page":1,"end_page":2,"reasoning":"ownership lives up front"}`,
+			`{"tool":"done","answer":"Ownership is a set of rules the compiler checks.","cited_pages":[[1,2]],"reasoning":"pages 1-2 define ownership"}`,
+		},
+	}
+	strategy := retrieval.NewPageIndexStrategy(scripted)
+	// strategy.TOC is deliberately never set: a nil TOC stands in for the
+	// NULL documents.toc_tree, so the strategy has to synthesise one.
+	strategy.PageLoader = pageMapLoader{data: map[string]string{
+		"a1_ref": "Ownership is a set of rules that govern how a Rust program manages memory.",
+		"a2_ref": "References borrow a value without taking ownership.",
+		"b1_ref": "Lifetimes ensure references are valid.",
+	}}
+
+	got, err := strategy.SelectWithCost(context.Background(), doc, "what is ownership?", budget)
+	if err != nil {
+		t.Fatalf("SelectWithCost on minimal-ingested doc: %v", err)
+	}
+	if !strings.Contains(got.Reasoning, "Ownership is a set of rules") {
+		t.Errorf("answer must carry the model's reply, got %q", got.Reasoning)
+	}
+	if _, cited := indexOfSection(got.SelectedIDs, "sec_a1"); !cited {
+		t.Errorf("sec_a1 (pages 1-2) must be cited, got %v", got.SelectedIDs)
+	}
+	if len(got.PagesRead) != 1 || got.PagesRead[0].CharCount == 0 {
+		t.Errorf("expected one non-empty get_pages read, got %+v", got.PagesRead)
+	}
+
+	scripted.mu.Lock()
+	defer scripted.mu.Unlock()
+	prompts := scripted.lastPrompts
+	if len(prompts) < 3 {
+		t.Fatalf("expected >=3 prompts captured, got %d", len(prompts))
+	}
+	// Prompt 1 follows get_document_structure: it must show a tree title.
+	if !strings.Contains(prompts[1], "Ownership") {
+		t.Errorf("synthesised TOC observation should include section titles; got:\n%s", prompts[1])
+	}
+	// Prompt 2 follows get_pages: it must show the loader's raw text.
+	if !strings.Contains(prompts[2], "Ownership is a set of rules that govern") {
+		t.Errorf("get_pages observation should include raw section content; got:\n%s", prompts[2])
+	}
+}
+
 // TestPageIndexMultiRangeDone covers a done with two cited ranges:
 // the strategy must surface every section that overlaps EITHER
 // range. This is the FinanceBench-shaped pattern: an answer that