Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions cmd/engine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,16 +169,20 @@ func run() error {
}

pipeline := ingest.NewPipeline(ingest.Pipeline{
DB: pool,
Storage: store,
LLM: llmClient,
Parsers: ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Ingest.Tables)),
Logger: logger,
HyDEEnabled: cfg.Ingest.HyDE.Enabled,
HyDEModel: cfg.Ingest.HyDE.Model,
HyDENumQuestions: cfg.Ingest.HyDE.NumQuestions,
HyDEConcurrency: cfg.Ingest.HyDE.Concurrency,
GlobalLLMConcurrency: cfg.Ingest.GlobalLLMConcurrency,
DB: pool,
Storage: store,
LLM: llmClient,
Parsers: ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Ingest.Tables)),
Logger: logger,
HyDEEnabled: cfg.Ingest.HyDE.Enabled,
HyDEModel: cfg.Ingest.HyDE.Model,
HyDENumQuestions: cfg.Ingest.HyDE.NumQuestions,
HyDEConcurrency: cfg.Ingest.HyDE.Concurrency,
SummaryAxesEnabled: cfg.Ingest.SummaryAxes.Enabled,
SummaryAxesMaxTopics: cfg.Ingest.SummaryAxes.MaxTopics,
SummaryAxesMaxEntities: cfg.Ingest.SummaryAxes.MaxEntities,
SummaryAxesMaxNumbers: cfg.Ingest.SummaryAxes.MaxNumbers,
GlobalLLMConcurrency: cfg.Ingest.GlobalLLMConcurrency,
})
if cfg.Ingest.Tables.Enabled {
logger.Info("ingest: pdf table extraction enabled",
Expand Down
24 changes: 14 additions & 10 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,20 @@ func run() error {

// ── Ingest pipeline ───────────────────────────────────────────
pipeline := ingest.NewPipeline(ingest.Pipeline{
DB: pool,
Storage: store,
LLM: llmClient,
Parsers: ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Engine.Ingest.Tables)),
Logger: logger,
HyDEEnabled: cfg.Engine.Ingest.HyDE.Enabled,
HyDEModel: cfg.Engine.Ingest.HyDE.Model,
HyDENumQuestions: cfg.Engine.Ingest.HyDE.NumQuestions,
HyDEConcurrency: cfg.Engine.Ingest.HyDE.Concurrency,
GlobalLLMConcurrency: cfg.Engine.Ingest.GlobalLLMConcurrency,
DB: pool,
Storage: store,
LLM: llmClient,
Parsers: ingest.RegistryFromTableOpts(tableOptsFromConfig(cfg.Engine.Ingest.Tables)),
Logger: logger,
HyDEEnabled: cfg.Engine.Ingest.HyDE.Enabled,
HyDEModel: cfg.Engine.Ingest.HyDE.Model,
HyDENumQuestions: cfg.Engine.Ingest.HyDE.NumQuestions,
HyDEConcurrency: cfg.Engine.Ingest.HyDE.Concurrency,
SummaryAxesEnabled: cfg.Engine.Ingest.SummaryAxes.Enabled,
SummaryAxesMaxTopics: cfg.Engine.Ingest.SummaryAxes.MaxTopics,
SummaryAxesMaxEntities: cfg.Engine.Ingest.SummaryAxes.MaxEntities,
SummaryAxesMaxNumbers: cfg.Engine.Ingest.SummaryAxes.MaxNumbers,
GlobalLLMConcurrency: cfg.Engine.Ingest.GlobalLLMConcurrency,
})
if cfg.Engine.Ingest.Tables.Enabled {
logger.Info("ingest: pdf table extraction enabled",
Expand Down
20 changes: 20 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,26 @@ ingest:
min_table_rows: 2
min_table_cols: 2

# Multi-axis structured summaries (Phase 2.5). When enabled, the
# summarize stage runs in JSON mode and produces {topics, entities,
# numbers, one_line} per section instead of a single sentence. The
# structured blob lives in sections.summary_axes; the one_line still
# populates sections.summary so older API consumers keep working
# unchanged. The retrieval prompt surfaces entities + numbers from
# the axes block on the section line so the model has direct
# surface-form access to proper-noun and numeric anchors.
#
# ENABLED BY DEFAULT. Flip to false to roll back to the pre-2.5
# single-sentence path without redeploying the binary.
summary_axes:
enabled: true
# Per-axis caps prevent a misbehaving model from blowing up the
# retrieval prompt budget. Values below are the defaults; tune
# only if the model returns systematically truncated output.
max_topics: 4
max_entities: 8
max_numbers: 6

log:
level: "info" # debug | info | warn | error
format: "json" # json | console
11 changes: 11 additions & 0 deletions config.server.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ engine:
num_questions: 5
concurrency: 4

# Multi-axis structured summaries (Phase 2.5). JSON-mode summarizer
# returns {topics, entities, numbers, one_line}. The retrieval
# prompt surfaces entities + numbers on the section line; the
# one_line continues to populate the flat `summary` field for
# backward compatibility.
summary_axes:
enabled: true
max_topics: 4
max_entities: 8
max_numbers: 6

log:
level: "info" # "debug", "info", "warn", "error"
format: "json" # "json" or "console"
38 changes: 38 additions & 0 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,8 @@ components:
type: string
summary:
type: string
summary_axes:
$ref: "#/components/schemas/SummaryAxes"
children:
type: array
items:
Expand All @@ -482,6 +484,8 @@ components:
type: string
summary:
type: string
summary_axes:
$ref: "#/components/schemas/SummaryAxes"
token_count:
type: integer
page_start:
Expand Down Expand Up @@ -594,6 +598,8 @@ components:
type: string
summary:
type: string
summary_axes:
$ref: "#/components/schemas/SummaryAxes"
token_count:
type: integer
page_start:
Expand Down Expand Up @@ -638,6 +644,38 @@ components:
text:
type: string

SummaryAxes:
type: object
description: |
Phase 2.5 multi-axis structured summary. The summarizer
produces this object alongside the flat `summary` field;
`summary` continues to carry the one-line sentence (which is
the same string as `one_line` here) so older API consumers
keep working. Omitted on sections written before the Phase
2.5 schema migration; populated on newly-ingested sections
when the structured summarizer (`summary_axes.enabled`) is
on. Each list axis is also independently optional —
sections without proper-noun mentions or numeric values
leave the corresponding array empty.
properties:
topics:
type: array
items:
type: string
description: Lower-case, hyphenated topic keywords (e.g. `debt`, `long-term-obligations`).
entities:
type: array
items:
type: string
description: Proper-noun mentions extracted verbatim from the section (orgs, people, places, dates).
numbers:
type: array
items:
type: string
description: Standout numeric values with units as they appear (e.g. `$4.2B`, `2.8%`, `Q3 2024`).
one_line:
type: string
description: Human-readable sentence describing the section. Mirrors the flat `summary` field.

AnswerRequest:
type: object
required: [document_id, query]
Expand Down
76 changes: 76 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ type IngestConfig struct {
// hides in a balance sheet that text-only extraction collapses.
Tables TablesConfig `yaml:"tables"`

// SummaryAxes configures the Phase 2.5 multi-axis summarizer.
// Enabled by default — the structured shape unlocks
// entity / numeric matching at retrieval time without changing the
// existing `summary` field's contract (axes.one_line continues to
// populate it).
SummaryAxes SummaryAxesBlock `yaml:"summary_axes"`

// GlobalLLMConcurrency caps the total number of LLM calls in flight
// across the summarize and HyDE stages combined, which now run
// concurrently. Each stage still respects its own per-stage cap
Expand All @@ -53,6 +60,33 @@ type IngestConfig struct {
GlobalLLMConcurrency int `yaml:"global_llm_concurrency"`
}

// SummaryAxesBlock configures the Phase 2.5 structured summarizer.
//
// When enabled, the summarize stage runs in JSON mode and produces
// {topics, entities, numbers, one_line} per section instead of a
// single sentence. The structured blob is persisted in
// sections.summary_axes (JSONB); the one_line continues to land in
// sections.summary so older API consumers keep working.
//
// Disable to roll back to the pre-2.5 single-sentence behaviour
// without redeploying the binary — useful for A/B comparisons or as
// a fast off-switch if a real-world document triggers a regression.
//
// Defaults are set in Default(). Every field can also be overridden
// through a VLE_INGEST_SUMMARY_AXES_* environment variable (see
// applyEnvOverrides), and Validate rejects negative caps.
type SummaryAxesBlock struct {
// Enabled toggles the structured path. Default: true.
// Env override: VLE_INGEST_SUMMARY_AXES_ENABLED, accepting
// 1/true/yes/on or 0/false/no/off (case-insensitive, surrounding
// whitespace trimmed); any other value leaves the setting unchanged.
Enabled bool `yaml:"enabled"`

// MaxTopics caps the topics axis the summarizer returns per
// section. Default: 4. A misbehaving model that returns 50 topics
// can't push past this cap; the prompt-budget impact stays bounded.
// Env override: VLE_INGEST_SUMMARY_AXES_MAX_TOPICS (positive
// integers only; anything else keeps the current value).
// NOTE(review): Validate accepts 0 while the env override ignores
// it; what a 0 cap means to the summarizer is not visible here —
// confirm before relying on it.
MaxTopics int `yaml:"max_topics"`

// MaxEntities caps the entities axis. Default: 8.
// Env override: VLE_INGEST_SUMMARY_AXES_MAX_ENTITIES (positive
// integers only; anything else keeps the current value).
MaxEntities int `yaml:"max_entities"`

// MaxNumbers caps the numbers axis. Default: 6.
// Env override: VLE_INGEST_SUMMARY_AXES_MAX_NUMBERS (positive
// integers only; anything else keeps the current value).
MaxNumbers int `yaml:"max_numbers"`
}

// TablesConfig configures the table-extraction stage of the PDF parser.
// The stage runs pdftable's geometry-based finder over every page and
// emits each detected table as its own Section with
Expand Down Expand Up @@ -504,6 +538,12 @@ func Default() Config {
MinTableRows: 2,
MinTableCols: 2,
},
SummaryAxes: SummaryAxesBlock{
Enabled: true,
MaxTopics: 4,
MaxEntities: 8,
MaxNumbers: 6,
},
},
Log: LogConfig{Level: "info", Format: "json"},
}
Expand Down Expand Up @@ -661,6 +701,32 @@ func applyEnvOverrides(c *Config) {
c.Ingest.Tables.MinTableCols = n
}
}
// Phase 2.5 structured-summary knobs. Booleans accept the same
// truthy strings the other ingest toggles use; numeric overrides
// require a positive int (a typo silently preserves the default).
if v := os.Getenv("VLE_INGEST_SUMMARY_AXES_ENABLED"); v != "" {
switch strings.ToLower(strings.TrimSpace(v)) {
case "1", "true", "yes", "on":
c.Ingest.SummaryAxes.Enabled = true
case "0", "false", "no", "off":
c.Ingest.SummaryAxes.Enabled = false
}
}
if v := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS"); v != "" {
if n, err := strconv.Atoi(v); err == nil && n > 0 {
c.Ingest.SummaryAxes.MaxTopics = n
}
}
if v := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_ENTITIES"); v != "" {
if n, err := strconv.Atoi(v); err == nil && n > 0 {
c.Ingest.SummaryAxes.MaxEntities = n
}
}
if v := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_NUMBERS"); v != "" {
if n, err := strconv.Atoi(v); err == nil && n > 0 {
c.Ingest.SummaryAxes.MaxNumbers = n
}
}
if v := os.Getenv("VLE_RETRIEVAL_ANSWER_SPAN_ENABLED"); v != "" {
switch strings.ToLower(strings.TrimSpace(v)) {
case "1", "true", "yes", "on":
Expand Down Expand Up @@ -849,6 +915,16 @@ func (c Config) Validate() error {
return fmt.Errorf("ingest.tables.min_table_cols must be >= 0, got %d", c.Ingest.Tables.MinTableCols)
}

if c.Ingest.SummaryAxes.MaxTopics < 0 {
return fmt.Errorf("ingest.summary_axes.max_topics must be >= 0, got %d", c.Ingest.SummaryAxes.MaxTopics)
}
if c.Ingest.SummaryAxes.MaxEntities < 0 {
return fmt.Errorf("ingest.summary_axes.max_entities must be >= 0, got %d", c.Ingest.SummaryAxes.MaxEntities)
}
if c.Ingest.SummaryAxes.MaxNumbers < 0 {
return fmt.Errorf("ingest.summary_axes.max_numbers must be >= 0, got %d", c.Ingest.SummaryAxes.MaxNumbers)
}

if c.Retrieval.Planning.CacheSize < 0 {
return fmt.Errorf("retrieval.planning.cache_size must be >= 0, got %d", c.Retrieval.Planning.CacheSize)
}
Expand Down
94 changes: 94 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,97 @@ func TestTablesValidateRejectsBadStrategy(t *testing.T) {
t.Error("expected error for negative min_table_rows")
}
}

// TestSummaryAxesDefaults pins the Phase 2.5 defaults returned by
// Default(): the structured summarizer is enabled out of the box
// (operators opt out, not in) and the per-axis caps match the spec.
// The retrieval prompt downstream relies on these caps to keep the
// prompt budget bounded per section.
func TestSummaryAxesDefaults(t *testing.T) {
	t.Parallel()

	// Read the block once; every assertion below inspects this copy.
	axes := Default().Ingest.SummaryAxes

	if enabled := axes.Enabled; !enabled {
		t.Error("ingest.summary_axes.enabled should default to true (opt-out)")
	}
	if got := axes.MaxTopics; got != 4 {
		t.Errorf("max_topics = %d, want 4", got)
	}
	if got := axes.MaxEntities; got != 8 {
		t.Errorf("max_entities = %d, want 8", got)
	}
	if got := axes.MaxNumbers; got != 6 {
		t.Errorf("max_numbers = %d, want 6", got)
	}
}

// TestSummaryAxesEnvOverride covers the opt-out path: env disables the
// structured summarizer, and the numeric caps re-tune via env.
func TestSummaryAxesEnvOverride(t *testing.T) {
// Mutates env — restore on exit. Not parallel.
prevEnabled := os.Getenv("VLE_INGEST_SUMMARY_AXES_ENABLED")
prevTopics := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS")
prevEntities := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_ENTITIES")
prevNumbers := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_NUMBERS")
defer func() {
os.Setenv("VLE_INGEST_SUMMARY_AXES_ENABLED", prevEnabled)
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS", prevTopics)
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_ENTITIES", prevEntities)
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_NUMBERS", prevNumbers)
}()

os.Setenv("VLE_INGEST_SUMMARY_AXES_ENABLED", "false")
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS", "10")
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_ENTITIES", "20")
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_NUMBERS", "15")

cfg := Default()
applyEnvOverrides(&cfg)
if cfg.Ingest.SummaryAxes.Enabled {
t.Error("VLE_INGEST_SUMMARY_AXES_ENABLED=false should disable")
}
if cfg.Ingest.SummaryAxes.MaxTopics != 10 {
t.Errorf("max_topics env override: got %d, want 10", cfg.Ingest.SummaryAxes.MaxTopics)
}
if cfg.Ingest.SummaryAxes.MaxEntities != 20 {
t.Errorf("max_entities env override: got %d, want 20", cfg.Ingest.SummaryAxes.MaxEntities)
}
if cfg.Ingest.SummaryAxes.MaxNumbers != 15 {
t.Errorf("max_numbers env override: got %d, want 15", cfg.Ingest.SummaryAxes.MaxNumbers)
}
}

// TestSummaryAxesEnvOverrideRejectsBad: garbage values preserve the
// default rather than zeroing the cap (which would silently fail to
// trim model output).
func TestSummaryAxesEnvOverrideRejectsBad(t *testing.T) {
prevTopics := os.Getenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS")
defer os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS", prevTopics)
os.Setenv("VLE_INGEST_SUMMARY_AXES_MAX_TOPICS", "not-a-number")
cfg := Default()
applyEnvOverrides(&cfg)
if cfg.Ingest.SummaryAxes.MaxTopics != 4 {
t.Errorf("garbled env should preserve default 4, got %d", cfg.Ingest.SummaryAxes.MaxTopics)
}
}

// TestSummaryAxesValidateNegatives checks that Validate returns an
// error when any single summary-axes cap is negative, so a typo in the
// YAML is reported instead of silently disabling trimming.
func TestSummaryAxesValidateNegatives(t *testing.T) {
	t.Parallel()

	// Each case breaks exactly one cap on an otherwise default config.
	type negativeCase struct {
		axis   string
		mutate func(*Config)
	}
	cases := []negativeCase{
		{axis: "topics", mutate: func(c *Config) { c.Ingest.SummaryAxes.MaxTopics = -1 }},
		{axis: "entities", mutate: func(c *Config) { c.Ingest.SummaryAxes.MaxEntities = -1 }},
		{axis: "numbers", mutate: func(c *Config) { c.Ingest.SummaryAxes.MaxNumbers = -1 }},
	}

	for _, nc := range cases {
		t.Run(nc.axis, func(t *testing.T) {
			cfg := Default()
			// Set a database URL, as the original test does, before
			// applying the bad cap.
			cfg.Database.URL = "postgres://localhost/test"
			nc.mutate(&cfg)

			if cfg.Validate() == nil {
				t.Errorf("negative %s should fail validation", nc.axis)
			}
		})
	}
}
2 changes: 2 additions & 0 deletions pkg/db/migrations/0005_sections_summary_axes.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE sections
DROP COLUMN IF EXISTS summary_axes;
Loading
Loading