rust_sqlite/src/sql/executor.rs at main · joaoh82/rust_sqlite · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! Query executors — evaluate parsed SQL statements against the in-memory
//! storage and produce formatted output.

use std::cmp::Ordering;

use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
use sqlparser::ast::{
    AlterTable, AlterTableOperation, AssignmentTarget, BinaryOperator, CreateIndex, Delete, Expr,
    FromTable, FunctionArg, FunctionArgExpr, FunctionArguments, IndexType, ObjectName,
    ObjectNamePart, RenameTableNameKind, Statement, TableFactor, TableWithJoins, UnaryOperator,
    Update, Value as AstValue,
};

use crate::error::{Result, SQLRiteError};
use crate::sql::agg::{AggState, DistinctKey, like_match};
use crate::sql::db::database::Database;
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
use crate::sql::db::table::{
    DataType, FtsIndexEntry, HnswIndexEntry, Table, Value, parse_vector_literal,
};
use crate::sql::fts::{Bm25Params, PostingList};
use crate::sql::hnsw::{DistanceMetric, HnswIndex};
use crate::sql::parser::select::{
    AggregateArg, JoinType, OrderByClause, Projection, ProjectionItem, ProjectionKind, SelectQuery,
};

// -----------------------------------------------------------------
// SQLR-5 — Row-scope abstraction
// -----------------------------------------------------------------
//
// Single-table SELECT / UPDATE / DELETE evaluate WHERE / ORDER BY /
// projection expressions over `(&Table, rowid)`. JOIN evaluation
// needs the same expression evaluator to look up columns across
// multiple tables, with NULL padding for unmatched outer-join rows.
//
// Rather than fork the evaluator, we abstract "what's in scope when
// I see a column reference" behind a trait. Every callsite that
// previously took `(table, rowid)` now takes `&dyn RowScope`. The
// single-table case constructs a tiny `SingleTableScope`; the join
// case constructs a `JoinedScope` that knows about every table in
// scope plus the per-table rowid (or `None` for a NULL-padded row).
//
// The trait stays small on purpose:
//
//   - `lookup` resolves a column reference (`col` or `t.col`) to a
//     `Value`. NULL-padded joined rows yield `Value::Null` for any
//     column from their side. Ambiguous unqualified references in
//     joined scope error.
//
//   - `single_table_view` lets index-probing helpers (FTS, HNSW,
//     vec_distance) bail out cleanly when invoked over a join — they
//     need a `(Table, rowid)` pair to look up an index, and the
//     joined case can't answer without per-call disambiguation we
//     haven't plumbed yet. Returns `None` in joined scope.
pub(crate) trait RowScope {
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value>;

    /// `Some((table, rowid))` for a single-table scope; `None` for a
    /// joined scope. v1 join support delegates "needs single-table"
    /// helpers (FTS / HNSW / vec_distance with column args) to the
    /// single-table path; calling them from a joined query produces
    /// a `NotImplemented` error rather than wrong results.
    fn single_table_view(&self) -> Option<(&Table, i64)>;
}

/// The default scope for non-join queries: one table, one rowid.
pub(crate) struct SingleTableScope<'a> {
    table: &'a Table,
    rowid: i64,
}

impl<'a> SingleTableScope<'a> {
    pub(crate) fn new(table: &'a Table, rowid: i64) -> Self {
        Self { table, rowid }
    }
}

impl RowScope for SingleTableScope<'_> {
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value> {
        // The qualifier (if any) is ignored — we only have one table
        // in scope, so `t.col` resolves the same as `col`. This
        // matches the historical single-table path which did the
        // same thing in `eval_expr`.
        let _ = qualifier;
        Ok(self.table.get_value(col, self.rowid).unwrap_or(Value::Null))
    }

    fn single_table_view(&self) -> Option<(&Table, i64)> {
        Some((self.table, self.rowid))
    }
}

/// One table participating in a joined query, plus the user-visible
/// name to match against `t.col` qualifiers (alias if present, else
/// the bare table name).
pub(crate) struct JoinedTableRef<'a> {
    pub table: &'a Table,
    pub scope_name: String,
}

/// Multi-table scope used during join execution. `rowids[i]` is the
/// rowid in `tables[i]`, or `None` for a NULL-padded row coming out
/// of an outer join.
pub(crate) struct JoinedScope<'a> {
    pub tables: &'a [JoinedTableRef<'a>],
    pub rowids: &'a [Option<i64>],
}

impl RowScope for JoinedScope<'_> {
    fn lookup(&self, qualifier: Option<&str>, col: &str) -> Result<Value> {
        if let Some(q) = qualifier {
            // Qualified reference: pick the matching table; if it's
            // NULL-padded, the column is NULL; else fetch from row.
            let pos = self
                .tables
                .iter()
                .position(|t| t.scope_name.eq_ignore_ascii_case(q))
                .ok_or_else(|| {
                    SQLRiteError::Internal(format!(
                        "unknown table qualifier '{q}' in column reference '{q}.{col}'"
                    ))
                })?;
            if !self.tables[pos].table.contains_column(col.to_string()) {
                return Err(SQLRiteError::Internal(format!(
                    "column '{col}' does not exist on '{}'",
                    self.tables[pos].scope_name
                )));
            }
            return Ok(match self.rowids[pos] {
                None => Value::Null,
                Some(r) => self.tables[pos]
                    .table
                    .get_value(col, r)
                    .unwrap_or(Value::Null),
            });
        }
        // Unqualified: search every in-scope table. Exactly-one match
        // wins; zero matches → unknown column; multi matches →
        // ambiguous, prompt the user to qualify.
        let mut hit: Option<usize> = None;
        for (i, t) in self.tables.iter().enumerate() {
            if t.table.contains_column(col.to_string()) {
                if hit.is_some() {
                    return Err(SQLRiteError::Internal(format!(
                        "column reference '{col}' is ambiguous — qualify it as <table>.{col}"
                    )));
                }
                hit = Some(i);
            }
        }
        let i = hit.ok_or_else(|| {
            SQLRiteError::Internal(format!(
                "unknown column '{col}' in joined SELECT (no in-scope table has it)"
            ))
        })?;
        Ok(match self.rowids[i] {
            None => Value::Null,
            Some(r) => self.tables[i]
                .table
                .get_value(col, r)
                .unwrap_or(Value::Null),
        })
    }

    fn single_table_view(&self) -> Option<(&Table, i64)> {
        None
    }
}

/// Executes a parsed `SelectQuery` against the database and returns a
/// human-readable rendering of the result set (prettytable). Also returns
/// the number of rows produced, for the top-level status message.
/// Structured result of a SELECT: column names in projection order,
/// and each matching row as a `Vec<Value>` aligned with the columns.
/// Phase 5a introduced this so the public `Connection` / `Statement`
/// API has typed rows to yield; the existing `execute_select` that
/// returns pre-rendered text is now a thin wrapper on top.
pub struct SelectResult {
    pub columns: Vec<String>,
    pub rows: Vec<Vec<Value>>,
}

/// Executes a SELECT and returns structured rows. The typed rows are
/// what the new public API streams to callers; the REPL / Tauri app
/// pre-render into a prettytable via `execute_select`.
pub fn execute_select_rows(query: SelectQuery, db: &Database) -> Result<SelectResult> {
    // SQLR-5 — joined SELECTs go through a dedicated executor that
    // knows how to thread a multi-table scope through expression
    // evaluation. The single-table fast path below stays untouched
    // (and so do its HNSW / FTS / bounded-heap optimizations).
    if !query.joins.is_empty() {
        return execute_select_rows_joined(query, db);
    }

    let table = db
        .get_table(query.table_name.clone())
        .map_err(|_| SQLRiteError::Internal(format!("Table '{}' not found", query.table_name)))?;

    // SQLR-3: Materialize the projection as `Vec<ProjectionItem>` so
    // both the simple-row path and the aggregation path can iterate the
    // same shape. `Projection::All` expands to bare-column items in
    // declaration order; that path then runs the existing rowid pipeline.
    let proj_items: Vec<ProjectionItem> = match &query.projection {
        Projection::All => table
            .column_names()
            .into_iter()
            .map(|c| ProjectionItem {
                kind: ProjectionKind::Column {
                    qualifier: None,
                    name: c,
                },
                alias: None,
            })
            .collect(),
        Projection::Items(items) => items.clone(),
    };
    let has_aggregates = proj_items
        .iter()
        .any(|i| matches!(i.kind, ProjectionKind::Aggregate(_)));
    // Validate bare-column references against the table schema.
    for item in &proj_items {
        if let ProjectionKind::Column { name: c, .. } = &item.kind
            && !table.contains_column(c.clone())
        {
            return Err(SQLRiteError::Internal(format!(
                "Column '{c}' does not exist on table '{}'",
                query.table_name
            )));
        }
    }
    for c in &query.group_by {
        if !table.contains_column(c.clone()) {
            return Err(SQLRiteError::Internal(format!(
                "GROUP BY references unknown column '{c}' on table '{}'",
                query.table_name
            )));
        }
    }
    // Collect matching rowids. If the WHERE is the shape `col = literal`
    // and `col` has a secondary index, probe the index for an O(log N)
    // seek; otherwise fall back to the full table scan.
    let matching = match select_rowids(table, query.selection.as_ref())? {
        RowidSource::IndexProbe(rowids) => rowids,
        RowidSource::FullScan => {
            let mut out = Vec::new();
            for rowid in table.rowids() {
                if let Some(expr) = &query.selection
                    && !eval_predicate(expr, table, rowid)?
                {
                    continue;
                }
                out.push(rowid);
            }
            out
        }
    };
    let mut matching = matching;

    let aggregating = has_aggregates || !query.group_by.is_empty();

    // SQLR-3: aggregation path. When the SELECT contains aggregates or a
    // GROUP BY, the rowid-shaped optimizations (HNSW / FTS / bounded
    // heap) don't compose with grouping — every row contributes to its
    // group, so we walk the full filtered rowid set, accumulate, then
    // sort/truncate the resulting *output rows*.
    if aggregating {
        // Validate aggregate column args.
        for item in &proj_items {
            if let ProjectionKind::Aggregate(call) = &item.kind
                && let AggregateArg::Column(c) = &call.arg
                && !table.contains_column(c.clone())
            {
                return Err(SQLRiteError::Internal(format!(
                    "{}({}) references unknown column '{c}' on table '{}'",
                    call.func.as_str(),
                    c,
                    query.table_name
                )));
            }
        }

        let columns: Vec<String> = proj_items.iter().map(|i| i.output_name()).collect();
        let mut rows = aggregate_rows(table, &matching, &query.group_by, &proj_items)?;

        if query.distinct {
            rows = dedupe_rows(rows);
        }

        if let Some(order) = &query.order_by {
            sort_output_rows(&mut rows, &columns, &proj_items, order)?;
        }
        if let Some(k) = query.limit {
            rows.truncate(k);
        }

        return Ok(SelectResult { columns, rows });
    }

    // Non-aggregating path — same flow as before, with the extra
    // affordances that (a) the projection list now goes through
    // `ProjectionItem` and (b) DISTINCT applies after row materialization.

    // Phase 7c — bounded-heap top-k optimization.
    //
    // The naive "ORDER BY <expr>" path (Phase 7b) sorts every matching
    // rowid: O(N log N) sort_by + a truncate. For KNN queries
    //
    //     SELECT id FROM docs
    //     ORDER BY vec_distance_l2(embedding, [...])
    //     LIMIT 10;
    //
    // N is the table row count and k is the LIMIT. With a bounded
    // max-heap of size k we can find the top-k in O(N log k) — same
    // sort_by-per-row cost on the heap operations, but k is typically
    // 10-100 while N can be millions.
    //
    // Phase 7d.2 — HNSW ANN probe.
    //
    // Even better than the bounded heap: if the ORDER BY expression is
    // exactly `vec_distance_l2(<col>, <bracket-array literal>)` AND
    // `<col>` has an HNSW index attached, skip the linear scan
    // entirely and probe the graph in O(log N). Approximate but
    // typically ≥ 0.95 recall (verified by the recall tests in
    // src/sql/hnsw.rs).
    //
    // We branch in cases:
    //   1. ORDER BY + LIMIT k matches the HNSW probe pattern  → graph probe.
    //   2. ORDER BY + LIMIT k matches the FTS probe pattern   → posting probe.
    //   3. ORDER BY + LIMIT k where k < |matching|            → bounded heap (7c).
    //   4. ORDER BY without LIMIT, or LIMIT >= |matching|     → full sort.
    //   5. LIMIT without ORDER BY                              → just truncate.
    //
    // DISTINCT is applied post-projection (we'd over-truncate if LIMIT
    // ran before DISTINCT had a chance to collapse duplicates), so when
    // DISTINCT is on we defer truncation past the dedupe step.
    let defer_limit_for_distinct = query.distinct;
    match (&query.order_by, query.limit) {
        (Some(order), Some(k)) if try_hnsw_probe(table, &order.expr, k).is_some() => {
            matching = try_hnsw_probe(table, &order.expr, k).unwrap();
        }
        (Some(order), Some(k))
            if try_fts_probe(table, &order.expr, order.ascending, k).is_some() =>
        {
            matching = try_fts_probe(table, &order.expr, order.ascending, k).unwrap();
        }
        (Some(order), Some(k)) if !defer_limit_for_distinct && k < matching.len() => {
            matching = select_topk(&matching, table, order, k)?;
        }
        (Some(order), _) => {
            sort_rowids(&mut matching, table, order)?;
            if let Some(k) = query.limit
                && !defer_limit_for_distinct
            {
                matching.truncate(k);
            }
        }
        (None, Some(k)) if !defer_limit_for_distinct => {
            matching.truncate(k);
        }
        _ => {}
    }

    let columns: Vec<String> = proj_items.iter().map(|i| i.output_name()).collect();
    let projected_cols: Vec<String> = proj_items
        .iter()
        .map(|i| match &i.kind {
            ProjectionKind::Column { name, .. } => name.clone(),
            ProjectionKind::Aggregate(_) => unreachable!("aggregation handled above"),
        })
        .collect();

    // Build typed rows. Missing cells surface as `Value::Null` — that
    // maps a column-not-present-for-this-rowid case onto the public
    // `Row::get` → `Option<T>` surface cleanly.
    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(matching.len());
    for rowid in &matching {
        let row: Vec<Value> = projected_cols
            .iter()
            .map(|col| table.get_value(col, *rowid).unwrap_or(Value::Null))
            .collect();
        rows.push(row);
    }

    if query.distinct {
        rows = dedupe_rows(rows);
        if let Some(k) = query.limit {
            rows.truncate(k);
        }
    }

    Ok(SelectResult { columns, rows })
}

// -----------------------------------------------------------------
// SQLR-5 — Joined SELECT execution
// -----------------------------------------------------------------
//
// The strategy is a left-folded nested-loop join: start with the
// rowids of the leading FROM table, then for each JOIN clause
// combine the accumulator (`Vec<Vec<Option<i64>>>`) with the rowids
// of the next table. Each join flavor differs only in how it
// handles unmatched left / right rows:
//
//   INNER       — drop unmatched on both sides
//   LEFT OUTER  — keep every left row; pad right side with NULL
//   RIGHT OUTER — keep every right row; pad left side with NULL
//   FULL OUTER  — keep both unmatched sets, NULL-padding the other
//
// This isn't a hash join — every join is O(N×M) in the size of the
// accumulator and the right table. Adequate for SQLRite's "embedded
// learning database" niche; a future phase could layer hash / merge
// joins on equi-join shapes without changing the surface API.
//
// Aggregates / GROUP BY / DISTINCT over joined results are rejected
// at parse time (see SelectQuery::new). They aren't impossible —
// the joined-row stream is just a different rowid source feeding
// the same aggregator — but we left the validator that ties bare
// columns to GROUP BY a single-table assumption, and reworking it
// is outside this phase. Surfaces as a clean NotImplemented today.
fn execute_select_rows_joined(query: SelectQuery, db: &Database) -> Result<SelectResult> {
    // Resolve every participating table once and capture its scope
    // name (alias if supplied, else table name). Scope names are
    // case-sensitive in matching the original identifier text;
    // qualifier matches in `JoinedScope::lookup` use
    // `eq_ignore_ascii_case` so `T1.c1` works whether the user
    // wrote `T1`, `t1`, or `T1` differently than the alias.
    let mut joined_tables: Vec<JoinedTableRef<'_>> = Vec::with_capacity(1 + query.joins.len());

    let primary = db
        .get_table(query.table_name.clone())
        .map_err(|_| SQLRiteError::Internal(format!("Table '{}' not found", query.table_name)))?;
    joined_tables.push(JoinedTableRef {
        table: primary,
        scope_name: query
            .table_alias
            .clone()
            .unwrap_or_else(|| query.table_name.clone()),
    });
    for j in &query.joins {
        let t = db
            .get_table(j.right_table.clone())
            .map_err(|_| SQLRiteError::Internal(format!("Table '{}' not found", j.right_table)))?;
        joined_tables.push(JoinedTableRef {
            table: t,
            scope_name: j
                .right_alias
                .clone()
                .unwrap_or_else(|| j.right_table.clone()),
        });
    }

    // Reject duplicate scope names — `FROM t JOIN t ON ...` without
    // an alias on one side would silently collapse qualifiers and
    // produce confusing results. Forcing the user to alias one side
    // keeps `t1.col` / `t2.col` unambiguous.
    {
        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
        for t in &joined_tables {
            let key = t.scope_name.to_ascii_lowercase();
            if !seen.insert(key) {
                return Err(SQLRiteError::Internal(format!(
                    "duplicate table reference '{}' in FROM/JOIN — use AS to alias one side",
                    t.scope_name
                )));
            }
        }
    }

    // Validate qualified projection column references against the
    // table they qualify. Unqualified names are validated by the
    // first scope lookup at row materialization — the runtime check
    // there gives the same "ambiguous / unknown" message we'd want
    // here, so we don't pre-resolve them.
    let proj_items: Vec<ProjectionItem> = match &query.projection {
        Projection::All => {
            // `SELECT *` over a join expands to every column of every
            // in-scope table, in source order. We use the bare column
            // name as both the projected identifier and the output
            // header — qualified expansion (`t1.col`) would force
            // composite headers like `t1.col` which conflict with
            // alias-less convention. Duplicate header names are
            // permitted (matches SQLite); callers needing
            // disambiguation can `SELECT t.col AS t_col`.
            let mut all = Vec::new();
            for t in &joined_tables {
                for col in t.table.column_names() {
                    all.push(ProjectionItem {
                        kind: ProjectionKind::Column {
                            // Qualify the synthetic items so duplicate
                            // column names across tables route to the
                            // right side at projection time. The output
                            // header still uses the bare `name`.
                            qualifier: Some(t.scope_name.clone()),
                            name: col,
                        },
                        alias: None,
                    });
                }
            }
            all
        }
        Projection::Items(items) => items.clone(),
    };

    let columns: Vec<String> = proj_items.iter().map(|i| i.output_name()).collect();

    // Stage 1: enumerate rows of the leading table. The accumulator
    // is `Vec<Vec<Option<i64>>>` where each inner `Vec` is a join
    // row whose i-th slot is the rowid of `joined_tables[i]` (or
    // None for a NULL-padded row from an outer join).
    let mut acc: Vec<Vec<Option<i64>>> = primary
        .rowids()
        .into_iter()
        .map(|r| {
            let mut row = Vec::with_capacity(joined_tables.len());
            row.push(Some(r));
            row
        })
        .collect();

    // Stage 2: fold each JOIN clause into the accumulator. After
    // join `i`, every row in `acc` has length `i + 2` (primary +
    // i+1 right tables joined). Unmatched-side handling depends on
    // the join flavor.
    for (j_idx, join) in query.joins.iter().enumerate() {
        let right_pos = j_idx + 1;
        let right_table = joined_tables[right_pos].table;
        let right_rowids: Vec<i64> = right_table.rowids();

        // Track which right rowids matched at least once across the
        // entire left accumulator. Used by RIGHT / FULL to emit
        // unmatched right rows after the loop.
        let mut right_matched: Vec<bool> = vec![false; right_rowids.len()];

        let mut next_acc: Vec<Vec<Option<i64>>> = Vec::with_capacity(acc.len());

        // ON evaluation only sees tables that are in scope *at this
        // join level* — the leading FROM table plus every right
        // table joined so far, including the one we're matching.
        // Restricting the scope means a typo like `JOIN c ON a.id =
        // c.id JOIN c ON ...` (referencing `c` before it joins)
        // surfaces as "unknown table qualifier 'c'" rather than
        // silently `NULL → false`-ing every row.
        let on_scope_tables: &[JoinedTableRef<'_>] = &joined_tables[..=right_pos];

        for left_row in acc.into_iter() {
            // Build a row prefix and extend it with each candidate
            // right rowid; record whether any matched (for outer
            // padding on the left side).
            let mut left_match_count = 0usize;
            for (r_idx, &rrid) in right_rowids.iter().enumerate() {
                let mut on_rowids: Vec<Option<i64>> = left_row.clone();
                on_rowids.push(Some(rrid));
                debug_assert_eq!(on_rowids.len(), on_scope_tables.len());
                let scope = JoinedScope {
                    tables: on_scope_tables,
                    rowids: &on_rowids,
                };
                // Reuse `eval_predicate_scope` so ON shares the same
                // truthiness rule WHERE uses — non-zero integers are
                // truthy, NULL is false, etc. — instead of rejecting
                // anything that isn't a literal bool.
                if eval_predicate_scope(&join.on, &scope)? {
                    left_match_count += 1;
                    right_matched[r_idx] = true;
                    // Accumulator entries carry only as many slots
                    // as join levels processed so far; the next
                    // iteration extends them again. No trailing
                    // padding needed here.
                    next_acc.push(on_rowids);
                }
            }

            if left_match_count == 0
                && matches!(join.join_type, JoinType::LeftOuter | JoinType::FullOuter)
            {
                // Outer-join NULL pad on the right side: keep the
                // left row, push None for the right rowid.
                let mut padded = left_row;
                padded.push(None);
                next_acc.push(padded);
            }
        }

        // Right-only emission for RIGHT / FULL: any right rowid that
        // never matched on the entire accumulator surfaces with all
        // left positions NULL-padded.
        if matches!(join.join_type, JoinType::RightOuter | JoinType::FullOuter) {
            for (r_idx, matched) in right_matched.iter().enumerate() {
                if *matched {
                    continue;
                }
                let mut row: Vec<Option<i64>> = vec![None; right_pos];
                row.push(Some(right_rowids[r_idx]));
                next_acc.push(row);
            }
        }

        acc = next_acc;
    }

    // Stage 3: apply WHERE on each fully-joined row. Outer-join
    // NULL-padded rows where WHERE references a NULL'd column will
    // (per SQL three-valued logic) be excluded — this is the same
    // posture as the single-table path.
    let mut filtered: Vec<Vec<Option<i64>>> = if let Some(where_expr) = &query.selection {
        let mut out = Vec::with_capacity(acc.len());
        for row in acc {
            let scope = JoinedScope {
                tables: &joined_tables,
                rowids: &row,
            };
            if eval_predicate_scope(where_expr, &scope)? {
                out.push(row);
            }
        }
        out
    } else {
        acc
    };

    // Stage 4: ORDER BY across the joined scope. We pre-compute the
    // sort key per row (same approach as `sort_rowids`) so the
    // comparator runs on Values, not against the expression tree.
    if let Some(order) = &query.order_by {
        // Validate up front so a bad ORDER BY surfaces a clear
        // error before sort starts.
        let mut keys: Vec<(usize, Value)> = Vec::with_capacity(filtered.len());
        for (i, row) in filtered.iter().enumerate() {
            let scope = JoinedScope {
                tables: &joined_tables,
                rowids: row,
            };
            let v = eval_expr_scope(&order.expr, &scope)?;
            keys.push((i, v));
        }
        keys.sort_by(|(_, a), (_, b)| {
            let ord = compare_values(Some(a), Some(b));
            if order.ascending { ord } else { ord.reverse() }
        });
        let mut sorted = Vec::with_capacity(filtered.len());
        for (i, _) in keys {
            sorted.push(filtered[i].clone());
        }
        filtered = sorted;
    }

    // Stage 5: LIMIT.
    if let Some(k) = query.limit {
        filtered.truncate(k);
    }

    // Stage 6: project. For each row, evaluate every projection item
    // through the joined scope.
    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(filtered.len());
    for row in &filtered {
        let scope = JoinedScope {
            tables: &joined_tables,
            rowids: row,
        };
        let mut out_row = Vec::with_capacity(proj_items.len());
        for item in &proj_items {
            let v = match &item.kind {
                ProjectionKind::Column { qualifier, name } => {
                    scope.lookup(qualifier.as_deref(), name)?
                }
                ProjectionKind::Aggregate(_) => {
                    // SelectQuery::new already rejects this combination,
                    // but defense in depth keeps the pattern match total.
                    return Err(SQLRiteError::Internal(
                        "aggregate functions over JOIN are not supported".to_string(),
                    ));
                }
            };
            out_row.push(v);
        }
        rows.push(out_row);
    }

    Ok(SelectResult { columns, rows })
}

/// Executes a SELECT and returns `(rendered_table, row_count)`. The
/// REPL and Tauri app use this to keep the table-printing behaviour
/// the engine has always shipped. Structured callers use
/// `execute_select_rows` instead.
pub fn execute_select(query: SelectQuery, db: &Database) -> Result<(String, usize)> {
    let result = execute_select_rows(query, db)?;
    let row_count = result.rows.len();

    let mut print_table = PrintTable::new();
    let header_cells: Vec<PrintCell> = result.columns.iter().map(|c| PrintCell::new(c)).collect();
    print_table.add_row(PrintRow::new(header_cells));

    for row in &result.rows {
        let cells: Vec<PrintCell> = row
            .iter()
            .map(|v| PrintCell::new(&v.to_display_string()))
            .collect();
        print_table.add_row(PrintRow::new(cells));
    }

    Ok((print_table.to_string(), row_count))
}

/// Executes a DELETE statement. Returns the number of rows removed.
pub fn execute_delete(stmt: &Statement, db: &mut Database) -> Result<usize> {
    let Statement::Delete(Delete {
        from, selection, ..
    }) = stmt
    else {
        return Err(SQLRiteError::Internal(
            "execute_delete called on a non-DELETE statement".to_string(),
        ));
    };

    let tables = match from {
        FromTable::WithFromKeyword(t) | FromTable::WithoutKeyword(t) => t,
    };
    let table_name = extract_single_table_name(tables)?;

    // Compute matching rowids with an immutable borrow, then mutate.
    let matching: Vec<i64> = {
        let table = db
            .get_table(table_name.clone())
            .map_err(|_| SQLRiteError::Internal(format!("Table '{table_name}' not found")))?;
        match select_rowids(table, selection.as_ref())? {
            RowidSource::IndexProbe(rowids) => rowids,
            RowidSource::FullScan => {
                let mut out = Vec::new();
                for rowid in table.rowids() {
                    if let Some(expr) = selection {
                        if !eval_predicate(expr, table, rowid)? {
                            continue;
                        }
                    }
                    out.push(rowid);
                }
                out
            }
        }
    };

    let table = db.get_table_mut(table_name)?;
    for rowid in &matching {
        table.delete_row(*rowid);
    }
    // Phase 7d.3 — any DELETE invalidates every HNSW index on this
    // table (the deleted node could still appear in other nodes'
    // neighbor lists, breaking subsequent searches). Mark dirty so
    // the next save rebuilds from current rows before serializing.
    //
    // Phase 8b — same posture for FTS indexes (Q7 — rebuild-on-save
    // mirrors HNSW). The deleted rowid still appears in posting
    // lists; leaving it would surface zombie hits in future queries.
    if !matching.is_empty() {
        for entry in &mut table.hnsw_indexes {
            entry.needs_rebuild = true;
        }
        for entry in &mut table.fts_indexes {
            entry.needs_rebuild = true;
        }
    }
    Ok(matching.len())
}

/// Executes an UPDATE statement. Returns the number of rows updated.
pub fn execute_update(stmt: &Statement, db: &mut Database) -> Result<usize> {
    let Statement::Update(Update {
        table,
        assignments,
        from,
        selection,
        ..
    }) = stmt
    else {
        return Err(SQLRiteError::Internal(
            "execute_update called on a non-UPDATE statement".to_string(),
        ));
    };

    if from.is_some() {
        return Err(SQLRiteError::NotImplemented(
            "UPDATE ... FROM is not supported yet".to_string(),
        ));
    }

    let table_name = extract_table_name(table)?;

    // Resolve assignment targets to plain column names and verify they exist.
    let mut parsed_assignments: Vec<(String, Expr)> = Vec::with_capacity(assignments.len());
    {
        let tbl = db
            .get_table(table_name.clone())
            .map_err(|_| SQLRiteError::Internal(format!("Table '{table_name}' not found")))?;
        for a in assignments {
            let col = match &a.target {
                AssignmentTarget::ColumnName(name) => name
                    .0
                    .last()
                    .map(|p| p.to_string())
                    .ok_or_else(|| SQLRiteError::Internal("empty column name".to_string()))?,
                AssignmentTarget::Tuple(_) => {
                    return Err(SQLRiteError::NotImplemented(
                        "tuple assignment targets are not supported".to_string(),
                    ));
                }
            };
            if !tbl.contains_column(col.clone()) {
                return Err(SQLRiteError::Internal(format!(
                    "UPDATE references unknown column '{col}'"
                )));
            }
            parsed_assignments.push((col, a.value.clone()));
        }
    }

    // Gather matching rowids + the new values to write for each assignment, under
    // an immutable borrow. Uses the index-probe fast path when the WHERE is
    // `col = literal` on an indexed column.
    let work: Vec<(i64, Vec<(String, Value)>)> = {
        let tbl = db.get_table(table_name.clone())?;
        let matched_rowids: Vec<i64> = match select_rowids(tbl, selection.as_ref())? {
            RowidSource::IndexProbe(rowids) => rowids,
            RowidSource::FullScan => {
                let mut out = Vec::new();
                for rowid in tbl.rowids() {
                    if let Some(expr) = selection {
                        if !eval_predicate(expr, tbl, rowid)? {
                            continue;
                        }
                    }
                    out.push(rowid);
                }
                out
            }
        };
        let mut rows_to_update = Vec::new();
        for rowid in matched_rowids {
            let mut values = Vec::with_capacity(parsed_assignments.len());
            for (col, expr) in &parsed_assignments {
                // UPDATE's RHS is evaluated in the context of the row being updated,
                // so column references on the right resolve to the current row's values.
                let v = eval_expr(expr, tbl, rowid)?;
                values.push((col.clone(), v));
            }
            rows_to_update.push((rowid, values));
        }
        rows_to_update
    };

    let tbl = db.get_table_mut(table_name)?;
    for (rowid, values) in &work {
        for (col, v) in values {
            tbl.set_value(col, *rowid, v.clone())?;
        }
    }

    // Phase 7d.3 — UPDATE may have changed a vector column that an
    // HNSW index covers. Mark every covering index dirty so save
    // rebuilds from current rows. (Updates that only touched
    // non-vector columns also mark dirty, which is over-conservative
    // but harmless — the rebuild walks rows anyway, and the cost is
    // only paid on save.)
    //
    // Phase 8b — same shape for FTS indexes covering updated TEXT cols.
    if !work.is_empty() {
        let updated_columns: std::collections::HashSet<&str> = work
            .iter()
            .flat_map(|(_, values)| values.iter().map(|(c, _)| c.as_str()))
            .collect();
        for entry in &mut tbl.hnsw_indexes {
            if updated_columns.contains(entry.column_name.as_str()) {
                entry.needs_rebuild = true;
            }
        }
        for entry in &mut tbl.fts_indexes {
            if updated_columns.contains(entry.column_name.as_str()) {
                entry.needs_rebuild = true;
            }
        }
    }
    Ok(work.len())
}

/// Handles `CREATE INDEX [UNIQUE] <name> ON <table> [USING <method>] (<column>)`.
/// Single-column indexes only.
///
/// Two flavours, branching on the optional `USING <method>` clause:
///   - **No USING, or `USING btree`**: regular B-Tree secondary index
///     (Phase 3e). Indexable types: Integer, Text.
///   - **`USING hnsw`**: HNSW ANN index (Phase 7d.2). Indexable types:
///     Vector(N) only. Distance metric is L2 by default; cosine and
///     dot variants are deferred to Phase 7d.x.
///
/// Returns the (possibly synthesized) index name for the status message.
pub fn execute_create_index(stmt: &Statement, db: &mut Database) -> Result<String> {
    let Statement::CreateIndex(CreateIndex {
        name,
        table_name,
        columns,
        using,
        unique,
        if_not_exists,
        predicate,
        with,
        ..
    }) = stmt
    else {
        return Err(SQLRiteError::Internal(
            "execute_create_index called on a non-CREATE-INDEX statement".to_string(),
        ));
    };

    if predicate.is_some() {
        return Err(SQLRiteError::NotImplemented(
            "partial indexes (CREATE INDEX ... WHERE) are not supported yet".to_string(),
        ));
    }

    if columns.len() != 1 {
        return Err(SQLRiteError::NotImplemented(format!(
            "multi-column indexes are not supported yet ({} columns given)",
            columns.len()
        )));
    }

    let index_name = name.as_ref().map(|n| n.to_string()).ok_or_else(|| {
        SQLRiteError::NotImplemented(
            "anonymous CREATE INDEX (no name) is not supported — give it a name".to_string(),
        )
    })?;

    // Detect USING <method>. The `using` field on CreateIndex covers the
    // pre-column form `CREATE INDEX … USING hnsw (col)`. (sqlparser also
    // accepts a post-column form `… (col) USING hnsw` and parks that in
    // `index_options`; we don't bother with it — the canonical form is
    // pre-column and matches PG/pgvector convention.)
    let method = match using {
        Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("hnsw") => {
            IndexMethod::Hnsw
        }
        Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("fts") => {
            IndexMethod::Fts
        }
        Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("btree") => {
            IndexMethod::Btree
        }
        Some(other) => {
            return Err(SQLRiteError::NotImplemented(format!(
                "CREATE INDEX … USING {other:?} is not supported \
                 (try `hnsw`, `fts`, or no USING clause)"
            )));
        }
        None => IndexMethod::Btree,
    };

    // Parse `WITH (key = value, …)` options (SQLR-28). The only key
    // recognized today is `metric` for HNSW indexes — `'l2'` /
    // `'cosine'` / `'dot'`. The clause is rejected on non-HNSW indexes
    // so a typo doesn't silently sit on a btree index where it can't
    // do anything useful.
    let hnsw_metric = parse_hnsw_with_options(with, &index_name, method)?;

    let table_name_str = table_name.to_string();
    let column_name = match &columns[0].column.expr {
        Expr::Identifier(ident) => ident.value.clone(),
        Expr::CompoundIdentifier(parts) => parts
            .last()
            .map(|p| p.value.clone())
            .ok_or_else(|| SQLRiteError::Internal("empty compound identifier".to_string()))?,
        other => {
            return Err(SQLRiteError::NotImplemented(format!(
                "CREATE INDEX only supports simple column references, got {other:?}"
            )));
        }
    };

    // Validate: table exists, column exists, type matches the index method,
    // name is unique across both index kinds. Snapshot (rowid, value) pairs
    // up front under the immutable borrow so the mutable attach later
    // doesn't fight over `self`.
    let (datatype, existing_rowids_and_values): (DataType, Vec<(i64, Value)>) = {
        let table = db.get_table(table_name_str.clone()).map_err(|_| {
            SQLRiteError::General(format!(
                "CREATE INDEX references unknown table '{table_name_str}'"
            ))
        })?;
        if !table.contains_column(column_name.clone()) {
            return Err(SQLRiteError::General(format!(
                "CREATE INDEX references unknown column '{column_name}' on table '{table_name_str}'"
            )));
        }
        let col = table
            .columns
            .iter()
            .find(|c| c.column_name == column_name)
            .expect("we just verified the column exists");

        // Name uniqueness check spans ALL index kinds — btree, hnsw, and