Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use tantivy::{
Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, TantivyError,
collector::TopDocs,
query::{BooleanQuery, Occur, QueryParser, TermQuery},
schema::{STORED, Schema, TEXT, Value},
schema::{STORED, STRING, Schema, TEXT, Value},
};

const INDEX_DIR: &str = "devbase/search_index";
Expand All @@ -24,7 +24,7 @@ fn index_path() -> Result<PathBuf, TantivyError> {

fn build_schema() -> Schema {
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("id", TEXT | STORED);
schema_builder.add_text_field("id", STRING | STORED);
schema_builder.add_text_field("title", TEXT | STORED);
schema_builder.add_text_field("content", TEXT);
schema_builder.add_text_field("tags", TEXT);
Expand Down
175 changes: 160 additions & 15 deletions src/vault/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Copyright (c) 2026 juice094
use crate::search;
use crate::vault::fs_io;
use tantivy::IndexWriter;
use tantivy::schema::Schema;
use tracing::info;

/// Index all vault notes from the registry into Tantivy.
Expand All @@ -17,23 +19,28 @@ pub fn reindex_vault(conn: &rusqlite::Connection) -> anyhow::Result<()> {
let (index, _reader) = search::init_index()?;
let mut writer = search::get_writer(&index)?;
let schema = index.schema();
reindex_vault_core(&notes, &mut writer, &schema)
}

fn reindex_vault_core(
notes: &[crate::registry::VaultNote],
writer: &mut IndexWriter,
schema: &Schema,
) -> anyhow::Result<()> {
// Delete all existing vault docs
let doc_type = schema.get_field("doc_type")?;
let term = tantivy::Term::from_field_text(doc_type, "vault");
writer.delete_term(term);

let mut indexed = 0;
for note in &notes {
for note in notes {
let title = note.title.as_deref().unwrap_or(&note.id);
let tags: Vec<String> = note.tags.clone();

// P1-1: read content from filesystem; fallback to empty string if unreadable
let content = fs_io::read_note_body(&note.path).map(|(body, _fm)| body).unwrap_or_default();

if let Err(e) =
search::add_vault_doc(&mut writer, &schema, &note.id, title, &content, &tags)
{
if let Err(e) = search::add_vault_doc(writer, schema, &note.id, title, &content, &tags) {
tracing::warn!("Failed to index vault note {}: {}", note.id, e);
} else {
indexed += 1;
Expand All @@ -50,37 +57,175 @@ pub fn index_vault_note(note: &crate::registry::VaultNote) -> anyhow::Result<()>
let (index, _reader) = search::init_index()?;
let mut writer = search::get_writer(&index)?;
let schema = index.schema();
index_vault_note_core(note, &mut writer, &schema)
}

/// Replaces the index entry for a single vault note and commits the change.
///
/// Every document whose `id` term equals `note.id` is scheduled for deletion, then the
/// note is added again from its in-memory `title`, `content` and `tags`. The note body is
/// taken from `note.content`; nothing is read from the filesystem here.
///
/// # Errors
/// Returns an error if the schema has no `id` field, if `search::add_vault_doc` fails,
/// or if the writer cannot commit.
fn index_vault_note_core(
    note: &crate::registry::VaultNote,
    writer: &mut IndexWriter,
    schema: &Schema,
) -> anyhow::Result<()> {
    // Drop any previous version of this note. The lookup is an exact term match on the
    // full id string, so it relies on `id` being indexed as one raw term (the search
    // schema declares the field as `STRING`).
    let id_field = schema.get_field("id")?;
    writer.delete_term(tantivy::Term::from_field_text(id_field, &note.id));

    // Notes without an explicit title are indexed under their id.
    let title = note.title.as_deref().unwrap_or(&note.id);
    search::add_vault_doc(writer, schema, &note.id, title, &note.content, &note.tags)?;

    // Delete + add only become visible to readers once committed.
    writer.commit()?;
    Ok(())
}

#[cfg(test)]
mod tests {
use super::*;
use crate::registry::VaultNote;
use std::io::Write;

/// Creates a throwaway search index inside a fresh temporary directory.
///
/// Returns the directory guard together with the index, a writer for it and its schema.
/// Callers must keep the returned `TempDir` alive for as long as they use the index,
/// because `tempfile` removes the directory when the guard is dropped.
fn init_isolated_index()
-> (tempfile::TempDir, tantivy::Index, tantivy::IndexWriter, tantivy::schema::Schema) {
    let scratch_dir = tempfile::tempdir().unwrap();
    let (isolated_index, _reader) = search::init_index_at(scratch_dir.path()).unwrap();
    let index_schema = isolated_index.schema();
    let index_writer = search::get_writer(&isolated_index).unwrap();
    (scratch_dir, isolated_index, index_writer, index_schema)
}

/// Reindexing with an empty note list must purge every previously indexed vault document.
#[test]
fn test_reindex_vault_core_empty() {
    let (_tmp, index, mut writer, schema) = init_isolated_index();
    let doc_type = schema.get_field("doc_type").unwrap();

    // Counts committed documents tagged `doc_type = "vault"`, using a fresh reader on
    // every call so that each count reflects the latest commit.
    let count_vault_docs = || {
        let query = tantivy::query::TermQuery::new(
            tantivy::Term::from_field_text(doc_type, "vault"),
            tantivy::schema::IndexRecordOption::Basic,
        );
        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        let found = searcher.search(&query, &tantivy::collector::Count).unwrap();
        found
    };

    // Seed a dummy vault doc so we can verify deletion works.
    search::add_vault_doc(&mut writer, &schema, "dummy", "Dummy", "content", &[]).unwrap();
    writer.commit().unwrap();
    // Guard against a vacuous pass: the seed must be visible before it can be purged.
    assert_eq!(count_vault_docs(), 1, "seed document should be indexed before reindexing");

    let notes: Vec<VaultNote> = vec![];
    reindex_vault_core(&notes, &mut writer, &schema).unwrap();

    // After reindex with empty notes, vault docs should be gone.
    assert_eq!(count_vault_docs(), 0);
}

/// Reindexing a single note backed by a real markdown file yields exactly one vault document.
#[test]
fn test_reindex_vault_core_with_notes() {
    // Put a markdown file on disk; the reindex path reads the body from `note.path`.
    let note_dir = tempfile::tempdir().unwrap();
    let md_path = note_dir.path().join("note.md");
    {
        let mut md_file = std::fs::File::create(&md_path).unwrap();
        writeln!(md_file, "# Hello\n\nThis is a test note.").unwrap();
        // `md_file` is closed at the end of this scope, before the indexer reads it.
    }

    let notes = [VaultNote {
        id: "note-1".to_string(),
        path: md_path.to_str().unwrap().to_owned(),
        title: Some("Hello".to_string()),
        content: "ignored".to_string(),
        frontmatter: None,
        tags: vec!["tag1".to_string()],
        outgoing_links: vec![],
        linked_repo: None,
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    }];

    let (_index_dir, index, mut writer, schema) = init_isolated_index();
    reindex_vault_core(&notes, &mut writer, &schema).unwrap();

    // Verify the note was indexed by counting documents tagged as vault docs.
    let vault_term =
        tantivy::Term::from_field_text(schema.get_field("doc_type").unwrap(), "vault");
    let vault_query =
        tantivy::query::TermQuery::new(vault_term, tantivy::schema::IndexRecordOption::Basic);
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let count = searcher.search(&vault_query, &tantivy::collector::Count).unwrap();
    assert_eq!(count, 1);
}

/// Indexing one note into an empty, isolated index yields exactly one vault document.
#[test]
fn test_index_vault_note_core_add() {
    let note = VaultNote {
        id: "note-add".to_string(),
        path: "/tmp/add.md".to_string(),
        title: Some("Add".to_string()),
        content: "new content".to_string(),
        frontmatter: None,
        tags: vec!["add".to_string()],
        outgoing_links: vec![],
        linked_repo: None,
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    };

    // Use a private temp-dir index instead of the shared on-disk one, so the result can
    // be asserted rather than ignored.
    let (_tmp, index, mut writer, schema) = init_isolated_index();
    index_vault_note_core(&note, &mut writer, &schema).unwrap();

    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let doc_type = schema.get_field("doc_type").unwrap();
    let term = tantivy::Term::from_field_text(doc_type, "vault");
    let count = searcher
        .search(
            &tantivy::query::TermQuery::new(term, tantivy::schema::IndexRecordOption::Basic),
            &tantivy::collector::Count,
        )
        .unwrap();
    assert_eq!(count, 1);
}

/// Indexing the same note id twice must replace the earlier document, not duplicate it.
#[test]
fn test_index_vault_note_core_update() {
    // Both revisions share the id and path; only title, content and tags differ.
    let revision = |title: &str, content: &str, tags: Vec<String>| VaultNote {
        id: "note-update".to_string(),
        path: "/tmp/update.md".to_string(),
        title: Some(title.to_string()),
        content: content.to_string(),
        frontmatter: None,
        tags,
        outgoing_links: vec![],
        linked_repo: None,
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    };

    let original = revision("Original", "original content", vec![]);
    let (_index_dir, index, mut writer, schema) = init_isolated_index();
    index_vault_note_core(&original, &mut writer, &schema).unwrap();

    let updated = revision("Updated", "updated content", vec!["new-tag".to_string()]);
    index_vault_note_core(&updated, &mut writer, &schema).unwrap();

    // Tantivy delete + add semantics: old doc replaced, only 1 doc remains.
    let vault_term =
        tantivy::Term::from_field_text(schema.get_field("doc_type").unwrap(), "vault");
    let vault_query =
        tantivy::query::TermQuery::new(vault_term, tantivy::schema::IndexRecordOption::Basic);
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let count = searcher.search(&vault_query, &tantivy::collector::Count).unwrap();
    assert_eq!(count, 1);
}
}
Loading