diff --git a/.gitignore b/.gitignore index 5dffeb0f..62f1dfb1 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,7 @@ python/**/target/ tests/integration/target/ tests/integration/.venv/ tests/integration/install + +# Python bytecode (do not commit) +**/__pycache__/ +*.py[cod] diff --git a/Cargo.lock b/Cargo.lock index 18b01c0d..a1176666 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3800,6 +3800,10 @@ dependencies = [ "datafusion-expr 48.0.1", "datafusion-physical-expr 48.0.1", "datafusion-proto", + "function-stream-config", + "function-stream-logger", + "function-stream-runtime-common", + "function-stream-streaming-planner", "futures", "governor", "itertools 0.14.0", @@ -3838,9 +3842,26 @@ dependencies = [ "xxhash-rust", ] +[[package]] +name = "function-stream-catalog" +version = "0.6.0" +dependencies = [ + "protocol", +] + +[[package]] +name = "function-stream-catalog-storage" +version = "0.6.0" +dependencies = [ + "anyhow", + "function-stream-catalog", + "parking_lot", + "rocksdb", +] + [[package]] name = "function-stream-cli" -version = "0.1.0" +version = "0.6.0" dependencies = [ "arrow-array 52.2.0", "arrow-ipc 52.2.0", @@ -3853,6 +3874,104 @@ dependencies = [ "tonic", ] +[[package]] +name = "function-stream-common" +version = "0.6.0" + +[[package]] +name = "function-stream-config" +version = "0.6.0" +dependencies = [ + "serde", + "serde_yaml", + "uuid", +] + +[[package]] +name = "function-stream-coordinator" +version = "0.6.0" + +[[package]] +name = "function-stream-logger" +version = "0.6.0" +dependencies = [ + "anyhow", + "function-stream-config", + "tracing", + "tracing-appender", + "tracing-subscriber", +] + +[[package]] +name = "function-stream-runtime-common" +version = "0.6.0" +dependencies = [ + "arrow-array 55.2.0", + "bincode", + "parking_lot", + "serde", + "tokio", + "tracing", +] + +[[package]] +name = "function-stream-servicer" +version = "0.6.0" + +[[package]] +name = "function-stream-sqlparser" +version = "0.6.0" + +[[package]] +name = "function-stream-streaming-planner" +version = "0.6.0" +dependencies = [ + "ahash", + "anyhow", + "apache-avro", + "arrow 55.2.0", + "arrow-array 55.2.0", + "arrow-json 55.2.0", + "arrow-schema 55.2.0", + "async-trait", + "bincode", + "bytes", + "chrono", + "datafusion 48.0.1", + "datafusion-common 48.0.1", + "datafusion-execution 48.0.1", + "datafusion-expr 48.0.1", + "datafusion-physical-expr 48.0.1", + "datafusion-proto", + "function-stream-config", + "function-stream-runtime-common", + "futures", + "itertools 0.14.0", + "petgraph 0.7.1", + "prost 0.13.5", + "protocol", + "rand 0.8.5", + "serde", + "serde_json", + "serde_json_path", + "sqlparser 0.55.0", + "strum 0.26.3", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "unicase", + "xxhash-rust", +] + +[[package]] +name = "function-stream-streaming-runtime" +version = "0.6.0" + +[[package]] +name = "function-stream-wasm-runtime" +version = "0.6.0" + [[package]] name = "funty" version = "2.0.0" @@ -6431,7 +6550,7 @@ dependencies = [ [[package]] name = "protocol" -version = "0.1.0" +version = "0.6.0" dependencies = [ "env_logger", "log", diff --git a/Cargo.toml b/Cargo.toml index eebf2a6c..324c882b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,90 +1,25 @@ [workspace] members = [ - ".", + "src/function-stream", "protocol", "cli/cli", + "src/catalog", + "src/catalog_storage", + "src/common", + "src/config", + "src/coordinator", + "src/logger", + "src/runtime_common", + "src/servicer", + "src/sqlparser", + "src/streaming_runtime", + "src/streaming_planner", + 
"src/wasm_runtime", ] +resolver = "2" +default-members = ["src/function-stream"] -[package] -name = "function-stream" +# Shared crate metadata for workspace members (`version.workspace = true`, etc.). +[workspace.package] version = "0.6.0" edition = "2024" - -[lib] -name = "function_stream" -path = "src/lib.rs" - -[[bin]] -name = "function-stream" -path = "src/main.rs" - - -[dependencies] -tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "sync", "time", "net", "signal"] } -serde = { version = "1.0", features = ["derive"] } -serde_yaml = "0.9" -serde_json = "1.0" -uuid = { version = "1.0", features = ["v4", "v7"] } -log = "0.4" -tracing = "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } -tracing-appender = "0.2" -anyhow = "1.0" -thiserror = "2" -tonic = { version = "0.12", features = ["default"] } -async-trait = "0.1" -num_cpus = "1.0" -protocol = { path = "./protocol" } -prost = "0.13" -rdkafka = { version = "0.38", features = ["cmake-build", "ssl", "gssapi", "curl"] } -crossbeam-channel = "0.5" -wasmtime = { version = "41.0.3", features = ["component-model", "async"] } -base64 = "0.22" -wasmtime-wasi = "41.0.3" -rocksdb = { version = "0.21", features = ["multi-threaded-cf", "lz4"] } -bincode = { version = "2", features = ["serde"] } -chrono = "0.4" -tokio-stream = "0.1.18" -lru = "0.12" -parking_lot = "0.12" -arrow = { version = "55", default-features = false } -arrow-array = "55" -arrow-ipc = "55" -arrow-schema = { version = "55", features = ["serde"] } -parquet = "55" -object_store = { version = "0.12.5", features = ["aws"] } -bytes = "1" -futures = "0.3" -serde_json_path = "0.7" -xxhash-rust = { version = "0.8", features = ["xxh3"] } -proctitle = "0.1" -unicase = "2.7" -petgraph = "0.7" -rand = { version = "0.8", features = ["small_rng"] } -itertools = "0.14" -strum = { version = "0.26", features = ["derive"] } - -arrow-json = {version = '55.2.0'} -apache-avro = "0.21" -datafusion = {git = 'https://github.com/FunctionStream/datafusion', branch = '48.0.1/fs'} -datafusion-common = {git = 'https://github.com/FunctionStream/datafusion', branch = '48.0.1/fs'} -datafusion-execution = {git = 'https://github.com/FunctionStream/datafusion', branch = '48.0.1/fs'} -datafusion-expr = {git = 'https://github.com/FunctionStream/datafusion', branch = '48.0.1/fs'} -datafusion-physical-expr = {git = 'https://github.com/FunctionStream/datafusion', branch = '48.0.1/fs'} -datafusion-proto = {git = 'https://github.com/FunctionStream/datafusion', branch = '48.0.1/fs'} - -sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.58.0/fs" } - -ahash = "0.8" -governor = "0.8.0" -lance = { version = "4.0.0", default-features = false, features = ["aws"] } -arrow-array-lance = { package = "arrow-array", version = "57.3.0" } -arrow-ipc-lance = { package = "arrow-ipc", version = "57.3.0" } - -[features] -default = ["incremental-cache", "python"] -incremental-cache = ["wasmtime/incremental-cache"] -python = [] - -[dev-dependencies] -tempfile = "3.27.0" diff --git a/Makefile b/Makefile index 78138dae..d6340d9d 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. APP_NAME := function-stream +# Version from root `[workspace.package]` (single source of truth). 
VERSION := $(shell grep '^version' Cargo.toml | head -1 | awk -F '"' '{print $$2}') DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ") @@ -106,6 +107,7 @@ build: .check-env .ensure-target .build-wasm @RUSTFLAGS="$(OPTIMIZE_FLAGS)" \ cargo build --release \ --target $(TRIPLE) \ + -p $(APP_NAME) \ --features python \ --quiet $(call log,BUILD,CLI) @@ -118,14 +120,15 @@ build: .check-env .ensure-target .build-wasm build-lite: .check-env .ensure-target $(call log,BUILD,Rust Lite [$(OS_NAME) / $(TRIPLE)]) - @RUSTFLAGS="$(INDUSTRIAL_RUSTFLAGS)" \ + @RUSTFLAGS="$(OPTIMIZE_FLAGS)" \ cargo build --release \ --target $(TRIPLE) \ + -p $(APP_NAME) \ --no-default-features \ --features incremental-cache \ --quiet $(call log,BUILD,CLI for dist) - @RUSTFLAGS="$(INDUSTRIAL_RUSTFLAGS)" \ + @RUSTFLAGS="$(OPTIMIZE_FLAGS)" \ cargo build --release \ --target $(TRIPLE) \ -p function-stream-cli \ diff --git a/cli/cli/Cargo.toml b/cli/cli/Cargo.toml index 49c0a881..75bd6cb4 100644 --- a/cli/cli/Cargo.toml +++ b/cli/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "function-stream-cli" -version = "0.1.0" -edition = "2021" +version.workspace = true +edition.workspace = true [[bin]] name = "cli" diff --git a/cli/cli/src/repl.rs b/cli/cli/src/repl.rs index b442bd07..01209ba1 100644 --- a/cli/cli/src/repl.rs +++ b/cli/cli/src/repl.rs @@ -17,7 +17,7 @@ use arrow_ipc::reader::StreamReader; use arrow_schema::DataType; use comfy_table::presets::UTF8_FULL; use comfy_table::{Attribute, Cell, Color, ContentArrangement, Table, TableComponent}; -use protocol::cli::{function_stream_service_client::FunctionStreamServiceClient, SqlRequest}; +use protocol::cli::{SqlRequest, function_stream_service_client::FunctionStreamServiceClient}; use rustyline::error::ReadlineError; use rustyline::{Config, DefaultEditor, EditMode}; use std::fmt; @@ -158,17 +158,17 @@ impl Repl { } // 3. 
Strict Data Check: Only proceed if data is explicitly present and non-empty - if let Some(bytes) = response.data - if !bytes.is_empty() { - // format_arrow_data returns Ok(Some(Table)) ONLY if row_count > 0 - match self.format_arrow_data(&bytes) { - Ok(Some(table)) => println!("{}", table), - Ok(None) => { - // Data was present but contained 0 rows (e.g., empty result set) - // We print nothing here to keep output clean as requested - } - Err(e) => eprintln!("Failed to parse result data: {}", e), + if let Some(bytes) = response.data + && !bytes.is_empty() + { + // format_arrow_data returns Ok(Some(Table)) ONLY if row_count > 0 + match self.format_arrow_data(&bytes) { + Ok(Some(table)) => println!("{}", table), + Ok(None) => { + // Data was present but contained 0 rows (e.g., empty result set) + // We print nothing here to keep output clean as requested } + Err(e) => eprintln!("Failed to parse result data: {}", e), } }
@@ -243,11 +243,7 @@ impl Repl { } } - if has_rows { - Ok(Some(table)) - } else { - Ok(None) - } + if has_rows { Ok(Some(table)) } else { Ok(None) } } fn extract_value(&self, column: &dyn Array, row: usize) -> String {
@@ -317,7 +313,7 @@ impl Repl { #[cfg(unix)] let mut sigterm = { - use tokio::signal::unix::{signal, SignalKind}; + use tokio::signal::unix::{SignalKind, signal}; signal(SignalKind::terminate()).expect("failed to register SIGTERM handler") };
@@ -403,10 +399,8 @@ impl Repl { println!(); } - if !skip_save_history { - if let Some(ref mut ed) = repl.lock().await.editor { - let _ = ed.save_history(".function-stream-cli-history"); - } + if !skip_save_history && let Some(ref mut ed) = repl.lock().await.editor { + let _ = ed.save_history(".function-stream-cli-history"); } Ok(()) }
@@ -448,10 +442,10 @@ impl Repl { } fn add_history_entry(&mut self, entry: &str) { - if let Some(ed) = self.editor.as_mut() { - if !entry.trim().is_empty() { - let _ = ed.add_history_entry(entry.trim()); - } + if let Some(ed) = self.editor.as_mut() + && !entry.trim().is_empty() + { + let _ = ed.add_history_entry(entry.trim()); } }
diff --git a/examples/examples-validator/Cargo.toml b/examples/examples-validator/Cargo.toml index de87dbb3..9c204935 100644 --- a/examples/examples-validator/Cargo.toml +++ b/examples/examples-validator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "examples-validator" version = "0.1.0" -edition = "2021" +edition = "2024" [workspace]
diff --git a/protocol/Cargo.toml b/protocol/Cargo.toml index 51b1f3c1..96501c30 100644 --- a/protocol/Cargo.toml +++ b/protocol/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "protocol" -version = "0.1.0" -edition = "2024" +version.workspace = true +edition.workspace = true description = "Protocol Buffers protocol definitions for function stream" license = "MIT OR Apache-2.0" repository = "https://github.com/your-username/rust-function-stream"
diff --git a/src/catalog/Cargo.toml b/src/catalog/Cargo.toml new file mode 100644 index 00000000..2a50735c --- /dev/null +++ b/src/catalog/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "function-stream-catalog" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_catalog" +path = "src/lib.rs" + +[dependencies] +protocol = { path = "../../protocol" }
diff --git a/src/catalog/src/error.rs b/src/catalog/src/error.rs new file mode 100644 index 00000000..f5a9378e --- /dev/null +++ b/src/catalog/src/error.rs @@ -0,0 +1,4 @@ +use std::error::Error; + +pub type CatalogError = Box<dyn Error + Send + Sync>; +pub type CatalogResult<T> = Result<T, CatalogError>;
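Not part of the patch: a minimal standalone sketch of the let-chain pattern the `repl.rs` hunks above adopt, which the workspace can rely on now that every member is on `edition = "2024"`. The function name and values below are invented for illustration.

```rust
// Same shape as the repl's strict data check: bind the payload and add an extra
// condition in a single `if` (a let-chain; requires edition 2024 / a recent rustc).
fn first_nonempty(data: Option<Vec<u8>>) -> Option<Vec<u8>> {
    if let Some(bytes) = data
        && !bytes.is_empty()
    {
        return Some(bytes);
    }
    None
}

fn main() {
    assert_eq!(first_nonempty(Some(vec![1, 2, 3])), Some(vec![1, 2, 3]));
    assert_eq!(first_nonempty(Some(Vec::new())), None);
    assert_eq!(first_nonempty(None), None);
}
```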
diff --git a/src/catalog/src/lib.rs b/src/catalog/src/lib.rs
new file mode 100644
index 00000000..33fb2743
--- /dev/null
+++ b/src/catalog/src/lib.rs
@@ -0,0 +1,9 @@
+//! Catalog domain types and APIs.
+
+pub mod error;
+pub mod meta_store;
+pub mod stream_catalog;
+
+pub use error::{CatalogError, CatalogResult};
+pub use meta_store::MetaStore;
+pub use stream_catalog::{GlobalStreamCatalog, StoredStreamingJob, StreamCatalog};
diff --git a/src/catalog/src/meta_store.rs b/src/catalog/src/meta_store.rs
new file mode 100644
index 00000000..b361e03c
--- /dev/null
+++ b/src/catalog/src/meta_store.rs
@@ -0,0 +1,23 @@
+use crate::CatalogResult;
+
+/// Synchronous metadata key-value backend for catalog records.
+pub trait MetaStore: Send + Sync {
+    fn put(&self, key: &str, value: Vec<u8>) -> CatalogResult<()>;
+    fn get(&self, key: &str) -> CatalogResult<Option<Vec<u8>>>;
+    fn delete(&self, key: &str) -> CatalogResult<()>;
+    fn scan_prefix(&self, prefix: &str) -> CatalogResult<Vec<(String, Vec<u8>)>>;
+
+    /// Atomic apply of many puts (`Some(value)`) and deletes (`None`).
+    ///
+    /// Backends should override this with a single transaction or write batch
+    /// when the storage engine supports it.
+    fn write_batch(&self, batch: Vec<(String, Option<Vec<u8>>)>) -> CatalogResult<()> {
+        for (key, value) in batch {
+            match value {
+                Some(value) => self.put(&key, value)?,
+                None => self.delete(&key)?,
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/catalog/src/stream_catalog.rs b/src/catalog/src/stream_catalog.rs
new file mode 100644
index 00000000..a5641a34
--- /dev/null
+++ b/src/catalog/src/stream_catalog.rs
@@ -0,0 +1,69 @@
+use std::sync::Arc;
+
+use protocol::function_stream_graph::FsProgram;
+use protocol::storage as pb;
+
+use crate::CatalogResult;
+
+/// One persisted streaming job row from catalog storage.
+///
+/// This is intentionally storage-agnostic: the catalog keeps source checkpoint
+/// payloads as protocol oneof envelopes and does not inspect source-specific
+/// checkpoint data.
+#[derive(Debug, Clone)]
+pub struct StoredStreamingJob {
+    pub table_name: String,
+    pub program: FsProgram,
+    pub checkpoint_interval_ms: u64,
+    pub latest_checkpoint_epoch: u64,
+    pub source_checkpoints: Vec,
+}
+
+/// Interface exposed by the stream catalog manager.
+///
+/// The concrete table and planning types are generic so this crate can define
+/// the catalog boundary without depending on the monolithic SQL/runtime crates.
+pub trait StreamCatalog<Table, StreamTable, PlanningContext>: Send + Sync {
+    fn persist_streaming_job(
+        &self,
+        table_name: &str,
+        fs_program: &FsProgram,
+        comment: &str,
+        checkpoint_interval_ms: u64,
+    ) -> CatalogResult<()>;
+
+    fn remove_streaming_job(&self, table_name: &str) -> CatalogResult<()>;
+
+    fn commit_job_checkpoint(
+        &self,
+        table_name: &str,
+        epoch: u64,
+        source_checkpoints: Vec,
+    ) -> CatalogResult<()>;
+
+    fn load_streaming_job_definitions(&self) -> CatalogResult<Vec<StoredStreamingJob>>;
+
+    fn add_catalog_table(&self, table: Table) -> CatalogResult<()>;
+    fn has_catalog_table(&self, name: &str) -> bool;
+    fn drop_catalog_table(&self, table_name: &str, if_exists: bool) -> CatalogResult<()>;
+    fn restore_from_store(&self) -> CatalogResult<()>;
+    fn acquire_planning_context(&self) -> PlanningContext;
+    fn list_catalog_tables(&self) -> CatalogResult<Vec<Arc<Table>>>;
+    fn get_catalog_table(&self, name: &str) -> CatalogResult<Option<Arc<Table>>>;
+
+    fn add_table(&self, table: StreamTable) -> CatalogResult<()>;
+    fn has_stream_table(&self, name: &str) -> bool;
+    fn drop_table(&self, table_name: &str, if_exists: bool) -> CatalogResult<()>;
+    fn list_stream_tables(&self) -> Vec<Arc<StreamTable>>;
+    fn get_stream_table(&self, name: &str) -> Option<Arc<StreamTable>>;
+}
+
+/// Process-global catalog access boundary.
+pub trait GlobalStreamCatalog: Send + Sync {
+    fn init_global(manager: Arc<Self>) -> CatalogResult<()>;
+    fn try_global() -> Option<Arc<Self>>;
+
+    fn global() -> CatalogResult<Arc<Self>> {
+        Self::try_global().ok_or_else(|| "CatalogManager not initialized".into())
+    }
+}
diff --git a/src/catalog_storage/Cargo.toml b/src/catalog_storage/Cargo.toml
new file mode 100644
index 00000000..3bb9fed8
--- /dev/null
+++ b/src/catalog_storage/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "function-stream-catalog-storage"
+version.workspace = true
+edition.workspace = true
+
+[lib]
+name = "function_stream_catalog_storage"
+path = "src/lib.rs"
+
+[dependencies]
+anyhow = "1.0"
+function-stream-catalog = { path = "../catalog" }
+parking_lot = "0.12"
+rocksdb = { version = "0.21", features = ["multi-threaded-cf", "lz4"] }
diff --git a/src/catalog_storage/src/lib.rs b/src/catalog_storage/src/lib.rs
new file mode 100644
index 00000000..8fdca0bf
--- /dev/null
+++ b/src/catalog_storage/src/lib.rs
@@ -0,0 +1,10 @@
+//! Persistent catalog storage implementations.
+//!
+//! The stream catalog manager and task persistence (`stream_catalog/`, `task/`) live in this
+//! package and are compiled as part of `function-stream` via `#[path]` in `src/lib.rs` / `src/main.rs`.
+
+pub mod memory;
+pub mod rocksdb;
+
+pub use memory::InMemoryMetaStore;
+pub use rocksdb::RocksDbMetaStore;
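Not part of the patch, but a short usage sketch of the `MetaStore` contract against the in-memory backend that follows, assuming the byte-valued signatures (`Vec<u8>` values, `Option<Vec<u8>>` reads) as reconstructed above; the key names are invented for illustration.

```rust
use function_stream_catalog::MetaStore;
use function_stream_catalog_storage::InMemoryMetaStore;

fn main() {
    let store = InMemoryMetaStore::new();

    // Two plain puts, then one atomic batch: upsert `table/users`, delete `table/orders`.
    store.put("table/users", b"v1".to_vec()).unwrap();
    store.put("table/orders", b"v1".to_vec()).unwrap();
    store
        .write_batch(vec![
            ("table/users".to_string(), Some(b"v2".to_vec())),
            ("table/orders".to_string(), None),
        ])
        .unwrap();

    // A prefix scan only sees the surviving key, and the upsert won.
    let rows = store.scan_prefix("table/").unwrap();
    assert_eq!(rows.len(), 1);
    assert_eq!(store.get("table/users").unwrap(), Some(b"v2".to_vec()));
}
```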
diff --git a/src/catalog_storage/src/memory.rs b/src/catalog_storage/src/memory.rs
new file mode 100644
index 00000000..df6fd0c0
--- /dev/null
+++ b/src/catalog_storage/src/memory.rs
@@ -0,0 +1,63 @@
+use std::collections::HashMap;
+
+use function_stream_catalog::{CatalogResult, MetaStore};
+use parking_lot::RwLock;
+
+/// In-process KV store for single-node deployments and tests.
+pub struct InMemoryMetaStore {
+    db: RwLock<HashMap<String, Vec<u8>>>,
+}
+
+impl InMemoryMetaStore {
+    pub fn new() -> Self {
+        Self {
+            db: RwLock::new(HashMap::new()),
+        }
+    }
+}
+
+impl Default for InMemoryMetaStore {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl MetaStore for InMemoryMetaStore {
+    fn put(&self, key: &str, value: Vec<u8>) -> CatalogResult<()> {
+        self.db.write().insert(key.to_string(), value);
+        Ok(())
+    }
+
+    fn get(&self, key: &str) -> CatalogResult<Option<Vec<u8>>> {
+        Ok(self.db.read().get(key).cloned())
+    }
+
+    fn delete(&self, key: &str) -> CatalogResult<()> {
+        self.db.write().remove(key);
+        Ok(())
+    }
+
+    fn scan_prefix(&self, prefix: &str) -> CatalogResult<Vec<(String, Vec<u8>)>> {
+        let db = self.db.read();
+        Ok(db
+            .iter()
+            .filter(|(key, _)| key.starts_with(prefix))
+            .map(|(key, value)| (key.clone(), value.clone()))
+            .collect())
+    }
+
+    fn write_batch(&self, batch: Vec<(String, Option<Vec<u8>>)>) -> CatalogResult<()> {
+        let mut db = self.db.write();
+        for (key, value) in batch {
+            match value {
+                Some(value) => {
+                    db.insert(key, value);
+                }
+                None => {
+                    db.remove(&key);
+                }
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/catalog_storage/src/rocksdb.rs b/src/catalog_storage/src/rocksdb.rs
new file mode 100644
index 00000000..192b9d80
--- /dev/null
+++ b/src/catalog_storage/src/rocksdb.rs
@@ -0,0 +1,81 @@
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::Context;
+use function_stream_catalog::{CatalogResult, MetaStore};
+use rocksdb::{DB, Direction, IteratorMode, Options, WriteBatch};
+
+/// RocksDB-backed catalog metadata store.
+pub struct RocksDbMetaStore {
+    db: Arc<DB>,
+}
+
+impl RocksDbMetaStore {
+    pub fn open<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
+        let path = path.as_ref();
+        if let Some(parent) = path.parent() {
+            std::fs::create_dir_all(parent)
+                .with_context(|| format!("stream catalog: create parent directory {parent:?}"))?;
+        }
+        let mut opts = Options::default();
+        opts.create_if_missing(true);
+        let db = DB::open(&opts, path)
+            .with_context(|| format!("stream catalog: open RocksDB at {}", path.display()))?;
+        Ok(Self { db: Arc::new(db) })
+    }
+}
+
+impl MetaStore for RocksDbMetaStore {
+    fn put(&self, key: &str, value: Vec<u8>) -> CatalogResult<()> {
+        self.db
+            .put(key.as_bytes(), value.as_slice())
+            .map_err(|e| format!("stream catalog store put: {e}").into())
+    }
+
+    fn get(&self, key: &str) -> CatalogResult<Option<Vec<u8>>> {
+        self.db
+            .get(key.as_bytes())
+            .map_err(|e| format!("stream catalog store get: {e}").into())
+    }
+
+    fn delete(&self, key: &str) -> CatalogResult<()> {
+        self.db
+            .delete(key.as_bytes())
+            .map_err(|e| format!("stream catalog store delete: {e}").into())
+    }
+
+    fn scan_prefix(&self, prefix: &str) -> CatalogResult<Vec<(String, Vec<u8>)>> {
+        let mut out = Vec::new();
+        let iter = self
+            .db
+            .iterator(IteratorMode::From(prefix.as_bytes(), Direction::Forward));
+        for item in iter {
+            let (key, value) = item.map_err(|e| format!("stream catalog store scan: {e}"))?;
+            let key = String::from_utf8(key.to_vec())
+                .map_err(|e| format!("stream catalog store: invalid utf8 key: {e}"))?;
+            if !key.starts_with(prefix) {
+                break;
+            }
+            out.push((key, value.to_vec()));
+        }
+        Ok(out)
+    }
+
+    fn write_batch(&self, batch: Vec<(String, Option<Vec<u8>>)>) -> CatalogResult<()> {
+        if batch.is_empty() {
+            return Ok(());
+        }
+
+        let mut write_batch = WriteBatch::default();
+        for (key, value) in batch {
+            match value {
+                Some(value) => write_batch.put(key.as_bytes(), value.as_slice()),
+                None => write_batch.delete(key.as_bytes()),
+            }
+        }
+
+        self.db
+            .write(write_batch)
+            .map_err(|e| format!("stream catalog store write_batch: {e}").into())
    }
+} diff --git a/src/storage/stream_catalog/codec.rs b/src/catalog_storage/src/stream_catalog/codec.rs similarity index 100% rename from src/storage/stream_catalog/codec.rs rename to src/catalog_storage/src/stream_catalog/codec.rs diff --git a/src/storage/stream_catalog/manager.rs b/src/catalog_storage/src/stream_catalog/manager.rs similarity index 99% rename from src/storage/stream_catalog/manager.rs rename to src/catalog_storage/src/stream_catalog/manager.rs index fa810e8d..cbdf30b3 100644 --- a/src/storage/stream_catalog/manager.rs +++ b/src/catalog_storage/src/stream_catalog/manager.rs @@ -627,7 +627,7 @@ pub fn restore_global_catalog_from_store() { } pub fn restore_streaming_jobs_from_store() { - use crate::runtime::streaming::job::JobManager; + use crate::streaming::job::JobManager; let Some(catalog) = CatalogManager::try_global() else { warn!("CatalogManager not available; skipping streaming job restore"); @@ -742,7 +742,7 @@ mod tests { use crate::sql::schema::column_descriptor::ColumnDescriptor; use crate::sql::schema::table::CatalogEntity; use crate::sql::schema::temporal_pipeline_config::TemporalPipelineConfig; - use crate::storage::stream_catalog::InMemoryMetaStore; + use crate::stream_catalog::InMemoryMetaStore; use super::CatalogManager; diff --git a/src/storage/stream_catalog/meta_store.rs b/src/catalog_storage/src/stream_catalog/meta_store.rs similarity index 100% rename from src/storage/stream_catalog/meta_store.rs rename to src/catalog_storage/src/stream_catalog/meta_store.rs diff --git a/src/storage/stream_catalog/mod.rs b/src/catalog_storage/src/stream_catalog/mod.rs similarity index 100% rename from src/storage/stream_catalog/mod.rs rename to src/catalog_storage/src/stream_catalog/mod.rs diff --git a/src/storage/stream_catalog/rocksdb_meta_store.rs b/src/catalog_storage/src/stream_catalog/rocksdb_meta_store.rs similarity index 98% rename from src/storage/stream_catalog/rocksdb_meta_store.rs rename to src/catalog_storage/src/stream_catalog/rocksdb_meta_store.rs index 1537f278..58c0b35c 100644 --- a/src/storage/stream_catalog/rocksdb_meta_store.rs +++ b/src/catalog_storage/src/stream_catalog/rocksdb_meta_store.rs @@ -20,7 +20,7 @@ use rocksdb::{DB, Direction, IteratorMode, Options, WriteBatch}; use super::MetaStore; -/// Single-node durable KV used by [`crate::storage::stream_catalog::CatalogManager`]. +/// Single-node durable KV used by [`crate::stream_catalog::CatalogManager`]. 
pub struct RocksDbMetaStore { db: Arc, } diff --git a/src/storage/task/factory.rs b/src/catalog_storage/src/task/factory.rs similarity index 100% rename from src/storage/task/factory.rs rename to src/catalog_storage/src/task/factory.rs diff --git a/src/storage/task/function_info.rs b/src/catalog_storage/src/task/function_info.rs similarity index 100% rename from src/storage/task/function_info.rs rename to src/catalog_storage/src/task/function_info.rs diff --git a/src/storage/task/mod.rs b/src/catalog_storage/src/task/mod.rs similarity index 100% rename from src/storage/task/mod.rs rename to src/catalog_storage/src/task/mod.rs diff --git a/src/storage/task/proto_codec.rs b/src/catalog_storage/src/task/proto_codec.rs similarity index 99% rename from src/storage/task/proto_codec.rs rename to src/catalog_storage/src/task/proto_codec.rs index 6c1bc8df..78a0426d 100644 --- a/src/storage/task/proto_codec.rs +++ b/src/catalog_storage/src/task/proto_codec.rs @@ -20,7 +20,7 @@ use protocol::storage::{ }; use serde::{Deserialize, Serialize}; -use crate::runtime::common::ComponentState; +use crate::common::ComponentState; use super::storage::TaskModuleBytes; diff --git a/src/storage/task/rocksdb_storage.rs b/src/catalog_storage/src/task/rocksdb_storage.rs similarity index 99% rename from src/storage/task/rocksdb_storage.rs rename to src/catalog_storage/src/task/rocksdb_storage.rs index cea0ceb9..a3927515 100644 --- a/src/storage/task/rocksdb_storage.rs +++ b/src/catalog_storage/src/task/rocksdb_storage.rs @@ -19,8 +19,8 @@ use super::proto_codec::{ encode_task_module_bytes, }; use super::storage::{StoredTaskInfo, TaskStorage}; +use crate::common::ComponentState; use crate::config::storage::RocksDBStorageConfig; -use crate::runtime::common::ComponentState; use anyhow::{Context, Result, anyhow}; use rocksdb::{ColumnFamilyDescriptor, DB, IteratorMode, Options, WriteBatch}; use std::path::Path; diff --git a/src/storage/task/storage.rs b/src/catalog_storage/src/task/storage.rs similarity index 97% rename from src/storage/task/storage.rs rename to src/catalog_storage/src/task/storage.rs index 156ee5d8..6cbec9ea 100644 --- a/src/storage/task/storage.rs +++ b/src/catalog_storage/src/task/storage.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::common::ComponentState; +use crate::common::ComponentState; use anyhow::Result; use serde::{Deserialize, Serialize}; diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml new file mode 100644 index 00000000..4c155c5e --- /dev/null +++ b/src/common/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "function-stream-common" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_common" +path = "src/lib.rs" diff --git a/src/common/fs_schema.rs b/src/common/src/fs_schema.rs similarity index 100% rename from src/common/fs_schema.rs rename to src/common/src/fs_schema.rs diff --git a/src/common/mod.rs b/src/common/src/legacy_mod.rs similarity index 100% rename from src/common/mod.rs rename to src/common/src/legacy_mod.rs diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs new file mode 100644 index 00000000..0356f6de --- /dev/null +++ b/src/common/src/lib.rs @@ -0,0 +1,3 @@ +//! Shared types and utilities for FunctionStream crates. 
+ +pub const CRATE_NAME: &str = "function-stream-common"; diff --git a/src/config/Cargo.toml b/src/config/Cargo.toml new file mode 100644 index 00000000..d1e437cd --- /dev/null +++ b/src/config/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "function-stream-config" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_config" +path = "src/lib.rs" + +[features] +default = ["python"] +python = [] + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +uuid = { version = "1.0", features = ["v4", "v7"] } diff --git a/src/config/global_config.rs b/src/config/src/global_config.rs similarity index 100% rename from src/config/global_config.rs rename to src/config/src/global_config.rs diff --git a/src/config/mod.rs b/src/config/src/lib.rs similarity index 61% rename from src/config/mod.rs rename to src/config/src/lib.rs index e60dcfde..c07c676c 100644 --- a/src/config/mod.rs +++ b/src/config/src/lib.rs @@ -1,14 +1,4 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +//! Configuration loading and validation. pub mod global_config; pub mod loader; @@ -21,6 +11,11 @@ pub mod streaming_job; pub mod system; pub mod wasm_config; +// Compatibility shim for files that still reference `crate::config::*`. 
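The compatibility shim declared on the next lines means call sites that still spell out `config::...` keep resolving against the crate root. A hypothetical downstream snippet (not in the patch) showing the two paths naming one type:

```rust
// Both imports resolve to the same struct via the `pub mod config { pub use crate::*; }` shim.
use function_stream_config::GlobalConfig;
use function_stream_config::config::GlobalConfig as LegacyGlobalConfig;

// If the two paths named different types, this function would not compile.
fn same_type(cfg: GlobalConfig) -> LegacyGlobalConfig {
    cfg
}
```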
+pub mod config { + pub use crate::*; +} + pub use global_config::{ DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES, DEFAULT_STREAMING_RUNTIME_MEMORY_BYTES, GlobalConfig, }; diff --git a/src/config/loader.rs b/src/config/src/loader.rs similarity index 100% rename from src/config/loader.rs rename to src/config/src/loader.rs diff --git a/src/config/log_config.rs b/src/config/src/log_config.rs similarity index 100% rename from src/config/log_config.rs rename to src/config/src/log_config.rs diff --git a/src/config/paths.rs b/src/config/src/paths.rs similarity index 100% rename from src/config/paths.rs rename to src/config/src/paths.rs diff --git a/src/config/python_config.rs b/src/config/src/python_config.rs similarity index 100% rename from src/config/python_config.rs rename to src/config/src/python_config.rs diff --git a/src/config/service_config.rs b/src/config/src/service_config.rs similarity index 100% rename from src/config/service_config.rs rename to src/config/src/service_config.rs diff --git a/src/config/storage.rs b/src/config/src/storage.rs similarity index 100% rename from src/config/storage.rs rename to src/config/src/storage.rs diff --git a/src/config/streaming_job.rs b/src/config/src/streaming_job.rs similarity index 100% rename from src/config/streaming_job.rs rename to src/config/src/streaming_job.rs diff --git a/src/config/system.rs b/src/config/src/system.rs similarity index 100% rename from src/config/system.rs rename to src/config/src/system.rs diff --git a/src/config/wasm_config.rs b/src/config/src/wasm_config.rs similarity index 100% rename from src/config/wasm_config.rs rename to src/config/src/wasm_config.rs diff --git a/src/coordinator/Cargo.toml b/src/coordinator/Cargo.toml new file mode 100644 index 00000000..f8a6e0b7 --- /dev/null +++ b/src/coordinator/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "function-stream-coordinator" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_coordinator" +path = "src/lib.rs" diff --git a/src/coordinator/analyze/analysis.rs b/src/coordinator/src/analyze/analysis.rs similarity index 100% rename from src/coordinator/analyze/analysis.rs rename to src/coordinator/src/analyze/analysis.rs diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/src/analyze/analyzer.rs similarity index 100% rename from src/coordinator/analyze/analyzer.rs rename to src/coordinator/src/analyze/analyzer.rs diff --git a/src/coordinator/analyze/mod.rs b/src/coordinator/src/analyze/mod.rs similarity index 100% rename from src/coordinator/analyze/mod.rs rename to src/coordinator/src/analyze/mod.rs diff --git a/src/coordinator/coordinator.rs b/src/coordinator/src/coordinator.rs similarity index 100% rename from src/coordinator/coordinator.rs rename to src/coordinator/src/coordinator.rs diff --git a/src/coordinator/mod.rs b/src/coordinator/src/coordinator_body.rs similarity index 95% rename from src/coordinator/mod.rs rename to src/coordinator/src/coordinator_body.rs index 86598bc5..0b5b7135 100644 --- a/src/coordinator/mod.rs +++ b/src/coordinator/src/coordinator_body.rs @@ -18,12 +18,14 @@ mod execution; mod execution_context; mod plan; mod runtime_context; +mod sql_classify; mod statement; mod streaming_table_options; mod tool; pub use coordinator::Coordinator; pub use dataset::{DataSet, ShowFunctionsResult}; +pub use sql_classify::classify_statement; pub use statement::{ CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropStreamingTableStatement, DropTableStatement, PythonModule, ShowCatalogTables, 
ShowCreateStreamingTable, ShowCreateTable, diff --git a/src/coordinator/dataset/data_set.rs b/src/coordinator/src/dataset/data_set.rs similarity index 100% rename from src/coordinator/dataset/data_set.rs rename to src/coordinator/src/dataset/data_set.rs diff --git a/src/coordinator/dataset/execute_result.rs b/src/coordinator/src/dataset/execute_result.rs similarity index 100% rename from src/coordinator/dataset/execute_result.rs rename to src/coordinator/src/dataset/execute_result.rs diff --git a/src/coordinator/dataset/mod.rs b/src/coordinator/src/dataset/mod.rs similarity index 100% rename from src/coordinator/dataset/mod.rs rename to src/coordinator/src/dataset/mod.rs diff --git a/src/coordinator/dataset/show_catalog_tables_result.rs b/src/coordinator/src/dataset/show_catalog_tables_result.rs similarity index 100% rename from src/coordinator/dataset/show_catalog_tables_result.rs rename to src/coordinator/src/dataset/show_catalog_tables_result.rs diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/src/dataset/show_create_streaming_table_result.rs similarity index 100% rename from src/coordinator/dataset/show_create_streaming_table_result.rs rename to src/coordinator/src/dataset/show_create_streaming_table_result.rs diff --git a/src/coordinator/dataset/show_create_table_result.rs b/src/coordinator/src/dataset/show_create_table_result.rs similarity index 100% rename from src/coordinator/dataset/show_create_table_result.rs rename to src/coordinator/src/dataset/show_create_table_result.rs diff --git a/src/coordinator/dataset/show_functions_result.rs b/src/coordinator/src/dataset/show_functions_result.rs similarity index 98% rename from src/coordinator/dataset/show_functions_result.rs rename to src/coordinator/src/dataset/show_functions_result.rs index c16edf6d..18673f00 100644 --- a/src/coordinator/dataset/show_functions_result.rs +++ b/src/coordinator/src/dataset/show_functions_result.rs @@ -16,7 +16,7 @@ use arrow_array::{RecordBatch, StringArray}; use arrow_schema::{DataType, Field, Schema}; use super::DataSet; -use crate::storage::task::FunctionInfo; +use crate::task::FunctionInfo; #[derive(Clone, Debug)] pub struct ShowFunctionsResult { diff --git a/src/coordinator/dataset/show_streaming_tables_result.rs b/src/coordinator/src/dataset/show_streaming_tables_result.rs similarity index 97% rename from src/coordinator/dataset/show_streaming_tables_result.rs rename to src/coordinator/src/dataset/show_streaming_tables_result.rs index cae597ac..5b5c8e84 100644 --- a/src/coordinator/dataset/show_streaming_tables_result.rs +++ b/src/coordinator/src/dataset/show_streaming_tables_result.rs @@ -16,7 +16,7 @@ use arrow_array::{Int32Array, StringArray}; use arrow_schema::{DataType, Field, Schema}; use super::DataSet; -use crate::runtime::streaming::job::StreamingJobSummary; +use crate::streaming::job::StreamingJobSummary; #[derive(Clone, Debug)] pub struct ShowStreamingTablesResult { diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/src/execution/executor.rs similarity index 98% rename from src/coordinator/execution/executor.rs rename to src/coordinator/src/execution/executor.rs index 22c5c1b7..a58dc6c0 100644 --- a/src/coordinator/execution/executor.rs +++ b/src/coordinator/src/execution/executor.rs @@ -31,13 +31,13 @@ use crate::coordinator::statement::{ConfigSource, FunctionSource}; use crate::coordinator::streaming_table_options::{ parse_checkpoint_interval_ms, parse_pipeline_parallelism, }; -use 
crate::runtime::streaming::job::JobManager; -use crate::runtime::streaming::protocol::control::StopMode; -use crate::runtime::wasm::taskexecutor::TaskManager; use crate::sql::schema::catalog::ExternalTable; use crate::sql::schema::show_create_catalog_table; use crate::sql::schema::table::CatalogEntity; -use crate::storage::stream_catalog::CatalogManager; +use crate::stream_catalog::CatalogManager; +use crate::streaming::job::JobManager; +use crate::streaming::protocol::control::StopMode; +use crate::wasm::taskexecutor::TaskManager; #[derive(Error, Debug)] pub enum ExecuteError { diff --git a/src/coordinator/execution/mod.rs b/src/coordinator/src/execution/mod.rs similarity index 100% rename from src/coordinator/execution/mod.rs rename to src/coordinator/src/execution/mod.rs diff --git a/src/coordinator/execution_context.rs b/src/coordinator/src/execution_context.rs similarity index 100% rename from src/coordinator/execution_context.rs rename to src/coordinator/src/execution_context.rs diff --git a/src/coordinator/src/lib.rs b/src/coordinator/src/lib.rs new file mode 100644 index 00000000..6e5ad596 --- /dev/null +++ b/src/coordinator/src/lib.rs @@ -0,0 +1,3 @@ +//! Query planning and job coordination. + +pub const CRATE_NAME: &str = "function-stream-coordinator"; diff --git a/src/coordinator/plan/ast_utils.rs b/src/coordinator/src/plan/ast_utils.rs similarity index 100% rename from src/coordinator/plan/ast_utils.rs rename to src/coordinator/src/plan/ast_utils.rs diff --git a/src/coordinator/plan/compile_error_plan.rs b/src/coordinator/src/plan/compile_error_plan.rs similarity index 100% rename from src/coordinator/plan/compile_error_plan.rs rename to src/coordinator/src/plan/compile_error_plan.rs diff --git a/src/coordinator/plan/create_function_plan.rs b/src/coordinator/src/plan/create_function_plan.rs similarity index 100% rename from src/coordinator/plan/create_function_plan.rs rename to src/coordinator/src/plan/create_function_plan.rs diff --git a/src/coordinator/plan/create_python_function_plan.rs b/src/coordinator/src/plan/create_python_function_plan.rs similarity index 100% rename from src/coordinator/plan/create_python_function_plan.rs rename to src/coordinator/src/plan/create_python_function_plan.rs diff --git a/src/coordinator/plan/create_table_plan.rs b/src/coordinator/src/plan/create_table_plan.rs similarity index 100% rename from src/coordinator/plan/create_table_plan.rs rename to src/coordinator/src/plan/create_table_plan.rs diff --git a/src/coordinator/plan/ddl_compiler.rs b/src/coordinator/src/plan/ddl_compiler.rs similarity index 100% rename from src/coordinator/plan/ddl_compiler.rs rename to src/coordinator/src/plan/ddl_compiler.rs diff --git a/src/coordinator/plan/drop_function_plan.rs b/src/coordinator/src/plan/drop_function_plan.rs similarity index 100% rename from src/coordinator/plan/drop_function_plan.rs rename to src/coordinator/src/plan/drop_function_plan.rs diff --git a/src/coordinator/plan/drop_streaming_table_plan.rs b/src/coordinator/src/plan/drop_streaming_table_plan.rs similarity index 100% rename from src/coordinator/plan/drop_streaming_table_plan.rs rename to src/coordinator/src/plan/drop_streaming_table_plan.rs diff --git a/src/coordinator/plan/drop_table_plan.rs b/src/coordinator/src/plan/drop_table_plan.rs similarity index 100% rename from src/coordinator/plan/drop_table_plan.rs rename to src/coordinator/src/plan/drop_table_plan.rs diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/src/plan/logical_plan_visitor.rs similarity 
index 100% rename from src/coordinator/plan/logical_plan_visitor.rs rename to src/coordinator/src/plan/logical_plan_visitor.rs diff --git a/src/coordinator/plan/lookup_table_plan.rs b/src/coordinator/src/plan/lookup_table_plan.rs similarity index 100% rename from src/coordinator/plan/lookup_table_plan.rs rename to src/coordinator/src/plan/lookup_table_plan.rs diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/src/plan/mod.rs similarity index 100% rename from src/coordinator/plan/mod.rs rename to src/coordinator/src/plan/mod.rs diff --git a/src/coordinator/plan/optimizer.rs b/src/coordinator/src/plan/optimizer.rs similarity index 100% rename from src/coordinator/plan/optimizer.rs rename to src/coordinator/src/plan/optimizer.rs diff --git a/src/coordinator/plan/show_catalog_tables_plan.rs b/src/coordinator/src/plan/show_catalog_tables_plan.rs similarity index 100% rename from src/coordinator/plan/show_catalog_tables_plan.rs rename to src/coordinator/src/plan/show_catalog_tables_plan.rs diff --git a/src/coordinator/plan/show_create_streaming_table_plan.rs b/src/coordinator/src/plan/show_create_streaming_table_plan.rs similarity index 100% rename from src/coordinator/plan/show_create_streaming_table_plan.rs rename to src/coordinator/src/plan/show_create_streaming_table_plan.rs diff --git a/src/coordinator/plan/show_create_table_plan.rs b/src/coordinator/src/plan/show_create_table_plan.rs similarity index 100% rename from src/coordinator/plan/show_create_table_plan.rs rename to src/coordinator/src/plan/show_create_table_plan.rs diff --git a/src/coordinator/plan/show_functions_plan.rs b/src/coordinator/src/plan/show_functions_plan.rs similarity index 100% rename from src/coordinator/plan/show_functions_plan.rs rename to src/coordinator/src/plan/show_functions_plan.rs diff --git a/src/coordinator/plan/show_streaming_tables_plan.rs b/src/coordinator/src/plan/show_streaming_tables_plan.rs similarity index 100% rename from src/coordinator/plan/show_streaming_tables_plan.rs rename to src/coordinator/src/plan/show_streaming_tables_plan.rs diff --git a/src/coordinator/plan/start_function_plan.rs b/src/coordinator/src/plan/start_function_plan.rs similarity index 100% rename from src/coordinator/plan/start_function_plan.rs rename to src/coordinator/src/plan/start_function_plan.rs diff --git a/src/coordinator/plan/stop_function_plan.rs b/src/coordinator/src/plan/stop_function_plan.rs similarity index 100% rename from src/coordinator/plan/stop_function_plan.rs rename to src/coordinator/src/plan/stop_function_plan.rs diff --git a/src/coordinator/plan/streaming_compiler.rs b/src/coordinator/src/plan/streaming_compiler.rs similarity index 100% rename from src/coordinator/plan/streaming_compiler.rs rename to src/coordinator/src/plan/streaming_compiler.rs diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/src/plan/streaming_table_connector_plan.rs similarity index 100% rename from src/coordinator/plan/streaming_table_connector_plan.rs rename to src/coordinator/src/plan/streaming_table_connector_plan.rs diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/src/plan/streaming_table_plan.rs similarity index 100% rename from src/coordinator/plan/streaming_table_plan.rs rename to src/coordinator/src/plan/streaming_table_plan.rs diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/src/plan/visitor.rs similarity index 100% rename from src/coordinator/plan/visitor.rs rename to src/coordinator/src/plan/visitor.rs diff --git 
a/src/coordinator/runtime_context.rs b/src/coordinator/src/runtime_context.rs similarity index 93% rename from src/coordinator/runtime_context.rs rename to src/coordinator/src/runtime_context.rs index 21b9d876..2f8dc2a7 100644 --- a/src/coordinator/runtime_context.rs +++ b/src/coordinator/src/runtime_context.rs @@ -16,10 +16,10 @@ use std::sync::Arc; use anyhow::Result; -use crate::runtime::streaming::job::JobManager; -use crate::runtime::wasm::taskexecutor::TaskManager; use crate::sql::schema::StreamSchemaProvider; -use crate::storage::stream_catalog::CatalogManager; +use crate::stream_catalog::CatalogManager; +use crate::streaming::job::JobManager; +use crate::wasm::taskexecutor::TaskManager; /// Dependencies shared by analyze / plan / execute, analogous to installing globals in /// [`TaskManager`], [`CatalogManager`], and [`JobManager`]. diff --git a/src/sql/parse.rs b/src/coordinator/src/sql_classify.rs similarity index 51% rename from src/sql/parse.rs rename to src/coordinator/src/sql_classify.rs index 0c6b9541..0c1417d4 100644 --- a/src/sql/parse.rs +++ b/src/coordinator/src/sql_classify.rs @@ -10,22 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Coordinator-facing SQL parsing (`parse_sql`). -//! -//! **Data-definition / pipeline shape (this entry point)** -//! Only these table-related forms are supported: -//! - **`CREATE TABLE ... (cols [, WATERMARK FOR ...]) WITH ('connector' = '...', 'format' = '...', ...)`** -//! connector-backed **source** DDL (no `AS SELECT`; `connector` in `WITH` selects this path) -//! - **`CREATE TABLE ...`** other forms (including `CREATE TABLE ... AS SELECT` where DataFusion accepts it) -//! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL) -//! - **`DROP TABLE`** / **`DROP TABLE IF EXISTS`** / **`DROP STREAMING TABLE`** (alias for `DROP TABLE` on the stream catalog) -//! - **`SHOW TABLES`** — list stream catalog tables (connector sources and streaming sinks) -//! - **`SHOW CREATE TABLE `** — best-effort DDL text (full `WITH` / `AS SELECT` may not be stored) -//! -//! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or -//! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead. -//! -//! Other supported statements include function lifecycle (`CREATE FUNCTION WITH`, `START FUNCTION`, …). +//! Map sqlparser [`Statement`](datafusion::sql::sqlparser::ast::Statement) values into +//! coordinator [`Statement`](super::statement::Statement) trait objects. use std::collections::HashMap; @@ -34,93 +20,15 @@ use datafusion::error::DataFusionError; use datafusion::sql::sqlparser::ast::{ ObjectType, ShowCreateObject, SqlOption, Statement as DFStatement, }; -use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; -use datafusion::sql::sqlparser::parser::Parser; -use crate::coordinator::{ +use super::{ CreateFunction, CreateTable, DropFunction, DropStreamingTableStatement, DropTableStatement, ShowCatalogTables, ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, - ShowStreamingTables, StartFunction, Statement as CoordinatorStatement, StopFunction, - StreamingTableStatement, + ShowStreamingTables, StartFunction, Statement, StopFunction, StreamingTableStatement, }; -/// Streaming-specific SQL that the sqlparser dialect does not natively handle. -/// -/// Returns `Some(statement)` if the SQL was intercepted, `None` otherwise so -/// the caller falls through to the normal sqlparser pipeline. 
-fn try_parse_streaming_statement(sql: &str) -> Option> { - let tokens: Vec<&str> = sql.split_whitespace().collect(); - if tokens.is_empty() { - return None; - } - - // SHOW STREAMING TABLES - if tokens.len() == 3 - && tokens[0].eq_ignore_ascii_case("show") - && tokens[1].eq_ignore_ascii_case("streaming") - && tokens[2].eq_ignore_ascii_case("tables") - { - return Some(Box::new(ShowStreamingTables::new())); - } - - // SHOW CREATE STREAMING TABLE - if tokens.len() == 5 - && tokens[0].eq_ignore_ascii_case("show") - && tokens[1].eq_ignore_ascii_case("create") - && tokens[2].eq_ignore_ascii_case("streaming") - && tokens[3].eq_ignore_ascii_case("table") - { - let name = tokens[4].trim_end_matches(';').to_string(); - return Some(Box::new(ShowCreateStreamingTable::new(name))); - } - - // DROP STREAMING TABLE [IF EXISTS] - if tokens.len() >= 4 - && tokens[0].eq_ignore_ascii_case("drop") - && tokens[1].eq_ignore_ascii_case("streaming") - && tokens[2].eq_ignore_ascii_case("table") - { - let (if_exists, name_idx) = if tokens.len() >= 6 - && tokens[3].eq_ignore_ascii_case("if") - && tokens[4].eq_ignore_ascii_case("exists") - { - (true, 5) - } else { - (false, 3) - }; - - if name_idx >= tokens.len() { - return None; - } - let name = tokens[name_idx].trim_end_matches(';').to_string(); - return Some(Box::new(DropStreamingTableStatement::new(name, if_exists))); - } - - None -} - -pub fn parse_sql(query: &str) -> Result>> { - let trimmed = query.trim(); - if trimmed.is_empty() { - return plan_err!("Query is empty"); - } - - if let Some(stmt) = try_parse_streaming_statement(trimmed) { - return Ok(vec![stmt]); - } - - let dialect = FunctionStreamDialect {}; - let statements = Parser::parse_sql(&dialect, trimmed) - .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?; - - if statements.is_empty() { - return plan_err!("No SQL statements found"); - } - - statements.into_iter().map(classify_statement).collect() -} - -fn classify_statement(stmt: DFStatement) -> Result> { +/// Convert [`DFStatement`] from the FunctionStream SQL dialect into a coordinator statement. +pub fn classify_statement(stmt: DFStatement) -> Result> { match stmt { DFStatement::CreateFunctionWith { options } => { let properties = sql_options_to_map(&options); @@ -139,34 +47,54 @@ fn classify_statement(stmt: DFStatement) -> Result } DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())), DFStatement::ShowTables { .. } => Ok(Box::new(ShowCatalogTables::new())), - DFStatement::ShowCreate { obj_type, obj_name } => { - if obj_type != ShowCreateObject::Table { - return plan_err!( - "SHOW CREATE {obj_type} is not supported; use SHOW CREATE TABLE " - ); - } - Ok(Box::new(ShowCreateTable::new(obj_name.to_string()))) - } + DFStatement::ShowStreamingTable => Ok(Box::new(ShowStreamingTables::new())), + DFStatement::ShowCreate { obj_type, obj_name } => match obj_type { + ShowCreateObject::Table => Ok(Box::new(ShowCreateTable::new(obj_name.to_string()))), + ShowCreateObject::StreamingTable => Ok(Box::new(ShowCreateStreamingTable::new( + obj_name.to_string(), + ))), + _ => plan_err!( + "SHOW CREATE {obj_type} is not supported; use SHOW CREATE TABLE or SHOW CREATE STREAMING TABLE " + ), + }, s @ DFStatement::CreateTable(_) => Ok(Box::new(CreateTable::new(s))), s @ DFStatement::CreateStreamingTable { .. } => { Ok(Box::new(StreamingTableStatement::new(s))) } stmt @ DFStatement::Drop { .. } => { - { - let DFStatement::Drop { - object_type, names, .. 
- } = &stmt - else { - unreachable!() - }; - if *object_type != ObjectType::Table { - return plan_err!("Only DROP TABLE is supported in this SQL frontend"); + let DFStatement::Drop { + object_type, + names, + if_exists, + .. + } = &stmt + else { + unreachable!() + }; + match object_type { + ObjectType::Table => { + if names.len() != 1 { + return plan_err!( + "DROP TABLE supports exactly one table name per statement" + ); + } + Ok(Box::new(DropTableStatement::new(stmt))) } - if names.len() != 1 { - return plan_err!("DROP TABLE supports exactly one table name per statement"); + ObjectType::StreamingTable => { + if names.len() != 1 { + return plan_err!( + "DROP STREAMING TABLE supports exactly one table name per statement" + ); + } + let table_name = names[0].to_string(); + Ok(Box::new(DropStreamingTableStatement::new( + table_name, *if_exists, + ))) } + _ => plan_err!( + "Only DROP TABLE and DROP STREAMING TABLE are supported in this SQL frontend" + ), } - Ok(Box::new(DropTableStatement::new(stmt))) } DFStatement::Insert { .. } => plan_err!( "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \ @@ -176,7 +104,6 @@ fn classify_statement(stmt: DFStatement) -> Result } } -/// Convert Vec (KeyValue pairs) into HashMap. fn sql_options_to_map(options: &[SqlOption]) -> HashMap { options .iter() @@ -193,71 +120,72 @@ fn sql_options_to_map(options: &[SqlOption]) -> HashMap { #[cfg(test)] mod tests { use super::*; + use crate::sql::parse::parse_sql; - fn first_stmt(sql: &str) -> Box { + fn first_classified(sql: &str) -> Box { let mut stmts = parse_sql(sql).unwrap(); assert!(!stmts.is_empty()); - stmts.remove(0) + classify_statement(stmts.remove(0)).unwrap() } - fn is_type(stmt: &dyn CoordinatorStatement, prefix: &str) -> bool { - format!("{:?}", stmt).starts_with(prefix) + fn is_type(stmt: &dyn Statement, prefix: &str) -> bool { + format!("{stmt:?}").starts_with(prefix) } #[test] fn test_parse_create_function() { let sql = "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')"; - let stmt = first_stmt(sql); + let stmt = first_classified(sql); assert!(is_type(stmt.as_ref(), "CreateFunction")); } #[test] fn test_parse_create_function_minimal() { let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')"; - let stmt = first_stmt(sql); + let stmt = first_classified(sql); assert!(is_type(stmt.as_ref(), "CreateFunction")); } #[test] fn test_parse_drop_function() { - let stmt = first_stmt("DROP FUNCTION my_task"); + let stmt = first_classified("DROP FUNCTION my_task"); assert!(is_type(stmt.as_ref(), "DropFunction")); } #[test] fn test_parse_start_function() { - let stmt = first_stmt("START FUNCTION my_task"); + let stmt = first_classified("START FUNCTION my_task"); assert!(is_type(stmt.as_ref(), "StartFunction")); } #[test] fn test_parse_stop_function() { - let stmt = first_stmt("STOP FUNCTION my_task"); + let stmt = first_classified("STOP FUNCTION my_task"); assert!(is_type(stmt.as_ref(), "StopFunction")); } #[test] fn test_parse_show_functions() { - let stmt = first_stmt("SHOW FUNCTIONS"); + let stmt = first_classified("SHOW FUNCTIONS"); assert!(is_type(stmt.as_ref(), "ShowFunctions")); } #[test] fn test_parse_show_tables() { - let stmt = first_stmt("SHOW TABLES"); + let stmt = first_classified("SHOW TABLES"); assert!(is_type(stmt.as_ref(), "ShowCatalogTables")); } #[test] fn test_parse_show_create_table() { - let stmt = first_stmt("SHOW CREATE TABLE my_src"); + let stmt = first_classified("SHOW CREATE TABLE my_src"); 
assert!(is_type(stmt.as_ref(), "ShowCreateTable")); } #[test] fn test_parse_create_table() { - let stmt = first_stmt("CREATE TABLE foo (id INT, name VARCHAR)"); + let stmt = first_classified("CREATE TABLE foo (id INT, name VARCHAR)"); assert!(is_type(stmt.as_ref(), "CreateTable")); } @@ -267,47 +195,46 @@ mod tests { "CREATE TABLE kafka_src (id BIGINT, ts TIMESTAMP NOT NULL, WATERMARK FOR ts) ", "WITH ('connector' = 'kafka', 'format' = 'json', 'topic' = 'events')", ); - let stmt = first_stmt(sql); + let stmt = first_classified(sql); assert!(is_type(stmt.as_ref(), "CreateTable")); } #[test] fn test_parse_drop_table() { - let stmt = first_stmt("DROP TABLE foo"); + let stmt = first_classified("DROP TABLE foo"); assert!(is_type(stmt.as_ref(), "DropTableStatement")); } #[test] fn test_parse_drop_table_if_exists() { - let stmt = first_stmt("DROP TABLE IF EXISTS foo"); + let stmt = first_classified("DROP TABLE IF EXISTS foo"); assert!(is_type(stmt.as_ref(), "DropTableStatement")); } #[test] fn test_parse_drop_streaming_table() { - let stmt = first_stmt("DROP STREAMING TABLE my_sink"); + let stmt = first_classified("DROP STREAMING TABLE my_sink"); assert!(is_type(stmt.as_ref(), "DropStreamingTableStatement")); } #[test] fn test_parse_drop_streaming_table_if_exists() { - let stmt = first_stmt("DROP STREAMING TABLE IF EXISTS my_sink"); + let stmt = first_classified("DROP STREAMING TABLE IF EXISTS my_sink"); assert!(is_type(stmt.as_ref(), "DropStreamingTableStatement")); } #[test] fn test_parse_show_streaming_tables() { - let stmt = first_stmt("SHOW STREAMING TABLES"); + let stmt = first_classified("SHOW STREAMING TABLES"); assert!(is_type(stmt.as_ref(), "ShowStreamingTables")); } #[test] fn test_parse_show_create_streaming_table() { - let stmt = first_stmt("SHOW CREATE STREAMING TABLE my_sink"); + let stmt = first_classified("SHOW CREATE STREAMING TABLE my_sink"); assert!(is_type(stmt.as_ref(), "ShowCreateStreamingTable")); } - /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`). 
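Stepping out of the test module for a moment: the refactor splits parsing from classification — `parse_sql` (now owned by the SQL frontend, per the `use crate::sql::parse::parse_sql` import above) returns dialect-level statements, and the coordinator's `classify_statement` turns each one into a plan object. Below is a sketch of the combined flow using the same crate-internal paths as the tests; the helper name is invented and the `Result<Box<dyn Statement>>` return type is assumed from the surrounding signatures.

```rust
use datafusion::error::Result;

use crate::coordinator::{classify_statement, Statement};
use crate::sql::parse::parse_sql;

/// Hypothetical helper: parse a SQL string with the FunctionStream dialect, then
/// classify every statement into the coordinator's `Statement` trait objects.
fn plan_statements(sql: &str) -> Result<Vec<Box<dyn Statement>>> {
    parse_sql(sql)?
        .into_iter()
        .map(classify_statement)
        .collect()
}
```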
#[test] fn test_parse_create_streaming_table() { let sql = concat!( @@ -315,7 +242,7 @@ mod tests { "WITH ('connector' = 'kafka') ", "AS SELECT id FROM src", ); - let stmt = first_stmt(sql); + let stmt = first_classified(sql); assert!( is_type(stmt.as_ref(), "StreamingTableStatement"), "expected StreamingTableStatement, got {:?}", @@ -330,22 +257,22 @@ mod tests { "with ('connector' = 'memory') ", "as select 1 as x", ); - let stmt = first_stmt(sql); + let stmt = first_classified(sql); assert!(is_type(stmt.as_ref(), "StreamingTableStatement")); } #[test] fn test_parse_case_insensitive() { assert!(is_type( - first_stmt("create function with ('function_path'='./test.wasm')").as_ref(), + first_classified("create function with ('function_path'='./test.wasm')").as_ref(), "CreateFunction" )); assert!(is_type( - first_stmt("show functions").as_ref(), + first_classified("show functions").as_ref(), "ShowFunctions" )); assert!(is_type( - first_stmt("start function my_task").as_ref(), + first_classified("start function my_task").as_ref(), "StartFunction" )); } @@ -356,27 +283,24 @@ mod tests { "CREATE TABLE t1 (id INT); ", "CREATE STREAMING TABLE sk WITH ('connector' = 'kafka') AS SELECT id FROM t1", ); - let stmts = parse_sql(sql).unwrap(); - assert_eq!(stmts.len(), 2); - assert!(is_type(stmts[0].as_ref(), "CreateTable")); - assert!(is_type(stmts[1].as_ref(), "StreamingTableStatement")); - } - - #[test] - fn test_parse_empty() { - assert!(parse_sql("").is_err()); - assert!(parse_sql(" ").is_err()); + let mut ast = parse_sql(sql).unwrap(); + assert_eq!(ast.len(), 2); + let s0 = classify_statement(ast.remove(0)).unwrap(); + let s1 = classify_statement(ast.remove(0)).unwrap(); + assert!(is_type(s0.as_ref(), "CreateTable")); + assert!(is_type(s1.as_ref(), "StreamingTableStatement")); } #[test] - fn test_parse_unsupported_statement() { - let result = parse_sql("SELECT 1"); - assert!(result.is_err()); + fn test_classify_unsupported_statement() { + let mut stmts = parse_sql("SELECT 1").unwrap(); + assert!(classify_statement(stmts.remove(0)).is_err()); } #[test] fn test_insert_not_supported() { - let err = parse_sql("INSERT INTO sink SELECT * FROM src").unwrap_err(); + let mut stmts = parse_sql("INSERT INTO sink SELECT * FROM src").unwrap(); + let err = classify_statement(stmts.remove(0)).unwrap_err(); let msg = err.to_string(); assert!( msg.contains("INSERT") && msg.contains("not supported"), @@ -396,7 +320,7 @@ mod tests { 'parallelism'='4', 'memory-limit'='256mb' )"#; - let stmt = first_stmt(sql); + let stmt = first_classified(sql); assert!(is_type(stmt.as_ref(), "CreateFunction")); } } diff --git a/src/coordinator/statement/create_function.rs b/src/coordinator/src/statement/create_function.rs similarity index 100% rename from src/coordinator/statement/create_function.rs rename to src/coordinator/src/statement/create_function.rs diff --git a/src/coordinator/statement/create_python_function.rs b/src/coordinator/src/statement/create_python_function.rs similarity index 100% rename from src/coordinator/statement/create_python_function.rs rename to src/coordinator/src/statement/create_python_function.rs diff --git a/src/coordinator/statement/create_table.rs b/src/coordinator/src/statement/create_table.rs similarity index 100% rename from src/coordinator/statement/create_table.rs rename to src/coordinator/src/statement/create_table.rs diff --git a/src/coordinator/statement/drop_function.rs b/src/coordinator/src/statement/drop_function.rs similarity index 100% rename from 
src/coordinator/statement/drop_function.rs rename to src/coordinator/src/statement/drop_function.rs diff --git a/src/coordinator/statement/drop_streaming_table.rs b/src/coordinator/src/statement/drop_streaming_table.rs similarity index 100% rename from src/coordinator/statement/drop_streaming_table.rs rename to src/coordinator/src/statement/drop_streaming_table.rs diff --git a/src/coordinator/statement/drop_table.rs b/src/coordinator/src/statement/drop_table.rs similarity index 100% rename from src/coordinator/statement/drop_table.rs rename to src/coordinator/src/statement/drop_table.rs diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/src/statement/mod.rs similarity index 100% rename from src/coordinator/statement/mod.rs rename to src/coordinator/src/statement/mod.rs diff --git a/src/coordinator/statement/show_catalog_tables.rs b/src/coordinator/src/statement/show_catalog_tables.rs similarity index 100% rename from src/coordinator/statement/show_catalog_tables.rs rename to src/coordinator/src/statement/show_catalog_tables.rs diff --git a/src/coordinator/statement/show_create_streaming_table.rs b/src/coordinator/src/statement/show_create_streaming_table.rs similarity index 100% rename from src/coordinator/statement/show_create_streaming_table.rs rename to src/coordinator/src/statement/show_create_streaming_table.rs diff --git a/src/coordinator/statement/show_create_table.rs b/src/coordinator/src/statement/show_create_table.rs similarity index 100% rename from src/coordinator/statement/show_create_table.rs rename to src/coordinator/src/statement/show_create_table.rs diff --git a/src/coordinator/statement/show_functions.rs b/src/coordinator/src/statement/show_functions.rs similarity index 100% rename from src/coordinator/statement/show_functions.rs rename to src/coordinator/src/statement/show_functions.rs diff --git a/src/coordinator/statement/show_streaming_tables.rs b/src/coordinator/src/statement/show_streaming_tables.rs similarity index 100% rename from src/coordinator/statement/show_streaming_tables.rs rename to src/coordinator/src/statement/show_streaming_tables.rs diff --git a/src/coordinator/statement/start_function.rs b/src/coordinator/src/statement/start_function.rs similarity index 100% rename from src/coordinator/statement/start_function.rs rename to src/coordinator/src/statement/start_function.rs diff --git a/src/coordinator/statement/stop_function.rs b/src/coordinator/src/statement/stop_function.rs similarity index 100% rename from src/coordinator/statement/stop_function.rs rename to src/coordinator/src/statement/stop_function.rs diff --git a/src/coordinator/statement/streaming_table.rs b/src/coordinator/src/statement/streaming_table.rs similarity index 100% rename from src/coordinator/statement/streaming_table.rs rename to src/coordinator/src/statement/streaming_table.rs diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/src/statement/visitor.rs similarity index 100% rename from src/coordinator/statement/visitor.rs rename to src/coordinator/src/statement/visitor.rs diff --git a/src/coordinator/streaming_table_options.rs b/src/coordinator/src/streaming_table_options.rs similarity index 100% rename from src/coordinator/streaming_table_options.rs rename to src/coordinator/src/streaming_table_options.rs diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/src/tool/mod.rs similarity index 100% rename from src/coordinator/tool/mod.rs rename to src/coordinator/src/tool/mod.rs diff --git a/src/function-stream/Cargo.toml 
b/src/function-stream/Cargo.toml new file mode 100644 index 00000000..baf9d5ea --- /dev/null +++ b/src/function-stream/Cargo.toml @@ -0,0 +1,86 @@ +[package] +name = "function-stream" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream" +path = "src/lib.rs" + +[[bin]] +name = "function-stream" +path = "src/main.rs" + +[dependencies] +tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "sync", "time", "net", "signal"] } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +serde_json = "1.0" +uuid = { version = "1.0", features = ["v4", "v7"] } +log = "0.4" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-appender = "0.2" +anyhow = "1.0" +thiserror = "2" +tonic = { version = "0.12", features = ["default"] } +async-trait = "0.1" +num_cpus = "1.0" +protocol = { path = "../../protocol" } +function-stream-config = { path = "../config" } +function-stream-logger = { path = "../logger" } +function-stream-runtime-common = { path = "../runtime_common" } +function-stream-streaming-planner = { path = "../streaming_planner" } +prost = "0.13" +rdkafka = { version = "0.38", features = ["cmake-build", "ssl", "gssapi", "curl"] } +crossbeam-channel = "0.5" +wasmtime = { version = "41.0.3", features = ["component-model", "async"] } +base64 = "0.22" +wasmtime-wasi = "41.0.3" +rocksdb = { version = "0.21", features = ["multi-threaded-cf", "lz4"] } +bincode = { version = "2", features = ["serde"] } +chrono = "0.4" +tokio-stream = "0.1.18" +lru = "0.12" +parking_lot = "0.12" +arrow = { version = "55", default-features = false } +arrow-array = "55" +arrow-ipc = "55" +arrow-schema = { version = "55", features = ["serde"] } +parquet = "55" +object_store = { version = "0.12.5", features = ["aws"] } +bytes = "1" +futures = "0.3" +serde_json_path = "0.7" +xxhash-rust = { version = "0.8", features = ["xxh3"] } +proctitle = "0.1" +unicase = "2.7" +petgraph = "0.7" +rand = { version = "0.8", features = ["small_rng"] } +itertools = "0.14" +strum = { version = "0.26", features = ["derive"] } + +arrow-json = { version = "55.2.0" } +apache-avro = "0.21" +datafusion = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-common = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-execution = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-expr = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-physical-expr = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-proto = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } + +sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.58.0/fs" } + +ahash = "0.8" +governor = "0.8.0" +lance = { version = "4.0.0", default-features = false, features = ["aws"] } +arrow-array-lance = { package = "arrow-array", version = "57.3.0" } +arrow-ipc-lance = { package = "arrow-ipc", version = "57.3.0" } + +[features] +default = ["incremental-cache", "python"] +incremental-cache = ["wasmtime/incremental-cache"] +python = [] + +[dev-dependencies] +tempfile = "3.27.0" diff --git a/src/storage/mod.rs b/src/function-stream/src/lib.rs similarity index 70% rename from src/storage/mod.rs rename to src/function-stream/src/lib.rs index ec32bdfa..a3fc9234 100644 --- a/src/storage/mod.rs +++ b/src/function-stream/src/lib.rs @@ -10,12 +10,39 @@ // 
See the License for the specific language governing permissions and // limitations under the License. +// Library crate for function-stream + +#![allow(dead_code)] + use std::sync::Arc; use anyhow::Context; +pub use function_stream_config as config; +#[path = "../../coordinator/src/coordinator_body.rs"] +pub mod coordinator; +pub use function_stream_logger as logging; + +pub use function_stream_runtime_common::{common, memory}; + +#[path = "../../streaming_runtime/src/streaming/mod.rs"] +pub mod streaming; + +#[path = "../../streaming_runtime/src/util/mod.rs"] +pub mod util; + +#[path = "../../wasm_runtime/src/wasm/mod.rs"] +pub mod wasm; + +pub use wasm::{input, output, processor}; + +#[path = "../../wasm_runtime/src/state_backend/mod.rs"] pub mod state_backend; + +#[path = "../../catalog_storage/src/stream_catalog/mod.rs"] pub mod stream_catalog; + +#[path = "../../catalog_storage/src/task/mod.rs"] pub mod task; /// Install the process-global [`stream_catalog::CatalogManager`] from configuration. @@ -51,3 +78,7 @@ pub fn initialize_stream_catalog(config: &crate::config::GlobalConfig) -> anyhow CatalogManager::init_global(store).context("Stream catalog (CatalogManager) global init failed") } + +#[path = "../../servicer/src/servicer_body.rs"] +pub mod server; +pub use function_stream_streaming_planner as sql; diff --git a/src/main.rs b/src/function-stream/src/main.rs similarity index 77% rename from src/main.rs rename to src/function-stream/src/main.rs index 46da3c7a..b7e1093f 100644 --- a/src/main.rs +++ b/src/function-stream/src/main.rs @@ -12,15 +12,71 @@ #![allow(dead_code)] -mod config; +pub use function_stream_config as config; +#[path = "../../coordinator/src/coordinator_body.rs"] mod coordinator; -mod logging; -mod runtime; -mod server; -mod sql; -mod storage; +pub use function_stream_logger as logging; + +pub use function_stream_runtime_common::{common, memory}; + +use std::sync::Arc; use anyhow::{Context, Result}; + +#[path = "../../streaming_runtime/src/streaming/mod.rs"] +mod streaming; + +#[path = "../../streaming_runtime/src/util/mod.rs"] +mod util; + +#[path = "../../wasm_runtime/src/wasm/mod.rs"] +mod wasm; + +pub use wasm::{input, output, processor}; + +#[path = "../../wasm_runtime/src/state_backend/mod.rs"] +mod state_backend; + +#[path = "../../catalog_storage/src/stream_catalog/mod.rs"] +mod stream_catalog; + +#[path = "../../catalog_storage/src/task/mod.rs"] +mod task; + +pub fn initialize_stream_catalog(config: &crate::config::GlobalConfig) -> anyhow::Result<()> { + use stream_catalog::{CatalogManager, InMemoryMetaStore, MetaStore, RocksDbMetaStore}; + + let store: Arc<dyn MetaStore> = if !config.stream_catalog.persist { + Arc::new(InMemoryMetaStore::new()) + } else { + let path = config + .stream_catalog + .db_path + .as_ref() + .map(|p| crate::config::resolve_path(p)) + .unwrap_or_else(|| crate::config::get_data_dir().join("catalog.db")); + + std::fs::create_dir_all(&path).with_context(|| { + format!( + "Failed to create stream catalog RocksDB directory {}", + path.display() + ) + })?; + + Arc::new(RocksDbMetaStore::open(&path).with_context(|| { + format!( + "Failed to open stream catalog RocksDB at {}", + path.display() + ) + })?)
+ }; + + CatalogManager::init_global(store).context("Stream catalog (CatalogManager) global init failed") +} + +#[path = "../../servicer/src/servicer_body.rs"] +mod server; +pub use function_stream_streaming_planner as sql; use std::thread; use tokio::sync::oneshot; @@ -144,7 +200,8 @@ fn setup_environment() -> Result { config::GlobalConfig::default() }; - logging::init_logging(&config.logging).context("Logging initialization failed")?; + function_stream_logger::init_logging(&config.logging) + .context("Logging initialization failed")?; log::debug!( "Environment initialized. Data: {}, Conf: {}", diff --git a/src/logger/Cargo.toml b/src/logger/Cargo.toml new file mode 100644 index 00000000..273579a8 --- /dev/null +++ b/src/logger/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "function-stream-logger" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_logger" +path = "src/lib.rs" + +[dependencies] +anyhow = "1.0" +function-stream-config = { path = "../config" } +tracing = "0.1" +tracing-appender = "0.2" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/logger/src/lib.rs b/src/logger/src/lib.rs new file mode 100644 index 00000000..53fb0b31 --- /dev/null +++ b/src/logger/src/lib.rs @@ -0,0 +1,9 @@ +//! Logging setup and helpers. + +pub mod config { + pub use function_stream_config::*; +} + +mod logging; + +pub use logging::init_logging; diff --git a/src/logging/mod.rs b/src/logger/src/logging.rs similarity index 100% rename from src/logging/mod.rs rename to src/logger/src/logging.rs diff --git a/src/runtime_common/Cargo.toml b/src/runtime_common/Cargo.toml new file mode 100644 index 00000000..f8e98ed6 --- /dev/null +++ b/src/runtime_common/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "function-stream-runtime-common" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_runtime_common" +path = "src/lib.rs" + +[dependencies] +arrow-array = "55" +bincode = { version = "2", features = ["serde"] } +parking_lot = "0.12" +serde = { version = "1.0", features = ["derive"] } +tokio = { version = "1.0", features = ["sync"] } +tracing = "0.1" diff --git a/src/runtime/common/component_state.rs b/src/runtime_common/src/common/component_state.rs similarity index 100% rename from src/runtime/common/component_state.rs rename to src/runtime_common/src/common/component_state.rs diff --git a/src/runtime/common/mod.rs b/src/runtime_common/src/common/mod.rs similarity index 100% rename from src/runtime/common/mod.rs rename to src/runtime_common/src/common/mod.rs diff --git a/src/runtime/common/task_completion.rs b/src/runtime_common/src/common/task_completion.rs similarity index 100% rename from src/runtime/common/task_completion.rs rename to src/runtime_common/src/common/task_completion.rs diff --git a/src/runtime/mod.rs b/src/runtime_common/src/lib.rs similarity index 81% rename from src/runtime/mod.rs rename to src/runtime_common/src/lib.rs index 8c72b507..1c97062b 100644 --- a/src/runtime/mod.rs +++ b/src/runtime_common/src/lib.rs @@ -10,14 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Runtime module +//! Shared runtime building blocks (task lifecycle, memory pool) used by streaming and WASM. 
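// ---- Reviewer sketch (illustrative, not part of the patch) ----
// The crate extraction keeps old `crate::…` paths compiling by re-exporting the extracted
// code under its previous module name, e.g. the logger crate's
//   pub mod config { pub use function_stream_config::*; }
// and the umbrella crate's
//   pub use function_stream_runtime_common::{common, memory};
// Minimal self-contained illustration of that pattern (module names hypothetical):
mod extracted {
    pub mod config {
        pub const DEFAULT_PORT: u16 = 8080;
    }
}

// Old call sites keep writing `crate::config::…` even though the items moved elsewhere.
pub use extracted::config;

fn main() {
    assert_eq!(config::DEFAULT_PORT, 8080);
}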
pub mod common; pub mod memory; -pub mod streaming; -pub mod util; -pub mod wasm; - -pub use wasm::input; -pub use wasm::output; -pub use wasm::processor; +pub mod streaming_protocol; diff --git a/src/runtime/memory/block.rs b/src/runtime_common/src/memory/block.rs similarity index 100% rename from src/runtime/memory/block.rs rename to src/runtime_common/src/memory/block.rs diff --git a/src/runtime/memory/error.rs b/src/runtime_common/src/memory/error.rs similarity index 100% rename from src/runtime/memory/error.rs rename to src/runtime_common/src/memory/error.rs diff --git a/src/runtime/memory/global.rs b/src/runtime_common/src/memory/global.rs similarity index 100% rename from src/runtime/memory/global.rs rename to src/runtime_common/src/memory/global.rs diff --git a/src/runtime/memory/mod.rs b/src/runtime_common/src/memory/mod.rs similarity index 100% rename from src/runtime/memory/mod.rs rename to src/runtime_common/src/memory/mod.rs diff --git a/src/runtime/memory/pool.rs b/src/runtime_common/src/memory/pool.rs similarity index 100% rename from src/runtime/memory/pool.rs rename to src/runtime_common/src/memory/pool.rs diff --git a/src/runtime/memory/ticket.rs b/src/runtime_common/src/memory/ticket.rs similarity index 100% rename from src/runtime/memory/ticket.rs rename to src/runtime_common/src/memory/ticket.rs diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime_common/src/streaming_protocol.rs similarity index 79% rename from src/runtime/streaming/protocol/event.rs rename to src/runtime_common/src/streaming_protocol.rs index 093d99ba..7c578f7a 100644 --- a/src/runtime/streaming/protocol/event.rs +++ b/src/runtime_common/src/streaming_protocol.rs @@ -10,15 +10,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! Streaming control-plane types shared by the SQL planner and the execution runtime. 
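// ---- Reviewer sketch (illustrative, not part of the patch) ----
// Watermark and CheckpointBarrier now live in runtime_common::streaming_protocol so the SQL
// planner and the execution runtime share one definition. merge_watermarks (end of this hunk)
// only yields a merged value once every input has reported a watermark; the sketch below shows
// that contract, assuming the signature is
//   fn merge_watermarks(per_input: &[Option<Watermark>]) -> Option<Watermark>.
use std::time::SystemTime;

use function_stream_runtime_common::streaming_protocol::{merge_watermarks, Watermark};

fn example_merge_semantics() {
    let reported = Some(Watermark::EventTime(SystemTime::now()));
    let pending: Option<Watermark> = None;
    // Any input that has not reported yet holds the merged watermark back.
    assert!(merge_watermarks(&[reported, pending]).is_none());
}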
+ use bincode::{Decode, Encode}; use serde::{Deserialize, Serialize}; -use std::sync::Arc; use std::time::SystemTime; -use arrow_array::RecordBatch; - -use crate::runtime::memory::MemoryTicket; - #[derive(Debug, Copy, Clone, PartialEq, Eq, Encode, Decode, Serialize, Deserialize)] pub enum Watermark { EventTime(SystemTime), @@ -33,44 +30,6 @@ pub struct CheckpointBarrier { pub then_stop: bool, } -#[derive(Debug, Clone)] -pub enum StreamEvent { - Data(RecordBatch), - Watermark(Watermark), - Barrier(CheckpointBarrier), - EndOfStream, -} - -#[derive(Debug, Clone)] -pub enum StreamOutput { - Forward(RecordBatch), - Keyed(u64, RecordBatch), - Broadcast(RecordBatch), - Watermark(Watermark), -} - -#[derive(Debug, Clone)] -pub struct TrackedEvent { - pub event: StreamEvent, - pub _ticket: Option>, -} - -impl TrackedEvent { - pub fn new(event: StreamEvent, ticket: Option) -> Self { - Self { - event, - _ticket: ticket.map(Arc::new), - } - } - - pub fn control(event: StreamEvent) -> Self { - Self { - event, - _ticket: None, - } - } -} - pub fn merge_watermarks(per_input: &[Option]) -> Option { if per_input.iter().any(|w| w.is_none()) { return None; diff --git a/src/servicer/Cargo.toml b/src/servicer/Cargo.toml new file mode 100644 index 00000000..16cd3109 --- /dev/null +++ b/src/servicer/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "function-stream-servicer" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_servicer" +path = "src/lib.rs" diff --git a/src/server/handler.rs b/src/servicer/src/handler.rs similarity index 96% rename from src/server/handler.rs rename to src/servicer/src/handler.rs index 0319e352..c884eb6f 100644 --- a/src/server/handler.rs +++ b/src/servicer/src/handler.rs @@ -26,7 +26,7 @@ use protocol::service::{ use crate::coordinator::{ Coordinator, CreateFunction, CreatePythonFunction, DataSet, DropFunction, PythonModule, - ShowFunctions, ShowFunctionsResult, StartFunction, Statement, StopFunction, + ShowFunctions, ShowFunctionsResult, StartFunction, Statement, StopFunction, classify_statement, }; use crate::sql::parse::parse_sql; @@ -135,12 +135,20 @@ impl FunctionStreamService for FunctionStreamServiceImpl { let timer = Instant::now(); let req = request.into_inner(); - let statements = parse_sql(&req.sql).map_err(|e| { + let ast = parse_sql(&req.sql).map_err(|e| { let detail = e.to_string(); warn!("SQL parse rejection: {}", detail); Status::invalid_argument(detail) })?; + let statements: Result>, _> = + ast.into_iter().map(classify_statement).collect(); + let statements = statements.map_err(|e| { + let detail = e.to_string(); + warn!("SQL classification rejection: {}", detail); + Status::invalid_argument(detail) + })?; + if statements.is_empty() { return Ok(TonicResponse::new(Self::build_success_response( StatusCode::Ok, diff --git a/src/server/initializer.rs b/src/servicer/src/initializer.rs similarity index 87% rename from src/server/initializer.rs rename to src/servicer/src/initializer.rs index 8a04608e..78b16c73 100644 --- a/src/server/initializer.rs +++ b/src/servicer/src/initializer.rs @@ -113,7 +113,7 @@ pub fn build_core_registry() -> ComponentRegistry { builder .register( "StreamCatalog", - crate::storage::stream_catalog::initialize_stream_catalog, + crate::stream_catalog::initialize_stream_catalog, ) .register("Coordinator", initialize_coordinator) .build() @@ -124,16 +124,16 @@ pub fn bootstrap_system(config: &GlobalConfig) -> Result<()> { registry.initialize_all(config)?; - 
crate::storage::stream_catalog::restore_global_catalog_from_store(); - crate::storage::stream_catalog::restore_streaming_jobs_from_store(); + crate::stream_catalog::restore_global_catalog_from_store(); + crate::stream_catalog::restore_streaming_jobs_from_store(); info!("System bootstrap finished. Node is ready to accept traffic."); Ok(()) } fn initialize_wasm_cache(config: &GlobalConfig) -> Result<()> { - crate::runtime::processor::wasm::wasm_cache::set_cache_config( - crate::runtime::processor::wasm::wasm_cache::WasmCacheConfig { + crate::processor::wasm::wasm_cache::set_cache_config( + crate::processor::wasm::wasm_cache::WasmCacheConfig { enabled: config.wasm.enable_cache, cache_dir: crate::config::paths::resolve_path(&config.wasm.cache_dir), max_size: config.wasm.max_cache_size, @@ -151,14 +151,14 @@ fn initialize_wasm_cache(config: &GlobalConfig) -> Result<()> { } fn initialize_task_manager(config: &GlobalConfig) -> Result<()> { - crate::runtime::wasm::taskexecutor::TaskManager::init(config) + crate::wasm::taskexecutor::TaskManager::init(config) .context("TaskManager service failed to start")?; Ok(()) } #[cfg(feature = "python")] fn initialize_python_service(config: &GlobalConfig) -> Result<()> { - crate::runtime::processor::python::PythonService::initialize(config) + crate::processor::python::PythonService::initialize(config) .context("Python Runtime initialization failed")?; Ok(()) } @@ -168,9 +168,9 @@ fn initialize_memory_service(config: &GlobalConfig) -> Result<()> { } fn initialize_job_manager(config: &GlobalConfig) -> Result<()> { - use crate::runtime::streaming::factory::OperatorFactory; - use crate::runtime::streaming::factory::Registry; - use crate::runtime::streaming::job::{JobManager, StateConfig}; + use crate::streaming::factory::OperatorFactory; + use crate::streaming::factory::Registry; + use crate::streaming::job::{JobManager, StateConfig}; use std::sync::Arc; let per_operator_memory_bytes = config @@ -199,16 +199,16 @@ fn initialize_job_manager(config: &GlobalConfig) -> Result<()> { } fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> { - crate::runtime::wasm::taskexecutor::TaskManager::get() + crate::wasm::taskexecutor::TaskManager::get() .context("Dependency violation: Coordinator requires TaskManager")?; - crate::runtime::memory::try_global_memory_pool() + crate::memory::try_global_memory_pool() .context("Dependency violation: Coordinator requires MemoryService")?; - crate::storage::stream_catalog::CatalogManager::global() + crate::stream_catalog::CatalogManager::global() .context("Dependency violation: Coordinator requires StreamCatalog")?; - crate::runtime::streaming::job::JobManager::global() + crate::streaming::job::JobManager::global() .context("Dependency violation: Coordinator requires JobManager")?; Ok(()) diff --git a/src/servicer/src/lib.rs b/src/servicer/src/lib.rs new file mode 100644 index 00000000..8a442937 --- /dev/null +++ b/src/servicer/src/lib.rs @@ -0,0 +1,3 @@ +//! Service layer implementations and request handling. 
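// ---- Reviewer sketch (illustrative, not part of the patch) ----
// The execute-SQL handler above now runs classification as a second pass and surfaces the
// first failure as a gRPC InvalidArgument, by collecting an iterator of Results into a
// single Result. Generic shape of that pattern (type parameters hypothetical):
use tonic::Status;

fn classify_all<A, S, E: std::fmt::Display>(
    ast: Vec<A>,
    classify: impl Fn(A) -> Result<S, E>,
) -> Result<Vec<S>, Status> {
    ast.into_iter()
        .map(classify)
        .collect::<Result<Vec<_>, E>>()
        // collect() stops at the first Err, which becomes the whole call's error.
        .map_err(|e| Status::invalid_argument(e.to_string()))
}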
+ +pub const CRATE_NAME: &str = "function-stream-servicer"; diff --git a/src/server/memory_service.rs b/src/servicer/src/memory_service.rs similarity index 97% rename from src/server/memory_service.rs rename to src/servicer/src/memory_service.rs index 2ba24eee..5c03350b 100644 --- a/src/server/memory_service.rs +++ b/src/servicer/src/memory_service.rs @@ -52,7 +52,7 @@ impl MemoryService { let total_pool_bytes = streaming_runtime_memory_bytes.saturating_add(operator_state_store_memory_bytes); - crate::runtime::memory::init_global_memory_pool(total_pool_bytes) + crate::memory::init_global_memory_pool(total_pool_bytes) .context("Global memory pool initialization failed")?; info!("MemoryService initialized"); diff --git a/src/server/service.rs b/src/servicer/src/service.rs similarity index 100% rename from src/server/service.rs rename to src/servicer/src/service.rs diff --git a/src/server/mod.rs b/src/servicer/src/servicer_body.rs similarity index 100% rename from src/server/mod.rs rename to src/servicer/src/servicer_body.rs diff --git a/src/sql/logical_node/mod.rs b/src/sql/logical_node/mod.rs deleted file mode 100644 index 2da1b8a4..00000000 --- a/src/sql/logical_node/mod.rs +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -pub mod logical; - -mod macros; - -pub(crate) mod streaming_operator_blueprint; -pub(crate) use streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint}; - -pub(crate) mod aggregate; -pub(crate) mod debezium; -pub(crate) mod join; -pub(crate) mod key_calculation; -pub(crate) mod lookup; -pub(crate) mod projection; -pub(crate) mod remote_table; -pub(crate) mod sink; -pub(crate) mod table_source; -pub(crate) mod updating_aggregate; -pub(crate) mod watermark_node; -pub(crate) mod windows_function; - -pub(crate) mod timestamp_append; -pub(crate) use timestamp_append::SystemTimestampInjectorNode; - -pub(crate) mod async_udf; -pub(crate) use async_udf::AsyncFunctionExecutionNode; - -pub(crate) mod is_retract; -pub(crate) use is_retract::IsRetractExtension; - -mod extension_try_from; diff --git a/src/sqlparser/Cargo.toml b/src/sqlparser/Cargo.toml new file mode 100644 index 00000000..7d6b40a6 --- /dev/null +++ b/src/sqlparser/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "function-stream-sqlparser" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_sqlparser" +path = "src/lib.rs" diff --git a/src/sqlparser/src/lib.rs b/src/sqlparser/src/lib.rs new file mode 100644 index 00000000..b436f895 --- /dev/null +++ b/src/sqlparser/src/lib.rs @@ -0,0 +1,3 @@ +//! SQL parsing facade for FunctionStream. 
+ +pub const CRATE_NAME: &str = "function-stream-sqlparser"; diff --git a/src/streaming_planner/Cargo.toml b/src/streaming_planner/Cargo.toml new file mode 100644 index 00000000..766aba1c --- /dev/null +++ b/src/streaming_planner/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "function-stream-streaming-planner" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_streaming_planner" +path = "src/lib.rs" + +[dependencies] +protocol = { path = "../../protocol" } +prost = "0.13" +function-stream-config = { path = "../config" } +function-stream-runtime-common = { path = "../runtime_common" } +tokio = { version = "1.0", features = ["macros", "rt-multi-thread", "sync", "time", "net"] } +tokio-stream = "0.1.18" +anyhow = "1.0" +xxhash-rust = { version = "0.8", features = ["xxh3"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +thiserror = "2" +tracing = "0.1" +async-trait = "0.1" +futures = "0.3" +itertools = "0.14" +petgraph = "0.7" +unicase = "2.7" +rand = { version = "0.8", features = ["small_rng"] } +bincode = { version = "2", features = ["serde"] } +chrono = "0.4" +bytes = "1" +ahash = "0.8" +strum = { version = "0.26", features = ["derive"] } +serde_json_path = "0.7" + +arrow = { version = "55", default-features = false } +arrow-array = "55" +arrow-schema = { version = "55", features = ["serde"] } +arrow-json = { version = "55.2.0" } +apache-avro = "0.21" + +datafusion = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-common = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-execution = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-expr = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-physical-expr = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } +datafusion-proto = { git = "https://github.com/FunctionStream/datafusion", branch = "48.0.1/fs" } + +sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.58.0/fs" } diff --git a/src/sql/analysis/aggregate_rewriter.rs b/src/streaming_planner/src/analysis/aggregate_rewriter.rs similarity index 96% rename from src/sql/analysis/aggregate_rewriter.rs rename to src/streaming_planner/src/analysis/aggregate_rewriter.rs index ddcb0294..22dcb03c 100644 --- a/src/sql/analysis/aggregate_rewriter.rs +++ b/src/streaming_planner/src/analysis/aggregate_rewriter.rs @@ -17,19 +17,19 @@ use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, Projecti use datafusion::prelude::col; use std::sync::Arc; -use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; -use crate::sql::logical_node::aggregate::StreamWindowAggregateNode; -use crate::sql::logical_node::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; -use crate::sql::logical_node::updating_aggregate::ContinuousAggregateNode; -use crate::sql::schema::StreamSchemaProvider; -use crate::sql::types::{ +use crate::analysis::streaming_window_analzer::StreamingWindowAnalzer; +use crate::logical_node::aggregate::StreamWindowAggregateNode; +use crate::logical_node::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; +use crate::logical_node::updating_aggregate::ContinuousAggregateNode; +use crate::schema::StreamSchemaProvider; +use crate::types::{ QualifiedField, TIMESTAMP_FIELD, WindowBehavior, WindowType, build_df_schema_with_metadata, extract_qualified_fields, 
extract_window_type, }; /// AggregateRewriter transforms batch DataFusion aggregates into streaming stateful operators. /// It handles windowing (Tumble/Hop/Session), watermarks, and continuous updating aggregates. -pub(crate) struct AggregateRewriter<'a> { +pub struct AggregateRewriter<'a> { pub schema_provider: &'a StreamSchemaProvider, } diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/streaming_planner/src/analysis/async_udf_rewriter.rs similarity index 95% rename from src/sql/analysis/async_udf_rewriter.rs rename to src/streaming_planner/src/analysis/async_udf_rewriter.rs index d6d9b54b..80f3828d 100644 --- a/src/sql/analysis/async_udf_rewriter.rs +++ b/src/streaming_planner/src/analysis/async_udf_rewriter.rs @@ -10,10 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::common::constants::sql_field; -use crate::sql::logical_node::AsyncFunctionExecutionNode; -use crate::sql::logical_node::remote_table::RemoteTableBoundaryNode; -use crate::sql::schema::StreamSchemaProvider; +use crate::common::constants::sql_field; +use crate::logical_node::AsyncFunctionExecutionNode; +use crate::logical_node::remote_table::RemoteTableBoundaryNode; +use crate::schema::StreamSchemaProvider; use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion::common::{Column, Result as DFResult, TableReference, plan_err}; use datafusion::logical_expr::expr::ScalarFunction; diff --git a/src/sql/analysis/join_rewriter.rs b/src/streaming_planner/src/analysis/join_rewriter.rs similarity index 94% rename from src/sql/analysis/join_rewriter.rs rename to src/streaming_planner/src/analysis/join_rewriter.rs index 8a9e5280..07bc0006 100644 --- a/src/sql/analysis/join_rewriter.rs +++ b/src/streaming_planner/src/analysis/join_rewriter.rs @@ -10,13 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer; -use crate::sql::common::TIMESTAMP_FIELD; -use crate::sql::common::constants::mem_exec_join_side; -use crate::sql::logical_node::join::StreamingJoinNode; -use crate::sql::logical_node::key_calculation::KeyExtractionNode; -use crate::sql::schema::StreamSchemaProvider; -use crate::sql::types::{WindowType, build_df_schema_with_metadata, extract_qualified_fields}; +use crate::analysis::streaming_window_analzer::StreamingWindowAnalzer; +use crate::common::TIMESTAMP_FIELD; +use crate::common::constants::mem_exec_join_side; +use crate::logical_node::join::StreamingJoinNode; +use crate::logical_node::key_calculation::KeyExtractionNode; +use crate::schema::StreamSchemaProvider; +use crate::types::{WindowType, build_df_schema_with_metadata, extract_qualified_fields}; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{ JoinConstraint, JoinType, Result, ScalarValue, TableReference, not_impl_err, plan_err, @@ -29,7 +29,7 @@ use std::sync::Arc; /// JoinRewriter handles the transformation of standard SQL joins into streaming-capable joins. /// It manages stateful "Updating Joins" and time-aligned "Instant Joins". 
-pub(crate) struct JoinRewriter<'a> { +pub struct JoinRewriter<'a> { pub schema_provider: &'a StreamSchemaProvider, } diff --git a/src/sql/analysis/mod.rs b/src/streaming_planner/src/analysis/mod.rs similarity index 90% rename from src/sql/analysis/mod.rs rename to src/streaming_planner/src/analysis/mod.rs index 019d8bf1..7ac1d4e8 100644 --- a/src/sql/analysis/mod.rs +++ b/src/streaming_planner/src/analysis/mod.rs @@ -12,12 +12,12 @@ #![allow(clippy::new_without_default)] -pub(crate) mod aggregate_rewriter; -pub(crate) mod join_rewriter; -pub(crate) mod row_time_rewriter; -pub(crate) mod stream_rewriter; -pub(crate) mod streaming_window_analzer; -pub(crate) mod window_function_rewriter; +pub mod aggregate_rewriter; +pub mod join_rewriter; +pub mod row_time_rewriter; +pub mod stream_rewriter; +pub mod streaming_window_analzer; +pub mod window_function_rewriter; pub mod async_udf_rewriter; pub mod sink_input_rewriter; @@ -31,7 +31,7 @@ pub use sink_input_rewriter::SinkInputRewriter; pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker}; pub use unnest_rewriter::UNNESTED_COL; -pub use crate::sql::schema::schema_provider::StreamSchemaProvider; +pub use crate::schema::schema_provider::StreamSchemaProvider; use std::collections::HashMap; use std::sync::Arc; @@ -42,11 +42,11 @@ use datafusion::error::DataFusionError; use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; use tracing::{debug, info, instrument}; -use crate::sql::logical_node::StreamingOperatorBlueprint; -use crate::sql::logical_node::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; -use crate::sql::logical_node::projection::StreamProjectionNode; -use crate::sql::logical_node::sink::StreamEgressNode; -use crate::sql::logical_planner::planner::NamedNode; +use crate::logical_node::StreamingOperatorBlueprint; +use crate::logical_node::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; +use crate::logical_node::projection::StreamProjectionNode; +use crate::logical_node::sink::StreamEgressNode; +use crate::logical_planner::planner::NamedNode; fn duration_from_sql_expr( expr: &datafusion::sql::sqlparser::ast::Expr, @@ -110,7 +110,7 @@ fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap Result { +pub fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result { let LogicalPlan::Extension(ref ext) = plan else { return Ok(plan); }; diff --git a/src/sql/analysis/row_time_rewriter.rs b/src/streaming_planner/src/analysis/row_time_rewriter.rs similarity index 95% rename from src/sql/analysis/row_time_rewriter.rs rename to src/streaming_planner/src/analysis/row_time_rewriter.rs index 13e2a048..aa558696 100644 --- a/src/sql/analysis/row_time_rewriter.rs +++ b/src/streaming_planner/src/analysis/row_time_rewriter.rs @@ -14,8 +14,8 @@ use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{Column, Result as DFResult}; use datafusion::logical_expr::Expr; -use crate::sql::common::constants::planning_placeholder_udf; -use crate::sql::types::TIMESTAMP_FIELD; +use crate::common::constants::planning_placeholder_udf; +use crate::types::TIMESTAMP_FIELD; /// Replaces the virtual `row_time()` scalar function with a physical reference to `_timestamp`. 
/// diff --git a/src/sql/analysis/sink_input_rewriter.rs b/src/streaming_planner/src/analysis/sink_input_rewriter.rs similarity index 89% rename from src/sql/analysis/sink_input_rewriter.rs rename to src/streaming_planner/src/analysis/sink_input_rewriter.rs index 201439cc..9f8fdcb7 100644 --- a/src/sql/analysis/sink_input_rewriter.rs +++ b/src/streaming_planner/src/analysis/sink_input_rewriter.rs @@ -10,9 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::logical_node::StreamingOperatorBlueprint; -use crate::sql::logical_node::sink::StreamEgressNode; -use crate::sql::logical_planner::planner::NamedNode; +use crate::logical_node::StreamingOperatorBlueprint; +use crate::logical_node::sink::StreamEgressNode; +use crate::logical_planner::planner::NamedNode; use datafusion::common::Result as DFResult; use datafusion::common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter}; use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; @@ -28,7 +28,7 @@ pub struct SinkInputRewriter<'a> { } impl<'a> SinkInputRewriter<'a> { - pub(crate) fn new(sink_inputs: &'a mut SinkInputs) -> Self { + pub fn new(sink_inputs: &'a mut SinkInputs) -> Self { Self { sink_inputs, was_removed: false, diff --git a/src/sql/analysis/source_metadata_visitor.rs b/src/streaming_planner/src/analysis/source_metadata_visitor.rs similarity index 82% rename from src/sql/analysis/source_metadata_visitor.rs rename to src/streaming_planner/src/analysis/source_metadata_visitor.rs index b37d3a1b..ea2821b9 100644 --- a/src/sql/analysis/source_metadata_visitor.rs +++ b/src/streaming_planner/src/analysis/source_metadata_visitor.rs @@ -10,9 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
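// ---- Reviewer sketch (illustrative, not part of the patch) ----
// SinkInputRewriter above and SourceMetadataVisitor below walk the logical plan looking for
// the crate's user-defined Extension nodes such as StreamEgressNode, typically by checking the
// node name. "StreamEgress" here is a placeholder; the real constant is STREAM_EGRESS_NODE_NAME.
use datafusion::logical_expr::{Extension, LogicalPlan};

fn is_stream_egress(plan: &LogicalPlan) -> bool {
    matches!(
        plan,
        LogicalPlan::Extension(Extension { node }) if node.name() == "StreamEgress"
    )
}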
-use crate::sql::logical_node::sink::{STREAM_EGRESS_NODE_NAME, StreamEgressNode}; -use crate::sql::logical_node::table_source::{STREAM_INGESTION_NODE_NAME, StreamIngestionNode}; -use crate::sql::schema::StreamSchemaProvider; +use crate::logical_node::sink::{STREAM_EGRESS_NODE_NAME, StreamEgressNode}; +use crate::logical_node::table_source::{STREAM_INGESTION_NODE_NAME, StreamIngestionNode}; +use crate::schema::StreamSchemaProvider; use datafusion::common::Result as DFResult; use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor}; use datafusion::logical_expr::{Extension, LogicalPlan}; @@ -51,9 +51,9 @@ impl<'a> SourceMetadataVisitor<'a> { let table = self.schema_provider.get_catalog_table(&table_name)?; match table { - crate::sql::schema::table::CatalogEntity::ExternalConnector(b) => match b.as_ref() { - crate::sql::schema::catalog::ExternalTable::Source(t) => t.registry_id, - crate::sql::schema::catalog::ExternalTable::Lookup(t) => t.registry_id, + crate::schema::table::CatalogEntity::ExternalConnector(b) => match b.as_ref() { + crate::schema::catalog::ExternalTable::Source(t) => t.registry_id, + crate::schema::catalog::ExternalTable::Lookup(t) => t.registry_id, _ => None, }, _ => None, diff --git a/src/sql/analysis/source_rewriter.rs b/src/streaming_planner/src/analysis/source_rewriter.rs similarity index 95% rename from src/sql/analysis/source_rewriter.rs rename to src/streaming_planner/src/analysis/source_rewriter.rs index 620ea336..e1a7c75a 100644 --- a/src/sql/analysis/source_rewriter.rs +++ b/src/streaming_planner/src/analysis/source_rewriter.rs @@ -20,16 +20,16 @@ use datafusion::logical_expr::{ self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan, }; -use crate::sql::common::UPDATING_META_FIELD; -use crate::sql::logical_node::debezium::UnrollDebeziumPayloadNode; -use crate::sql::logical_node::remote_table::RemoteTableBoundaryNode; -use crate::sql::logical_node::table_source::StreamIngestionNode; -use crate::sql::logical_node::watermark_node::EventTimeWatermarkNode; -use crate::sql::schema::ColumnDescriptor; -use crate::sql::schema::StreamSchemaProvider; -use crate::sql::schema::catalog::{ExternalTable, SourceTable}; -use crate::sql::schema::table::CatalogEntity; -use crate::sql::types::TIMESTAMP_FIELD; +use crate::common::UPDATING_META_FIELD; +use crate::logical_node::debezium::UnrollDebeziumPayloadNode; +use crate::logical_node::remote_table::RemoteTableBoundaryNode; +use crate::logical_node::table_source::StreamIngestionNode; +use crate::logical_node::watermark_node::EventTimeWatermarkNode; +use crate::schema::ColumnDescriptor; +use crate::schema::StreamSchemaProvider; +use crate::schema::catalog::{ExternalTable, SourceTable}; +use crate::schema::table::CatalogEntity; +use crate::types::TIMESTAMP_FIELD; /// Rewrites table scans: projections are lifted out of scans into a dedicated projection node /// (including virtual fields), using a connector table-source extension instead of a bare diff --git a/src/sql/analysis/stream_rewriter.rs b/src/streaming_planner/src/analysis/stream_rewriter.rs similarity index 96% rename from src/sql/analysis/stream_rewriter.rs rename to src/streaming_planner/src/analysis/stream_rewriter.rs index a4393bd1..efc4e40c 100644 --- a/src/sql/analysis/stream_rewriter.rs +++ b/src/streaming_planner/src/analysis/stream_rewriter.rs @@ -13,16 +13,16 @@ use std::sync::Arc; use super::StreamSchemaProvider; -use crate::sql::analysis::TimeWindowNullCheckRemover; -use crate::sql::analysis::row_time_rewriter::RowTimeRewriter; -use 
crate::sql::analysis::{ +use crate::analysis::TimeWindowNullCheckRemover; +use crate::analysis::row_time_rewriter::RowTimeRewriter; +use crate::analysis::{ aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter, window_function_rewriter::WindowFunctionRewriter, }; -use crate::sql::logical_node::StreamingOperatorBlueprint; -use crate::sql::logical_node::remote_table::RemoteTableBoundaryNode; -use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; -use crate::sql::types::{QualifiedField, TIMESTAMP_FIELD}; +use crate::logical_node::StreamingOperatorBlueprint; +use crate::logical_node::remote_table::RemoteTableBoundaryNode; +use crate::schema::utils::{add_timestamp_field, has_timestamp_field}; +use crate::types::{QualifiedField, TIMESTAMP_FIELD}; use datafusion::common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err}; use datafusion::logical_expr::{ diff --git a/src/sql/analysis/streaming_window_analzer.rs b/src/streaming_planner/src/analysis/streaming_window_analzer.rs similarity index 95% rename from src/sql/analysis/streaming_window_analzer.rs rename to src/streaming_planner/src/analysis/streaming_window_analzer.rs index b8a7f78f..5e6d3e91 100644 --- a/src/sql/analysis/streaming_window_analzer.rs +++ b/src/streaming_planner/src/analysis/streaming_window_analzer.rs @@ -17,16 +17,16 @@ use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor}; use datafusion::common::{Column, DFSchema, DataFusionError, Result}; use datafusion::logical_expr::{Expr, Extension, LogicalPlan, expr::Alias}; -use crate::sql::logical_node::aggregate::{STREAM_AGG_EXTENSION_NAME, StreamWindowAggregateNode}; -use crate::sql::logical_node::join::STREAM_JOIN_NODE_TYPE; -use crate::sql::types::{ +use crate::logical_node::aggregate::{STREAM_AGG_EXTENSION_NAME, StreamWindowAggregateNode}; +use crate::logical_node::join::STREAM_JOIN_NODE_TYPE; +use crate::types::{ QualifiedField, WindowBehavior, WindowType, extract_qualified_fields, extract_window_type, }; /// WindowDetectingVisitor identifies windowing strategies and tracks window-carrying fields /// as they propagate upward through the logical plan tree. #[derive(Debug, Default)] -pub(crate) struct StreamingWindowAnalzer { +pub struct StreamingWindowAnalzer { /// The specific window type discovered (Tumble, Hop, etc.) pub(crate) window: Option, /// Set of fields in the current plan node that carry window semantics. @@ -35,7 +35,7 @@ pub(crate) struct StreamingWindowAnalzer { impl StreamingWindowAnalzer { /// Entry point to resolve the WindowType of a given plan branch. - pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result> { + pub fn get_window(logical_plan: &LogicalPlan) -> Result> { let mut visitor = Self::default(); logical_plan.visit_with_subqueries(&mut visitor)?; Ok(visitor.window) @@ -89,7 +89,7 @@ impl StreamingWindowAnalzer { } } -pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> { +pub fn extract_column(expr: &Expr) -> Option<&Column> { match expr { Expr::Column(column) => Some(column), Expr::Alias(Alias { expr, .. 
}) => extract_column(expr), diff --git a/src/sql/analysis/time_window.rs b/src/streaming_planner/src/analysis/time_window.rs similarity index 100% rename from src/sql/analysis/time_window.rs rename to src/streaming_planner/src/analysis/time_window.rs diff --git a/src/sql/analysis/udafs.rs b/src/streaming_planner/src/analysis/udafs.rs similarity index 100% rename from src/sql/analysis/udafs.rs rename to src/streaming_planner/src/analysis/udafs.rs diff --git a/src/sql/analysis/unnest_rewriter.rs b/src/streaming_planner/src/analysis/unnest_rewriter.rs similarity index 97% rename from src/sql/analysis/unnest_rewriter.rs rename to src/streaming_planner/src/analysis/unnest_rewriter.rs index 147b1f49..e63878f7 100644 --- a/src/sql/analysis/unnest_rewriter.rs +++ b/src/streaming_planner/src/analysis/unnest_rewriter.rs @@ -18,8 +18,8 @@ use datafusion::common::{Column, Result as DFResult, plan_err}; use datafusion::logical_expr::expr::ScalarFunction; use datafusion::logical_expr::{ColumnUnnestList, Expr, LogicalPlan, Projection, Unnest}; -use crate::sql::common::constants::planning_placeholder_udf; -use crate::sql::types::{QualifiedField, build_df_schema, extract_qualified_fields}; +use crate::common::constants::planning_placeholder_udf; +use crate::types::{QualifiedField, build_df_schema, extract_qualified_fields}; pub const UNNESTED_COL: &str = "__unnested"; diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/streaming_planner/src/analysis/window_function_rewriter.rs similarity index 93% rename from src/sql/analysis/window_function_rewriter.rs rename to src/streaming_planner/src/analysis/window_function_rewriter.rs index c1e3396d..569a3c00 100644 --- a/src/sql/analysis/window_function_rewriter.rs +++ b/src/streaming_planner/src/analysis/window_function_rewriter.rs @@ -20,14 +20,14 @@ use datafusion_common::DataFusionError; use std::sync::Arc; use tracing::debug; -use crate::sql::analysis::streaming_window_analzer::{StreamingWindowAnalzer, extract_column}; -use crate::sql::logical_node::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; -use crate::sql::logical_node::windows_function::StreamingWindowFunctionNode; -use crate::sql::types::{WindowType, build_df_schema, extract_qualified_fields}; +use crate::analysis::streaming_window_analzer::{StreamingWindowAnalzer, extract_column}; +use crate::logical_node::key_calculation::{KeyExtractionNode, KeyExtractionStrategy}; +use crate::logical_node::windows_function::StreamingWindowFunctionNode; +use crate::types::{WindowType, build_df_schema, extract_qualified_fields}; /// WindowFunctionRewriter transforms standard SQL Window functions into streaming-compatible /// stateful operators, ensuring proper data partitioning and sorting for distributed execution. -pub(crate) struct WindowFunctionRewriter; +pub struct WindowFunctionRewriter; impl WindowFunctionRewriter { /// Recursively unwraps Aliases to find the underlying WindowFunction. 
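// ---- Reviewer sketch (illustrative, not part of the patch) ----
// The *Rewriter types in this crate (AggregateRewriter, JoinRewriter, WindowFunctionRewriter)
// all follow DataFusion's TreeNodeRewriter protocol: walk the LogicalPlan and, on the way back
// up, replace matching nodes with streaming Extension nodes. Bare skeleton of that protocol,
// with the actual matching logic omitted:
use datafusion::common::Result;
use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion::logical_expr::LogicalPlan;

struct PassThroughRewriter;

impl TreeNodeRewriter for PassThroughRewriter {
    type Node = LogicalPlan;

    // f_up runs after a node's children have been rewritten; a real rewriter would match
    // Aggregate/Join/Window nodes here and return Transformed::yes(extension_plan).
    fn f_up(&mut self, node: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
        Ok(Transformed::no(node))
    }
}

fn rewrite(plan: LogicalPlan) -> Result<LogicalPlan> {
    Ok(plan.rewrite(&mut PassThroughRewriter)?.data)
}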
@@ -48,7 +48,7 @@ impl WindowFunctionRewriter { &self, params: &WindowFunctionParams, input: &LogicalPlan, - input_window_fields: &std::collections::HashSet, + input_window_fields: &std::collections::HashSet, ) -> DFResult { let matched: Vec<_> = params .partition_by diff --git a/src/sql/api/checkpoints.rs b/src/streaming_planner/src/api/checkpoints.rs similarity index 98% rename from src/sql/api/checkpoints.rs rename to src/streaming_planner/src/api/checkpoints.rs index d9bdc139..bd326b10 100644 --- a/src/sql/api/checkpoints.rs +++ b/src/streaming_planner/src/api/checkpoints.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::common::to_micros; +use crate::common::to_micros; use serde::{Deserialize, Serialize}; use std::time::SystemTime; diff --git a/src/sql/api/connections.rs b/src/streaming_planner/src/api/connections.rs similarity index 99% rename from src/sql/api/connections.rs rename to src/streaming_planner/src/api/connections.rs index 3c5caf76..d2d92bcb 100644 --- a/src/sql/api/connections.rs +++ b/src/streaming_planner/src/api/connections.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::common::formats::{BadData, Format, Framing}; -use crate::sql::common::{FsExtensionType, FsSchema}; +use crate::common::formats::{BadData, Format, Framing}; +use crate::common::{FsExtensionType, FsSchema}; use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; use serde::ser::SerializeMap; use serde::{Deserialize, Serialize, Serializer}; diff --git a/src/sql/api/metrics.rs b/src/streaming_planner/src/api/metrics.rs similarity index 100% rename from src/sql/api/metrics.rs rename to src/streaming_planner/src/api/metrics.rs diff --git a/src/sql/api/mod.rs b/src/streaming_planner/src/api/mod.rs similarity index 100% rename from src/sql/api/mod.rs rename to src/streaming_planner/src/api/mod.rs diff --git a/src/sql/api/pipelines.rs b/src/streaming_planner/src/api/pipelines.rs similarity index 98% rename from src/sql/api/pipelines.rs rename to src/streaming_planner/src/api/pipelines.rs index d6cc5253..990c1ba9 100644 --- a/src/sql/api/pipelines.rs +++ b/src/streaming_planner/src/api/pipelines.rs @@ -11,7 +11,7 @@ // limitations under the License. 
use super::udfs::Udf; -use crate::sql::common::control::ErrorDomain; +use crate::common::control::ErrorDomain; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Clone, Debug)] diff --git a/src/sql/api/public_ids.rs b/src/streaming_planner/src/api/public_ids.rs similarity index 100% rename from src/sql/api/public_ids.rs rename to src/streaming_planner/src/api/public_ids.rs diff --git a/src/sql/api/schema_resolver.rs b/src/streaming_planner/src/api/schema_resolver.rs similarity index 100% rename from src/sql/api/schema_resolver.rs rename to src/streaming_planner/src/api/schema_resolver.rs diff --git a/src/sql/api/udfs.rs b/src/streaming_planner/src/api/udfs.rs similarity index 100% rename from src/sql/api/udfs.rs rename to src/streaming_planner/src/api/udfs.rs diff --git a/src/sql/api/var_str.rs b/src/streaming_planner/src/api/var_str.rs similarity index 100% rename from src/sql/api/var_str.rs rename to src/streaming_planner/src/api/var_str.rs diff --git a/src/sql/common/arrow_ext.rs b/src/streaming_planner/src/common/arrow_ext.rs similarity index 100% rename from src/sql/common/arrow_ext.rs rename to src/streaming_planner/src/common/arrow_ext.rs diff --git a/src/sql/common/connector_options.rs b/src/streaming_planner/src/common/connector_options.rs similarity index 100% rename from src/sql/common/connector_options.rs rename to src/streaming_planner/src/common/connector_options.rs diff --git a/src/sql/common/constants.rs b/src/streaming_planner/src/common/constants.rs similarity index 100% rename from src/sql/common/constants.rs rename to src/streaming_planner/src/common/constants.rs diff --git a/src/sql/common/control.rs b/src/streaming_planner/src/common/control.rs similarity index 98% rename from src/sql/common/control.rs rename to src/streaming_planner/src/common/control.rs index eba88596..59ef409e 100644 --- a/src/sql/common/control.rs +++ b/src/streaming_planner/src/common/control.rs @@ -13,7 +13,7 @@ use std::collections::HashMap; use std::time::SystemTime; -use crate::runtime::streaming::protocol::CheckpointBarrier; +use function_stream_runtime_common::streaming_protocol::CheckpointBarrier; /// Control messages sent from the controller to worker tasks. #[derive(Debug, Clone)] diff --git a/src/sql/common/converter.rs b/src/streaming_planner/src/common/converter.rs similarity index 100% rename from src/sql/common/converter.rs rename to src/streaming_planner/src/common/converter.rs diff --git a/src/sql/common/date.rs b/src/streaming_planner/src/common/date.rs similarity index 100% rename from src/sql/common/date.rs rename to src/streaming_planner/src/common/date.rs diff --git a/src/sql/common/debezium.rs b/src/streaming_planner/src/common/debezium.rs similarity index 100% rename from src/sql/common/debezium.rs rename to src/streaming_planner/src/common/debezium.rs diff --git a/src/sql/common/errors.rs b/src/streaming_planner/src/common/errors.rs similarity index 97% rename from src/sql/common/errors.rs rename to src/streaming_planner/src/common/errors.rs index fa4a722e..b7e97400 100644 --- a/src/sql/common/errors.rs +++ b/src/streaming_planner/src/common/errors.rs @@ -67,7 +67,7 @@ impl From for DataflowError { #[macro_export] macro_rules! 
connector_err { ($($arg:tt)*) => { - $crate::sql::common::errors::DataflowError::Connector(format!($($arg)*)) + $crate::common::errors::DataflowError::Connector(format!($($arg)*)) }; } diff --git a/src/sql/common/format_from_opts.rs b/src/streaming_planner/src/common/format_from_opts.rs similarity index 100% rename from src/sql/common/format_from_opts.rs rename to src/streaming_planner/src/common/format_from_opts.rs diff --git a/src/sql/common/formats.rs b/src/streaming_planner/src/common/formats.rs similarity index 100% rename from src/sql/common/formats.rs rename to src/streaming_planner/src/common/formats.rs diff --git a/src/sql/common/fs_schema.rs b/src/streaming_planner/src/common/fs_schema.rs similarity index 99% rename from src/sql/common/fs_schema.rs rename to src/streaming_planner/src/common/fs_schema.rs index 76a08537..9c548f69 100644 --- a/src/sql/common/fs_schema.rs +++ b/src/streaming_planner/src/common/fs_schema.rs @@ -11,7 +11,7 @@ // limitations under the License. use super::{TIMESTAMP_FIELD, to_nanos}; -use crate::sql::common::converter::Converter; +use crate::common::converter::Converter; use arrow::compute::kernels::cmp::gt_eq; use arrow::compute::kernels::numeric::div; use arrow::compute::{SortColumn, filter_record_batch, lexsort_to_indices, partition, take}; diff --git a/src/sql/common/kafka_catalog.rs b/src/streaming_planner/src/common/kafka_catalog.rs similarity index 100% rename from src/sql/common/kafka_catalog.rs rename to src/streaming_planner/src/common/kafka_catalog.rs diff --git a/src/sql/common/mod.rs b/src/streaming_planner/src/common/mod.rs similarity index 92% rename from src/sql/common/mod.rs rename to src/streaming_planner/src/common/mod.rs index af44cb0f..40c11c6c 100644 --- a/src/sql/common/mod.rs +++ b/src/streaming_planner/src/common/mod.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Shared core types and constants for FunctionStream (`crate::sql::common`). +//! Shared core types and constants for FunctionStream streaming planner (`crate::common`). //! //! Used by the runtime, SQL planner, coordinator, and other subsystems — //! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo. 
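// ---- Reviewer sketch (illustrative, not part of the patch) ----
// The connector_err! hunk above only has to change the `$crate::…` path: `$crate` always
// resolves to the crate that defines the macro, so after the move it points at the
// streaming_planner crate and the `sql::` segment disappears. Self-contained illustration of
// the same #[macro_export] + $crate pattern (DemoError is hypothetical):
pub mod errors {
    #[derive(Debug)]
    pub enum DemoError {
        Connector(String),
    }
}

#[macro_export]
macro_rules! demo_connector_err {
    ($($arg:tt)*) => {
        // $crate keeps this expansion correct no matter which crate invokes the macro.
        $crate::errors::DemoError::Connector(format!($($arg)*))
    };
}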
@@ -33,8 +33,8 @@ pub mod topology; pub mod with_option_keys; // ── Re-exports from existing modules ── -pub use crate::runtime::streaming::protocol::{CheckpointBarrier, Watermark}; pub use arrow_ext::FsExtensionType; +pub use function_stream_runtime_common::streaming_protocol::{CheckpointBarrier, Watermark}; pub use time_utils::{from_nanos, to_micros, to_millis, to_nanos}; // ── Re-exports from new modules ── diff --git a/src/sql/common/operator_config.rs b/src/streaming_planner/src/common/operator_config.rs similarity index 100% rename from src/sql/common/operator_config.rs rename to src/streaming_planner/src/common/operator_config.rs diff --git a/src/sql/common/time_utils.rs b/src/streaming_planner/src/common/time_utils.rs similarity index 100% rename from src/sql/common/time_utils.rs rename to src/streaming_planner/src/common/time_utils.rs diff --git a/src/sql/common/topology.rs b/src/streaming_planner/src/common/topology.rs similarity index 100% rename from src/sql/common/topology.rs rename to src/streaming_planner/src/common/topology.rs diff --git a/src/sql/common/with_option_keys.rs b/src/streaming_planner/src/common/with_option_keys.rs similarity index 100% rename from src/sql/common/with_option_keys.rs rename to src/streaming_planner/src/common/with_option_keys.rs diff --git a/src/sql/connector/config.rs b/src/streaming_planner/src/connector/config.rs similarity index 100% rename from src/sql/connector/config.rs rename to src/streaming_planner/src/connector/config.rs diff --git a/src/sql/connector/factory.rs b/src/streaming_planner/src/connector/factory.rs similarity index 93% rename from src/sql/connector/factory.rs rename to src/streaming_planner/src/connector/factory.rs index 8c37a15a..e89bafa1 100644 --- a/src/sql/connector/factory.rs +++ b/src/streaming_planner/src/connector/factory.rs @@ -18,9 +18,9 @@ use datafusion::common::Result; use super::config::ConnectorConfig; use super::registry::REGISTRY; use super::sink::runtime_config::SinkRuntimeConfig; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::formats::{BadData, Format}; -use crate::sql::schema::table_role::TableRole; +use crate::common::connector_options::ConnectorOptions; +use crate::common::formats::{BadData, Format}; +use crate::schema::table_role::TableRole; pub fn build_connector_config( connector_name: &str, diff --git a/src/sql/connector/mod.rs b/src/streaming_planner/src/connector/mod.rs similarity index 100% rename from src/sql/connector/mod.rs rename to src/streaming_planner/src/connector/mod.rs diff --git a/src/sql/connector/provider.rs b/src/streaming_planner/src/connector/provider.rs similarity index 93% rename from src/sql/connector/provider.rs rename to src/streaming_planner/src/connector/provider.rs index 8875ee0c..83e46aa7 100644 --- a/src/sql/connector/provider.rs +++ b/src/streaming_planner/src/connector/provider.rs @@ -14,8 +14,8 @@ use datafusion::common::{DataFusionError, Result}; use super::config::ConnectorConfig; use super::sink::runtime_config::SinkRuntimeProperties; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::formats::{BadData, Format}; +use crate::common::connector_options::ConnectorOptions; +use crate::common::formats::{BadData, Format}; pub trait SourceProvider: Send + Sync { fn name(&self) -> &'static str; diff --git a/src/sql/connector/registry.rs b/src/streaming_planner/src/connector/registry.rs similarity index 100% rename from src/sql/connector/registry.rs rename to 
src/streaming_planner/src/connector/registry.rs diff --git a/src/sql/connector/sink/delta.rs b/src/streaming_planner/src/connector/sink/delta.rs similarity index 83% rename from src/sql/connector/sink/delta.rs rename to src/streaming_planner/src/connector/sink/delta.rs index cd86660d..08d86d96 100644 --- a/src/sql/connector/sink/delta.rs +++ b/src/streaming_planner/src/connector/sink/delta.rs @@ -13,13 +13,13 @@ use datafusion::common::Result; use protocol::function_stream_graph::{DeltaSinkConfig, SinkFormatProto}; -use crate::sql::common::Format; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::connector_type; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SinkProvider; -use crate::sql::connector::sink::runtime_config::SinkRuntimeProperties; -use crate::sql::connector::sink::utils::SinkUtils; +use crate::common::Format; +use crate::common::connector_options::ConnectorOptions; +use crate::common::constants::connector_type; +use crate::connector::config::ConnectorConfig; +use crate::connector::provider::SinkProvider; +use crate::connector::sink::runtime_config::SinkRuntimeProperties; +use crate::connector::sink::utils::SinkUtils; pub struct DeltaSinkConnector; diff --git a/src/sql/connector/sink/filesystem.rs b/src/streaming_planner/src/connector/sink/filesystem.rs similarity index 83% rename from src/sql/connector/sink/filesystem.rs rename to src/streaming_planner/src/connector/sink/filesystem.rs index 224b1805..e529ed79 100644 --- a/src/sql/connector/sink/filesystem.rs +++ b/src/streaming_planner/src/connector/sink/filesystem.rs @@ -13,13 +13,13 @@ use datafusion::common::Result; use protocol::function_stream_graph::{FilesystemSinkConfig, SinkFormatProto}; -use crate::sql::common::Format; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::connector_type; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SinkProvider; -use crate::sql::connector::sink::runtime_config::SinkRuntimeProperties; -use crate::sql::connector::sink::utils::SinkUtils; +use crate::common::Format; +use crate::common::connector_options::ConnectorOptions; +use crate::common::constants::connector_type; +use crate::connector::config::ConnectorConfig; +use crate::connector::provider::SinkProvider; +use crate::connector::sink::runtime_config::SinkRuntimeProperties; +use crate::connector::sink::utils::SinkUtils; pub struct FilesystemSinkConnector; diff --git a/src/sql/connector/sink/iceberg.rs b/src/streaming_planner/src/connector/sink/iceberg.rs similarity index 82% rename from src/sql/connector/sink/iceberg.rs rename to src/streaming_planner/src/connector/sink/iceberg.rs index 12f0d378..4f4854a1 100644 --- a/src/sql/connector/sink/iceberg.rs +++ b/src/streaming_planner/src/connector/sink/iceberg.rs @@ -13,13 +13,13 @@ use datafusion::common::Result; use protocol::function_stream_graph::{IcebergSinkConfig, SinkFormatProto}; -use crate::sql::common::Format; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::connector_type; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SinkProvider; -use crate::sql::connector::sink::runtime_config::SinkRuntimeProperties; -use crate::sql::connector::sink::utils::SinkUtils; +use crate::common::Format; +use crate::common::connector_options::ConnectorOptions; +use crate::common::constants::connector_type; +use 
crate::connector::config::ConnectorConfig; +use crate::connector::provider::SinkProvider; +use crate::connector::sink::runtime_config::SinkRuntimeProperties; +use crate::connector::sink::utils::SinkUtils; pub struct IcebergSinkConnector; diff --git a/src/sql/connector/sink/kafka.rs b/src/streaming_planner/src/connector/sink/kafka.rs similarity index 94% rename from src/sql/connector/sink/kafka.rs rename to src/streaming_planner/src/connector/sink/kafka.rs index a6fd115c..79292929 100644 --- a/src/sql/connector/sink/kafka.rs +++ b/src/streaming_planner/src/connector/sink/kafka.rs @@ -17,16 +17,16 @@ use protocol::function_stream_graph::{ TimestampFormatProto, format_config, kafka_auth_config, }; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::{connector_type, kafka_with_value}; -use crate::sql::common::formats::{ +use crate::common::connector_options::ConnectorOptions; +use crate::common::constants::{connector_type, kafka_with_value}; +use crate::common::formats::{ DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, TimestampFormat as SqlTimestampFormat, }; -use crate::sql::common::with_option_keys as opt; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SinkProvider; -use crate::sql::connector::sink::runtime_config::SinkRuntimeProperties; +use crate::common::with_option_keys as opt; +use crate::connector::config::ConnectorConfig; +use crate::connector::provider::SinkProvider; +use crate::connector::sink::runtime_config::SinkRuntimeProperties; pub struct KafkaSinkConnector; diff --git a/src/sql/connector/sink/lancedb.rs b/src/streaming_planner/src/connector/sink/lancedb.rs similarity index 84% rename from src/sql/connector/sink/lancedb.rs rename to src/streaming_planner/src/connector/sink/lancedb.rs index aee79735..87ce99be 100644 --- a/src/sql/connector/sink/lancedb.rs +++ b/src/streaming_planner/src/connector/sink/lancedb.rs @@ -13,13 +13,13 @@ use datafusion::common::Result; use protocol::function_stream_graph::{LanceDbSinkConfig, SinkFormatProto}; -use crate::sql::common::Format; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::with_option_keys as opt; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SinkProvider; -use crate::sql::connector::sink::runtime_config::SinkRuntimeProperties; -use crate::sql::connector::sink::utils::SinkUtils; +use crate::common::Format; +use crate::common::connector_options::ConnectorOptions; +use crate::common::with_option_keys as opt; +use crate::connector::config::ConnectorConfig; +use crate::connector::provider::SinkProvider; +use crate::connector::sink::runtime_config::SinkRuntimeProperties; +use crate::connector::sink::utils::SinkUtils; pub struct LanceDbSinkConnector; diff --git a/src/sql/connector/sink/mod.rs b/src/streaming_planner/src/connector/sink/mod.rs similarity index 100% rename from src/sql/connector/sink/mod.rs rename to src/streaming_planner/src/connector/sink/mod.rs diff --git a/src/sql/connector/sink/runtime_config.rs b/src/streaming_planner/src/connector/sink/runtime_config.rs similarity index 95% rename from src/sql/connector/sink/runtime_config.rs rename to src/streaming_planner/src/connector/sink/runtime_config.rs index e0ffaeee..b68f7aa9 100644 --- a/src/sql/connector/sink/runtime_config.rs +++ b/src/streaming_planner/src/connector/sink/runtime_config.rs @@ -14,12 +14,12 @@ use std::collections::HashMap; use datafusion::common::{DataFusionError, Result, 
plan_err}; -use crate::config::global_config::{ +use crate::common::connector_options::ConnectorOptions; +use crate::common::with_option_keys as opt; +use function_stream_config::global_config::{ DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES, DEFAULT_SINK_BUFFER_MEMORY_BYTES, }; -use crate::config::streaming_job::DEFAULT_CHECKPOINT_INTERVAL_MS; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::with_option_keys as opt; +use function_stream_config::streaming_job::DEFAULT_CHECKPOINT_INTERVAL_MS; #[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct SinkRuntimeConfig { diff --git a/src/sql/connector/sink/s3.rs b/src/streaming_planner/src/connector/sink/s3.rs similarity index 84% rename from src/sql/connector/sink/s3.rs rename to src/streaming_planner/src/connector/sink/s3.rs index 5d04ce46..51cf85bf 100644 --- a/src/sql/connector/sink/s3.rs +++ b/src/streaming_planner/src/connector/sink/s3.rs @@ -13,14 +13,14 @@ use datafusion::common::Result; use protocol::function_stream_graph::{S3SinkConfig, SinkFormatProto}; -use crate::sql::common::Format; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::connector_type; -use crate::sql::common::with_option_keys as opt; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SinkProvider; -use crate::sql::connector::sink::runtime_config::SinkRuntimeProperties; -use crate::sql::connector::sink::utils::SinkUtils; +use crate::common::Format; +use crate::common::connector_options::ConnectorOptions; +use crate::common::constants::connector_type; +use crate::common::with_option_keys as opt; +use crate::connector::config::ConnectorConfig; +use crate::connector::provider::SinkProvider; +use crate::connector::sink::runtime_config::SinkRuntimeProperties; +use crate::connector::sink::utils::SinkUtils; pub struct S3SinkConnector; diff --git a/src/sql/connector/sink/utils.rs b/src/streaming_planner/src/connector/sink/utils.rs similarity index 94% rename from src/sql/connector/sink/utils.rs rename to src/streaming_planner/src/connector/sink/utils.rs index e61cd870..481ad3b2 100644 --- a/src/sql/connector/sink/utils.rs +++ b/src/streaming_planner/src/connector/sink/utils.rs @@ -13,10 +13,10 @@ use datafusion::common::{DataFusionError, Result, plan_err}; use protocol::function_stream_graph::{ParquetCompressionProto, SinkFormatProto}; -use crate::sql::common::Format; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::parquet_compression_value; -use crate::sql::common::with_option_keys as opt; +use crate::common::Format; +use crate::common::connector_options::ConnectorOptions; +use crate::common::constants::parquet_compression_value; +use crate::common::with_option_keys as opt; pub struct SinkUtils; diff --git a/src/sql/connector/source/kafka.rs b/src/streaming_planner/src/connector/source/kafka.rs similarity index 95% rename from src/sql/connector/source/kafka.rs rename to src/streaming_planner/src/connector/source/kafka.rs index 0bc220d9..71672279 100644 --- a/src/sql/connector/source/kafka.rs +++ b/src/streaming_planner/src/connector/source/kafka.rs @@ -17,15 +17,15 @@ use protocol::function_stream_graph::{ RawStringFormatConfig, TimestampFormatProto, format_config, kafka_auth_config, }; -use crate::sql::common::connector_options::ConnectorOptions; -use crate::sql::common::constants::{connector_type, kafka_with_value}; -use crate::sql::common::formats::{ +use 
crate::common::connector_options::ConnectorOptions; +use crate::common::constants::{connector_type, kafka_with_value}; +use crate::common::formats::{ BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, TimestampFormat as SqlTimestampFormat, }; -use crate::sql::common::with_option_keys as opt; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::connector::provider::SourceProvider; +use crate::common::with_option_keys as opt; +use crate::connector::config::ConnectorConfig; +use crate::connector::provider::SourceProvider; pub struct KafkaSourceConnector; diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/streaming_planner/src/connector/source/mod.rs similarity index 100% rename from src/runtime/streaming/operators/source/mod.rs rename to src/streaming_planner/src/connector/source/mod.rs diff --git a/src/sql/functions/mod.rs b/src/streaming_planner/src/functions/mod.rs similarity index 98% rename from src/sql/functions/mod.rs rename to src/streaming_planner/src/functions/mod.rs index b78f5d2a..9ec1007f 100644 --- a/src/sql/functions/mod.rs +++ b/src/streaming_planner/src/functions/mod.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::schema::StreamSchemaProvider; +use crate::schema::StreamSchemaProvider; use datafusion::arrow::array::{ Array, ArrayRef, StringArray, UnionArray, builder::{FixedSizeBinaryBuilder, ListBuilder, StringBuilder}, @@ -34,7 +34,7 @@ use std::collections::HashMap; use std::fmt::{Debug, Write}; use std::sync::{Arc, OnceLock}; -use crate::sql::common::constants::scalar_fn; +use crate::common::constants::scalar_fn; /// Borrowed from DataFusion /// @@ -336,14 +336,14 @@ pub fn extract_json_string(args: &[ColumnarValue]) -> Result { // https://github.com/datafusion-contrib/datafusion-functions-json/blob/main/src/common_union.rs // as the `is_json_union` function is not public. It should be kept in sync with that code so // that we are able to detect JSON unions and rewrite them to serialized JSON for sinks. -pub(crate) fn is_json_union(data_type: &DataType) -> bool { +pub fn is_json_union(data_type: &DataType) -> bool { match data_type { DataType::Union(fields, UnionMode::Sparse) => fields == &union_fields(), _ => false, } } -pub(crate) const TYPE_ID_NULL: i8 = 0; +pub const TYPE_ID_NULL: i8 = 0; const TYPE_ID_BOOL: i8 = 1; const TYPE_ID_INT: i8 = 2; const TYPE_ID_FLOAT: i8 = 3; @@ -456,7 +456,7 @@ fn write_value(b: &mut StringBuilder, id: i8, a: &ArrayRef) -> Result<(), std::f Ok(()) } -pub(crate) fn serialize_outgoing_json( +pub fn serialize_outgoing_json( registry: &StreamSchemaProvider, node: Arc, ) -> LogicalPlan { diff --git a/src/sql/mod.rs b/src/streaming_planner/src/lib.rs similarity index 68% rename from src/sql/mod.rs rename to src/streaming_planner/src/lib.rs index f4a0eef6..2dbc2b47 100644 --- a/src/sql/mod.rs +++ b/src/streaming_planner/src/lib.rs @@ -10,6 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#![allow(dead_code)] // Planner keeps helpers/types for upcoming features; strict -D dead_code is noisy. + +//! Streaming SQL planning (logical graph, connectors, schema, physical codec). +//! +//! The `function-stream` binary/library re-exports this crate as `function_stream::sql` for +//! stable `crate::sql::…` paths in in-tree modules. 
+ pub mod api; pub mod common; @@ -20,7 +27,7 @@ pub mod logical_node; pub mod logical_planner; pub mod parse; pub mod physical; -pub(crate) mod planning_runtime; +pub mod planning_runtime; pub mod schema; pub mod types; diff --git a/src/sql/logical_node/aggregate.rs b/src/streaming_planner/src/logical_node/aggregate.rs similarity index 97% rename from src/sql/logical_node/aggregate.rs rename to src/streaming_planner/src/logical_node/aggregate.rs index 1e288ab5..cef337c0 100644 --- a/src/sql/logical_node/aggregate.rs +++ b/src/streaming_planner/src/logical_node/aggregate.rs @@ -30,25 +30,25 @@ use protocol::function_stream_graph::{ SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator, }; -use crate::multifield_partial_ord; -use crate::sql::common::constants::{extension_node, proto_operator_name}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{ +use crate::common::constants::{extension_node, proto_operator_name}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{ CompiledTopologyNode, StreamingOperatorBlueprint, SystemTimestampInjectorNode, }; -use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput}; -use crate::sql::physical::{StreamingExtensionCodec, window}; -use crate::sql::types::{ +use crate::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput}; +use crate::multifield_partial_ord; +use crate::physical::{StreamingExtensionCodec, window}; +use crate::types::{ QualifiedField, TIMESTAMP_FIELD, WindowBehavior, WindowType, build_df_schema, build_df_schema_with_metadata, extract_qualified_fields, }; -pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = extension_node::STREAM_WINDOW_AGGREGATE; +pub const STREAM_AGG_EXTENSION_NAME: &str = extension_node::STREAM_WINDOW_AGGREGATE; /// Represents a streaming windowed aggregation node in the logical plan. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct StreamWindowAggregateNode { +pub struct StreamWindowAggregateNode { pub(crate) window_spec: WindowBehavior, pub(crate) base_agg_plan: LogicalPlan, pub(crate) output_schema: DFSchemaRef, diff --git a/src/sql/logical_node/async_udf.rs b/src/streaming_planner/src/logical_node/async_udf.rs similarity index 93% rename from src/sql/logical_node/async_udf.rs rename to src/streaming_planner/src/logical_node/async_udf.rs index 1c35398e..a3d1f68c 100644 --- a/src/sql/logical_node/async_udf.rs +++ b/src/streaming_planner/src/logical_node/async_udf.rs @@ -24,25 +24,25 @@ use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use prost::Message; use protocol::function_stream_graph::{AsyncUdfOperator, AsyncUdfOrdering}; -use crate::multifield_partial_ord; -use crate::sql::common::constants::extension_node; -use crate::sql::common::constants::sql_field; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{ +use crate::common::constants::extension_node; +use crate::common::constants::sql_field; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{ DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName, }; -use crate::sql::logical_node::streaming_operator_blueprint::{ +use crate::logical_node::streaming_operator_blueprint::{ CompiledTopologyNode, StreamingOperatorBlueprint, }; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::types::{QualifiedField, build_df_schema, extract_qualified_fields}; +use crate::logical_planner::planner::{NamedNode, Planner}; +use crate::multifield_partial_ord; +use crate::types::{QualifiedField, build_df_schema, extract_qualified_fields}; -pub(crate) const NODE_TYPE_NAME: &str = extension_node::ASYNC_FUNCTION_EXECUTION; +pub const NODE_TYPE_NAME: &str = extension_node::ASYNC_FUNCTION_EXECUTION; /// Represents a logical node that executes an external asynchronous function (UDF) /// and projects the final results into the streaming pipeline. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct AsyncFunctionExecutionNode { +pub struct AsyncFunctionExecutionNode { pub(crate) upstream_plan: Arc, pub(crate) operator_name: String, pub(crate) function_config: DylibUdfConfig, diff --git a/src/sql/logical_node/debezium.rs b/src/streaming_planner/src/logical_node/debezium.rs similarity index 95% rename from src/sql/logical_node/debezium.rs rename to src/streaming_planner/src/logical_node/debezium.rs index 8d69c6ec..2b13f090 100644 --- a/src/sql/logical_node/debezium.rs +++ b/src/streaming_planner/src/logical_node/debezium.rs @@ -19,12 +19,12 @@ use datafusion::common::{ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use datafusion::physical_plan::DisplayAs; +use crate::common::constants::{cdc, extension_node}; +use crate::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::{cdc, extension_node}; -use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::physical::updating_meta_field; -use crate::sql::types::TIMESTAMP_FIELD; +use crate::physical::updating_meta_field; +use crate::types::TIMESTAMP_FIELD; use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; @@ -32,19 +32,19 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const UNROLL_NODE_NAME: &str = extension_node::UNROLL_DEBEZIUM_PAYLOAD; -pub(crate) const PACK_NODE_NAME: &str = extension_node::PACK_DEBEZIUM_ENVELOPE; +pub const UNROLL_NODE_NAME: &str = extension_node::UNROLL_DEBEZIUM_PAYLOAD; +pub const PACK_NODE_NAME: &str = extension_node::PACK_DEBEZIUM_ENVELOPE; // ----------------------------------------------------------------------------- // Core Schema Codec // ----------------------------------------------------------------------------- /// Transforms between flat schemas and Debezium CDC envelopes. -pub(crate) struct DebeziumSchemaCodec; +pub struct DebeziumSchemaCodec; impl DebeziumSchemaCodec { /// Wraps a flat physical schema into a Debezium CDC envelope structure. - pub(crate) fn wrap_into_envelope( + pub fn wrap_into_envelope( flat_schema: &DFSchemaRef, qualifier_override: Option, ) -> Result { @@ -299,7 +299,7 @@ impl StreamingOperatorBlueprint for UnrollDebeziumPayloadNode { /// Encodes a flat updating stream back into a Debezium CDC envelope representation. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct PackDebeziumEnvelopeNode { +pub struct PackDebeziumEnvelopeNode { upstream_plan: Arc, envelope_schema: DFSchemaRef, } @@ -307,7 +307,7 @@ pub(crate) struct PackDebeziumEnvelopeNode { multifield_partial_ord!(PackDebeziumEnvelopeNode, upstream_plan); impl PackDebeziumEnvelopeNode { - pub(crate) fn try_new(upstream_plan: LogicalPlan) -> Result { + pub fn try_new(upstream_plan: LogicalPlan) -> Result { let envelope_schema = DebeziumSchemaCodec::wrap_into_envelope(upstream_plan.schema(), None) .map_err(|e| { DataFusionError::Plan(format!("Failed to compile Debezium envelope schema: {e}")) diff --git a/src/sql/logical_node/extension_try_from.rs b/src/streaming_planner/src/logical_node/extension_try_from.rs similarity index 72% rename from src/sql/logical_node/extension_try_from.rs rename to src/streaming_planner/src/logical_node/extension_try_from.rs index 32b12d6c..c13532dc 100644 --- a/src/sql/logical_node/extension_try_from.rs +++ b/src/streaming_planner/src/logical_node/extension_try_from.rs @@ -15,20 +15,20 @@ use std::sync::Arc; use datafusion::common::{DataFusionError, Result}; use datafusion::logical_expr::UserDefinedLogicalNode; -use crate::sql::logical_node::aggregate::StreamWindowAggregateNode; -use crate::sql::logical_node::async_udf::AsyncFunctionExecutionNode; -use crate::sql::logical_node::debezium::{PackDebeziumEnvelopeNode, UnrollDebeziumPayloadNode}; -use crate::sql::logical_node::join::StreamingJoinNode; -use crate::sql::logical_node::key_calculation::KeyExtractionNode; -use crate::sql::logical_node::lookup::StreamReferenceJoinNode; -use crate::sql::logical_node::projection::StreamProjectionNode; -use crate::sql::logical_node::remote_table::RemoteTableBoundaryNode; -use crate::sql::logical_node::sink::StreamEgressNode; -use crate::sql::logical_node::streaming_operator_blueprint::StreamingOperatorBlueprint; -use crate::sql::logical_node::table_source::StreamIngestionNode; -use crate::sql::logical_node::updating_aggregate::ContinuousAggregateNode; -use crate::sql::logical_node::watermark_node::EventTimeWatermarkNode; -use crate::sql::logical_node::windows_function::StreamingWindowFunctionNode; +use crate::logical_node::aggregate::StreamWindowAggregateNode; +use crate::logical_node::async_udf::AsyncFunctionExecutionNode; +use crate::logical_node::debezium::{PackDebeziumEnvelopeNode, UnrollDebeziumPayloadNode}; +use crate::logical_node::join::StreamingJoinNode; +use crate::logical_node::key_calculation::KeyExtractionNode; +use crate::logical_node::lookup::StreamReferenceJoinNode; +use crate::logical_node::projection::StreamProjectionNode; +use crate::logical_node::remote_table::RemoteTableBoundaryNode; +use crate::logical_node::sink::StreamEgressNode; +use crate::logical_node::streaming_operator_blueprint::StreamingOperatorBlueprint; +use crate::logical_node::table_source::StreamIngestionNode; +use crate::logical_node::updating_aggregate::ContinuousAggregateNode; +use crate::logical_node::watermark_node::EventTimeWatermarkNode; +use crate::logical_node::windows_function::StreamingWindowFunctionNode; fn try_from_t( node: &dyn UserDefinedLogicalNode, diff --git a/src/sql/logical_node/is_retract.rs b/src/streaming_planner/src/logical_node/is_retract.rs similarity index 89% rename from src/sql/logical_node/is_retract.rs rename to src/streaming_planner/src/logical_node/is_retract.rs index 4370f6ae..17fad0db 100644 --- a/src/sql/logical_node/is_retract.rs +++ b/src/streaming_planner/src/logical_node/is_retract.rs @@ -17,13 
+17,11 @@ use datafusion::common::{DFSchemaRef, Result, TableReference}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use crate::multifield_partial_ord; -use crate::sql::physical::updating_meta_field; -use crate::sql::types::{ - QualifiedField, TIMESTAMP_FIELD, build_df_schema, extract_qualified_fields, -}; +use crate::physical::updating_meta_field; +use crate::types::{QualifiedField, TIMESTAMP_FIELD, build_df_schema, extract_qualified_fields}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct IsRetractExtension { +pub struct IsRetractExtension { pub(crate) input: LogicalPlan, pub(crate) schema: DFSchemaRef, pub(crate) timestamp_qualifier: Option, @@ -32,7 +30,7 @@ pub(crate) struct IsRetractExtension { multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier); impl IsRetractExtension { - pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option) -> Self { + pub fn new(input: LogicalPlan, timestamp_qualifier: Option) -> Self { let mut output_fields = extract_qualified_fields(input.schema()); let timestamp_index = output_fields.len() - 1; diff --git a/src/sql/logical_node/join.rs b/src/streaming_planner/src/logical_node/join.rs similarity index 93% rename from src/sql/logical_node/join.rs rename to src/streaming_planner/src/logical_node/join.rs index 15631f1f..35d645f1 100644 --- a/src/sql/logical_node/join.rs +++ b/src/streaming_planner/src/logical_node/join.rs @@ -22,18 +22,18 @@ use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; use protocol::function_stream_graph::JoinOperator; -use crate::sql::common::constants::{extension_node, runtime_operator_kind}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::physical::StreamingExtensionCodec; +use crate::common::constants::{extension_node, runtime_operator_kind}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; +use crate::physical::StreamingExtensionCodec; // ----------------------------------------------------------------------------- // Constants // ----------------------------------------------------------------------------- -pub(crate) const STREAM_JOIN_NODE_TYPE: &str = extension_node::STREAMING_JOIN; +pub const STREAM_JOIN_NODE_TYPE: &str = extension_node::STREAMING_JOIN; // ----------------------------------------------------------------------------- // Logical Node Definition diff --git a/src/sql/logical_node/key_calculation.rs b/src/streaming_planner/src/logical_node/key_calculation.rs similarity index 94% rename from src/sql/logical_node/key_calculation.rs rename to src/streaming_planner/src/logical_node/key_calculation.rs index ec83e108..8bd03b6e 100644 --- a/src/sql/logical_node/key_calculation.rs +++ b/src/streaming_planner/src/logical_node/key_calculation.rs @@ -26,16 +26,16 @@ use prost::Message; use protocol::function_stream_graph::{KeyPlanOperator, ProjectionOperator}; +use crate::common::constants::{extension_node, sql_field}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, 
OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::{extension_node, sql_field}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::physical::StreamingExtensionCodec; -use crate::sql::types::{build_df_schema_with_metadata, extract_qualified_fields}; +use crate::physical::StreamingExtensionCodec; +use crate::types::{build_df_schema_with_metadata, extract_qualified_fields}; -pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = extension_node::KEY_EXTRACTION; +pub const EXTENSION_NODE_IDENTIFIER: &str = extension_node::KEY_EXTRACTION; /// Routing strategy for shuffling data across the stream topology. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] @@ -46,7 +46,7 @@ pub enum KeyExtractionStrategy { /// Logical node that computes or extracts routing keys. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct KeyExtractionNode { +pub struct KeyExtractionNode { pub(crate) operator_label: Option, pub(crate) upstream_plan: LogicalPlan, pub(crate) extraction_strategy: KeyExtractionStrategy, diff --git a/src/sql/logical_node/logical/dylib_udf_config.rs b/src/streaming_planner/src/logical_node/logical/dylib_udf_config.rs similarity index 100% rename from src/sql/logical_node/logical/dylib_udf_config.rs rename to src/streaming_planner/src/logical_node/logical/dylib_udf_config.rs diff --git a/src/sql/logical_node/logical/fs_program_convert.rs b/src/streaming_planner/src/logical_node/logical/fs_program_convert.rs similarity index 98% rename from src/sql/logical_node/logical/fs_program_convert.rs rename to src/streaming_planner/src/logical_node/logical/fs_program_convert.rs index b05d68f5..53316bae 100644 --- a/src/sql/logical_node/logical/fs_program_convert.rs +++ b/src/streaming_planner/src/logical_node/logical/fs_program_convert.rs @@ -23,8 +23,8 @@ use protocol::function_stream_graph::{ FsSchema as ProtoFsSchema, }; -use crate::sql::api::pipelines::{PipelineEdge, PipelineGraph, PipelineNode}; -use crate::sql::common::FsSchema; +use crate::api::pipelines::{PipelineEdge, PipelineGraph, PipelineNode}; +use crate::common::FsSchema; use super::logical_edge::logical_edge_type_from_proto_i32; use super::operator_chain::{ChainedLogicalOperator, OperatorChain}; diff --git a/src/sql/logical_node/logical/logical_edge.rs b/src/streaming_planner/src/logical_node/logical/logical_edge.rs similarity index 96% rename from src/sql/logical_node/logical/logical_edge.rs rename to src/streaming_planner/src/logical_node/logical/logical_edge.rs index 87950e70..d599f7f1 100644 --- a/src/sql/logical_node/logical/logical_edge.rs +++ b/src/streaming_planner/src/logical_node/logical/logical_edge.rs @@ -17,7 +17,7 @@ use datafusion::common::{DataFusionError, Result}; use protocol::function_stream_graph::EdgeType as ProtoEdgeType; use serde::{Deserialize, Serialize}; -use crate::sql::common::FsSchema; +use crate::common::FsSchema; #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] pub enum LogicalEdgeType { @@ -64,7 +64,7 @@ impl From for ProtoEdgeType { } } -pub(crate) fn logical_edge_type_from_proto_i32(i: i32) -> Result { +pub fn 
logical_edge_type_from_proto_i32(i: i32) -> Result { let e = ProtoEdgeType::try_from(i).map_err(|_| { DataFusionError::Plan(format!("invalid protobuf EdgeType discriminant {i}")) })?; diff --git a/src/sql/logical_node/logical/logical_graph.rs b/src/streaming_planner/src/logical_node/logical/logical_graph.rs similarity index 100% rename from src/sql/logical_node/logical/logical_graph.rs rename to src/streaming_planner/src/logical_node/logical/logical_graph.rs diff --git a/src/sql/logical_node/logical/logical_node.rs b/src/streaming_planner/src/logical_node/logical/logical_node.rs similarity index 100% rename from src/sql/logical_node/logical/logical_node.rs rename to src/streaming_planner/src/logical_node/logical/logical_node.rs diff --git a/src/sql/logical_node/logical/logical_program.rs b/src/streaming_planner/src/logical_node/logical/logical_program.rs similarity index 100% rename from src/sql/logical_node/logical/logical_program.rs rename to src/streaming_planner/src/logical_node/logical/logical_program.rs diff --git a/src/sql/logical_node/logical/mod.rs b/src/streaming_planner/src/logical_node/logical/mod.rs similarity index 100% rename from src/sql/logical_node/logical/mod.rs rename to src/streaming_planner/src/logical_node/logical/mod.rs diff --git a/src/sql/logical_node/logical/operator_chain.rs b/src/streaming_planner/src/logical_node/logical/operator_chain.rs similarity index 99% rename from src/sql/logical_node/logical/operator_chain.rs rename to src/streaming_planner/src/logical_node/logical/operator_chain.rs index 2aecddd6..34be2f57 100644 --- a/src/sql/logical_node/logical/operator_chain.rs +++ b/src/streaming_planner/src/logical_node/logical/operator_chain.rs @@ -18,7 +18,7 @@ use protocol::function_stream_graph::ConnectorOp; use serde::{Deserialize, Serialize}; use super::operator_name::OperatorName; -use crate::sql::common::FsSchema; +use crate::common::FsSchema; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ChainedLogicalOperator { diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/streaming_planner/src/logical_node/logical/operator_name.rs similarity index 98% rename from src/sql/logical_node/logical/operator_name.rs rename to src/streaming_planner/src/logical_node/logical/operator_name.rs index 57f53f90..4377b0bd 100644 --- a/src/sql/logical_node/logical/operator_name.rs +++ b/src/streaming_planner/src/logical_node/logical/operator_name.rs @@ -15,7 +15,7 @@ use std::str::FromStr; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use strum::{Display, EnumString, IntoStaticStr}; -use crate::sql::common::constants::operator_feature; +use crate::common::constants::operator_feature; #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display, IntoStaticStr)] pub enum OperatorName { diff --git a/src/sql/logical_node/logical/program_config.rs b/src/streaming_planner/src/logical_node/logical/program_config.rs similarity index 100% rename from src/sql/logical_node/logical/program_config.rs rename to src/streaming_planner/src/logical_node/logical/program_config.rs diff --git a/src/sql/logical_node/logical/python_udf_config.rs b/src/streaming_planner/src/logical_node/logical/python_udf_config.rs similarity index 100% rename from src/sql/logical_node/logical/python_udf_config.rs rename to src/streaming_planner/src/logical_node/logical/python_udf_config.rs diff --git a/src/sql/logical_node/lookup.rs b/src/streaming_planner/src/logical_node/lookup.rs similarity index 95% rename from src/sql/logical_node/lookup.rs rename to 
src/streaming_planner/src/logical_node/lookup.rs index d2817c85..ca95c4a6 100644 --- a/src/sql/logical_node/lookup.rs +++ b/src/streaming_planner/src/logical_node/lookup.rs @@ -23,14 +23,14 @@ use prost::Message; use protocol::function_stream_graph; use protocol::function_stream_graph::{ConnectorOp, LookupJoinCondition, LookupJoinOperator}; +use crate::common::constants::extension_node; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::extension_node; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::schema::LookupTable; -use crate::sql::schema::utils::add_timestamp_field_arrow; +use crate::schema::LookupTable; +use crate::schema::utils::add_timestamp_field_arrow; pub const DICTIONARY_SOURCE_NODE_NAME: &str = extension_node::REFERENCE_TABLE_SOURCE; pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = extension_node::STREAM_REFERENCE_JOIN; diff --git a/src/sql/logical_node/macros.rs b/src/streaming_planner/src/logical_node/macros.rs similarity index 100% rename from src/sql/logical_node/macros.rs rename to src/streaming_planner/src/logical_node/macros.rs diff --git a/src/streaming_planner/src/logical_node/mod.rs b/src/streaming_planner/src/logical_node/mod.rs new file mode 100644 index 00000000..3ef02fc8 --- /dev/null +++ b/src/streaming_planner/src/logical_node/mod.rs @@ -0,0 +1,42 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +pub mod logical; + +mod macros; + +pub mod streaming_operator_blueprint; +pub use streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint}; + +pub mod aggregate; +pub mod debezium; +pub mod join; +pub mod key_calculation; +pub mod lookup; +pub mod projection; +pub mod remote_table; +pub mod sink; +pub mod table_source; +pub mod updating_aggregate; +pub mod watermark_node; +pub mod windows_function; + +pub mod timestamp_append; +pub use timestamp_append::SystemTimestampInjectorNode; + +pub mod async_udf; +pub use async_udf::AsyncFunctionExecutionNode; + +pub mod is_retract; +pub use is_retract::IsRetractExtension; + +mod extension_try_from; diff --git a/src/sql/logical_node/projection.rs b/src/streaming_planner/src/logical_node/projection.rs similarity index 92% rename from src/sql/logical_node/projection.rs rename to src/streaming_planner/src/logical_node/projection.rs index 3c5cfccb..f56b109f 100644 --- a/src/sql/logical_node/projection.rs +++ b/src/streaming_planner/src/logical_node/projection.rs @@ -21,19 +21,19 @@ use prost::Message; use protocol::function_stream_graph::ProjectionOperator; +use crate::common::constants::{extension_node, sql_field}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::{extension_node, sql_field}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::types::{QualifiedField, build_df_schema}; +use crate::types::{QualifiedField, build_df_schema}; // ----------------------------------------------------------------------------- // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = extension_node::STREAM_PROJECTION; +pub const STREAM_PROJECTION_NODE_NAME: &str = extension_node::STREAM_PROJECTION; const DEFAULT_PROJECTION_LABEL: &str = sql_field::DEFAULT_PROJECTION_LABEL; // ----------------------------------------------------------------------------- @@ -42,7 +42,7 @@ const DEFAULT_PROJECTION_LABEL: &str = sql_field::DEFAULT_PROJECTION_LABEL; /// Projection within a streaming execution topology. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct StreamProjectionNode { +pub struct StreamProjectionNode { pub(crate) upstream_plans: Vec, pub(crate) operator_label: Option, pub(crate) projection_exprs: Vec, @@ -53,7 +53,7 @@ pub(crate) struct StreamProjectionNode { multifield_partial_ord!(StreamProjectionNode, operator_label, projection_exprs); impl StreamProjectionNode { - pub(crate) fn try_new( + pub fn try_new( upstream_plans: Vec, operator_label: Option, projection_exprs: Vec, @@ -81,7 +81,7 @@ impl StreamProjectionNode { }) } - pub(crate) fn with_shuffle_routing(mut self) -> Self { + pub fn with_shuffle_routing(mut self) -> Self { self.requires_shuffle = true; self } diff --git a/src/sql/logical_node/remote_table.rs b/src/streaming_planner/src/logical_node/remote_table.rs similarity index 92% rename from src/sql/logical_node/remote_table.rs rename to src/streaming_planner/src/logical_node/remote_table.rs index bde1d47f..302228e7 100644 --- a/src/sql/logical_node/remote_table.rs +++ b/src/streaming_planner/src/logical_node/remote_table.rs @@ -21,19 +21,19 @@ use prost::Message; use protocol::function_stream_graph::ValuePlanOperator; +use crate::common::constants::extension_node; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::extension_node; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::physical::StreamingExtensionCodec; +use crate::physical::StreamingExtensionCodec; // ----------------------------------------------------------------------------- // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const REMOTE_TABLE_NODE_NAME: &str = extension_node::REMOTE_TABLE_BOUNDARY; +pub const REMOTE_TABLE_NODE_NAME: &str = extension_node::REMOTE_TABLE_BOUNDARY; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -41,7 +41,7 @@ pub(crate) const REMOTE_TABLE_NODE_NAME: &str = extension_node::REMOTE_TABLE_BOU /// Segments the execution graph and merges nodes sharing the same identifier; acts as a boundary. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct RemoteTableBoundaryNode { +pub struct RemoteTableBoundaryNode { pub(crate) upstream_plan: LogicalPlan, pub(crate) table_identifier: TableReference, pub(crate) resolved_schema: DFSchemaRef, diff --git a/src/sql/logical_node/sink.rs b/src/streaming_planner/src/logical_node/sink.rs similarity index 94% rename from src/sql/logical_node/sink.rs rename to src/streaming_planner/src/logical_node/sink.rs index d767afe3..f0b66187 100644 --- a/src/sql/logical_node/sink.rs +++ b/src/streaming_planner/src/logical_node/sink.rs @@ -17,14 +17,14 @@ use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore}; use prost::Message; +use crate::common::constants::extension_node; +use crate::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::extension_node; -use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::schema::CatalogEntity; -use crate::sql::schema::catalog::ExternalTable; +use crate::schema::CatalogEntity; +use crate::schema::catalog::ExternalTable; use super::debezium::PackDebeziumEnvelopeNode; use super::remote_table::RemoteTableBoundaryNode; @@ -33,7 +33,7 @@ use super::remote_table::RemoteTableBoundaryNode; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAM_EGRESS_NODE_NAME: &str = extension_node::STREAM_EGRESS; +pub const STREAM_EGRESS_NODE_NAME: &str = extension_node::STREAM_EGRESS; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -41,7 +41,7 @@ pub(crate) const STREAM_EGRESS_NODE_NAME: &str = extension_node::STREAM_EGRESS; /// Terminal node routing processed data into an external sink (e.g. Kafka, PostgreSQL). 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct StreamEgressNode { +pub struct StreamEgressNode { pub(crate) target_identifier: TableReference, pub(crate) destination_table: CatalogEntity, pub(crate) egress_schema: DFSchemaRef, diff --git a/src/sql/logical_node/streaming_operator_blueprint.rs b/src/streaming_planner/src/logical_node/streaming_operator_blueprint.rs similarity index 88% rename from src/sql/logical_node/streaming_operator_blueprint.rs rename to src/streaming_planner/src/logical_node/streaming_operator_blueprint.rs index d3f9d459..d9afaeab 100644 --- a/src/sql/logical_node/streaming_operator_blueprint.rs +++ b/src/streaming_planner/src/logical_node/streaming_operator_blueprint.rs @@ -14,16 +14,16 @@ use std::fmt::Debug; use datafusion::common::Result; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalNode}; +use crate::logical_planner::planner::{NamedNode, Planner}; // ----------------------------------------------------------------------------- // Core Execution Blueprint // ----------------------------------------------------------------------------- /// Atomic unit within a streaming execution topology: translates streaming SQL into graph nodes. -pub(crate) trait StreamingOperatorBlueprint: Debug { +pub trait StreamingOperatorBlueprint: Debug { /// Canonical named identity for this operator, if any (sources, sinks, etc.). fn operator_identity(&self) -> Option; @@ -50,7 +50,7 @@ pub(crate) trait StreamingOperatorBlueprint: Debug { /// Compiled vertex: execution unit plus upstream routing edges. 
#[derive(Debug, Clone)] -pub(crate) struct CompiledTopologyNode { +pub struct CompiledTopologyNode { pub execution_unit: LogicalNode, pub routing_edges: Vec, } diff --git a/src/sql/logical_node/table_source.rs b/src/streaming_planner/src/logical_node/table_source.rs similarity index 91% rename from src/sql/logical_node/table_source.rs rename to src/streaming_planner/src/logical_node/table_source.rs index b1c6bfdd..dad8f921 100644 --- a/src/sql/logical_node/table_source.rs +++ b/src/streaming_planner/src/logical_node/table_source.rs @@ -17,15 +17,15 @@ use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; use prost::Message; +use crate::common::constants::extension_node; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::debezium::DebeziumSchemaCodec; +use crate::logical_node::logical::{LogicalNode, OperatorName}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::extension_node; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::debezium::DebeziumSchemaCodec; -use crate::sql::logical_node::logical::{LogicalNode, OperatorName}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::schema::SourceTable; -use crate::sql::schema::utils::add_timestamp_field; -use crate::sql::types::build_df_schema; +use crate::schema::SourceTable; +use crate::schema::utils::add_timestamp_field; +use crate::types::build_df_schema; use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; @@ -33,7 +33,7 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAM_INGESTION_NODE_NAME: &str = extension_node::STREAM_INGESTION; +pub const STREAM_INGESTION_NODE_NAME: &str = extension_node::STREAM_INGESTION; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -41,7 +41,7 @@ pub(crate) const STREAM_INGESTION_NODE_NAME: &str = extension_node::STREAM_INGES /// Foundational ingestion point: connects to external systems and injects raw or CDC data. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct StreamIngestionNode { +pub struct StreamIngestionNode { pub(crate) source_identifier: TableReference, pub(crate) source_definition: SourceTable, pub(crate) resolved_schema: DFSchemaRef, diff --git a/src/sql/logical_node/timestamp_append.rs b/src/streaming_planner/src/logical_node/timestamp_append.rs similarity index 92% rename from src/sql/logical_node/timestamp_append.rs rename to src/streaming_planner/src/logical_node/timestamp_append.rs index 630e5a66..a490243a 100644 --- a/src/sql/logical_node/timestamp_append.rs +++ b/src/streaming_planner/src/logical_node/timestamp_append.rs @@ -15,15 +15,15 @@ use std::fmt::Formatter; use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err}; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; +use crate::common::constants::extension_node; use crate::multifield_partial_ord; -use crate::sql::common::constants::extension_node; -use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field}; +use crate::schema::utils::{add_timestamp_field, has_timestamp_field}; // ----------------------------------------------------------------------------- // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = extension_node::SYSTEM_TIMESTAMP_INJECTOR; +pub const TIMESTAMP_INJECTOR_NODE_NAME: &str = extension_node::SYSTEM_TIMESTAMP_INJECTOR; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -31,7 +31,7 @@ pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = extension_node::SYSTEM_TIM /// Injects the mandatory system `_timestamp` field into the upstream streaming schema. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct SystemTimestampInjectorNode { +pub struct SystemTimestampInjectorNode { pub(crate) upstream_plan: LogicalPlan, pub(crate) target_qualifier: Option, pub(crate) resolved_schema: DFSchemaRef, @@ -40,7 +40,7 @@ pub(crate) struct SystemTimestampInjectorNode { multifield_partial_ord!(SystemTimestampInjectorNode, upstream_plan, target_qualifier); impl SystemTimestampInjectorNode { - pub(crate) fn try_new( + pub fn try_new( upstream_plan: LogicalPlan, target_qualifier: Option, ) -> Result { diff --git a/src/sql/logical_node/updating_aggregate.rs b/src/streaming_planner/src/logical_node/updating_aggregate.rs similarity index 92% rename from src/sql/logical_node/updating_aggregate.rs rename to src/streaming_planner/src/logical_node/updating_aggregate.rs index 0ddb2b28..14f52ea7 100644 --- a/src/sql/logical_node/updating_aggregate.rs +++ b/src/streaming_planner/src/logical_node/updating_aggregate.rs @@ -25,21 +25,19 @@ use datafusion_proto::protobuf::PhysicalPlanNode; use prost::Message; use protocol::function_stream_graph::UpdatingAggregateOperator; -use crate::sql::common::constants::{extension_node, proto_operator_name, updating_state_field}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::functions::multi_hash; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{ - CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint, -}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::physical::StreamingExtensionCodec; +use crate::common::constants::{extension_node, proto_operator_name, updating_state_field}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::functions::multi_hash; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; +use crate::physical::StreamingExtensionCodec; // ----------------------------------------------------------------------------- // Constants & Configuration // ----------------------------------------------------------------------------- -pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = extension_node::CONTINUOUS_AGGREGATE; +pub const CONTINUOUS_AGGREGATE_NODE_NAME: &str = extension_node::CONTINUOUS_AGGREGATE; const DEFAULT_FLUSH_INTERVAL_MICROS: u64 = 10_000_000; @@ -51,7 +49,7 @@ const STATIC_HASH_SIZE_BYTES: i32 = 16; /// Stateful continuous aggregation: running aggregates with updating / retraction semantics. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub(crate) struct ContinuousAggregateNode { +pub struct ContinuousAggregateNode { pub(crate) base_aggregate_plan: LogicalPlan, pub(crate) partition_key_indices: Vec, pub(crate) retract_injected_plan: LogicalPlan, diff --git a/src/sql/logical_node/watermark_node.rs b/src/streaming_planner/src/logical_node/watermark_node.rs similarity index 91% rename from src/sql/logical_node/watermark_node.rs rename to src/streaming_planner/src/logical_node/watermark_node.rs index 9a8fc9d6..8d51fc47 100644 --- a/src/sql/logical_node/watermark_node.rs +++ b/src/streaming_planner/src/logical_node/watermark_node.rs @@ -21,20 +21,20 @@ use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; use prost::Message; use protocol::function_stream_graph::ExpressionWatermarkConfig; +use crate::common::constants::{extension_node, runtime_operator_kind}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::logical_planner::planner::{NamedNode, Planner}; use crate::multifield_partial_ord; -use crate::sql::common::constants::{extension_node, runtime_operator_kind}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::schema::utils::add_timestamp_field; -use crate::sql::types::TIMESTAMP_FIELD; +use crate::schema::utils::add_timestamp_field; +use crate::types::TIMESTAMP_FIELD; // ----------------------------------------------------------------------------- // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = extension_node::EVENT_TIME_WATERMARK; +pub const EVENT_TIME_WATERMARK_NODE_NAME: &str = extension_node::EVENT_TIME_WATERMARK; const DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS: u64 = 1_000_000; @@ -44,7 +44,7 @@ const DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS: u64 = 1_000_000; /// Event-time watermark from a user strategy; drives time progress in stateful operators. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct EventTimeWatermarkNode { +pub struct EventTimeWatermarkNode { pub(crate) upstream_plan: LogicalPlan, pub(crate) namespace_qualifier: TableReference, pub(crate) watermark_strategy_expr: Expr, @@ -61,7 +61,7 @@ multifield_partial_ord!( ); impl EventTimeWatermarkNode { - pub(crate) fn try_new( + pub fn try_new( upstream_plan: LogicalPlan, namespace_qualifier: TableReference, watermark_strategy_expr: Expr, @@ -89,7 +89,7 @@ impl EventTimeWatermarkNode { }) } - pub(crate) fn generate_fs_schema(&self) -> FsSchema { + pub fn generate_fs_schema(&self) -> FsSchema { FsSchema::new_unkeyed( Arc::new(self.resolved_schema.as_ref().into()), self.internal_timestamp_offset, diff --git a/src/sql/logical_node/windows_function.rs b/src/streaming_planner/src/logical_node/windows_function.rs similarity index 92% rename from src/sql/logical_node/windows_function.rs rename to src/streaming_planner/src/logical_node/windows_function.rs index 9be37382..5fdfb9ce 100644 --- a/src/sql/logical_node/windows_function.rs +++ b/src/streaming_planner/src/logical_node/windows_function.rs @@ -21,12 +21,12 @@ use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNod use prost::Message; use protocol::function_stream_graph::WindowFunctionOperator; -use crate::sql::common::constants::{extension_node, proto_operator_name, runtime_operator_kind}; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; -use crate::sql::logical_planner::planner::{NamedNode, Planner}; -use crate::sql::physical::StreamingExtensionCodec; -use crate::sql::types::TIMESTAMP_FIELD; +use crate::common::constants::{extension_node, proto_operator_name, runtime_operator_kind}; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName}; +use crate::logical_planner::planner::{NamedNode, Planner}; +use crate::physical::StreamingExtensionCodec; +use crate::types::TIMESTAMP_FIELD; use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; @@ -34,7 +34,7 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint}; // Constants & Identifiers // ----------------------------------------------------------------------------- -pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = extension_node::STREAMING_WINDOW_FUNCTION; +pub const STREAMING_WINDOW_NODE_NAME: &str = extension_node::STREAMING_WINDOW_FUNCTION; // ----------------------------------------------------------------------------- // Logical Node Definition @@ -42,7 +42,7 @@ pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = extension_node::STREAMING_WI /// Stateful streaming window: temporal binning plus underlying window evaluation plan. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub(crate) struct StreamingWindowFunctionNode { +pub struct StreamingWindowFunctionNode { pub(crate) underlying_evaluation_plan: LogicalPlan, pub(crate) partition_key_indices: Vec, } diff --git a/src/sql/logical_planner/mod.rs b/src/streaming_planner/src/logical_planner/mod.rs similarity index 87% rename from src/sql/logical_planner/mod.rs rename to src/streaming_planner/src/logical_planner/mod.rs index 9ecfb676..d04ccce3 100644 --- a/src/sql/logical_planner/mod.rs +++ b/src/streaming_planner/src/logical_planner/mod.rs @@ -12,5 +12,5 @@ pub mod optimizers; -pub(crate) mod streaming_planner; -pub(crate) use streaming_planner as planner; +pub mod streaming_planner; +pub use streaming_planner as planner; diff --git a/src/sql/logical_planner/optimizers/chaining.rs b/src/streaming_planner/src/logical_planner/optimizers/chaining.rs similarity index 97% rename from src/sql/logical_planner/optimizers/chaining.rs rename to src/streaming_planner/src/logical_planner/optimizers/chaining.rs index ea7bd885..60b36115 100644 --- a/src/sql/logical_planner/optimizers/chaining.rs +++ b/src/streaming_planner/src/logical_planner/optimizers/chaining.rs @@ -14,7 +14,7 @@ use petgraph::prelude::*; use petgraph::visit::NodeIndexable; use tracing::debug; -use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; +use crate::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer}; pub struct ChainingOptimizer {} @@ -119,8 +119,8 @@ mod tests { use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; - use crate::sql::common::FsSchema; - use crate::sql::logical_node::logical::{ + use crate::common::FsSchema; + use crate::logical_node::logical::{ LogicalEdge, LogicalEdgeType, LogicalGraph, LogicalNode, OperatorName, Optimizer, }; diff --git a/src/sql/logical_planner/optimizers/mod.rs b/src/streaming_planner/src/logical_planner/optimizers/mod.rs similarity index 100% rename from src/sql/logical_planner/optimizers/mod.rs rename to src/streaming_planner/src/logical_planner/optimizers/mod.rs diff --git a/src/sql/logical_planner/optimizers/optimized_plan.rs b/src/streaming_planner/src/logical_planner/optimizers/optimized_plan.rs similarity index 98% rename from src/sql/logical_planner/optimizers/optimized_plan.rs rename to src/streaming_planner/src/logical_planner/optimizers/optimized_plan.rs index fbb64845..df380fe1 100644 --- a/src/sql/logical_planner/optimizers/optimized_plan.rs +++ b/src/streaming_planner/src/logical_planner/optimizers/optimized_plan.rs @@ -41,7 +41,7 @@ use datafusion::optimizer::simplify_expressions::SimplifyExpressions; use datafusion::sql::planner::SqlToRel; use datafusion::sql::sqlparser::ast::Statement; -use crate::sql::schema::StreamSchemaProvider; +use crate::schema::StreamSchemaProvider; /// Converts a SQL statement into an optimized DataFusion logical plan. 
/// diff --git a/src/sql/logical_planner/streaming_planner.rs b/src/streaming_planner/src/logical_planner/streaming_planner.rs similarity index 90% rename from src/sql/logical_planner/streaming_planner.rs rename to src/streaming_planner/src/logical_planner/streaming_planner.rs index 1e999c2a..a5f3d5f6 100644 --- a/src/sql/logical_planner/streaming_planner.rs +++ b/src/streaming_planner/src/logical_planner/streaming_planner.rs @@ -42,35 +42,33 @@ use datafusion_common::TableReference; use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec; use datafusion_proto::physical_plan::to_proto::serialize_physical_expr; -use crate::sql::common::constants::sql_planning_default; -use crate::sql::common::{FsSchema, FsSchemaRef}; -use crate::sql::logical_node::debezium::{ - PACK_NODE_NAME, UNROLL_NODE_NAME, UnrollDebeziumPayloadNode, -}; -use crate::sql::logical_node::key_calculation::KeyExtractionNode; -use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode}; -use crate::sql::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; -use crate::sql::physical::{ +use crate::common::constants::sql_planning_default; +use crate::common::{FsSchema, FsSchemaRef}; +use crate::logical_node::debezium::{PACK_NODE_NAME, UNROLL_NODE_NAME, UnrollDebeziumPayloadNode}; +use crate::logical_node::key_calculation::KeyExtractionNode; +use crate::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode}; +use crate::logical_node::{CompiledTopologyNode, StreamingOperatorBlueprint}; +use crate::physical::{ CdcDebeziumPackExec, CdcDebeziumUnrollExec, FsMemExec, StreamingDecodingContext, StreamingExtensionCodec, }; -use crate::sql::schema::StreamSchemaProvider; -use crate::sql::schema::utils::add_timestamp_field_arrow; +use crate::schema::StreamSchemaProvider; +use crate::schema::utils::add_timestamp_field_arrow; -pub(crate) struct SplitPlanOutput { +pub struct SplitPlanOutput { pub(crate) partial_aggregation_plan: PhysicalPlanNode, pub(crate) partial_schema: FsSchema, pub(crate) finish_plan: PhysicalPlanNode, } #[derive(Eq, Hash, PartialEq, Debug)] -pub(crate) enum NamedNode { +pub enum NamedNode { Source(TableReference), Watermark(TableReference), RemoteTable(TableReference), Sink(TableReference), } -pub(crate) struct PlanToGraphVisitor<'a> { +pub struct PlanToGraphVisitor<'a> { graph: DiGraph, output_schemas: HashMap, named_nodes: HashMap, @@ -90,7 +88,7 @@ impl<'a> PlanToGraphVisitor<'a> { } } -pub(crate) struct Planner<'a> { +pub struct Planner<'a> { schema_provider: &'a StreamSchemaProvider, planner: DefaultPhysicalPlanner, session_state: &'a SessionState, @@ -98,25 +96,22 @@ pub(crate) struct Planner<'a> { impl<'a> Planner<'a> { #[inline] - pub(crate) fn default_parallelism(&self) -> usize { + pub fn default_parallelism(&self) -> usize { self.schema_provider.default_parallelism() } #[inline] - pub(crate) fn key_by_parallelism(&self) -> usize { + pub fn key_by_parallelism(&self) -> usize { self.schema_provider.key_by_parallelism() } /// Parallelism for operators that consume a keyed shuffle (non-empty partition keys). 
#[inline] - pub(crate) fn keyed_aggregate_parallelism(&self) -> usize { + pub fn keyed_aggregate_parallelism(&self) -> usize { sql_planning_default::KEYED_AGGREGATE_DEFAULT_PARALLELISM } - pub(crate) fn new( - schema_provider: &'a StreamSchemaProvider, - session_state: &'a SessionState, - ) -> Self { + pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self { let planner = DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(FsExtensionPlanner {})]); Self { @@ -126,7 +121,7 @@ impl<'a> Planner<'a> { } } - pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result> { + pub fn sync_plan(&self, plan: &LogicalPlan) -> Result> { let fut = self.planner.create_physical_plan(plan, self.session_state); let (tx, mut rx) = oneshot::channel(); thread::scope(|s| { @@ -150,7 +145,7 @@ impl<'a> Planner<'a> { rx.try_recv().unwrap() } - pub(crate) fn create_physical_expr( + pub fn create_physical_expr( &self, expr: &Expr, input_dfschema: &DFSchema, @@ -159,17 +154,13 @@ impl<'a> Planner<'a> { .create_physical_expr(expr, input_dfschema, self.session_state) } - pub(crate) fn serialize_as_physical_expr( - &self, - expr: &Expr, - schema: &DFSchema, - ) -> Result> { + pub fn serialize_as_physical_expr(&self, expr: &Expr, schema: &DFSchema) -> Result> { let physical = self.create_physical_expr(expr, schema)?; let proto = serialize_physical_expr(&physical, &DefaultPhysicalExtensionCodec {})?; Ok(proto.encode_to_vec()) } - pub(crate) fn split_physical_plan( + pub fn split_physical_plan( &self, key_indices: Vec, aggregate: &LogicalPlan, @@ -313,7 +304,7 @@ impl PlanToGraphVisitor<'_> { } } - pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> { + pub fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> { self.traversal.clear(); plan.visit(self)?; Ok(()) diff --git a/src/streaming_planner/src/parse.rs b/src/streaming_planner/src/parse.rs new file mode 100644 index 00000000..563214aa --- /dev/null +++ b/src/streaming_planner/src/parse.rs @@ -0,0 +1,82 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! FunctionStream SQL parsing (`parse_sql`). +//! +//! This module only performs lexical/syntactic parsing into sqlparser +//! [`Statement`](datafusion::sql::sqlparser::ast::Statement) values using +//! [`FunctionStreamDialect`]. Mapping those AST nodes to coordinator `Statement` +//! values is done by `classify_statement` in the `function-stream` coordinator module. +//! +//! **Data-definition / pipeline shape (supported forms in the dialect)** +//! - **`CREATE TABLE ... (cols [, WATERMARK FOR ...]) WITH (...)`** — connector-backed source DDL +//! - **`CREATE TABLE ...`** other forms (including `AS SELECT` where the dialect accepts it) +//! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** +//! - **`DROP TABLE`** / **`DROP STREAMING TABLE`** +//! - **`SHOW TABLES`**, **`SHOW STREAMING TABLE(S)`**, **`SHOW CREATE TABLE`**, **`SHOW CREATE STREAMING TABLE`** +//! +//! 
**`INSERT` is not supported** at the coordinator layer — use `CREATE TABLE ... AS SELECT` or +//! `CREATE STREAMING TABLE ... AS SELECT` instead (see coordinator classification). + +use datafusion::common::{Result, plan_err}; +use datafusion::error::DataFusionError; +use datafusion::sql::sqlparser::ast::Statement as DFStatement; +use datafusion::sql::sqlparser::dialect::FunctionStreamDialect; +use datafusion::sql::sqlparser::parser::Parser; + +/// Parse SQL text into zero or more dialect [`Statement`](DFStatement) nodes. +pub fn parse_sql(query: &str) -> Result> { + let trimmed = query.trim(); + if trimmed.is_empty() { + return plan_err!("Query is empty"); + } + + let dialect = FunctionStreamDialect {}; + let statements = Parser::parse_sql(&dialect, trimmed) + .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?; + + if statements.is_empty() { + return plan_err!("No SQL statements found"); + } + + Ok(statements) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_multiple_statements_ast() { + let sql = concat!( + "CREATE TABLE t1 (id INT); ", + "CREATE STREAMING TABLE sk WITH ('connector' = 'kafka') AS SELECT id FROM t1", + ); + let stmts = parse_sql(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(matches!(stmts[0], DFStatement::CreateTable(_))); + assert!(matches!(stmts[1], DFStatement::CreateStreamingTable { .. })); + } + + #[test] + fn test_parse_empty() { + assert!(parse_sql("").is_err()); + assert!(parse_sql(" ").is_err()); + } + + #[test] + fn test_parse_select_yields_query_ast() { + let stmts = parse_sql("SELECT 1").unwrap(); + assert_eq!(stmts.len(), 1); + assert!(matches!(stmts[0], DFStatement::Query(_))); + } +} diff --git a/src/sql/physical/cdc/encode.rs b/src/streaming_planner/src/physical/cdc/encode.rs similarity index 97% rename from src/sql/physical/cdc/encode.rs rename to src/streaming_planner/src/physical/cdc/encode.rs index 65ec758d..8457724c 100644 --- a/src/sql/physical/cdc/encode.rs +++ b/src/streaming_planner/src/physical/cdc/encode.rs @@ -28,9 +28,9 @@ use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskCo use datafusion::physical_plan::{DisplayAs, ExecutionPlan, PlanProperties}; use futures::{StreamExt, ready, stream::Stream}; -use crate::sql::common::constants::{cdc, debezium_op_short, physical_plan_node_name}; -use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; -use crate::sql::physical::source_exec::make_stream_properties; +use crate::common::constants::{cdc, debezium_op_short, physical_plan_node_name}; +use crate::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD}; +use crate::physical::source_exec::make_stream_properties; // ============================================================================ // CdcDebeziumPackExec (Execution Plan Node) @@ -85,7 +85,7 @@ impl CdcDebeziumPackExec { }) } - pub(crate) fn from_decoded_parts(input: Arc, schema: SchemaRef) -> Self { + pub fn from_decoded_parts(input: Arc, schema: SchemaRef) -> Self { Self { properties: make_stream_properties(schema.clone()), input, diff --git a/src/sql/physical/cdc/mod.rs b/src/streaming_planner/src/physical/cdc/mod.rs similarity index 100% rename from src/sql/physical/cdc/mod.rs rename to src/streaming_planner/src/physical/cdc/mod.rs diff --git a/src/sql/physical/cdc/unroll.rs b/src/streaming_planner/src/physical/cdc/unroll.rs similarity index 96% rename from src/sql/physical/cdc/unroll.rs rename to src/streaming_planner/src/physical/cdc/unroll.rs index 10c62c6c..963283cc 100644 --- 
a/src/sql/physical/cdc/unroll.rs +++ b/src/streaming_planner/src/physical/cdc/unroll.rs @@ -29,11 +29,11 @@ use datafusion::logical_expr::ColumnarValue; use datafusion::physical_plan::{DisplayAs, ExecutionPlan, PlanProperties}; use futures::{StreamExt, ready, stream::Stream}; -use crate::sql::common::TIMESTAMP_FIELD; -use crate::sql::common::constants::{cdc, debezium_op_short, physical_plan_node_name}; -use crate::sql::functions::MultiHashFunction; -use crate::sql::physical::meta::{updating_meta_field, updating_meta_fields}; -use crate::sql::physical::source_exec::make_stream_properties; +use crate::common::TIMESTAMP_FIELD; +use crate::common::constants::{cdc, debezium_op_short, physical_plan_node_name}; +use crate::functions::MultiHashFunction; +use crate::physical::meta::{updating_meta_field, updating_meta_fields}; +use crate::physical::source_exec::make_stream_properties; // ============================================================================ // CdcDebeziumUnrollExec (Execution Plan Node) @@ -103,7 +103,7 @@ impl CdcDebeziumUnrollExec { } /// Used when deserializing a plan with a pre-baked output schema (see [`StreamingExtensionCodec`]). - pub(crate) fn from_decoded_parts( + pub fn from_decoded_parts( input: Arc, schema: SchemaRef, primary_key_indices: Vec, diff --git a/src/sql/physical/codec.rs b/src/streaming_planner/src/physical/codec.rs similarity index 97% rename from src/sql/physical/codec.rs rename to src/streaming_planner/src/physical/codec.rs index 1b96a9d6..2fb6fcfc 100644 --- a/src/sql/physical/codec.rs +++ b/src/streaming_planner/src/physical/codec.rs @@ -27,13 +27,13 @@ use protocol::function_stream_graph::{ }; use tokio::sync::mpsc::UnboundedReceiver; -use crate::sql::analysis::UNNESTED_COL; -use crate::sql::common::constants::{mem_exec_join_side, window_function_udf}; -use crate::sql::physical::cdc::{CdcDebeziumPackExec, CdcDebeziumUnrollExec}; -use crate::sql::physical::source_exec::{ +use crate::analysis::UNNESTED_COL; +use crate::common::constants::{mem_exec_join_side, window_function_udf}; +use crate::physical::cdc::{CdcDebeziumPackExec, CdcDebeziumUnrollExec}; +use crate::physical::source_exec::{ BufferedBatchesExec, InjectableSingleBatchExec, MpscReceiverStreamExec, PlanningPlaceholderExec, }; -use crate::sql::physical::udfs::window; +use crate::physical::udfs::window; // ============================================================================ // StreamingExtensionCodec & StreamingDecodingContext diff --git a/src/sql/physical/meta.rs b/src/streaming_planner/src/physical/meta.rs similarity index 93% rename from src/sql/physical/meta.rs rename to src/streaming_planner/src/physical/meta.rs index 1387482c..ced37656 100644 --- a/src/sql/physical/meta.rs +++ b/src/streaming_planner/src/physical/meta.rs @@ -14,8 +14,8 @@ use std::sync::{Arc, OnceLock}; use datafusion::arrow::datatypes::{DataType, Field, Fields}; -use crate::sql::common::UPDATING_META_FIELD; -use crate::sql::common::constants::updating_state_field; +use crate::common::UPDATING_META_FIELD; +use crate::common::constants::updating_state_field; pub fn updating_meta_fields() -> Fields { static FIELDS: OnceLock = OnceLock::new(); diff --git a/src/sql/physical/mod.rs b/src/streaming_planner/src/physical/mod.rs similarity index 100% rename from src/sql/physical/mod.rs rename to src/streaming_planner/src/physical/mod.rs diff --git a/src/sql/physical/source_exec.rs b/src/streaming_planner/src/physical/source_exec.rs similarity index 96% rename from src/sql/physical/source_exec.rs rename to 
src/streaming_planner/src/physical/source_exec.rs index fa65cbfd..c94c1ca3 100644 --- a/src/sql/physical/source_exec.rs +++ b/src/streaming_planner/src/physical/source_exec.rs @@ -29,11 +29,11 @@ use futures::StreamExt; use tokio::sync::mpsc::UnboundedReceiver; use tokio_stream::wrappers::UnboundedReceiverStream; -use crate::sql::common::constants::physical_plan_node_name; +use crate::common::constants::physical_plan_node_name; /// Standard [`PlanProperties`] for a continuous, unbounded stream: incremental emission, /// unknown partitioning, and unbounded boundedness (without requiring infinite memory). -pub(crate) fn create_unbounded_stream_properties(schema: SchemaRef) -> PlanProperties { +pub fn create_unbounded_stream_properties(schema: SchemaRef) -> PlanProperties { PlanProperties::new( EquivalenceProperties::new(schema), Partitioning::UnknownPartitioning(1), @@ -45,7 +45,7 @@ pub(crate) fn create_unbounded_stream_properties(schema: SchemaRef) -> PlanPrope } /// Alias for call sites that still use the older name. -pub(crate) fn make_stream_properties(schema: SchemaRef) -> PlanProperties { +pub fn make_stream_properties(schema: SchemaRef) -> PlanProperties { create_unbounded_stream_properties(schema) } @@ -58,14 +58,14 @@ pub(crate) fn make_stream_properties(schema: SchemaRef) -> PlanProperties { /// For event-driven loops that receive a single batch from the network and run a DataFusion /// plan over it, the batch is stored in the lock until execution starts. #[derive(Debug)] -pub(crate) struct InjectableSingleBatchExec { +pub struct InjectableSingleBatchExec { schema: SchemaRef, injected_batch: Arc>>, properties: PlanProperties, } impl InjectableSingleBatchExec { - pub(crate) fn new( + pub fn new( schema: SchemaRef, injected_batch: Arc>>, ) -> Self { @@ -152,14 +152,14 @@ impl ExecutionPlan for InjectableSingleBatchExec { /// /// Bridges async producers (e.g. network threads) into a DataFusion pipeline. #[derive(Debug)] -pub(crate) struct MpscReceiverStreamExec { +pub struct MpscReceiverStreamExec { schema: SchemaRef, channel_receiver: Arc>>>, properties: PlanProperties, } impl MpscReceiverStreamExec { - pub(crate) fn new( + pub fn new( schema: SchemaRef, channel_receiver: Arc>>>, ) -> Self { @@ -243,14 +243,14 @@ impl ExecutionPlan for MpscReceiverStreamExec { /// Drains a growable, locked `Vec` when `execute` runs (micro-batching). 
#[derive(Debug)] -pub(crate) struct BufferedBatchesExec { +pub struct BufferedBatchesExec { schema: SchemaRef, buffered_batches: Arc>>, properties: PlanProperties, } impl BufferedBatchesExec { - pub(crate) fn new( + pub fn new( schema: SchemaRef, buffered_batches: Arc>>, ) -> Self { diff --git a/src/sql/physical/udfs.rs b/src/streaming_planner/src/physical/udfs.rs similarity index 97% rename from src/sql/physical/udfs.rs rename to src/streaming_planner/src/physical/udfs.rs index 767abf06..0a11ba4f 100644 --- a/src/sql/physical/udfs.rs +++ b/src/streaming_planner/src/physical/udfs.rs @@ -20,9 +20,9 @@ use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use crate::common::constants::window_function_udf; use crate::make_udf_function; -use crate::sql::common::constants::window_function_udf; -use crate::sql::schema::utils::window_arrow_struct; +use crate::schema::utils::window_arrow_struct; // ============================================================================ // WindowFunctionUdf (User-Defined Scalar Function) diff --git a/src/sql/planning_runtime.rs b/src/streaming_planner/src/planning_runtime.rs similarity index 85% rename from src/sql/planning_runtime.rs rename to src/streaming_planner/src/planning_runtime.rs index dc4749ad..cb2ab93e 100644 --- a/src/sql/planning_runtime.rs +++ b/src/streaming_planner/src/planning_runtime.rs @@ -14,9 +14,9 @@ use std::sync::OnceLock; -use crate::config::streaming_job::ResolvedStreamingJobConfig; -use crate::sql::common::constants::sql_planning_default; -use crate::sql::types::SqlConfig; +use crate::common::constants::sql_planning_default; +use crate::types::SqlConfig; +use function_stream_config::streaming_job::ResolvedStreamingJobConfig; static SQL_PLANNING: OnceLock = OnceLock::new(); @@ -30,6 +30,6 @@ pub fn install_sql_planning_from_streaming_job(job: &ResolvedStreamingJobConfig) let _ = SQL_PLANNING.set(cfg).ok(); } -pub(crate) fn sql_planning_snapshot() -> SqlConfig { +pub fn sql_planning_snapshot() -> SqlConfig { SQL_PLANNING.get().cloned().unwrap_or_default() } diff --git a/src/sql/schema/catalog.rs b/src/streaming_planner/src/schema/catalog.rs similarity index 99% rename from src/sql/schema/catalog.rs rename to src/streaming_planner/src/schema/catalog.rs index 479df682..35966289 100644 --- a/src/sql/schema/catalog.rs +++ b/src/streaming_planner/src/schema/catalog.rs @@ -26,11 +26,11 @@ use super::column_descriptor::ColumnDescriptor; use super::data_encoding_format::DataEncodingFormat; use super::table::SqlSource; use super::temporal_pipeline_config::TemporalPipelineConfig; +use crate::common::constants::sql_field; +use crate::common::{Format, FsSchema}; +use crate::connector::config::ConnectorConfig; use crate::multifield_partial_ord; -use crate::sql::common::constants::sql_field; -use crate::sql::common::{Format, FsSchema}; -use crate::sql::connector::config::ConnectorConfig; -use crate::sql::types::ProcessingMode; +use crate::types::ProcessingMode; #[derive(Debug, Clone)] pub struct EngineDescriptor { diff --git a/src/sql/schema/column_descriptor.rs b/src/streaming_planner/src/schema/column_descriptor.rs similarity index 100% rename from src/sql/schema/column_descriptor.rs rename to src/streaming_planner/src/schema/column_descriptor.rs diff --git a/src/sql/schema/connection_type.rs b/src/streaming_planner/src/schema/connection_type.rs similarity index 100% rename from src/sql/schema/connection_type.rs rename to src/streaming_planner/src/schema/connection_type.rs diff 
--git a/src/sql/schema/data_encoding_format.rs b/src/streaming_planner/src/schema/data_encoding_format.rs similarity index 97% rename from src/sql/schema/data_encoding_format.rs rename to src/streaming_planner/src/schema/data_encoding_format.rs index 0b6f5e1d..b589d683 100644 --- a/src/sql/schema/data_encoding_format.rs +++ b/src/streaming_planner/src/schema/data_encoding_format.rs @@ -14,8 +14,8 @@ use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::common::{Result, plan_err}; use super::column_descriptor::ColumnDescriptor; -use crate::sql::common::Format; -use crate::sql::common::constants::cdc; +use crate::common::Format; +use crate::common::constants::cdc; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] pub enum DataEncodingFormat { diff --git a/src/sql/schema/introspection/ddl_formatter.rs b/src/streaming_planner/src/schema/introspection/ddl_formatter.rs similarity index 99% rename from src/sql/schema/introspection/ddl_formatter.rs rename to src/streaming_planner/src/schema/introspection/ddl_formatter.rs index f4ce36e6..15d8f707 100644 --- a/src/sql/schema/introspection/ddl_formatter.rs +++ b/src/streaming_planner/src/schema/introspection/ddl_formatter.rs @@ -15,7 +15,7 @@ use std::fmt::{self, Write}; use datafusion::arrow::datatypes::{DataType, Schema, TimeUnit}; -use crate::sql::common::constants::sql_field; +use crate::common::constants::sql_field; pub struct DdlBuilder<'a> { table_name: &'a str, diff --git a/src/sql/schema/introspection/mod.rs b/src/streaming_planner/src/schema/introspection/mod.rs similarity index 100% rename from src/sql/schema/introspection/mod.rs rename to src/streaming_planner/src/schema/introspection/mod.rs diff --git a/src/sql/schema/introspection/show_formatter.rs b/src/streaming_planner/src/schema/introspection/show_formatter.rs similarity index 96% rename from src/sql/schema/introspection/show_formatter.rs rename to src/streaming_planner/src/schema/introspection/show_formatter.rs index 28a81ae9..3857ba21 100644 --- a/src/sql/schema/introspection/show_formatter.rs +++ b/src/streaming_planner/src/schema/introspection/show_formatter.rs @@ -10,9 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::sql::common::constants::connection_table_role; -use crate::sql::schema::catalog::ExternalTable; -use crate::sql::schema::table::CatalogEntity; +use crate::common::constants::connection_table_role; +use crate::schema::catalog::ExternalTable; +use crate::schema::table::CatalogEntity; use super::ddl_formatter::DdlBuilder; diff --git a/src/sql/schema/introspection/stream_formatter.rs b/src/streaming_planner/src/schema/introspection/stream_formatter.rs similarity index 95% rename from src/sql/schema/introspection/stream_formatter.rs rename to src/streaming_planner/src/schema/introspection/stream_formatter.rs index ebb02330..e3239ce8 100644 --- a/src/sql/schema/introspection/stream_formatter.rs +++ b/src/streaming_planner/src/schema/introspection/stream_formatter.rs @@ -14,9 +14,9 @@ use std::sync::Arc; use datafusion::arrow::datatypes::Schema; -use crate::sql::common::constants::connection_table_role; -use crate::sql::logical_node::logical::LogicalProgram; -use crate::sql::schema::schema_provider::StreamTable; +use crate::common::constants::connection_table_role; +use crate::logical_node::logical::LogicalProgram; +use crate::schema::schema_provider::StreamTable; use super::ddl_formatter::DdlBuilder; diff --git a/src/sql/schema/mod.rs b/src/streaming_planner/src/schema/mod.rs similarity index 100% rename from src/sql/schema/mod.rs rename to src/streaming_planner/src/schema/mod.rs diff --git a/src/sql/schema/schema_provider.rs b/src/streaming_planner/src/schema/schema_provider.rs similarity index 96% rename from src/sql/schema/schema_provider.rs rename to src/streaming_planner/src/schema/schema_provider.rs index 15cd58ee..75026c3f 100644 --- a/src/sql/schema/schema_provider.rs +++ b/src/streaming_planner/src/schema/schema_provider.rs @@ -27,11 +27,11 @@ use thiserror::Error; use tracing::{debug, error, info}; use unicase::UniCase; -use crate::sql::common::constants::{planning_placeholder_udf, window_fn}; -use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalProgram}; -use crate::sql::schema::table::CatalogEntity; -use crate::sql::schema::utils::window_arrow_struct; -use crate::sql::types::{PlanningOptions, PlanningPlaceholderUdf, SqlConfig}; +use crate::common::constants::{planning_placeholder_udf, window_fn}; +use crate::logical_node::logical::{DylibUdfConfig, LogicalProgram}; +use crate::schema::table::CatalogEntity; +use crate::schema::utils::window_arrow_struct; +use crate::types::{PlanningOptions, PlanningPlaceholderUdf, SqlConfig}; pub type ObjectName = UniCase; @@ -119,7 +119,7 @@ impl TableProvider for LogicalBatchInput { _filters: &[Expr], _limit: Option, ) -> DataFusionResult> { - Ok(Arc::new(crate::sql::physical::FsMemExec::new( + Ok(Arc::new(crate::physical::FsMemExec::new( self.table_name.clone(), Arc::clone(&self.schema), ))) @@ -181,7 +181,7 @@ impl StreamPlanningContext { } pub fn new() -> Self { - let config = crate::sql::planning_runtime::sql_planning_snapshot(); + let config = crate::planning_runtime::sql_planning_snapshot(); Self::try_new(config).expect("StreamPlanningContext bootstrap") } @@ -264,7 +264,7 @@ impl StreamPlanningContext { .map(Arc::make_mut) } - pub fn get_async_udf_options(&self, _name: &str) -> Option { + pub fn get_async_udf_options(&self, _name: &str) -> Option { None } diff --git a/src/sql/schema/table.rs b/src/streaming_planner/src/schema/table.rs similarity index 94% rename from src/sql/schema/table.rs rename to src/streaming_planner/src/schema/table.rs index 6c001d9c..912e9b00 100644 --- a/src/sql/schema/table.rs +++ 
b/src/streaming_planner/src/schema/table.rs @@ -10,12 +10,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::sql::analysis::rewrite_plan; -use crate::sql::logical_node::remote_table::RemoteTableBoundaryNode; -use crate::sql::logical_planner::optimizers::produce_optimized_plan; -use crate::sql::schema::StreamSchemaProvider; -use crate::sql::schema::catalog::ExternalTable; -use crate::sql::types::{ProcessingMode, QualifiedField}; +use crate::analysis::rewrite_plan; +use crate::logical_node::remote_table::RemoteTableBoundaryNode; +use crate::logical_planner::optimizers::produce_optimized_plan; +use crate::schema::StreamSchemaProvider; +use crate::schema::catalog::ExternalTable; +use crate::types::{ProcessingMode, QualifiedField}; use datafusion::arrow::datatypes::FieldRef; use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::{Extension, LogicalPlan}; diff --git a/src/sql/schema/table_role.rs b/src/streaming_planner/src/schema/table_role.rs similarity index 97% rename from src/sql/schema/table_role.rs rename to src/streaming_planner/src/schema/table_role.rs index 7d301f9d..4574a6ea 100644 --- a/src/sql/schema/table_role.rs +++ b/src/streaming_planner/src/schema/table_role.rs @@ -18,10 +18,10 @@ use datafusion::error::DataFusionError; use super::column_descriptor::ColumnDescriptor; use super::connection_type::ConnectionType; -use crate::sql::common::constants::{ +use crate::common::constants::{ SUPPORTED_CONNECTOR_ADAPTERS, connection_table_role, connector_type, }; -use crate::sql::common::with_option_keys as opt; +use crate::common::with_option_keys as opt; /// Role of a connector-backed table in the pipeline (ingest / egress / lookup). #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] diff --git a/src/sql/schema/temporal_pipeline_config.rs b/src/streaming_planner/src/schema/temporal_pipeline_config.rs similarity index 97% rename from src/sql/schema/temporal_pipeline_config.rs rename to src/streaming_planner/src/schema/temporal_pipeline_config.rs index f672e552..db751b1c 100644 --- a/src/sql/schema/temporal_pipeline_config.rs +++ b/src/streaming_planner/src/schema/temporal_pipeline_config.rs @@ -16,7 +16,7 @@ use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::Expr; use super::column_descriptor::ColumnDescriptor; -use crate::sql::common::constants::sql_field; +use crate::common::constants::sql_field; /// Event-time and watermark configuration for streaming tables. #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] diff --git a/src/sql/schema/utils.rs b/src/streaming_planner/src/schema/utils.rs similarity index 95% rename from src/sql/schema/utils.rs rename to src/streaming_planner/src/schema/utils.rs index 45254e5f..be5efbab 100644 --- a/src/sql/schema/utils.rs +++ b/src/streaming_planner/src/schema/utils.rs @@ -16,8 +16,8 @@ use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference}; -use crate::sql::common::constants::window_interval_field; -use crate::sql::types::{QualifiedField, TIMESTAMP_FIELD}; +use crate::common::constants::window_interval_field; +use crate::types::{QualifiedField, TIMESTAMP_FIELD}; /// Returns the Arrow struct type for a window (start, end) pair. 
pub fn window_arrow_struct() -> DataType { diff --git a/src/sql/types/data_type.rs b/src/streaming_planner/src/types/data_type.rs similarity index 98% rename from src/sql/types/data_type.rs rename to src/streaming_planner/src/types/data_type.rs index 387a4190..44bb8087 100644 --- a/src/sql/types/data_type.rs +++ b/src/streaming_planner/src/types/data_type.rs @@ -17,8 +17,8 @@ use datafusion::arrow::datatypes::{ }; use datafusion::common::{Result, plan_datafusion_err, plan_err}; -use crate::sql::common::FsExtensionType; -use crate::sql::common::constants::planning_placeholder_udf; +use crate::common::FsExtensionType; +use crate::common::constants::planning_placeholder_udf; pub fn convert_data_type( sql_type: &datafusion::sql::sqlparser::ast::DataType, diff --git a/src/sql/types/df_field.rs b/src/streaming_planner/src/types/df_field.rs similarity index 100% rename from src/sql/types/df_field.rs rename to src/streaming_planner/src/types/df_field.rs diff --git a/src/sql/types/mod.rs b/src/streaming_planner/src/types/mod.rs similarity index 83% rename from src/sql/types/mod.rs rename to src/streaming_planner/src/types/mod.rs index d5124bcc..a796bfea 100644 --- a/src/sql/types/mod.rs +++ b/src/streaming_planner/src/types/mod.rs @@ -12,22 +12,22 @@ mod data_type; mod df_field; -pub(crate) mod placeholder_udf; +pub mod placeholder_udf; mod stream_schema; mod window; use std::time::Duration; -use crate::sql::common::constants::sql_planning_default; +use crate::common::constants::sql_planning_default; pub use df_field::{ QualifiedField, build_df_schema, build_df_schema_with_metadata, extract_qualified_fields, }; -pub(crate) use placeholder_udf::PlanningPlaceholderUdf; -pub(crate) use window::WindowBehavior; +pub use placeholder_udf::PlanningPlaceholderUdf; +pub use window::WindowBehavior; pub use window::{WindowType, extract_window_type}; -pub use crate::sql::common::constants::sql_field::TIMESTAMP_FIELD; +pub use crate::common::constants::sql_field::TIMESTAMP_FIELD; #[derive(Clone, Debug, Eq, PartialEq)] pub enum ProcessingMode { @@ -38,7 +38,7 @@ pub enum ProcessingMode { #[derive(Clone, Debug)] pub struct SqlConfig { pub default_parallelism: usize, - /// Physical pipeline parallelism for [`KeyExtractionNode`](crate::sql::logical_node::key_calculation::KeyExtractionNode) / KeyBy. + /// Physical pipeline parallelism for [`KeyExtractionNode`](crate::logical_node::key_calculation::KeyExtractionNode) / KeyBy. pub key_by_parallelism: usize, } diff --git a/src/sql/types/placeholder_udf.rs b/src/streaming_planner/src/types/placeholder_udf.rs similarity index 98% rename from src/sql/types/placeholder_udf.rs rename to src/streaming_planner/src/types/placeholder_udf.rs index 059637e9..2f4f3f3c 100644 --- a/src/sql/types/placeholder_udf.rs +++ b/src/streaming_planner/src/types/placeholder_udf.rs @@ -25,7 +25,7 @@ use datafusion::logical_expr::{ // ============================================================================ /// Logical-planning-only UDF: satisfies type checking until real functions are wired in. 
-pub(crate) struct PlanningPlaceholderUdf { +pub struct PlanningPlaceholderUdf { name: String, signature: Signature, return_type: DataType, diff --git a/src/sql/types/stream_schema.rs b/src/streaming_planner/src/types/stream_schema.rs similarity index 100% rename from src/sql/types/stream_schema.rs rename to src/streaming_planner/src/types/stream_schema.rs diff --git a/src/sql/types/window.rs b/src/streaming_planner/src/types/window.rs similarity index 98% rename from src/sql/types/window.rs rename to src/streaming_planner/src/types/window.rs index 1aa05f42..69401f51 100644 --- a/src/sql/types/window.rs +++ b/src/streaming_planner/src/types/window.rs @@ -16,7 +16,7 @@ use datafusion::common::{Result, ScalarValue, not_impl_err, plan_err}; use datafusion::logical_expr::Expr; use datafusion::logical_expr::expr::{Alias, ScalarFunction}; -use crate::sql::common::constants::window_fn; +use crate::common::constants::window_fn; use super::QualifiedField; @@ -35,7 +35,7 @@ pub enum WindowType { /// How windowing is represented in the physical plan. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) enum WindowBehavior { +pub enum WindowBehavior { FromOperator { window: WindowType, window_field: QualifiedField, diff --git a/src/streaming_runtime/Cargo.toml b/src/streaming_runtime/Cargo.toml new file mode 100644 index 00000000..026b50e1 --- /dev/null +++ b/src/streaming_runtime/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "function-stream-streaming-runtime" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_streaming_runtime" +path = "src/lib.rs" diff --git a/src/lib.rs b/src/streaming_runtime/src/lib.rs similarity index 54% rename from src/lib.rs rename to src/streaming_runtime/src/lib.rs index a6bb4d28..aeb056d5 100644 --- a/src/lib.rs +++ b/src/streaming_runtime/src/lib.rs @@ -10,14 +10,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Library crate for function-stream +//! Streaming execution runtime. +//! +//! The streaming engine and shared runtime helpers (`streaming/`, `util/`) are +//! implemented under [`src/streaming`] and [`src/util`] in this package. They are +//! currently **compiled as part of the `function-stream` crate** via `#[path]` in +//! `src/lib.rs` / `src/main.rs`, sharing the root `crate::sql` name (re-exported streaming planner crate). 
-#![allow(dead_code)] - -pub mod config; -pub mod coordinator; -pub mod logging; -pub mod runtime; -pub mod server; -pub mod sql; -pub mod storage; +pub const CRATE_NAME: &str = "function-stream-streaming-runtime"; diff --git a/src/runtime/streaming/api/context.rs b/src/streaming_runtime/src/streaming/api/context.rs similarity index 96% rename from src/runtime/streaming/api/context.rs rename to src/streaming_runtime/src/streaming/api/context.rs index f2557e7a..a29d7ac2 100644 --- a/src/runtime/streaming/api/context.rs +++ b/src/streaming_runtime/src/streaming/api/context.rs @@ -19,11 +19,11 @@ use arrow_array::RecordBatch; use protocol::storage::SourceCheckpointInfo; use tokio::sync::mpsc; -use crate::runtime::memory::{MemoryBlock, MemoryPool, get_array_memory_size}; -use crate::runtime::streaming::network::endpoint::PhysicalSender; -use crate::runtime::streaming::protocol::control::JobMasterEvent; -use crate::runtime::streaming::protocol::event::{StreamEvent, TrackedEvent}; -use crate::runtime::streaming::state::IoManager; +use crate::memory::{MemoryBlock, MemoryPool, get_array_memory_size}; +use crate::streaming::network::endpoint::PhysicalSender; +use crate::streaming::protocol::control::JobMasterEvent; +use crate::streaming::protocol::event::{StreamEvent, TrackedEvent}; +use crate::streaming::state::IoManager; #[derive(Debug, Clone)] pub struct TaskContextConfig { diff --git a/src/runtime/streaming/api/mod.rs b/src/streaming_runtime/src/streaming/api/mod.rs similarity index 100% rename from src/runtime/streaming/api/mod.rs rename to src/streaming_runtime/src/streaming/api/mod.rs diff --git a/src/runtime/streaming/api/operator.rs b/src/streaming_runtime/src/streaming/api/operator.rs similarity index 93% rename from src/runtime/streaming/api/operator.rs rename to src/streaming_runtime/src/streaming/api/operator.rs index fc75e475..56cefcee 100644 --- a/src/runtime/streaming/api/operator.rs +++ b/src/streaming_runtime/src/streaming/api/operator.rs @@ -10,10 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::source::SourceOperator; -use crate::runtime::streaming::protocol::event::StreamOutput; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::source::SourceOperator; +use crate::streaming::protocol::event::StreamOutput; use arrow_array::RecordBatch; use async_trait::async_trait; diff --git a/src/runtime/streaming/api/source.rs b/src/streaming_runtime/src/streaming/api/source.rs similarity index 98% rename from src/runtime/streaming/api/source.rs rename to src/streaming_runtime/src/streaming/api/source.rs index 9c531f2c..4bed912f 100644 --- a/src/runtime/streaming/api/source.rs +++ b/src/streaming_runtime/src/streaming/api/source.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::runtime::streaming::api::context::TaskContext; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::api::context::TaskContext; use arrow_array::RecordBatch; use async_trait::async_trait; use protocol::storage::{ diff --git a/src/runtime/streaming/error.rs b/src/streaming_runtime/src/streaming/error.rs similarity index 100% rename from src/runtime/streaming/error.rs rename to src/streaming_runtime/src/streaming/error.rs diff --git a/src/runtime/streaming/execution/mod.rs b/src/streaming_runtime/src/streaming/execution/mod.rs similarity index 100% rename from src/runtime/streaming/execution/mod.rs rename to src/streaming_runtime/src/streaming/execution/mod.rs diff --git a/src/runtime/streaming/execution/operator_chain.rs b/src/streaming_runtime/src/streaming/execution/operator_chain.rs similarity index 98% rename from src/runtime/streaming/execution/operator_chain.rs rename to src/streaming_runtime/src/streaming/execution/operator_chain.rs index 88e8f441..38584c5d 100644 --- a/src/runtime/streaming/execution/operator_chain.rs +++ b/src/streaming_runtime/src/streaming/execution/operator_chain.rs @@ -13,14 +13,14 @@ use anyhow::anyhow; use async_trait::async_trait; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::error::RunError; -use crate::runtime::streaming::protocol::{ +use crate::sql::common::CheckpointBarrier; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::error::RunError; +use crate::streaming::protocol::{ control::{ControlCommand, StopMode}, event::{StreamEvent, StreamOutput, TrackedEvent}, }; -use crate::sql::common::CheckpointBarrier; // ============================================================================ // Core Traits diff --git a/src/runtime/streaming/execution/pipeline.rs b/src/streaming_runtime/src/streaming/execution/pipeline.rs similarity index 94% rename from src/runtime/streaming/execution/pipeline.rs rename to src/streaming_runtime/src/streaming/execution/pipeline.rs index 7c2ca17a..6b6b1878 100644 --- a/src/runtime/streaming/execution/pipeline.rs +++ b/src/streaming_runtime/src/streaming/execution/pipeline.rs @@ -14,20 +14,20 @@ use tokio::sync::mpsc::UnboundedReceiver; use tokio_stream::{StreamExt, StreamMap}; use tracing::{Instrument, info, info_span}; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::Operator; -use crate::runtime::streaming::error::RunError; -use crate::runtime::streaming::execution::operator_chain::{ChainBuilder, OperatorDrive}; -use crate::runtime::streaming::execution::tracker::{ +use crate::sql::common::Watermark; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::Operator; +use crate::streaming::error::RunError; +use crate::streaming::execution::operator_chain::{ChainBuilder, OperatorDrive}; +use crate::streaming::execution::tracker::{ barrier_aligner::{AlignmentStatus, BarrierAligner}, watermark_tracker::WatermarkTracker, }; -use crate::runtime::streaming::network::endpoint::BoxedEventStream; -use crate::runtime::streaming::protocol::{ +use crate::streaming::network::endpoint::BoxedEventStream; +use crate::streaming::protocol::{ control::ControlCommand, event::{StreamEvent, TrackedEvent}, }; -use crate::sql::common::Watermark; pub struct Pipeline { chain_head: Box, diff --git 
a/src/runtime/streaming/execution/source_driver.rs b/src/streaming_runtime/src/streaming/execution/source_driver.rs similarity index 95% rename from src/runtime/streaming/execution/source_driver.rs rename to src/streaming_runtime/src/streaming/execution/source_driver.rs index 0118c4ee..72cc160e 100644 --- a/src/runtime/streaming/execution/source_driver.rs +++ b/src/streaming_runtime/src/streaming/execution/source_driver.rs @@ -14,15 +14,15 @@ use tokio::sync::mpsc::UnboundedReceiver; use tokio::time::{Instant, sleep}; use tracing::{Instrument, info, info_span, warn}; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::source::{SourceCheckpointReport, SourceEvent, SourceOperator}; -use crate::runtime::streaming::error::RunError; -use crate::runtime::streaming::execution::OperatorDrive; -use crate::runtime::streaming::protocol::{ +use crate::sql::common::CheckpointBarrier; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::source::{SourceCheckpointReport, SourceEvent, SourceOperator}; +use crate::streaming::error::RunError; +use crate::streaming::execution::OperatorDrive; +use crate::streaming::protocol::{ control::ControlCommand, event::{StreamEvent, TrackedEvent}, }; -use crate::sql::common::CheckpointBarrier; pub struct SourceDriver { operator: Box, diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/streaming_runtime/src/streaming/execution/tracker/barrier_aligner.rs similarity index 100% rename from src/runtime/streaming/execution/tracker/barrier_aligner.rs rename to src/streaming_runtime/src/streaming/execution/tracker/barrier_aligner.rs diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/streaming_runtime/src/streaming/execution/tracker/mod.rs similarity index 100% rename from src/runtime/streaming/execution/tracker/mod.rs rename to src/streaming_runtime/src/streaming/execution/tracker/mod.rs diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/streaming_runtime/src/streaming/execution/tracker/watermark_tracker.rs similarity index 97% rename from src/runtime/streaming/execution/tracker/watermark_tracker.rs rename to src/streaming_runtime/src/streaming/execution/tracker/watermark_tracker.rs index af6fd0bc..ec44726d 100644 --- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs +++ b/src/streaming_runtime/src/streaming/execution/tracker/watermark_tracker.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::runtime::streaming::protocol::event::{merge_watermarks, watermark_strictly_advances}; use crate::sql::common::Watermark; +use crate::streaming::protocol::event::{merge_watermarks, watermark_strictly_advances}; #[derive(Debug)] pub struct WatermarkTracker { diff --git a/src/runtime/streaming/factory/connector/delta.rs b/src/streaming_runtime/src/streaming/factory/connector/delta.rs similarity index 87% rename from src/runtime/streaming/factory/connector/delta.rs rename to src/streaming_runtime/src/streaming/factory/connector/delta.rs index 726f87ef..0c684e94 100644 --- a/src/runtime/streaming/factory/connector/delta.rs +++ b/src/streaming_runtime/src/streaming/factory/connector/delta.rs @@ -18,16 +18,16 @@ use prost::Message; use protocol::function_stream_graph::ConnectorOp; use protocol::function_stream_graph::connector_op::Config; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::connector::sink_props_codec::{ - apply_common_sink_fields, normalized_props, parse_sink_memory_bytes, -}; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::operators::sink::delta::{DeltaFormat, DeltaSinkOperator}; -use crate::runtime::streaming::operators::sink::filesystem::compression_from_str; use crate::sql::common::constants::connection_format_value; use crate::sql::common::with_option_keys as opt; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::connector::sink_props_codec::{ + apply_common_sink_fields, normalized_props, parse_sink_memory_bytes, +}; +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; +use crate::streaming::operators::sink::delta::{DeltaFormat, DeltaSinkOperator}; +use crate::streaming::operators::sink::filesystem::compression_from_str; pub struct DeltaSinkDispatcher; diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/streaming_runtime/src/streaming/factory/connector/dispatchers.rs similarity index 92% rename from src/runtime/streaming/factory/connector/dispatchers.rs rename to src/streaming_runtime/src/streaming/factory/connector/dispatchers.rs index 7d626600..bcd540a4 100644 --- a/src/runtime/streaming/factory/connector/dispatchers.rs +++ b/src/streaming_runtime/src/streaming/factory/connector/dispatchers.rs @@ -16,10 +16,10 @@ use anyhow::{Context, Result, bail}; use prost::Message; use protocol::function_stream_graph::ConnectorOp; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; use crate::sql::common::constants::connector_type; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; use super::{ DeltaSinkDispatcher, FilesystemSinkDispatcher, IcebergSinkDispatcher, LanceDbSinkDispatcher, diff --git a/src/runtime/streaming/factory/connector/filesystem.rs b/src/streaming_runtime/src/streaming/factory/connector/filesystem.rs similarity index 89% rename from src/runtime/streaming/factory/connector/filesystem.rs rename to src/streaming_runtime/src/streaming/factory/connector/filesystem.rs index 94101407..b680590c 100644 --- a/src/runtime/streaming/factory/connector/filesystem.rs 
+++ b/src/streaming_runtime/src/streaming/factory/connector/filesystem.rs @@ -18,17 +18,17 @@ use prost::Message; use protocol::function_stream_graph::ConnectorOp; use protocol::function_stream_graph::connector_op::Config; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::connector::sink_props_codec::{ +use crate::sql::common::constants::connection_format_value; +use crate::sql::common::with_option_keys as opt; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::connector::sink_props_codec::{ apply_common_sink_fields, normalized_props, parse_sink_memory_bytes, }; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::operators::sink::filesystem::{ +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; +use crate::streaming::operators::sink::filesystem::{ FilesystemFormat, FilesystemSinkOperator, compression_from_str, }; -use crate::sql::common::constants::connection_format_value; -use crate::sql::common::with_option_keys as opt; pub struct FilesystemSinkDispatcher; diff --git a/src/runtime/streaming/factory/connector/iceberg.rs b/src/streaming_runtime/src/streaming/factory/connector/iceberg.rs similarity index 86% rename from src/runtime/streaming/factory/connector/iceberg.rs rename to src/streaming_runtime/src/streaming/factory/connector/iceberg.rs index 58e0809f..a6285cbf 100644 --- a/src/runtime/streaming/factory/connector/iceberg.rs +++ b/src/streaming_runtime/src/streaming/factory/connector/iceberg.rs @@ -18,16 +18,16 @@ use prost::Message; use protocol::function_stream_graph::ConnectorOp; use protocol::function_stream_graph::connector_op::Config; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::connector::sink_props_codec::{ - apply_common_sink_fields, normalized_props, parse_sink_memory_bytes, -}; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::operators::sink::filesystem::compression_from_str; -use crate::runtime::streaming::operators::sink::iceberg::{IcebergFormat, IcebergSinkOperator}; use crate::sql::common::constants::connection_format_value; use crate::sql::common::with_option_keys as opt; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::connector::sink_props_codec::{ + apply_common_sink_fields, normalized_props, parse_sink_memory_bytes, +}; +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; +use crate::streaming::operators::sink::filesystem::compression_from_str; +use crate::streaming::operators::sink::iceberg::{IcebergFormat, IcebergSinkOperator}; pub struct IcebergSinkDispatcher; diff --git a/src/runtime/streaming/factory/connector/kafka.rs b/src/streaming_runtime/src/streaming/factory/connector/kafka.rs similarity index 94% rename from src/runtime/streaming/factory/connector/kafka.rs rename to src/streaming_runtime/src/streaming/factory/connector/kafka.rs index 17838e3e..ad77af06 100644 --- a/src/runtime/streaming/factory/connector/kafka.rs +++ b/src/streaming_runtime/src/streaming/factory/connector/kafka.rs @@ -24,19 +24,17 @@ use protocol::function_stream_graph::{ }; use tracing::info; -use 
crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::api::source::SourceOffset; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::format::{ +use crate::sql::common::FsSchema; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::api::source::SourceOffset; +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; +use crate::streaming::format::{ BadDataPolicy as RtBadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat, JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat, }; -use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator}; -use crate::runtime::streaming::operators::source::kafka::{ - BufferedDeserializer, KafkaSourceOperator, -}; -use crate::sql::common::FsSchema; +use crate::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator}; +use crate::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator}; const DEFAULT_SOURCE_BATCH_SIZE: usize = 1024; diff --git a/src/runtime/streaming/factory/connector/lancedb.rs b/src/streaming_runtime/src/streaming/factory/connector/lancedb.rs similarity index 93% rename from src/runtime/streaming/factory/connector/lancedb.rs rename to src/streaming_runtime/src/streaming/factory/connector/lancedb.rs index ad8bc246..3deb055d 100644 --- a/src/runtime/streaming/factory/connector/lancedb.rs +++ b/src/streaming_runtime/src/streaming/factory/connector/lancedb.rs @@ -18,15 +18,15 @@ use prost::Message; use protocol::function_stream_graph::ConnectorOp; use protocol::function_stream_graph::connector_op::Config; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::connector::sink_props_codec::{ - apply_common_sink_fields, normalized_props, -}; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::operators::sink::lancedb::LanceDbSinkOperator; use crate::sql::common::constants::connection_format_value; use crate::sql::common::with_option_keys as opt; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::connector::sink_props_codec::{ + apply_common_sink_fields, normalized_props, +}; +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; +use crate::streaming::operators::sink::lancedb::LanceDbSinkOperator; pub struct LanceDbSinkDispatcher; diff --git a/src/runtime/streaming/factory/connector/mod.rs b/src/streaming_runtime/src/streaming/factory/connector/mod.rs similarity index 100% rename from src/runtime/streaming/factory/connector/mod.rs rename to src/streaming_runtime/src/streaming/factory/connector/mod.rs diff --git a/src/runtime/streaming/factory/connector/s3.rs b/src/streaming_runtime/src/streaming/factory/connector/s3.rs similarity index 89% rename from src/runtime/streaming/factory/connector/s3.rs rename to src/streaming_runtime/src/streaming/factory/connector/s3.rs index 4b67fb9e..462b25d9 100644 --- a/src/runtime/streaming/factory/connector/s3.rs +++ b/src/streaming_runtime/src/streaming/factory/connector/s3.rs @@ -18,17 +18,15 @@ use prost::Message; use 
protocol::function_stream_graph::ConnectorOp; use protocol::function_stream_graph::connector_op::Config; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::connector::sink_props_codec::{ - apply_common_sink_fields, normalized_props, -}; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::operators::sink::s3::{ - S3Format, S3SinkOperator, compression_from_str, -}; use crate::sql::common::constants::connection_format_value; use crate::sql::common::with_option_keys as opt; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::connector::sink_props_codec::{ + apply_common_sink_fields, normalized_props, +}; +use crate::streaming::factory::global::Registry; +use crate::streaming::factory::operator_constructor::OperatorConstructor; +use crate::streaming::operators::sink::s3::{S3Format, S3SinkOperator, compression_from_str}; pub struct S3SinkDispatcher; diff --git a/src/runtime/streaming/factory/connector/sink_props_codec.rs b/src/streaming_runtime/src/streaming/factory/connector/sink_props_codec.rs similarity index 100% rename from src/runtime/streaming/factory/connector/sink_props_codec.rs rename to src/streaming_runtime/src/streaming/factory/connector/sink_props_codec.rs diff --git a/src/runtime/streaming/factory/global/mod.rs b/src/streaming_runtime/src/streaming/factory/global/mod.rs similarity index 100% rename from src/runtime/streaming/factory/global/mod.rs rename to src/streaming_runtime/src/streaming/factory/global/mod.rs diff --git a/src/runtime/streaming/factory/global/session_registry.rs b/src/streaming_runtime/src/streaming/factory/global/session_registry.rs similarity index 100% rename from src/runtime/streaming/factory/global/session_registry.rs rename to src/streaming_runtime/src/streaming/factory/global/session_registry.rs diff --git a/src/runtime/streaming/factory/mod.rs b/src/streaming_runtime/src/streaming/factory/mod.rs similarity index 100% rename from src/runtime/streaming/factory/mod.rs rename to src/streaming_runtime/src/streaming/factory/mod.rs diff --git a/src/runtime/streaming/factory/operator_constructor.rs b/src/streaming_runtime/src/streaming/factory/operator_constructor.rs similarity index 86% rename from src/runtime/streaming/factory/operator_constructor.rs rename to src/streaming_runtime/src/streaming/factory/operator_constructor.rs index 5d0ff7d7..2cd96e6d 100644 --- a/src/runtime/streaming/factory/operator_constructor.rs +++ b/src/streaming_runtime/src/streaming/factory/operator_constructor.rs @@ -13,8 +13,8 @@ use anyhow::Result; use std::sync::Arc; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::global::Registry; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::global::Registry; /// Builds a [`ConstructedOperator`] from serialized configuration and a [`Registry`]. 
pub trait OperatorConstructor: Send + Sync { diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/streaming_runtime/src/streaming/factory/operator_factory.rs similarity index 93% rename from src/runtime/streaming/factory/operator_factory.rs rename to src/streaming_runtime/src/streaming/factory/operator_factory.rs index 1ce04eeb..10b9d3bf 100644 --- a/src/runtime/streaming/factory/operator_factory.rs +++ b/src/streaming_runtime/src/streaming/factory/operator_factory.rs @@ -11,27 +11,23 @@ // limitations under the License. use super::operator_constructor::OperatorConstructor; -use crate::runtime::streaming::api::operator::ConstructedOperator; -use crate::runtime::streaming::factory::connector::{ - ConnectorSinkDispatcher, ConnectorSourceDispatcher, -}; -use crate::runtime::streaming::factory::global::Registry; -use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor; -use crate::runtime::streaming::operators::joins::{ - InstantJoinConstructor, JoinWithExpirationConstructor, -}; +use crate::streaming::api::operator::ConstructedOperator; +use crate::streaming::factory::connector::{ConnectorSinkDispatcher, ConnectorSourceDispatcher}; +use crate::streaming::factory::global::Registry; +use crate::streaming::operators::grouping::IncrementalAggregatingConstructor; +use crate::streaming::operators::joins::{InstantJoinConstructor, JoinWithExpirationConstructor}; use anyhow::{Result, anyhow}; use prost::Message; use protocol::function_stream_graph::ProjectionOperator as ProjectionOperatorProto; use std::collections::HashMap; use std::sync::Arc; -use crate::runtime::streaming::operators::watermark::WatermarkGeneratorConstructor; -use crate::runtime::streaming::operators::windows::{ +use crate::streaming::operators::watermark::WatermarkGeneratorConstructor; +use crate::streaming::operators::windows::{ SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor, TumblingAggregateWindowConstructor, WindowFunctionConstructor, }; -use crate::runtime::streaming::operators::{ +use crate::streaming::operators::{ KeyExecutionOperator, ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator, }; use protocol::function_stream_graph::{ @@ -125,7 +121,7 @@ impl OperatorFactory { ); self.register_named(OperatorName::ConnectorSink, Box::new(ConnectorSinkBridge)); - crate::runtime::streaming::factory::register_kafka_connector_plugins(self); + crate::streaming::factory::register_kafka_connector_plugins(self); } } diff --git a/src/runtime/streaming/format/config.rs b/src/streaming_runtime/src/streaming/format/config.rs similarity index 100% rename from src/runtime/streaming/format/config.rs rename to src/streaming_runtime/src/streaming/format/config.rs diff --git a/src/runtime/streaming/format/deserializer.rs b/src/streaming_runtime/src/streaming/format/deserializer.rs similarity index 100% rename from src/runtime/streaming/format/deserializer.rs rename to src/streaming_runtime/src/streaming/format/deserializer.rs diff --git a/src/runtime/streaming/format/encoder.rs b/src/streaming_runtime/src/streaming/format/encoder.rs similarity index 100% rename from src/runtime/streaming/format/encoder.rs rename to src/streaming_runtime/src/streaming/format/encoder.rs diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/streaming_runtime/src/streaming/format/json_encoder.rs similarity index 100% rename from src/runtime/streaming/format/json_encoder.rs rename to src/streaming_runtime/src/streaming/format/json_encoder.rs diff --git 
a/src/runtime/streaming/format/mod.rs b/src/streaming_runtime/src/streaming/format/mod.rs similarity index 100% rename from src/runtime/streaming/format/mod.rs rename to src/streaming_runtime/src/streaming/format/mod.rs diff --git a/src/runtime/streaming/format/serializer.rs b/src/streaming_runtime/src/streaming/format/serializer.rs similarity index 100% rename from src/runtime/streaming/format/serializer.rs rename to src/streaming_runtime/src/streaming/format/serializer.rs diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/streaming_runtime/src/streaming/job/edge_manager.rs similarity index 98% rename from src/runtime/streaming/job/edge_manager.rs rename to src/streaming_runtime/src/streaming/job/edge_manager.rs index 00c94485..32bb3722 100644 --- a/src/runtime/streaming/job/edge_manager.rs +++ b/src/streaming_runtime/src/streaming/job/edge_manager.rs @@ -16,7 +16,7 @@ use anyhow::{Result, anyhow}; use tokio::sync::mpsc; use tracing::{debug, info, warn}; -use crate::runtime::streaming::protocol::event::TrackedEvent; +use crate::streaming::protocol::event::TrackedEvent; use protocol::function_stream_graph::{FsEdge, FsNode}; const DEFAULT_CHANNEL_CAPACITY: usize = 2048; diff --git a/src/runtime/streaming/job/job_manager.rs b/src/streaming_runtime/src/streaming/job/job_manager.rs similarity index 96% rename from src/runtime/streaming/job/job_manager.rs rename to src/streaming_runtime/src/streaming/job/job_manager.rs index e4b9916b..c4c0a826 100644 --- a/src/runtime/streaming/job/job_manager.rs +++ b/src/streaming_runtime/src/streaming/job/job_manager.rs @@ -30,22 +30,22 @@ use crate::config::{ DEFAULT_CHECKPOINT_INTERVAL_MS, DEFAULT_OPERATOR_STATE_STORE_MEMORY_BYTES, DEFAULT_PIPELINE_PARALLELISM, }; -use crate::runtime::memory::global_memory_pool; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator}; -use crate::runtime::streaming::api::source::SourceOperator; -use crate::runtime::streaming::execution::{ChainBuilder, Pipeline, SourceDriver}; -use crate::runtime::streaming::factory::OperatorFactory; -use crate::runtime::streaming::job::edge_manager::EdgeManager; -use crate::runtime::streaming::job::models::{ +use crate::memory::global_memory_pool; +use crate::sql::logical_node::logical::OperatorName; +use crate::stream_catalog::CatalogManager; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{ConstructedOperator, Operator}; +use crate::streaming::api::source::SourceOperator; +use crate::streaming::execution::{ChainBuilder, Pipeline, SourceDriver}; +use crate::streaming::factory::OperatorFactory; +use crate::streaming::job::edge_manager::EdgeManager; +use crate::streaming::job::models::{ PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus, StreamingJobRollupStatus, }; -use crate::runtime::streaming::network::endpoint::{BoxedEventStream, PhysicalSender}; -use crate::runtime::streaming::protocol::control::{ControlCommand, JobMasterEvent, StopMode}; -use crate::runtime::streaming::protocol::event::CheckpointBarrier; -use crate::runtime::streaming::state::{IoManager, IoPool, NoopMetricsCollector}; -use crate::sql::logical_node::logical::OperatorName; -use crate::storage::stream_catalog::CatalogManager; +use crate::streaming::network::endpoint::{BoxedEventStream, PhysicalSender}; +use crate::streaming::protocol::control::{ControlCommand, JobMasterEvent, StopMode}; +use crate::streaming::protocol::event::CheckpointBarrier; +use 
crate::streaming::state::{IoManager, IoPool, NoopMetricsCollector}; #[derive(Debug, Clone)] pub struct StreamingJobSummary { @@ -80,7 +80,7 @@ pub struct StateConfig { pub pipeline_parallelism: u32, pub job_manager_control_plane_threads: u32, pub job_manager_data_plane_threads: u32, - /// Total bytes shared by all [`crate::runtime::streaming::state::OperatorStateStore`] (global pool). + /// Total bytes shared by all [`crate::streaming::state::OperatorStateStore`] (global pool). pub per_operator_memory_bytes: u64, } @@ -103,7 +103,7 @@ impl Default for StateConfig { static GLOBAL_JOB_MANAGER: OnceLock> = OnceLock::new(); -/// Operators that create an [`crate::runtime::streaming::state::OperatorStateStore`] at runtime. +/// Operators that create an [`crate::streaming::state::OperatorStateStore`] at runtime. fn pipeline_state_store_operator_count(operators: &[ChainedOperator]) -> usize { operators .iter() @@ -159,7 +159,7 @@ struct CheckpointCoordinatorConfig { } impl PipelineRunner { - async fn run(self) -> Result<(), crate::runtime::streaming::error::RunError> { + async fn run(self) -> Result<(), crate::streaming::error::RunError> { match self { PipelineRunner::Source(driver) => driver.run().await, PipelineRunner::Standard(pipeline) => pipeline.run().await, diff --git a/src/runtime/streaming/job/mod.rs b/src/streaming_runtime/src/streaming/job/mod.rs similarity index 100% rename from src/runtime/streaming/job/mod.rs rename to src/streaming_runtime/src/streaming/job/mod.rs diff --git a/src/runtime/streaming/job/models.rs b/src/streaming_runtime/src/streaming/job/models.rs similarity index 98% rename from src/runtime/streaming/job/models.rs rename to src/streaming_runtime/src/streaming/job/models.rs index e81649f2..a6d226bd 100644 --- a/src/runtime/streaming/job/models.rs +++ b/src/streaming_runtime/src/streaming/job/models.rs @@ -19,7 +19,7 @@ use protocol::function_stream_graph::FsProgram; use tokio::sync::mpsc; use tokio::task::JoinHandle; -use crate::runtime::streaming::protocol::control::ControlCommand; +use crate::streaming::protocol::control::ControlCommand; #[derive(Debug, Clone, PartialEq)] pub enum PipelineStatus { diff --git a/src/runtime/streaming/mod.rs b/src/streaming_runtime/src/streaming/mod.rs similarity index 100% rename from src/runtime/streaming/mod.rs rename to src/streaming_runtime/src/streaming/mod.rs diff --git a/src/runtime/streaming/network/endpoint.rs b/src/streaming_runtime/src/streaming/network/endpoint.rs similarity index 94% rename from src/runtime/streaming/network/endpoint.rs rename to src/streaming_runtime/src/streaming/network/endpoint.rs index ae75e6fc..12550977 100644 --- a/src/runtime/streaming/network/endpoint.rs +++ b/src/streaming_runtime/src/streaming/network/endpoint.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
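// Illustrative sketch (hypothetical helper, not the crate's actual API): the
// `StateConfig` hunk above documents `per_operator_memory_bytes` as one global pool
// shared by every `crate::streaming::state::OperatorStateStore`, and
// `pipeline_state_store_operator_count` counts the operators that actually open a
// store. Assuming those semantics, a per-store budget could be derived like this:
fn per_store_budget(global_pool_bytes: u64, state_store_operators: usize) -> u64 {
    if state_store_operators == 0 {
        // No stateful operators in the pipeline: the whole pool remains available.
        global_pool_bytes
    } else {
        global_pool_bytes / state_store_operators as u64
    }
}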
-use crate::runtime::streaming::protocol::event::StreamEvent; -use crate::runtime::streaming::protocol::event::TrackedEvent; +use crate::streaming::protocol::event::StreamEvent; +use crate::streaming::protocol::event::TrackedEvent; use anyhow::{Result, anyhow}; use std::pin::Pin; use tokio::sync::mpsc; diff --git a/src/runtime/streaming/network/environment.rs b/src/streaming_runtime/src/streaming/network/environment.rs similarity index 100% rename from src/runtime/streaming/network/environment.rs rename to src/streaming_runtime/src/streaming/network/environment.rs diff --git a/src/runtime/streaming/network/mod.rs b/src/streaming_runtime/src/streaming/network/mod.rs similarity index 100% rename from src/runtime/streaming/network/mod.rs rename to src/streaming_runtime/src/streaming/network/mod.rs diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/streaming_runtime/src/streaming/operators/grouping/incremental_aggregate.rs similarity index 99% rename from src/runtime/streaming/operators/grouping/incremental_aggregate.rs rename to src/streaming_runtime/src/streaming/operators/grouping/incremental_aggregate.rs index ff997c11..55ec2006 100644 --- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs +++ b/src/streaming_runtime/src/streaming/operators/grouping/incremental_aggregate.rs @@ -41,17 +41,17 @@ use std::{collections::HashMap, mem, sync::Arc}; use tracing::{debug, info, warn}; // ========================================================================= // ========================================================================= -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::operators::{Key, UpdatingCache}; -use crate::runtime::streaming::state::OperatorStateStore; -use crate::runtime::util::decode_aggregate; use crate::sql::common::{ CheckpointBarrier, FsSchema, TIMESTAMP_FIELD, UPDATING_META_FIELD, Watermark, to_nanos, }; use crate::sql::physical::updating_meta_fields; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::operators::{Key, UpdatingCache}; +use crate::streaming::state::OperatorStateStore; +use crate::util::decode_aggregate; #[derive(Debug, Copy, Clone)] struct BatchData { diff --git a/src/runtime/streaming/operators/grouping/mod.rs b/src/streaming_runtime/src/streaming/operators/grouping/mod.rs similarity index 100% rename from src/runtime/streaming/operators/grouping/mod.rs rename to src/streaming_runtime/src/streaming/operators/grouping/mod.rs diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/streaming_runtime/src/streaming/operators/grouping/updating_cache.rs similarity index 100% rename from src/runtime/streaming/operators/grouping/updating_cache.rs rename to src/streaming_runtime/src/streaming/operators/grouping/updating_cache.rs diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/streaming_runtime/src/streaming/operators/joins/join_instance.rs similarity index 98% rename from src/runtime/streaming/operators/joins/join_instance.rs rename to src/streaming_runtime/src/streaming/operators/joins/join_instance.rs index 5c04c1fb..232c2985 100644 --- a/src/runtime/streaming/operators/joins/join_instance.rs +++ 
b/src/streaming_runtime/src/streaming/operators/joins/join_instance.rs @@ -25,13 +25,13 @@ use std::sync::{Arc, RwLock}; use std::time::UNIX_EPOCH; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::state::OperatorStateStore; use crate::sql::common::{CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::state::OperatorStateStore; use async_trait::async_trait; use protocol::function_stream_graph::JoinOperator; diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/streaming_runtime/src/streaming/operators/joins/join_with_expiration.rs similarity index 97% rename from src/runtime/streaming/operators/joins/join_with_expiration.rs rename to src/streaming_runtime/src/streaming/operators/joins/join_with_expiration.rs index 5ed8dfa3..139e88cd 100644 --- a/src/runtime/streaming/operators/joins/join_with_expiration.rs +++ b/src/streaming_runtime/src/streaming/operators/joins/join_with_expiration.rs @@ -24,13 +24,13 @@ use std::sync::{Arc, RwLock}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::state::OperatorStateStore; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::state::OperatorStateStore; use async_trait::async_trait; use protocol::function_stream_graph::JoinOperator; diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/streaming_runtime/src/streaming/operators/joins/mod.rs similarity index 100% rename from src/runtime/streaming/operators/joins/mod.rs rename to src/streaming_runtime/src/streaming/operators/joins/mod.rs diff --git a/src/runtime/streaming/operators/key_by.rs b/src/streaming_runtime/src/streaming/operators/key_by.rs similarity index 96% rename from src/runtime/streaming/operators/key_by.rs rename to src/streaming_runtime/src/streaming/operators/key_by.rs index 90c55d08..f1e2831d 100644 --- a/src/runtime/streaming/operators/key_by.rs +++ b/src/streaming_runtime/src/streaming/operators/key_by.rs @@ -19,10 +19,10 @@ use datafusion_common::hash_utils::create_hashes; use datafusion_physical_expr::expressions::Column; use std::sync::Arc; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; use 
protocol::function_stream_graph::KeyPlanOperator; diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/streaming_runtime/src/streaming/operators/key_operator.rs similarity index 94% rename from src/runtime/streaming/operators/key_operator.rs rename to src/streaming_runtime/src/streaming/operators/key_operator.rs index 7a89d2f2..a8064d2e 100644 --- a/src/runtime/streaming/operators/key_operator.rs +++ b/src/streaming_runtime/src/streaming/operators/key_operator.rs @@ -13,13 +13,13 @@ //! Key-by over the physical plan output: key column(s) are **values** projected by the plan //! (e.g. `_key_user_id`); **shuffle / `StreamOutput::Keyed` uses `u64` hashes** computed by //! [`datafusion_common::hash_utils::create_hashes`] on those columns — same mechanism as -//! [`crate::runtime::streaming::operators::key_by::KeyByOperator`]. +//! [`crate::streaming::operators::key_by::KeyByOperator`]. -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::operators::StatelessPhysicalExecutor; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::operators::StatelessPhysicalExecutor; use ahash::RandomState; use anyhow::{Result, anyhow}; use arrow::compute::{sort_to_indices, take}; diff --git a/src/runtime/streaming/operators/mod.rs b/src/streaming_runtime/src/streaming/operators/mod.rs similarity index 100% rename from src/runtime/streaming/operators/mod.rs rename to src/streaming_runtime/src/streaming/operators/mod.rs diff --git a/src/runtime/streaming/operators/projection.rs b/src/streaming_runtime/src/streaming/operators/projection.rs similarity index 94% rename from src/runtime/streaming/operators/projection.rs rename to src/streaming_runtime/src/streaming/operators/projection.rs index b84d74aa..54882547 100644 --- a/src/runtime/streaming/operators/projection.rs +++ b/src/streaming_runtime/src/streaming/operators/projection.rs @@ -22,12 +22,12 @@ use std::sync::Arc; use protocol::function_stream_graph::ProjectionOperator as ProjectionOperatorProto; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::global::Registry; use crate::sql::common::{CheckpointBarrier, FsSchema, FsSchemaRef, Watermark}; use crate::sql::logical_node::logical::OperatorName; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::global::Registry; pub struct ProjectionOperator { name: String, diff --git a/src/runtime/streaming/operators/sink/delta/mod.rs b/src/streaming_runtime/src/streaming/operators/sink/delta/mod.rs similarity index 96% rename from src/runtime/streaming/operators/sink/delta/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/delta/mod.rs index 4df6b3b5..b2d16c60 100644 --- a/src/runtime/streaming/operators/sink/delta/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/sink/delta/mod.rs @@ -26,14 +26,14 @@ use parquet::basic::Compression; use tokio::io::AsyncWriteExt; use tracing::{debug, info, warn}; -use crate::runtime::memory::{MemoryBlock, try_global_memory_pool}; -use 
crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::format::encoder::FormatEncoder; +use crate::memory::{MemoryBlock, try_global_memory_pool}; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::with_option_keys as opt; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::format::encoder::FormatEncoder; /// Flush early when buffered batches exceed this size. const DEFAULT_MAX_BUFFER_BYTES: usize = 64 * 1024 * 1024; diff --git a/src/runtime/streaming/operators/sink/filesystem/mod.rs b/src/streaming_runtime/src/streaming/operators/sink/filesystem/mod.rs similarity index 96% rename from src/runtime/streaming/operators/sink/filesystem/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/filesystem/mod.rs index a865a752..3461c4a4 100644 --- a/src/runtime/streaming/operators/sink/filesystem/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/sink/filesystem/mod.rs @@ -20,13 +20,13 @@ use parquet::basic::Compression; use tokio::io::AsyncWriteExt; use tracing::{debug, info, warn}; -use crate::runtime::memory::{MemoryBlock, try_global_memory_pool}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::format::encoder::FormatEncoder; +use crate::memory::{MemoryBlock, try_global_memory_pool}; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::format::encoder::FormatEncoder; const DEFAULT_MAX_BUFFER_BYTES: usize = 64 * 1024 * 1024; diff --git a/src/runtime/streaming/operators/sink/iceberg/mod.rs b/src/streaming_runtime/src/streaming/operators/sink/iceberg/mod.rs similarity index 96% rename from src/runtime/streaming/operators/sink/iceberg/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/iceberg/mod.rs index b6c17414..51715aa2 100644 --- a/src/runtime/streaming/operators/sink/iceberg/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/sink/iceberg/mod.rs @@ -26,14 +26,14 @@ use parquet::basic::Compression; use tokio::io::AsyncWriteExt; use tracing::{debug, info, warn}; -use crate::runtime::memory::{MemoryBlock, try_global_memory_pool}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::format::encoder::FormatEncoder; +use crate::memory::{MemoryBlock, try_global_memory_pool}; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::with_option_keys as opt; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::format::encoder::FormatEncoder; const DEFAULT_MAX_BUFFER_BYTES: usize = 64 * 1024 * 1024; diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs 
b/src/streaming_runtime/src/streaming/operators/sink/kafka/mod.rs similarity index 98% rename from src/runtime/streaming/operators/sink/kafka/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/kafka/mod.rs index b30bc572..1266ebdd 100644 --- a/src/runtime/streaming/operators/sink/kafka/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/sink/kafka/mod.rs @@ -36,12 +36,12 @@ use std::time::Duration; use tokio::time::sleep; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::format::DataSerializer; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::format::DataSerializer; // ============================================================================ // ============================================================================ diff --git a/src/runtime/streaming/operators/sink/lancedb/mod.rs b/src/streaming_runtime/src/streaming/operators/sink/lancedb/mod.rs similarity index 96% rename from src/runtime/streaming/operators/sink/lancedb/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/lancedb/mod.rs index 4e8d8309..e69a59ee 100644 --- a/src/runtime/streaming/operators/sink/lancedb/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/sink/lancedb/mod.rs @@ -23,11 +23,11 @@ use lance::dataset::{WriteMode, WriteParams}; use lance::io::{ObjectStoreParams, StorageOptionsAccessor}; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; pub struct LanceDbSinkOperator { table_name: String, diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/streaming_runtime/src/streaming/operators/sink/mod.rs similarity index 100% rename from src/runtime/streaming/operators/sink/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/mod.rs diff --git a/src/runtime/streaming/operators/sink/s3/mod.rs b/src/streaming_runtime/src/streaming/operators/sink/s3/mod.rs similarity index 97% rename from src/runtime/streaming/operators/sink/s3/mod.rs rename to src/streaming_runtime/src/streaming/operators/sink/s3/mod.rs index 715b5b86..f374b95c 100644 --- a/src/runtime/streaming/operators/sink/s3/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/sink/s3/mod.rs @@ -26,12 +26,12 @@ use parquet::basic::Compression; use parquet::file::properties::WriterProperties; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; use crate::sql::common::constants::factory_operator_name; use crate::sql::common::with_option_keys as opt; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use 
crate::streaming::api::operator::{Collector, Operator}; #[derive(Debug, Clone, Copy)] pub enum S3Format { diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/streaming_runtime/src/streaming/operators/source/kafka/mod.rs similarity index 98% rename from src/runtime/streaming/operators/source/kafka/mod.rs rename to src/streaming_runtime/src/streaming/operators/source/kafka/mod.rs index 75edb968..7f5e09d4 100644 --- a/src/runtime/streaming/operators/source/kafka/mod.rs +++ b/src/streaming_runtime/src/streaming/operators/source/kafka/mod.rs @@ -31,13 +31,13 @@ use std::num::NonZeroU32; use std::time::{Duration, Instant}; use tracing::{debug, error, info, warn}; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::source::{ - SourceCheckpointReport, SourceEvent, SourceOffset, SourceOperator, -}; -use crate::runtime::streaming::format::{BadDataPolicy, DataDeserializer, Format}; use crate::sql::common::fs_schema::FieldValueType; use crate::sql::common::{CheckpointBarrier, MetadataField}; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::source::{ + SourceCheckpointReport, SourceEvent, SourceOffset, SourceOperator, +}; +use crate::streaming::format::{BadDataPolicy, DataDeserializer, Format}; pub trait BatchDeserializer: Send + 'static { fn deserialize_slice( diff --git a/src/runtime/wasm/input/protocol/mod.rs b/src/streaming_runtime/src/streaming/operators/source/mod.rs similarity index 100% rename from src/runtime/wasm/input/protocol/mod.rs rename to src/streaming_runtime/src/streaming/operators/source/mod.rs diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/streaming_runtime/src/streaming/operators/stateless_physical_executor.rs similarity index 98% rename from src/runtime/streaming/operators/stateless_physical_executor.rs rename to src/streaming_runtime/src/streaming/operators/stateless_physical_executor.rs index eb595d31..560ae8c5 100644 --- a/src/runtime/streaming/operators/stateless_physical_executor.rs +++ b/src/streaming_runtime/src/streaming/operators/stateless_physical_executor.rs @@ -24,8 +24,8 @@ use datafusion_proto::protobuf::PhysicalPlanNode; use futures::StreamExt; use prost::Message; -use crate::runtime::streaming::factory::Registry; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; +use crate::streaming::factory::Registry; pub struct StatelessPhysicalExecutor { batch: Arc>>, diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/streaming_runtime/src/streaming/operators/value_execution.rs similarity index 89% rename from src/runtime/streaming/operators/value_execution.rs rename to src/streaming_runtime/src/streaming/operators/value_execution.rs index b93cd78b..d971668f 100644 --- a/src/runtime/streaming/operators/value_execution.rs +++ b/src/streaming_runtime/src/streaming/operators/value_execution.rs @@ -15,11 +15,11 @@ use arrow_array::RecordBatch; use async_trait::async_trait; use futures::StreamExt; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::operators::StatelessPhysicalExecutor; use crate::sql::common::{CheckpointBarrier, Watermark}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::operators::StatelessPhysicalExecutor; pub struct 
ValueExecutionOperator { name: String, diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/streaming_runtime/src/streaming/operators/watermark/mod.rs similarity index 100% rename from src/runtime/streaming/operators/watermark/mod.rs rename to src/streaming_runtime/src/streaming/operators/watermark/mod.rs diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/streaming_runtime/src/streaming/operators/watermark/watermark_generator.rs similarity index 96% rename from src/runtime/streaming/operators/watermark/watermark_generator.rs rename to src/streaming_runtime/src/streaming/operators/watermark/watermark_generator.rs index 497553eb..ea70b41b 100644 --- a/src/runtime/streaming/operators/watermark/watermark_generator.rs +++ b/src/streaming_runtime/src/streaming/operators/watermark/watermark_generator.rs @@ -25,11 +25,11 @@ use std::sync::Arc; use std::time::{Duration, SystemTime}; use tracing::debug; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark, from_nanos, to_millis}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; use async_trait::async_trait; use protocol::function_stream_graph::ExpressionWatermarkConfig; diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/streaming_runtime/src/streaming/operators/windows/mod.rs similarity index 100% rename from src/runtime/streaming/operators/windows/mod.rs rename to src/streaming_runtime/src/streaming/operators/windows/mod.rs diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/streaming_runtime/src/streaming/operators/windows/session_aggregating_window.rs similarity index 99% rename from src/runtime/streaming/operators/windows/session_aggregating_window.rs rename to src/streaming_runtime/src/streaming/operators/windows/session_aggregating_window.rs index 2056cdd9..b42fcc13 100644 --- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs +++ b/src/streaming_runtime/src/streaming/operators/windows/session_aggregating_window.rs @@ -36,17 +36,17 @@ use std::time::{Duration, SystemTime}; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel}; use tracing::info; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::state::OperatorStateStore; use crate::sql::common::converter::Converter; use crate::sql::common::{ CheckpointBarrier, FsSchema, FsSchemaRef, Watermark, from_nanos, to_nanos, }; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; use crate::sql::schema::utils::window_arrow_struct; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::state::OperatorStateStore; use async_trait::async_trait; use protocol::function_stream_graph::SessionWindowAggregateOperator; // ============================================================================ diff --git 
a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/streaming_runtime/src/streaming/operators/windows/sliding_aggregating_window.rs similarity index 98% rename from src/runtime/streaming/operators/windows/sliding_aggregating_window.rs rename to src/streaming_runtime/src/streaming/operators/windows/sliding_aggregating_window.rs index f18b3b14..b9c57835 100644 --- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs +++ b/src/streaming_runtime/src/streaming/operators/windows/sliding_aggregating_window.rs @@ -33,13 +33,13 @@ use std::time::{Duration, SystemTime}; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel}; use tracing::info; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::state::OperatorStateStore; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark, from_nanos, to_nanos}; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::state::OperatorStateStore; use async_trait::async_trait; use protocol::function_stream_graph::SlidingWindowAggregateOperator; // ============================================================================ diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/streaming_runtime/src/streaming/operators/windows/tumbling_aggregating_window.rs similarity index 98% rename from src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs rename to src/streaming_runtime/src/streaming/operators/windows/tumbling_aggregating_window.rs index 5c805625..7d45a684 100644 --- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs +++ b/src/streaming_runtime/src/streaming/operators/windows/tumbling_aggregating_window.rs @@ -34,15 +34,15 @@ use std::time::{Duration, SystemTime}; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel}; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::state::OperatorStateStore; use crate::sql::common::time_utils::print_time; use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark, from_nanos, to_nanos}; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; use crate::sql::schema::utils::add_timestamp_field_arrow; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::state::OperatorStateStore; use async_trait::async_trait; use protocol::function_stream_graph::TumblingWindowAggregateOperator; diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/streaming_runtime/src/streaming/operators/windows/window_function.rs similarity index 97% rename from src/runtime/streaming/operators/windows/window_function.rs rename to src/streaming_runtime/src/streaming/operators/windows/window_function.rs index 
37815b78..83f14551 100644 --- a/src/runtime/streaming/operators/windows/window_function.rs +++ b/src/streaming_runtime/src/streaming/operators/windows/window_function.rs @@ -26,16 +26,16 @@ use std::time::SystemTime; use tokio::sync::mpsc::{UnboundedReceiver, unbounded_channel}; use tracing::{info, warn}; -use crate::runtime::streaming::StreamOutput; -use crate::runtime::streaming::api::context::TaskContext; -use crate::runtime::streaming::api::operator::{Collector, Operator}; -use crate::runtime::streaming::factory::Registry; -use crate::runtime::streaming::state::OperatorStateStore; use crate::sql::common::time_utils::print_time; use crate::sql::common::{ CheckpointBarrier, FsSchema, FsSchemaRef, Watermark, from_nanos, to_nanos, }; use crate::sql::physical::{StreamingDecodingContext, StreamingExtensionCodec}; +use crate::streaming::StreamOutput; +use crate::streaming::api::context::TaskContext; +use crate::streaming::api::operator::{Collector, Operator}; +use crate::streaming::factory::Registry; +use crate::streaming::state::OperatorStateStore; use async_trait::async_trait; // ============================================================================ diff --git a/src/runtime/streaming/protocol/control.rs b/src/streaming_runtime/src/streaming/protocol/control.rs similarity index 100% rename from src/runtime/streaming/protocol/control.rs rename to src/streaming_runtime/src/streaming/protocol/control.rs diff --git a/src/streaming_runtime/src/streaming/protocol/event.rs b/src/streaming_runtime/src/streaming/protocol/event.rs new file mode 100644 index 00000000..0db2135d --- /dev/null +++ b/src/streaming_runtime/src/streaming/protocol/event.rs @@ -0,0 +1,59 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use arrow_array::RecordBatch; + +use crate::memory::MemoryTicket; + +pub use function_stream_runtime_common::streaming_protocol::{ + CheckpointBarrier, Watermark, merge_watermarks, watermark_strictly_advances, +}; + +#[derive(Debug, Clone)] +pub enum StreamEvent { + Data(RecordBatch), + Watermark(Watermark), + Barrier(CheckpointBarrier), + EndOfStream, +} + +#[derive(Debug, Clone)] +pub enum StreamOutput { + Forward(RecordBatch), + Keyed(u64, RecordBatch), + Broadcast(RecordBatch), + Watermark(Watermark), +} + +#[derive(Debug, Clone)] +pub struct TrackedEvent { + pub event: StreamEvent, + pub _ticket: Option>, +} + +impl TrackedEvent { + pub fn new(event: StreamEvent, ticket: Option) -> Self { + Self { + event, + _ticket: ticket.map(Arc::new), + } + } + + pub fn control(event: StreamEvent) -> Self { + Self { + event, + _ticket: None, + } + } +} diff --git a/src/runtime/streaming/protocol/mod.rs b/src/streaming_runtime/src/streaming/protocol/mod.rs similarity index 96% rename from src/runtime/streaming/protocol/mod.rs rename to src/streaming_runtime/src/streaming/protocol/mod.rs index 28fd85a4..d41d9fcf 100644 --- a/src/runtime/streaming/protocol/mod.rs +++ b/src/streaming_runtime/src/streaming/protocol/mod.rs @@ -15,4 +15,5 @@ pub mod event; #[allow(unused_imports)] pub use control::{ControlCommand, JobMasterEvent, StopMode}; +#[allow(unused_imports)] pub use event::{CheckpointBarrier, StreamOutput, Watermark}; diff --git a/src/runtime/streaming/state/error.rs b/src/streaming_runtime/src/streaming/state/error.rs similarity index 97% rename from src/runtime/streaming/state/error.rs rename to src/streaming_runtime/src/streaming/state/error.rs index 37bc6481..cab033db 100644 --- a/src/runtime/streaming/state/error.rs +++ b/src/streaming_runtime/src/streaming/state/error.rs @@ -13,7 +13,7 @@ use crossbeam_channel::TrySendError; use thiserror::Error; -use crate::runtime::memory::MemoryAllocationError; +use crate::memory::MemoryAllocationError; #[derive(Error, Debug)] pub enum StateEngineError { diff --git a/src/runtime/streaming/state/io_manager.rs b/src/streaming_runtime/src/streaming/state/io_manager.rs similarity index 100% rename from src/runtime/streaming/state/io_manager.rs rename to src/streaming_runtime/src/streaming/state/io_manager.rs diff --git a/src/runtime/streaming/state/metrics.rs b/src/streaming_runtime/src/streaming/state/metrics.rs similarity index 100% rename from src/runtime/streaming/state/metrics.rs rename to src/streaming_runtime/src/streaming/state/metrics.rs diff --git a/src/runtime/streaming/state/mod.rs b/src/streaming_runtime/src/streaming/state/mod.rs similarity index 100% rename from src/runtime/streaming/state/mod.rs rename to src/streaming_runtime/src/streaming/state/mod.rs diff --git a/src/runtime/streaming/state/operator_state.rs b/src/streaming_runtime/src/streaming/state/operator_state.rs similarity index 99% rename from src/runtime/streaming/state/operator_state.rs rename to src/streaming_runtime/src/streaming/state/operator_state.rs index a3514461..224671f2 100644 --- a/src/runtime/streaming/state/operator_state.rs +++ b/src/streaming_runtime/src/streaming/state/operator_state.rs @@ -13,7 +13,7 @@ use super::error::{Result, StateEngineError}; use super::io_manager::{CompactJob, IoManager, SpillJob}; use super::metrics::StateMetricsCollector; -use crate::runtime::memory::{MemoryBlock, MemoryTicket}; +use crate::memory::{MemoryBlock, MemoryTicket}; use arrow_array::builder::{BinaryBuilder, BooleanBuilder, UInt64Builder}; use 
arrow_array::{Array, BinaryArray, RecordBatch, UInt64Array}; use arrow_schema::{DataType, Field, Schema}; @@ -813,7 +813,7 @@ mod tests { use super::super::io_manager::IoPool; use super::super::metrics::NoopMetricsCollector; use super::*; - use crate::runtime::memory::{MemoryBlock, MemoryPool, global_memory_pool}; + use crate::memory::{MemoryBlock, MemoryPool, global_memory_pool}; use arrow_array::Int64Array; use tempfile::TempDir; @@ -836,7 +836,7 @@ mod tests { const TEST_OPERATOR_MEMORY: u64 = 2 * 1024 * 1024; fn ensure_global_memory_pool() { - use crate::runtime::memory::{init_global_memory_pool, try_global_memory_pool}; + use crate::memory::{init_global_memory_pool, try_global_memory_pool}; use std::sync::Once; static INIT: Once = Once::new(); INIT.call_once(|| { diff --git a/src/runtime/util/mod.rs b/src/streaming_runtime/src/util/mod.rs similarity index 100% rename from src/runtime/util/mod.rs rename to src/streaming_runtime/src/util/mod.rs diff --git a/src/runtime/util/physical_aggregate.rs b/src/streaming_runtime/src/util/physical_aggregate.rs similarity index 100% rename from src/runtime/util/physical_aggregate.rs rename to src/streaming_runtime/src/util/physical_aggregate.rs diff --git a/src/wasm_runtime/Cargo.toml b/src/wasm_runtime/Cargo.toml new file mode 100644 index 00000000..2f385240 --- /dev/null +++ b/src/wasm_runtime/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "function-stream-wasm-runtime" +version.workspace = true +edition.workspace = true + +[lib] +name = "function_stream_wasm_runtime" +path = "src/lib.rs" diff --git a/src/wasm_runtime/src/lib.rs b/src/wasm_runtime/src/lib.rs new file mode 100644 index 00000000..12b3ebee --- /dev/null +++ b/src/wasm_runtime/src/lib.rs @@ -0,0 +1,22 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! WebAssembly execution runtime. +//! +//! Implementation lives under `src/wasm/` in this package. It is currently **compiled as +//! part of the `function-stream` crate** via `#[path]` in `src/lib.rs` / `src/main.rs`, so paths +//! like `crate::sql` (streaming planner dependency) and `crate::memory` keep resolving. +//! +//! Operator state storage (`state_backend/`) also lives in this package and is compiled via +//! `#[path]` from the root crate as `crate::state_backend`. + +pub const CRATE_NAME: &str = "function-stream-wasm-runtime"; diff --git a/src/storage/state_backend/error.rs b/src/wasm_runtime/src/state_backend/error.rs similarity index 100% rename from src/storage/state_backend/error.rs rename to src/wasm_runtime/src/state_backend/error.rs diff --git a/src/storage/state_backend/factory.rs b/src/wasm_runtime/src/state_backend/factory.rs similarity index 82% rename from src/storage/state_backend/factory.rs rename to src/wasm_runtime/src/state_backend/factory.rs index 5dc38632..2bc0b55c 100644 --- a/src/storage/state_backend/factory.rs +++ b/src/wasm_runtime/src/state_backend/factory.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
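// Illustrative sketch (assumed paths, not copied from src/lib.rs): the new
// `function-stream-wasm-runtime` lib.rs above notes that the wasm and state_backend
// code is still compiled into the `function-stream` crate through `#[path]` module
// declarations rather than as a normal crate dependency. The mechanism looks roughly
// like this:
//
//     // hypothetical excerpt from function-stream's src/lib.rs
//     #[path = "wasm_runtime/src/state_backend/mod.rs"]
//     pub mod state_backend;
//
// Because the module is declared in the root crate, references such as
// `crate::state_backend::rocksdb::RocksDBConfig` keep resolving even though the files
// now live under `src/wasm_runtime/`.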
-use crate::storage::state_backend::error::BackendError; -use crate::storage::state_backend::store::StateStore; +use crate::state_backend::error::BackendError; +use crate::state_backend::store::StateStore; use std::path::Path; use std::sync::Arc; @@ -47,11 +47,11 @@ pub fn get_factory_for_task>( task_name: String, created_at: u64, base_dir: Option

, - rocksdb_config: Option, + rocksdb_config: Option, ) -> Result, BackendError> { match factory_type { FactoryType::Memory => { - Ok(crate::storage::state_backend::memory::MemoryStateStoreFactory::default_factory()) + Ok(crate::state_backend::memory::MemoryStateStoreFactory::default_factory()) } FactoryType::RocksDB => { let base_dir = base_dir.ok_or_else(|| { @@ -63,9 +63,8 @@ pub fn get_factory_for_task>( .join(format!("{}-{}", task_name, created_at)); let config = rocksdb_config.unwrap_or_default(); - let factory = crate::storage::state_backend::rocksdb::RocksDBStateStoreFactory::new( - db_path, config, - )?; + let factory = + crate::state_backend::rocksdb::RocksDBStateStoreFactory::new(db_path, config)?; Ok(Arc::new(factory)) } diff --git a/src/storage/state_backend/key_builder.rs b/src/wasm_runtime/src/state_backend/key_builder.rs similarity index 100% rename from src/storage/state_backend/key_builder.rs rename to src/wasm_runtime/src/state_backend/key_builder.rs diff --git a/src/storage/state_backend/memory/factory.rs b/src/wasm_runtime/src/state_backend/memory/factory.rs similarity index 86% rename from src/storage/state_backend/memory/factory.rs rename to src/wasm_runtime/src/state_backend/memory/factory.rs index b62bd444..0526ecbd 100644 --- a/src/storage/state_backend/memory/factory.rs +++ b/src/wasm_runtime/src/state_backend/memory/factory.rs @@ -11,8 +11,8 @@ // limitations under the License. use super::store::MemoryStateStore; -use crate::storage::state_backend::error::BackendError; -use crate::storage::state_backend::factory::StateStoreFactory; +use crate::state_backend::error::BackendError; +use crate::state_backend::factory::StateStoreFactory; use std::sync::{Arc, Mutex}; pub struct MemoryStateStoreFactory {} @@ -43,7 +43,7 @@ impl StateStoreFactory for MemoryStateStoreFactory { fn new_state_store( &self, _column_family: Option, - ) -> Result, BackendError> { + ) -> Result, BackendError> { Ok(Box::new(MemoryStateStore::new())) } } diff --git a/src/storage/state_backend/memory/mod.rs b/src/wasm_runtime/src/state_backend/memory/mod.rs similarity index 100% rename from src/storage/state_backend/memory/mod.rs rename to src/wasm_runtime/src/state_backend/memory/mod.rs diff --git a/src/storage/state_backend/memory/store.rs b/src/wasm_runtime/src/state_backend/memory/store.rs similarity index 95% rename from src/storage/state_backend/memory/store.rs rename to src/wasm_runtime/src/state_backend/memory/store.rs index e65e839d..5d07ad99 100644 --- a/src/storage/state_backend/memory/store.rs +++ b/src/wasm_runtime/src/state_backend/memory/store.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::storage::state_backend::error::BackendError; -use crate::storage::state_backend::store::{StateIterator, StateStore}; +use crate::state_backend::error::BackendError; +use crate::state_backend::store::{StateIterator, StateStore}; use std::collections::HashMap; use std::sync::{Arc, Mutex}; @@ -94,9 +94,8 @@ impl StateStore for MemoryStateStore { user_key: Vec, value: Vec, ) -> Result<(), BackendError> { - let key_bytes = crate::storage::state_backend::key_builder::build_key( - &key_group, &key, &namespace, &user_key, - ); + let key_bytes = + crate::state_backend::key_builder::build_key(&key_group, &key, &namespace, &user_key); let existing = self.get_state(key_bytes.clone())?; diff --git a/src/storage/state_backend/mod.rs b/src/wasm_runtime/src/state_backend/mod.rs similarity index 100% rename from src/storage/state_backend/mod.rs rename to src/wasm_runtime/src/state_backend/mod.rs diff --git a/src/storage/state_backend/rocksdb/factory.rs b/src/wasm_runtime/src/state_backend/rocksdb/factory.rs similarity index 94% rename from src/storage/state_backend/rocksdb/factory.rs rename to src/wasm_runtime/src/state_backend/rocksdb/factory.rs index 11554e13..74759b91 100644 --- a/src/storage/state_backend/rocksdb/factory.rs +++ b/src/wasm_runtime/src/state_backend/rocksdb/factory.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::storage::state_backend::error::BackendError; -use crate::storage::state_backend::factory::StateStoreFactory; +use crate::state_backend::error::BackendError; +use crate::state_backend::factory::StateStoreFactory; use rocksdb::{ColumnFamilyDescriptor, DB, Options}; use std::path::Path; use std::sync::{Arc, Mutex}; @@ -56,7 +56,7 @@ impl StateStoreFactory for RocksDBStateStoreFactory { fn new_state_store( &self, column_family: Option, - ) -> Result, BackendError> { + ) -> Result, BackendError> { self.new_state_store(column_family) } } @@ -137,7 +137,7 @@ impl RocksDBStateStoreFactory { pub fn new_state_store( &self, column_family: Option, - ) -> Result, BackendError> { + ) -> Result, BackendError> { if let Some(ref cf_name) = column_family && cf_name != "default" && self.db.cf_handle(cf_name).is_none() @@ -158,7 +158,7 @@ impl RocksDBStateStoreFactory { } } - crate::storage::state_backend::rocksdb::store::RocksDBStateStore::new_with_factory( + crate::state_backend::rocksdb::store::RocksDBStateStore::new_with_factory( self.db.clone(), column_family, ) diff --git a/src/storage/state_backend/rocksdb/mod.rs b/src/wasm_runtime/src/state_backend/rocksdb/mod.rs similarity index 100% rename from src/storage/state_backend/rocksdb/mod.rs rename to src/wasm_runtime/src/state_backend/rocksdb/mod.rs diff --git a/src/storage/state_backend/rocksdb/store.rs b/src/wasm_runtime/src/state_backend/rocksdb/store.rs similarity index 97% rename from src/storage/state_backend/rocksdb/store.rs rename to src/wasm_runtime/src/state_backend/rocksdb/store.rs index e8a2ad13..b7d98e4c 100644 --- a/src/storage/state_backend/rocksdb/store.rs +++ b/src/wasm_runtime/src/state_backend/rocksdb/store.rs @@ -10,9 +10,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
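// Illustrative sketch (standalone std-only reconstruction, not the crate's helper):
// `get_factory_for_task` above keeps its dispatch across the move — `FactoryType::Memory`
// returns the default in-memory factory, while `FactoryType::RocksDB` opens a per-task
// database under `base_dir/{task_name}-{created_at}`, isolating each task's state:
use std::path::{Path, PathBuf};

fn per_task_rocksdb_path(base_dir: &Path, task_name: &str, created_at: u64) -> PathBuf {
    // Mirrors the `join(format!("{}-{}", task_name, created_at))` seen in the hunk above.
    base_dir.join(format!("{}-{}", task_name, created_at))
}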
-use crate::storage::state_backend::error::BackendError; -use crate::storage::state_backend::key_builder::{build_key, increment_key, is_all_0xff}; -use crate::storage::state_backend::store::{StateIterator, StateStore}; +use crate::state_backend::error::BackendError; +use crate::state_backend::key_builder::{build_key, increment_key, is_all_0xff}; +use crate::state_backend::store::{StateIterator, StateStore}; use rocksdb::{ BlockBasedOptions, Cache, ColumnFamilyDescriptor, DB, DBCompressionType, Direction, IteratorMode, Options, ReadOptions, WriteBatch, WriteOptions, diff --git a/src/storage/state_backend/server.rs b/src/wasm_runtime/src/state_backend/server.rs similarity index 92% rename from src/storage/state_backend/server.rs rename to src/wasm_runtime/src/state_backend/server.rs index efeceaa4..1fb24c38 100644 --- a/src/storage/state_backend/server.rs +++ b/src/wasm_runtime/src/state_backend/server.rs @@ -12,11 +12,9 @@ use crate::config::storage::{StateStorageConfig, StateStorageType}; use crate::config::{get_state_dir, get_state_dir_for_base}; -use crate::storage::state_backend::error::BackendError; -use crate::storage::state_backend::factory::{ - FactoryType, StateStoreFactory, get_factory_for_task, -}; -use crate::storage::state_backend::rocksdb::RocksDBConfig; +use crate::state_backend::error::BackendError; +use crate::state_backend::factory::{FactoryType, StateStoreFactory, get_factory_for_task}; +use crate::state_backend::rocksdb::RocksDBConfig; use std::fs; use std::path::PathBuf; use std::sync::Arc; diff --git a/src/storage/state_backend/store.rs b/src/wasm_runtime/src/state_backend/store.rs similarity index 88% rename from src/storage/state_backend/store.rs rename to src/wasm_runtime/src/state_backend/store.rs index aaa02988..cb897f6b 100644 --- a/src/storage/state_backend/store.rs +++ b/src/wasm_runtime/src/state_backend/store.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::storage::state_backend::error::BackendError; +use crate::state_backend::error::BackendError; pub type StateIteratorItem = Result, Vec)>, BackendError>; @@ -107,9 +107,8 @@ pub trait StateStore: Send + Sync { user_key: Vec, value: Vec, ) -> Result<(), BackendError> { - let key_bytes = crate::storage::state_backend::key_builder::build_key( - &key_group, &key, &namespace, &user_key, - ); + let key_bytes = + crate::state_backend::key_builder::build_key(&key_group, &key, &namespace, &user_key); self.put_state(key_bytes, value) } @@ -132,9 +131,8 @@ pub trait StateStore: Send + Sync { namespace: Vec, user_key: Vec, ) -> Result>, BackendError> { - let key_bytes = crate::storage::state_backend::key_builder::build_key( - &key_group, &key, &namespace, &user_key, - ); + let key_bytes = + crate::state_backend::key_builder::build_key(&key_group, &key, &namespace, &user_key); self.get_state(key_bytes) } @@ -156,9 +154,8 @@ pub trait StateStore: Send + Sync { namespace: Vec, user_key: Vec, ) -> Result<(), BackendError> { - let key_bytes = crate::storage::state_backend::key_builder::build_key( - &key_group, &key, &namespace, &user_key, - ); + let key_bytes = + crate::state_backend::key_builder::build_key(&key_group, &key, &namespace, &user_key); self.delete_state(key_bytes) } @@ -199,12 +196,8 @@ pub trait StateStore: Send + Sync { key: Vec, namespace: Vec, ) -> Result { - let prefix_bytes = crate::storage::state_backend::key_builder::build_key( - &key_group, - &key, - &namespace, - &[], - ); + let prefix_bytes = + crate::state_backend::key_builder::build_key(&key_group, &key, &namespace, &[]); self.delete_prefix_bytes(prefix_bytes) } @@ -228,13 +221,13 @@ pub trait StateStore: Send + Sync { start_inclusive: Vec, end_exclusive: Vec, ) -> Result>, BackendError> { - let start_key = crate::storage::state_backend::key_builder::build_key( + let start_key = crate::state_backend::key_builder::build_key( &key_group, &key, &namespace, &start_inclusive, ); - let end_key = crate::storage::state_backend::key_builder::build_key( + let end_key = crate::state_backend::key_builder::build_key( &key_group, &key, &namespace, @@ -279,12 +272,8 @@ pub trait StateStore: Send + Sync { key: Vec, namespace: Vec, ) -> Result, BackendError> { - let prefix = crate::storage::state_backend::key_builder::build_key( - &key_group, - &key, - &namespace, - &[], - ); + let prefix = + crate::state_backend::key_builder::build_key(&key_group, &key, &namespace, &[]); self.scan(prefix) } } diff --git a/src/runtime/wasm/buffer_and_event/buffer_or_event.rs b/src/wasm_runtime/src/wasm/buffer_and_event/buffer_or_event.rs similarity index 100% rename from src/runtime/wasm/buffer_and_event/buffer_or_event.rs rename to src/wasm_runtime/src/wasm/buffer_and_event/buffer_or_event.rs diff --git a/src/runtime/wasm/buffer_and_event/mod.rs b/src/wasm_runtime/src/wasm/buffer_and_event/mod.rs similarity index 100% rename from src/runtime/wasm/buffer_and_event/mod.rs rename to src/wasm_runtime/src/wasm/buffer_and_event/mod.rs diff --git a/src/runtime/wasm/buffer_and_event/stream_element/mod.rs b/src/wasm_runtime/src/wasm/buffer_and_event/stream_element/mod.rs similarity index 100% rename from src/runtime/wasm/buffer_and_event/stream_element/mod.rs rename to src/wasm_runtime/src/wasm/buffer_and_event/stream_element/mod.rs diff --git a/src/runtime/wasm/buffer_and_event/stream_element/stream_element.rs b/src/wasm_runtime/src/wasm/buffer_and_event/stream_element/stream_element.rs similarity index 100% rename from 
src/runtime/wasm/buffer_and_event/stream_element/stream_element.rs rename to src/wasm_runtime/src/wasm/buffer_and_event/stream_element/stream_element.rs diff --git a/src/runtime/wasm/input/input_protocol.rs b/src/wasm_runtime/src/wasm/input/input_protocol.rs similarity index 95% rename from src/runtime/wasm/input/input_protocol.rs rename to src/wasm_runtime/src/wasm/input/input_protocol.rs index 50294201..782a7f4f 100644 --- a/src/runtime/wasm/input/input_protocol.rs +++ b/src/wasm_runtime/src/wasm/input/input_protocol.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::buffer_and_event::BufferOrEvent; use std::time::Duration; pub trait InputProtocol: Send + Sync + 'static { diff --git a/src/runtime/wasm/input/input_provider.rs b/src/wasm_runtime/src/wasm/input/input_provider.rs similarity index 94% rename from src/runtime/wasm/input/input_provider.rs rename to src/wasm_runtime/src/wasm/input/input_provider.rs index 8eee649d..d7ed64b9 100644 --- a/src/runtime/wasm/input/input_provider.rs +++ b/src/wasm_runtime/src/wasm/input/input_provider.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::input::Input; -use crate::runtime::wasm::task::InputConfig; +use crate::input::Input; +use crate::wasm::task::InputConfig; pub struct InputProvider; @@ -66,8 +66,8 @@ impl InputProvider { extra, runtime: _, } => { - use crate::runtime::input::InputRunner; - use crate::runtime::input::protocol::kafka::{KafkaConfig, KafkaProtocol}; + use crate::input::InputRunner; + use crate::input::protocol::kafka::{KafkaConfig, KafkaProtocol}; let servers: Vec = bootstrap_servers .split(',') diff --git a/src/runtime/wasm/input/input_runner.rs b/src/wasm_runtime/src/wasm/input/input_runner.rs similarity index 96% rename from src/runtime/wasm/input/input_runner.rs rename to src/wasm_runtime/src/wasm/input/input_runner.rs index ece85e3d..24b96dae 100644 --- a/src/runtime/wasm/input/input_runner.rs +++ b/src/wasm_runtime/src/wasm/input/input_runner.rs @@ -10,13 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::common::TaskCompletionFlag; -use crate::runtime::input::input_protocol::InputProtocol; -use crate::runtime::input::{Input, InputState}; -use crate::runtime::processor::function_error::FunctionErrorReport; -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; -use crate::runtime::wasm::task::ControlMailBox; -use crate::runtime::wasm::task::InputRuntimeConfig; +use crate::common::TaskCompletionFlag; +use crate::input::input_protocol::InputProtocol; +use crate::input::{Input, InputState}; +use crate::processor::function_error::FunctionErrorReport; +use crate::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::task::ControlMailBox; +use crate::wasm::task::InputRuntimeConfig; use crossbeam_channel::{Receiver, Sender, bounded, unbounded}; use std::sync::{Arc, Mutex}; use std::thread; @@ -250,7 +250,7 @@ impl InputRunner
{ impl Input for InputRunner
{ fn init_with_context( &mut self, - init_context: &crate::runtime::wasm::taskexecutor::InitContext, + init_context: &crate::wasm::taskexecutor::InitContext, ) -> Result<(), Box> { if !matches!(*self.state.lock().unwrap(), InputState::Uninitialized) { return Ok(()); @@ -298,7 +298,7 @@ impl Input for InputRunner
{ }) .map_err(|e| Box::new(std::io::Error::other(e)) as Box)?; - use crate::runtime::processor::wasm::thread_pool::{ThreadGroup, ThreadGroupType}; + use crate::processor::wasm::thread_pool::{ThreadGroup, ThreadGroupType}; let mut group = ThreadGroup::new( ThreadGroupType::Input(self.group_id), format!("Input-g{}", self.group_id), diff --git a/src/runtime/wasm/input/interface.rs b/src/wasm_runtime/src/wasm/input/interface.rs similarity index 89% rename from src/runtime/wasm/input/interface.rs rename to src/wasm_runtime/src/wasm/input/interface.rs index 06da4923..cbfc61cc 100644 --- a/src/runtime/wasm/input/interface.rs +++ b/src/wasm_runtime/src/wasm/input/interface.rs @@ -10,10 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; -use crate::runtime::wasm::taskexecutor::InitContext; +use crate::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::taskexecutor::InitContext; -pub use crate::runtime::common::ComponentState as InputState; +pub use crate::common::ComponentState as InputState; pub trait Input: Send + Sync { fn init_with_context( diff --git a/src/runtime/wasm/input/mod.rs b/src/wasm_runtime/src/wasm/input/mod.rs similarity index 100% rename from src/runtime/wasm/input/mod.rs rename to src/wasm_runtime/src/wasm/input/mod.rs diff --git a/src/runtime/wasm/input/protocol/kafka/config.rs b/src/wasm_runtime/src/wasm/input/protocol/kafka/config.rs similarity index 100% rename from src/runtime/wasm/input/protocol/kafka/config.rs rename to src/wasm_runtime/src/wasm/input/protocol/kafka/config.rs diff --git a/src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs b/src/wasm_runtime/src/wasm/input/protocol/kafka/kafka_protocol.rs similarity index 97% rename from src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs rename to src/wasm_runtime/src/wasm/input/protocol/kafka/kafka_protocol.rs index 1fb487a6..928c9050 100644 --- a/src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs +++ b/src/wasm_runtime/src/wasm/input/protocol/kafka/kafka_protocol.rs @@ -11,8 +11,8 @@ // limitations under the License. 
use super::config::KafkaConfig; -use crate::runtime::input::input_protocol::InputProtocol; -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; +use crate::input::input_protocol::InputProtocol; +use crate::wasm::buffer_and_event::BufferOrEvent; use rdkafka::Message; use rdkafka::TopicPartitionList; use rdkafka::config::ClientConfig; diff --git a/src/runtime/wasm/input/protocol/kafka/mod.rs b/src/wasm_runtime/src/wasm/input/protocol/kafka/mod.rs similarity index 100% rename from src/runtime/wasm/input/protocol/kafka/mod.rs rename to src/wasm_runtime/src/wasm/input/protocol/kafka/mod.rs diff --git a/src/sql/connector/source/mod.rs b/src/wasm_runtime/src/wasm/input/protocol/mod.rs similarity index 100% rename from src/sql/connector/source/mod.rs rename to src/wasm_runtime/src/wasm/input/protocol/mod.rs diff --git a/src/runtime/wasm/mod.rs b/src/wasm_runtime/src/wasm/mod.rs similarity index 100% rename from src/runtime/wasm/mod.rs rename to src/wasm_runtime/src/wasm/mod.rs diff --git a/src/runtime/wasm/output/interface.rs b/src/wasm_runtime/src/wasm/output/interface.rs similarity index 93% rename from src/runtime/wasm/output/interface.rs rename to src/wasm_runtime/src/wasm/output/interface.rs index 21c3055d..e38217a1 100644 --- a/src/runtime/wasm/output/interface.rs +++ b/src/wasm_runtime/src/wasm/output/interface.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; -use crate::runtime::wasm::taskexecutor::InitContext; +use crate::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::taskexecutor::InitContext; pub trait Output: Send + Sync { fn init_with_context( diff --git a/src/runtime/wasm/output/mod.rs b/src/wasm_runtime/src/wasm/output/mod.rs similarity index 100% rename from src/runtime/wasm/output/mod.rs rename to src/wasm_runtime/src/wasm/output/mod.rs diff --git a/src/runtime/wasm/output/output_protocol.rs b/src/wasm_runtime/src/wasm/output/output_protocol.rs similarity index 95% rename from src/runtime/wasm/output/output_protocol.rs rename to src/wasm_runtime/src/wasm/output/output_protocol.rs index 6140d3eb..8efc5f7d 100644 --- a/src/runtime/wasm/output/output_protocol.rs +++ b/src/wasm_runtime/src/wasm/output/output_protocol.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::buffer_and_event::BufferOrEvent; pub trait OutputProtocol: Send + Sync + 'static { fn name(&self) -> String; diff --git a/src/runtime/wasm/output/output_provider.rs b/src/wasm_runtime/src/wasm/output/output_provider.rs similarity index 91% rename from src/runtime/wasm/output/output_provider.rs rename to src/wasm_runtime/src/wasm/output/output_provider.rs index 25ca8431..33b470d1 100644 --- a/src/runtime/wasm/output/output_provider.rs +++ b/src/wasm_runtime/src/wasm/output/output_provider.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::runtime::output::Output; -use crate::runtime::wasm::task::OutputConfig; +use crate::output::Output; +use crate::wasm::task::OutputConfig; pub struct OutputProvider; @@ -59,10 +59,8 @@ impl OutputProvider { extra, runtime: _, } => { - use crate::runtime::output::output_runner::OutputRunner; - use crate::runtime::output::protocol::kafka::{ - KafkaOutputProtocol, KafkaProducerConfig, - }; + use crate::output::output_runner::OutputRunner; + use crate::output::protocol::kafka::{KafkaOutputProtocol, KafkaProducerConfig}; let servers: Vec = bootstrap_servers .split(',') diff --git a/src/runtime/wasm/output/output_runner.rs b/src/wasm_runtime/src/wasm/output/output_runner.rs similarity index 96% rename from src/runtime/wasm/output/output_runner.rs rename to src/wasm_runtime/src/wasm/output/output_runner.rs index ca6d780c..b732693a 100644 --- a/src/runtime/wasm/output/output_runner.rs +++ b/src/wasm_runtime/src/wasm/output/output_runner.rs @@ -10,13 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::common::{ComponentState, TaskCompletionFlag}; -use crate::runtime::output::Output; -use crate::runtime::output::output_protocol::OutputProtocol; -use crate::runtime::processor::function_error::FunctionErrorReport; -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; -use crate::runtime::wasm::task::ControlMailBox; -use crate::runtime::wasm::task::OutputRuntimeConfig; +use crate::common::{ComponentState, TaskCompletionFlag}; +use crate::output::Output; +use crate::output::output_protocol::OutputProtocol; +use crate::processor::function_error::FunctionErrorReport; +use crate::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::task::ControlMailBox; +use crate::wasm::task::OutputRuntimeConfig; use crossbeam_channel::{Receiver, Sender, bounded, unbounded}; use std::sync::{Arc, Mutex}; use std::thread; @@ -288,7 +288,7 @@ impl OutputRunner
{ impl Output for OutputRunner
{ fn init_with_context( &mut self, - ctx: &crate::runtime::wasm::taskexecutor::InitContext, + ctx: &crate::wasm::taskexecutor::InitContext, ) -> Result<(), Box> { if !matches!(*self.state.lock().unwrap(), ComponentState::Uninitialized) { return Ok(()); @@ -333,7 +333,7 @@ impl Output for OutputRunner
{ }) .map_err(|e| Box::new(std::io::Error::other(e)) as Box)?; - use crate::runtime::processor::wasm::thread_pool::{ThreadGroup, ThreadGroupType}; + use crate::processor::wasm::thread_pool::{ThreadGroup, ThreadGroupType}; let mut group = ThreadGroup::new( ThreadGroupType::Output(self.output_id), format!("Output-{}", self.output_id), diff --git a/src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs b/src/wasm_runtime/src/wasm/output/protocol/kafka/kafka_protocol.rs similarity index 96% rename from src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs rename to src/wasm_runtime/src/wasm/output/protocol/kafka/kafka_protocol.rs index d9e6db4d..787804a2 100644 --- a/src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs +++ b/src/wasm_runtime/src/wasm/output/protocol/kafka/kafka_protocol.rs @@ -11,8 +11,8 @@ // limitations under the License. use super::producer_config::KafkaProducerConfig; -use crate::runtime::output::output_protocol::OutputProtocol; -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; +use crate::output::output_protocol::OutputProtocol; +use crate::wasm::buffer_and_event::BufferOrEvent; use rdkafka::producer::{BaseRecord, DefaultProducerContext, Producer, ThreadedProducer}; use std::sync::Mutex; use std::time::Duration; diff --git a/src/runtime/wasm/output/protocol/kafka/mod.rs b/src/wasm_runtime/src/wasm/output/protocol/kafka/mod.rs similarity index 100% rename from src/runtime/wasm/output/protocol/kafka/mod.rs rename to src/wasm_runtime/src/wasm/output/protocol/kafka/mod.rs diff --git a/src/runtime/wasm/output/protocol/kafka/producer_config.rs b/src/wasm_runtime/src/wasm/output/protocol/kafka/producer_config.rs similarity index 100% rename from src/runtime/wasm/output/protocol/kafka/producer_config.rs rename to src/wasm_runtime/src/wasm/output/protocol/kafka/producer_config.rs diff --git a/src/runtime/wasm/output/protocol/mod.rs b/src/wasm_runtime/src/wasm/output/protocol/mod.rs similarity index 100% rename from src/runtime/wasm/output/protocol/mod.rs rename to src/wasm_runtime/src/wasm/output/protocol/mod.rs diff --git a/src/runtime/wasm/processor/function_error.rs b/src/wasm_runtime/src/wasm/processor/function_error.rs similarity index 100% rename from src/runtime/wasm/processor/function_error.rs rename to src/wasm_runtime/src/wasm/processor/function_error.rs diff --git a/src/runtime/wasm/processor/mod.rs b/src/wasm_runtime/src/wasm/processor/mod.rs similarity index 100% rename from src/runtime/wasm/processor/mod.rs rename to src/wasm_runtime/src/wasm/processor/mod.rs diff --git a/src/runtime/wasm/processor/python/mod.rs b/src/wasm_runtime/src/wasm/processor/python/mod.rs similarity index 100% rename from src/runtime/wasm/processor/python/mod.rs rename to src/wasm_runtime/src/wasm/processor/python/mod.rs diff --git a/src/runtime/wasm/processor/python/python_host.rs b/src/wasm_runtime/src/wasm/processor/python/python_host.rs similarity index 100% rename from src/runtime/wasm/processor/python/python_host.rs rename to src/wasm_runtime/src/wasm/processor/python/python_host.rs diff --git a/src/runtime/wasm/processor/python/python_service.rs b/src/wasm_runtime/src/wasm/processor/python/python_service.rs similarity index 100% rename from src/runtime/wasm/processor/python/python_service.rs rename to src/wasm_runtime/src/wasm/processor/python/python_service.rs diff --git a/src/runtime/wasm/processor/wasm/input_strategy.rs b/src/wasm_runtime/src/wasm/processor/wasm/input_strategy.rs similarity index 100% rename from src/runtime/wasm/processor/wasm/input_strategy.rs 
rename to src/wasm_runtime/src/wasm/processor/wasm/input_strategy.rs diff --git a/src/runtime/wasm/processor/wasm/mod.rs b/src/wasm_runtime/src/wasm/processor/wasm/mod.rs similarity index 100% rename from src/runtime/wasm/processor/wasm/mod.rs rename to src/wasm_runtime/src/wasm/processor/wasm/mod.rs diff --git a/src/runtime/wasm/processor/wasm/thread_pool.rs b/src/wasm_runtime/src/wasm/processor/wasm/thread_pool.rs similarity index 99% rename from src/runtime/wasm/processor/wasm/thread_pool.rs rename to src/wasm_runtime/src/wasm/processor/wasm/thread_pool.rs index c78eef79..d625d86a 100644 --- a/src/runtime/wasm/processor/wasm/thread_pool.rs +++ b/src/wasm_runtime/src/wasm/processor/wasm/thread_pool.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::common::ComponentState; -use crate::runtime::processor::wasm::wasm_task::WasmTask; +use crate::common::ComponentState; +use crate::processor::wasm::wasm_task::WasmTask; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; diff --git a/src/runtime/wasm/processor/wasm/wasm_cache.rs b/src/wasm_runtime/src/wasm/processor/wasm/wasm_cache.rs similarity index 100% rename from src/runtime/wasm/processor/wasm/wasm_cache.rs rename to src/wasm_runtime/src/wasm/processor/wasm/wasm_cache.rs diff --git a/src/runtime/wasm/processor/wasm/wasm_host.rs b/src/wasm_runtime/src/wasm/processor/wasm/wasm_host.rs similarity index 94% rename from src/runtime/wasm/processor/wasm/wasm_host.rs rename to src/wasm_runtime/src/wasm/processor/wasm/wasm_host.rs index 2bf7d4f0..348f3063 100644 --- a/src/runtime/wasm/processor/wasm/wasm_host.rs +++ b/src/wasm_runtime/src/wasm/processor/wasm/wasm_host.rs @@ -10,10 +10,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::runtime::output::Output; -use crate::runtime::processor::wasm::wasm_cache; -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; -use crate::storage::state_backend::{StateStore, StateStoreFactory}; +use crate::output::Output; +use crate::processor::wasm::wasm_cache; +use crate::state_backend::{StateStore, StateStoreFactory}; +use crate::wasm::buffer_and_event::BufferOrEvent; use std::sync::{Arc, OnceLock}; use wasmtime::component::{Component, HasData, Linker, Resource, bindgen}; use wasmtime::{Config, Engine, Store}; @@ -67,7 +67,7 @@ fn get_global_engine(_wasm_size: usize) -> anyhow::Result> { bindgen!({ world: "processor", - path: "wit", + path: "../../wit", with: { "functionstream:core/kv.store": FunctionStreamStoreHandle, "functionstream:core/kv.iterator": FunctionStreamIteratorHandle, @@ -86,7 +86,7 @@ impl Drop for FunctionStreamStoreHandle { } pub struct FunctionStreamIteratorHandle { - pub state_iterator: Box, + pub state_iterator: Box, } pub struct HostState { @@ -205,7 +205,7 @@ impl HostStore for HostState { .get(&self_) .map_err(|e| Error::Other(format!("Failed to get store resource: {}", e)))?; - let real_key = crate::storage::state_backend::key_builder::build_key( + let real_key = crate::state_backend::key_builder::build_key( &key.key_group, &key.key, &key.namespace, @@ -228,7 +228,7 @@ impl HostStore for HostState { .get(&self_) .map_err(|e| Error::Other(format!("Failed to get store resource: {}", e)))?; - let real_key = crate::storage::state_backend::key_builder::build_key( + let real_key = crate::state_backend::key_builder::build_key( &key.key_group, &key.key, &key.namespace, @@ -251,7 +251,7 @@ impl HostStore for HostState { .get(&self_) .map_err(|e| Error::Other(format!("Failed to get store resource: {}", e)))?; - let real_key = crate::storage::state_backend::key_builder::build_key( + let real_key = crate::state_backend::key_builder::build_key( &key.key_group, &key.key, &key.namespace, @@ -284,7 +284,7 @@ impl HostStore for HostState { .get(&self_) .map_err(|e| Error::Other(format!("Failed to get store resource: {}", e)))?; - let prefix_key = crate::storage::state_backend::key_builder::build_key( + let prefix_key = crate::state_backend::key_builder::build_key( &key.key_group, &key.key, &key.namespace, @@ -323,13 +323,13 @@ impl HostStore for HostState { .get(&self_) .map_err(|e| Error::Other(format!("Failed to get store resource: {}", e)))?; - let start_key = crate::storage::state_backend::key_builder::build_key( + let start_key = crate::state_backend::key_builder::build_key( &key_group, &key, &namespace, &start_inclusive, ); - let end_key = crate::storage::state_backend::key_builder::build_key( + let end_key = crate::state_backend::key_builder::build_key( &key_group, &key, &namespace, @@ -449,7 +449,7 @@ pub fn create_wasm_host_with_component( engine: &Engine, component: &Component, outputs: Vec>, - init_context: &crate::runtime::wasm::taskexecutor::InitContext, + init_context: &crate::wasm::taskexecutor::InitContext, task_name: String, create_time: u64, ) -> anyhow::Result<(Processor, Store)> { @@ -495,7 +495,7 @@ pub fn create_wasm_host_with_component( pub fn create_wasm_host( wasm_bytes: &[u8], outputs: Vec>, - init_context: &crate::runtime::wasm::taskexecutor::InitContext, + init_context: &crate::wasm::taskexecutor::InitContext, task_name: String, create_time: u64, ) -> anyhow::Result<(Processor, Store)> { diff --git a/src/runtime/wasm/processor/wasm/wasm_processor.rs b/src/wasm_runtime/src/wasm/processor/wasm/wasm_processor.rs similarity index 99% 
rename from src/runtime/wasm/processor/wasm/wasm_processor.rs rename to src/wasm_runtime/src/wasm/processor/wasm/wasm_processor.rs index 52234bfe..741280bb 100644 --- a/src/runtime/wasm/processor/wasm/wasm_processor.rs +++ b/src/wasm_runtime/src/wasm/processor/wasm/wasm_processor.rs @@ -17,7 +17,7 @@ use super::wasm_host::{HostState, Processor}; use super::wasm_processor_trait::WasmProcessor; -use crate::runtime::output::Output; +use crate::output::Output; use std::cell::RefCell; use std::error::Error; use std::fmt; @@ -134,7 +134,7 @@ impl WasmProcessorImpl { impl WasmProcessor for WasmProcessorImpl { fn init_with_context( &mut self, - _init_context: &crate::runtime::wasm::taskexecutor::InitContext, + _init_context: &crate::wasm::taskexecutor::InitContext, ) -> Result<(), Box> { if self.initialized { log::warn!("WasmProcessor '{}' already initialized", self.name); @@ -405,7 +405,7 @@ impl WasmProcessor for WasmProcessorImpl { fn init_wasm_host( &mut self, outputs: Vec>, - init_context: &crate::runtime::wasm::taskexecutor::InitContext, + init_context: &crate::wasm::taskexecutor::InitContext, task_name: String, create_time: u64, ) -> Result<(), Box> { diff --git a/src/runtime/wasm/processor/wasm/wasm_processor_trait.rs b/src/wasm_runtime/src/wasm/processor/wasm/wasm_processor_trait.rs similarity index 97% rename from src/runtime/wasm/processor/wasm/wasm_processor_trait.rs rename to src/wasm_runtime/src/wasm/processor/wasm/wasm_processor_trait.rs index fb2c17fb..c699fe8b 100644 --- a/src/runtime/wasm/processor/wasm/wasm_processor_trait.rs +++ b/src/wasm_runtime/src/wasm/processor/wasm/wasm_processor_trait.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::output::Output; -use crate::runtime::wasm::taskexecutor::InitContext; +use crate::output::Output; +use crate::wasm::taskexecutor::InitContext; pub trait WasmProcessor: Send + Sync { fn process( diff --git a/src/runtime/wasm/processor/wasm/wasm_task.rs b/src/wasm_runtime/src/wasm/processor/wasm/wasm_task.rs similarity index 97% rename from src/runtime/wasm/processor/wasm/wasm_task.rs rename to src/wasm_runtime/src/wasm/processor/wasm/wasm_task.rs index 4330aaaf..ea0f5dc1 100644 --- a/src/runtime/wasm/processor/wasm/wasm_task.rs +++ b/src/wasm_runtime/src/wasm/processor/wasm/wasm_task.rs @@ -13,14 +13,14 @@ use super::input_strategy::{InputStrategy, RoundRobinStrategy, from_selector_name}; use super::thread_pool::ThreadGroup; use super::wasm_processor_trait::WasmProcessor; -use crate::runtime::common::{ComponentState, TaskCompletionFlag}; -use crate::runtime::input::Input; -use crate::runtime::output::Output; -use crate::runtime::processor::function_error::FunctionErrorReport; -use crate::runtime::wasm::buffer_and_event::BufferOrEvent; -use crate::runtime::wasm::task::ProcessorRuntimeConfig; -use crate::runtime::wasm::task::{ControlMailBox, TaskControlSignal, TaskLifecycle}; -use crate::storage::task::FunctionInfo; +use crate::common::{ComponentState, TaskCompletionFlag}; +use crate::input::Input; +use crate::output::Output; +use crate::processor::function_error::FunctionErrorReport; +use crate::task::FunctionInfo; +use crate::wasm::buffer_and_event::BufferOrEvent; +use crate::wasm::task::ProcessorRuntimeConfig; +use crate::wasm::task::{ControlMailBox, TaskControlSignal, TaskLifecycle}; use crossbeam_channel::{Receiver, after, select, unbounded}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc; @@ -120,7 +120,7 @@ impl WasmTask { 
pub fn init_with_context( &mut self, - init_context: &crate::runtime::wasm::taskexecutor::InitContext, + init_context: &crate::wasm::taskexecutor::InitContext, ) -> Result<(), Box> { let mut inputs = self.inputs.take().ok_or_else(|| { Box::new(std::io::Error::other("inputs already moved to thread")) @@ -235,7 +235,7 @@ impl WasmTask { ))) as Box })?; - use crate::runtime::processor::wasm::thread_pool::{ThreadGroup, ThreadGroupType}; + use crate::processor::wasm::thread_pool::{ThreadGroup, ThreadGroupType}; let mut main_runloop_group = ThreadGroup::new( ThreadGroupType::MainRunloop, format!("MainRunloop-{}", self.task_name), @@ -262,7 +262,7 @@ impl WasmTask { shared_state: Arc>, failure_cause: Arc>>, execution_state: Arc>, - _init_context: crate::runtime::wasm::taskexecutor::InitContext, + _init_context: crate::wasm::taskexecutor::InitContext, ) { let mut state = TaskState::Initialized; let mut last_idx: usize = 0; @@ -729,7 +729,7 @@ impl WasmTask { impl TaskLifecycle for WasmTask { fn init_with_context( &mut self, - init_context: &crate::runtime::wasm::taskexecutor::InitContext, + init_context: &crate::wasm::taskexecutor::InitContext, ) -> Result<(), Box> { ::init_with_context(self, init_context) } diff --git a/src/runtime/wasm/task/builder/mod.rs b/src/wasm_runtime/src/wasm/task/builder/mod.rs similarity index 100% rename from src/runtime/wasm/task/builder/mod.rs rename to src/wasm_runtime/src/wasm/task/builder/mod.rs diff --git a/src/runtime/wasm/task/builder/processor/mod.rs b/src/wasm_runtime/src/wasm/task/builder/processor/mod.rs similarity index 92% rename from src/runtime/wasm/task/builder/processor/mod.rs rename to src/wasm_runtime/src/wasm/task/builder/processor/mod.rs index c1306924..1ff39a7d 100644 --- a/src/runtime/wasm/task/builder/processor/mod.rs +++ b/src/wasm_runtime/src/wasm/task/builder/processor/mod.rs @@ -14,13 +14,13 @@ // // Specifically handles building logic for Processor type configuration -use crate::runtime::input::{Input, InputProvider}; -use crate::runtime::output::{Output, OutputProvider}; -use crate::runtime::processor::wasm::wasm_processor::WasmProcessorImpl; -use crate::runtime::processor::wasm::wasm_processor_trait::WasmProcessor; -use crate::runtime::processor::wasm::wasm_task::WasmTask; -use crate::runtime::wasm::task::yaml_keys::{TYPE, type_values}; -use crate::runtime::wasm::task::{InputConfig, OutputConfig, ProcessorConfig, WasmTaskConfig}; +use crate::input::{Input, InputProvider}; +use crate::output::{Output, OutputProvider}; +use crate::processor::wasm::wasm_processor::WasmProcessorImpl; +use crate::processor::wasm::wasm_processor_trait::WasmProcessor; +use crate::processor::wasm::wasm_task::WasmTask; +use crate::wasm::task::yaml_keys::{TYPE, type_values}; +use crate::wasm::task::{InputConfig, OutputConfig, ProcessorConfig, WasmTaskConfig}; use serde_yaml::Value; use std::sync::Arc; diff --git a/src/runtime/wasm/task/builder/python/mod.rs b/src/wasm_runtime/src/wasm/task/builder/python/mod.rs similarity index 89% rename from src/runtime/wasm/task/builder/python/mod.rs rename to src/wasm_runtime/src/wasm/task/builder/python/mod.rs index 1b31d2e5..8bc9aa2d 100644 --- a/src/runtime/wasm/task/builder/python/mod.rs +++ b/src/wasm_runtime/src/wasm/task/builder/python/mod.rs @@ -14,14 +14,14 @@ // // Specifically handles building logic for python runtime configuration -use crate::runtime::input::{Input, InputProvider}; -use crate::runtime::output::{Output, OutputProvider}; -use crate::runtime::processor::python::get_python_engine_and_component; -use 
crate::runtime::processor::wasm::wasm_processor::WasmProcessorImpl; -use crate::runtime::processor::wasm::wasm_processor_trait::WasmProcessor; -use crate::runtime::processor::wasm::wasm_task::WasmTask; -use crate::runtime::wasm::task::yaml_keys::{TYPE, type_values}; -use crate::runtime::wasm::task::{InputConfig, OutputConfig, ProcessorConfig, WasmTaskConfig}; +use crate::input::{Input, InputProvider}; +use crate::output::{Output, OutputProvider}; +use crate::processor::python::get_python_engine_and_component; +use crate::processor::wasm::wasm_processor::WasmProcessorImpl; +use crate::processor::wasm::wasm_processor_trait::WasmProcessor; +use crate::processor::wasm::wasm_task::WasmTask; +use crate::wasm::task::yaml_keys::{TYPE, type_values}; +use crate::wasm::task::{InputConfig, OutputConfig, ProcessorConfig, WasmTaskConfig}; use serde_yaml::Value; use std::sync::Arc; @@ -33,8 +33,7 @@ impl PythonBuilder { yaml_value: &Value, modules: &[(String, Vec)], create_time: u64, - ) -> Result, Box> - { + ) -> Result, Box> { let config_type = yaml_value .get(TYPE) .and_then(|v| v.as_str()) diff --git a/src/runtime/wasm/task/builder/sink/mod.rs b/src/wasm_runtime/src/wasm/task/builder/sink/mod.rs similarity index 95% rename from src/runtime/wasm/task/builder/sink/mod.rs rename to src/wasm_runtime/src/wasm/task/builder/sink/mod.rs index 65e8bc95..844b0aef 100644 --- a/src/runtime/wasm/task/builder/sink/mod.rs +++ b/src/wasm_runtime/src/wasm/task/builder/sink/mod.rs @@ -14,8 +14,8 @@ // // Specifically handles building logic for Sink type configuration (future support) -use crate::runtime::processor::wasm::wasm_task::WasmTask; -use crate::runtime::wasm::task::yaml_keys::{TYPE, type_values}; +use crate::processor::wasm::wasm_task::WasmTask; +use crate::wasm::task::yaml_keys::{TYPE, type_values}; use serde_yaml::Value; use std::sync::Arc; diff --git a/src/runtime/wasm/task/builder/source/mod.rs b/src/wasm_runtime/src/wasm/task/builder/source/mod.rs similarity index 95% rename from src/runtime/wasm/task/builder/source/mod.rs rename to src/wasm_runtime/src/wasm/task/builder/source/mod.rs index fc81bea9..938a5046 100644 --- a/src/runtime/wasm/task/builder/source/mod.rs +++ b/src/wasm_runtime/src/wasm/task/builder/source/mod.rs @@ -14,8 +14,8 @@ // // Specifically handles building logic for Source type configuration (future support) -use crate::runtime::processor::wasm::wasm_task::WasmTask; -use crate::runtime::wasm::task::yaml_keys::{TYPE, type_values}; +use crate::processor::wasm::wasm_task::WasmTask; +use crate::wasm::task::yaml_keys::{TYPE, type_values}; use serde_yaml::Value; use std::sync::Arc; diff --git a/src/runtime/wasm/task/builder/task_builder.rs b/src/wasm_runtime/src/wasm/task/builder/task_builder.rs similarity index 93% rename from src/runtime/wasm/task/builder/task_builder.rs rename to src/wasm_runtime/src/wasm/task/builder/task_builder.rs index 2246d6d8..9ca7236d 100644 --- a/src/runtime/wasm/task/builder/task_builder.rs +++ b/src/wasm_runtime/src/wasm/task/builder/task_builder.rs @@ -15,13 +15,13 @@ //! Provides unified factory methods to create TaskLifecycle instances from YAML config. //! Dispatches to specific builders (Processor, Source, Sink, Python) based on task type. 
-use crate::runtime::wasm::task::TaskLifecycle; -use crate::runtime::wasm::task::builder::processor::ProcessorBuilder; +use crate::wasm::task::TaskLifecycle; +use crate::wasm::task::builder::processor::ProcessorBuilder; #[cfg(feature = "python")] -use crate::runtime::wasm::task::builder::python::PythonBuilder; -use crate::runtime::wasm::task::builder::sink::SinkBuilder; -use crate::runtime::wasm::task::builder::source::SourceBuilder; -use crate::runtime::wasm::task::yaml_keys::{NAME, TYPE, type_values}; +use crate::wasm::task::builder::python::PythonBuilder; +use crate::wasm::task::builder::sink::SinkBuilder; +use crate::wasm::task::builder::source::SourceBuilder; +use crate::wasm::task::yaml_keys::{NAME, TYPE, type_values}; use serde_yaml::Value; use std::sync::Arc; @@ -169,7 +169,7 @@ impl TaskBuilder { /// Build and unwrap WASM task from Arc fn build_wasm_task( result: Result< - Arc, + Arc, Box, >, task_name: &str, diff --git a/src/runtime/wasm/task/control_mailbox.rs b/src/wasm_runtime/src/wasm/task/control_mailbox.rs similarity index 97% rename from src/runtime/wasm/task/control_mailbox.rs rename to src/wasm_runtime/src/wasm/task/control_mailbox.rs index 8aaf2de3..07397ac6 100644 --- a/src/runtime/wasm/task/control_mailbox.rs +++ b/src/wasm_runtime/src/wasm/task/control_mailbox.rs @@ -10,8 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::runtime::common::TaskCompletionFlag; -use crate::runtime::processor::function_error::FunctionErrorReport; +use crate::common::TaskCompletionFlag; +use crate::processor::function_error::FunctionErrorReport; use crossbeam_channel::Sender; #[derive(Clone)] diff --git a/src/runtime/wasm/task/lifecycle.rs b/src/wasm_runtime/src/wasm/task/lifecycle.rs similarity index 96% rename from src/runtime/wasm/task/lifecycle.rs rename to src/wasm_runtime/src/wasm/task/lifecycle.rs index ea00f7c2..cbe7955d 100644 --- a/src/runtime/wasm/task/lifecycle.rs +++ b/src/wasm_runtime/src/wasm/task/lifecycle.rs @@ -14,10 +14,10 @@ // // Defines the complete lifecycle management interface for Task, including initialization, start, stop, checkpoint and close -use crate::runtime::common::ComponentState; -use crate::runtime::wasm::task::control_mailbox::ControlMailBox; -use crate::runtime::wasm::taskexecutor::InitContext; -use crate::storage::task::FunctionInfo; +use crate::common::ComponentState; +use crate::task::FunctionInfo; +use crate::wasm::task::control_mailbox::ControlMailBox; +use crate::wasm::taskexecutor::InitContext; use std::sync::Arc; /// Task lifecycle management interface diff --git a/src/runtime/wasm/task/mod.rs b/src/wasm_runtime/src/wasm/task/mod.rs similarity index 100% rename from src/runtime/wasm/task/mod.rs rename to src/wasm_runtime/src/wasm/task/mod.rs diff --git a/src/runtime/wasm/task/processor_config.rs b/src/wasm_runtime/src/wasm/task/processor_config.rs similarity index 99% rename from src/runtime/wasm/task/processor_config.rs rename to src/wasm_runtime/src/wasm/task/processor_config.rs index a3069adc..8e642ed6 100644 --- a/src/runtime/wasm/task/processor_config.rs +++ b/src/wasm_runtime/src/wasm/task/processor_config.rs @@ -608,7 +608,7 @@ impl WasmTaskConfig { task_name: String, value: &Value, ) -> Result> { - use crate::runtime::wasm::task::yaml_keys::{INPUT_GROUPS, INPUTS, NAME, OUTPUTS}; + use crate::wasm::task::yaml_keys::{INPUT_GROUPS, INPUTS, NAME, OUTPUTS}; // 1. 
Get name from config (if exists), otherwise use the passed task_name let config_name = value diff --git a/src/runtime/wasm/task/yaml_keys.rs b/src/wasm_runtime/src/wasm/task/yaml_keys.rs similarity index 100% rename from src/runtime/wasm/task/yaml_keys.rs rename to src/wasm_runtime/src/wasm/task/yaml_keys.rs diff --git a/src/runtime/wasm/taskexecutor/init_context.rs b/src/wasm_runtime/src/wasm/taskexecutor/init_context.rs similarity index 90% rename from src/runtime/wasm/taskexecutor/init_context.rs rename to src/wasm_runtime/src/wasm/taskexecutor/init_context.rs index fca44a32..edd8837d 100644 --- a/src/runtime/wasm/taskexecutor/init_context.rs +++ b/src/wasm_runtime/src/wasm/taskexecutor/init_context.rs @@ -14,10 +14,10 @@ // // Provides various resources needed for task initialization, including state storage, task storage, thread pool, etc. -use crate::runtime::processor::wasm::thread_pool::{TaskThreadPool, ThreadGroup}; -use crate::runtime::wasm::task::ControlMailBox; -use crate::storage::state_backend::StateStorageServer; -use crate::storage::task::TaskStorage; +use crate::processor::wasm::thread_pool::{TaskThreadPool, ThreadGroup}; +use crate::state_backend::StateStorageServer; +use crate::task::TaskStorage; +use crate::wasm::task::ControlMailBox; use std::sync::{Arc, Mutex}; #[derive(Clone)] diff --git a/src/runtime/wasm/taskexecutor/mod.rs b/src/wasm_runtime/src/wasm/taskexecutor/mod.rs similarity index 100% rename from src/runtime/wasm/taskexecutor/mod.rs rename to src/wasm_runtime/src/wasm/taskexecutor/mod.rs diff --git a/src/runtime/wasm/taskexecutor/task_manager.rs b/src/wasm_runtime/src/wasm/taskexecutor/task_manager.rs similarity index 96% rename from src/runtime/wasm/taskexecutor/task_manager.rs rename to src/wasm_runtime/src/wasm/taskexecutor/task_manager.rs index 897e0a3d..d88c3df4 100644 --- a/src/runtime/wasm/taskexecutor/task_manager.rs +++ b/src/wasm_runtime/src/wasm/taskexecutor/task_manager.rs @@ -10,15 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::common::ComponentState; use crate::config::GlobalConfig; -use crate::runtime::common::ComponentState; -use crate::runtime::processor::wasm::thread_pool::{GlobalTaskThreadPool, TaskThreadPool}; -use crate::runtime::wasm::task::{TaskBuilder, TaskLifecycle}; -use crate::runtime::wasm::taskexecutor::init_context::InitContext; -use crate::storage::state_backend::StateStorageServer; -use crate::storage::task::{ - FunctionInfo, StoredTaskInfo, TaskModuleBytes, TaskStorage, TaskStorageFactory, -}; +use crate::processor::wasm::thread_pool::{GlobalTaskThreadPool, TaskThreadPool}; +use crate::state_backend::StateStorageServer; +use crate::task::{FunctionInfo, StoredTaskInfo, TaskModuleBytes, TaskStorage, TaskStorageFactory}; +use crate::wasm::task::{TaskBuilder, TaskLifecycle}; +use crate::wasm::taskexecutor::init_context::InitContext; use anyhow::{Context, Result, anyhow}; use parking_lot::RwLock;