From adbbc8e83174ba0820015d25942e611c9ef31b84 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 01/23] add flake --- flake.lock | 77 +++++++++++++++++++++++++++++++++++++++++++++ flake.nix | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 000000000..428032335 --- /dev/null +++ b/flake.lock @@ -0,0 +1,77 @@ +{ + "nodes": { + "crane": { + "locked": { + "lastModified": 1775839657, + "narHash": "sha256-SPm9ck7jh3Un9nwPuMGbRU04UroFmOHjLP56T10MOeM=", + "owner": "ipetkov", + "repo": "crane", + "rev": "7cf72d978629469c4bd4206b95c402514c1f6000", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1776067740, + "narHash": "sha256-B35lpsqnSZwn1Lmz06BpwF7atPgFmUgw1l8KAV3zpVQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "7e495b747b51f95ae15e74377c5ce1fe69c1765f", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "crane": "crane", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": 
"nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 000000000..14bce88dd --- /dev/null +++ b/flake.nix @@ -0,0 +1,91 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11"; + flake-utils.url = "github:numtide/flake-utils"; + crane.url = "github:ipetkov/crane"; + }; + + outputs = { self, nixpkgs, flake-utils, crane }: + flake-utils.lib.eachSystem flake-utils.lib.allSystems (system: + let + pkgs = import nixpkgs { inherit system; }; + craneLib = crane.mkLib pkgs; + stdenv' = p: p.stdenvAdapters.withCFlags [ "-O" ] (p.stdenvAdapters.useMoldLinker p.clangStdenv); + stdenv = stdenv' pkgs; + + devShell = craneLib.devShell.override { + mkShell = pkgs.mkShell.override { + inherit stdenv; + }; + }; + + env = { + LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; + CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER = "${stdenv.cc}/bin/cc"; + CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS = "-C link-arg=--ld-path=${stdenv.cc}/bin/ld"; + }; + + commonArgs = { + src = let + unfilteredSrc = ./.; + fs = pkgs.lib.fileset; + in fs.toSource { + root = unfilteredSrc; + fileset = fs.unions [ + (craneLib.fileset.cargoTomlAndLock unfilteredSrc) + (craneLib.fileset.rust unfilteredSrc) + (fs.fileFilter + (file: file.hasExt "c" || file.hasExt "h" || file.hasExt "sql") + unfilteredSrc + ) + ]; + }; + strictDeps = true; + + stdenv = stdenv'; + nativeBuildInputs = with pkgs; [ + pkg-config + ]; + buildInputs = with pkgs; [ + openssl + ]; + + inherit env; + } // (craneLib.crateNameFromCargoToml { cargoToml = ./pgdog/Cargo.toml; }); + + cargoArtifacts = craneLib.buildDepsOnly commonArgs; + + pgDog = craneLib.buildPackage (commonArgs // { + inherit cargoArtifacts; + doCheck = false; + cargoExtraArgs = "-p pgdog"; + }); + + in { + packages.default = pgDog; + + devShells.default = devShell { + checks = self.checks; + inputsFrom = [ cargoArtifacts ]; + inherit env; + }; + + checks = 
{ + inherit pgDog; + + pgDogClippy = craneLib.cargoClippy (commonArgs // { + inherit cargoArtifacts; + cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings"; + }); + + pgDogFmt = craneLib.cargoFmt commonArgs; + + pgDogNextest = craneLib.cargoNextest (commonArgs // { + inherit cargoArtifacts; + checkPhaseCargoCommand = "echo hello world"; + cargoNextestExtraArgs = "--test-threads=1 --no-fail-fast"; + }); + }; + } + ); +} From 0b90713daa2f552f41c4e4dc03b131d20ad6c286 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 02/23] redis cache mvp --- .claude/skills/debug/SKILL.md | 82 ++++ .gitignore | 1 + Cargo.lock | 122 +++++- docs/CACHE.md | 269 ++++++++++++ flake.nix | 29 +- pgdog-config/src/cache.rs | 77 ++++ pgdog-config/src/core.rs | 5 + pgdog-config/src/general.rs | 6 + pgdog-config/src/lib.rs | 2 + pgdog/Cargo.toml | 2 + pgdog/src/backend/pool/cluster.rs | 11 + .../client/query_engine/cache/client.rs | 313 ++++++++++++++ .../client/query_engine/cache/context.rs | 17 + .../client/query_engine/cache/integration.rs | 163 +++++++ .../frontend/client/query_engine/cache/mod.rs | 82 ++++ .../client/query_engine/cache/policy.rs | 398 ++++++++++++++++++ .../client/query_engine/cache/stats.rs | 59 +++ .../frontend/client/query_engine/context.rs | 8 +- pgdog/src/frontend/client/query_engine/mod.rs | 19 +- .../src/frontend/client/query_engine/query.rs | 2 + 20 files changed, 1644 insertions(+), 23 deletions(-) create mode 100644 .claude/skills/debug/SKILL.md create mode 100644 docs/CACHE.md create mode 100644 pgdog-config/src/cache.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/client.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/context.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/integration.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/mod.rs create mode 100644 pgdog/src/frontend/client/query_engine/cache/policy.rs create mode 
100644 pgdog/src/frontend/client/query_engine/cache/stats.rs diff --git a/.claude/skills/debug/SKILL.md b/.claude/skills/debug/SKILL.md new file mode 100644 index 000000000..d22874e8c --- /dev/null +++ b/.claude/skills/debug/SKILL.md @@ -0,0 +1,82 @@ +# Debug Skill — pgdog Development Environment + +## Rules + +1. **Build command**: Always use `cargo build -p pgdog` (debug profile). Never use `--release` during debugging — it slows down builds by 4× and you rarely need to verify the final binary during development. + +2. **Docker environment**: You have access to rootless Docker. Key services: + - PostgreSQL runs on port **5433** (NOT 5432 — your real system postgres uses that) + - Redis is available on the default port **6379** + - If services aren't running, start them first: + ```bash + docker start $(docker ps -a -q --filter "name=pgdog" --latest) + # Or inspect what containers exist: + docker ps -a + ``` + +3. **Running pgdog in background**: Use `systemd-run --user`, NOT `nohup` or `&`. 
Example: + ```bash + systemd-run --user --collect --unit=pgdog-debug \ + --setenv=RUST_LOG=debug \ + --working-directory=/path/to/pgdog \ + /path/to/pgdog/target/debug/pgdog --config pgdog.toml 2>&1 + ``` + To stop it later: + ```bash + systemctl --user list-units | grep pgdog + systemctl --user stop pgdog-debug + ``` + +## Useful Debugging Commands + +### Check docker services + +```bash +docker ps +``` + +### Check redis connectivity + +```bash +redis-cli ping +``` + +### Clear redis cache (useful for testing) + +```bash +redis-cli FLUSHALL +``` + +### Watch cache keys in real-time + +```bash +redis-cli MONITOR | grep "pgdog:" +``` + +### Inspect cached response bytes + +```bash +redis-cli --scan --pattern "pgdog:*" | head -1 | xargs redis-cli GET | xxd | head -20 +``` + +## File Structure Reference + +The cache implementation lives in: + +``` +pgdog/src/frontend/client/query_engine/cache/ +├── mod.rs # Module exports +├── client.rs # Redis client wrapper (fred v9) +├── integration.rs # cache_check(), send_cached_response(), cache_response() +├── policy.rs # CachePolicyResolver (3-tier decision engine) +└── stats.rs # QueryStatsTracker (hit/miss counters) +``` + +State documentation: `docs/CACHE.md` (path relative to the project root). + +## Common Pitfalls + +- **Parser disabled by default**: `route.is_read()` returns false for `SELECT 1` when the query parser is off. The `is_likely_read()` heuristic in integration.rs covers this. +- **Policy defaults to NoCache**: `DatabaseCache.policy()` returns `CachePolicy::NoCache` by default. You must set `policy = "cache"` in the config. +- **Cache keys are hashed**: The key is a DefaultHasher hex digest of the database name followed by the raw query string, not the query itself. +- **Wire format is concatenated bytes**: Multiple PostgreSQL messages are concatenated into a single `Vec<u8>` with `[code: u8][length: u32be][payload: ...]` structure. 
diff --git a/.gitignore b/.gitignore index 5db985b82..32aeaec2a 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ perf.data.old CLAUDE.local.md .claude/plans/ .claude/completed_plans/ +!.claude/skills/debug # Ignore generated bindings pgdog-plugin/src/bindings.rs diff --git a/Cargo.lock b/Cargo.lock index 98f6c842c..1596b7c5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -663,7 +663,7 @@ dependencies = [ "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -973,6 +973,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cookie-factory" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "396de984970346b0d9e93d1415082923c679e5ae5c3ee3dcbd104f5610af126b" + [[package]] name = "core-foundation" version = "0.9.4" @@ -1023,6 +1029,12 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc16" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff" + [[package]] name = "critical-section" version = "1.2.0" @@ -1450,6 +1462,15 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "flume" version = "0.11.1" @@ -1497,6 +1518,47 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fred" +version = "9.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3cdd5378252ea124b712e0ac55147d26ae3af575883b34b8423091a4c719606b" +dependencies = [ + "arc-swap", + "async-trait", + "bytes", + "bytes-utils", + "crossbeam-queue", + "float-cmp", + "fred-macros", + "futures", + "log", + "parking_lot", + "rand 0.8.5", + "redis-protocol", + "rustls 0.23.27", + "rustls-native-certs 0.7.3", + "semver", + "socket2", + "tokio", + "tokio-rustls 0.26.2", + "tokio-stream", + "tokio-util", + "url", + "urlencoding", +] + +[[package]] +name = "fred-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1458c6e22d36d61507034d5afecc64f105c1d39712b7ac6ec3b352c423f715cc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -2871,6 +2933,7 @@ dependencies = [ "dashmap", "derive_builder", "fnv", + "fred", "futures", "hickory-resolver", "http-body-util", @@ -2896,6 +2959,7 @@ dependencies = [ "rust_decimal", "rustls-native-certs 0.8.1", "rustls-pki-types", + "scc 3.7.0", "scram", "semver", "serde", @@ -3458,6 +3522,20 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "redis-protocol" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65deb7c9501fbb2b6f812a30d59c0253779480853545153a51d8e9e444ddc99f" +dependencies = [ + "bytes", + "bytes-utils", + "cookie-factory", + "crc16", + "log", + "nom", +] + [[package]] name = "redox_syscall" version = "0.5.12" @@ -3826,6 +3904,19 @@ dependencies = [ "security-framework 2.11.1", ] +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.1" @@ -3899,13 +3990,29 @@ version = "1.0.20" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "saa" +version = "5.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8d438861332c3b1ac396c77bd9cac620ea1ff347efb63c05a83d8f0a593899" + [[package]] name = "scc" version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" dependencies = [ - "sdd", + "sdd 3.0.8", +] + +[[package]] +name = "scc" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c154cf1d115a1e901d7f4e3f279eb6eb455f0d670c1cf3c1aa74d50ad37fa9" +dependencies = [ + "saa", + "sdd 4.8.6", ] [[package]] @@ -3981,6 +4088,15 @@ version = "3.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "584e070911c7017da6cb2eb0788d09f43d789029b5877d3e5ecc8acf86ceee21" +[[package]] +name = "sdd" +version = "4.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f0e40a01b94e35d1dacbcfbe5bfd3d31e37d9590b2e6d86a82b0e87bd4f551" +dependencies = [ + "saa", +] + [[package]] name = "seahash" version = "4.1.0" @@ -4103,7 +4219,7 @@ dependencies = [ "log", "once_cell", "parking_lot", - "scc", + "scc 2.3.4", "serial_test_derive", ] diff --git a/docs/CACHE.md b/docs/CACHE.md new file mode 100644 index 000000000..7b0159754 --- /dev/null +++ b/docs/CACHE.md @@ -0,0 +1,269 @@ +# Redis Cache for pgdog — State of Implementation + +## Architecture + +Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on cache miss. Three-tier policy resolution: SQL comment → per-database config → auto-decision engine. + +--- + +## Initial Implementation (Before Debugging Session) + +### Files Added + +#### 1. 
`pgdog/src/frontend/client/query_engine/cache/` (new module) + +**`mod.rs`** — Module exports: +```rust +pub mod client; +pub mod integration; +pub mod policy; +pub mod stats; + +pub use client::CacheClient; +pub use integration::{cache_check, cache_response, send_cached_response, CacheCheckResult}; +pub use policy::{ + CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, + CommentCacheExtractor, ParameterCacheExtractor, +}; +pub use stats::QueryStatsTracker; +``` + +**`client.rs`** — Redis client wrapper using `fred` v9: +- `CacheClient::new(config)` — builds client from `Option<&DatabaseCache>`, returns disabled stub if no config/URL +- `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag +- `get(&self, key)` — returns `Result>>`; fetches cached wire-protocol bytes +- `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` +- `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` +- `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) +- `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) +- Keys are prefixed with `"pgdog:"` +- Error types: `RedisError(String)`, `ConnectionFailed(String)` +- `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds +- `reconnecting: Arc` — prevents multiple concurrent reconnect tasks +- All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net + +**`policy.rs`** — 3-tier policy resolution with trait-based extraction: +- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` (moved here from `route.rs`) +- `trait CachePolicyExtractor`: abstract interface with `fn extract(query, params) -> CacheDirective` +- `struct CommentCacheExtractor`: scans SQL query 
string with standalone regex — **works even when parser is bypassed** +- `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter +- `struct CachePolicyDispatcher`: chains extractors in priority order, returns first non-`None` result +- Tier 1: Extractor result (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache` from comments/params) +- Tier 2: Database config `CachePolicy` (`NoCache` / `Cache` / `Auto`) +- Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` + +**`stats.rs`** — Per-fingerprint query statistics tracker: +- `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` +- Internally: `Arc>>` using `parking_lot` + +**`integration.rs`** — Integration logic (as currently exists after debugging fixes): +- `cache_check()` — main entry point, creates `CachePolicyDispatcher` with `CommentCacheExtractor` + `ParameterCacheExtractor`, calls `dispatcher.extract(query, params)` to get `CacheDirective`, then runs `CachePolicyResolver::resolve()` +- `is_likely_read()` — fallback heuristic for when parser is disabled: checks SQL starts with SELECT/SHOW/EXPLAIN/WITH +- `send_cached_response()` — deserializes wire-format bytes and sends to client +- `cache_response()` — serializes `Vec` into wire bytes and stores in Redis +- `get_db_cache_config()` — looks up `DatabaseCache` from global config by database name +- `compute_cache_key()` — DefaultHasher-based hash of `{database, query}`; database name is hashed first to namespace keys and prevent collisions when multiple databases share one Redis + +### Files Modified + +#### 2. `pgdog-config/src/database.rs` + +Added before the `Database` struct: + +- `CachePolicy` enum: `NoCache` (default), `Cache`, `Auto` + - Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy` +- `DatabaseCache` struct: + - `enabled: Option` — is caching on? + - `policy: Option` — which policy? 
+ - `ttl: Option` — default TTL seconds (default 300) + - `redis_url: Option` — Redis connection URL + - `max_result_size: Option` — max cached result bytes + - Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` +- Added `cache: Option` field to `Database` struct + +#### 3. `pgdog-config/src/lib.rs` + +Added `CachePolicy` and `DatabaseCache` to the public `pub use database::` export. + +#### 4. `pgdog/src/frontend/router/parser/route.rs` + +`CacheDirective` enum was **moved to** `cache/policy.rs` — `route.rs` now re-exports it via `pub use crate::frontend::client::query_engine::cache::policy::CacheDirective`. Route still has `cache_directive` field and methods available for manual override, but the type is imported from the cache module. + +#### 5. `pgdog/src/frontend/router/parser/comment.rs` + +All cache-related regex and parsing was **removed** from this file. Cache extraction is now independent and lives in `cache/policy.rs` with its own standalone regex. The `comment()` function returns a 2-tuple `(Option, Option)` again. + +#### 6. `pgdog/src/frontend/router/parser/cache/ast.rs` + +`comment_cache_directive` field was **removed** from `AstInner` struct and `new()` method. Cache parsing is no longer done at the AST level. + +#### 7. `pgdog/src/frontend/router/parser/query/mod.rs` + +All cache directive handling was **removed**: `cache_directive` field removed from `QueryParser` struct, cache directive propagation removed. Cache policy extraction now happens independently in `integration.rs`. + +#### 8. `pgdog/src/frontend/router/parser/mod.rs` + +- Updated export: `pub use route::{CacheDirective, Route, Shard, ShardWithPriority, ShardsWithPriority};` + +#### 9. `pgdog/Cargo.toml` + +- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies + +#### 10. 
`pgdog/src/frontend/client/query_engine/mod.rs` + +- Added `pub mod cache;` module declaration +- Added `cache_client: CacheClient`, `cache_stats: QueryStatsTracker`, `database: String`, `cache_miss: Option<(String, Option)>`, `cache_response_buffer: Vec` fields to `QueryEngine` +- `new()` looks up cache config from global config by database name and creates `CacheClient` +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache_check()`. On HIT: sends cached response and returns. On MISS: stores `(cache_key, ttl)` and starts capture. On Passthrough: clears miss state. +- After `match command`, calls `self.finalize_cache().await` to store the captured response in Redis. +- Added helper methods: `start_cache_capture()`, `capture_response()`, `is_caching()`, `finalize_cache()` + +#### 11. `pgdog/src/frontend/client/query_engine/query.rs` + +- `process_server_message()` added cache capture at the top: if `self.is_caching()`, clones and stores the message via `self.capture_response()`. 
+ +--- + +## Key Design Decisions + +| Decision | Choice | +|----------|--------| +| Interception point | Between `parse_and_rewrite()` and `route_query()` in `handle()` | +| Cache config scope | Per-database (`Database.cache` field) | +| Redis client | `fred` crate v9 (async-native, tokio integration) | +| Cacheable queries | Only reads (`Route::is_read()` + `is_likely_read()` fallback) | +| Cache policy resolution | 3-tier: SQL comment → per-database config → auto-decision | +| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → `return Ok(())` | +| Cache MISS flow | Normal execute → capture response bytes → store in Redis → respond | +| Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | +| Cache key | `DefaultHasher` of `{database}:{query}` — database name is hashed first to namespace keys, preventing collisions when multiple databases share one Redis | +| Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | + +--- + +## Bugs Found & Fixed + +1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). + +2. **Redis GET fails on NULL / cache miss** - Problem: `client.get()` with a byte-buffer return type throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Have `client.get()` return fred's generic `RedisValue` instead and check `val.is_null()` before extracting bytes. + +3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. + +4. 
**Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: Added `is_likely_read()` heuristic function in `cache_check` that checks uppercase SQL prefix (SELECT/SHOW/EXPLAIN/WITH) as a fallback when parser is disabled. + +5. **DB cache config defaults** - Observation: `DatabaseCache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Added `policy = "cache"` to pgdog.toml. + +--- + +## Refactoring: Decoupled Cache Policy Extraction + +The original implementation entangled cache directive parsing with pgdog's general comment parser (`comment.rs`), which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. + +**What was done:** + +- `CacheDirective` enum moved from `route.rs` to `cache/policy.rs` +- Cache parsing **removed** from `comment.rs`, `ast.rs`, `query/mod.rs` — they no longer handle `CacheDirective` +- `route.rs` now re-exports `CacheDirective` from the cache module +- New **trait-based extraction system** in `cache/policy.rs`: + - `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` + - `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser + - `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter + - `CachePolicyDispatcher`: chains extractors, returns first non-`None` result +- `integration.rs` now creates the dispatcher inline in `cache_check()` and passes `context.params` for parameter extraction + +This ensures cache annotations work regardless of whether the query parser is enabled or bypassed. 
+ +## How to Control Cache + +### SQL Comments + +Add a C-style comment before your query. The first matching directive wins: + +```sql +-- Force bypass cache for this query +/* pgdog_cache: no-cache */ +SELECT * FROM users WHERE id = 1; + +-- Cache with database default TTL +/* pgdog_cache: cache */ +SELECT * FROM products WHERE category = 'electronics'; + +-- Cache with custom TTL in seconds +/* pgdog_cache: cache ttl=300 */ +SELECT * FROM orders; +``` + +### Connection Parameter + +Set `pgdog.cache` at connection time (via DSN options) or with `SET` after connecting: + +```sql +-- Session-wide: all queries in this connection bypass cache +SET pgdog.cache = 'no-cache'; + +-- Session-wide: cache all queries with default TTL +SET pgdog.cache = 'cache'; + +-- Session-wide: cache all queries with 5-minute TTL +SET pgdog.cache = 'cache ttl=300'; +``` + +### Priority Order + +Extractors are checked in order — first non-`None` result wins, then falls through to database config: + +``` +SQL comment → pgdog.cache parameter → DB policy config → Auto-decision +(highest) (lowest) +``` + +--- + +# What's Left To Do + +1. **Redo is_likely_read** — **DONE.** Instead of heuristic-based detection, caching now requires the query parser. If `query_parser = "auto"` and any database has `cache.enabled = true`, it's auto-upgraded to `"on"` globally. If `query_parser = "off"` or `"session_control"` and cache is enabled, a startup warning is emitted and caching won't work for that database. `ClusterConfig::new()` also forces `On` per-cluster if cache is enabled and global parser is `Off`/`SessionControl`/`Auto`. This means `route.is_read()` from the AST parser is always accurate — it correctly detects CTE writes (`WITH ... INSERT`), `FOR UPDATE/SHARE`, and volatile functions (`nextval()`, `pg_advisory_lock()`). The old `is_likely_read()` string-prefix heuristic has been removed entirely. + +2. 
**pgdog_cache: comment annotation** — **DONE.** Cache directive extraction now uses its own standalone regex in `cache/policy.rs`, working independently of the AST parser. It scans the raw query string, so it functions correctly even when `query_parser_bypass()` is triggered. The `/* pgdog_cache: ... */` comment format is supported with an optional `ttl=<seconds>` parameter. + +3. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. + +4. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use `process_server_message()`, which captures responses, but the `finalize_cache()` call happens after the `match command` block. Need to verify caching works correctly for multi-step rewrites. + +5. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through `process_server_message()` but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog already implements prepared-statement caching, but it is unclear which kind it is: a cache of the queries themselves or of their results. Open question: will adding our result cache break the existing prepared-statement cache?) + +6. **Error handling / Reconnection** — DONE. Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. + +7. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. + +8. **Cache key collision across databases sharing one Redis** — Problem: `compute_cache_key()` only hashed the raw query string. When two databases point to the same Redis and both run `SELECT * FROM users WHERE id = 1`, they produce identical keys and can serve wrong data on cache hits. 
Fix: Changed `compute_cache_key(query: &str)` to `compute_cache_key(query: &str, database: &str)` — database name is now hashed first, then the query, guaranteeing unique keys per database even on a shared Redis instance (`integration.rs:99`). + +9. **Redis disconnect/reconnect blocks all queries** — Problem: When Redis becomes unavailable after initial connection, `client.get()`/`client.set()` block for the full timeout duration (2s) because fred's `default_command_timeout` is `Duration::from_millis(0)` (no timeout). After the first request fails, subsequent requests still hit the timeout. After Redis restarts, caching never recovers. Root cause analysis: (a) fred's `ClientState` can report `Connected` even when TCP isn't ready, so relying on `client.state()` for the fast-path check leads to unnecessary blocking. (b) `force_reconnection()` hangs indefinitely when Redis is down — fred's router task can't respond without a connection, so the reconnect loop deadlocks. (c) Even after Redis restarts, if the initial `init()` failed, fred's routing tasks never started, so `ping()` and all operations fail silently. Fix: (1) Replaced `connect_initiated` + state-check logic with a single `redis_connected: AtomicBool` — the authoritative gate for all Redis operations. Returns error immediately if false, no Redis call attempted. (2) `ensure_connected()` calls `init()` (only one-shot on fresh start), then verifies with `ping()`. Sets `redis_connected = true` only after PING succeeds. (3) `mark_disconnected()` sets `redis_connected = false` and spawns exactly one background reconnect task (CAS-guarded via `reconnecting: AtomicBool`). (4) Reconnect task retries `client.init()` every 500ms (fred allows re-init after disconnect). On success, verifies with PING, then sets `redis_connected = true`. (5) All Redis calls (init, get, set, ping) wrapped in `tokio::time::timeout(2s)` as safety net. 
+ +--- + +## Testing + +### Framework + +System tests live in `integration/rust/tests/integration/` alongside the existing integration suite. They use: +- `sqlx` and `tokio-postgres` for PG queries through pgdog on port 6432 +- `#[tokio::test]` + `#[serial]` from `serial_test` for test isolation +- `reqwest` to read metrics from `http://127.0.0.1:9090/metrics` + +Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` + +### External Dependencies + +Redis must be running locally on port 6379 before cache tests execute. Unlike Postgres, Redis is **not** currently provisioned by `integration/setup.sh` or any CI workflow. To add cache tests to CI: +- **GitHub Actions:** add `sudo apt-get install -y redis-server && sudo service redis-server start` in `.github/workflows/ci.yml` +- **RWX:** add a `*redis-bg-process` alias in `.rwx/integration.yml` (same pattern as `*postgres-bg-process`) +- **Local/dev:** start Redis manually (expected on `127.0.0.1:6379`) + +### Planned Tests + +1. **Database key namespace collision** — Two databases (`db_a`, `db_b`) sharing one Redis, both running `SELECT 1 AS val` but with different underlying PG data. Verify each database gets its own correct data and no cross-database cache hit occurs. +2. **Basic cache hit/miss** — Run a SELECT once (expect miss), run again (expect hit), verify metrics. +3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. +4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these operations do not populate or consume the cache. +5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking or crashing. +6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. 
diff --git a/flake.nix b/flake.nix index 14bce88dd..4a7fd6c7f 100644 --- a/flake.nix +++ b/flake.nix @@ -9,22 +9,18 @@ flake-utils.lib.eachSystem flake-utils.lib.allSystems (system: let pkgs = import nixpkgs { inherit system; }; - craneLib = crane.mkLib pkgs; stdenv' = p: p.stdenvAdapters.withCFlags [ "-O" ] (p.stdenvAdapters.useMoldLinker p.clangStdenv); stdenv = stdenv' pkgs; - - devShell = craneLib.devShell.override { - mkShell = pkgs.mkShell.override { - inherit stdenv; - }; - }; + craneLib = (crane.mkLib pkgs).overrideScope (final: prev: { + stdenvSelector = stdenv'; + }); env = { LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER = "${stdenv.cc}/bin/cc"; CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS = "-C link-arg=--ld-path=${stdenv.cc}/bin/ld"; }; - + commonArgs = { src = let unfilteredSrc = ./.; @@ -42,7 +38,6 @@ }; strictDeps = true; - stdenv = stdenv'; nativeBuildInputs = with pkgs; [ pkg-config ]; @@ -55,6 +50,16 @@ cargoArtifacts = craneLib.buildDepsOnly commonArgs; + devShell = (craneLib.devShell.override { + mkShell = pkgs.mkShell.override { + inherit stdenv; + }; + }) { + checks = self.checks; + inputsFrom = [ cargoArtifacts ]; + inherit env; + }; + pgDog = craneLib.buildPackage (commonArgs // { inherit cargoArtifacts; doCheck = false; @@ -64,11 +69,7 @@ in { packages.default = pgDog; - devShells.default = devShell { - checks = self.checks; - inputsFrom = [ cargoArtifacts ]; - inherit env; - }; + devShells.default = devShell; checks = { inherit pgDog; diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs new file mode 100644 index 000000000..ef1c44530 --- /dev/null +++ b/pgdog-config/src/cache.rs @@ -0,0 +1,77 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Cache policy. 
+#[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Copy, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum CachePolicy { + /// Never cache queries for this database. + #[default] + NoCache, + /// Always cache read queries. + Cache, + /// Dynamically decide based on Redis memory and query stats. + Auto, +} + +impl std::str::FromStr for CachePolicy { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "no_cache" | "no-cache" => Ok(Self::NoCache), + "cache" => Ok(Self::Cache), + "auto" => Ok(Self::Auto), + _ => Err(format!("Invalid cache policy: {}", s)), + } + } +} + +impl std::fmt::Display for CachePolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let display = match self { + Self::NoCache => "no_cache", + Self::Cache => "cache", + Self::Auto => "auto", + }; + write!(f, "{}", display) + } +} + +/// Redis cache configuration for a database. +#[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, JsonSchema, +)] +#[serde(deny_unknown_fields)] +pub struct Cache { + /// Whether to enable caching for this database. + pub enabled: Option, + /// Cache policy: no_cache, cache, or auto. + pub policy: Option, + /// Default TTL in seconds for cached queries. + pub ttl: Option, + /// Redis connection URL (e.g., redis://localhost:6379). + pub redis_url: Option, + /// Maximum result size in bytes to cache (0 = unlimited). 
+ pub max_result_size: Option, +} + +impl Cache { + pub fn is_enabled(&self) -> bool { + self.enabled.unwrap_or(false) + } + + pub fn policy(&self) -> CachePolicy { + self.policy.unwrap_or_default() + } + + pub fn ttl(&self) -> u64 { + self.ttl.unwrap_or(300) + } + + pub fn max_result_size(&self) -> Option { + self.max_result_size + } +} \ No newline at end of file diff --git a/pgdog-config/src/core.rs b/pgdog-config/src/core.rs index 856518a89..98135178c 100644 --- a/pgdog-config/src/core.rs +++ b/pgdog-config/src/core.rs @@ -564,6 +564,11 @@ impl Config { r#""pg_query_raw" parser engine requires a large thread stack, setting it to 32MiB for each Tokio worker"# ); } + + if self.general.cache.is_enabled() + && matches!(self.general.query_parser, QueryParserLevel::Off | QueryParserLevel::SessionControl) { + warn!("cache requires enabled query parser but it's disabled or session controlled"); + } } /// Multi-tenancy is enabled. diff --git a/pgdog-config/src/general.rs b/pgdog-config/src/general.rs index aa0f636f6..59d76aafa 100644 --- a/pgdog-config/src/general.rs +++ b/pgdog-config/src/general.rs @@ -7,6 +7,7 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; +use crate::cache::Cache; use crate::pooling::ConnectionRecovery; use crate::UniqueIdFunction; use crate::{ @@ -643,6 +644,10 @@ pub struct General { /// https://docs.pgdog.dev/configuration/pgdog.toml/general/#cutover_save_config #[serde(default)] pub cutover_save_config: bool, + + /// Redis cache configuration for this database. 
+ #[serde(default)] + pub cache: Cache, } impl Default for General { @@ -729,6 +734,7 @@ impl Default for General { cutover_timeout_action: Self::cutover_timeout_action(), cutover_save_config: bool::default(), unique_id_function: Self::unique_id_function(), + cache: Cache::default(), } } } diff --git a/pgdog-config/src/lib.rs b/pgdog-config/src/lib.rs index 1a106a295..399fd8e3d 100644 --- a/pgdog-config/src/lib.rs +++ b/pgdog-config/src/lib.rs @@ -1,5 +1,6 @@ // Submodules pub mod auth; +pub mod cache; pub mod core; pub mod data_types; pub mod database; @@ -18,6 +19,7 @@ pub mod users; pub mod util; pub use auth::{AuthType, PassthroughAuth}; +pub use cache::{CachePolicy, Cache}; pub use core::{Config, ConfigAndUsers}; pub use data_types::*; pub use database::{ diff --git a/pgdog/Cargo.toml b/pgdog/Cargo.toml index 7c62c28c4..05e69167f 100644 --- a/pgdog/Cargo.toml +++ b/pgdog/Cargo.toml @@ -69,6 +69,8 @@ pgdog-config = { path = "../pgdog-config" } pgdog-vector = { path = "../pgdog-vector" } pgdog-stats = { path = "../pgdog-stats" } pgdog-postgres-types = { path = "../pgdog-postgres-types"} +fred = { version = "9", features = ["enable-rustls"] } +scc = "3.7" [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.6" diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index 9dbd038d0..22bee793e 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -81,6 +81,7 @@ pub struct Cluster { reload_schema_on_ddl: bool, load_schema: LoadSchema, resharding_parallel_copies: usize, + cache_enabled: bool, } /// Sharding configuration from the cluster. 
@@ -157,6 +158,7 @@ pub struct ClusterConfig<'a> { pub reload_schema_on_ddl: bool, pub load_schema: LoadSchema, pub resharding_parallel_copies: usize, + pub cache_enabled: bool } impl<'a> ClusterConfig<'a> { @@ -210,6 +212,7 @@ impl<'a> ClusterConfig<'a> { reload_schema_on_ddl: general.reload_schema_on_ddl, load_schema: general.load_schema, resharding_parallel_copies: general.resharding_parallel_copies, + cache_enabled: general.cache.is_enabled() } } } @@ -247,6 +250,7 @@ impl Cluster { reload_schema_on_ddl, load_schema, resharding_parallel_copies, + cache_enabled } = config; let identifier = Arc::new(DatabaseUser { @@ -296,6 +300,7 @@ impl Cluster { reload_schema_on_ddl, load_schema, resharding_parallel_copies, + cache_enabled, } } @@ -470,6 +475,7 @@ impl Cluster { || self.dry_run() || self.prepared_statements() == &PreparedStatements::Full || self.pub_sub_enabled() + || self.cache_enabled() || RegexParser::use_parser(request) } } @@ -545,6 +551,11 @@ impl Cluster { self.resharding_parallel_copies } + /// Redis cache enabled. + pub fn cache_enabled(&self) -> bool { + self.cache_enabled + } + /// Launch the connection pools. pub(crate) fn launch(&self) { for shard in self.shards() { diff --git a/pgdog/src/frontend/client/query_engine/cache/client.rs b/pgdog/src/frontend/client/query_engine/cache/client.rs new file mode 100644 index 000000000..9ae4577e0 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/client.rs @@ -0,0 +1,313 @@ +use fred::prelude::*; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tracing::{debug, error, info}; + +use pgdog_config::Cache as CacheConfig; + +const CACHE_KEY_PREFIX: &str = "pgdog:"; + +/// Timeout for individual Redis operations (GET/SET/init). +/// Safety net — should never fire in normal operation since the atomic flag gates all calls. 
+const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); + +#[derive(Clone)] +pub struct CacheClient { + client: Option, + config: CacheConfig, + /// Master connection state flag. Set true only after PING succeeds + /// on init or reconnect. Set false immediately on any error/timeout. + redis_connected: Arc, + /// Prevents spawning multiple reconnect tasks simultaneously. + reconnecting: Arc, +} + +impl std::fmt::Debug for CacheClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CacheClient") + .field("client", &self.client.as_ref().map(|_| "...")) + .field("config", &self.config) + .field( + "redis_connected", + &self.redis_connected.load(Ordering::Relaxed), + ) + .field("reconnecting", &self.reconnecting.load(Ordering::Relaxed)) + .finish() + } +} + +impl CacheClient { + pub fn new(config: &CacheConfig) -> Self { + let cache_config = config.clone(); + + if !cache_config.is_enabled() || cache_config.redis_url.is_none() { + return Self { + client: None, + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: Arc::new(AtomicBool::new(false)), + }; + } + + let url = cache_config.redis_url.as_ref().unwrap(); + let client_config = match RedisConfig::from_url(url) { + Ok(c) => c, + Err(e) => { + error!("Failed to parse Redis URL: {}", e); + return Self { + client: None, + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: Arc::new(AtomicBool::new(false)), + }; + } + }; + + let client = match Builder::from_config(client_config).build() { + Ok(c) => c, + Err(e) => { + error!("Failed to build Redis client: {}", e); + return Self { + client: None, + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: Arc::new(AtomicBool::new(false)), + }; + } + }; + + Self { + client: Some(client), + config: cache_config, + redis_connected: Arc::new(AtomicBool::new(false)), + reconnecting: 
Arc::new(AtomicBool::new(false)), + } + } + + async fn ensure_connected(&self) -> bool { + if self.redis_connected.load(Ordering::Acquire) { + return true; + } + + if self.reconnecting.load(Ordering::Relaxed) { + return false; + } + + if let Some(ref client) = self.client { + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { + Ok(Ok(_)) => { + if Self::ping_client(client).await { + self.redis_connected.store(true, Ordering::Release); + info!("Connected to Redis"); + return true; + } else { + debug!("Redis init returned OK but PING failed — Redis not ready"); + } + } + Ok(Err(e)) => { + debug!("Redis init failed: {}", e); + } + Err(_) => { + error!("Redis init timed out"); + } + } + } + false + } + + async fn ping_client(client: &RedisClient) -> bool { + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.ping::()).await { + Ok(Ok(resp)) => { + info!("Redis PING succeeded: {}", resp); + true + } + Ok(Err(e)) => { + debug!("Redis PING failed: {}", e); + false + } + Err(_) => { + debug!("Redis PING timed out"); + false + } + } + } + + fn spawn_reconnect(&self) { + if self + .reconnecting + .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) + .is_err() + { + debug!("Redis reconnect task already running, skipping"); + return; + } + + let Some(ref client) = self.client else { + error!("Redis reconnect: no client available"); + self.reconnecting.store(false, Ordering::Release); + return; + }; + + let client = client.clone(); + let redis_connected = self.redis_connected.clone(); + let reconnecting = self.reconnecting.clone(); + + tokio::spawn(async move { + info!("Redis reconnect task started"); + let mut attempt = 0; + loop { + attempt += 1; + debug!("Redis reconnect attempt #{}", attempt); + + let init_ok = + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { + Ok(Ok(_)) => true, + Ok(Err(_)) | Err(_) => false, + }; + + if init_ok || Self::ping_client(&client).await { + redis_connected.store(true, 
Ordering::Release); + reconnecting.store(false, Ordering::Release); + info!("Redis reconnected successfully"); + return; + } + tokio::time::sleep(Duration::from_millis(500)).await; + } + }); + + info!("Spawning Redis reconnect task"); + } + + fn mark_disconnected(&self) { + self.redis_connected.store(false, Ordering::Release); + self.spawn_reconnect(); + } + + pub fn is_connected(&self) -> bool { + self.redis_connected.load(Ordering::Relaxed) + } + + pub(crate) async fn get(&self, key: u64) -> Result>, Error> { + if !self.ensure_connected().await { + if !self.is_connected() { + self.spawn_reconnect(); + return Err(Error::ConnectionFailed( + "Redis disconnected, reconnecting in background".to_string(), + )); + } + return Err(Error::ConnectionFailed("Redis not connected".to_string())); + } + + let Some(ref client) = self.client else { + return Ok(None); + }; + + let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); + let val = match tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + client.get::(full_key), + ) + .await + { + Ok(Ok(v)) => v, + Ok(Err(e)) => { + debug!("Redis GET error for key {}: {}", key, e); + self.mark_disconnected(); + return Err(Error::RedisError(e.to_string())); + } + Err(_) => { + error!("Redis GET timed out for key {}", key); + self.mark_disconnected(); + return Err(Error::ConnectionFailed("Redis GET timed out".to_string())); + } + }; + + if val.is_null() { + debug!("Cache miss for key {}", key); + Ok(None) + } else if let Some(bytes) = val.into_bytes() { + debug!("Cache hit for key {}", key); + Ok(Some(bytes.to_vec())) + } else { + debug!("Redis GET value not bytes for key {}", key); + Ok(None) + } + } + + pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: Option) -> Result<(), Error> { + if !self.ensure_connected().await { + if !self.is_connected() { + self.spawn_reconnect(); + return Err(Error::ConnectionFailed( + "Redis disconnected, reconnecting in background".to_string(), + )); + } + return Err(Error::ConnectionFailed("Redis 
not connected".to_string())); + } + + let Some(ref client) = self.client else { + return Ok(()); + }; + + let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); + + if let Some(max_size) = self.config.max_result_size() { + if value.len() > max_size { + debug!( + "Skipping cache for key {}: size {} exceeds max {}", + key, + value.len(), + max_size + ); + return Ok(()); + } + } + + let ttl_seconds = ttl.unwrap_or_else(|| self.config.ttl()) as i64; + + match tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + client.set::<(), _, _>( + full_key, + value, + Some(Expiration::EX(ttl_seconds)), + None, + false, + ), + ) + .await + { + Ok(Ok(_)) => { + debug!("Cached key {} with TTL {}s", key, ttl_seconds); + Ok(()) + } + Ok(Err(e)) => { + debug!("Redis SET error for key {}: {}", key, e); + self.mark_disconnected(); + Err(Error::RedisError(e.to_string())) + } + Err(_) => { + error!("Redis SET timed out for key {}", key); + self.mark_disconnected(); + Err(Error::ConnectionFailed("Redis SET timed out".to_string())) + } + } + } + + pub fn config(&self) -> &CacheConfig { + &self.config + } + + pub fn is_enabled(&self) -> bool { + self.client.is_some() && self.config.is_enabled() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Redis error: {0}")] + RedisError(String), + #[error("Connection failed: {0}")] + ConnectionFailed(String), +} diff --git a/pgdog/src/frontend/client/query_engine/cache/context.rs b/pgdog/src/frontend/client/query_engine/cache/context.rs new file mode 100644 index 000000000..3234f6be1 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/context.rs @@ -0,0 +1,17 @@ +use crate::net::Message; + +/// Cache context to use in QueryEngineContext. +#[derive(Default)] +pub struct CacheContext { + pub cache_miss: Option<(u64, Option)>, + pub response_buffer: Vec, +} + +impl CacheContext { + /// Capture a response message for caching. 
+ pub fn capture_response(&mut self, message: Message) { + if self.cache_miss.is_some() { + self.response_buffer.push(message); + } + } +} diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/client/query_engine/cache/integration.rs new file mode 100644 index 000000000..625d2ac15 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/integration.rs @@ -0,0 +1,163 @@ +use std::hash::{DefaultHasher, Hasher}; + +use crate::{ + frontend::client::query_engine::{cache::Cache, QueryEngineContext}, + net::{FromBytes, Message, ToBytes}, +}; + +use tracing::debug; + +use super::CachePolicyResolver; + +pub enum CacheCheckResult { + Hit { + cached: Vec, + }, + Miss { + cache_key_hash: u64, + ttl: Option, + }, + Passthrough, +} + +impl Cache { + pub(super) async fn cache_check( + &self, + context: &mut QueryEngineContext<'_>, + ) -> CacheCheckResult { + let route = match context.client_request.route.as_ref() { + Some(r) => r, + None => return CacheCheckResult::Passthrough, + }; + + // Detect read-only status via the AST parser's route classification. + // When caching is enabled, the query parser is auto-enabled. 
+ let is_read = route.is_read(); + if !is_read { + return CacheCheckResult::Passthrough; + } + + let query = match context.client_request.query() { + Ok(Some(q)) => q, + _ => return CacheCheckResult::Passthrough, + }; + + let db_hash = { + let mut hasher = DefaultHasher::new(); + hasher.write(self.database.as_bytes()); + hasher.finish() + }; + let cache_key_hash = pg_query::fingerprint(query.query()) + .expect("We're sure that query is correct if we've reached here.") + .value + .wrapping_add(db_hash); + + let cache_directive = self + .policy_dispatcher + .extract(query.query(), context.params); + debug!( + "cache_check: sql={}, db_config={:?}", + query.query(), + self.config + ); + + let decision = CachePolicyResolver::resolve( + cache_directive, + &self.config, + is_read, + cache_key_hash, + &self.stats, + ) + .await; + + if !decision.should_cache() { + return CacheCheckResult::Passthrough; + } + + match self.client.get(cache_key_hash).await { + Ok(Some(cached)) => { + self.stats.record_hit(cache_key_hash, cached.len()).await; + CacheCheckResult::Hit { cached } + } + Ok(None) => { + self.stats.record_miss(cache_key_hash).await; + CacheCheckResult::Miss { + cache_key_hash, + ttl: decision.ttl(), + } + } + Err(e) => { + debug!("Cache get error: {}", e); + CacheCheckResult::Passthrough + } + } + } + + pub(super) async fn send_cached_response( + &self, + context: &mut QueryEngineContext<'_>, + cached: Vec, + ) -> Result<(), crate::frontend::Error> { + let mut offset = 0; + let len = cached.len(); + + while offset < len { + if offset + 5 > len { + break; + } + + let _code = cached[offset] as char; + let msg_len = u32::from_be_bytes([ + cached[offset + 1], + cached[offset + 2], + cached[offset + 3], + cached[offset + 4], + ]) as usize; + + if msg_len < 4 || offset + 1 + msg_len > len { + break; + } + + let end = offset + 1 + msg_len; + let msg_bytes: bytes::Bytes = cached[offset..end].to_vec().into(); + let msg = Message::from_bytes(msg_bytes)?; + offset = end; + + 
context.stream.send_flush(&msg).await?; + } + + Ok(()) + } + + pub(super) async fn cache_response( + &self, + cache_key_hash: u64, + messages: Vec, + ttl: Option, + ) -> Result<(), ()> { + if messages.is_empty() || !self.client.is_enabled() { + return Ok(()); + } + + let mut buffer = Vec::new(); + for msg in &messages { + match msg.to_bytes() { + Ok(bytes) => buffer.extend_from_slice(&bytes), + Err(e) => { + debug!("Failed to serialize message for caching: {}", e); + return Ok(()); + } + } + } + + if buffer.is_empty() { + return Ok(()); + } + + if let Err(e) = self.client.set(cache_key_hash, &buffer, ttl).await { + debug!("Failed to cache response: {}", e); + } + + Ok(()) + } +} diff --git a/pgdog/src/frontend/client/query_engine/cache/mod.rs b/pgdog/src/frontend/client/query_engine/cache/mod.rs new file mode 100644 index 000000000..012a53852 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/mod.rs @@ -0,0 +1,82 @@ +pub mod client; +pub mod context; +pub mod integration; +pub mod policy; +pub mod stats; + +pub use client::CacheClient; +pub use integration::CacheCheckResult; +use pgdog_config::Cache as CacheConfig; +pub use policy::{ + CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, + CommentCacheExtractor, ParameterCacheExtractor, +}; +pub use stats::QueryStatsTracker; +use tracing::debug; + +use crate::frontend::client::query_engine::QueryEngineContext; + +#[derive(Debug)] +pub struct Cache { + client: CacheClient, + stats: QueryStatsTracker, + config: CacheConfig, + database: String, + policy_dispatcher: CachePolicyDispatcher, +} + +impl Cache { + pub fn new(cache_config: &CacheConfig, database: &str) -> Self { + let mut dispatcher = CachePolicyDispatcher::new(); + dispatcher.add_extractor(Box::new(CommentCacheExtractor)); + dispatcher.add_extractor(Box::new(ParameterCacheExtractor::new())); + + Cache { + client: CacheClient::new(cache_config), + stats: QueryStatsTracker::default(), + config: cache_config.clone(), + 
database: database.to_string(), + policy_dispatcher: dispatcher, + } + } + + pub async fn try_read_cache( + &mut self, + context: &mut QueryEngineContext<'_>, + ) -> Result { + let cache_result = self.cache_check(context).await; + + match cache_result { + CacheCheckResult::Hit { cached } => { + debug!("Cache hit, serving from cache"); + self.send_cached_response(context, cached).await?; + return Ok(true); + } + CacheCheckResult::Miss { + cache_key_hash, + ttl, + } => { + context.cache_context.cache_miss = Some((cache_key_hash, ttl)); + context.cache_context.response_buffer.clear(); + debug!("Cache miss for key hash: {}", cache_key_hash); + } + CacheCheckResult::Passthrough => { + context.cache_context.cache_miss = None; + } + } + + return Ok(false); + } + + /// Finalize caching by storing the response in Redis. + pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>,) { + if let Some((cache_key, ttl)) = context.cache_context.cache_miss.take() { + if !context.cache_context.response_buffer.is_empty() { + let messages = std::mem::take(&mut context.cache_context.response_buffer); + if let Err(e) = self.cache_response(cache_key, messages, ttl).await { + debug!("Failed to cache response: {:?}", e); + } + } + } + } +} \ No newline at end of file diff --git a/pgdog/src/frontend/client/query_engine/cache/policy.rs b/pgdog/src/frontend/client/query_engine/cache/policy.rs new file mode 100644 index 000000000..582fde578 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/policy.rs @@ -0,0 +1,398 @@ +use core::fmt; + +use pgdog_config::{Cache as CacheConfig, CachePolicy}; +use tracing::debug; + +use super::stats::QueryStatsTracker; + +use crate::net::parameter::ParameterValue; +use crate::net::Parameters; +use once_cell::sync::Lazy; +use regex::Regex; + +static CACHE: Lazy = Lazy::new(|| { + Regex::new(r#"pgdog_cache: *(no-cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() +}); + +/// Cache directive from SQL comment. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum CacheDirective { + #[default] + None, + Cache { + ttl_seconds: Option, + }, + NoCache, +} + +impl CacheDirective { + pub fn is_cache(&self) -> bool { + matches!(self, CacheDirective::Cache { .. }) + } + + pub fn is_no_cache(&self) -> bool { + matches!(self, CacheDirective::NoCache) + } + + pub fn ttl(&self) -> Option { + match self { + CacheDirective::Cache { ttl_seconds } => *ttl_seconds, + _ => None, + } + } +} + +pub trait CachePolicyExtractor: Send + Sync + fmt::Debug { + fn extract(&self, query: &str, params: &Parameters) -> CacheDirective; +} + +#[derive(Debug)] +pub struct CommentCacheExtractor; + +impl CachePolicyExtractor for CommentCacheExtractor { + fn extract(&self, query: &str, _params: &Parameters) -> CacheDirective { + for cap in CACHE.captures_iter(query) { + if let Some(action) = cap.get(1) { + let action = action.as_str(); + if action == "no-cache" { + return CacheDirective::NoCache; + } else if action.starts_with("cache") { + let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); + return CacheDirective::Cache { ttl_seconds: ttl }; + } + } else { + return CacheDirective::Cache { ttl_seconds: None }; + } + } + CacheDirective::None + } +} + +#[derive(Debug)] +pub struct ParameterCacheExtractor { + key: String, +} + +impl ParameterCacheExtractor { + pub fn new() -> Self { + Self { + key: "pgdog.cache".to_string(), + } + } +} + +impl CachePolicyExtractor for ParameterCacheExtractor { + fn extract(&self, _query: &str, params: &Parameters) -> CacheDirective { + let value = match params.get(&self.key) { + Some(p) => p, + None => return CacheDirective::None, + }; + + let s = match value { + ParameterValue::String(v) => v.as_str(), + _ => return CacheDirective::None, + }; + + match s { + "no-cache" => CacheDirective::NoCache, + "cache" => CacheDirective::Cache { ttl_seconds: None }, + _ => { + if let Some(ttl) = s + .strip_prefix("cache ttl=") + .and_then(|t| t.trim().parse::().ok()) 
+ { + CacheDirective::Cache { + ttl_seconds: Some(ttl), + } + } else if let Some(ttl) = s + .strip_prefix("cache ttl =") + .and_then(|t| t.trim().parse::().ok()) + { + CacheDirective::Cache { + ttl_seconds: Some(ttl), + } + } else { + CacheDirective::None + } + } + } + } +} + +#[derive(Debug)] +pub struct CachePolicyDispatcher { + extractors: Vec>, +} + +impl CachePolicyDispatcher { + pub fn new() -> Self { + Self { + extractors: Vec::new(), + } + } + + pub fn add_extractor(&mut self, extractor: Box) { + self.extractors.push(extractor); + } + + pub fn extract(&self, query: &str, params: &Parameters) -> CacheDirective { + for extractor in &self.extractors { + let result = extractor.extract(query, params); + if result != CacheDirective::None { + debug!("Cache directive for query {} is {:?}", query, result); + return result; + } + } + CacheDirective::None + } + + pub fn is_empty(&self) -> bool { + self.extractors.is_empty() + } +} + +pub struct CachePolicyResolver; + +impl CachePolicyResolver { + pub async fn resolve( + cache_directive: CacheDirective, + cache_config: &CacheConfig, + is_read: bool, + cache_key_hash: u64, + stats: &QueryStatsTracker, + ) -> CacheDecision { + if !is_read { + return CacheDecision::Skip; + } + + if let CacheDirective::NoCache = cache_directive { + return CacheDecision::Skip; + } + + if let CacheDirective::Cache { ttl_seconds } = cache_directive { + return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))); + } + + match cache_config.policy() { + CachePolicy::NoCache => CacheDecision::Skip, + CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), + CachePolicy::Auto => Self::auto_decision(cache_key_hash, stats).await, + } + } + + async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { + let query_stats = stats.get(cache_key_hash).await; + + if query_stats.hit_count > query_stats.miss_count + && query_stats.avg_result_size() < 1_000_000 + { + CacheDecision::Cache(None) + } else { 
+ CacheDecision::Skip + } + } +} + +pub enum CacheDecision { + Skip, + Cache(Option), +} + +impl CacheDecision { + pub fn should_cache(&self) -> bool { + matches!(self, CacheDecision::Cache(_)) + } + + pub fn ttl(&self) -> Option { + match self { + CacheDecision::Cache(ttl) => *ttl, + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_skip_for_writes() { + let cache_config = CacheConfig { + enabled: Some(true), + policy: Some(CachePolicy::Cache), + ttl: None, + redis_url: None, + max_result_size: None, + }; + let decision = CachePolicyResolver::resolve( + CacheDirective::None, + &cache_config, + false, + 0xAABBCCDD, + &QueryStatsTracker::default(), + ) + .await; + assert!(!decision.should_cache()); + } + + #[tokio::test] + async fn test_no_cache_directive() { + let cache_config = CacheConfig { + enabled: Some(true), + policy: Some(CachePolicy::Cache), + ttl: None, + redis_url: None, + max_result_size: None, + }; + let decision = CachePolicyResolver::resolve( + CacheDirective::NoCache, + &cache_config, + true, + 0xAABBCCDD, + &QueryStatsTracker::default(), + ) + .await; + assert!(!decision.should_cache()); + } + + #[tokio::test] + async fn test_cache_directive_with_ttl() { + let cache_config = CacheConfig { + enabled: Some(true), + policy: Some(CachePolicy::NoCache), + ttl: None, + redis_url: None, + max_result_size: None, + }; + let decision = CachePolicyResolver::resolve( + CacheDirective::Cache { + ttl_seconds: Some(120), + }, + &cache_config, + true, + 0xAABBCCDD, + &QueryStatsTracker::default(), + ) + .await; + assert!(decision.should_cache()); + assert_eq!(decision.ttl(), Some(120)); + } + + #[test] + fn test_comment_extractor_no_cache() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = + extractor.extract("SELECT * FROM users /* pgdog_cache: no-cache */", ¶ms); + assert!(matches!(directive, CacheDirective::NoCache)); + } + + #[test] + fn 
test_comment_extractor_cache_default_ttl() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = extractor.extract("SELECT * FROM users /* pgdog_cache: cache */", ¶ms); + match directive { + CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_comment_extractor_cache_with_ttl() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = extractor.extract( + "SELECT * FROM users /* pgdog_cache: cache ttl=60 */", + ¶ms, + ); + match directive { + CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_comment_extractor_no_directive() { + let extractor = CommentCacheExtractor; + let params = Parameters::default(); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::None)); + } + + #[test] + fn test_parameter_extractor_no_cache() { + let extractor = ParameterCacheExtractor::new(); + let mut params = Parameters::default(); + params.insert("pgdog.cache", "no-cache"); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::NoCache)); + } + + #[test] + fn test_parameter_extractor_cache() { + let extractor = ParameterCacheExtractor::new(); + let mut params = Parameters::default(); + params.insert("pgdog.cache", "cache"); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + match directive { + CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_parameter_extractor_cache_with_ttl() { + let extractor = ParameterCacheExtractor::new(); + let mut params = Parameters::default(); + params.insert("pgdog.cache", "cache ttl=120"); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + match 
directive { + CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(120)), + _ => panic!("Expected Cache directive"), + } + } + + #[test] + fn test_parameter_extractor_no_param() { + let extractor = ParameterCacheExtractor::new(); + let params = Parameters::default(); + let directive = extractor.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::None)); + } + + #[test] + fn test_dispatcher_comment_wins() { + let comment_extractor = CommentCacheExtractor; + let parameter_extractor = ParameterCacheExtractor::new(); + + let mut dispatcher = CachePolicyDispatcher::new(); + dispatcher.add_extractor(Box::new(comment_extractor)); + dispatcher.add_extractor(Box::new(parameter_extractor)); + + let mut params = Parameters::default(); + params.insert("pgdog.cache", "no-cache"); + + let directive = dispatcher.extract("SELECT * /* pgdog_cache: cache ttl=60 */", ¶ms); + match directive { + CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), + _ => panic!("Expected comment to win"), + } + } + + #[test] + fn test_dispatcher_parameter_fallback() { + let comment_extractor = CommentCacheExtractor; + let parameter_extractor = ParameterCacheExtractor::new(); + + let mut dispatcher = CachePolicyDispatcher::new(); + dispatcher.add_extractor(Box::new(comment_extractor)); + dispatcher.add_extractor(Box::new(parameter_extractor)); + + let mut params = Parameters::default(); + params.insert("pgdog.cache", "no-cache"); + + let directive = dispatcher.extract("SELECT * FROM users", ¶ms); + assert!(matches!(directive, CacheDirective::NoCache)); + } +} diff --git a/pgdog/src/frontend/client/query_engine/cache/stats.rs b/pgdog/src/frontend/client/query_engine/cache/stats.rs new file mode 100644 index 000000000..e2946c667 --- /dev/null +++ b/pgdog/src/frontend/client/query_engine/cache/stats.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; + +use scc::HashMap; + +#[derive(Debug, Clone, Default)] +pub struct QueryStats { + pub 
hit_count: u64, + pub miss_count: u64, + pub total_result_size: u64, +} + +impl QueryStats { + pub fn avg_result_size(&self) -> u64 { + let total = self.hit_count + self.miss_count; + if total == 0 { + 0 + } else { + self.total_result_size / total + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct QueryStatsTracker { + stats: Arc<HashMap<u64, QueryStats>>, +} + +impl QueryStatsTracker { + pub async fn record_hit(&self, cache_key_hash: u64, result_size: usize) { + let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); + entry.hit_count += 1; + entry.total_result_size += result_size as u64; + } + + pub async fn record_miss(&self, cache_key_hash: u64) { + let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); + entry.miss_count += 1; + } + + pub async fn get(&self, cache_key_hash: u64) -> QueryStats { + self.stats + .get_async(&cache_key_hash) + .await + .map(|entry| entry.get().clone()) + .unwrap_or_default() + } + + pub async fn clear(&self) { + self.stats.clear_async().await + } + + pub async fn len(&self) -> usize { + self.stats.len() + } + + pub async fn is_empty(&self) -> bool { + self.stats.is_empty() + } +} diff --git a/pgdog/src/frontend/client/query_engine/context.rs b/pgdog/src/frontend/client/query_engine/context.rs index b54751a35..a2cdc7ca2 100644 --- a/pgdog/src/frontend/client/query_engine/context.rs +++ b/pgdog/src/frontend/client/query_engine/context.rs @@ -1,9 +1,7 @@ use crate::{ backend::pool::{connection::mirror::Mirror, stats::MemoryStats}, frontend::{ - client::{timeouts::Timeouts, Sticky, TransactionType}, - router::parser::rewrite::statement::plan::RewriteResult, - Client, ClientRequest, PreparedStatements, + Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, query_engine::cache::context::CacheContext, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult }, net::{BackendKeyData, Parameters, Stream}, }; @@ -39,6 +37,8 @@ pub struct QueryEngineContext<'a> { pub(super)
sticky: Sticky, /// Rewrite result. pub(super) rewrite_result: Option, + /// Cache context. + pub(super) cache_context: CacheContext } impl<'a> QueryEngineContext<'a> { @@ -60,6 +60,7 @@ impl<'a> QueryEngineContext<'a> { rollback: false, sticky: client.sticky, rewrite_result: None, + cache_context: CacheContext::default(), } } @@ -86,6 +87,7 @@ impl<'a> QueryEngineContext<'a> { rollback: false, sticky: Sticky::new(), rewrite_result: None, + cache_context: CacheContext::default(), } } diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index f0dc8979b..d59adba46 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -2,9 +2,11 @@ use crate::{ backend::pool::{Connection, Request}, config::config, frontend::{ - client::query_engine::{hooks::QueryEngineHooks, route_query::ClusterCheck}, - router::{parser::Shard, Route}, - BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, + BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, client::query_engine::{ + cache::Cache, + hooks::QueryEngineHooks, + route_query::ClusterCheck, + }, router::{Route, parser::Shard} }, net::{ErrorResponse, Message, Parameters}, state::State, @@ -12,6 +14,7 @@ use crate::{ use tracing::debug; +pub mod cache; pub mod connect; pub mod context; pub mod deallocate; @@ -55,6 +58,7 @@ pub struct QueryEngine { notify_buffer: NotifyBuffer, pending_explain: Option, hooks: QueryEngineHooks, + cache: Cache, } impl QueryEngine { @@ -64,6 +68,7 @@ impl QueryEngine { let database = params.get_default("database", user); let backend = Connection::new(user, database, admin)?; + let cache_config = &config().config.general.cache; Ok(Self { backend, @@ -76,6 +81,7 @@ impl QueryEngine { pending_explain: None, begin_stmt: None, router: Router::default(), + cache: Cache::new(cache_config, database), }) } @@ -129,6 +135,11 @@ impl QueryEngine { 
return Ok(()); } + if self.cache.try_read_cache(context).await? { + self.update_stats(context); + return Ok(()); + } + self.hooks.before_execution(context)?; // Queue up request to mirrors, if any. @@ -228,6 +239,8 @@ impl QueryEngine { command => self.unknown_command(context, command.clone()).await?, } + self.cache.save_response_in_cache(context).await; + self.hooks.after_execution(context)?; if context.in_error() { diff --git a/pgdog/src/frontend/client/query_engine/query.rs b/pgdog/src/frontend/client/query_engine/query.rs index 231d936cd..0775b4682 100644 --- a/pgdog/src/frontend/client/query_engine/query.rs +++ b/pgdog/src/frontend/client/query_engine/query.rs @@ -120,6 +120,8 @@ impl QueryEngine { context: &mut QueryEngineContext<'_>, mut message: Message, ) -> Result<(), Error> { + context.cache_context.capture_response(message.clone()); + self.streaming = message.streaming(); let code = message.code(); From 98eff6f2c6d99efed252ae6016ef278fb3b67d21 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 03/23] updated cache docs --- docs/CACHE.md | 202 ++++++++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 105 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 7b0159754..a6f302711 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -6,21 +6,42 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on --- -## Initial Implementation (Before Debugging Session) +## Implementation ### Files Added -#### 1. `pgdog/src/frontend/client/query_engine/cache/` (new module) +#### 1. `pgdog-config/src/cache.rs` (new file) -**`mod.rs`** — Module exports: +**CachePolicy enum:** `NoCache` (default), `Cache`, `Auto`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. + +**Cache struct:** +- `enabled: Option` — is caching on? +- `policy: Option` — which policy? 
+- `ttl: Option` — default TTL seconds (default 300) +- `redis_url: Option` — Redis connection URL +- `max_result_size: Option` — max cached result bytes +- Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` + +#### 2. `pgdog-config/src/general.rs` + +Added `cache: Cache` field to `General` struct — **cache config is global**, not per-database. + +#### 3. `pgdog-config/src/lib.rs` + +Added `pub mod cache;` and `pub use cache::{CachePolicy, Cache};` to public exports. + +#### 4. `pgdog/src/frontend/client/query_engine/cache/` (new module) + +**`mod.rs`** — Module exports and main `Cache` struct: ```rust pub mod client; +pub mod context; pub mod integration; pub mod policy; pub mod stats; pub use client::CacheClient; -pub use integration::{cache_check, cache_response, send_cached_response, CacheCheckResult}; +pub use integration::CacheCheckResult; pub use policy::{ CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, CommentCacheExtractor, ParameterCacheExtractor, @@ -28,14 +49,22 @@ pub use policy::{ pub use stats::QueryStatsTracker; ``` +`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `CacheConfig`, `database`, `policy_dispatcher`. 
+ +Key methods: +- `new(cache_config, database)` — creates client, stats, dispatcher +- `try_read_cache(context)` — calls `cache_check()`, handles HIT/MISS/PASS-through +- `save_response_in_cache(context)` — finalizes by storing the captured response + **`client.rs`** — Redis client wrapper using `fred` v9: -- `CacheClient::new(config)` — builds client from `Option<&DatabaseCache>`, returns disabled stub if no config/URL +- `CacheClient::new(config)` — builds client from `&CacheConfig`, returns disabled stub if no config/URL - `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag - `get(&self, key)` — returns `Result>>`; fetches cached wire-protocol bytes - `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` - `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` - `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) - `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) +- `is_enabled()` — returns true if both client exists and config enabled - Keys are prefixed with `"pgdog:"` - Error types: `RedisError(String)`, `ConnectionFailed(String)` - `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds @@ -43,7 +72,7 @@ pub use stats::QueryStatsTracker; - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net **`policy.rs`** — 3-tier policy resolution with trait-based extraction: -- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` (moved here from `route.rs`) +- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` - `trait CachePolicyExtractor`: abstract interface with `fn extract(query, params) -> CacheDirective` - `struct CommentCacheExtractor`: scans SQL query string with standalone regex — 
**works even when parser is bypassed** - `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter @@ -53,74 +82,50 @@ pub use stats::QueryStatsTracker; - Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` **`stats.rs`** — Per-fingerprint query statistics tracker: +- `QueryStats` struct: `hit_count`, `miss_count`, `total_result_size`, `avg_result_size()` - `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` -- Internally: `Arc>>` using `parking_lot` +- Internally: `Arc>` -**`integration.rs`** — Integration logic (as currently exists after debugging fixes): -- `cache_check()` — main entry point, creates `CachePolicyDispatcher` with `CommentCacheExtractor` + `ParameterCacheExtractor`, calls `dispatcher.extract(query, params)` to get `CacheDirective`, then runs `CachePolicyResolver::resolve()` -- `is_likely_read()` — fallback heuristic for when parser is disabled: checks SQL starts with SELECT/SHOW/EXPLAIN/WITH +**`context.rs`** — Cache context held in `QueryEngineContext`: +- `CacheContext` with `cache_miss: Option<(u64, Option)>` and `response_buffer: Vec` +- `capture_response(message)` — stores message in buffer when cache miss is tracked + +**`integration.rs`** — Integration methods on `impl Cache`: +- `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis - `send_cached_response()` — deserializes wire-format bytes and sends to client - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis -- `get_db_cache_config()` — looks up `DatabaseCache` from global config by database name -- `compute_cache_key()` — DefaultHasher-based hash of `{database, query}`; database name is hashed first to namespace keys and prevent collisions when multiple databases share one Redis +- Cache key: `pg_query::fingerprint(query).value.wrapping_add(db_hash)` ### Files Modified -#### 2. 
`pgdog-config/src/database.rs` - -Added before the `Database` struct: - -- `CachePolicy` enum: `NoCache` (default), `Cache`, `Auto` - - Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy` -- `DatabaseCache` struct: - - `enabled: Option` — is caching on? - - `policy: Option` — which policy? - - `ttl: Option` — default TTL seconds (default 300) - - `redis_url: Option` — Redis connection URL - - `max_result_size: Option` — max cached result bytes - - Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` -- Added `cache: Option` field to `Database` struct - -#### 3. `pgdog-config/src/lib.rs` - -Added `CachePolicy` and `DatabaseCache` to the public `pub use database::` export. - -#### 4. `pgdog/src/frontend/router/parser/route.rs` - -`CacheDirective` enum was **moved to** `cache/policy.rs` — `route.rs` now re-exports it via `pub use crate::frontend::client::query_engine::cache::policy::CacheDirective`. Route still has `cache_directive` field and methods available for manual override, but the type is imported from the cache module. - -#### 5. `pgdog/src/frontend/router/parser/comment.rs` +#### 5. `pgdog/Cargo.toml` -All cache-related regex and parsing was **removed** from this file. Cache extraction is now independent and lives in `cache/policy.rs` with its own standalone regex. The `comment()` function returns a 2-tuple `(Option, Option)` again. +- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies -#### 6. `pgdog/src/frontend/router/parser/cache/ast.rs` +#### 6. `pgdog/src/frontend/client/query_engine/mod.rs` -`comment_cache_directive` field was **removed** from `AstInner` struct and `new()` method. Cache parsing is no longer done at the AST level. 
+- Added `pub mod cache;` module declaration +- Added `cache: Cache` field to `QueryEngine` +- `new()` loads `cache_config` from `config().config.general.cache` and creates `Cache::new(cache_config, database)` +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `self.cache.try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. +- After `match command`, calls `self.cache.save_response_in_cache(context)` to store the captured response in Redis. -#### 7. `pgdog/src/frontend/router/parser/query/mod.rs` +#### 7. `pgdog/src/frontend/client/query_engine/query.rs` -All cache directive handling was **removed**: `cache_directive` field removed from `QueryParser` struct, cache directive propagation removed. Cache policy extraction now happens independently in `integration.rs`. +- `process_server_message()` added cache capture: `context.cache_context.capture_response(message.clone())`. -#### 8. `pgdog/src/frontend/router/parser/mod.rs` +#### 8. `pgdog/src/frontend/client/query_engine/context.rs` -- Updated export: `pub use route::{CacheDirective, Route, Shard, ShardWithPriority, ShardsWithPriority};` +- Added `cache_context: CacheContext` field to `QueryEngineContext`. -#### 9. `pgdog/Cargo.toml` +#### 9. `pgdog/src/backend/pool/cluster.rs` -- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies +- Added `cache_enabled: bool` field to `ClusterConfig` and `Cluster` +- `cluster.rs` adds `|| self.cache_enabled()` in query parser requirement check — when caching is on, the query parser is forced on alongside `dry_run`, `prepared_statements`, `pub_sub`, and `regex_parser` -#### 10. `pgdog/src/frontend/client/query_engine/mod.rs` +#### 10. 
`pgdog-config/src/core.rs` -- Added `pub mod cache;` module declaration -- Added `cache_client: CacheClient`, `cache_stats: QueryStatsTracker`, `database: String`, `cache_miss: Option<(String, Option)>`, `cache_response_buffer: Vec` fields to `QueryEngine` -- `new()` looks up cache config from global config by database name and creates `CacheClient` -- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache_check()`. On HIT: sends cached response and returns. On MISS: stores `(cache_key, ttl)` and starts capture. On Passthrough: clears miss state. -- After `match command`, calls `self.finalize_cache().await` to store the captured response in Redis. -- Added helper methods: `start_cache_capture()`, `capture_response()`, `is_caching()`, `finalize_cache()` - -#### 11. `pgdog/src/frontend/client/query_engine/query.rs` - -- `process_server_message()` added cache capture at the top: if `self.is_caching()`, clones and stores the message via `self.capture_response()`. 
+- Added startup warning: `cache requires enabled query parser but it's disabled or session controlled` when `cache.is_enabled()` and parser is `Off` or `SessionControl` --- @@ -128,15 +133,15 @@ All cache directive handling was **removed**: `cache_directive` field removed fr | Decision | Choice | |----------|--------| -| Interception point | Between `parse_and_rewrite()` and `route_query()` in `handle()` | -| Cache config scope | Per-database (`Database.cache` field) | +| Interception point | Between `route_query()` and `before_execution()` in `handle()` | +| Cache config scope | **Global** (`config.general.cache`) | | Redis client | `fred` crate v9 (async-native, tokio integration) | -| Cacheable queries | Only reads (`Route::is_read()` + `is_likely_read()` fallback) | -| Cache policy resolution | 3-tier: SQL comment → per-database config → auto-decision | -| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → `return Ok(())` | -| Cache MISS flow | Normal execute → capture response bytes → store in Redis → respond | +| Cacheable queries | Only reads (`route.is_read()`) | +| Cache policy resolution | 3-tier: SQL comment → pgdog.cache param → DB policy → auto-decision | +| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | +| Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | | Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | -| Cache key | `DefaultHasher` of `{database}:{query}` — database name is hashed first to namespace keys, preventing collisions when multiple databases share one Redis | +| Cache key | `pg_query::fingerprint(query).value.wrapping_add(db_hash)` where `db_hash = DefaultHasher of database name` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | --- @@ -149,29 +154,26 @@ All cache directive handling was **removed**: `cache_directive` field removed fr 3. 
**Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. -4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: Added `is_likely_read()` heuristic function in `cache_check` that checks uppercase SQL prefix (SELECT/SHOW/EXPLAIN/WITH) as a fallback when parser is disabled. +4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. -5. **DB cache config defaults** - Observation: `DatabaseCache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Added `policy = "cache"` to pgdog.toml. +5. **DB cache config defaults** - Observation: `Cache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Add `policy = "cache"` to pgdog.toml. 
--- ## Refactoring: Decoupled Cache Policy Extraction -The original implementation entangled cache directive parsing with pgdog's general comment parser (`comment.rs`), which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. +The original implementation entangled cache directive parsing with pgdog's general comment parser, which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. **What was done:** -- `CacheDirective` enum moved from `route.rs` to `cache/policy.rs` -- Cache parsing **removed** from `comment.rs`, `ast.rs`, `query/mod.rs` — they no longer handle `CacheDirective` -- `route.rs` now re-exports `CacheDirective` from the cache module -- New **trait-based extraction system** in `cache/policy.rs`: - - `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` - - `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser - - `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter - - `CachePolicyDispatcher`: chains extractors, returns first non-`None` result -- `integration.rs` now creates the dispatcher inline in `cache_check()` and passes `context.params` for parameter extraction - -This ensures cache annotations work regardless of whether the query parser is enabled or bypassed. 
+- New **`cache/`** module created under `query_engine/` +- `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` +- `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser +- `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter +- `CachePolicyDispatcher`: chains extractors, returns first non-`None` result +- `Cache` struct as abstraction layer over client, stats, config, and dispatcher +- `CacheContext` struct holds `cache_miss` and `response_buffer` per-query +- Cache integration happens via `try_read_cache()` and `save_response_in_cache()` methods on `Cache` ## How to Control Cache @@ -219,51 +221,41 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi --- -# What's Left To Do - -1. **Redo is_likely_read** — **DONE.** Instead of heuristic-based detection, caching now requires the query parser. If `query_parser = "auto"` and any database has `cache.enabled = true`, it's auto-upgraded to `"on"` globally. If `query_parser = "off"` or `"session_control"` and cache is enabled, a startup warning is emitted and caching won't work for that database. `ClusterConfig::new()` also forces `On` per-cluster if cache is enabled and global parser is `Off`/`SessionControl`/`Auto`. This means `route.is_read()` from the AST parser is always accurate — it correctly detects CTE writes (`WITH ... INSERT`), `FOR UPDATE/SHARE`, and volatile functions (`nextval()`, `pg_advisory_lock()`). The old `is_likely_read()` string-prefix heuristic has been removed entirely. +## Completed -2. **pgdog_cache: comment annotation** — **DONE.** Cache directive extraction now uses its own standalone regex in `cache/policy.rs`, working independently of the AST parser. It scans the raw query string, so it functions correctly even when `query_parser_bypass()` is triggered. The `/* pgdog_cache: ... */` comment format is supported with optional `ttl=` parameter. +1. 
**Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. -3. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. +2. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. -4. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use process_server_message() which captures responses, but the finalize_cache() call happens after match command block. Need to verify caching works correctly for multi-step rewrites. +3. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -5. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: Actually, pgdog implements prepared statements caching. But i don't know what kind of caching is this: just query cache or result cache. And if we'll implement our cache, will this break this prepared statement cache?) +4. **Cache key collision across databases sharing one Redis** — Database name is hashed via `DefaultHasher` and combined with `pg_query::fingerprint(query).value` using `wrapping_add` to produce unique per-database keys even on shared Redis. -6. **Error handling / Reconnection** — DONE. Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. +5. 
**Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. -7. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. - -8. **Cache key collision across databases sharing one Redis** — Problem: `compute_cache_key()` only hashed the raw query string. When two databases point to the same Redis and both run `SELECT * FROM users WHERE id = 1`, they produce identical keys and can serve wrong data on cache hits. Fix: Changed `compute_cache_key(query: &str)` to `compute_cache_key(query: &str, database: &str)` — database name is now hashed first, then the query, guaranteeing unique keys per database even on a shared Redis instance (`integration.rs:99`). +--- -9. **Redis disconnect/reconnect blocks all queries** — Problem: When Redis becomes unavailable after initial connection, `client.get()`/`client.set()` block for the full timeout duration (2s) because fred's `default_command_timeout` is `Duration::from_millis(0)` (no timeout). After the first request fails, subsequent requests still hit the timeout. After Redis restarts, caching never recovers. Root cause analysis: (a) fred's `ClientState` can report `Connected` even when TCP isn't ready, so relying on `client.state()` for the fast-path check leads to unnecessary blocking. (b) `force_reconnection()` hangs indefinitely when Redis is down — fred's router task can't respond without a connection, so the reconnect loop deadlocks. (c) Even after Redis restarts, if the initial `init()` failed, fred's routing tasks never started, so `ping()` and all operations fail silently. Fix: (1) Replaced `connect_initiated` + state-check logic with a single `redis_connected: AtomicBool` — the authoritative gate for all Redis operations. Returns error immediately if false, no Redis call attempted. 
(2) `ensure_connected()` calls `init()` (only one-shot on fresh start), then verifies with `ping()`. Sets `redis_connected = true` only after PING succeeds. (3) `mark_disconnected()` sets `redis_connected = false` and spawns exactly one background reconnect task (CAS-guarded via `reconnecting: AtomicBool`). (4) Reconnect task retries `client.init()` every 500ms (fred allows re-init after disconnect). On success, verifies with PING, then sets `redis_connected = true`. (5) All Redis calls (init, get, set, ping) wrapped in `tokio::time::timeout(2s)` as safety net. +## What's Left To Do ---- +1. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. -## Testing +2. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use process_server_message() which captures responses, but the `save_response_in_cache()` call happens after the `match command` block. Need to verify caching works correctly for multi-step rewrites. -### Framework +3. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog already implements prepared-statement caching. It is unclear whether that is a query cache or a result cache, and whether implementing our cache would break it.) -System tests live in `integration/rust/tests/integration/` alongside the existing integration suite. They use: -- `sqlx` and `tokio-postgres` for PG queries through pgdog on port 6432 -- `#[tokio::test]` + `#[serial]` from `serial_test` for test isolation -- `reqwest` to read metrics from `http://127.0.0.1:9090/metrics` +4. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. -Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` +5.
**Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -### External Dependencies +6. **Rewrite cache_key_hash computation** — Change from `pg_query::fingerprint(query).value.wrapping_add(db_hash)` to correct combined hashing that doesn't use arithmetic addition, ensuring stronger collision resistance. -Redis must be running locally on port 6379 before cache tests execute. Unlike Postgres, Redis is **not** currently provisioned by `integration/setup.sh` or any CI workflow. To add cache tests to CI: -- **GitHub Actions:** add `sudo apt-get install -y redis-server && sudo service redis-server start` in `.github/workflows/ci.yml` -- **RWX:** add a `*redis-bg-process` alias in `.rwx/integration.yml` (same pattern as `*postgres-bg-process`) -- **Local/dev:** start Redis manually (expected on `127.0.0.1:6379`) +7. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` ### Planned Tests -1. **Database key namespace collision** — Two databases (`db_a`, `db_b`) sharing one Redis, both running `SELECT 1 AS val` but with different underlying PG data. Verify each database gets its own correct data and no cross-database cache hit occurs. +1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. 2. **Basic cache hit/miss** — Run a SELECT once (expect miss), run again (expect hit), verify metrics. 3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. -4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these operations do not populate or consume the cache. -5. 
**Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking or crashing. +4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these do not populate or consume the cache. +5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking. 6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. From a7112ffb9a30aba07ee7af03009f536e246b5766 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 04/23] update key hashing algorithm --- Cargo.lock | 7 +++++++ docs/CACHE.md | 8 +++----- pgdog/Cargo.toml | 1 + .../client/query_engine/cache/integration.rs | 13 +++++-------- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1596b7c5b..f7b17c532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2978,6 +2978,7 @@ dependencies = [ "tracing-subscriber", "url", "uuid", + "xxhash-rust", ] [[package]] @@ -5975,6 +5976,12 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "yoke" version = "0.8.0" diff --git a/docs/CACHE.md b/docs/CACHE.md index a6f302711..fe342a7d1 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -94,7 +94,7 @@ Key methods: - `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis - `send_cached_response()` — deserializes wire-format bytes and sends to client - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis -- Cache key: `pg_query::fingerprint(query).value.wrapping_add(db_hash)` +- Cache key: XXH3 hash of `database_name + raw_query_string` ### Files Modified @@ 
-229,7 +229,7 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 3. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -4. **Cache key collision across databases sharing one Redis** — Database name is hashed via `DefaultHasher` and combined with `pg_query::fingerprint(query).value` using `wrapping_add` to produce unique per-database keys even on shared Redis. +4. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. 5. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. @@ -247,9 +247,7 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 5. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -6. **Rewrite cache_key_hash computation** — Change from `pg_query::fingerprint(query).value.wrapping_add(db_hash)` to correct combined hashing that doesn't use arithmetic addition, ensuring stronger collision resistance. - -7. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` +6. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. 
Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` ### Planned Tests diff --git a/pgdog/Cargo.toml b/pgdog/Cargo.toml index 05e69167f..e3f2e7bbb 100644 --- a/pgdog/Cargo.toml +++ b/pgdog/Cargo.toml @@ -71,6 +71,7 @@ pgdog-stats = { path = "../pgdog-stats" } pgdog-postgres-types = { path = "../pgdog-postgres-types"} fred = { version = "9", features = ["enable-rustls"] } scc = "3.7" +xxhash-rust = { version = "0.8", features = ["xxh3"]} [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.6" diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/client/query_engine/cache/integration.rs index 625d2ac15..f0986e232 100644 --- a/pgdog/src/frontend/client/query_engine/cache/integration.rs +++ b/pgdog/src/frontend/client/query_engine/cache/integration.rs @@ -1,4 +1,4 @@ -use std::hash::{DefaultHasher, Hasher}; +use std::hash::{Hash, Hasher}; use crate::{ frontend::client::query_engine::{cache::Cache, QueryEngineContext}, @@ -42,15 +42,12 @@ impl Cache { _ => return CacheCheckResult::Passthrough, }; - let db_hash = { - let mut hasher = DefaultHasher::new(); - hasher.write(self.database.as_bytes()); + let cache_key_hash = { + let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); + self.database.hash(&mut hasher); + query.query().hash(&mut hasher); hasher.finish() }; - let cache_key_hash = pg_query::fingerprint(query.query()) - .expect("We're sure that query is correct if we've reached here.") - .value - .wrapping_add(db_hash); let cache_directive = self .policy_dispatcher From 093e7ba6ca833284d4f2af9a25421c70a1262f65 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 05/23] avoid caching error responses --- docs/CACHE.md | 9 ++++++--- .../frontend/client/query_engine/cache/context.rs | 13 ++++++++++++- .../client/query_engine/cache/integration.rs | 4 ++++ pgdog/src/frontend/client/query_engine/cache/mod.rs | 11 +++++++---- 4 files 
changed, 29 insertions(+), 8 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index fe342a7d1..a93c87611 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -86,9 +86,10 @@ Key methods: - `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` - Internally: `Arc>` -**`context.rs`** — Cache context held in `QueryEngineContext`: -- `CacheContext` with `cache_miss: Option<(u64, Option)>` and `response_buffer: Vec` -- `capture_response(message)` — stores message in buffer when cache miss is tracked + **`context.rs`** — Cache context held in `QueryEngineContext`: + - `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` + - `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages + - `reset()` — clears all state for per-query isolation **`integration.rs`** — Integration methods on `impl Cache`: - `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis @@ -233,6 +234,8 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 5. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. +6. **Do not cache error responses**. + --- ## What's Left To Do diff --git a/pgdog/src/frontend/client/query_engine/cache/context.rs b/pgdog/src/frontend/client/query_engine/cache/context.rs index 3234f6be1..2aaede4ff 100644 --- a/pgdog/src/frontend/client/query_engine/cache/context.rs +++ b/pgdog/src/frontend/client/query_engine/cache/context.rs @@ -1,17 +1,28 @@ -use crate::net::Message; +use crate::net::{Message, messages::Protocol}; /// Cache context to use in QueryEngineContext. 
#[derive(Default)] pub struct CacheContext { pub cache_miss: Option<(u64, Option)>, pub response_buffer: Vec, + pub had_error: bool, } impl CacheContext { /// Capture a response message for caching. pub fn capture_response(&mut self, message: Message) { if self.cache_miss.is_some() { + if message.code() == 'E' { + self.had_error = true; + } self.response_buffer.push(message); } } + + /// Reset the cache context for a new query. + pub fn reset(&mut self) { + self.cache_miss = None; + self.response_buffer.clear(); + self.had_error = false; + } } diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/client/query_engine/cache/integration.rs index f0986e232..39e7d966a 100644 --- a/pgdog/src/frontend/client/query_engine/cache/integration.rs +++ b/pgdog/src/frontend/client/query_engine/cache/integration.rs @@ -25,6 +25,10 @@ impl Cache { &self, context: &mut QueryEngineContext<'_>, ) -> CacheCheckResult { + if context.in_transaction() { + return CacheCheckResult::Passthrough; + } + let route = match context.client_request.route.as_ref() { Some(r) => r, None => return CacheCheckResult::Passthrough, diff --git a/pgdog/src/frontend/client/query_engine/cache/mod.rs b/pgdog/src/frontend/client/query_engine/cache/mod.rs index 012a53852..726243b19 100644 --- a/pgdog/src/frontend/client/query_engine/cache/mod.rs +++ b/pgdog/src/frontend/client/query_engine/cache/mod.rs @@ -50,6 +50,7 @@ impl Cache { CacheCheckResult::Hit { cached } => { debug!("Cache hit, serving from cache"); self.send_cached_response(context, cached).await?; + context.cache_context.reset(); return Ok(true); } CacheCheckResult::Miss { @@ -58,20 +59,22 @@ impl Cache { } => { context.cache_context.cache_miss = Some((cache_key_hash, ttl)); context.cache_context.response_buffer.clear(); + context.cache_context.had_error = false; debug!("Cache miss for key hash: {}", cache_key_hash); } CacheCheckResult::Passthrough => { - context.cache_context.cache_miss = None; + 
context.cache_context.reset(); } } - return Ok(false); + Ok(false) } /// Finalize caching by storing the response in Redis. - pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>,) { + pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>) { if let Some((cache_key, ttl)) = context.cache_context.cache_miss.take() { - if !context.cache_context.response_buffer.is_empty() { + if !context.cache_context.had_error && !context.cache_context.response_buffer.is_empty() + { let messages = std::mem::take(&mut context.cache_context.response_buffer); if let Err(e) = self.cache_response(cache_key, messages, ttl).await { debug!("Failed to cache response: {:?}", e); From eb977edb9fd9bc949c181eb4a12fa26f33aeeffe Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 06/23] fixed setting pgdog.cache via DSN options --- docs/CACHE.md | 27 +++++++++++++++++++------ pgdog/src/net/messages/hello.rs | 36 ++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index a93c87611..5815d7a86 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -211,6 +211,18 @@ SET pgdog.cache = 'cache'; SET pgdog.cache = 'cache ttl=300'; ``` +```sh +# Session-wide: all queries in this connection bypass cache +# Attention: this only supports `no_cache` with underscore +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dno_cache + +# Session-wide: cache all queries with default TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache + +# Session-wide: cache all queries with 5-minute TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%5C%20ttl%3D300 +``` + ### Priority Order Extractors are checked in order — first non-`None` result wins, then falls through to database config: @@ -236,21 +248,24 @@ SQL comment → pgdog.cache 
parameter → DB policy config → Auto-decisi 6. **Do not cache error responses**. +7. **Setting pgdog.cache via connection url doesn't work**. + --- ## What's Left To Do 1. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. -2. **Multi-step execution caching** — InsertSplit and ShardingKeyUpdate rewrite paths use process_server_message() which captures responses, but the finalize_cache() call happens after match command block. Need to verify caching works correctly for multi-step rewrites. +2. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) + +3. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -3. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) +4. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` -4. **max_result_size config** — Implemented but not exposed in the initial pgdog.toml. Worth documenting in the config. +5. 
**Magic numbers in send_cached_response()**. -5. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +6. **Make statistics collection async** — for auto policy. -6. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` ### Planned Tests @@ -259,4 +274,4 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. 4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these do not populate or consume the cache. 5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking. -6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. +6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. 
\ No newline at end of file diff --git a/pgdog/src/net/messages/hello.rs b/pgdog/src/net/messages/hello.rs index 84f901b06..436fb209f 100644 --- a/pgdog/src/net/messages/hello.rs +++ b/pgdog/src/net/messages/hello.rs @@ -58,9 +58,10 @@ impl Startup { let value = search_path(&value); params.insert(name, value); } else if name == "options" { + let value = options_unescape(&value); let kvs = value.split("-c"); for kv in kvs { - let mut nvs = kv.split("="); + let mut nvs = kv.splitn(2, "="); let name = nvs.next(); let value = nvs.next(); @@ -249,6 +250,26 @@ fn search_path(value: &str) -> ParameterValue { ParameterValue::Tuple(value) } +fn options_unescape(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let mut chars = input.chars().peekable(); + + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(&next) = chars.peek() { + chars.next(); + result.push(next); + } else { + result.push(c); + } + } else { + result.push(c); + } + } + + result +} + #[cfg(test)] mod test { use crate::net::messages::ToBytes; @@ -309,4 +330,17 @@ mod test { let startup = Startup::from_stream(&mut read).await.unwrap(); assert!(matches!(startup, Startup::GssEnc)); } + + #[test] + fn test_options_unescape() { + assert_eq!(options_unescape("cache\\ ttl=5"), "cache ttl=5"); + assert_eq!(options_unescape("cache\\\\ttl=5"), "cache\\ttl=5"); + assert_eq!(options_unescape("simple"), "simple"); + assert_eq!(options_unescape("a\\=b"), "a=b"); + assert_eq!(options_unescape("trail\\"), "trail\\"); + assert_eq!( + options_unescape("cache\\ ttl\\=5"), + "cache ttl=5" + ); + } } From 7a6d81ddfceab9b47f850c9a53598e1c4a0a6402 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 07/23] changed cache scope from connection to global --- docs/CACHE.md | 24 +++++--- .../{client/query_engine => }/cache/client.rs | 25 +++----- .../query_engine => }/cache/context.rs | 0 .../query_engine => }/cache/integration.rs | 55 
+++++++++-------- .../{client/query_engine => }/cache/mod.rs | 59 +++++++++++-------- .../{client/query_engine => }/cache/policy.rs | 0 .../{client/query_engine => }/cache/stats.rs | 0 .../frontend/client/query_engine/context.rs | 4 +- pgdog/src/frontend/client/query_engine/mod.rs | 27 +++++---- pgdog/src/frontend/mod.rs | 1 + 10 files changed, 110 insertions(+), 85 deletions(-) rename pgdog/src/frontend/{client/query_engine => }/cache/client.rs (93%) rename pgdog/src/frontend/{client/query_engine => }/cache/context.rs (100%) rename pgdog/src/frontend/{client/query_engine => }/cache/integration.rs (71%) rename pgdog/src/frontend/{client/query_engine => }/cache/mod.rs (56%) rename pgdog/src/frontend/{client/query_engine => }/cache/policy.rs (100%) rename pgdog/src/frontend/{client/query_engine => }/cache/stats.rs (100%) diff --git a/docs/CACHE.md b/docs/CACHE.md index 5815d7a86..3fc52462a 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -30,9 +30,9 @@ Added `cache: Cache` field to `General` struct — **cache config is global**, n Added `pub mod cache;` and `pub use cache::{CachePolicy, Cache};` to public exports. -#### 4. `pgdog/src/frontend/client/query_engine/cache/` (new module) +#### 4. `pgdog/src/frontend/cache/` (module) -**`mod.rs`** — Module exports and main `Cache` struct: +**`mod.rs`** — Module exports, global singleton, and main `Cache` struct: ```rust pub mod client; pub mod context; @@ -41,6 +41,7 @@ pub mod policy; pub mod stats; pub use client::CacheClient; +pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::{ CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, @@ -49,15 +50,17 @@ pub use policy::{ pub use stats::QueryStatsTracker; ``` -`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `CacheConfig`, `database`, `policy_dispatcher`. +`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `policy_dispatcher`. 
+ +**Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. Key methods: -- `new(cache_config, database)` — creates client, stats, dispatcher -- `try_read_cache(context)` — calls `cache_check()`, handles HIT/MISS/PASS-through -- `save_response_in_cache(context)` — finalizes by storing the captured response +- `new()` — creates client (reads config internally), stats, dispatcher +- `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through +- `save_response_in_cache(cache_context)` — finalizes by storing the captured response **`client.rs`** — Redis client wrapper using `fred` v9: -- `CacheClient::new(config)` — builds client from `&CacheConfig`, returns disabled stub if no config/URL +- `CacheClient::new()` — builds client from global `config().config.general.cache`, returns disabled stub if no config/URL - `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag - `get(&self, key)` — returns `Result>>`; fetches cached wire-protocol bytes - `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` @@ -78,7 +81,7 @@ Key methods: - `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter - `struct CachePolicyDispatcher`: chains extractors in priority order, returns first non-`None` result - Tier 1: Extractor result (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache` from comments/params) -- Tier 2: Database config `CachePolicy` (`NoCache` / `Cache` / `Auto`) +- Tier 2: Global config `CachePolicy` (`NoCache` / `Cache` / `Auto`) - Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` **`stats.rs`** — Per-fingerprint query statistics tracker: @@ -250,6 +253,8 @@ SQL 
comment → pgdog.cache parameter → DB policy config → Auto-decisi 7. **Setting pgdog.cache via connection url doesn't work**. +8. **Moved all cache-related structs from QueryEngine to Client** — now all cache structs including redis client are creating for whole pgdog's lifetime. + --- ## What's Left To Do @@ -266,6 +271,9 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 6. **Make statistics collection async** — for auto policy. +7. **Provide config hotswap**. + +8. **Review and rewrite CacheClient**. ### Planned Tests diff --git a/pgdog/src/frontend/client/query_engine/cache/client.rs b/pgdog/src/frontend/cache/client.rs similarity index 93% rename from pgdog/src/frontend/client/query_engine/cache/client.rs rename to pgdog/src/frontend/cache/client.rs index 9ae4577e0..dfede8e1d 100644 --- a/pgdog/src/frontend/client/query_engine/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use std::time::Duration; use tracing::{debug, error, info}; -use pgdog_config::Cache as CacheConfig; +use crate::config::config; const CACHE_KEY_PREFIX: &str = "pgdog:"; @@ -15,7 +15,6 @@ const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); #[derive(Clone)] pub struct CacheClient { client: Option, - config: CacheConfig, /// Master connection state flag. Set true only after PING succeeds /// on init or reconnect. Set false immediately on any error/timeout. 
redis_connected: Arc, @@ -27,7 +26,6 @@ impl std::fmt::Debug for CacheClient { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("CacheClient") .field("client", &self.client.as_ref().map(|_| "...")) - .field("config", &self.config) .field( "redis_connected", &self.redis_connected.load(Ordering::Relaxed), @@ -38,13 +36,12 @@ impl std::fmt::Debug for CacheClient { } impl CacheClient { - pub fn new(config: &CacheConfig) -> Self { - let cache_config = config.clone(); + pub fn new() -> Self { + let cache_config = &config().config.general.cache; if !cache_config.is_enabled() || cache_config.redis_url.is_none() { return Self { client: None, - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), }; @@ -57,7 +54,6 @@ impl CacheClient { error!("Failed to parse Redis URL: {}", e); return Self { client: None, - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), }; @@ -70,7 +66,6 @@ impl CacheClient { error!("Failed to build Redis client: {}", e); return Self { client: None, - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), }; @@ -79,7 +74,6 @@ impl CacheClient { Self { client: Some(client), - config: cache_config, redis_connected: Arc::new(AtomicBool::new(false)), reconnecting: Arc::new(AtomicBool::new(false)), } @@ -252,7 +246,9 @@ impl CacheClient { let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); - if let Some(max_size) = self.config.max_result_size() { + let cache_config = &config().config.general.cache; + + if let Some(max_size) = cache_config.max_result_size() { if value.len() > max_size { debug!( "Skipping cache for key {}: size {} exceeds max {}", @@ -264,7 +260,7 @@ impl CacheClient { } } - let ttl_seconds = ttl.unwrap_or_else(|| self.config.ttl()) as i64; + let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl()) as 
i64; match tokio::time::timeout( REDIS_OPERATION_TIMEOUT, @@ -295,12 +291,9 @@ impl CacheClient { } } - pub fn config(&self) -> &CacheConfig { - &self.config - } - pub fn is_enabled(&self) -> bool { - self.client.is_some() && self.config.is_enabled() + let cache_config = &config().config.general.cache; + self.client.is_some() && cache_config.is_enabled() } } diff --git a/pgdog/src/frontend/client/query_engine/cache/context.rs b/pgdog/src/frontend/cache/context.rs similarity index 100% rename from pgdog/src/frontend/client/query_engine/cache/context.rs rename to pgdog/src/frontend/cache/context.rs diff --git a/pgdog/src/frontend/client/query_engine/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs similarity index 71% rename from pgdog/src/frontend/client/query_engine/cache/integration.rs rename to pgdog/src/frontend/cache/integration.rs index 39e7d966a..4bf7d0b2b 100644 --- a/pgdog/src/frontend/client/query_engine/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,13 +1,14 @@ use std::hash::{Hash, Hasher}; use crate::{ - frontend::client::query_engine::{cache::Cache, QueryEngineContext}, - net::{FromBytes, Message, ToBytes}, + config::config, + frontend::ClientRequest, + net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; use tracing::debug; -use super::CachePolicyResolver; +use super::{Cache, CachePolicyResolver}; pub enum CacheCheckResult { Hit { @@ -23,48 +24,52 @@ pub enum CacheCheckResult { impl Cache { pub(super) async fn cache_check( &self, - context: &mut QueryEngineContext<'_>, - ) -> CacheCheckResult { - if context.in_transaction() { - return CacheCheckResult::Passthrough; + in_transaction: bool, + client_request: &ClientRequest, + params: &Parameters, + ) -> Result { + if in_transaction { + return Ok(CacheCheckResult::Passthrough); } - let route = match context.client_request.route.as_ref() { + let route = match client_request.route.as_ref() { Some(r) => r, - None => return CacheCheckResult::Passthrough, + None 
=> return Ok(CacheCheckResult::Passthrough), }; // Detect read-only status via the AST parser's route classification. // When caching is enabled, the query parser is auto-enabled. let is_read = route.is_read(); if !is_read { - return CacheCheckResult::Passthrough; + return Ok(CacheCheckResult::Passthrough); } - let query = match context.client_request.query() { + let query = match client_request.query() { Ok(Some(q)) => q, - _ => return CacheCheckResult::Passthrough, + _ => return Ok(CacheCheckResult::Passthrough), }; + let user = params.get_required("user")?; + let database = params.get_default("database", user); let cache_key_hash = { let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); - self.database.hash(&mut hasher); + database.hash(&mut hasher); query.query().hash(&mut hasher); hasher.finish() }; - let cache_directive = self - .policy_dispatcher - .extract(query.query(), context.params); + let cache_directive = self.policy_dispatcher.extract(query.query(), params); + let cache_config = &config().config.general.cache; + debug!( "cache_check: sql={}, db_config={:?}", query.query(), - self.config + cache_config ); let decision = CachePolicyResolver::resolve( cache_directive, - &self.config, + cache_config, is_read, cache_key_hash, &self.stats, @@ -72,31 +77,31 @@ impl Cache { .await; if !decision.should_cache() { - return CacheCheckResult::Passthrough; + return Ok(CacheCheckResult::Passthrough); } match self.client.get(cache_key_hash).await { Ok(Some(cached)) => { self.stats.record_hit(cache_key_hash, cached.len()).await; - CacheCheckResult::Hit { cached } + Ok(CacheCheckResult::Hit { cached }) } Ok(None) => { self.stats.record_miss(cache_key_hash).await; - CacheCheckResult::Miss { + Ok(CacheCheckResult::Miss { cache_key_hash, ttl: decision.ttl(), - } + }) } Err(e) => { debug!("Cache get error: {}", e); - CacheCheckResult::Passthrough + Ok(CacheCheckResult::Passthrough) } } } pub(super) async fn send_cached_response( &self, - context: &mut 
QueryEngineContext<'_>, + stream: &mut Stream, cached: Vec, ) -> Result<(), crate::frontend::Error> { let mut offset = 0; @@ -124,7 +129,7 @@ impl Cache { let msg = Message::from_bytes(msg_bytes)?; offset = end; - context.stream.send_flush(&msg).await?; + stream.send_flush(&msg).await?; } Ok(()) diff --git a/pgdog/src/frontend/client/query_engine/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs similarity index 56% rename from pgdog/src/frontend/client/query_engine/cache/mod.rs rename to pgdog/src/frontend/cache/mod.rs index 726243b19..44d6ef89f 100644 --- a/pgdog/src/frontend/client/query_engine/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -5,65 +5,79 @@ pub mod policy; pub mod stats; pub use client::CacheClient; +pub use context::CacheContext; pub use integration::CacheCheckResult; -use pgdog_config::Cache as CacheConfig; pub use policy::{ CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, CommentCacheExtractor, ParameterCacheExtractor, }; pub use stats::QueryStatsTracker; + +use once_cell::sync::Lazy; +use std::sync::Arc; use tracing::debug; -use crate::frontend::client::query_engine::QueryEngineContext; +use crate::{ + frontend::ClientRequest, + net::{Parameters, Stream}, +}; #[derive(Debug)] pub struct Cache { client: CacheClient, stats: QueryStatsTracker, - config: CacheConfig, - database: String, policy_dispatcher: CachePolicyDispatcher, } +static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); + +pub fn cache() -> Arc { + CACHE.clone() +} + impl Cache { - pub fn new(cache_config: &CacheConfig, database: &str) -> Self { + fn new() -> Self { let mut dispatcher = CachePolicyDispatcher::new(); dispatcher.add_extractor(Box::new(CommentCacheExtractor)); dispatcher.add_extractor(Box::new(ParameterCacheExtractor::new())); Cache { - client: CacheClient::new(cache_config), + client: CacheClient::new(), stats: QueryStatsTracker::default(), - config: cache_config.clone(), - database: database.to_string(), policy_dispatcher: 
dispatcher, } } pub async fn try_read_cache( - &mut self, - context: &mut QueryEngineContext<'_>, + &self, + cache_context: &mut CacheContext, + in_transaction: bool, + client_request: &ClientRequest, + params: &Parameters, + stream: &mut Stream, ) -> Result { - let cache_result = self.cache_check(context).await; + let cache_result = self + .cache_check(in_transaction, client_request, params) + .await?; match cache_result { CacheCheckResult::Hit { cached } => { debug!("Cache hit, serving from cache"); - self.send_cached_response(context, cached).await?; - context.cache_context.reset(); + self.send_cached_response(stream, cached).await?; + cache_context.reset(); return Ok(true); } CacheCheckResult::Miss { cache_key_hash, ttl, } => { - context.cache_context.cache_miss = Some((cache_key_hash, ttl)); - context.cache_context.response_buffer.clear(); - context.cache_context.had_error = false; + cache_context.cache_miss = Some((cache_key_hash, ttl)); + cache_context.response_buffer.clear(); + cache_context.had_error = false; debug!("Cache miss for key hash: {}", cache_key_hash); } CacheCheckResult::Passthrough => { - context.cache_context.reset(); + cache_context.reset(); } } @@ -71,15 +85,14 @@ impl Cache { } /// Finalize caching by storing the response in Redis. 
- pub async fn save_response_in_cache(&self, context: &mut QueryEngineContext<'_>) { - if let Some((cache_key, ttl)) = context.cache_context.cache_miss.take() { - if !context.cache_context.had_error && !context.cache_context.response_buffer.is_empty() - { - let messages = std::mem::take(&mut context.cache_context.response_buffer); + pub async fn save_response_in_cache(&self, cache_context: &mut CacheContext) { + if let Some((cache_key, ttl)) = cache_context.cache_miss.take() { + if !cache_context.had_error && !cache_context.response_buffer.is_empty() { + let messages = std::mem::take(&mut cache_context.response_buffer); if let Err(e) = self.cache_response(cache_key, messages, ttl).await { debug!("Failed to cache response: {:?}", e); } } } } -} \ No newline at end of file +} diff --git a/pgdog/src/frontend/client/query_engine/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs similarity index 100% rename from pgdog/src/frontend/client/query_engine/cache/policy.rs rename to pgdog/src/frontend/cache/policy.rs diff --git a/pgdog/src/frontend/client/query_engine/cache/stats.rs b/pgdog/src/frontend/cache/stats.rs similarity index 100% rename from pgdog/src/frontend/client/query_engine/cache/stats.rs rename to pgdog/src/frontend/cache/stats.rs diff --git a/pgdog/src/frontend/client/query_engine/context.rs b/pgdog/src/frontend/client/query_engine/context.rs index a2cdc7ca2..42ef8b21f 100644 --- a/pgdog/src/frontend/client/query_engine/context.rs +++ b/pgdog/src/frontend/client/query_engine/context.rs @@ -1,7 +1,7 @@ use crate::{ backend::pool::{connection::mirror::Mirror, stats::MemoryStats}, frontend::{ - Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, query_engine::cache::context::CacheContext, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult + Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult, 
cache::context::CacheContext }, net::{BackendKeyData, Parameters, Stream}, }; @@ -38,7 +38,7 @@ pub struct QueryEngineContext<'a> { /// Rewrite result. pub(super) rewrite_result: Option, /// Cache context. - pub(super) cache_context: CacheContext + pub(super) cache_context: CacheContext, } impl<'a> QueryEngineContext<'a> { diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index d59adba46..32b753205 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -2,11 +2,10 @@ use crate::{ backend::pool::{Connection, Request}, config::config, frontend::{ - BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, client::query_engine::{ - cache::Cache, - hooks::QueryEngineHooks, - route_query::ClusterCheck, - }, router::{Route, parser::Shard} + cache::cache, + client::query_engine::{hooks::QueryEngineHooks, route_query::ClusterCheck}, + router::{parser::Shard, Route}, + BufferedQuery, Client, ClientComms, Command, Error, Router, RouterContext, Stats, }, net::{ErrorResponse, Message, Parameters}, state::State, @@ -14,7 +13,6 @@ use crate::{ use tracing::debug; -pub mod cache; pub mod connect; pub mod context; pub mod deallocate; @@ -58,7 +56,6 @@ pub struct QueryEngine { notify_buffer: NotifyBuffer, pending_explain: Option, hooks: QueryEngineHooks, - cache: Cache, } impl QueryEngine { @@ -68,7 +65,6 @@ impl QueryEngine { let database = params.get_default("database", user); let backend = Connection::new(user, database, admin)?; - let cache_config = &config().config.general.cache; Ok(Self { backend, @@ -81,7 +77,6 @@ impl QueryEngine { pending_explain: None, begin_stmt: None, router: Router::default(), - cache: Cache::new(cache_config, database), }) } @@ -135,7 +130,17 @@ impl QueryEngine { return Ok(()); } - if self.cache.try_read_cache(context).await? 
{ + let in_transaction = context.in_transaction(); + if cache() + .try_read_cache( + &mut context.cache_context, + in_transaction, + context.client_request, + context.params, + context.stream, + ) + .await? + { self.update_stats(context); return Ok(()); } @@ -239,7 +244,7 @@ impl QueryEngine { command => self.unknown_command(context, command.clone()).await?, } - self.cache.save_response_in_cache(context).await; + cache().save_response_in_cache(&mut context.cache_context).await; self.hooks.after_execution(context)?; diff --git a/pgdog/src/frontend/mod.rs b/pgdog/src/frontend/mod.rs index 284b777b0..aa1bbe523 100644 --- a/pgdog/src/frontend/mod.rs +++ b/pgdog/src/frontend/mod.rs @@ -1,6 +1,7 @@ //! pgDog frontend manages connections to clients. pub mod buffered_query; +pub mod cache; pub mod client; pub mod client_request; pub mod comms; From b44783dcf7c42ff674d334a76322b1d5e5c909dc Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 08/23] reexport cache config --- pgdog/src/config/cache.rs | 1 + pgdog/src/config/mod.rs | 2 ++ pgdog/src/frontend/cache/policy.rs | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 pgdog/src/config/cache.rs diff --git a/pgdog/src/config/cache.rs b/pgdog/src/config/cache.rs new file mode 100644 index 000000000..a089ff680 --- /dev/null +++ b/pgdog/src/config/cache.rs @@ -0,0 +1 @@ +pub use pgdog_config::cache::*; \ No newline at end of file diff --git a/pgdog/src/config/mod.rs b/pgdog/src/config/mod.rs index 835a0f10e..6ecd3785d 100644 --- a/pgdog/src/config/mod.rs +++ b/pgdog/src/config/mod.rs @@ -1,6 +1,7 @@ //! Configuration. 
// Submodules +pub mod cache; pub mod convert; pub mod core; pub mod database; @@ -15,6 +16,7 @@ pub mod rewrite; pub mod sharding; pub mod users; +pub use cache::*; pub use core::{Config, ConfigAndUsers}; pub use database::{Database, Role}; pub use error::Error; diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 582fde578..dcf6965f7 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -1,6 +1,6 @@ use core::fmt; -use pgdog_config::{Cache as CacheConfig, CachePolicy}; +use crate::config::{Cache as CacheConfig, CachePolicy}; use tracing::debug; use super::stats::QueryStatsTracker; From a71ec7c723b509ac0a81ac8f2bb0bb474a8368c9 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 09/23] Use built-in query comment hints --- docs/CACHE.md | 158 +++---- pgdog-config/src/cache.rs | 4 +- pgdog/src/frontend/cache/integration.rs | 22 +- pgdog/src/frontend/cache/mod.rs | 11 +- pgdog/src/frontend/cache/policy.rs | 415 +++--------------- pgdog/src/frontend/router/parser/cache/ast.rs | 8 +- pgdog/src/frontend/router/parser/comment.rs | 119 ++++- pgdog/src/net/parameter.rs | 1 + 8 files changed, 259 insertions(+), 479 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 3fc52462a..743a2ab25 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -8,9 +8,9 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on ## Implementation -### Files Added +### Configuration (`pgdog-config`) -#### 1. `pgdog-config/src/cache.rs` (new file) +**`cache.rs`** — Cache configuration types: **CachePolicy enum:** `NoCache` (default), `Cache`, `Auto`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. 
@@ -22,15 +22,11 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on - `max_result_size: Option` — max cached result bytes - Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` -#### 2. `pgdog-config/src/general.rs` +**`general.rs`** — `General` struct holds `cache: Cache` field. **Cache config is global.** -Added `cache: Cache` field to `General` struct — **cache config is global**, not per-database. +**`lib.rs`** — Exports `pub mod cache;` and `pub use cache::{CachePolicy, Cache};`. -#### 3. `pgdog-config/src/lib.rs` - -Added `pub mod cache;` and `pub use cache::{CachePolicy, Cache};` to public exports. - -#### 4. `pgdog/src/frontend/cache/` (module) +### Cache Module (`pgdog/src/frontend/cache/`) **`mod.rs`** — Module exports, global singleton, and main `Cache` struct: ```rust @@ -43,19 +39,16 @@ pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; -pub use policy::{ - CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, - CommentCacheExtractor, ParameterCacheExtractor, -}; +pub use policy::CacheDecision; pub use stats::QueryStatsTracker; ``` -`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`, `policy_dispatcher`. +`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`. **Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. 
Key methods: -- `new()` — creates client (reads config internally), stats, dispatcher +- `new()` — creates client (reads config internally) and stats tracker - `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through - `save_response_in_cache(cache_context)` — finalizes by storing the captured response @@ -74,13 +67,13 @@ Key methods: - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net -**`policy.rs`** — 3-tier policy resolution with trait-based extraction: -- `CacheDirective` enum: `None`, `Cache { ttl_seconds }`, `NoCache` -- `trait CachePolicyExtractor`: abstract interface with `fn extract(query, params) -> CacheDirective` -- `struct CommentCacheExtractor`: scans SQL query string with standalone regex — **works even when parser is bypassed** -- `struct ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter -- `struct CachePolicyDispatcher`: chains extractors in priority order, returns first non-`None` result -- Tier 1: Extractor result (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache` from comments/params) +**`policy.rs`** — 3-tier policy resolution via free functions: +- `CacheDirective` enum: `Cache { ttl_seconds }`, `NoCache` (default) +- `CacheDecision` enum: `Skip`, `Cache(Option)` +- `resolve(client_request, params, is_read, cache_key_hash, stats)` — main resolver function, chains all tiers +- `get_cache_directive(client_request, params)` — comment hint (from AST) has priority over connection parameter (`pgdog.cache`) +- `extract_parameter_directive(params)` — parses `pgdog.cache` parameter: `no_cache`, `cache`, `cache ttl=N` +- Tier 1: Extractor directive (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache`) - Tier 2: Global config `CachePolicy` (`NoCache` / `Cache` / `Auto`) - Tier 3: `auto_decision()` — caches when 
`hit_count > miss_count` AND `avg_result_size < 1MB` @@ -89,47 +82,46 @@ Key methods: - `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` - Internally: `Arc>` - **`context.rs`** — Cache context held in `QueryEngineContext`: - - `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` - - `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages - - `reset()` — clears all state for per-query isolation +**`context.rs`** — Cache context held in `QueryEngineContext`: +- `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` +- `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages +- `reset()` — clears all state for per-query isolation **`integration.rs`** — Integration methods on `impl Cache`: -- `cache_check()` — main entry point, checks route, extracts directive, resolves policy, checks Redis +- `cache_check()` — main entry point, checks route, calls `policy::resolve()`, checks Redis - `send_cached_response()` — deserializes wire-format bytes and sends to client - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis - Cache key: XXH3 hash of `database_name + raw_query_string` -### Files Modified - -#### 5. `pgdog/Cargo.toml` - -- Added `fred = { version = "9", features = ["enable-rustls"] }` to dependencies - -#### 6. 
`pgdog/src/frontend/client/query_engine/mod.rs` +### Query Engine Integration -- Added `pub mod cache;` module declaration -- Added `cache: Cache` field to `QueryEngine` -- `new()` loads `cache_config` from `config().config.general.cache` and creates `Cache::new(cache_config, database)` +**`pgdog/src/frontend/client/query_engine/mod.rs`** +- Declares `pub mod cache;` module +- `QueryEngine` holds `cache: Cache` field - `handle()` flow: after `route_query()` and before `before_execution()`, calls `self.cache.try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. -- After `match command`, calls `self.cache.save_response_in_cache(context)` to store the captured response in Redis. +- After `match command`, calls `self.cache.save_response_in_cache(context)` to finalize caching. -#### 7. `pgdog/src/frontend/client/query_engine/query.rs` +**`pgdog/src/frontend/client/query_engine/query.rs`** +- `process_server_message()` calls `context.cache_context.capture_response(message.clone())`. -- `process_server_message()` added cache capture: `context.cache_context.capture_response(message.clone())`. +**`pgdog/src/frontend/client/query_engine/context.rs`** +- `QueryEngineContext` holds `cache_context: CacheContext` field. -#### 8. `pgdog/src/frontend/client/query_engine/context.rs` +### Backend and Config Integration -- Added `cache_context: CacheContext` field to `QueryEngineContext`. +**`pgdog/src/backend/pool/cluster.rs`** +- `ClusterConfig` and `Cluster` hold `cache_enabled: bool` field +- Query parser requirement check includes `|| self.cache_enabled()` — when caching is on, the query parser is forced on. -#### 9. `pgdog/src/backend/pool/cluster.rs` +**`pgdog-config/src/core.rs`** +- Startup warning emitted when `cache.is_enabled()` and parser is `Off` or `SessionControl`. 
-- Added `cache_enabled: bool` field to `ClusterConfig` and `Cluster` -- `cluster.rs` adds `|| self.cache_enabled()` in query parser requirement check — when caching is on, the query parser is forced on alongside `dry_run`, `prepared_statements`, `pub_sub`, and `regex_parser` +### Dependencies -#### 10. `pgdog-config/src/core.rs` - -- Added startup warning: `cache requires enabled query parser but it's disabled or session controlled` when `cache.is_enabled()` and parser is `Off` or `SessionControl` +**`pgdog/Cargo.toml`** +fred = { version = "9", features = ["enable-rustls"] } +scc = "3.7" +xxhash-rust = { version = "0.8", features = ["xxh3"]} --- @@ -145,40 +137,11 @@ Key methods: | Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | | Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | -| Cache key | `pg_query::fingerprint(query).value.wrapping_add(db_hash)` where `db_hash = DefaultHasher of database name` | +| Cache key | XXH3 hash of `database_name + raw_query_string` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | --- -## Bugs Found & Fixed - -1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). - -2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. - -3. 
**Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. - -4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. - -5. **DB cache config defaults** - Observation: `Cache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Add `policy = "cache"` to pgdog.toml. - ---- - -## Refactoring: Decoupled Cache Policy Extraction - -The original implementation entangled cache directive parsing with pgdog's general comment parser, which only activates when the full query parser runs. This meant `/* pgdog_cache: ... */` annotations were silently ignored for simple queries and when `query_parser_bypass()` triggered. 
- -**What was done:** - -- New **`cache/`** module created under `query_engine/` -- `CachePolicyExtractor` trait with `fn extract(query, params) -> CacheDirective` -- `CommentCacheExtractor`: standalone regex scan on raw query string — works independent of AST parser -- `ParameterCacheExtractor`: reads `pgdog.cache` connection startup parameter -- `CachePolicyDispatcher`: chains extractors, returns first non-`None` result -- `Cache` struct as abstraction layer over client, stats, config, and dispatcher -- `CacheContext` struct holds `cache_miss` and `response_buffer` per-query -- Cache integration happens via `try_read_cache()` and `save_response_in_cache()` methods on `Cache` - ## How to Control Cache ### SQL Comments @@ -187,7 +150,7 @@ Add a C-style comment before your query. The first matching directive wins: ```sql -- Force bypass cache for this query -/* pgdog_cache: no-cache */ +/* pgdog_cache: no_cache */ SELECT * FROM users WHERE id = 1; -- Cache with database default TTL @@ -205,7 +168,7 @@ Set `pgdog.cache` at connection time (via DSN options) or with `SET` after conne ```sql -- Session-wide: all queries in this connection bypass cache -SET pgdog.cache = 'no-cache'; +SET pgdog.cache = 'no_cache'; -- Session-wide: cache all queries with default TTL SET pgdog.cache = 'cache'; @@ -216,7 +179,6 @@ SET pgdog.cache = 'cache ttl=300'; ```sh # Session-wide: all queries in this connection bypass cache -# Attention: this only supports `no_cache` with underscore psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dno_cache # Session-wide: cache all queries with default TTL @@ -228,7 +190,7 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c ### Priority Order -Extractors are checked in order — first non-`None` result wins, then falls through to database config: +Sources are checked in order — first non-None result wins, then falls through to global config: ``` SQL comment → pgdog.cache parameter → DB 
policy config → Auto-decision @@ -239,21 +201,33 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi ## Completed -1. **Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. +1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). + +2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. + +3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. + +4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. 
+ +5. **DB cache config defaults** - Observation: `Cache.policy` defaults to `CachePolicy::NoCache`. Even with `enabled = true`, caching is skipped unless policy is explicitly set. User action taken: Add `policy = "cache"` to pgdog.toml. -2. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. +6. **Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. -3. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. +7. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. -4. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. +8. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -5. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. +9. 
**Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. -6. **Do not cache error responses**. +10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. -7. **Setting pgdog.cache via connection url doesn't work**. +11. **Do not cache error responses**. -8. **Moved all cache-related structs from QueryEngine to Client** — now all cache structs including redis client are creating for whole pgdog's lifetime. +12. **Setting pgdog.cache via connection url doesn't work** — now works. + +13. **Moved all cache-related structs from QueryEngine to Client** — now all cache structs including redis client are creating for whole pgdog's lifetime. + +14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors (`CachePolicyExtractor`, `CommentCacheExtractor`, `ParameterCacheExtractor`, `CachePolicyDispatcher`, `CachePolicyResolver`) replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. `CacheDirective::None` removed in favor of `Option` with `NoCache` as default. Parameter format unified to `no_cache` (underscore, not dash). --- @@ -275,6 +249,10 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 8. 
**Review and rewrite CacheClient**. +9. **Force-cache hint support**. + +10. **Add cache config to .schema**. + ### Planned Tests 1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index ef1c44530..0afd7c7ea 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -21,7 +21,7 @@ impl std::str::FromStr for CachePolicy { fn from_str(s: &str) -> Result { match s.to_lowercase().as_str() { - "no_cache" | "no-cache" => Ok(Self::NoCache), + "no_cache" => Ok(Self::NoCache), "cache" => Ok(Self::Cache), "auto" => Ok(Self::Auto), _ => Err(format!("Invalid cache policy: {}", s)), @@ -74,4 +74,4 @@ impl Cache { pub fn max_result_size(&self) -> Option { self.max_result_size } -} \ No newline at end of file +} diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 4bf7d0b2b..59245b1a6 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,14 +1,13 @@ use std::hash::{Hash, Hasher}; use crate::{ - config::config, frontend::ClientRequest, net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; use tracing::debug; -use super::{Cache, CachePolicyResolver}; +use super::{policy, Cache}; pub enum CacheCheckResult { Hit { @@ -58,23 +57,8 @@ impl Cache { hasher.finish() }; - let cache_directive = self.policy_dispatcher.extract(query.query(), params); - let cache_config = &config().config.general.cache; - - debug!( - "cache_check: sql={}, db_config={:?}", - query.query(), - cache_config - ); - - let decision = CachePolicyResolver::resolve( - cache_directive, - cache_config, - is_read, - cache_key_hash, - &self.stats, - ) - .await; + let decision = + policy::resolve(client_request, params, is_read, cache_key_hash, &self.stats).await; if !decision.should_cache() { return 
Ok(CacheCheckResult::Passthrough); diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 44d6ef89f..a28ceca3a 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -7,10 +7,7 @@ pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; -pub use policy::{ - CacheDecision, CachePolicyDispatcher, CachePolicyExtractor, CachePolicyResolver, - CommentCacheExtractor, ParameterCacheExtractor, -}; +pub use policy::CacheDecision; pub use stats::QueryStatsTracker; use once_cell::sync::Lazy; @@ -26,7 +23,6 @@ use crate::{ pub struct Cache { client: CacheClient, stats: QueryStatsTracker, - policy_dispatcher: CachePolicyDispatcher, } static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); @@ -37,14 +33,9 @@ pub fn cache() -> Arc { impl Cache { fn new() -> Self { - let mut dispatcher = CachePolicyDispatcher::new(); - dispatcher.add_extractor(Box::new(CommentCacheExtractor)); - dispatcher.add_extractor(Box::new(ParameterCacheExtractor::new())); - Cache { client: CacheClient::new(), stats: QueryStatsTracker::default(), - policy_dispatcher: dispatcher, } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index dcf6965f7..b186bee8e 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -1,198 +1,18 @@ -use core::fmt; - -use crate::config::{Cache as CacheConfig, CachePolicy}; -use tracing::debug; - use super::stats::QueryStatsTracker; - +use crate::config::{config, CachePolicy}; +use crate::frontend::ClientRequest; use crate::net::parameter::ParameterValue; use crate::net::Parameters; -use once_cell::sync::Lazy; -use regex::Regex; -static CACHE: Lazy = Lazy::new(|| { - Regex::new(r#"pgdog_cache: *(no-cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() -}); - -/// Cache directive from SQL comment. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum CacheDirective { - #[default] - None, Cache { ttl_seconds: Option, }, + #[default] NoCache, } -impl CacheDirective { - pub fn is_cache(&self) -> bool { - matches!(self, CacheDirective::Cache { .. }) - } - - pub fn is_no_cache(&self) -> bool { - matches!(self, CacheDirective::NoCache) - } - - pub fn ttl(&self) -> Option { - match self { - CacheDirective::Cache { ttl_seconds } => *ttl_seconds, - _ => None, - } - } -} - -pub trait CachePolicyExtractor: Send + Sync + fmt::Debug { - fn extract(&self, query: &str, params: &Parameters) -> CacheDirective; -} - -#[derive(Debug)] -pub struct CommentCacheExtractor; - -impl CachePolicyExtractor for CommentCacheExtractor { - fn extract(&self, query: &str, _params: &Parameters) -> CacheDirective { - for cap in CACHE.captures_iter(query) { - if let Some(action) = cap.get(1) { - let action = action.as_str(); - if action == "no-cache" { - return CacheDirective::NoCache; - } else if action.starts_with("cache") { - let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); - return CacheDirective::Cache { ttl_seconds: ttl }; - } - } else { - return CacheDirective::Cache { ttl_seconds: None }; - } - } - CacheDirective::None - } -} - -#[derive(Debug)] -pub struct ParameterCacheExtractor { - key: String, -} - -impl ParameterCacheExtractor { - pub fn new() -> Self { - Self { - key: "pgdog.cache".to_string(), - } - } -} - -impl CachePolicyExtractor for ParameterCacheExtractor { - fn extract(&self, _query: &str, params: &Parameters) -> CacheDirective { - let value = match params.get(&self.key) { - Some(p) => p, - None => return CacheDirective::None, - }; - - let s = match value { - ParameterValue::String(v) => v.as_str(), - _ => return CacheDirective::None, - }; - - match s { - "no-cache" => CacheDirective::NoCache, - "cache" => CacheDirective::Cache { ttl_seconds: None }, - _ => { - if let Some(ttl) = s - .strip_prefix("cache ttl=") - .and_then(|t| t.trim().parse::().ok()) 
- { - CacheDirective::Cache { - ttl_seconds: Some(ttl), - } - } else if let Some(ttl) = s - .strip_prefix("cache ttl =") - .and_then(|t| t.trim().parse::().ok()) - { - CacheDirective::Cache { - ttl_seconds: Some(ttl), - } - } else { - CacheDirective::None - } - } - } - } -} - -#[derive(Debug)] -pub struct CachePolicyDispatcher { - extractors: Vec>, -} - -impl CachePolicyDispatcher { - pub fn new() -> Self { - Self { - extractors: Vec::new(), - } - } - - pub fn add_extractor(&mut self, extractor: Box) { - self.extractors.push(extractor); - } - - pub fn extract(&self, query: &str, params: &Parameters) -> CacheDirective { - for extractor in &self.extractors { - let result = extractor.extract(query, params); - if result != CacheDirective::None { - debug!("Cache directive for query {} is {:?}", query, result); - return result; - } - } - CacheDirective::None - } - - pub fn is_empty(&self) -> bool { - self.extractors.is_empty() - } -} - -pub struct CachePolicyResolver; - -impl CachePolicyResolver { - pub async fn resolve( - cache_directive: CacheDirective, - cache_config: &CacheConfig, - is_read: bool, - cache_key_hash: u64, - stats: &QueryStatsTracker, - ) -> CacheDecision { - if !is_read { - return CacheDecision::Skip; - } - - if let CacheDirective::NoCache = cache_directive { - return CacheDecision::Skip; - } - - if let CacheDirective::Cache { ttl_seconds } = cache_directive { - return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))); - } - - match cache_config.policy() { - CachePolicy::NoCache => CacheDecision::Skip, - CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), - CachePolicy::Auto => Self::auto_decision(cache_key_hash, stats).await, - } - } - - async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { - let query_stats = stats.get(cache_key_hash).await; - - if query_stats.hit_count > query_stats.miss_count - && query_stats.avg_result_size() < 1_000_000 - { - CacheDecision::Cache(None) - } else { 
- CacheDecision::Skip - } - } -} - pub enum CacheDecision { Skip, Cache(Option), @@ -211,188 +31,83 @@ impl CacheDecision { } } -#[cfg(test)] -mod tests { - use super::*; +const KEY: &str = "pgdog.cache"; - #[tokio::test] - async fn test_skip_for_writes() { - let cache_config = CacheConfig { - enabled: Some(true), - policy: Some(CachePolicy::Cache), - ttl: None, - redis_url: None, - max_result_size: None, - }; - let decision = CachePolicyResolver::resolve( - CacheDirective::None, - &cache_config, - false, - 0xAABBCCDD, - &QueryStatsTracker::default(), - ) - .await; - assert!(!decision.should_cache()); - } +pub async fn resolve( + client_request: &ClientRequest, + params: &Parameters, + is_read: bool, + cache_key_hash: u64, + stats: &QueryStatsTracker, +) -> CacheDecision { + let cache_config = &config().config.general.cache; - #[tokio::test] - async fn test_no_cache_directive() { - let cache_config = CacheConfig { - enabled: Some(true), - policy: Some(CachePolicy::Cache), - ttl: None, - redis_url: None, - max_result_size: None, - }; - let decision = CachePolicyResolver::resolve( - CacheDirective::NoCache, - &cache_config, - true, - 0xAABBCCDD, - &QueryStatsTracker::default(), - ) - .await; - assert!(!decision.should_cache()); + if !is_read { + return CacheDecision::Skip; } - #[tokio::test] - async fn test_cache_directive_with_ttl() { - let cache_config = CacheConfig { - enabled: Some(true), - policy: Some(CachePolicy::NoCache), - ttl: None, - redis_url: None, - max_result_size: None, - }; - let decision = CachePolicyResolver::resolve( - CacheDirective::Cache { - ttl_seconds: Some(120), - }, - &cache_config, - true, - 0xAABBCCDD, - &QueryStatsTracker::default(), - ) - .await; - assert!(decision.should_cache()); - assert_eq!(decision.ttl(), Some(120)); - } - - #[test] - fn test_comment_extractor_no_cache() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = - extractor.extract("SELECT * FROM users /* pgdog_cache: 
no-cache */", &params); - assert!(matches!(directive, CacheDirective::NoCache)); - } - - #[test] - fn test_comment_extractor_cache_default_ttl() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = extractor.extract("SELECT * FROM users /* pgdog_cache: cache */", &params); - match directive { - CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), - _ => panic!("Expected Cache directive"), + let cache_directive = get_cache_directive(client_request, params); + match cache_directive { + Some(CacheDirective::NoCache) => return CacheDecision::Skip, + Some(CacheDirective::Cache { ttl_seconds }) => { + return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))) } + _ => (), } - #[test] - fn test_comment_extractor_cache_with_ttl() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = extractor.extract( - "SELECT * FROM users /* pgdog_cache: cache ttl=60 */", - &params, - ); - match directive { - CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), - _ => panic!("Expected Cache directive"), - } + match cache_config.policy() { + CachePolicy::NoCache => CacheDecision::Skip, + CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), + CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, } +} - #[test] - fn test_comment_extractor_no_directive() { - let extractor = CommentCacheExtractor; - let params = Parameters::default(); - let directive = extractor.extract("SELECT * FROM users", &params); - assert!(matches!(directive, CacheDirective::None)); +async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { + let query_stats = stats.get(cache_key_hash).await; + if query_stats.hit_count > query_stats.miss_count && query_stats.avg_result_size() < 1_000_000 { + CacheDecision::Cache(None) + } else { + CacheDecision::Skip } +} - #[test] - fn test_parameter_extractor_no_cache() { - let extractor =
ParameterCacheExtractor::new(); - let mut params = Parameters::default(); - params.insert("pgdog.cache", "no-cache"); - let directive = extractor.extract("SELECT * FROM users", &params); - assert!(matches!(directive, CacheDirective::NoCache)); - } +// Comment hint has priority over connection parameter +fn get_cache_directive( + client_request: &ClientRequest, + params: &Parameters, +) -> Option { + client_request + .ast + .as_ref() + .map(|ast| ast.comment_cache) + .flatten() + .or_else(|| extract_parameter_directive(params)) +} - #[test] - fn test_parameter_extractor_cache() { - let extractor = ParameterCacheExtractor::new(); - let mut params = Parameters::default(); - params.insert("pgdog.cache", "cache"); - let directive = extractor.extract("SELECT * FROM users", &params); - match directive { - CacheDirective::Cache { ttl_seconds } => assert!(ttl_seconds.is_none()), - _ => panic!("Expected Cache directive"), - } - } +fn extract_parameter_directive(params: &Parameters) -> Option { + let value = params.get(KEY)?; + let s = match value { + ParameterValue::String(v) => v.as_str().trim(), + _ => return None, + }; - #[test] - fn test_parameter_extractor_cache_with_ttl() { - let extractor = ParameterCacheExtractor::new(); - let mut params = Parameters::default(); - params.insert("pgdog.cache", "cache ttl=120"); - let directive = extractor.extract("SELECT * FROM users", &params); - match directive { - CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(120)), - _ => panic!("Expected Cache directive"), - } + match s { + "no_cache" => return Some(CacheDirective::NoCache), + "cache" => return Some(CacheDirective::Cache { ttl_seconds: None }), + _ => (), } - #[test] - fn test_parameter_extractor_no_param() { - let extractor = ParameterCacheExtractor::new(); - let params = Parameters::default(); - let directive = extractor.extract("SELECT * FROM users", &params); - assert!(matches!(directive, CacheDirective::None)); + if let Some(ttl) = s + .strip_prefix("cache") +
.map(|s| s.trim_start()) + .map(|s| s.strip_prefix("ttl=")) + .flatten() + .and_then(|t| t.trim().parse::().ok()) + { + return Some(CacheDirective::Cache { + ttl_seconds: Some(ttl), + }); } - #[test] - fn test_dispatcher_comment_wins() { - let comment_extractor = CommentCacheExtractor; - let parameter_extractor = ParameterCacheExtractor::new(); - - let mut dispatcher = CachePolicyDispatcher::new(); - dispatcher.add_extractor(Box::new(comment_extractor)); - dispatcher.add_extractor(Box::new(parameter_extractor)); - - let mut params = Parameters::default(); - params.insert("pgdog.cache", "no-cache"); - - let directive = dispatcher.extract("SELECT * /* pgdog_cache: cache ttl=60 */", &params); - match directive { - CacheDirective::Cache { ttl_seconds } => assert_eq!(ttl_seconds, Some(60)), - _ => panic!("Expected comment to win"), - } - } - - #[test] - fn test_dispatcher_parameter_fallback() { - let comment_extractor = CommentCacheExtractor; - let parameter_extractor = ParameterCacheExtractor::new(); - - let mut dispatcher = CachePolicyDispatcher::new(); - dispatcher.add_extractor(Box::new(comment_extractor)); - dispatcher.add_extractor(Box::new(parameter_extractor)); - - let mut params = Parameters::default(); - params.insert("pgdog.cache", "no-cache"); - - let directive = dispatcher.extract("SELECT * FROM users", &params); - assert!(matches!(directive, CacheDirective::NoCache)); - } + None } diff --git a/pgdog/src/frontend/router/parser/cache/ast.rs b/pgdog/src/frontend/router/parser/cache/ast.rs index c34d865dc..179344a48 100644 --- a/pgdog/src/frontend/router/parser/cache/ast.rs +++ b/pgdog/src/frontend/router/parser/cache/ast.rs @@ -12,6 +12,7 @@ use super::super::{ }; use super::{Fingerprint, Stats}; use crate::backend::schema::Schema; +use crate::frontend::cache::policy::CacheDirective; use crate::frontend::router::parser::rewrite::statement::RewritePlan; use crate::frontend::{BufferedQuery, PreparedStatements}; use crate::net::parameter::ParameterValue; @@ -37,6 +38,8
@@ pub struct AstInner { pub comment_shard: Option, /// Role. pub comment_role: Option, + /// Cache. + pub comment_cache: Option, /// Rewrite plan. pub rewrite_plan: RewritePlan, /// Fingerprint. @@ -44,13 +47,13 @@ pub struct AstInner { } impl AstInner { - /// Create new AST record, with no rewrite or comment routing. pub fn new(ast: ParseResult) -> Self { Self { ast, stats: Mutex::new(Stats::new()), comment_role: None, comment_shard: None, + comment_cache: None, rewrite_plan: RewritePlan::default(), fingerprint: Fingerprint::default(), } @@ -81,7 +84,7 @@ impl Ast { QueryParserEngine::PgQueryRaw => parse_raw(query), } .map_err(Error::PgQuery)?; - let (comment_shard, comment_role) = comment(query, schema)?; + let (comment_shard, comment_role, comment_cache) = comment(query, schema)?; let fingerprint = Fingerprint::new(query, schema.query_parser_engine).map_err(Error::PgQuery)?; @@ -113,6 +116,7 @@ impl Ast { stats: Mutex::new(stats), comment_shard, comment_role, + comment_cache, ast, rewrite_plan, fingerprint, diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index a87883adb..53cf1431f 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -6,6 +6,7 @@ use regex::Regex; use crate::backend::ShardingSchema; use crate::config::database::Role; +use crate::frontend::cache::policy::CacheDirective; use crate::frontend::router::sharding::ContextBuilder; use super::super::parser::Shard; @@ -16,6 +17,9 @@ static SHARDING_KEY: Lazy = Lazy::new(|| { Regex::new(r#"pgdog_sharding_key: *(?:"([^"]*)"|'([^']*)'|([0-9a-zA-Z-]+))"#).unwrap() }); static ROLE: Lazy = Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); +static CACHE: Lazy = Lazy::new(|| { + Regex::new(r#"pgdog_cache: *(no_cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() +}); fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { caps.get(1) @@ -24,23 +28,24 @@ fn 
get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { .map(|m| m.as_str()) } -/// Extract shard number from a comment. +/// Extract shard number, role and cache directive from a comment. /// /// Comment style uses the C-style comments (not SQL comments!) /// as to allow the comment to appear anywhere in the query. /// -/// See [`SHARD`] and [`SHARDING_KEY`] for the style of comment we expect. +/// See [`SHARD`], [`SHARDING_KEY`], [`ROLE`] and [`CACHE`] for the style of comment we expect. /// pub fn comment( query: &str, schema: &ShardingSchema, -) -> Result<(Option, Option), Error> { +) -> Result<(Option, Option, Option), Error> { let tokens = match schema.query_parser_engine { QueryParserEngine::PgQueryProtobuf => scan(query), QueryParserEngine::PgQueryRaw => scan_raw(query), } .map_err(Error::PgQuery)?; let mut role = None; + let mut cache = None; for token in tokens.tokens.iter() { if token.token == Token::CComment as i32 { @@ -54,15 +59,26 @@ pub fn comment( } } } + if let Some(cap) = CACHE.captures(comment) { + if let Some(action) = cap.get(1) { + let action = action.as_str(); + if action == "no_cache" { + cache = Some(CacheDirective::NoCache); + } else { + let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); + cache = Some(CacheDirective::Cache { ttl_seconds: ttl }); + } + } + } if let Some(cap) = SHARDING_KEY.captures(comment) { if let Some(sharding_key) = get_matched_value(&cap) { if let Some(schema) = schema.schemas.get(Some(sharding_key.into())) { - return Ok((Some(schema.shard().into()), role)); + return Ok((Some(schema.shard().into()), role, cache)); } let ctx = ContextBuilder::infer_from_from_and_config(sharding_key, schema)? 
.shards(schema.shards) .build()?; - return Ok((Some(ctx.apply()?), role)); + return Ok((Some(ctx.apply()?), role, cache)); } } if let Some(cap) = SHARD.captures(comment) { @@ -77,13 +93,14 @@ pub fn comment( .unwrap_or(Shard::All), ), role, + cache, )); } } } } - Ok((None, role)) + Ok((None, role, cache)) } #[cfg(test)] @@ -255,4 +272,94 @@ mod tests { let result = comment(query, &schema).unwrap(); assert_eq!(result.0, Some(Shard::Direct(1))); } + + #[test] + fn test_cache_hint_no_cache() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: no_cache */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!(result.2, Some(CacheDirective::NoCache))); + } + + #[test] + fn test_cache_hint_cache_default_ttl() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: cache */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::Cache { ttl_seconds: None }) + )); + } + + #[test] + fn test_cache_hint_cache_with_ttl() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: cache ttl=60 */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::Cache { + ttl_seconds: Some(60) + }) + )); + } + + #[test] + fn test_cache_hint_no_directive() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, 
SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users"; + let result = comment(query, &schema).unwrap(); + assert!(matches!(result.2, None)); + } + + #[test] + fn test_combined_shard_and_cache_hints() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_role: replica pgdog_shard: 1 pgdog_cache: cache ttl=300 */"; + let result = comment(query, &schema).unwrap(); + assert_eq!(result.1, Some(Role::Replica)); + assert_eq!(result.0, Some(Shard::Direct(1))); + assert!(matches!( + result.2, + Some(CacheDirective::Cache { + ttl_seconds: Some(300) + }) + )); + } } diff --git a/pgdog/src/net/parameter.rs b/pgdog/src/net/parameter.rs index 1502d0397..4dd0c6114 100644 --- a/pgdog/src/net/parameter.rs +++ b/pgdog/src/net/parameter.rs @@ -33,6 +33,7 @@ static UNTRACKED_PARAMS: Lazy> = Lazy::new(|| { String::from("pgdog.role"), String::from("pgdog.shard"), String::from("pgdog.sharding_key"), + String::from("pgdog.cache"), ]) }); From 9d682c6dcc8087ee4b4792bc55a9247f20ba6368 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 10/23] updated cache config --- .schema/pgdog.schema.json | 74 ++++++++++++++++++++++++++++++ docs/CACHE.md | 17 ++++--- pgdog-config/src/cache.rs | 67 +++++++++++++++++++-------- pgdog-config/src/core.rs | 2 +- pgdog/src/backend/pool/cluster.rs | 2 +- pgdog/src/frontend/cache/client.rs | 26 +++++------ pgdog/src/frontend/cache/policy.rs | 6 +-- 7 files changed, 147 insertions(+), 47 deletions(-) diff --git a/.schema/pgdog.schema.json b/.schema/pgdog.schema.json index 78238e78b..906dae5c1 100644 --- a/.schema/pgdog.schema.json +++ b/.schema/pgdog.schema.json @@ -31,6 +31,13 @@ "ban_timeout": 300000, "broadcast_address": null, "broadcast_port": 6433, + "cache": { + "enabled": 
false, + "max_result_size": 0, + "policy": "auto", + "redis_url": "redis://localhost:6379", + "ttl": 300 + }, "checkout_timeout": 5000, "client_connection_recovery": "drop", "client_idle_in_transaction_timeout": 9223372036854775807, @@ -275,6 +282,62 @@ } ] }, + "Cache": { + "description": "Redis cache configuration for a database.", + "type": "object", + "properties": { + "enabled": { + "description": "Whether to enable caching for this database.\n\n_Default:_ `false`", + "type": "boolean", + "default": false + }, + "max_result_size": { + "description": "Maximum result size in bytes to cache (0 = unlimited).\n\n_Default:_ `0`", + "type": "integer", + "format": "uint", + "default": 0, + "minimum": 0 + }, + "policy": { + "description": "Cache policy: no_cache, cache, or auto.\n\n_Default:_ `auto`", + "$ref": "#/$defs/CachePolicy", + "default": "auto" + }, + "redis_url": { + "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", + "type": "string", + "default": "redis://localhost:6379" + }, + "ttl": { + "description": "Default TTL in seconds for cached queries.\n\n_Default:_ `300`", + "type": "integer", + "format": "uint64", + "default": 300, + "minimum": 0 + } + }, + "additionalProperties": false + }, + "CachePolicy": { + "description": "Cache policy.", + "oneOf": [ + { + "description": "Never cache queries for this database.", + "type": "string", + "const": "no_cache" + }, + { + "description": "Always cache read queries.", + "type": "string", + "const": "cache" + }, + { + "description": "Dynamically decide based on Redis memory and query stats.", + "type": "string", + "const": "auto" + } + ] + }, "ConnectionRecovery": { "description": "controls if server connections are recovered or dropped if a client abruptly disconnects.\n\nhttps://docs.pgdog.dev/configuration/pgdog.toml/general/#connection_recovery", "oneOf": [ @@ -574,6 +637,17 @@ "maximum": 65535, "minimum": 0 }, + "cache": { + "description": "Redis cache configuration for this 
database.", + "$ref": "#/$defs/Cache", + "default": { + "enabled": false, + "max_result_size": 0, + "policy": "auto", + "redis_url": "redis://localhost:6379", + "ttl": 300 + } + }, "checkout_timeout": { "description": "Maximum amount of time a client is allowed to wait for a connection from the pool.\n\n_Default:_ `5000`\n\nhttps://docs.pgdog.dev/configuration/pgdog.toml/general/#checkout_timeout", "type": "integer", diff --git a/docs/CACHE.md b/docs/CACHE.md index 743a2ab25..bcc7f4840 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -12,15 +12,14 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on **`cache.rs`** — Cache configuration types: -**CachePolicy enum:** `NoCache` (default), `Cache`, `Auto`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. +**CachePolicy enum:** `NoCache`, `Cache`, `Auto` (default). Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. **Cache struct:** -- `enabled: Option` — is caching on? -- `policy: Option` — which policy? -- `ttl: Option` — default TTL seconds (default 300) -- `redis_url: Option` — Redis connection URL -- `max_result_size: Option` — max cached result bytes -- Helper methods: `is_enabled()`, `policy()`, `ttl()`, `max_result_size()` +- `enabled: bool` — is caching on? +- `policy: CachePolicy` — which policy? +- `ttl: u64` — default TTL seconds (default 300) +- `redis_url: String` — Redis connection URL +- `max_result_size: usize` — max cached result bytes **`general.rs`** — `General` struct holds `cache: Cache` field. **Cache config is global.** @@ -229,6 +228,8 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. 
The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors (`CachePolicyExtractor`, `CommentCacheExtractor`, `ParameterCacheExtractor`, `CachePolicyDispatcher`, `CachePolicyResolver`) replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. `CacheDirective::None` removed in favor of `Option` with `NoCache` as default. Parameter format unified to `no_cache` (underscore, not dash). +15. **Add cache config to .schema**. + --- ## What's Left To Do @@ -251,8 +252,6 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 9. **Force-cache hint support**. -10. **Add cache config to .schema**. - ### Planned Tests 1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index 0afd7c7ea..57d75adb3 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -8,11 +8,11 @@ use serde::{Deserialize, Serialize}; #[serde(rename_all = "snake_case")] pub enum CachePolicy { /// Never cache queries for this database. - #[default] NoCache, /// Always cache read queries. Cache, /// Dynamically decide based on Redis memory and query stats. + #[default] Auto, } @@ -41,37 +41,66 @@ impl std::fmt::Display for CachePolicy { } /// Redis cache configuration for a database. -#[derive( - Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, JsonSchema, -)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Cache { /// Whether to enable caching for this database. 
- pub enabled: Option, + /// + /// _Default:_ `false` + #[serde(default = "Cache::enabled")] + pub enabled: bool, /// Cache policy: no_cache, cache, or auto. - pub policy: Option, + /// + /// _Default:_ `auto` + #[serde(default = "Cache::policy")] + pub policy: CachePolicy, /// Default TTL in seconds for cached queries. - pub ttl: Option, - /// Redis connection URL (e.g., redis://localhost:6379). - pub redis_url: Option, + /// + /// _Default:_ `300` + #[serde(default = "Cache::ttl")] + pub ttl: u64, + /// Redis connection URL. + /// + /// _Default:_ `redis://localhost:6379` + #[serde(default = "Cache::redis_url")] + pub redis_url: String, /// Maximum result size in bytes to cache (0 = unlimited). - pub max_result_size: Option, + /// + /// _Default:_ `0` + #[serde(default = "Cache::max_result_size")] + pub max_result_size: usize, +} + +impl Default for Cache { + fn default() -> Self { + Self { + enabled: Self::enabled(), + policy: Self::policy(), + ttl: Self::ttl(), + redis_url: Self::redis_url(), + max_result_size: Self::max_result_size(), + } + } } impl Cache { - pub fn is_enabled(&self) -> bool { - self.enabled.unwrap_or(false) + fn enabled() -> bool { + false } - pub fn policy(&self) -> CachePolicy { - self.policy.unwrap_or_default() + fn policy() -> CachePolicy { + Default::default() } - pub fn ttl(&self) -> u64 { - self.ttl.unwrap_or(300) + fn ttl() -> u64 { + 300 } - pub fn max_result_size(&self) -> Option { - self.max_result_size + fn redis_url() -> String { + "redis://localhost:6379".to_string() } -} + + fn max_result_size() -> usize { + 0 + } +} \ No newline at end of file diff --git a/pgdog-config/src/core.rs b/pgdog-config/src/core.rs index 98135178c..52e0187fb 100644 --- a/pgdog-config/src/core.rs +++ b/pgdog-config/src/core.rs @@ -565,7 +565,7 @@ impl Config { ); } - if self.general.cache.is_enabled() + if self.general.cache.enabled && matches!(self.general.query_parser, QueryParserLevel::Off | QueryParserLevel::SessionControl) { warn!("cache requires 
enabled query parser but it's disabled or session controlled"); } diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index 22bee793e..b636d87fc 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -212,7 +212,7 @@ impl<'a> ClusterConfig<'a> { reload_schema_on_ddl: general.reload_schema_on_ddl, load_schema: general.load_schema, resharding_parallel_copies: general.resharding_parallel_copies, - cache_enabled: general.cache.is_enabled() + cache_enabled: general.cache.enabled } } } diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs index dfede8e1d..5cc9bf37e 100644 --- a/pgdog/src/frontend/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -39,7 +39,7 @@ impl CacheClient { pub fn new() -> Self { let cache_config = &config().config.general.cache; - if !cache_config.is_enabled() || cache_config.redis_url.is_none() { + if !cache_config.enabled { return Self { client: None, redis_connected: Arc::new(AtomicBool::new(false)), @@ -47,7 +47,7 @@ impl CacheClient { }; } - let url = cache_config.redis_url.as_ref().unwrap(); + let url = cache_config.redis_url.as_str(); let client_config = match RedisConfig::from_url(url) { Ok(c) => c, Err(e) => { @@ -248,19 +248,17 @@ impl CacheClient { let cache_config = &config().config.general.cache; - if let Some(max_size) = cache_config.max_result_size() { - if value.len() > max_size { - debug!( - "Skipping cache for key {}: size {} exceeds max {}", - key, - value.len(), - max_size - ); - return Ok(()); - } + if cache_config.max_result_size != 0 && value.len() > cache_config.max_result_size { + debug!( + "Skipping cache for key {}: size {} exceeds max {}", + key, + value.len(), + cache_config.max_result_size + ); + return Ok(()); } - let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl()) as i64; + let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl) as i64; match tokio::time::timeout( REDIS_OPERATION_TIMEOUT, @@ -293,7 
+291,7 @@ impl CacheClient { pub fn is_enabled(&self) -> bool { let cache_config = &config().config.general.cache; - self.client.is_some() && cache_config.is_enabled() + self.client.is_some() && cache_config.enabled } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index b186bee8e..e6f92514f 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -50,14 +50,14 @@ pub async fn resolve( match cache_directive { Some(CacheDirective::NoCache) => return CacheDecision::Skip, Some(CacheDirective::Cache { ttl_seconds }) => { - return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl()))) + return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl))) } _ => (), } - match cache_config.policy() { + match cache_config.policy { CachePolicy::NoCache => CacheDecision::Skip, - CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl())), + CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl)), CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, } } From 44ae0ed1f4fc485551f18970ed0147409c8e8cb6 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 11/23] force-cache hint support --- docs/CACHE.md | 28 ++++++++- pgdog/src/frontend/cache/client.rs | 4 +- pgdog/src/frontend/cache/context.rs | 4 +- pgdog/src/frontend/cache/integration.rs | 69 +++++++++++++-------- pgdog/src/frontend/cache/mod.rs | 15 ++--- pgdog/src/frontend/cache/policy.rs | 42 ++++++------- pgdog/src/frontend/router/parser/comment.rs | 47 +++++++++++++- 7 files changed, 143 insertions(+), 66 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index bcc7f4840..ac215d2d4 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -159,6 +159,16 @@ SELECT * FROM products WHERE category = 'electronics'; -- Cache with custom TTL in seconds /* pgdog_cache: cache ttl=300 */ SELECT * FROM orders; + +-- Force cache with database default TTL +-- Query hash computed as 
if comment were like "/* pgdog_cache: cache */" +/* pgdog_cache: force_cache */ +SELECT * FROM products WHERE category = 'electronics'; + +-- Force cache with custom TTL in seconds +-- Query hash computed as if comment were like "/* pgdog_cache: cache ttl=300*/" +/* pgdog_cache: force_cache ttl=300 */ +SELECT * FROM orders; ``` ### Connection Parameter @@ -174,6 +184,12 @@ SET pgdog.cache = 'cache'; -- Session-wide: cache all queries with 5-minute TTL SET pgdog.cache = 'cache ttl=300'; + +-- Session-wide: force cache all queries with default TTL +SET pgdog.cache = 'force_cache'; + +-- Session-wide: force cache all queries with 5-minute TTL +SET pgdog.cache = 'force_cache ttl=300'; ``` ```sh @@ -185,6 +201,12 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c # Session-wide: cache all queries with 5-minute TTL psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%5C%20ttl%3D300 + +# Session-wide: force cache all queries with default TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache + +# Session-wide: force cache all queries with 5-minute TTL +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache%5C%20ttl%3D300 ``` ### Priority Order @@ -230,6 +252,8 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 15. **Add cache config to .schema**. +16. **Force-cache hint support**. + --- ## What's Left To Do @@ -244,14 +268,12 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 5. **Magic numbers in send_cached_response()**. -6. **Make statistics collection async** — for auto policy. +6. **Make statistics collection deferred** — for auto policy. 7. **Provide config hotswap**. 8. **Review and rewrite CacheClient**. -9. **Force-cache hint support**. - ### Planned Tests 1. 
**Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs index 5cc9bf37e..de59dc686 100644 --- a/pgdog/src/frontend/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -229,7 +229,7 @@ impl CacheClient { } } - pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: Option) -> Result<(), Error> { + pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error> { if !self.ensure_connected().await { if !self.is_connected() { self.spawn_reconnect(); @@ -258,7 +258,7 @@ impl CacheClient { return Ok(()); } - let ttl_seconds = ttl.unwrap_or_else(|| cache_config.ttl) as i64; + let ttl_seconds = ttl as i64; match tokio::time::timeout( REDIS_OPERATION_TIMEOUT, diff --git a/pgdog/src/frontend/cache/context.rs b/pgdog/src/frontend/cache/context.rs index 2aaede4ff..42fd0fecf 100644 --- a/pgdog/src/frontend/cache/context.rs +++ b/pgdog/src/frontend/cache/context.rs @@ -1,9 +1,9 @@ -use crate::net::{Message, messages::Protocol}; +use crate::{frontend::cache::integration::CacheMiss, net::{Message, messages::Protocol}}; /// Cache context to use in QueryEngineContext. 
#[derive(Default)] pub struct CacheContext { - pub cache_miss: Option<(u64, Option)>, + pub cache_miss: Option, pub response_buffer: Vec, pub had_error: bool, } diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 59245b1a6..4d236954b 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,7 +1,10 @@ use std::hash::{Hash, Hasher}; +use once_cell::sync::Lazy; +use regex::Regex; + use crate::{ - frontend::ClientRequest, + frontend::{ClientRequest, cache::CacheDecision}, net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; @@ -9,14 +12,20 @@ use tracing::debug; use super::{policy, Cache}; +static FORCE_CACHE_RE: Lazy = Lazy::new(|| { + Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap() +}); + +pub struct CacheMiss { + pub cache_key_hash: u64, + pub ttl: u64, +} + pub enum CacheCheckResult { Hit { cached: Vec, }, - Miss { - cache_key_hash: u64, - ttl: Option, - }, + Miss(CacheMiss), Passthrough, } @@ -53,33 +62,41 @@ impl Cache { let cache_key_hash = { let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); database.hash(&mut hasher); - query.query().hash(&mut hasher); + let normalized_query = FORCE_CACHE_RE.replace(query.query(), "pgdog_cache: cache"); + normalized_query.hash(&mut hasher); hasher.finish() }; let decision = policy::resolve(client_request, params, is_read, cache_key_hash, &self.stats).await; - - if !decision.should_cache() { - return Ok(CacheCheckResult::Passthrough); - } - - match self.client.get(cache_key_hash).await { - Ok(Some(cached)) => { - self.stats.record_hit(cache_key_hash, cached.len()).await; - Ok(CacheCheckResult::Hit { cached }) - } - Ok(None) => { + match decision { + CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), + CacheDecision::ForceCache(ttl) => { self.stats.record_miss(cache_key_hash).await; - Ok(CacheCheckResult::Miss { + Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, - ttl: decision.ttl(), - }) - } - Err(e) => 
{ - debug!("Cache get error: {}", e); - Ok(CacheCheckResult::Passthrough) - } + ttl, + })) + }, + CacheDecision::Cache(ttl) => { + match self.client.get(cache_key_hash).await { + Ok(Some(cached)) => { + self.stats.record_hit(cache_key_hash, cached.len()).await; + Ok(CacheCheckResult::Hit { cached }) + } + Ok(None) => { + self.stats.record_miss(cache_key_hash).await; + Ok(CacheCheckResult::Miss(CacheMiss { + cache_key_hash, + ttl: ttl, + })) + } + Err(e) => { + debug!("Cache get error: {}", e); + Ok(CacheCheckResult::Passthrough) + } + } + }, } } @@ -123,7 +140,7 @@ impl Cache { &self, cache_key_hash: u64, messages: Vec, - ttl: Option, + ttl: u64, ) -> Result<(), ()> { if messages.is_empty() || !self.client.is_enabled() { return Ok(()); diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index a28ceca3a..337e258ec 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -15,7 +15,7 @@ use std::sync::Arc; use tracing::debug; use crate::{ - frontend::ClientRequest, + frontend::{ClientRequest, cache::integration::CacheMiss}, net::{Parameters, Stream}, }; @@ -58,14 +58,11 @@ impl Cache { cache_context.reset(); return Ok(true); } - CacheCheckResult::Miss { - cache_key_hash, - ttl, - } => { - cache_context.cache_miss = Some((cache_key_hash, ttl)); + CacheCheckResult::Miss(cache_miss) => { + debug!("Cache miss for key hash: {}", cache_miss.cache_key_hash); + cache_context.cache_miss = Some(cache_miss); cache_context.response_buffer.clear(); cache_context.had_error = false; - debug!("Cache miss for key hash: {}", cache_key_hash); } CacheCheckResult::Passthrough => { cache_context.reset(); @@ -77,10 +74,10 @@ impl Cache { /// Finalize caching by storing the response in Redis. 
pub async fn save_response_in_cache(&self, cache_context: &mut CacheContext) { - if let Some((cache_key, ttl)) = cache_context.cache_miss.take() { + if let Some(CacheMiss { cache_key_hash, ttl } ) = cache_context.cache_miss.take() { if !cache_context.had_error && !cache_context.response_buffer.is_empty() { let messages = std::mem::take(&mut cache_context.response_buffer); - if let Err(e) = self.cache_response(cache_key, messages, ttl).await { + if let Err(e) = self.cache_response(cache_key_hash, messages, ttl).await { debug!("Failed to cache response: {:?}", e); } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index e6f92514f..2a2f6a4ba 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -9,26 +9,17 @@ pub enum CacheDirective { Cache { ttl_seconds: Option, }, + ForceCache { + ttl_seconds: Option, + }, #[default] NoCache, } pub enum CacheDecision { Skip, - Cache(Option), -} - -impl CacheDecision { - pub fn should_cache(&self) -> bool { - matches!(self, CacheDecision::Cache(_)) - } - - pub fn ttl(&self) -> Option { - match self { - CacheDecision::Cache(ttl) => *ttl, - _ => None, - } - } + Cache(u64), + ForceCache(u64), } const KEY: &str = "pgdog.cache"; @@ -50,22 +41,26 @@ pub async fn resolve( match cache_directive { Some(CacheDirective::NoCache) => return CacheDecision::Skip, Some(CacheDirective::Cache { ttl_seconds }) => { - return CacheDecision::Cache(ttl_seconds.or(Some(cache_config.ttl))) + return CacheDecision::Cache(ttl_seconds.unwrap_or(cache_config.ttl)) + }, + Some(CacheDirective::ForceCache { ttl_seconds }) => { + return CacheDecision::ForceCache(ttl_seconds.unwrap_or(cache_config.ttl)) } _ => (), } match cache_config.policy { CachePolicy::NoCache => CacheDecision::Skip, - CachePolicy::Cache => CacheDecision::Cache(Some(cache_config.ttl)), + CachePolicy::Cache => CacheDecision::Cache(cache_config.ttl), CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, } } async 
fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { + let cache_config = &config().config.general.cache; let query_stats = stats.get(cache_key_hash).await; if query_stats.hit_count > query_stats.miss_count && query_stats.avg_result_size() < 1_000_000 { - CacheDecision::Cache(None) + CacheDecision::Cache(cache_config.ttl) } else { CacheDecision::Skip } @@ -93,20 +88,25 @@ fn extract_parameter_directive(params: &Parameters) -> Option { match s { "no_cache" => return Some(CacheDirective::NoCache), + "force_cache" => return Some(CacheDirective::ForceCache { ttl_seconds: None }), "cache" => return Some(CacheDirective::Cache { ttl_seconds: None }), _ => (), } if let Some(ttl) = s - .strip_prefix("cache") + .strip_prefix("force_cache") + .or_else(|| s.strip_prefix("cache")) .map(|s| s.trim_start()) .map(|s| s.strip_prefix("ttl=")) .flatten() .and_then(|t| t.trim().parse::().ok()) { - return Some(CacheDirective::Cache { - ttl_seconds: Some(ttl), - }); + let ttl_seconds = Some(ttl); + if s.starts_with("force_cache") { + return Some(CacheDirective::ForceCache { ttl_seconds }); + } else { + return Some(CacheDirective::Cache { ttl_seconds }); + } } None diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index 53cf1431f..29494c287 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -18,7 +18,7 @@ static SHARDING_KEY: Lazy = Lazy::new(|| { }); static ROLE: Lazy = Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); static CACHE: Lazy = Lazy::new(|| { - Regex::new(r#"pgdog_cache: *(no_cache|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() + Regex::new(r#"pgdog_cache: *(no_cache|force_cache(?:\s+ttl\s*=\s*([0-9]+))?|cache(?:\s+ttl\s*=\s*([0-9]+))?)?"#).unwrap() }); fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { @@ -64,8 +64,11 @@ pub fn comment( let action = action.as_str(); if action == 
"no_cache" { cache = Some(CacheDirective::NoCache); - } else { + } else if action.starts_with("force_cache") { let ttl = cap.get(2).and_then(|m| m.as_str().parse::().ok()); + cache = Some(CacheDirective::ForceCache { ttl_seconds: ttl }); + } else { + let ttl = cap.get(3).and_then(|m| m.as_str().parse::().ok()); cache = Some(CacheDirective::Cache { ttl_seconds: ttl }); } } @@ -350,7 +353,7 @@ mod tests { tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), ..Default::default() }; - + let query = "SELECT * FROM users /* pgdog_role: replica pgdog_shard: 1 pgdog_cache: cache ttl=300 */"; let result = comment(query, &schema).unwrap(); assert_eq!(result.1, Some(Role::Replica)); @@ -362,4 +365,42 @@ mod tests { }) )); } + + #[test] + fn test_cache_hint_force_cache() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: force_cache */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::ForceCache { ttl_seconds: None }) + )); + } + + #[test] + fn test_cache_hint_force_cache_with_ttl() { + use crate::backend::ShardedTables; + + let schema = ShardingSchema { + shards: 2, + tables: ShardedTables::new(vec![], vec![], false, SystemCatalogsBehavior::default()), + ..Default::default() + }; + + let query = "SELECT * FROM users /* pgdog_cache: force_cache ttl=60 */"; + let result = comment(query, &schema).unwrap(); + assert!(matches!( + result.2, + Some(CacheDirective::ForceCache { + ttl_seconds: Some(60) + }) + )); + } } From efa318dfa2c2d04144916d6d01579e405e3af421 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 12/23] return comment --- pgdog/src/frontend/router/parser/cache/ast.rs | 1 + 1 file changed, 1 insertion(+) diff --git 
a/pgdog/src/frontend/router/parser/cache/ast.rs b/pgdog/src/frontend/router/parser/cache/ast.rs index 179344a48..855792f96 100644 --- a/pgdog/src/frontend/router/parser/cache/ast.rs +++ b/pgdog/src/frontend/router/parser/cache/ast.rs @@ -47,6 +47,7 @@ pub struct AstInner { } impl AstInner { + /// Create new AST record, with no rewrite or comment routing. pub fn new(ast: ParseResult) -> Self { Self { ast, From 1aceafc3ab5ff561a5f2ed9e703a5e943de242f0 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 13/23] remove unescaping on pgdog's side --- docs/CACHE.md | 4 ++-- pgdog/src/net/messages/hello.rs | 34 --------------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index ac215d2d4..f59fb2403 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -200,13 +200,13 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache # Session-wide: cache all queries with 5-minute TTL -psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%5C%20ttl%3D300 +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dcache%20ttl%3D300 # Session-wide: force cache all queries with default TTL psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache # Session-wide: force cache all queries with 5-minute TTL -psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache%5C%20ttl%3D300 +psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.cache%3Dforce_cache%20ttl%3D300 ``` ### Priority Order diff --git a/pgdog/src/net/messages/hello.rs b/pgdog/src/net/messages/hello.rs index 436fb209f..5c989221b 100644 --- a/pgdog/src/net/messages/hello.rs +++ b/pgdog/src/net/messages/hello.rs @@ -58,7 +58,6 @@ impl 
Startup { let value = search_path(&value); params.insert(name, value); } else if name == "options" { - let value = options_unescape(&value); let kvs = value.split("-c"); for kv in kvs { let mut nvs = kv.splitn(2, "="); @@ -250,26 +249,6 @@ fn search_path(value: &str) -> ParameterValue { ParameterValue::Tuple(value) } -fn options_unescape(input: &str) -> String { - let mut result = String::with_capacity(input.len()); - let mut chars = input.chars().peekable(); - - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(&next) = chars.peek() { - chars.next(); - result.push(next); - } else { - result.push(c); - } - } else { - result.push(c); - } - } - - result -} - #[cfg(test)] mod test { use crate::net::messages::ToBytes; @@ -330,17 +309,4 @@ mod test { let startup = Startup::from_stream(&mut read).await.unwrap(); assert!(matches!(startup, Startup::GssEnc)); } - - #[test] - fn test_options_unescape() { - assert_eq!(options_unescape("cache\\ ttl=5"), "cache ttl=5"); - assert_eq!(options_unescape("cache\\\\ttl=5"), "cache\\ttl=5"); - assert_eq!(options_unescape("simple"), "simple"); - assert_eq!(options_unescape("a\\=b"), "a=b"); - assert_eq!(options_unescape("trail\\"), "trail\\"); - assert_eq!( - options_unescape("cache\\ ttl\\=5"), - "cache ttl=5" - ); - } } From bd4a264c4c664c846255c2b433983c2346f2120e Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 15:54:27 +0300 Subject: [PATCH 14/23] remove auto policy and stats tracker --- .schema/pgdog.schema.json | 13 ++---- Cargo.lock | 30 +------------ pgdog-config/src/cache.rs | 10 ++--- pgdog/Cargo.toml | 1 - pgdog/src/frontend/cache/integration.rs | 49 +++++++------------- pgdog/src/frontend/cache/mod.rs | 4 -- pgdog/src/frontend/cache/policy.rs | 14 ------ pgdog/src/frontend/cache/stats.rs | 59 ------------------------- 8 files changed, 26 insertions(+), 154 deletions(-) delete mode 100644 pgdog/src/frontend/cache/stats.rs diff --git a/.schema/pgdog.schema.json b/.schema/pgdog.schema.json 
index 906dae5c1..b4997615d 100644 --- a/.schema/pgdog.schema.json +++ b/.schema/pgdog.schema.json @@ -34,7 +34,7 @@ "cache": { "enabled": false, "max_result_size": 0, - "policy": "auto", + "policy": "no_cache", "redis_url": "redis://localhost:6379", "ttl": 300 }, @@ -299,9 +299,9 @@ "minimum": 0 }, "policy": { - "description": "Cache policy: no_cache, cache, or auto.\n\n_Default:_ `auto`", + "description": "Cache policy: no_cache or cache.\n\n_Default:_ `no_cache`", "$ref": "#/$defs/CachePolicy", - "default": "auto" + "default": "no_cache" }, "redis_url": { "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", @@ -330,11 +330,6 @@ "description": "Always cache read queries.", "type": "string", "const": "cache" - }, - { - "description": "Dynamically decide based on Redis memory and query stats.", - "type": "string", - "const": "auto" } ] }, @@ -643,7 +638,7 @@ "default": { "enabled": false, "max_result_size": 0, - "policy": "auto", + "policy": "no_cache", "redis_url": "redis://localhost:6379", "ttl": 300 } diff --git a/Cargo.lock b/Cargo.lock index f7b17c532..d2a7fa197 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2959,7 +2959,6 @@ dependencies = [ "rust_decimal", "rustls-native-certs 0.8.1", "rustls-pki-types", - "scc 3.7.0", "scram", "semver", "serde", @@ -3991,29 +3990,13 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "saa" -version = "5.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8d438861332c3b1ac396c77bd9cac620ea1ff347efb63c05a83d8f0a593899" - [[package]] name = "scc" version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" dependencies = [ - "sdd 3.0.8", -] - -[[package]] -name = "scc" -version = "3.7.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "16c154cf1d115a1e901d7f4e3f279eb6eb455f0d670c1cf3c1aa74d50ad37fa9" -dependencies = [ - "saa", - "sdd 4.8.6", + "sdd", ] [[package]] @@ -4089,15 +4072,6 @@ version = "3.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "584e070911c7017da6cb2eb0788d09f43d789029b5877d3e5ecc8acf86ceee21" -[[package]] -name = "sdd" -version = "4.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5f0e40a01b94e35d1dacbcfbe5bfd3d31e37d9590b2e6d86a82b0e87bd4f551" -dependencies = [ - "saa", -] - [[package]] name = "seahash" version = "4.1.0" @@ -4220,7 +4194,7 @@ dependencies = [ "log", "once_cell", "parking_lot", - "scc 2.3.4", + "scc", "serial_test_derive", ] diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index 57d75adb3..7450c4730 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -8,12 +8,10 @@ use serde::{Deserialize, Serialize}; #[serde(rename_all = "snake_case")] pub enum CachePolicy { /// Never cache queries for this database. + #[default] NoCache, /// Always cache read queries. Cache, - /// Dynamically decide based on Redis memory and query stats. - #[default] - Auto, } impl std::str::FromStr for CachePolicy { @@ -23,7 +21,6 @@ impl std::str::FromStr for CachePolicy { match s.to_lowercase().as_str() { "no_cache" => Ok(Self::NoCache), "cache" => Ok(Self::Cache), - "auto" => Ok(Self::Auto), _ => Err(format!("Invalid cache policy: {}", s)), } } @@ -34,7 +31,6 @@ impl std::fmt::Display for CachePolicy { let display = match self { Self::NoCache => "no_cache", Self::Cache => "cache", - Self::Auto => "auto", }; write!(f, "{}", display) } @@ -49,9 +45,9 @@ pub struct Cache { /// _Default:_ `false` #[serde(default = "Cache::enabled")] pub enabled: bool, - /// Cache policy: no_cache, cache, or auto. + /// Cache policy: no_cache or cache. 
/// - /// _Default:_ `auto` + /// _Default:_ `no_cache` #[serde(default = "Cache::policy")] pub policy: CachePolicy, /// Default TTL in seconds for cached queries. diff --git a/pgdog/Cargo.toml b/pgdog/Cargo.toml index e3f2e7bbb..461daec45 100644 --- a/pgdog/Cargo.toml +++ b/pgdog/Cargo.toml @@ -70,7 +70,6 @@ pgdog-vector = { path = "../pgdog-vector" } pgdog-stats = { path = "../pgdog-stats" } pgdog-postgres-types = { path = "../pgdog-postgres-types"} fred = { version = "9", features = ["enable-rustls"] } -scc = "3.7" xxhash-rust = { version = "0.8", features = ["xxh3"]} [target.'cfg(not(target_env = "msvc"))'.dependencies] diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 4d236954b..d585c3fe4 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -4,7 +4,7 @@ use once_cell::sync::Lazy; use regex::Regex; use crate::{ - frontend::{ClientRequest, cache::CacheDecision}, + frontend::{cache::CacheDecision, ClientRequest}, net::{FromBytes, Message, Parameters, Stream, ToBytes}, }; @@ -12,9 +12,8 @@ use tracing::debug; use super::{policy, Cache}; -static FORCE_CACHE_RE: Lazy = Lazy::new(|| { - Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap() -}); +static FORCE_CACHE_RE: Lazy = + Lazy::new(|| Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap()); pub struct CacheMiss { pub cache_key_hash: u64, @@ -22,9 +21,7 @@ pub struct CacheMiss { } pub enum CacheCheckResult { - Hit { - cached: Vec, - }, + Hit { cached: Vec }, Miss(CacheMiss), Passthrough, } @@ -67,34 +64,22 @@ impl Cache { hasher.finish() }; - let decision = - policy::resolve(client_request, params, is_read, cache_key_hash, &self.stats).await; + let decision = policy::resolve(client_request, params, is_read).await; match decision { CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), - CacheDecision::ForceCache(ttl) => { - self.stats.record_miss(cache_key_hash).await; - Ok(CacheCheckResult::Miss(CacheMiss { + 
CacheDecision::ForceCache(ttl) => Ok(CacheCheckResult::Miss(CacheMiss { + cache_key_hash, + ttl, + })), + CacheDecision::Cache(ttl) => match self.client.get(cache_key_hash).await { + Ok(Some(cached)) => Ok(CacheCheckResult::Hit { cached }), + Ok(None) => Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, - ttl, - })) - }, - CacheDecision::Cache(ttl) => { - match self.client.get(cache_key_hash).await { - Ok(Some(cached)) => { - self.stats.record_hit(cache_key_hash, cached.len()).await; - Ok(CacheCheckResult::Hit { cached }) - } - Ok(None) => { - self.stats.record_miss(cache_key_hash).await; - Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash, - ttl: ttl, - })) - } - Err(e) => { - debug!("Cache get error: {}", e); - Ok(CacheCheckResult::Passthrough) - } + ttl: ttl, + })), + Err(e) => { + debug!("Cache get error: {}", e); + Ok(CacheCheckResult::Passthrough) } }, } diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 337e258ec..85ddb948d 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -2,13 +2,11 @@ pub mod client; pub mod context; pub mod integration; pub mod policy; -pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; -pub use stats::QueryStatsTracker; use once_cell::sync::Lazy; use std::sync::Arc; @@ -22,7 +20,6 @@ use crate::{ #[derive(Debug)] pub struct Cache { client: CacheClient, - stats: QueryStatsTracker, } static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); @@ -35,7 +32,6 @@ impl Cache { fn new() -> Self { Cache { client: CacheClient::new(), - stats: QueryStatsTracker::default(), } } diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 2a2f6a4ba..35d4bef17 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -1,4 +1,3 @@ -use super::stats::QueryStatsTracker; use crate::config::{config, CachePolicy}; use 
crate::frontend::ClientRequest; use crate::net::parameter::ParameterValue; @@ -28,8 +27,6 @@ pub async fn resolve( client_request: &ClientRequest, params: &Parameters, is_read: bool, - cache_key_hash: u64, - stats: &QueryStatsTracker, ) -> CacheDecision { let cache_config = &config().config.general.cache; @@ -52,17 +49,6 @@ pub async fn resolve( match cache_config.policy { CachePolicy::NoCache => CacheDecision::Skip, CachePolicy::Cache => CacheDecision::Cache(cache_config.ttl), - CachePolicy::Auto => auto_decision(cache_key_hash, stats).await, - } -} - -async fn auto_decision(cache_key_hash: u64, stats: &QueryStatsTracker) -> CacheDecision { - let cache_config = &config().config.general.cache; - let query_stats = stats.get(cache_key_hash).await; - if query_stats.hit_count > query_stats.miss_count && query_stats.avg_result_size() < 1_000_000 { - CacheDecision::Cache(cache_config.ttl) - } else { - CacheDecision::Skip } } diff --git a/pgdog/src/frontend/cache/stats.rs b/pgdog/src/frontend/cache/stats.rs deleted file mode 100644 index e2946c667..000000000 --- a/pgdog/src/frontend/cache/stats.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::sync::Arc; - -use scc::HashMap; - -#[derive(Debug, Clone, Default)] -pub struct QueryStats { - pub hit_count: u64, - pub miss_count: u64, - pub total_result_size: u64, -} - -impl QueryStats { - pub fn avg_result_size(&self) -> u64 { - let total = self.hit_count + self.miss_count; - if total == 0 { - 0 - } else { - self.total_result_size / total - } - } -} - -#[derive(Debug, Clone, Default)] -pub struct QueryStatsTracker { - stats: Arc>, -} - -impl QueryStatsTracker { - pub async fn record_hit(&self, cache_key_hash: u64, result_size: usize) { - let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); - entry.hit_count += 1; - entry.total_result_size += result_size as u64; - } - - pub async fn record_miss(&self, cache_key_hash: u64) { - let mut entry = self.stats.entry_async(cache_key_hash).await.or_default(); - 
entry.miss_count += 1; - } - - pub async fn get(&self, cache_key_hash: u64) -> QueryStats { - self.stats - .get_async(&cache_key_hash) - .await - .map(|entry| entry.get().clone()) - .unwrap_or_default() - } - - pub async fn clear(&self) { - self.stats.clear_async().await - } - - pub async fn len(&self) -> usize { - self.stats.len() - } - - pub async fn is_empty(&self) -> bool { - self.stats.is_empty() - } -} From 8d5cf1b26bd780d50944867ac6d73adc1431a540 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 13 May 2026 16:59:23 +0300 Subject: [PATCH 15/23] bring doc to current state --- docs/CACHE.md | 73 ++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 44 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index f59fb2403..8ddf2d8ab 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -1,8 +1,8 @@ -# Redis Cache for pgdog — State of Implementation +# Cache for pgdog — State of Implementation ## Architecture -Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on cache miss. Three-tier policy resolution: SQL comment → per-database config → auto-decision engine. +Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on cache miss. Two-tier policy resolution: SQL comment/connection parameter → pgdog's config. --- @@ -12,7 +12,7 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on **`cache.rs`** — Cache configuration types: -**CachePolicy enum:** `NoCache`, `Cache`, `Auto` (default). Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. +**CachePolicy enum:** `NoCache` (default), `Cache`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. **Cache struct:** - `enabled: bool` — is caching on? 
@@ -33,21 +33,19 @@ pub mod client; pub mod context; pub mod integration; pub mod policy; -pub mod stats; pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; -pub use stats::QueryStatsTracker; ``` -`Cache` struct wraps: `CacheClient`, `QueryStatsTracker`. +`Cache` struct wraps: `CacheClient`. **Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. Key methods: -- `new()` — creates client (reads config internally) and stats tracker +- `new()` — creates client (reads config internally) - `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through - `save_response_in_cache(cache_context)` — finalizes by storing the captured response @@ -66,23 +64,17 @@ Key methods: - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net -**`policy.rs`** — 3-tier policy resolution via free functions: -- `CacheDirective` enum: `Cache { ttl_seconds }`, `NoCache` (default) -- `CacheDecision` enum: `Skip`, `Cache(Option)` -- `resolve(client_request, params, is_read, cache_key_hash, stats)` — main resolver function, chains all tiers +**`policy.rs`** — 2-tier policy resolution: +- `CacheDirective` enum: `Cache { ttl_seconds }`, `ForceCache { ttl_seconds }`, `NoCache` (default) +- `CacheDecision` enum: `Skip`, `Cache(u64)`, `ForceCache(u64)` +- `resolve(client_request, params, is_read)` — main resolver function, chains all tiers - `get_cache_directive(client_request, params)` — comment hint (from AST) has priority over connection parameter (`pgdog.cache`) -- `extract_parameter_directive(params)` — parses `pgdog.cache` parameter: `no_cache`, `cache`, `cache ttl=N` -- Tier 1: 
Extractor directive (`CacheDirective::Cache { ttl }` or `CacheDirective::NoCache`) -- Tier 2: Global config `CachePolicy` (`NoCache` / `Cache` / `Auto`) -- Tier 3: `auto_decision()` — caches when `hit_count > miss_count` AND `avg_result_size < 1MB` - -**`stats.rs`** — Per-fingerprint query statistics tracker: -- `QueryStats` struct: `hit_count`, `miss_count`, `total_result_size`, `avg_result_size()` -- `QueryStatsTracker` with `record_hit(fingerprint, size)` / `record_miss(fingerprint)` / `get(fingerprint)` -- Internally: `Arc>` +- `extract_parameter_directive(params)` — parses `pgdog.cache` parameter: `no_cache`, `cache`, `cache ttl=N`, `force_cache`, `force_cache ttl=N` +- Tier 1: Extractor directive (`CacheDirective::Cache { ttl }`, `CacheDirective::ForceCache { ttl }`, or `CacheDirective::NoCache`) +- Tier 2: Global config `CachePolicy` (`NoCache` / `Cache`) **`context.rs`** — Cache context held in `QueryEngineContext`: -- `CacheContext` with `cache_miss: Option<(u64, Option)>`, `response_buffer: Vec`, and `had_error: bool` +- `CacheContext` with `cache_miss: Option`, `response_buffer: Vec`, and `had_error: bool` - `capture_response(message)` — stores message in buffer when cache miss is tracked; sets `had_error = true` on `E` messages - `reset()` — clears all state for per-query isolation @@ -95,10 +87,9 @@ Key methods: ### Query Engine Integration **`pgdog/src/frontend/client/query_engine/mod.rs`** -- Declares `pub mod cache;` module -- `QueryEngine` holds `cache: Cache` field -- `handle()` flow: after `route_query()` and before `before_execution()`, calls `self.cache.try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. -- After `match command`, calls `self.cache.save_response_in_cache(context)` to finalize caching. +- Imports global `cache()` from `frontend::cache` +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache().try_read_cache(context)`. 
If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. +- After `match command`, calls `cache().save_response_in_cache(context)` to finalize caching. **`pgdog/src/frontend/client/query_engine/query.rs`** - `process_server_message()` calls `context.cache_context.capture_response(message.clone())`. @@ -119,7 +110,6 @@ Key methods: **`pgdog/Cargo.toml`** fred = { version = "9", features = ["enable-rustls"] } -scc = "3.7" xxhash-rust = { version = "0.8", features = ["xxh3"]} --- @@ -132,10 +122,9 @@ xxhash-rust = { version = "0.8", features = ["xxh3"]} | Cache config scope | **Global** (`config.general.cache`) | | Redis client | `fred` crate v9 (async-native, tokio integration) | | Cacheable queries | Only reads (`route.is_read()`) | -| Cache policy resolution | 3-tier: SQL comment → pgdog.cache param → DB policy → auto-decision | +| Cache policy resolution | 2-tier: SQL comment/param → DB policy | | Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | -| Auto-decision engine | `hit_count > miss_count` AND `avg_result_size < 1MB` | | Cache key | XXH3 hash of `database_name + raw_query_string` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | @@ -214,8 +203,8 @@ psql postgresql://postgres:postgres@127.0.0.1:5432/postgres?options=-c%20pgdog.c Sources are checked in order — first non-None result wins, then falls through to global config: ``` -SQL comment → pgdog.cache parameter → DB policy config → Auto-decision -(highest) (lowest) +SQL comment → pgdog.cache parameter → DB policy config +(highest) (lowest) ``` --- @@ -234,11 +223,11 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 6. 
**Query parser auto-upgrade for caching** — When caching is enabled and parser is `Auto`/`Off`/`SessionControl`, the parser is forced to `On` via `|| self.cache_enabled()` check in `cluster.rs`. A startup warning is emitted in `core.rs` if parser remains incompatible. -7. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. +7. **Decoupled cache policy extraction** — Cache directives extracted via standalone regex in `cache/policy.rs`, works regardless of parser state. Supports `/* pgdog_cache: ... */` format with optional `ttl=` parameter. Unified with sharding hints via `comment()` function in `comment.rs`. 8. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. +9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. `force_cache` hints normalize the query in the hash to use the same key as regular `cache`. 10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. @@ -248,31 +237,27 @@ SQL comment → pgdog.cache parameter → DB policy config → Auto-decisi 13. 
**Moved all cache-related structs from QueryEngine to Client** — now all cache structs including redis client are creating for whole pgdog's lifetime. -14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors (`CachePolicyExtractor`, `CommentCacheExtractor`, `ParameterCacheExtractor`, `CachePolicyDispatcher`, `CachePolicyResolver`) replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. `CacheDirective::None` removed in favor of `Option` with `NoCache` as default. Parameter format unified to `no_cache` (underscore, not dash). +14. **Use built-in query comment hints** — Cache hints (`pgdog_cache:`) are now extracted alongside sharding hints (`pgdog_shard:`, `pgdog_sharding_key:`, `pgdog_role:`) via the unified `comment()` function in `comment.rs`. The `comment_cache` field is stored in `AstInner` and accessed during cache checking via `client_request.ast.comment_cache`. Policy resolution simplified: trait-based extractors replaced with free functions (`resolve()`, `get_cache_directive()`, `extract_parameter_directive()`). Comment hint (from AST) has priority over connection parameter `pgdog.cache`. `Cache` struct no longer needs `policy_dispatcher` field. Parameter format unified to `no_cache` (underscore, not dash). 15. **Add cache config to .schema**. -16. **Force-cache hint support**. +16. 
**Force-cache hint support** — `/* pgdog_cache: force_cache */` and `/* pgdog_cache: force_cache ttl=N */` directives always attempt to cache (cache key normalized), bypassing normal cache miss flow considerations. --- ## What's Left To Do -1. **Auto policy** — Implemented but untested. Relies on stats tracker to decide based on hit/miss ratio and avg result size after enough observations. - -2. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) - -3. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -4. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` +2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. -5. **Magic numbers in send_cached_response()**. +3. 
**Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` -6. **Make statistics collection deferred** — for auto policy. +4. **Magic numbers in send_cached_response()**. -7. **Provide config hotswap**. +5. **Provide config hotswap**. -8. **Review and rewrite CacheClient**. +6. **Review and rewrite CacheClient**. ### Planned Tests From 48d67be859aa5cdf2825a4bdd45f3ab872cb5ac3 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Thu, 14 May 2026 13:07:04 +0300 Subject: [PATCH 16/23] cache processing and client responding refined --- docs/CACHE.md | 33 ++++--- pgdog/src/frontend/cache/client.rs | 60 +++++++------ pgdog/src/frontend/cache/integration.rs | 89 +++++++++++++------ pgdog/src/frontend/cache/mod.rs | 25 +++--- pgdog/src/frontend/client/query_engine/mod.rs | 6 +- 5 files changed, 136 insertions(+), 77 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 8ddf2d8ab..365da7dad 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -46,20 +46,23 @@ pub use policy::CacheDecision; Key methods: - `new()` — creates client (reads config internally) -- `try_read_cache(cache_context, in_transaction, client_request, params, stream)` — calls `cache_check()`, handles HIT/MISS/PASS-through +- `try_read_cache(cache_context, in_transaction, client_request, params)` — calls `cache_check()`, returns `Ok(Some(Vec))` on HIT (caller replays through pipeline), `Ok(None)` on MISS/PASSTHROUGH - `save_response_in_cache(cache_context)` — finalizes by storing the captured response **`client.rs`** — Redis client wrapper using `fred` v9: - `CacheClient::new()` — builds client from global `config().config.general.cache`, returns disabled stub if no config/URL - `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag -- `get(&self, key)` — returns `Result>>`; 
fetches cached wire-protocol bytes +- `get(&self, key)` — returns `Result, Error>`; fetches cached wire-protocol bytes. Returns `Err(Error::CacheMiss)` on a key miss (distinct from connection errors) - `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` - `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` - `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) - `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) - `is_enabled()` — returns true if both client exists and config enabled - Keys are prefixed with `"pgdog:"` -- Error types: `RedisError(String)`, `ConnectionFailed(String)` +- Error types: + - `RedisError { cmd: &'static str, key: u64, err: RedisError }` — Redis command failed (includes command name and key for context) + - `ConnectionFailed(&'static str)` — not connected or not configured + - `CacheMiss(u64)` — key not present in Redis (not an error condition, used for control flow) - `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks - All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net @@ -80,7 +83,7 @@ Key methods: **`integration.rs`** — Integration methods on `impl Cache`: - `cache_check()` — main entry point, checks route, calls `policy::resolve()`, checks Redis -- `send_cached_response()` — deserializes wire-format bytes and sends to client +- `deserialize_cached(Vec) -> Vec` — parses a flat blob of concatenated PostgreSQL wire messages into individual `Message` values. Wire format: `[1B code][4B length (incl. itself)][payload]`. Named constants `HEADER_CODE_LEN`, `HEADER_LEN_SIZE`, `HEADER_TOTAL` replace the former magic numbers. 
Not Redis-specific — usable with any cache backend that stores raw bytes. - `cache_response()` — serializes `Vec` into wire bytes and stores in Redis - Cache key: XXH3 hash of `database_name + raw_query_string` @@ -88,7 +91,7 @@ Key methods: **`pgdog/src/frontend/client/query_engine/mod.rs`** - Imports global `cache()` from `frontend::cache` -- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache().try_read_cache(context)`. If HIT: sends cached response and returns. On MISS: stores state in `context.cache_context`. +- `handle()` flow: after `route_query()` and before `before_execution()`, calls `cache().try_read_cache(context)`. If HIT: replays each cached `Message` through `process_server_message()` (same pipeline as live backend responses — stats, transaction state, hooks all fire correctly), then returns. On MISS: stores state in `context.cache_context`. - After `match command`, calls `cache().save_response_in_cache(context)` to finalize caching. **`pgdog/src/frontend/client/query_engine/query.rs`** @@ -123,7 +126,7 @@ xxhash-rust = { version = "0.8", features = ["xxh3"]} | Redis client | `fred` crate v9 (async-native, tokio integration) | | Cacheable queries | Only reads (`route.is_read()`) | | Cache policy resolution | 2-tier: SQL comment/param → DB policy | -| Cache HIT flow | Deserialize wire bytes → parse messages → send to client → return `Ok(true)` | +| Cache HIT flow | Deserialize wire bytes → `Vec` → replay each through `process_server_message()` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | | Cache key | XXH3 hash of `database_name + raw_query_string` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | @@ -213,9 +216,9 @@ SQL comment → pgdog.cache parameter → DB policy config 1. **Redis client never connects** - Problem: CacheClient::new() built the client but never called init(). 
Fred requires explicit connection initialization. Fix: Added lazy `ensure_connected()` using `client.init().await`, guarded by `AtomicBool` so it runs exactly once on first get()/set(). Changed CacheClient from `#[derive(Debug)]` to manual Debug impl (contains `Arc`). -2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. +2. **Redis GET fails on NULL / cache miss** - Problem: `client.get::()` throws `Parse Error: Cannot parse into bytes` when the key doesn't exist. Fix: Use `client.get::()` and check `val.is_null()` before extracting bytes. Later refined: `get()` now returns `Result, Error>` instead of `Result>>` — a missing key yields `Err(Error::CacheMiss)`, which is matched explicitly in `cache_check()` and converted to `CacheCheckResult::Miss`. Other errors propagate as `Passthrough`. -3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`. +3. **Wire format deserialization wrong in send_cached_response** - Problem: PostgreSQL wire message structure is `[1B code][4B length]` where length includes the 4B itself. I calculated `offset + 5 + msg_len` (treating length as payload-only), causing incorrect byte slicing. Fix: Corrected to `offset + 1 + msg_len`, then replaced magic numbers with named constants `HEADER_CODE_LEN`, `HEADER_LEN_SIZE`, `HEADER_TOTAL`. 4. **Route incorrectly reports read-only as write when parser is disabled** - Problem: `query_parser_bypass()` conservatively returns `Route::write()` for all SQL when the query parser is disabled. 
Since pgdog doesn't enable the parser by default for simple queries, `route.is_read()` was false for `SELECT 1`. Fix: When any database has `cache.enabled = true`, the query parser level is auto-upgraded to `On` in the cluster config. The `|| self.cache_enabled()` check in `cluster.rs:475` forces the parser on. Cache also emits a startup warning if parser is `Off` or `SessionControl`. The old `is_likely_read()` string-prefix heuristic has been removed entirely. @@ -229,7 +232,7 @@ SQL comment → pgdog.cache parameter → DB policy config 9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. `force_cache` hints normalize the query in the hash to use the same key as regular `cache`. -10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation: `offset + 1 + msg_len`. +10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation expressed via named constants (`HEADER_CODE_LEN = 1`, `HEADER_LEN_SIZE = 4`, `HEADER_TOTAL = 5`). Deserialization extracted into `deserialize_cached()` with inline comments explaining each boundary check. 11. **Do not cache error responses**. @@ -243,21 +246,25 @@ SQL comment → pgdog.cache parameter → DB policy config 16. **Force-cache hint support** — `/* pgdog_cache: force_cache */` and `/* pgdog_cache: force_cache ttl=N */` directives always attempt to cache (cache key normalized), bypassing normal cache miss flow considerations. +17. **Cache HIT replays through the server-message pipeline** — Previously, cache hits sent responses directly to the stream, bypassing `process_server_message()`. 
Now `try_read_cache()` returns `Option>` and the caller (`handle()`) feeds each message through `process_server_message()` — giving correct stats accounting, transaction state updates from `ReadyForQuery`, and hook invocations on every cache hit. + +18. **CacheClient error types refined** — `get()` now returns `Result, Error>` (no more `Option`). `Error::CacheMiss(u64)` is a dedicated variant for key-not-found; `Error::RedisError` is now a struct variant carrying `cmd: &'static str`, `key: u64`, and the underlying error for richer diagnostics. `Error::ConnectionFailed` uses `&'static str` instead of `String` to avoid heap allocation on the hot path. + --- ## What's Left To Do 1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. 3. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` -4. **Magic numbers in send_cached_response()**. +4. **Provide config hotswap**. -5. **Provide config hotswap**. +5. **Review and rewrite CacheClient**. -6. **Review and rewrite CacheClient**. +6. 
**Abstract storage backend** — `CacheClient` is Redis-specific. A `CacheStorage` trait (`get`, `set`, `is_enabled`) would allow plugging in other backends (e.g. memcached) via config. `deserialize_cached()` is already backend-agnostic (pure wire-protocol parsing) and would be shared across all backends. ### Planned Tests diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs index de59dc686..87ebd8e1a 100644 --- a/pgdog/src/frontend/cache/client.rs +++ b/pgdog/src/frontend/cache/client.rs @@ -182,19 +182,19 @@ impl CacheClient { self.redis_connected.load(Ordering::Relaxed) } - pub(crate) async fn get(&self, key: u64) -> Result>, Error> { + pub(crate) async fn get(&self, key: u64) -> Result, Error> { if !self.ensure_connected().await { if !self.is_connected() { self.spawn_reconnect(); return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background".to_string(), + "Redis disconnected, reconnecting in background", )); } - return Err(Error::ConnectionFailed("Redis not connected".to_string())); + return Err(Error::ConnectionFailed("Redis not connected")); } let Some(ref client) = self.client else { - return Ok(None); + return Err(Error::ConnectionFailed("Redis not configured")); }; let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); @@ -205,27 +205,25 @@ impl CacheClient { .await { Ok(Ok(v)) => v, - Ok(Err(e)) => { - debug!("Redis GET error for key {}: {}", key, e); + Ok(Err(err)) => { self.mark_disconnected(); - return Err(Error::RedisError(e.to_string())); + return Err(Error::RedisError { + cmd: "GET", + key, + err, + }); } Err(_) => { - error!("Redis GET timed out for key {}", key); self.mark_disconnected(); - return Err(Error::ConnectionFailed("Redis GET timed out".to_string())); + return Err(Error::ConnectionFailed("Redis GET timed out")); } }; - if val.is_null() { - debug!("Cache miss for key {}", key); - Ok(None) - } else if let Some(bytes) = val.into_bytes() { + if let Some(bytes) = val.into_bytes() { 
debug!("Cache hit for key {}", key); - Ok(Some(bytes.to_vec())) + Ok(bytes.to_vec()) } else { - debug!("Redis GET value not bytes for key {}", key); - Ok(None) + Err(Error::CacheMiss(key)) } } @@ -234,14 +232,14 @@ impl CacheClient { if !self.is_connected() { self.spawn_reconnect(); return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background".to_string(), + "Redis disconnected, reconnecting in background", )); } - return Err(Error::ConnectionFailed("Redis not connected".to_string())); + return Err(Error::ConnectionFailed("Redis not connected")); } let Some(ref client) = self.client else { - return Ok(()); + return Err(Error::ConnectionFailed("Redis not configured")); }; let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); @@ -276,15 +274,17 @@ impl CacheClient { debug!("Cached key {} with TTL {}s", key, ttl_seconds); Ok(()) } - Ok(Err(e)) => { - debug!("Redis SET error for key {}: {}", key, e); + Ok(Err(err)) => { self.mark_disconnected(); - Err(Error::RedisError(e.to_string())) + Err(Error::RedisError { + cmd: "SET", + key, + err, + }) } Err(_) => { - error!("Redis SET timed out for key {}", key); self.mark_disconnected(); - Err(Error::ConnectionFailed("Redis SET timed out".to_string())) + Err(Error::ConnectionFailed("Redis SET timed out")) } } } @@ -297,8 +297,14 @@ impl CacheClient { #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Redis error: {0}")] - RedisError(String), + #[error("Redis {cmd} error for key {key}: {err}")] + RedisError { + cmd: &'static str, + key: u64, + err: RedisError, + }, #[error("Connection failed: {0}")] - ConnectionFailed(String), + ConnectionFailed(&'static str), + #[error("Cache miss for key {0}")] + CacheMiss(u64), } diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index d585c3fe4..84beb2aa5 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -4,11 +4,14 @@ use once_cell::sync::Lazy; use 
regex::Regex; use crate::{ - frontend::{cache::CacheDecision, ClientRequest}, - net::{FromBytes, Message, Parameters, Stream, ToBytes}, + frontend::{ + cache::{client::Error as CacheClientError, CacheDecision}, + ClientRequest, + }, + net::{FromBytes, Message, Parameters, ToBytes}, }; -use tracing::debug; +use tracing::{debug, warn}; use super::{policy, Cache}; @@ -26,6 +29,10 @@ pub enum CacheCheckResult { Passthrough, } +const HEADER_CODE_LEN: usize = 1; +const HEADER_LEN_SIZE: usize = 4; +const HEADER_TOTAL: usize = HEADER_CODE_LEN + HEADER_LEN_SIZE; + impl Cache { pub(super) async fn cache_check( &self, @@ -72,33 +79,58 @@ impl Cache { ttl, })), CacheDecision::Cache(ttl) => match self.client.get(cache_key_hash).await { - Ok(Some(cached)) => Ok(CacheCheckResult::Hit { cached }), - Ok(None) => Ok(CacheCheckResult::Miss(CacheMiss { + Ok(cached) => Ok(CacheCheckResult::Hit { cached }), + Err(CacheClientError::CacheMiss(_)) => Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, ttl: ttl, })), Err(e) => { - debug!("Cache get error: {}", e); + warn!("{}", e); Ok(CacheCheckResult::Passthrough) } }, } } - pub(super) async fn send_cached_response( - &self, - stream: &mut Stream, - cached: Vec, - ) -> Result<(), crate::frontend::Error> { + /// Deserializes a flat byte blob (N concatenated PostgreSQL wire messages) into `Vec`. + /// + /// Redis stores cache responses as raw wire-format bytes concatenated together without framing. + /// We walk through the blob reading each message boundary, then slice out the individual message. + /// + /// ### PostgreSQL wire protocol message layout: + /// + /// [Source](https://www.postgresql.org/docs/current/protocol-overview.html) + /// + /// ```text + /// +----------+--------------------------+-------------------+ + /// | 1 byte | 4 bytes (big-endian) | N bytes (payload) | + /// | code | length (incl. 
4B itself) | data | + /// +----------+--------------------------+-------------------+ + /// ``` + /// + /// Constants for parsing: + /// - `HEADER_CODE_LEN` = 1 byte (message type code, e.g. 'T' = RowDescription) + /// - `HEADER_LEN_SIZE` = 4 bytes (message length, includes itself but NOT the code byte) + /// - `HEADER_TOTAL` = 5 bytes (minimum bytes needed to read the length field) + pub(super) fn deserialize_cached(cached: Vec) -> Vec { + let mut messages = Vec::new(); let mut offset = 0; let len = cached.len(); while offset < len { - if offset + 5 > len { + // Need at least a full header (code + length) to proceed. + if offset + HEADER_TOTAL > len { + debug!( + "deserializing cached response: not enough bytes for message header (offset={}, len={})", + offset, len + ); break; } let _code = cached[offset] as char; + + // Read the message length field (4 bytes, big-endian). + // This length includes the 4-byte length field itself but NOT the code byte. let msg_len = u32::from_be_bytes([ cached[offset + 1], cached[offset + 2], @@ -106,19 +138,28 @@ impl Cache { cached[offset + 4], ]) as usize; - if msg_len < 4 || offset + 1 + msg_len > len { + // Sanity checks: + // 1. Length must be at least 4 (the length field itself): if < 4 the data is corrupt. + // 2. Must not read past the end of the blob. 
+ if msg_len < 4 || offset + HEADER_CODE_LEN + msg_len > len { + debug!( + "deserializing cached response: invalid msg length {} (offset={}, len={})", + msg_len, offset, len + ); break; } - let end = offset + 1 + msg_len; + // Full message spans: 1 byte (code) + msg_len (length field + payload) + let end = offset + HEADER_CODE_LEN + msg_len; + let msg_bytes: bytes::Bytes = cached[offset..end].to_vec().into(); - let msg = Message::from_bytes(msg_bytes)?; + if let Ok(msg) = Message::from_bytes(msg_bytes) { + messages.push(msg); + } offset = end; - - stream.send_flush(&msg).await?; } - Ok(()) + messages } pub(super) async fn cache_response( @@ -126,9 +167,9 @@ impl Cache { cache_key_hash: u64, messages: Vec, ttl: u64, - ) -> Result<(), ()> { + ) { if messages.is_empty() || !self.client.is_enabled() { - return Ok(()); + return; } let mut buffer = Vec::new(); @@ -137,19 +178,17 @@ impl Cache { Ok(bytes) => buffer.extend_from_slice(&bytes), Err(e) => { debug!("Failed to serialize message for caching: {}", e); - return Ok(()); + return; } } } if buffer.is_empty() { - return Ok(()); + return; } if let Err(e) = self.client.set(cache_key_hash, &buffer, ttl).await { - debug!("Failed to cache response: {}", e); + debug!("Failed to cache response: {:?}", e); } - - Ok(()) } } diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 85ddb948d..42ee811a2 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -14,7 +14,7 @@ use tracing::debug; use crate::{ frontend::{ClientRequest, cache::integration::CacheMiss}, - net::{Parameters, Stream}, + net::{Message, Parameters}, }; #[derive(Debug)] @@ -35,14 +35,21 @@ impl Cache { } } + /// Check the cache for a query response. + /// + /// On HIT returns `Ok(Some(messages))` — the caller is responsible for + /// replaying these messages through the normal server-message pipeline. 
+ /// + /// On MISS or PASSTHROUGH returns `Ok(None)` and updates `cache_context` + /// so that the response can later be captured and stored via + /// `save_response_in_cache`. pub async fn try_read_cache( &self, cache_context: &mut CacheContext, in_transaction: bool, client_request: &ClientRequest, params: &Parameters, - stream: &mut Stream, - ) -> Result { + ) -> Result>, crate::frontend::Error> { let cache_result = self .cache_check(in_transaction, client_request, params) .await?; @@ -50,22 +57,22 @@ impl Cache { match cache_result { CacheCheckResult::Hit { cached } => { debug!("Cache hit, serving from cache"); - self.send_cached_response(stream, cached).await?; + let messages = Self::deserialize_cached(cached); cache_context.reset(); - return Ok(true); + Ok(Some(messages)) } CacheCheckResult::Miss(cache_miss) => { debug!("Cache miss for key hash: {}", cache_miss.cache_key_hash); cache_context.cache_miss = Some(cache_miss); cache_context.response_buffer.clear(); cache_context.had_error = false; + Ok(None) } CacheCheckResult::Passthrough => { cache_context.reset(); + Ok(None) } } - - Ok(false) } /// Finalize caching by storing the response in Redis. 
@@ -73,9 +80,7 @@ impl Cache { if let Some(CacheMiss { cache_key_hash, ttl } ) = cache_context.cache_miss.take() { if !cache_context.had_error && !cache_context.response_buffer.is_empty() { let messages = std::mem::take(&mut cache_context.response_buffer); - if let Err(e) = self.cache_response(cache_key_hash, messages, ttl).await { - debug!("Failed to cache response: {:?}", e); - } + self.cache_response(cache_key_hash, messages, ttl).await; } } } diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index 32b753205..e302e717d 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -131,16 +131,18 @@ impl QueryEngine { } let in_transaction = context.in_transaction(); - if cache() + if let Some(cached_messages) = cache() .try_read_cache( &mut context.cache_context, in_transaction, context.client_request, context.params, - context.stream, ) .await? { + for msg in cached_messages { + self.process_server_message(context, msg).await?; + } self.update_stats(context); return Ok(()); } From 6e08a252c174a5198def08043e24b3a29271366a Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Fri, 15 May 2026 16:00:52 +0300 Subject: [PATCH 17/23] added support for multiple backends and provided config hotswap for cache --- .schema/pgdog.schema.json | 61 ++++- docs/CACHE.md | 105 ++++---- pgdog-config/src/cache.rs | 86 +++++- pgdog-config/src/lib.rs | 2 +- pgdog/src/frontend/cache/client.rs | 310 ---------------------- pgdog/src/frontend/cache/integration.rs | 41 +-- pgdog/src/frontend/cache/mod.rs | 72 ++++- pgdog/src/frontend/cache/policy.rs | 6 +- pgdog/src/frontend/cache/storage/mod.rs | 58 ++++ pgdog/src/frontend/cache/storage/redis.rs | 246 +++++++++++++++++ 10 files changed, 581 insertions(+), 406 deletions(-) delete mode 100644 pgdog/src/frontend/cache/client.rs create mode 100644 pgdog/src/frontend/cache/storage/mod.rs create mode 100644 
pgdog/src/frontend/cache/storage/redis.rs diff --git a/.schema/pgdog.schema.json b/.schema/pgdog.schema.json index b4997615d..1b8f6385f 100644 --- a/.schema/pgdog.schema.json +++ b/.schema/pgdog.schema.json @@ -32,10 +32,14 @@ "broadcast_address": null, "broadcast_port": 6433, "cache": { + "backend": "redis", "enabled": false, "max_result_size": 0, "policy": "no_cache", - "redis_url": "redis://localhost:6379", + "redis": { + "cache_key_prefix": "pgdog:", + "url": "redis://localhost:6379" + }, "ttl": 300 }, "checkout_timeout": 5000, @@ -283,11 +287,16 @@ ] }, "Cache": { - "description": "Redis cache configuration for a database.", + "description": "Cache configuration.", "type": "object", "properties": { + "backend": { + "description": "Which storage backend to use.\n\n_Default:_ `redis`", + "$ref": "#/$defs/CacheBackend", + "default": "redis" + }, "enabled": { - "description": "Whether to enable caching for this database.\n\n_Default:_ `false`", + "description": "Whether to enable caching.\n\n_Default:_ `false`", "type": "boolean", "default": false }, @@ -299,14 +308,17 @@ "minimum": 0 }, "policy": { - "description": "Cache policy: no_cache or cache.\n\n_Default:_ `no_cache`", + "description": "Cache policy: `no_cache` or `cache`.\n\n_Default:_ `no_cache`", "$ref": "#/$defs/CachePolicy", "default": "no_cache" }, - "redis_url": { - "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", - "type": "string", - "default": "redis://localhost:6379" + "redis": { + "description": "Redis backend configuration.\n\nOnly read when `backend = \"redis\"`.", + "$ref": "#/$defs/RedisConfig", + "default": { + "cache_key_prefix": "pgdog:", + "url": "redis://localhost:6379" + } }, "ttl": { "description": "Default TTL in seconds for cached queries.\n\n_Default:_ `300`", @@ -318,6 +330,16 @@ }, "additionalProperties": false }, + "CacheBackend": { + "description": "Cache storage backend discriminator.", + "oneOf": [ + { + "description": "Redis backend (default).", 
+ "type": "string", + "const": "redis" + } + ] + }, "CachePolicy": { "description": "Cache policy.", "oneOf": [ @@ -636,10 +658,14 @@ "description": "Redis cache configuration for this database.", "$ref": "#/$defs/Cache", "default": { + "backend": "redis", "enabled": false, "max_result_size": 0, "policy": "no_cache", - "redis_url": "redis://localhost:6379", + "redis": { + "cache_key_prefix": "pgdog:", + "url": "redis://localhost:6379" + }, "ttl": 300 } }, @@ -1510,6 +1536,23 @@ } ] }, + "RedisConfig": { + "description": "Redis-specific cache backend configuration.\n\nCorresponds to the `[general.cache.redis]` TOML section.", + "type": "object", + "properties": { + "cache_key_prefix": { + "description": "Key prefix prepended to every cache key stored in Redis.\n\n_Default:_ `pgdog:`", + "type": "string", + "default": "pgdog:" + }, + "url": { + "description": "Redis connection URL.\n\n_Default:_ `redis://localhost:6379`", + "type": "string", + "default": "redis://localhost:6379" + } + }, + "additionalProperties": false + }, "ReplicaLag": { "description": "Replica lag banning configuration. When a replica's replication lag exceeds the threshold, it is banned from serving read queries.", "type": "object", diff --git a/docs/CACHE.md b/docs/CACHE.md index 365da7dad..1119ae438 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -14,58 +14,78 @@ Cache SELECT queries in Redis, bypass PostgreSQL on cache hit, populate cache on **CachePolicy enum:** `NoCache` (default), `Cache`. Implements `FromStr`, `Display`, `Serialize`, `Deserialize`, `Copy`, `JsonSchema`. -**Cache struct:** -- `enabled: bool` — is caching on? -- `policy: CachePolicy` — which policy? -- `ttl: u64` — default TTL seconds (default 300) -- `redis_url: String` — Redis connection URL -- `max_result_size: usize` — max cached result bytes +**CacheBackend enum:** `Redis` (default). Discriminator for selecting the storage backend and for hotswap detection when the backend type changes in config. 
+ +**RedisConfig struct** (`[general.cache.redis]`): +- `url: String` — Redis connection URL (default `redis://localhost:6379`) +- `cache_key_prefix: String` — prefix prepended to every Redis key (default `pgdog:`) + +**Cache struct** (`[general.cache]`): +- `enabled: bool` — is caching on? (default `false`) +- `policy: CachePolicy` — which policy? (default `no_cache`) +- `ttl: u64` — default TTL seconds (default `300`) +- `backend: CacheBackend` — which storage backend (default `redis`) +- `redis: RedisConfig` — Redis-specific settings +- `max_result_size: usize` — max cached result bytes (default `0` = unlimited) + +Example TOML: +```toml +[general.cache] +enabled = true +policy = "cache" +ttl = 300 + +[general.cache.redis] +url = "redis://localhost:6379" +cache_key_prefix = "pgdog:" +``` **`general.rs`** — `General` struct holds `cache: Cache` field. **Cache config is global.** -**`lib.rs`** — Exports `pub mod cache;` and `pub use cache::{CachePolicy, Cache};`. +**`lib.rs`** — Exports `pub use cache::{CacheBackend, CachePolicy, Cache, RedisConfig as CacheRedisConfig};`. ### Cache Module (`pgdog/src/frontend/cache/`) **`mod.rs`** — Module exports, global singleton, and main `Cache` struct: ```rust -pub mod client; pub mod context; pub mod integration; pub mod policy; +pub mod storage; -pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; +pub use storage::{CacheStorage, RedisCacheStorage}; ``` -`Cache` struct wraps: `CacheClient`. +`Cache` struct wraps `RwLock>>` (tokio `RwLock`). **Global singleton:** Cache is global-scoped, not connection-scoped. Accessed via `cache()` function which returns `Arc` from a `Lazy>` static. `Cache::new()` reads config internally — no parameters needed. +**Config hotswap:** `hotswap_if_needed()` is called at the top of `try_read_cache` and `save_response_in_cache`. 
It fast-paths with a read-lock; acquires write-lock only if the URL or backend type has changed, then rebuilds the storage. + Key methods: -- `new()` — creates client (reads config internally) -- `try_read_cache(cache_context, in_transaction, client_request, params)` — calls `cache_check()`, returns `Ok(Some(Vec))` on HIT (caller replays through pipeline), `Ok(None)` on MISS/PASSTHROUGH -- `save_response_in_cache(cache_context)` — finalizes by storing the captured response - -**`client.rs`** — Redis client wrapper using `fred` v9: -- `CacheClient::new()` — builds client from global `config().config.general.cache`, returns disabled stub if no config/URL -- `ensure_connected()` — lazy one-time `client.init().await` followed by `client.ping()` verification; sets `redis_connected` flag -- `get(&self, key)` — returns `Result, Error>`; fetches cached wire-protocol bytes. Returns `Err(Error::CacheMiss)` on a key miss (distinct from connection errors) -- `set(&self, key, value, ttl)` — stores bytes with EX expiration; respects `max_result_size` -- `spawn_reconnect()` — background task that retries `init()` every 500ms, verifies with `ping()`, sets `redis_connected = true` -- `mark_disconnected()` — sets `redis_connected = false`, spawns reconnect if not already running (CAS-guarded) -- `is_connected()` — reads our atomic flag (not fred's potentially stale `ClientState`) -- `is_enabled()` — returns true if both client exists and config enabled -- Keys are prefixed with `"pgdog:"` -- Error types: - - `RedisError { cmd: &'static str, key: u64, err: RedisError }` — Redis command failed (includes command name and key for context) - - `ConnectionFailed(&'static str)` — not connected or not configured - - `CacheMiss(u64)` — key not present in Redis (not an error condition, used for control flow) -- `redis_connected: Arc` — authoritative connection gate, only true after PING succeeds +- `new()` — creates storage from current config (or `None` if disabled) +- `hotswap_if_needed()` 
— compares the live config against the active storage's config via `has_config_changed()`; swaps the storage if it returns `true` +- `try_read_cache(cache_context, in_transaction, client_request, params)` — hotswaps, calls `cache_check()`, returns `Ok(Some(Vec<Message>))` on HIT (caller replays through pipeline), `Ok(None)` on MISS/PASSTHROUGH +- `save_response_in_cache(cache_context)` — hotswaps, finalizes by storing the captured response + +**`storage/mod.rs`** — Abstract storage trait and error type: +- `CacheStorage` trait: `get`, `set`, `is_enabled`, `has_config_changed` — implemented by all cache backends +- `Error` enum shared across all backends: `RedisError`, `ConnectionFailed`, `CacheMiss` + +**`storage/redis.rs`** — Redis storage backend (`RedisCacheStorage`) implementing `CacheStorage`: +- `RedisCacheStorage::new(config)` — builds the client from the given URL; immediately spawns a background connection task; returns `None` if the URL is invalid +- Background connect task: retries `init()` in a loop (5ms to 5s exponential backoff); sets `reconnecting = false` on success; CAS-guarded so only one task runs at a time +- `get(&self, key)` — returns `Result<Vec<u8>, Error>`; returns `Err(Error::ConnectionFailed)` immediately (triggering cache miss) if not yet connected; marks `reconnecting` and spawns reconnect on Redis errors +- `set(&self, key, value, ttl)` — stores bytes with EX expiration; returns immediately on disconnect; respects `max_result_size` from live config +- `reconnect()` — spawns reconnect if not already running (CAS-guarded) +- `has_config_changed()` — returns `true` if the cache config has changed (used for hotswap detection) +- `is_enabled()` — reads live `config().config.general.cache.enabled` +- Key prefix comes from `config().config.general.cache.redis.cache_key_prefix` - `reconnecting: Arc` — prevents multiple concurrent reconnect tasks -- All Redis operations wrapped in `tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) as safety net +- All Redis operations wrapped in
`tokio::time::timeout(REDIS_OPERATION_TIMEOUT)` (2s) **`policy.rs`** — 2-tier policy resolution: - `CacheDirective` enum: `Cache { ttl_seconds }`, `ForceCache { ttl_seconds }`, `NoCache` (default) @@ -250,27 +270,20 @@ SQL comment → pgdog.cache parameter → DB policy config 18. **CacheClient error types refined** — `get()` now returns `Result, Error>` (no more `Option`). `Error::CacheMiss(u64)` is a dedicated variant for key-not-found; `Error::RedisError` is now a struct variant carrying `cmd: &'static str`, `key: u64`, and the underlying error for richer diagnostics. `Error::ConnectionFailed` uses `&'static str` instead of `String` to avoid heap allocation on the hot path. ---- +19. **Config hotswap** — `Cache` singleton holds `Arc<RwLock<Option<Box<dyn CacheStorage>>>>`. `hotswap_if_needed()` runs at the start of every `try_read_cache` and `save_response_in_cache` call: it read-locks to compare the active backend's URL against `config().config.general.cache.redis.url`; if they differ (or the backend type changes) it write-locks and rebuilds the storage. The fast path is a read-lock-only check with no allocation. -## What's Left To Do +20. **CacheClient rewritten as `RedisCacheStorage`** — Replaced `CacheClient` with `RedisCacheStorage` implementing the `CacheStorage` trait. Key improvements: the background connect task is spawned immediately in `new()` so the first query never blocks on init; `get`/`set` check only one atomic flag (`reconnecting`) and return immediately when it is set, instead of running `ensure_connected`; the `Option` field and the three-condition guard at the top of every operation are gone; `reconnect` is the single place that sets the flag and CAS-guards the reconnect spawn. -1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching.
But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) +21. **Abstract storage backend** — `storage/mod.rs` defines the `CacheStorage` trait (`get`, `set`, `is_enabled`, `has_config_changed`) and the shared `Error` enum. `storage/redis.rs` is the Redis implementation. `Cache` holds `Box` behind a tokio `RwLock` so any backend (e.g. Memcached) can be plugged in by adding a sub-module under `storage/` and a variant to `CacheBackend`. `deserialize_cached()` remains backend-agnostic in `integration.rs`. -2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but the fast-path check (`ensure_connected`) and the reconnect task can have timing edge cases under rapid disconnect/reconnect cycles. Need to stress-test. +22. **Nested backend config** — Backend-specific settings live in their own TOML subtable (`[general.cache.redis]`) rather than flat fields on `[general.cache]`. `RedisConfig` holds `url` and `cache_key_prefix`. When a new backend is added, it gets its own subtable (e.g. `[general.cache.memcached]`) without polluting the top-level cache section. `client.rs` renamed to `storage/redis.rs`. -3. **Integration tests** — Tests live in `integration/rust/tests/integration/`. Redis must be running on 127.0.0.1:6379 before tests. Run with: `cd integration/rust && cargo nextest run --no-fail-fast --test-threads=1` - -4. **Provide config hotswap**. +--- -5. **Review and rewrite CacheClient**. +## What's Left To Do -6. **Abstract storage backend** — `CacheClient` is Redis-specific. A `CacheStorage` trait (`get`, `set`, `is_enabled`) would allow plugging in other backends (e.g. memcached) via config. `deserialize_cached()` is already backend-agnostic (pure wire-protocol parsing) and would be shared across all backends. +1. 
**Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) -### Planned Tests +2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. -1. **Database key namespace collision** — Two databases sharing one Redis, both running same query but with different underlying PG data. Verify correct isolation. -2. **Basic cache hit/miss** — Run a SELECT once (expect miss), run again (expect hit), verify metrics. -3. **TTL expiration** — Cache a query with short TTL, wait for expiry, verify miss on third call. -4. **Write bypasses cache** — Execute INSERT/UPDATE/DELETE, verify these do not populate or consume the cache. -5. **Redis unavailable** — Stop Redis mid-flight, verify queries pass through to PG without blocking. -6. **Redis reconnection** — Restart Redis after disconnect, verify cache recovers automatically. \ No newline at end of file +3. **Integration tests**. diff --git a/pgdog-config/src/cache.rs b/pgdog-config/src/cache.rs index 7450c4730..0a7ae9021 100644 --- a/pgdog-config/src/cache.rs +++ b/pgdog-config/src/cache.rs @@ -36,30 +36,89 @@ impl std::fmt::Display for CachePolicy { } } -/// Redis cache configuration for a database. +/// Cache storage backend discriminator. +#[derive( + Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Copy, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum CacheBackend { + /// Redis backend (default). + #[default] + Redis, +} + +/// Redis-specific cache backend configuration. +/// +/// Corresponds to the `[general.cache.redis]` TOML section. 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct RedisConfig { + /// Redis connection URL. + /// + /// _Default:_ `redis://localhost:6379` + #[serde(default = "RedisConfig::url")] + pub url: String, + + /// Key prefix prepended to every cache key stored in Redis. + /// + /// _Default:_ `pgdog:` + #[serde(default = "RedisConfig::cache_key_prefix")] + pub cache_key_prefix: String, +} + +impl Default for RedisConfig { + fn default() -> Self { + Self { + url: Self::url(), + cache_key_prefix: Self::cache_key_prefix(), + } + } +} + +impl RedisConfig { + fn url() -> String { + "redis://localhost:6379".to_string() + } + + fn cache_key_prefix() -> String { + "pgdog:".to_string() + } +} + +/// Cache configuration. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Cache { - /// Whether to enable caching for this database. - /// + /// Whether to enable caching. + /// /// _Default:_ `false` #[serde(default = "Cache::enabled")] pub enabled: bool, - /// Cache policy: no_cache or cache. + + /// Cache policy: `no_cache` or `cache`. /// /// _Default:_ `no_cache` #[serde(default = "Cache::policy")] pub policy: CachePolicy, + /// Default TTL in seconds for cached queries. /// /// _Default:_ `300` #[serde(default = "Cache::ttl")] pub ttl: u64, - /// Redis connection URL. + + /// Which storage backend to use. /// - /// _Default:_ `redis://localhost:6379` - #[serde(default = "Cache::redis_url")] - pub redis_url: String, + /// _Default:_ `redis` + #[serde(default = "Cache::backend")] + pub backend: CacheBackend, + + /// Redis backend configuration. + /// + /// Only read when `backend = "redis"`. + #[serde(default)] + pub redis: RedisConfig, + /// Maximum result size in bytes to cache (0 = unlimited). 
/// /// _Default:_ `0` @@ -73,7 +132,8 @@ impl Default for Cache { enabled: Self::enabled(), policy: Self::policy(), ttl: Self::ttl(), - redis_url: Self::redis_url(), + backend: Self::backend(), + redis: RedisConfig::default(), max_result_size: Self::max_result_size(), } } @@ -85,18 +145,18 @@ impl Cache { } fn policy() -> CachePolicy { - Default::default() + CachePolicy::default() } fn ttl() -> u64 { 300 } - fn redis_url() -> String { - "redis://localhost:6379".to_string() + fn backend() -> CacheBackend { + CacheBackend::default() } fn max_result_size() -> usize { 0 } -} \ No newline at end of file +} diff --git a/pgdog-config/src/lib.rs b/pgdog-config/src/lib.rs index 399fd8e3d..22ab53404 100644 --- a/pgdog-config/src/lib.rs +++ b/pgdog-config/src/lib.rs @@ -19,7 +19,7 @@ pub mod users; pub mod util; pub use auth::{AuthType, PassthroughAuth}; -pub use cache::{CachePolicy, Cache}; +pub use cache::{CacheBackend, CachePolicy, Cache, RedisConfig as CacheRedisConfig}; pub use core::{Config, ConfigAndUsers}; pub use data_types::*; pub use database::{ diff --git a/pgdog/src/frontend/cache/client.rs b/pgdog/src/frontend/cache/client.rs deleted file mode 100644 index 87ebd8e1a..000000000 --- a/pgdog/src/frontend/cache/client.rs +++ /dev/null @@ -1,310 +0,0 @@ -use fred::prelude::*; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::time::Duration; -use tracing::{debug, error, info}; - -use crate::config::config; - -const CACHE_KEY_PREFIX: &str = "pgdog:"; - -/// Timeout for individual Redis operations (GET/SET/init). -/// Safety net — should never fire in normal operation since the atomic flag gates all calls. -const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); - -#[derive(Clone)] -pub struct CacheClient { - client: Option, - /// Master connection state flag. Set true only after PING succeeds - /// on init or reconnect. Set false immediately on any error/timeout. 
- redis_connected: Arc, - /// Prevents spawning multiple reconnect tasks simultaneously. - reconnecting: Arc, -} - -impl std::fmt::Debug for CacheClient { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CacheClient") - .field("client", &self.client.as_ref().map(|_| "...")) - .field( - "redis_connected", - &self.redis_connected.load(Ordering::Relaxed), - ) - .field("reconnecting", &self.reconnecting.load(Ordering::Relaxed)) - .finish() - } -} - -impl CacheClient { - pub fn new() -> Self { - let cache_config = &config().config.general.cache; - - if !cache_config.enabled { - return Self { - client: None, - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - }; - } - - let url = cache_config.redis_url.as_str(); - let client_config = match RedisConfig::from_url(url) { - Ok(c) => c, - Err(e) => { - error!("Failed to parse Redis URL: {}", e); - return Self { - client: None, - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - }; - } - }; - - let client = match Builder::from_config(client_config).build() { - Ok(c) => c, - Err(e) => { - error!("Failed to build Redis client: {}", e); - return Self { - client: None, - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - }; - } - }; - - Self { - client: Some(client), - redis_connected: Arc::new(AtomicBool::new(false)), - reconnecting: Arc::new(AtomicBool::new(false)), - } - } - - async fn ensure_connected(&self) -> bool { - if self.redis_connected.load(Ordering::Acquire) { - return true; - } - - if self.reconnecting.load(Ordering::Relaxed) { - return false; - } - - if let Some(ref client) = self.client { - match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { - Ok(Ok(_)) => { - if Self::ping_client(client).await { - self.redis_connected.store(true, Ordering::Release); - info!("Connected to Redis"); - return true; - 
} else { - debug!("Redis init returned OK but PING failed — Redis not ready"); - } - } - Ok(Err(e)) => { - debug!("Redis init failed: {}", e); - } - Err(_) => { - error!("Redis init timed out"); - } - } - } - false - } - - async fn ping_client(client: &RedisClient) -> bool { - match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.ping::()).await { - Ok(Ok(resp)) => { - info!("Redis PING succeeded: {}", resp); - true - } - Ok(Err(e)) => { - debug!("Redis PING failed: {}", e); - false - } - Err(_) => { - debug!("Redis PING timed out"); - false - } - } - } - - fn spawn_reconnect(&self) { - if self - .reconnecting - .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed) - .is_err() - { - debug!("Redis reconnect task already running, skipping"); - return; - } - - let Some(ref client) = self.client else { - error!("Redis reconnect: no client available"); - self.reconnecting.store(false, Ordering::Release); - return; - }; - - let client = client.clone(); - let redis_connected = self.redis_connected.clone(); - let reconnecting = self.reconnecting.clone(); - - tokio::spawn(async move { - info!("Redis reconnect task started"); - let mut attempt = 0; - loop { - attempt += 1; - debug!("Redis reconnect attempt #{}", attempt); - - let init_ok = - match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { - Ok(Ok(_)) => true, - Ok(Err(_)) | Err(_) => false, - }; - - if init_ok || Self::ping_client(&client).await { - redis_connected.store(true, Ordering::Release); - reconnecting.store(false, Ordering::Release); - info!("Redis reconnected successfully"); - return; - } - tokio::time::sleep(Duration::from_millis(500)).await; - } - }); - - info!("Spawning Redis reconnect task"); - } - - fn mark_disconnected(&self) { - self.redis_connected.store(false, Ordering::Release); - self.spawn_reconnect(); - } - - pub fn is_connected(&self) -> bool { - self.redis_connected.load(Ordering::Relaxed) - } - - pub(crate) async fn get(&self, key: u64) -> Result, 
Error> { - if !self.ensure_connected().await { - if !self.is_connected() { - self.spawn_reconnect(); - return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background", - )); - } - return Err(Error::ConnectionFailed("Redis not connected")); - } - - let Some(ref client) = self.client else { - return Err(Error::ConnectionFailed("Redis not configured")); - }; - - let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); - let val = match tokio::time::timeout( - REDIS_OPERATION_TIMEOUT, - client.get::(full_key), - ) - .await - { - Ok(Ok(v)) => v, - Ok(Err(err)) => { - self.mark_disconnected(); - return Err(Error::RedisError { - cmd: "GET", - key, - err, - }); - } - Err(_) => { - self.mark_disconnected(); - return Err(Error::ConnectionFailed("Redis GET timed out")); - } - }; - - if let Some(bytes) = val.into_bytes() { - debug!("Cache hit for key {}", key); - Ok(bytes.to_vec()) - } else { - Err(Error::CacheMiss(key)) - } - } - - pub(crate) async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error> { - if !self.ensure_connected().await { - if !self.is_connected() { - self.spawn_reconnect(); - return Err(Error::ConnectionFailed( - "Redis disconnected, reconnecting in background", - )); - } - return Err(Error::ConnectionFailed("Redis not connected")); - } - - let Some(ref client) = self.client else { - return Err(Error::ConnectionFailed("Redis not configured")); - }; - - let full_key = format!("{}{}", CACHE_KEY_PREFIX, key); - - let cache_config = &config().config.general.cache; - - if cache_config.max_result_size != 0 && value.len() > cache_config.max_result_size { - debug!( - "Skipping cache for key {}: size {} exceeds max {}", - key, - value.len(), - cache_config.max_result_size - ); - return Ok(()); - } - - let ttl_seconds = ttl as i64; - - match tokio::time::timeout( - REDIS_OPERATION_TIMEOUT, - client.set::<(), _, _>( - full_key, - value, - Some(Expiration::EX(ttl_seconds)), - None, - false, - ), - ) - .await - { - Ok(Ok(_)) => { - 
debug!("Cached key {} with TTL {}s", key, ttl_seconds); - Ok(()) - } - Ok(Err(err)) => { - self.mark_disconnected(); - Err(Error::RedisError { - cmd: "SET", - key, - err, - }) - } - Err(_) => { - self.mark_disconnected(); - Err(Error::ConnectionFailed("Redis SET timed out")) - } - } - } - - pub fn is_enabled(&self) -> bool { - let cache_config = &config().config.general.cache; - self.client.is_some() && cache_config.enabled - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("Redis {cmd} error for key {key}: {err}")] - RedisError { - cmd: &'static str, - key: u64, - err: RedisError, - }, - #[error("Connection failed: {0}")] - ConnectionFailed(&'static str), - #[error("Cache miss for key {0}")] - CacheMiss(u64), -} diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 84beb2aa5..8a84970a0 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -5,7 +5,7 @@ use regex::Regex; use crate::{ frontend::{ - cache::{client::Error as CacheClientError, CacheDecision}, + cache::{storage::Error as CacheStorageError, CacheDecision}, ClientRequest, }, net::{FromBytes, Message, Parameters, ToBytes}, @@ -78,17 +78,22 @@ impl Cache { cache_key_hash, ttl, })), - CacheDecision::Cache(ttl) => match self.client.get(cache_key_hash).await { - Ok(cached) => Ok(CacheCheckResult::Hit { cached }), - Err(CacheClientError::CacheMiss(_)) => Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash, - ttl: ttl, - })), - Err(e) => { - warn!("{}", e); - Ok(CacheCheckResult::Passthrough) + CacheDecision::Cache(ttl) => { + let guard = self.storage.read().await; + match guard.as_ref() { + None => Ok(CacheCheckResult::Passthrough), + Some(storage) => match storage.get(cache_key_hash).await { + Ok(cached) => Ok(CacheCheckResult::Hit { cached }), + Err(CacheStorageError::CacheMiss(_)) => { + Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, ttl })) + } + Err(e) => { + warn!("{}", e); + 
Ok(CacheCheckResult::Passthrough) + } + }, } - }, + } } } @@ -168,7 +173,13 @@ impl Cache { messages: Vec, ttl: u64, ) { - if messages.is_empty() || !self.client.is_enabled() { + let guard = self.storage.read().await; + let storage = match guard.as_ref() { + Some(s) if s.is_enabled() => s, + _ => return, + }; + + if messages.is_empty() { return; } @@ -177,7 +188,7 @@ impl Cache { match msg.to_bytes() { Ok(bytes) => buffer.extend_from_slice(&bytes), Err(e) => { - debug!("Failed to serialize message for caching: {}", e); + warn!("Failed to serialize message for caching: {}", e); return; } } @@ -187,8 +198,8 @@ impl Cache { return; } - if let Err(e) = self.client.set(cache_key_hash, &buffer, ttl).await { - debug!("Failed to cache response: {:?}", e); + if let Err(e) = storage.set(cache_key_hash, &buffer, ttl).await { + warn!("{}", e); } } } diff --git a/pgdog/src/frontend/cache/mod.rs b/pgdog/src/frontend/cache/mod.rs index 42ee811a2..6c0023b2d 100644 --- a/pgdog/src/frontend/cache/mod.rs +++ b/pgdog/src/frontend/cache/mod.rs @@ -1,25 +1,37 @@ -pub mod client; pub mod context; pub mod integration; pub mod policy; +pub mod storage; -pub use client::CacheClient; pub use context::CacheContext; pub use integration::CacheCheckResult; pub use policy::CacheDecision; +pub use storage::{CacheStorage, RedisCacheStorage}; use once_cell::sync::Lazy; use std::sync::Arc; +use tokio::sync::RwLock; use tracing::debug; use crate::{ - frontend::{ClientRequest, cache::integration::CacheMiss}, + config::config, + frontend::{ + cache::{integration::CacheMiss, storage::build_storage}, + ClientRequest, + }, net::{Message, Parameters}, }; -#[derive(Debug)] +/// Wraps the active storage backend behind a tokio `RwLock` so it can be +/// hotswapped without restarting pgdog. 
pub struct Cache { - client: CacheClient, + storage: RwLock>>, +} + +impl std::fmt::Debug for Cache { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Cache").field("storage", &"...").finish() + } } static CACHE: Lazy> = Lazy::new(|| Arc::new(Cache::new())); @@ -30,11 +42,47 @@ pub fn cache() -> Arc { impl Cache { fn new() -> Self { + let storage = build_storage(); Cache { - client: CacheClient::new(), + storage: RwLock::new(storage), } } + /// Replace the storage backend if the config has changed (URL or backend type). + /// + /// Acquires the write lock only when a change is detected; otherwise the + /// read-lock path is zero-allocation and very fast. + async fn hotswap_if_needed(&self) { + let cfg = &config().config.general.cache; + + // Fast path: read-lock to check whether anything has changed. + { + let guard = self.storage.read().await; + let needs_swap = match guard.as_ref() { + Some(s) => s.has_config_changed(cfg), + None => cfg.enabled, + }; + if !needs_swap { + return; + } + } + + // Slow path: write-lock and rebuild. + let mut guard = self.storage.write().await; + // Re-check under the write lock (another task may have already swapped). + let needs_swap = match guard.as_ref() { + Some(s) => s.has_config_changed(cfg), + None => cfg.enabled, + }; + + if needs_swap { + debug!("Cache storage config changed — rebuilding backend"); + *guard = build_storage(); + } + } + + // ── public API ─────────────────────────────────────────────────────────── + /// Check the cache for a query response. /// /// On HIT returns `Ok(Some(messages))` — the caller is responsible for @@ -50,6 +98,8 @@ impl Cache { client_request: &ClientRequest, params: &Parameters, ) -> Result>, crate::frontend::Error> { + self.hotswap_if_needed().await; + let cache_result = self .cache_check(in_transaction, client_request, params) .await?; @@ -75,9 +125,15 @@ impl Cache { } } - /// Finalize caching by storing the response in Redis. 
+ /// Finalize caching by storing the response in the active backend. pub async fn save_response_in_cache(&self, cache_context: &mut CacheContext) { - if let Some(CacheMiss { cache_key_hash, ttl } ) = cache_context.cache_miss.take() { + self.hotswap_if_needed().await; + + if let Some(CacheMiss { + cache_key_hash, + ttl, + }) = cache_context.cache_miss.take() + { if !cache_context.had_error && !cache_context.response_buffer.is_empty() { let messages = std::mem::take(&mut cache_context.response_buffer); self.cache_response(cache_key_hash, messages, ttl).await; diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 35d4bef17..7782dce2a 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -60,8 +60,7 @@ fn get_cache_directive( client_request .ast .as_ref() - .map(|ast| ast.comment_cache) - .flatten() + .and_then(|ast| ast.comment_cache) .or_else(|| extract_parameter_directive(params)) } @@ -83,8 +82,7 @@ fn extract_parameter_directive(params: &Parameters) -> Option { .strip_prefix("force_cache") .or_else(|| s.strip_prefix("cache")) .map(|s| s.trim_start()) - .map(|s| s.strip_prefix("ttl=")) - .flatten() + .and_then(|s| s.strip_prefix("ttl=")) .and_then(|t| t.trim().parse::().ok()) { let ttl_seconds = Some(ttl); diff --git a/pgdog/src/frontend/cache/storage/mod.rs b/pgdog/src/frontend/cache/storage/mod.rs new file mode 100644 index 000000000..13f53af89 --- /dev/null +++ b/pgdog/src/frontend/cache/storage/mod.rs @@ -0,0 +1,58 @@ +pub mod redis; + +pub use redis::RedisCacheStorage; + +use async_trait::async_trait; + +use crate::config::{ + cache::{Cache as CacheConfig, CacheBackend}, + config, +}; + +/// Errors returned by cache storage backends. 
+#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Redis {cmd} error for key {key}: {err}")] + RedisError { + cmd: &'static str, + key: u64, + err: fred::error::RedisError, + }, + #[error("Connection failed: {0}")] + ConnectionFailed(&'static str), + #[error("Cache miss for key {0}")] + CacheMiss(u64), +} + +/// Abstract cache storage backend. +/// +/// Implementations must be `Send + Sync` so they can be held behind +/// something like `Arc>` and shared across async tasks. +#[async_trait] +pub trait CacheStorage: Send + Sync { + /// Fetch cached bytes for `key`. Returns [`Error::CacheMiss`] when the + /// key is absent (not an error condition — used for control flow). + async fn get(&self, key: u64) -> Result, Error>; + + /// Store `value` under `key` with a `ttl` in seconds. + async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error>; + + /// Returns `true` when the backend is configured and enabled. + fn is_enabled(&self) -> bool; + + /// Returns `true` if cache config has changed (used for hotswap detection). + fn has_config_changed(&self, new_config: &CacheConfig) -> bool; +} + +/// Construct the appropriate storage backend from the current config. 
+pub fn build_storage() -> Option> { + let cfg = &config().config.general.cache; + if !cfg.enabled { + return None; + } + match cfg.backend { + CacheBackend::Redis => { + RedisCacheStorage::new(&cfg).map(|s| Box::new(s) as Box) + } + } +} diff --git a/pgdog/src/frontend/cache/storage/redis.rs b/pgdog/src/frontend/cache/storage/redis.rs new file mode 100644 index 000000000..e0aec87a6 --- /dev/null +++ b/pgdog/src/frontend/cache/storage/redis.rs @@ -0,0 +1,246 @@ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use fred::prelude::*; +use pgdog_config::CacheBackend; +use tracing::{debug, error, info}; + +use crate::config::{cache::Cache as CacheConfig, config}; + +use super::{CacheStorage, Error}; + +/// Timeout for individual Redis operations (GET/SET/ping). +const REDIS_OPERATION_TIMEOUT: Duration = Duration::from_secs(2); +/// Max time between reconnection attempts +const MAX_REDIS_RECONNECTION_PERIOD: Duration = Duration::from_secs(5); + +/// Redis implementation of [`CacheStorage`]. +/// +/// Connection is established in a background task spawned from [`RedisCacheStorage::new`]. +/// All operations return immediately if the connection is not yet ready — `get` returns +/// [`Error::ConnectionFailed`] (triggering a cache-miss path) and `set` is silently dropped. +/// +/// At most one reconnect task runs at any time, enforced by a CAS on `reconnecting`. +pub struct RedisCacheStorage { + client: RedisClient, + /// Cache config. + config: CacheConfig, + /// Guards against spawning multiple concurrent reconnect tasks. 
+ reconnecting: Arc, +} + +impl std::fmt::Debug for RedisCacheStorage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RedisCacheStorage") + .field("config", &self.config) + .field("reconnecting", &self.reconnecting.load(Ordering::Relaxed)) + .finish() + } +} + +impl RedisCacheStorage { + /// Build a new storage instance for `url` and immediately start a background + /// connection task. Returns `None` when the URL cannot be parsed. + pub fn new(config: &CacheConfig) -> Option { + let client_config = match RedisConfig::from_url(&config.redis.url) { + Ok(c) => c, + Err(e) => { + error!("Failed to parse Redis URL '{}': {}", config.redis.url, e); + return None; + } + }; + + let client = match Builder::from_config(client_config).build() { + Ok(c) => c, + Err(e) => { + error!("Failed to build Redis client: {}", e); + return None; + } + }; + + let reconnecting = Arc::new(AtomicBool::new(true)); // treat initial connect as "reconnecting" + + let storage = Self { + client, + config: config.clone(), + reconnecting, + }; + + // Fire-and-forget initial connection. + storage.spawn_connect_task(); + + Some(storage) + } + + // ── internal helpers ──────────────────────────────────────────────────── + + /// Spawn the (re)connect background loop. Uses a CAS to ensure only one + /// task is ever running at a time. 
+ fn spawn_connect_task(&self) { + let client = self.client.clone(); + let reconnecting = self.reconnecting.clone(); + + tokio::spawn(async move { + info!("Redis connect task started"); + let mut attempt = 0u32; + + loop { + attempt += 1; + debug!("Redis connect attempt #{}", attempt); + + let init_ok = + match tokio::time::timeout(REDIS_OPERATION_TIMEOUT, client.init()).await { + Ok(Ok(_)) => true, + Ok(Err(e)) => { + debug!("Redis init error: {}", e); + false + } + Err(_) => { + debug!("Redis init timed out"); + false + } + }; + + if init_ok { + reconnecting.store(false, Ordering::Release); + info!("Redis connected (attempt #{})", attempt); + return; + } + + // Exponential backoff + tokio::time::sleep( + const { Duration::from_millis(5) } + .saturating_mul(1u32 << attempt.min(10)) + .min(MAX_REDIS_RECONNECTION_PERIOD), + ) + .await; + } + }); + } + + /// Mark the reconnecting as true and spawn a reconnect task if one is not + /// already running. + fn reconnect(&self) { + if self + .reconnecting + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + { + self.spawn_connect_task(); + } else { + debug!("Redis reconnect task already running"); + } + } +} + +#[async_trait] +impl CacheStorage for RedisCacheStorage { + async fn get(&self, key: u64) -> Result, Error> { + if self.reconnecting.load(Ordering::Acquire) { + return Err(Error::ConnectionFailed("Redis not connected")); + } + + let full_key = format!("{}{}", self.config.redis.cache_key_prefix, key); + + let redis_result = tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + self.client.get::(full_key), + ) + .await; + let val = match redis_result { + Ok(Ok(v)) => v, + Ok(Err(err)) => { + self.reconnect(); + return Err(Error::RedisError { + cmd: "GET", + key, + err, + }); + } + Err(_) => { + self.reconnect(); + return Err(Error::ConnectionFailed("Redis GET timed out")); + } + }; + + match val.into_bytes() { + Some(bytes) => { + debug!("Cache hit for key {}", key); + Ok(bytes.to_vec()) + 
} + None => Err(Error::CacheMiss(key)), + } + } + + async fn set(&self, key: u64, value: &[u8], ttl: u64) -> Result<(), Error> { + if self.reconnecting.load(Ordering::Acquire) { + return Err(Error::ConnectionFailed("Redis not connected")); + } + + let max_result_size = config().config.general.cache.max_result_size; + if max_result_size != 0 && value.len() > max_result_size { + debug!( + "Skipping cache for key {}: size {} exceeds max {}", + key, + value.len(), + max_result_size + ); + return Ok(()); + } + + let full_key = format!("{}{}", self.config.redis.cache_key_prefix, key); + let ttl_seconds = ttl as i64; + + match tokio::time::timeout( + REDIS_OPERATION_TIMEOUT, + self.client.set::<(), _, _>( + full_key, + value, + Some(Expiration::EX(ttl_seconds)), + None, + false, + ), + ) + .await + { + Ok(Ok(_)) => { + debug!("Cached key {} with TTL {}s", key, ttl_seconds); + Ok(()) + } + Ok(Err(err)) => { + self.reconnect(); + Err(Error::RedisError { + cmd: "SET", + key, + err, + }) + } + Err(_) => { + self.reconnect(); + Err(Error::ConnectionFailed("Redis SET timed out")) + } + } + } + + fn is_enabled(&self) -> bool { + config().config.general.cache.enabled + } + + fn has_config_changed(&self, new_config: &CacheConfig) -> bool { + new_config.backend != CacheBackend::Redis + || self.config.redis.cmp(&new_config.redis).is_ne() + } +} + +// Avoid shallow copy +impl Clone for RedisCacheStorage { + fn clone(&self) -> Self { + Self { + client: self.client.clone_new(), + config: self.config.clone(), + reconnecting: Arc::new(AtomicBool::new(false)), + } + } +} From 4c771764b7e93b7d0b41268dc3d9477b10a78633 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 18 May 2026 10:27:40 +0300 Subject: [PATCH 18/23] deleted files that not belong to the feature: claude debug skill was specific to my system flake should be in another feat --- .claude/skills/debug/SKILL.md | 82 ------------------------------- .gitignore | 1 - flake.lock | 77 ----------------------------- flake.nix | 92 
----------------------------------- 4 files changed, 252 deletions(-) delete mode 100644 .claude/skills/debug/SKILL.md delete mode 100644 flake.lock delete mode 100644 flake.nix diff --git a/.claude/skills/debug/SKILL.md b/.claude/skills/debug/SKILL.md deleted file mode 100644 index d22874e8c..000000000 --- a/.claude/skills/debug/SKILL.md +++ /dev/null @@ -1,82 +0,0 @@ -# Debug Skill — pgdog Development Environment - -## Rules - -1. **Build command**: Always use `cargo build -p pgdog` (debug profile). Never use `--release` during debugging — it slows down builds by 4× and you rarely need to verify the final binary during development. - -2. **Docker environment**: You have access to rootless Docker. Key services: - - PostgreSQL runs on port **5433** (NOT 5432 — your real system postgres uses that) - - Redis is available on the default port **6379** - - If services aren't running, start them first: - ```bash - docker start $(docker ps -a -q --filter "name=pgdog" --latest) - # Or inspect what containers exist: - docker ps -a - ``` - -3. **Running pgdog in background**: Use `systemd-run --user`, NOT `nohup` or `&`. 
Example: - ```bash - systemd-run --user --collect --unit=pgdog-debug \ - --setenv=RUST_LOG=debug \ - --working-directory= \ - /target/debug/pgdog --config pgdog.toml 2>&1 - ``` - To stop it later: - ```bash - systemctl --user list-units | grep pgdog - systemctl --user stop - ``` - -## Useful Debugging Commands - -### Check docker services - -```bash -docker ps -``` - -### Check redis connectivity - -```bash -redis-cli ping -``` - -### Clear redis cache (useful for testing) - -```bash -redis-cli FLUSHALL -``` - -### Watch cache keys in real-time - -```bash -redis-cli MONITOR | grep "pgdog:" -``` - -### Inspect cached response bytes - -```bash -redis-cli --scan --pattern "pgdog:*" | head -1 | xargs redis-cli GET | xxd | head -20 -``` - -## File Structure Reference - -The cache implementation lives in: - -``` -pgdog/src/frontend/client/query_engine/cache/ -├── mod.rs # Module exports -├── client.rs # Redis client wrapper (fred v9) -├── integration.rs # cache_check(), send_cached_response(), cache_response() -├── policy.rs # CachePolicyResolver (3-tier decision engine) -└── stats.rs # QueryStatsTracker (hit/miss counters) -``` - -State documentation: `CacheState.md` in the project root. - -## Common Pitfalls - -- **Parser disabled by default**: `route.is_read()` returns false for `SELECT 1` when the query parser is off. The `is_likely_read()` heuristic in integration.rs covers this. -- **Policy defaults to NoCache**: `DatabaseCache.policy()` returns `CachePolicy::NoCache` by default. You must set `policy = "cache"` in the config. -- **Cache keys are hashed**: The key is a DefaultHasher hex digest of the raw query string, not the query itself. -- **Wire format is concatenated bytes**: Multiple PostgreSQL messages are concatenated into a single `Vec` with `[code: u8][length: u32be][payload: ...]` structure. 
diff --git a/.gitignore b/.gitignore index 32aeaec2a..5db985b82 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ perf.data.old CLAUDE.local.md .claude/plans/ .claude/completed_plans/ -!.claude/skills/debug # Ignore generated bindings pgdog-plugin/src/bindings.rs diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 428032335..000000000 --- a/flake.lock +++ /dev/null @@ -1,77 +0,0 @@ -{ - "nodes": { - "crane": { - "locked": { - "lastModified": 1775839657, - "narHash": "sha256-SPm9ck7jh3Un9nwPuMGbRU04UroFmOHjLP56T10MOeM=", - "owner": "ipetkov", - "repo": "crane", - "rev": "7cf72d978629469c4bd4206b95c402514c1f6000", - "type": "github" - }, - "original": { - "owner": "ipetkov", - "repo": "crane", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1776067740, - "narHash": "sha256-B35lpsqnSZwn1Lmz06BpwF7atPgFmUgw1l8KAV3zpVQ=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "7e495b747b51f95ae15e74377c5ce1fe69c1765f", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-25.11", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "crane": "crane", - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git 
a/flake.nix b/flake.nix deleted file mode 100644 index 4a7fd6c7f..000000000 --- a/flake.nix +++ /dev/null @@ -1,92 +0,0 @@ -{ - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11"; - flake-utils.url = "github:numtide/flake-utils"; - crane.url = "github:ipetkov/crane"; - }; - - outputs = { self, nixpkgs, flake-utils, crane }: - flake-utils.lib.eachSystem flake-utils.lib.allSystems (system: - let - pkgs = import nixpkgs { inherit system; }; - stdenv' = p: p.stdenvAdapters.withCFlags [ "-O" ] (p.stdenvAdapters.useMoldLinker p.clangStdenv); - stdenv = stdenv' pkgs; - craneLib = (crane.mkLib pkgs).overrideScope (final: prev: { - stdenvSelector = stdenv'; - }); - - env = { - LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; - CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER = "${stdenv.cc}/bin/cc"; - CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS = "-C link-arg=--ld-path=${stdenv.cc}/bin/ld"; - }; - - commonArgs = { - src = let - unfilteredSrc = ./.; - fs = pkgs.lib.fileset; - in fs.toSource { - root = unfilteredSrc; - fileset = fs.unions [ - (craneLib.fileset.cargoTomlAndLock unfilteredSrc) - (craneLib.fileset.rust unfilteredSrc) - (fs.fileFilter - (file: file.hasExt "c" || file.hasExt "h" || file.hasExt "sql") - unfilteredSrc - ) - ]; - }; - strictDeps = true; - - nativeBuildInputs = with pkgs; [ - pkg-config - ]; - buildInputs = with pkgs; [ - openssl - ]; - - inherit env; - } // (craneLib.crateNameFromCargoToml { cargoToml = ./pgdog/Cargo.toml; }); - - cargoArtifacts = craneLib.buildDepsOnly commonArgs; - - devShell = (craneLib.devShell.override { - mkShell = pkgs.mkShell.override { - inherit stdenv; - }; - }) { - checks = self.checks; - inputsFrom = [ cargoArtifacts ]; - inherit env; - }; - - pgDog = craneLib.buildPackage (commonArgs // { - inherit cargoArtifacts; - doCheck = false; - cargoExtraArgs = "-p pgdog"; - }); - - in { - packages.default = pgDog; - - devShells.default = devShell; - - checks = { - inherit pgDog; - - pgDogClippy = craneLib.cargoClippy 
(commonArgs // { - inherit cargoArtifacts; - cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings"; - }); - - pgDogFmt = craneLib.cargoFmt commonArgs; - - pgDogNextest = craneLib.cargoNextest (commonArgs // { - inherit cargoArtifacts; - checkPhaseCargoCommand = "echo hello world"; - cargoNextestExtraArgs = "--test-threads=1 --no-fail-fast"; - }); - }; - } - ); -} From d94fdeaf6bca15949cae390ae1525b19aa256f58 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 18 May 2026 13:37:56 +0300 Subject: [PATCH 19/23] prepared statement's result caching --- docs/CACHE.md | 8 ++++---- pgdog/src/frontend/cache/integration.rs | 22 ++++++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 1119ae438..86177fbc1 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -278,12 +278,12 @@ SQL comment → pgdog.cache parameter → DB policy config 22. **Nested backend config** — Backend-specific settings live in their own TOML subtable (`[general.cache.redis]`) rather than flat fields on `[general.cache]`. `RedisConfig` holds `url` and `cache_key_prefix`. When a new backend is added, it gets its own subtable (e.g. `[general.cache.memcached]`) without polluting the top-level cache section. `client.rs` renamed to `storage/redis.rs`. +23. **Cache key must include Bind parameters for extended protocol** — For simple `Query` messages, parameter values are embedded in the SQL string, so the XXH3 hash of `database + query_text` is naturally unique per value. For extended protocol (Parse/Bind/Execute), the SQL contains `$1`/`$2` placeholders and the actual values arrive in the `Bind` message separately. The current hash ignores them, so `SELECT * FROM users WHERE id = $1` with `id = 1` and `id = 2` produce the same cache key — wrong rows are returned on the second call. 
Fix: hash `param.len` (the `i32` field, not the `len()` method which returns wire size) and `param.data` for each entry in `bind.params_raw()` into the hasher in `cache_check()` in `integration.rs`. This affects all production drivers that use extended protocol by default: psycopg3, asyncpg, JDBC, npgsql. Note: pgdog's built-in prepared statement cache (`PreparedStatements` / `GlobalCache`) is a proxy-level plan cache only — it deduplicates backend `Parse` round-trips. It does not cache result rows and is orthogonal to the Redis result cache. + --- ## What's Left To Do -1. **Response capture for prepared statements** — Extended protocol (Parse/Bind/Execute) response capture works through process_server_message() but hasn't been tested with PREPARE/EXECUTE. (Note: pgdog implements prepared statements caching. But unknown what kind of caching this is: just query cache or result cache. And if we implement our cache, will this break this prepared statement cache?) - -2. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. +1. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. -3. **Integration tests**. +2. **Integration tests**. 
diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 8a84970a0..37b12f34b 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -61,31 +61,41 @@ impl Cache { _ => return Ok(CacheCheckResult::Passthrough), }; - let user = params.get_required("user")?; - let database = params.get_default("database", user); - let cache_key_hash = { + let compute_cache_key_hash = || { + let user = params.get_required("user")?; + let database = params.get_default("database", user); let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); database.hash(&mut hasher); let normalized_query = FORCE_CACHE_RE.replace(query.query(), "pgdog_cache: cache"); normalized_query.hash(&mut hasher); - hasher.finish() + if let Some(bind) = client_request.parameters()? { + for param in bind.params_raw() { + param.len.hash(&mut hasher); + param.data.hash(&mut hasher); + } + }; + Ok::(hasher.finish()) }; let decision = policy::resolve(client_request, params, is_read).await; match decision { CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), CacheDecision::ForceCache(ttl) => Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash, + cache_key_hash: compute_cache_key_hash()?, ttl, })), CacheDecision::Cache(ttl) => { + let cache_key_hash = compute_cache_key_hash()?; let guard = self.storage.read().await; match guard.as_ref() { None => Ok(CacheCheckResult::Passthrough), Some(storage) => match storage.get(cache_key_hash).await { Ok(cached) => Ok(CacheCheckResult::Hit { cached }), Err(CacheStorageError::CacheMiss(_)) => { - Ok(CacheCheckResult::Miss(CacheMiss { cache_key_hash, ttl })) + Ok(CacheCheckResult::Miss(CacheMiss { + cache_key_hash, + ttl, + })) } Err(e) => { warn!("{}", e); From 1bc45852af122c9e830bf3cdcd1213d479e52e7f Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 18 May 2026 13:54:00 +0300 Subject: [PATCH 20/23] fmt and clippy --- pgdog/src/backend/pool/cluster.rs | 6 +++--- 
pgdog/src/config/cache.rs | 2 +- pgdog/src/frontend/cache/context.rs | 5 ++++- pgdog/src/frontend/cache/integration.rs | 2 +- pgdog/src/frontend/cache/policy.rs | 2 +- pgdog/src/frontend/cache/storage/mod.rs | 2 +- pgdog/src/frontend/client/query_engine/context.rs | 5 ++++- pgdog/src/frontend/client/query_engine/mod.rs | 4 +++- 8 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pgdog/src/backend/pool/cluster.rs b/pgdog/src/backend/pool/cluster.rs index b636d87fc..1dff017a5 100644 --- a/pgdog/src/backend/pool/cluster.rs +++ b/pgdog/src/backend/pool/cluster.rs @@ -158,7 +158,7 @@ pub struct ClusterConfig<'a> { pub reload_schema_on_ddl: bool, pub load_schema: LoadSchema, pub resharding_parallel_copies: usize, - pub cache_enabled: bool + pub cache_enabled: bool, } impl<'a> ClusterConfig<'a> { @@ -212,7 +212,7 @@ impl<'a> ClusterConfig<'a> { reload_schema_on_ddl: general.reload_schema_on_ddl, load_schema: general.load_schema, resharding_parallel_copies: general.resharding_parallel_copies, - cache_enabled: general.cache.enabled + cache_enabled: general.cache.enabled, } } } @@ -250,7 +250,7 @@ impl Cluster { reload_schema_on_ddl, load_schema, resharding_parallel_copies, - cache_enabled + cache_enabled, } = config; let identifier = Arc::new(DatabaseUser { diff --git a/pgdog/src/config/cache.rs b/pgdog/src/config/cache.rs index a089ff680..ece03acb6 100644 --- a/pgdog/src/config/cache.rs +++ b/pgdog/src/config/cache.rs @@ -1 +1 @@ -pub use pgdog_config::cache::*; \ No newline at end of file +pub use pgdog_config::cache::*; diff --git a/pgdog/src/frontend/cache/context.rs b/pgdog/src/frontend/cache/context.rs index 42fd0fecf..aeeab7613 100644 --- a/pgdog/src/frontend/cache/context.rs +++ b/pgdog/src/frontend/cache/context.rs @@ -1,4 +1,7 @@ -use crate::{frontend::cache::integration::CacheMiss, net::{Message, messages::Protocol}}; +use crate::{ + frontend::cache::integration::CacheMiss, + net::{messages::Protocol, Message}, +}; /// Cache context to use in 
QueryEngineContext. #[derive(Default)] diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 37b12f34b..5114c865f 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -72,7 +72,7 @@ impl Cache { for param in bind.params_raw() { param.len.hash(&mut hasher); param.data.hash(&mut hasher); - } + } }; Ok::(hasher.finish()) }; diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 7782dce2a..60073dcc9 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -39,7 +39,7 @@ pub async fn resolve( Some(CacheDirective::NoCache) => return CacheDecision::Skip, Some(CacheDirective::Cache { ttl_seconds }) => { return CacheDecision::Cache(ttl_seconds.unwrap_or(cache_config.ttl)) - }, + } Some(CacheDirective::ForceCache { ttl_seconds }) => { return CacheDecision::ForceCache(ttl_seconds.unwrap_or(cache_config.ttl)) } diff --git a/pgdog/src/frontend/cache/storage/mod.rs b/pgdog/src/frontend/cache/storage/mod.rs index 13f53af89..91a7b377d 100644 --- a/pgdog/src/frontend/cache/storage/mod.rs +++ b/pgdog/src/frontend/cache/storage/mod.rs @@ -52,7 +52,7 @@ pub fn build_storage() -> Option> { } match cfg.backend { CacheBackend::Redis => { - RedisCacheStorage::new(&cfg).map(|s| Box::new(s) as Box) + RedisCacheStorage::new(cfg).map(|s| Box::new(s) as Box) } } } diff --git a/pgdog/src/frontend/client/query_engine/context.rs b/pgdog/src/frontend/client/query_engine/context.rs index 42ef8b21f..6a1fe3c38 100644 --- a/pgdog/src/frontend/client/query_engine/context.rs +++ b/pgdog/src/frontend/client/query_engine/context.rs @@ -1,7 +1,10 @@ use crate::{ backend::pool::{connection::mirror::Mirror, stats::MemoryStats}, frontend::{ - Client, ClientRequest, PreparedStatements, client::{Sticky, TransactionType, timeouts::Timeouts}, router::parser::rewrite::statement::plan::RewriteResult, cache::context::CacheContext + 
cache::context::CacheContext, + client::{timeouts::Timeouts, Sticky, TransactionType}, + router::parser::rewrite::statement::plan::RewriteResult, + Client, ClientRequest, PreparedStatements, }, net::{BackendKeyData, Parameters, Stream}, }; diff --git a/pgdog/src/frontend/client/query_engine/mod.rs b/pgdog/src/frontend/client/query_engine/mod.rs index e302e717d..9223f5b86 100644 --- a/pgdog/src/frontend/client/query_engine/mod.rs +++ b/pgdog/src/frontend/client/query_engine/mod.rs @@ -246,7 +246,9 @@ impl QueryEngine { command => self.unknown_command(context, command.clone()).await?, } - cache().save_response_in_cache(&mut context.cache_context).await; + cache() + .save_response_in_cache(&mut context.cache_context) + .await; self.hooks.after_execution(context)?; From 2d9c605e50988c2316017f8b2525e4457927e05b Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Wed, 20 May 2026 17:33:35 +0300 Subject: [PATCH 21/23] add unit tests --- docs/CACHE.md | 8 +- pgdog/src/frontend/cache/integration.rs | 124 +++++++++++++++++++++++- pgdog/src/frontend/cache/policy.rs | 115 ++++++++++++++++++++++ 3 files changed, 243 insertions(+), 4 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 86177fbc1..89cb6dd14 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -105,7 +105,7 @@ Key methods: - `cache_check()` — main entry point, checks route, calls `policy::resolve()`, checks Redis - `deserialize_cached(Vec) -> Vec` — parses a flat blob of concatenated PostgreSQL wire messages into individual `Message` values. Wire format: `[1B code][4B length (incl. itself)][payload]`. Named constants `HEADER_CODE_LEN`, `HEADER_LEN_SIZE`, `HEADER_TOTAL` replace the former magic numbers. Not Redis-specific — usable with any cache backend that stores raw bytes. 
- `cache_response()` — serializes `Vec<Message>` into wire bytes and stores in Redis -- Cache key: XXH3 hash of `database_name + raw_query_string` +- Cache key: XXH3 hash of `database_name + raw_query_string + bind params` ### Query Engine Integration @@ -148,7 +148,7 @@ xxhash-rust = { version = "0.8", features = ["xxh3"]} | Cache policy resolution | 2-tier: SQL comment/param → DB policy | | Cache HIT flow | Deserialize wire bytes → `Vec<Message>` → replay each through `process_server_message()` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | -| Cache key | XXH3 hash of `database_name + raw_query_string` | +| Cache key | XXH3 hash of `database_name + raw_query_string + bind params` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | --- @@ -287,3 +287,7 @@ SQL comment → pgdog.cache parameter → DB policy config 1. **Redis disconnect/reconnect under heavy load** — The reconnection logic works, but timing edge cases under rapid disconnect/reconnect cycles still need stress-testing. 2. **Integration tests**. + +3. **Set redis query timeout from config** + +4. **Completely remove comments when computing hash for query** \ No newline at end of file diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 5114c865f..56bf9fbcc 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -142,8 +142,6 @@ impl Cache { break; } - let _code = cached[offset] as char; - // Read the message length field (4 bytes, big-endian). // This length includes the 4-byte length field itself but NOT the code byte. let msg_len = u32::from_be_bytes([ @@ -213,3 +211,125 @@ impl Cache { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::net::messages::{CommandComplete, Protocol, ReadyForQuery, ToBytes}; + + /// Build a raw wire-format blob from a list of typed protocol messages.
+ fn wire_bytes(msgs: &[&dyn ToBytes]) -> Vec<u8> { + let mut buf = Vec::new(); + for msg in msgs { + buf.extend_from_slice(&msg.to_bytes().unwrap()); + } + buf + } + + #[test] + fn deserialize_empty_input() { + let messages = Cache::deserialize_cached(vec![]); + assert!(messages.is_empty()); + } + + #[test] + fn deserialize_single_message() { + let rfq = ReadyForQuery::idle(); + let blob = wire_bytes(&[&rfq]); + let messages = Cache::deserialize_cached(blob); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].code(), 'Z'); + } + + #[test] + fn deserialize_multiple_messages_roundtrip() { + let cc = CommandComplete::new("SELECT 1"); + let rfq = ReadyForQuery::idle(); + let blob = wire_bytes(&[&cc, &rfq]); + + let messages = Cache::deserialize_cached(blob); + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].code(), 'C'); + assert_eq!(messages[1].code(), 'Z'); + } + + #[test] + fn deserialize_roundtrip_payload_matches() { + let cc = CommandComplete::new("SELECT 42"); + let rfq = ReadyForQuery::idle(); + let original: Vec<Message> = vec![ + Message::new(cc.to_bytes().unwrap()), + Message::new(rfq.to_bytes().unwrap()), + ]; + + // Serialize to flat blob exactly as cache_response does. + let mut blob = Vec::new(); + for msg in &original { + blob.extend_from_slice(&msg.to_bytes().unwrap()); + } + + let deserialized = Cache::deserialize_cached(blob); + assert_eq!(deserialized.len(), original.len()); + for (d, o) in deserialized.iter().zip(original.iter()) { + assert_eq!(d.payload(), o.payload()); + } + } + + #[test] + fn deserialize_truncated_header_no_panic() { + // Only 3 bytes — not enough for a full 5-byte header. + let truncated = vec![b'Z', 0x00, 0x00]; + let messages = Cache::deserialize_cached(truncated); + assert!(messages.is_empty()); + } + + #[test] + fn deserialize_truncated_payload_no_panic() { + // Valid header claiming length 8 (4-byte len field + 4-byte payload), + // but we only provide the header and 2 payload bytes instead of 4.
+ let mut blob = Vec::new(); + blob.push(b'C'); // code byte + blob.extend_from_slice(&8u32.to_be_bytes()); // length = 8 (includes itself) + blob.extend_from_slice(&[0u8, 0]); // only 2 of the expected 4 payload bytes + let messages = Cache::deserialize_cached(blob); + assert!(messages.is_empty()); + } + + #[test] + fn deserialize_corrupt_length_no_panic() { + // Length field set to 0 — invalid (must be >= 4). + let mut blob = Vec::new(); + blob.push(b'Z'); + blob.extend_from_slice(&0u32.to_be_bytes()); + let messages = Cache::deserialize_cached(blob); + assert!(messages.is_empty()); + } + + #[test] + fn deserialize_length_of_three_no_panic() { + // Length field = 3 — below minimum of 4, should be rejected. + let mut blob = Vec::new(); + blob.push(b'Z'); + blob.extend_from_slice(&3u32.to_be_bytes()); + blob.extend_from_slice(&[0u8; 3]); + let messages = Cache::deserialize_cached(blob); + assert!(messages.is_empty()); + } + + #[test] + fn deserialize_many_messages() { + // Round-trip 10 CommandComplete messages. 
+ let n = 10usize; + let mut blob = Vec::new(); + for i in 0..n { + let cc = CommandComplete::new(format!("SELECT {}", i)); + blob.extend_from_slice(&cc.to_bytes().unwrap()); + } + + let messages = Cache::deserialize_cached(blob); + assert_eq!(messages.len(), n); + for msg in &messages { + assert_eq!(msg.code(), 'C'); + } + } +} diff --git a/pgdog/src/frontend/cache/policy.rs b/pgdog/src/frontend/cache/policy.rs index 60073dcc9..862fbc922 100644 --- a/pgdog/src/frontend/cache/policy.rs +++ b/pgdog/src/frontend/cache/policy.rs @@ -95,3 +95,118 @@ fn extract_parameter_directive(params: &Parameters) -> Option<CacheDirective> { None } + +#[cfg(test)] +mod tests { + use super::*; + + fn extract(s: &str) -> Option<CacheDirective> { + let mut params = Parameters::default(); + params.insert("pgdog.cache", s); + extract_parameter_directive(&params) + } + + #[test] + fn no_cache_directive() { + assert_eq!(extract("no_cache"), Some(CacheDirective::NoCache)); + } + + #[test] + fn cache_directive_no_ttl() { + assert_eq!( + extract("cache"), + Some(CacheDirective::Cache { ttl_seconds: None }) + ); + } + + #[test] + fn cache_directive_with_ttl() { + assert_eq!( + extract("cache ttl=60"), + Some(CacheDirective::Cache { + ttl_seconds: Some(60) + }) + ); + } + + #[test] + fn cache_directive_with_large_ttl() { + assert_eq!( + extract("cache ttl=86400"), + Some(CacheDirective::Cache { + ttl_seconds: Some(86400) + }) + ); + } + + #[test] + fn force_cache_no_ttl() { + assert_eq!( + extract("force_cache"), + Some(CacheDirective::ForceCache { ttl_seconds: None }) + ); + } + + #[test] + fn force_cache_with_ttl() { + assert_eq!( + extract("force_cache ttl=120"), + Some(CacheDirective::ForceCache { + ttl_seconds: Some(120) + }) + ); + } + + #[test] + fn garbage_input_returns_none() { + assert_eq!(extract("garbage"), None); + } + + #[test] + fn invalid_ttl_letters_returns_none() { + assert_eq!(extract("cache ttl=abc"), None); + } + + #[test] + fn empty_ttl_returns_none() { + assert_eq!(extract("cache ttl="), None); + } + +
#[test] + fn ttl_zero_is_valid() { + // 0 is a valid u64, even if semantically it means "expire immediately" + assert_eq!( + extract("cache ttl=0"), + Some(CacheDirective::Cache { + ttl_seconds: Some(0) + }) + ); + } + + #[test] + fn missing_key_returns_none() { + let params = Parameters::default(); + assert_eq!(extract_parameter_directive(&params), None); + } + + #[test] + fn force_cache_invalid_ttl_returns_none() { + assert_eq!(extract("force_cache ttl=bad"), None); + } + + #[test] + fn force_cache_empty_ttl_returns_none() { + assert_eq!(extract("force_cache ttl="), None); + } + + #[test] + fn whitespace_trimmed_around_value() { + // The value stored in the param is retrieved with .trim() in extract_parameter_directive + let mut params = Parameters::default(); + params.insert("pgdog.cache", " no_cache "); + assert_eq!( + extract_parameter_directive(&params), + Some(CacheDirective::NoCache) + ); + } +} From 3e6e76f4d91774800bc0bc4e752387c871d738a2 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Thu, 21 May 2026 13:01:54 +0300 Subject: [PATCH 22/23] strip sql comments while hashing --- docs/CACHE.md | 21 +- pgdog/src/frontend/cache/integration.rs | 281 +++++++++++++++++++++--- 2 files changed, 270 insertions(+), 32 deletions(-) diff --git a/docs/CACHE.md b/docs/CACHE.md index 89cb6dd14..f92e5f245 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -105,7 +105,7 @@ Key methods: - `cache_check()` — main entry point, checks route, calls `policy::resolve()`, checks Redis - `deserialize_cached(Vec<u8>) -> Vec<Message>` — parses a flat blob of concatenated PostgreSQL wire messages into individual `Message` values. Wire format: `[1B code][4B length (incl. itself)][payload]`. Named constants `HEADER_CODE_LEN`, `HEADER_LEN_SIZE`, `HEADER_TOTAL` replace the former magic numbers. Not Redis-specific — usable with any cache backend that stores raw bytes.
- `cache_response()` — serializes `Vec` into wire bytes and stores in Redis -- Cache key: XXH3 hash of `database_name + raw_query_string + bind params` +- Cache key: XXH3 hash of `database_name + comment-stripped query string + bind params` ### Query Engine Integration @@ -148,7 +148,7 @@ xxhash-rust = { version = "0.8", features = ["xxh3"]} | Cache policy resolution | 2-tier: SQL comment/param → DB policy | | Cache HIT flow | Deserialize wire bytes → `Vec` → replay each through `process_server_message()` | | Cache MISS flow | Normal execute → capture response via `CacheContext` → store in Redis → respond | -| Cache key | XXH3 hash of `database_name + raw_query_string + bind params` | +| Cache key | XXH3 hash of `database_name + comment-stripped query string + bind params` | | Wire format | Full PostgreSQL wire messages stored as raw bytes (one concatenated buffer) | --- @@ -173,16 +173,21 @@ SELECT * FROM products WHERE category = 'electronics'; SELECT * FROM orders; -- Force cache with database default TTL --- Query hash computed as if comment were like "/* pgdog_cache: cache */" /* pgdog_cache: force_cache */ SELECT * FROM products WHERE category = 'electronics'; -- Force cache with custom TTL in seconds --- Query hash computed as if comment were like "/* pgdog_cache: cache ttl=300*/" /* pgdog_cache: force_cache ttl=300 */ SELECT * FROM orders; ``` +> **Hash independence from comments:** All SQL comments (block `/* */` and line `--`) are stripped +> before computing the cache key hash. This means a query sent with a `/* pgdog_cache: cache */` +> comment produces **exactly the same cache key** as the same query sent without any comment but +> with the directive supplied via a connection parameter (`SET pgdog.cache = 'cache'`). There is +> no longer a need for special normalization of `force_cache` vs `cache` hints — both result in +> the same hash because comments are removed entirely. 
+ ### Connection Parameter Set `pgdog.cache` at connection time (via DSN options) or with `SET` after connecting: @@ -250,7 +255,7 @@ SQL comment → pgdog.cache parameter → DB policy config 8. **Error handling / Reconnection** — Automatic reconnection with background task, CAS-guarded single reconnect, 2s operation timeout on all Redis calls, PING-based connection verification. -9. **Cache key collision across databases sharing one Redis** — Database name and raw query string are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. `force_cache` hints normalize the query in the hash to use the same key as regular `cache`. +9. **Cache key collision across databases sharing one Redis** — Database name and query string (with all SQL comments stripped) are combined via a single XXH3 hash call, producing deterministic, collision-resistant per-database keys even on shared Redis. Different literal values in queries produce different cache keys. Because all comments are stripped before hashing, the cache key is identical whether the cache directive arrives via a SQL comment or a connection parameter. 10. **Wire format serialization/deserialization** — PostgreSQL wire messages stored as raw bytes. Correct byte slice calculation expressed via named constants (`HEADER_CODE_LEN = 1`, `HEADER_LEN_SIZE = 4`, `HEADER_TOTAL = 5`). Deserialization extracted into `deserialize_cached()` with inline comments explaining each boundary check. @@ -264,7 +269,7 @@ SQL comment → pgdog.cache parameter → DB policy config 15. **Add cache config to .schema**. -16. **Force-cache hint support** — `/* pgdog_cache: force_cache */` and `/* pgdog_cache: force_cache ttl=N */` directives always attempt to cache (cache key normalized), bypassing normal cache miss flow considerations. +16. 
**Force-cache hint support** — `/* pgdog_cache: force_cache */` and `/* pgdog_cache: force_cache ttl=N */` directives always attempt to cache. Because all comments are stripped before hashing, `force_cache` and `cache` directives produce the same cache key as the bare query with no comment at all. 17. **Cache HIT replays through the server-message pipeline** — Previously, cache hits sent responses directly to the stream, bypassing `process_server_message()`. Now `try_read_cache()` returns `Option>` and the caller (`handle()`) feeds each message through `process_server_message()` — giving correct stats accounting, transaction state updates from `ReadyForQuery`, and hook invocations on every cache hit. @@ -280,6 +285,8 @@ SQL comment → pgdog.cache parameter → DB policy config 23. **Cache key must include Bind parameters for extended protocol** — For simple `Query` messages, parameter values are embedded in the SQL string, so the XXH3 hash of `database + query_text` is naturally unique per value. For extended protocol (Parse/Bind/Execute), the SQL contains `$1`/`$2` placeholders and the actual values arrive in the `Bind` message separately. The current hash ignores them, so `SELECT * FROM users WHERE id = $1` with `id = 1` and `id = 2` produce the same cache key — wrong rows are returned on the second call. Fix: hash `param.len` (the `i32` field, not the `len()` method which returns wire size) and `param.data` for each entry in `bind.params_raw()` into the hasher in `cache_check()` in `integration.rs`. This affects all production drivers that use extended protocol by default: psycopg3, asyncpg, JDBC, npgsql. Note: pgdog's built-in prepared statement cache (`PreparedStatements` / `GlobalCache`) is a proxy-level plan cache only — it deduplicates backend `Parse` round-trips. It does not cache result rows and is orthogonal to the Redis result cache. +24. 
**Comments stripped from query before hashing** — All SQL block comments (`/* … */`, including nested) and line comments (`-- …`) are removed from the query string before computing the XXH3 cache key. This makes the cache key independent of whether the cache directive was supplied via a SQL comment or a connection parameter. `compute_cache_key_hash` is a standalone public function in `integration.rs` so it can be unit-tested directly. `strip_sql_comments` returns `Cow<'_, str>`: when no comment markers are present the original string slice is returned without any allocation; only queries that actually contain `/*` or `--` incur a heap allocation. The `FORCE_CACHE_RE` regex normalization that previously converted `force_cache` to `cache` in the hash input has been removed — stripping all comments achieves the same result in a more general way. + --- ## What's Left To Do @@ -290,4 +297,4 @@ SQL comment → pgdog.cache parameter → DB policy config 3. **Set redis query timeout from config** -4. **Completely remove comments when computing hash for query** \ No newline at end of file +4. 
**Add hint for query hash key** diff --git a/pgdog/src/frontend/cache/integration.rs b/pgdog/src/frontend/cache/integration.rs index 56bf9fbcc..d314e9040 100644 --- a/pgdog/src/frontend/cache/integration.rs +++ b/pgdog/src/frontend/cache/integration.rs @@ -1,23 +1,18 @@ +use std::borrow::Cow; use std::hash::{Hash, Hasher}; -use once_cell::sync::Lazy; -use regex::Regex; - use crate::{ frontend::{ cache::{storage::Error as CacheStorageError, CacheDecision}, ClientRequest, }, - net::{FromBytes, Message, Parameters, ToBytes}, + net::{bind::Bind, FromBytes, Message, Parameters, ToBytes}, }; use tracing::{debug, warn}; use super::{policy, Cache}; -static FORCE_CACHE_RE: Lazy = - Lazy::new(|| Regex::new(r#"pgdog_cache:\s*force_cache"#).unwrap()); - pub struct CacheMiss { pub cache_key_hash: u64, pub ttl: u64, @@ -29,6 +24,98 @@ pub enum CacheCheckResult { Passthrough, } +/// Strip SQL block comments (`/* ... */`, including nested) and line comments (`-- ...`) +/// from `query`, preserving string literals (`'...'`). +/// +/// Returns `Cow::Borrowed(query)` without any allocation when no comment markers +/// are found. Only allocates and builds a new `String` when a `/*` or `--` +/// sequence is actually present in the input. +pub fn strip_sql_comments(query: &str) -> Cow<'_, str> { + // Fast path: scan bytes for comment markers before doing any allocation. + let bytes = query.as_bytes(); + let has_comment = bytes.windows(2).any(|w| w == b"/*" || w == b"--"); + if !has_comment { + return Cow::Borrowed(query); + } + + let mut result = String::with_capacity(query.len()); + let mut chars = query.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + // Block comment — supports PostgreSQL nested `/* */`. 
+ '/' if chars.peek() == Some(&'*') => { + chars.next(); // consume '*' + let mut depth = 1u32; + while depth > 0 { + match chars.next() { + Some('/') if chars.peek() == Some(&'*') => { + chars.next(); + depth += 1; + } + Some('*') if chars.peek() == Some(&'/') => { + chars.next(); + depth -= 1; + } + None => break, // malformed input + _ => {} + } + } + // Replace the entire comment with a single space to avoid + // accidentally merging adjacent tokens (e.g. `SELECT/*c*/1`). + result.push(' '); + } + // Line comment. + '-' if chars.peek() == Some(&'-') => { + for ch in chars.by_ref() { + if ch == '\n' { + result.push('\n'); + break; + } + } + } + // String literal — pass through unchanged so we don't mistake `--` + // or `/*` inside a string for a comment. + '\'' => { + result.push(c); + while let Some(ch) = chars.next() { + result.push(ch); + if ch == '\'' { + // Standard SQL escaped quote: two consecutive single-quotes. + if chars.peek() == Some(&'\'') { + result.push(chars.next().unwrap()); + } else { + break; + } + } + } + } + _ => result.push(c), + } + } + + Cow::Owned(result) +} + +/// Compute the XXH3 cache key hash for a query. +/// +/// All SQL comments are stripped from `query` before hashing so the hash is identical +/// regardless of whether the cache directive was supplied via a comment or a connection +/// parameter. 
+pub fn compute_cache_key_hash(database: &str, query: &str, bind: Option<&Bind>) -> u64 { + let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); + database.hash(&mut hasher); + let stripped = strip_sql_comments(query); + stripped.trim().hash(&mut hasher); + if let Some(bind) = bind { + for param in bind.params_raw() { + param.len.hash(&mut hasher); + param.data.hash(&mut hasher); + } + } + hasher.finish() +} + const HEADER_CODE_LEN: usize = 1; const HEADER_LEN_SIZE: usize = 4; const HEADER_TOTAL: usize = HEADER_CODE_LEN + HEADER_LEN_SIZE; @@ -40,7 +127,7 @@ impl Cache { client_request: &ClientRequest, params: &Parameters, ) -> Result { - if in_transaction { + if in_transaction || !client_request.is_executable() { return Ok(CacheCheckResult::Passthrough); } @@ -61,31 +148,19 @@ impl Cache { _ => return Ok(CacheCheckResult::Passthrough), }; - let compute_cache_key_hash = || { - let user = params.get_required("user")?; - let database = params.get_default("database", user); - let mut hasher = xxhash_rust::xxh3::Xxh3Default::new(); - database.hash(&mut hasher); - let normalized_query = FORCE_CACHE_RE.replace(query.query(), "pgdog_cache: cache"); - normalized_query.hash(&mut hasher); - if let Some(bind) = client_request.parameters()? 
{ - for param in bind.params_raw() { - param.len.hash(&mut hasher); - param.data.hash(&mut hasher); - } - }; - Ok::(hasher.finish()) - }; + let user = params.get_required("user")?; + let database = params.get_default("database", user); + let bind = client_request.parameters()?; let decision = policy::resolve(client_request, params, is_read).await; match decision { CacheDecision::Skip => Ok(CacheCheckResult::Passthrough), CacheDecision::ForceCache(ttl) => Ok(CacheCheckResult::Miss(CacheMiss { - cache_key_hash: compute_cache_key_hash()?, + cache_key_hash: compute_cache_key_hash(database, query.query(), bind), ttl, })), CacheDecision::Cache(ttl) => { - let cache_key_hash = compute_cache_key_hash()?; + let cache_key_hash = compute_cache_key_hash(database, query.query(), bind); let guard = self.storage.read().await; match guard.as_ref() { None => Ok(CacheCheckResult::Passthrough), @@ -332,4 +407,160 @@ mod tests { assert_eq!(msg.code(), 'C'); } } + + // ------------------------------------------------------------------------- + // strip_sql_comments tests + // ------------------------------------------------------------------------- + + #[test] + fn strip_no_comments() { + let q = "SELECT 1"; + assert_eq!(strip_sql_comments(q), "SELECT 1"); + } + + #[test] + fn strip_no_comments_returns_borrowed() { + // When there are no comment markers the original slice must be returned + // without any allocation (Cow::Borrowed). 
+ let q = "SELECT 1 FROM t WHERE id = 42"; + assert!(matches!( + strip_sql_comments(q), + std::borrow::Cow::Borrowed(_) + )); + } + + #[test] + fn strip_with_comment_returns_owned() { + let q = "/* hint */ SELECT 1"; + assert!(matches!(strip_sql_comments(q), std::borrow::Cow::Owned(_))); + } + + #[test] + fn strip_block_comment() { + let q = "/* pgdog_cache: cache */ SELECT 1"; + let stripped = strip_sql_comments(q); + assert!(!stripped.contains("pgdog_cache")); + assert!(stripped.contains("SELECT 1")); + } + + #[test] + fn strip_line_comment() { + let q = "-- pgdog_cache: cache\nSELECT 1"; + let stripped = strip_sql_comments(q); + assert!(!stripped.contains("pgdog_cache")); + assert!(stripped.contains("SELECT 1")); + } + + #[test] + fn strip_nested_block_comments() { + let q = "/* outer /* inner */ still outer */ SELECT 2"; + let stripped = strip_sql_comments(q); + assert!(!stripped.contains("outer")); + assert!(!stripped.contains("inner")); + assert!(stripped.contains("SELECT 2")); + } + + #[test] + fn strip_does_not_remove_string_literal_contents() { + let q = "SELECT '/* not a comment */' FROM t"; + let stripped = strip_sql_comments(q); + // The string literal must be preserved verbatim. 
+ assert!(stripped.contains("'/* not a comment */'")); + } + + #[test] + fn strip_preserves_escaped_quotes_in_literal() { + let q = "SELECT 'it''s fine' FROM t"; + let stripped = strip_sql_comments(q); + assert_eq!(stripped, "SELECT 'it''s fine' FROM t"); + } + + #[test] + fn strip_multiple_block_comments() { + let q = "/* a */ SELECT /* b */ 1"; + let stripped = strip_sql_comments(q); + assert!(!stripped.contains("/* a */")); + assert!(!stripped.contains("/* b */")); + assert!(stripped.contains("SELECT")); + assert!(stripped.contains("1")); + } + + // ------------------------------------------------------------------------- + // compute_cache_key_hash tests + // ------------------------------------------------------------------------- + + #[test] + fn hash_is_stable() { + let h1 = compute_cache_key_hash("mydb", "SELECT 1", None); + let h2 = compute_cache_key_hash("mydb", "SELECT 1", None); + assert_eq!(h1, h2); + } + + #[test] + fn hash_differs_by_database() { + let h1 = compute_cache_key_hash("db1", "SELECT 1", None); + let h2 = compute_cache_key_hash("db2", "SELECT 1", None); + assert_ne!(h1, h2); + } + + #[test] + fn hash_differs_by_query() { + let h1 = compute_cache_key_hash("db", "SELECT 1", None); + let h2 = compute_cache_key_hash("db", "SELECT 2", None); + assert_ne!(h1, h2); + } + + #[test] + fn hash_same_with_and_without_cache_comment() { + // A block comment containing the cache directive must be stripped so + // the hash is the same whether the directive was in a comment or a + // connection parameter. + let h_with_comment = + compute_cache_key_hash("db", "/* pgdog_cache: cache */ SELECT 1", None); + let h_without_comment = compute_cache_key_hash("db", "SELECT 1", None); + assert_eq!(h_with_comment, h_without_comment); + } + + #[test] + fn hash_same_for_force_cache_and_regular_comment() { + // force_cache and cache hints should produce the same hash (both are + // stripped before hashing, so the underlying query is identical). 
+ let h_force = compute_cache_key_hash("db", "/* pgdog_cache: force_cache */ SELECT 1", None); + let h_cache = compute_cache_key_hash("db", "/* pgdog_cache: cache */ SELECT 1", None); + let h_plain = compute_cache_key_hash("db", "SELECT 1", None); + assert_eq!(h_force, h_cache); + assert_eq!(h_force, h_plain); + } + + #[test] + fn hash_same_for_line_comment_cache_directive() { + let h_with_line = compute_cache_key_hash("db", "-- pgdog_cache: cache\nSELECT 1", None); + let h_plain = compute_cache_key_hash("db", "SELECT 1", None); + assert_eq!(h_with_line, h_plain); + } + + #[test] + fn hash_differs_by_bind_params() { + use crate::net::messages::bind::{Bind, Parameter}; + use bytes::Bytes; + use pgdog_postgres_types::Format; + + let make_bind = |val: &'static [u8]| { + let mut b = Bind::default(); + b.push_param( + Parameter { + len: val.len() as i32, + data: Bytes::from_static(val), + }, + Format::Text, + ); + b + }; + + let b1 = make_bind(b"1"); + let b2 = make_bind(b"2"); + let h1 = compute_cache_key_hash("db", "SELECT $1", Some(&b1)); + let h2 = compute_cache_key_hash("db", "SELECT $1", Some(&b2)); + assert_ne!(h1, h2); + } } From 15bbd9af4536ebe2ef3529733552a4d2b6b49464 Mon Sep 17 00:00:00 2001 From: nutsalhan87 Date: Mon, 25 May 2026 11:31:02 +0300 Subject: [PATCH 23/23] add integratoin tests --- Cargo.lock | 9 + Cargo.toml | 1 + docs/CACHE.md | 24 ++ integration/cache/Cargo.toml | 12 + integration/cache/dev.sh | 8 + integration/cache/pgdog.toml | 31 ++ integration/cache/run.sh | 14 + integration/cache/src/lib.rs | 47 +++ integration/cache/tests/integration/mod.rs | 447 +++++++++++++++++++++ integration/cache/tests/mod.rs | 1 + integration/cache/users.toml | 4 + 11 files changed, 598 insertions(+) create mode 100644 integration/cache/Cargo.toml create mode 100755 integration/cache/dev.sh create mode 100644 integration/cache/pgdog.toml create mode 100755 integration/cache/run.sh create mode 100644 integration/cache/src/lib.rs create mode 100644 
integration/cache/tests/integration/mod.rs create mode 100644 integration/cache/tests/mod.rs create mode 100644 integration/cache/users.toml diff --git a/Cargo.lock b/Cargo.lock index d2a7fa197..b445a1d5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -804,6 +804,15 @@ dependencies = [ "either", ] +[[package]] +name = "cache" +version = "0.1.0" +dependencies = [ + "serial_test", + "sqlx", + "tokio", +] + [[package]] name = "castaway" version = "0.2.3" diff --git a/Cargo.toml b/Cargo.toml index d217b8652..d6c4153b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ resolver = "2" members = [ "examples/demo", "integration/rust", + "integration/cache", "pgdog", "pgdog-config", "pgdog-macros", diff --git a/docs/CACHE.md b/docs/CACHE.md index f92e5f245..9fa79baef 100644 --- a/docs/CACHE.md +++ b/docs/CACHE.md @@ -298,3 +298,27 @@ SQL comment → pgdog.cache parameter → DB policy config 3. **Set redis query timeout from config** 4. **Add hint for query hash key** + +5. **Add a flag that requires the cache storage to be available** — the query will fail with an error if Redis (or another cache storage) is unavailable. Subtask: the first query initializes the cache client, but the connection is established later, which is why the cache storage is unavailable for the first query — so the query needs to wait for the connection to be established. + +6. **Hash query without comments on the fly instead of normalizing it first** — this way no `String` is allocated. It must still produce the same hash for "SELECT 1;" and "/* pgdog_cache: cache */ SELECT 1;", even though the second one becomes " SELECT 1;" (with a leading space) once the comment is stripped.
+ +# Tests + +## Running the tests + +Unit tests (no PostgreSQL or Redis needed) +```sh +cargo nextest run -p pgdog frontend::cache +``` + +## Integration tests (PostgreSQL + Redis + pgdog required) + +```sh +bash integration/cache/run.sh +``` + +Or if you already have pgdog running on port 6432 with that config: +```sh +bash integration/cache/dev.sh +``` \ No newline at end of file diff --git a/integration/cache/Cargo.toml b/integration/cache/Cargo.toml new file mode 100644 index 000000000..c90004800 --- /dev/null +++ b/integration/cache/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "cache" +version = "0.1.0" +edition = "2024" + +[lib] +test = true + +[dependencies] +sqlx = { version = "*", features = ["postgres", "runtime-tokio"]} +tokio = { version = "1", features = ["full"]} +serial_test = "3" diff --git a/integration/cache/dev.sh b/integration/cache/dev.sh new file mode 100755 index 000000000..e1943d893 --- /dev/null +++ b/integration/cache/dev.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Run only the cache integration tests. 
+set -e +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +pushd "${SCRIPT_DIR}" +cargo nextest run --nff -j 1 integration +popd diff --git a/integration/cache/pgdog.toml b/integration/cache/pgdog.toml new file mode 100644 index 000000000..08c87d054 --- /dev/null +++ b/integration/cache/pgdog.toml @@ -0,0 +1,31 @@ +[general] +query_timeout = 2_000 +checkout_timeout = 2_000 +connect_timeout = 2_000 + +[general.cache] +enabled = true +policy = "cache" +ttl = 5 + +[general.cache.redis] +url = "redis://127.0.0.1:6379" + +# ------------------------------------------------------------------------------ +# ----- Admin ------------------------------------------------------------------ + +[admin] +password = "pgdog" + +# ------------------------------------------------------------------------------ +# ----- Database :: pgdog ------------------------------------------------------ + +[[databases]] +name = "pgdog" +host = "127.0.0.1" + +[[databases]] +name = "pgdog" +host = "127.0.0.1" +role = "replica" +read_only = true \ No newline at end of file diff --git a/integration/cache/run.sh b/integration/cache/run.sh new file mode 100755 index 000000000..4efde3003 --- /dev/null +++ b/integration/cache/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Run cache integration tests with the dedicated cache pgdog config. +# PostgreSQL must be running on 127.0.0.1:5432 and Redis on 127.0.0.1:6379. +# Run integration/setup.sh first if you haven't already. 
+set -e +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +source "${SCRIPT_DIR}"/../common.sh + +run_pgdog "integration/cache" +wait_for_pgdog + +bash "${SCRIPT_DIR}"/dev.sh + +stop_pgdog diff --git a/integration/cache/src/lib.rs b/integration/cache/src/lib.rs new file mode 100644 index 000000000..6ec5655e6 --- /dev/null +++ b/integration/cache/src/lib.rs @@ -0,0 +1,47 @@ +use sqlx::{Postgres, pool::Pool, postgres::PgPoolOptions}; + + +pub async fn connection() -> Pool { + PgPoolOptions::new() + .max_connections(1) + .connect("postgres://pgdog:pgdog@127.0.0.1:6432/pgdog?application_name=sqlx") + .await + .unwrap() +} + +pub async fn connection_direct() -> Pool { + PgPoolOptions::new() + .max_connections(1) + .connect("postgres://pgdog:pgdog@127.0.0.1:5432/pgdog?application_name=sqlx_direct") + .await + .unwrap() +} + +/// `options` should be a space-separated list of `-c key=value` pairs using +/// the exact percent-encoded form that PostgreSQL DSNs require (space - `%20`, +/// `=` - `%3D`). +pub async fn connection_with_options(options: &str) -> Pool { + PgPoolOptions::new() + .max_connections(1) + .connect(&format!( + "postgres://pgdog:pgdog@127.0.0.1:6432/pgdog?application_name=sqlx&options={}", + options, + )) + .await + .unwrap() +} + +pub async fn redis_available() -> bool { + tokio::net::TcpStream::connect("127.0.0.1:6379") + .await + .is_ok() +} + +#[macro_export] +macro_rules! 
require_redis { + () => { + if !redis_available().await { + panic!("Redis required at 127.0.0.1:6379 — start it before running cache tests"); + } + }; +} diff --git a/integration/cache/tests/integration/mod.rs b/integration/cache/tests/integration/mod.rs new file mode 100644 index 000000000..13d544be1 --- /dev/null +++ b/integration/cache/tests/integration/mod.rs @@ -0,0 +1,447 @@ +use std::time::Duration; + +use cache::*; +use serial_test::serial; +use sqlx::Executor; +use tokio::time::sleep; + +/// Verifies that a second identical SELECT is served from Redis instead of PostgreSQL. +/// +/// Strategy: +/// 1. Create and populate a test table. +/// 2. `no_cache` SELECT to verify the row exists in PG (does not warm Redis). +/// 3. Normal SELECT through pgdog (cache miss → response stored in Redis). +/// 4. Delete the row *directly* on PostgreSQL port 5432 so Redis is not invalidated. +/// 5. Normal SELECT again — must return the cached row even though PG has none. +#[tokio::test] +#[serial] +async fn test_cache_hit() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_hit (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_hit").await.unwrap(); + pool.execute("INSERT INTO cache_test_hit VALUES (1, 'hello')") + .await + .unwrap(); + + // Confirm the row is there without touching the Redis cache. + let rows: Vec<(i64, String)> = sqlx::query_as( + "/* pgdog_cache: no_cache */ SELECT id, val FROM cache_test_hit WHERE id = 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(rows.len(), 1, "row must exist in PG before cache warm-up"); + + // Warm up the Redis cache with a regular (cacheable) SELECT. 
+ let first: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_hit WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(first.len(), 1, "first SELECT must return the row"); + + // Delete the row directly in Postgres, bypassing pgdog so Redis is not invalidated. + let direct = connection_direct().await; + direct + .execute("DELETE FROM cache_test_hit WHERE id = 1") + .await + .unwrap(); + direct.close().await; + + // Confirm the row is actually gone in PG (no_cache hint bypasses Redis). + let gone: Vec<(i64, String)> = sqlx::query_as( + "/* pgdog_cache: no_cache */ SELECT id, val FROM cache_test_hit WHERE id = 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + assert!( + gone.is_empty(), + "row must be gone in PG after direct delete" + ); + + // Now the same query through pgdog without a hint: must be served from Redis. + let cached: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_hit WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!( + cached.len(), + 1, + "cached SELECT must still return the row served from Redis" + ); + assert_eq!(cached[0].1, "hello"); + + pool.execute("DROP TABLE IF EXISTS cache_test_hit") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that queries inside an explicit transaction are never served from Redis. +/// +/// The cache must be bypassed for in-transaction queries so that the client +/// always sees the latest database state as part of its own transaction. +#[tokio::test] +#[serial] +async fn test_cache_bypassed_in_transaction() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_txn (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_txn").await.unwrap(); + pool.execute("INSERT INTO cache_test_txn VALUES (1, 'original')") + .await + .unwrap(); + + // Warm up the cache. 
+ let _: Vec<(i64, String)> = sqlx::query_as("SELECT id, val FROM cache_test_txn WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + + // Inside a transaction, update the row and then SELECT — must see the updated value, + // not the stale cached one. + let mut tx = pool.begin().await.unwrap(); + sqlx::query("UPDATE cache_test_txn SET val = 'updated' WHERE id = 1") + .execute(&mut *tx) + .await + .unwrap(); + + let in_tx: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_txn WHERE id = 1") + .fetch_all(&mut *tx) + .await + .unwrap(); + assert_eq!( + in_tx[0].1, "updated", + "SELECT inside a transaction must see the transaction's own write, not the Redis cache" + ); + + tx.commit().await.unwrap(); + + pool.execute("DROP TABLE IF EXISTS cache_test_txn") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that cached results expire after the configured TTL. +#[tokio::test] +#[serial] +async fn test_cache_ttl_expiry() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_ttl (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_ttl").await.unwrap(); + pool.execute("INSERT INTO cache_test_ttl VALUES (1, 'original')") + .await + .unwrap(); + + // Warm up Redis. + let _: Vec<(i64, String)> = sqlx::query_as( + "/* pgdog_cache: cache ttl=1 */ SELECT id, val FROM cache_test_ttl WHERE id = 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + + // Remove the row directly from PG so Redis is stale. + let direct = connection_direct().await; + direct + .execute("DELETE FROM cache_test_ttl WHERE id = 1") + .await + .unwrap(); + direct.close().await; + + // Wait for the Redis entry to expire + sleep(Duration::from_secs(2)).await; + + // After expiry pgdog must query PG and return no rows. 
+ let rows: Vec<(i64, String)> = sqlx::query_as( + "/* pgdog_cache: cache ttl=1 */ SELECT id, val FROM cache_test_ttl WHERE id = 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + assert!( + rows.is_empty(), + "after TTL expiry the cached row must no longer be returned" + ); + + pool.execute("DROP TABLE IF EXISTS cache_test_ttl") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that the extended protocol (parameterized `$1` queries) uses the +/// bind parameter values in the cache key. +#[tokio::test] +#[serial] +async fn test_extended_protocol_different_params_have_different_cache_keys() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_ext (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_ext").await.unwrap(); + pool.execute("INSERT INTO cache_test_ext VALUES (1, 'one'), (2, 'two')") + .await + .unwrap(); + + // Warm cache for id=1. + let r1: Vec<(i64, String)> = sqlx::query_as("SELECT id, val FROM cache_test_ext WHERE id = $1") + .bind(1i64) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(r1.len(), 1); + assert_eq!(r1[0].1, "one"); + + // Warm cache for id=2. + let r2: Vec<(i64, String)> = sqlx::query_as("SELECT id, val FROM cache_test_ext WHERE id = $1") + .bind(2i64) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(r2.len(), 1); + assert_eq!(r2[0].1, "two"); + + // Delete both rows directly in PG so that any result must come from Redis. 
+ let direct = connection_direct().await; + direct.execute("DELETE FROM cache_test_ext").await.unwrap(); + direct.close().await; + + let cached1: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_ext WHERE id = $1") + .bind(1i64) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(cached1.len(), 1, "id=1 entry must be served from cache"); + assert_eq!(cached1[0].1, "one"); + + let cached2: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_ext WHERE id = $1") + .bind(2i64) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(cached2.len(), 1, "id=2 entry must be served from cache"); + assert_eq!(cached2[0].1, "two"); + + pool.execute("DROP TABLE IF EXISTS cache_test_ext") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that `/* pgdog_cache: force_cache */` updates cache. +#[tokio::test] +#[serial] +async fn test_force_cache() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_force (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_force").await.unwrap(); + pool.execute("INSERT INTO cache_test_force VALUES (1, 'not_forced')") + .await + .unwrap(); + + // Warm cache + let r1: Vec<(i64, String)> = + sqlx::query_as("/* pgdog_cache: cache ttl=2 */ SELECT id, val FROM cache_test_force WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(r1.len(), 1); + assert_eq!(r1[0].1, "not_forced"); + + let direct = connection_direct().await; + direct.execute("UPDATE cache_test_force SET val = 'forced' WHERE id = 1") + .await + .unwrap(); + direct.close().await; + + let r: Vec<(i64, String)> = sqlx::query_as( + "/* pgdog_cache: force_cache ttl=3 */ SELECT id, val FROM cache_test_force WHERE id = 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(r.len(), 1); + assert_eq!(r[0].1, "forced"); + + let cached: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_force WHERE id = 1") 
+ .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(cached.len(), 1); + assert_eq!(cached[0].1, "forced"); + + pool.execute("DROP TABLE IF EXISTS cache_test_force") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that `/* pgdog_cache: no_cache */` prevents the response from being +/// stored in Redis, so a subsequent plain SELECT actually hits PostgreSQL. +#[tokio::test] +#[serial] +async fn test_no_cache_hint_does_not_warm_redis() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_no_warm (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_no_warm").await.unwrap(); + pool.execute("INSERT INTO cache_test_no_warm VALUES (1, 'original')") + .await + .unwrap(); + + // Fetch with no_cache — must NOT warm Redis. + let r: Vec<(i64, String)> = sqlx::query_as( + "/* pgdog_cache: no_cache */ SELECT id, val FROM cache_test_no_warm WHERE id = 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!(r.len(), 1); + + let direct = connection_direct().await; + direct + .execute("DELETE FROM cache_test_no_warm WHERE id = 1") + .await + .unwrap(); + direct.close().await; + + // A plain SELECT must reach PG (no cache entry) and return 0 rows. + let after: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_no_warm WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + assert!( + after.is_empty(), + "no_cache hint must not warm Redis, so PG miss returns 0 rows" + ); + + pool.execute("DROP TABLE IF EXISTS cache_test_no_warm") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that passing `pgdog.cache=no_cache` in the connection DSN options +/// bypasses the cache for all queries on that connection. 
+#[tokio::test] +#[serial] +async fn test_connection_option_no_cache_bypasses_redis() { + require_redis!(); + let pool = connection().await; + + pool.execute("CREATE TABLE IF NOT EXISTS cache_test_param (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("TRUNCATE cache_test_param").await.unwrap(); + pool.execute("INSERT INTO cache_test_param VALUES (1, 'cached_val')") + .await + .unwrap(); + + // Warm the cache via a normal connection. + let _: Vec<(i64, String)> = sqlx::query_as("SELECT id, val FROM cache_test_param WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + + let direct = connection_direct().await; + direct + .execute("DELETE FROM cache_test_param WHERE id = 1") + .await + .unwrap(); + direct.close().await; + + // A connection with pgdog.cache=no_cache must bypass Redis and hit PG, + // returning 0 rows because the row was deleted. + let no_cache_conn = connection_with_options("-c%20pgdog.cache%3Dno_cache").await; + let rows: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_param WHERE id = 1") + .fetch_all(&no_cache_conn) + .await + .unwrap(); + assert!( + rows.is_empty(), + "connection-level no_cache must bypass Redis and see the deleted row" + ); + no_cache_conn.close().await; + + pool.execute("DROP TABLE IF EXISTS cache_test_param") + .await + .unwrap(); + pool.close().await; +} + +/// Verifies that error responses are never stored in Redis. +/// +/// A query that initially errors must not poison the cache: after the error +/// is fixed (table created), the same query must reach PG and return live data. +#[tokio::test] +#[serial] +async fn test_error_response_not_cached() { + require_redis!(); + let pool = connection().await; + + pool.execute("DROP TABLE IF EXISTS cache_test_error") + .await + .unwrap(); + + // This SELECT will produce an error (table does not exist). 
+ let err = sqlx::query("SELECT id, val FROM cache_test_error WHERE id = 1") + .fetch_all(&pool) + .await; + assert!(err.is_err(), "query on missing table must return an error"); + + // Now create the table and insert a row. + pool.execute("CREATE TABLE cache_test_error (id BIGINT PRIMARY KEY, val TEXT)") + .await + .unwrap(); + pool.execute("INSERT INTO cache_test_error VALUES (1, 'live')") + .await + .unwrap(); + + // The same query must now hit PG (the previous error was not cached). + let rows: Vec<(i64, String)> = + sqlx::query_as("SELECT id, val FROM cache_test_error WHERE id = 1") + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!( + rows.len(), + 1, + "error must not be cached; must return live row" + ); + assert_eq!(rows[0].1, "live"); + + pool.execute("DROP TABLE IF EXISTS cache_test_error") + .await + .unwrap(); + pool.close().await; +} diff --git a/integration/cache/tests/mod.rs b/integration/cache/tests/mod.rs new file mode 100644 index 000000000..7221f6b15 --- /dev/null +++ b/integration/cache/tests/mod.rs @@ -0,0 +1 @@ +pub mod integration; \ No newline at end of file diff --git a/integration/cache/users.toml b/integration/cache/users.toml new file mode 100644 index 000000000..77fa26a15 --- /dev/null +++ b/integration/cache/users.toml @@ -0,0 +1,4 @@ +[[users]] +name = "pgdog" +database = "pgdog" +password = "pgdog" \ No newline at end of file