From a3f683f33ba31c2cafa1fa9ca76acb7874bc25f8 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 14 May 2026 03:06:08 +0000 Subject: [PATCH 1/3] feat: support Query::select + remaining query types on ClickHouse adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the missing Query types so audit callers can opt in to slim projections and the full filter/order menu rather than being limited to a subset: - `Query::select(['col', ...])` — column projection. Multiple `select()` calls combine; `id` is always projected so the Log model still has its identifier. Each requested column is validated against the schema and identifier-escaped at SQL build time. Without `select()`, the existing full-column behaviour is unchanged. - `Query::notStartsWith(...)` / `Query::notEndsWith(...)` — symmetric with the existing `startsWith` / `endsWith`, emitted as `NOT startsWith(col, val)` / `NOT endsWith(col, val)`. - `Query::regex(col, pattern)` — compiled to ClickHouse's `match(haystack, pattern)`. Pattern is parameter-bound, never inlined. - `Query::orderRandom()` — `ORDER BY rand()`. Mutually exclusive with cursor pagination — combining the two throws, since cursor needs a stable order to anchor the next page on. `select`, `regex`, `notStartsWith`, `notEndsWith` are added to `VALUE_REQUIRED_METHODS` so they fail loudly when given an empty values array, matching the existing contract. Skipped: full-text `search`/`notSearch`, `exists`/`notExists`, `containsAny`/`containsAll`/`elemMatch`, vector / spatial types — none map cleanly to the audit table's scalar columns. `and` / `or` logical combinations are filed as a separate follow-up because they need a recursive filter compiler that we don't have today. Eight new tests cover happy paths, unknown-column rejection, empty-values rejection, and the cursor + random incompatibility. --- src/Audit/Adapter/ClickHouse.php | 134 ++++++++++++++++++++++++- tests/Audit/Adapter/ClickHouseTest.php | 94 +++++++++++++++++ 2 files changed, 223 insertions(+), 5 deletions(-) diff --git a/src/Audit/Adapter/ClickHouse.php b/src/Audit/Adapter/ClickHouse.php index e1fff33..5b7cf95 100644 --- a/src/Audit/Adapter/ClickHouse.php +++ b/src/Audit/Adapter/ClickHouse.php @@ -42,7 +42,11 @@ class ClickHouse extends SQL Query::TYPE_CONTAINS, Query::TYPE_NOT_CONTAINS, Query::TYPE_STARTS_WITH, + Query::TYPE_NOT_STARTS_WITH, Query::TYPE_ENDS_WITH, + Query::TYPE_NOT_ENDS_WITH, + Query::TYPE_REGEX, + Query::TYPE_SELECT, ]; private string $host; @@ -866,8 +870,15 @@ public function find(array $queries = []): array // Parse queries $parsed = $this->parseQueries($queries); - // Build SELECT clause - $selectColumns = $this->getSelectColumns(); + // Random ordering and cursor pagination are incompatible: cursor needs + // a stable order to anchor the next page on, rand() has none. + if (!empty($parsed['randomOrder']) && isset($parsed['cursor'])) { + throw new Exception('Cursor pagination cannot be combined with orderRandom'); + } + + // Build SELECT clause — respect Query::select if provided, otherwise + // fall back to the full column list. + $selectColumns = $this->buildProjection($parsed['select'] ?? null); $filters = $parsed['filters']; $params = $parsed['params']; @@ -892,11 +903,14 @@ public function find(array $queries = []): array $whereClause = ' WHERE ' . implode(' AND ', $conditions); } - // Build ORDER BY clause — when cursor is in play, rebuild from + // Build ORDER BY clause. Random takes priority over column-based + // ordering, and otherwise: when cursor is in play, rebuild from // orderAttributes (always non-empty after resolveCursorOrder, which // appends an id tiebreaker), flipping directions for `cursorBefore`. $orderClause = ''; - if (isset($parsed['cursor'])) { + if (!empty($parsed['randomOrder'])) { + $orderClause = ' ORDER BY rand()'; + } elseif (isset($parsed['cursor'])) { $orderSql = $this->buildOrderBySql($orderAttributes, flip: $cursorDirection === 'before'); $orderClause = ' ORDER BY ' . implode(', ', $orderSql); } elseif (!empty($parsed['orderBy'])) { @@ -923,6 +937,38 @@ public function find(array $queries = []): array return $rows; } + /** + * Build the SELECT projection list. When `$select` is null, returns the + * full column list from `getSelectColumns()`; otherwise validates and + * escapes each requested column. + * + * `id` is always projected so `parseJsonResults` can map it back to the + * `$id` field on the `Log` model. Callers requesting a slim projection + * don't have to remember to include it. + * + * @param list|null $select + * @throws Exception + */ + private function buildProjection(?array $select): string + { + if ($select === null) { + return $this->getSelectColumns(); + } + + $columns = []; + $seen = []; + foreach (array_merge(['id'], $select) as $column) { + if (isset($seen[$column])) { + continue; + } + $this->validateAttributeName($column); + $columns[] = $this->escapeIdentifier($column); + $seen[$column] = true; + } + + return implode(', ', $columns); + } + /** * Count logs using Query objects. * @@ -985,7 +1031,7 @@ public function count(array $queries = [], ?int $max = null): int * Parse Query objects into SQL components. * * @param array $queries - * @return array{filters: array, params: array, orderBy?: array, orderAttributes?: array, limit?: int, offset?: int, cursor?: array, cursorDirection?: string} + * @return array{filters: array, params: array, orderBy?: array, orderAttributes?: array, randomOrder?: bool, limit?: int, offset?: int, cursor?: array, cursorDirection?: string, select?: list} * @throws Exception */ private function parseQueries(array $queries): array @@ -998,6 +1044,8 @@ private function parseQueries(array $queries): array $offset = null; $cursor = null; $cursorDirection = null; + $select = null; + $randomOrder = false; $paramCounter = 0; foreach ($queries as $query) { @@ -1163,6 +1211,18 @@ private function parseQueries(array $queries): array $params[$paramName] = $needle; break; + case Query::TYPE_NOT_STARTS_WITH: + $this->validateAttributeName($attribute); + $escapedAttr = $this->escapeIdentifier($attribute); + $needle = $values[0] ?? null; + if (!is_string($needle)) { + throw new Exception("notStartsWith needle must be a string for attribute '{$attribute}'"); + } + $paramName = 'param_' . $paramCounter++; + $filters[] = "NOT startsWith({$escapedAttr}, {{$paramName}:String})"; + $params[$paramName] = $needle; + break; + case Query::TYPE_ENDS_WITH: $this->validateAttributeName($attribute); $escapedAttr = $this->escapeIdentifier($attribute); @@ -1175,6 +1235,55 @@ private function parseQueries(array $queries): array $params[$paramName] = $needle; break; + case Query::TYPE_NOT_ENDS_WITH: + $this->validateAttributeName($attribute); + $escapedAttr = $this->escapeIdentifier($attribute); + $needle = $values[0] ?? null; + if (!is_string($needle)) { + throw new Exception("notEndsWith needle must be a string for attribute '{$attribute}'"); + } + $paramName = 'param_' . $paramCounter++; + $filters[] = "NOT endsWith({$escapedAttr}, {{$paramName}:String})"; + $params[$paramName] = $needle; + break; + + case Query::TYPE_REGEX: + $this->validateAttributeName($attribute); + $escapedAttr = $this->escapeIdentifier($attribute); + $pattern = $values[0] ?? null; + if (!is_string($pattern)) { + throw new Exception("regex pattern must be a string for attribute '{$attribute}'"); + } + $paramName = 'param_' . $paramCounter++; + // ClickHouse's `match(haystack, pattern)` is the re2-style + // regex predicate. Pattern is bound as a parameter, never + // interpolated, so it can't escape into the SQL. + $filters[] = "match({$escapedAttr}, {{$paramName}:String})"; + $params[$paramName] = $pattern; + break; + + case Query::TYPE_SELECT: + if (empty($values)) { + // VALUE_REQUIRED_METHODS already rejects empty values + // earlier, but the explicit check keeps this branch safe + // if the guard is ever bypassed. + break; + } + // Multiple Query::select(...) calls combine into a single + // projection. Duplicates are removed; column names are + // validated and escaped at SQL build time in find(). + $select ??= []; + foreach ($values as $column) { + if (!is_string($column) || $column === '') { + throw new Exception('select columns must be non-empty strings'); + } + $this->validateAttributeName($column); + if (!in_array($column, $select, true)) { + $select[] = $column; + } + } + break; + case Query::TYPE_ORDER_DESC: $this->validateAttributeName($attribute); $escapedAttr = $this->escapeIdentifier($attribute); @@ -1189,6 +1298,13 @@ private function parseQueries(array $queries): array $orderAttributes[] = ['attribute' => $attribute, 'direction' => 'ASC']; break; + case Query::TYPE_ORDER_RANDOM: + // ClickHouse's rand() is the per-row PRNG used for random + // sampling. Single emission across the result set — repeated + // Query::orderRandom() calls collapse into one ORDER BY rand(). + $randomOrder = true; + break; + case Query::TYPE_LIMIT: if (!\is_int($values[0])) { throw new \Exception('Invalid limit value. Expected int'); @@ -1231,6 +1347,10 @@ private function parseQueries(array $queries): array $result['orderAttributes'] = $orderAttributes; } + if ($randomOrder) { + $result['randomOrder'] = true; + } + if ($limit !== null) { $result['limit'] = $limit; } @@ -1244,6 +1364,10 @@ private function parseQueries(array $queries): array $result['cursorDirection'] = $cursorDirection; } + if ($select !== null) { + $result['select'] = $select; + } + return $result; } diff --git a/tests/Audit/Adapter/ClickHouseTest.php b/tests/Audit/Adapter/ClickHouseTest.php index 521d143..eb5fc57 100644 --- a/tests/Audit/Adapter/ClickHouseTest.php +++ b/tests/Audit/Adapter/ClickHouseTest.php @@ -773,4 +773,98 @@ public function testEqualRejectsEmptyValues(): void new Query(Query::TYPE_EQUAL, 'event', []), ]); } + + public function testSelectProjectsRequestedColumns(): void + { + $logs = $this->audit->find([ + Query::select(['event', 'resource']), + Query::equal('userId', 'userId'), + Query::limit(1), + ]); + + $this->assertGreaterThanOrEqual(1, count($logs)); + + $row = $logs[0]->getArrayCopy(); + // `id` is always projected so the Log model still has its identifier + $this->assertArrayHasKey('$id', $row); + // Requested columns present + $this->assertArrayHasKey('event', $row); + $this->assertArrayHasKey('resource', $row); + // Unrequested columns are absent + $this->assertArrayNotHasKey('userAgent', $row); + $this->assertArrayNotHasKey('ip', $row); + $this->assertArrayNotHasKey('data', $row); + } + + public function testSelectRejectsUnknownColumn(): void + { + $this->expectException(\Exception::class); + $this->expectExceptionMessage('Invalid attribute name: bogus_column'); + + $this->audit->find([ + Query::select(['bogus_column']), + ]); + } + + public function testSelectRejectsEmptyValues(): void + { + $this->expectException(\Exception::class); + $this->expectExceptionMessage('Select queries require at least one value.'); + + $this->audit->find([ + Query::select([]), + ]); + } + + public function testNotStartsWithFilter(): void + { + $logs = $this->audit->find([ + Query::notStartsWith('resource', 'database/'), + ]); + // From fixture: only 'user/null' doesn't start with 'database/' + $this->assertCount(1, $logs); + $this->assertEquals('user/null', $logs[0]->getResource()); + } + + public function testNotEndsWithFilter(): void + { + $logs = $this->audit->find([ + Query::notEndsWith('resource', '/null'), + ]); + // From fixture: 3 logs are on database/document/{1,2,2} + $this->assertCount(3, $logs); + foreach ($logs as $log) { + $this->assertStringStartsNotWith('user/', $log->getResource()); + } + } + + public function testRegexFilter(): void + { + $logs = $this->audit->find([ + Query::regex('resource', '^database/document/\\d+$'), + ]); + // From fixture: 3 database/document/{1,2,2} rows match + $this->assertCount(3, $logs); + } + + public function testOrderRandomReturnsRows(): void + { + $logs = $this->audit->find([ + Query::orderRandom(), + Query::limit(2), + ]); + // Hard to assert randomness; just confirm the query executes and limits. + $this->assertCount(2, $logs); + } + + public function testOrderRandomRejectedWithCursor(): void + { + $this->expectException(\Exception::class); + $this->expectExceptionMessage('Cursor pagination cannot be combined with orderRandom'); + + $this->audit->find([ + Query::orderRandom(), + Query::cursorAfter(['id' => 'whatever']), + ]); + } } From c1b85ad620e706cb063687f19e4f0b9759367a67 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 14 May 2026 03:10:42 +0000 Subject: [PATCH 2/3] feat: always include tenant in slim projection when sharedTables is on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `sharedTables` is enabled the full-projection path already appends the `tenant` column to every SELECT, so the slim `Query::select(...)` path should match — `tenant` is metadata callers expect on every row regardless of which columns they explicitly listed. Force-include it alongside `id` (which was already always-projected so the Log model keeps its identifier). --- src/Audit/Adapter/ClickHouse.php | 13 +++++++--- tests/Audit/Adapter/ClickHouseTest.php | 33 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/Audit/Adapter/ClickHouse.php b/src/Audit/Adapter/ClickHouse.php index 5b7cf95..f84cfce 100644 --- a/src/Audit/Adapter/ClickHouse.php +++ b/src/Audit/Adapter/ClickHouse.php @@ -943,8 +943,10 @@ public function find(array $queries = []): array * escapes each requested column. * * `id` is always projected so `parseJsonResults` can map it back to the - * `$id` field on the `Log` model. Callers requesting a slim projection - * don't have to remember to include it. + * `$id` field on the `Log` model. When `sharedTables` is enabled, the + * `tenant` column is also always projected — it's metadata callers expect + * on every row and the full-projection path already includes it. Callers + * requesting a slim projection don't have to remember either. * * @param list|null $select * @throws Exception @@ -955,9 +957,14 @@ private function buildProjection(?array $select): string return $this->getSelectColumns(); } + $forced = ['id']; + if ($this->sharedTables) { + $forced[] = 'tenant'; + } + $columns = []; $seen = []; - foreach (array_merge(['id'], $select) as $column) { + foreach (array_merge($forced, $select) as $column) { if (isset($seen[$column])) { continue; } diff --git a/tests/Audit/Adapter/ClickHouseTest.php b/tests/Audit/Adapter/ClickHouseTest.php index eb5fc57..db45e59 100644 --- a/tests/Audit/Adapter/ClickHouseTest.php +++ b/tests/Audit/Adapter/ClickHouseTest.php @@ -796,6 +796,39 @@ public function testSelectProjectsRequestedColumns(): void $this->assertArrayNotHasKey('data', $row); } + public function testSelectAutoIncludesTenantWhenShared(): void + { + $host = getenv('CLICKHOUSE_HOST') ?: 'clickhouse'; + $port = (int) (getenv('CLICKHOUSE_PORT') ?: 8123); + + $adapter = new ClickHouse( + host: $host, + username: 'default', + password: 'clickhouse', + port: $port, + ); + $adapter->setNamespace('select_tenant_test'); + $adapter->setSharedTables(true); + $adapter->setTenant(7); + $adapter->setup(); + + $audit = new Audit($adapter); + $audit->log('u1', 'create', 'doc/1', 'agent', '127.0.0.1', 'US', $this->getRequiredAttributes()); + + $logs = $audit->find([ + Query::select(['event']), + Query::limit(1), + ]); + + $this->assertCount(1, $logs); + $row = $logs[0]->getArrayCopy(); + $this->assertArrayHasKey('$id', $row); + $this->assertArrayHasKey('event', $row); + // tenant is always projected when sharedTables is on, even if the + // caller didn't list it + $this->assertArrayHasKey('tenant', $row); + } + public function testSelectRejectsUnknownColumn(): void { $this->expectException(\Exception::class); From 437ad9ea338191b56f5c4c3e27526208bc5f0d16 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 14 May 2026 03:32:00 +0000 Subject: [PATCH 3/3] fix: address greptile review on PR #116 Two follow-ups from greptile on c1b85ad: - buildProjection no longer re-validates user-supplied select columns. parseQueries already calls validateAttributeName on each column inside the TYPE_SELECT branch, so the second walk through getAttributes() in buildProjection was wasted work. The forced columns (id, tenant) still get the defensive check since they're injected here, not user input. - orderRandom combined with orderAsc/orderDesc now throws. Previously rand() silently took precedence over the requested column order, which is inconsistent with how the cursor + random combination is rejected. The new guard mirrors that pattern so callers see the conflict explicitly rather than getting unexpected results. --- src/Audit/Adapter/ClickHouse.php | 32 ++++++++++++++++++++------ tests/Audit/Adapter/ClickHouseTest.php | 11 +++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/Audit/Adapter/ClickHouse.php b/src/Audit/Adapter/ClickHouse.php index f84cfce..8613d98 100644 --- a/src/Audit/Adapter/ClickHouse.php +++ b/src/Audit/Adapter/ClickHouse.php @@ -870,11 +870,17 @@ public function find(array $queries = []): array // Parse queries $parsed = $this->parseQueries($queries); - // Random ordering and cursor pagination are incompatible: cursor needs - // a stable order to anchor the next page on, rand() has none. + // Random ordering can't combine with anything that asks for a + // specific row order: cursor pagination needs a stable anchor, and + // mixing column-based ORDER BY with rand() would silently drop the + // column order. Reject loudly in both cases so the caller fixes the + // query rather than getting unexpected results. if (!empty($parsed['randomOrder']) && isset($parsed['cursor'])) { throw new Exception('Cursor pagination cannot be combined with orderRandom'); } + if (!empty($parsed['randomOrder']) && !empty($parsed['orderBy'])) { + throw new Exception('orderRandom cannot be combined with orderAsc/orderDesc'); + } // Build SELECT clause — respect Query::select if provided, otherwise // fall back to the full column list. @@ -903,10 +909,11 @@ public function find(array $queries = []): array $whereClause = ' WHERE ' . implode(' AND ', $conditions); } - // Build ORDER BY clause. Random takes priority over column-based - // ordering, and otherwise: when cursor is in play, rebuild from - // orderAttributes (always non-empty after resolveCursorOrder, which - // appends an id tiebreaker), flipping directions for `cursorBefore`. + // Build ORDER BY clause. orderRandom is mutually exclusive with + // cursor and column ordering (rejected at the top of find()); when + // cursor is in play, rebuild from orderAttributes (always non-empty + // after resolveCursorOrder, which appends an id tiebreaker), + // flipping directions for `cursorBefore`. $orderClause = ''; if (!empty($parsed['randomOrder'])) { $orderClause = ' ORDER BY rand()'; @@ -957,6 +964,10 @@ private function buildProjection(?array $select): string return $this->getSelectColumns(); } + // Forced columns are injected here, so they're validated defensively. + // User-supplied columns in $select are already validated inside the + // TYPE_SELECT branch of parseQueries() — no need to walk + // getAttributes() a second time per column. $forced = ['id']; if ($this->sharedTables) { $forced[] = 'tenant'; @@ -964,7 +975,7 @@ private function buildProjection(?array $select): string $columns = []; $seen = []; - foreach (array_merge($forced, $select) as $column) { + foreach ($forced as $column) { if (isset($seen[$column])) { continue; } @@ -972,6 +983,13 @@ private function buildProjection(?array $select): string $columns[] = $this->escapeIdentifier($column); $seen[$column] = true; } + foreach ($select as $column) { + if (isset($seen[$column])) { + continue; + } + $columns[] = $this->escapeIdentifier($column); + $seen[$column] = true; + } return implode(', ', $columns); } diff --git a/tests/Audit/Adapter/ClickHouseTest.php b/tests/Audit/Adapter/ClickHouseTest.php index db45e59..b50da71 100644 --- a/tests/Audit/Adapter/ClickHouseTest.php +++ b/tests/Audit/Adapter/ClickHouseTest.php @@ -900,4 +900,15 @@ public function testOrderRandomRejectedWithCursor(): void Query::cursorAfter(['id' => 'whatever']), ]); } + + public function testOrderRandomRejectedWithColumnOrder(): void + { + $this->expectException(\Exception::class); + $this->expectExceptionMessage('orderRandom cannot be combined with orderAsc/orderDesc'); + + $this->audit->find([ + Query::orderRandom(), + Query::orderDesc('time'), + ]); + } }