Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions src/main/java/com/spandigital/cel2sql/dialect/RegexSafety.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
package com.spandigital.cel2sql.dialect;

import com.spandigital.cel2sql.error.ConversionException;

import java.util.regex.Pattern;

/**
* Shared ReDoS-safety validation for the RE2-style regex dialects
* (PostgreSQL, MySQL, DuckDB, BigQuery, Spark).
*
* <p>Every dialect that accepts regular expressions enforces the same structural
* limits to prevent catastrophic-backtracking attacks (CWE-1333): a maximum
* pattern length, capture-group count, and nesting depth, plus heuristics that
* reject nested quantifiers and quantified alternation. Those checks are
* dialect-agnostic — they operate on the RE2 source pattern before any
* dialect-specific conversion — so they live here once instead of being copied
* into each {@code XxxRegex} class.</p>
*
* <p>Dialect-specific concerns (unsupported-feature detection, the actual
* RE2-to-native conversion) remain in the per-dialect classes.</p>
*/
public final class RegexSafety {

    /** Maximum allowed regex pattern length. */
    public static final int MAX_PATTERN_LENGTH = 500;

    /** Maximum allowed capture groups in a pattern. */
    public static final int MAX_GROUPS = 20;

    /** Maximum allowed nesting depth of parenthesized groups. */
    public static final int MAX_NESTING_DEPTH = 10;

    /** First argument passed to every {@link ConversionException} raised by this class. */
    private static final String INVALID_PATTERN = "Invalid pattern in expression";

    /** Second argument used by both nested-quantifier rejections. */
    private static final String NESTED_QUANTIFIER_DETAIL =
            "regex contains catastrophic nested quantifiers that could cause ReDoS";

    /**
     * Matches two adjacent {@code *}/{@code +} characters anywhere in the text.
     * The match is purely textual: it does not look at escapes or bracket
     * expressions, so it errs on the side of rejecting.
     */
    private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");

    /**
     * Matches a parenthesized span that contains a {@code |} and no {@code )}
     * and is immediately followed by {@code *} or {@code +}.
     */
    private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");

    private RegexSafety() {
    }

    /**
     * Enforces the maximum pattern-length limit.
     *
     * @param pattern the RE2 regex pattern
     * @throws ConversionException if the pattern exceeds {@link #MAX_PATTERN_LENGTH}
     */
    public static void checkLength(String pattern) throws ConversionException {
        if (pattern.length() > MAX_PATTERN_LENGTH) {
            throw new ConversionException(
                    INVALID_PATTERN,
                    String.format("pattern length %d exceeds limit of %d characters",
                            pattern.length(), MAX_PATTERN_LENGTH));
        }
    }

    /**
     * Runs the shared structural ReDoS checks against a pattern, in order:
     * <ol>
     *   <li>simple back-to-back quantifiers ({@code a*+}, {@code a++})</li>
     *   <li>quantified groups that themselves contain inner quantifiers ({@code (a+)+})</li>
     *   <li>capture-group count limit</li>
     *   <li>quantified alternation ({@code (a|b)+})</li>
     *   <li>group nesting-depth limit</li>
     * </ol>
     *
     * <p>The first failing check wins; later checks are not evaluated. This
     * method does not enforce the length limit itself — see
     * {@link #checkLength(String)}.</p>
     *
     * @param pattern the RE2 regex pattern (after any case-insensitivity flag has been stripped)
     * @throws ConversionException if any limit is exceeded or a catastrophic construct is detected
     */
    public static void checkReDoS(String pattern) throws ConversionException {
        if (NESTED_QUANTIFIERS.matcher(pattern).find()) {
            throw new ConversionException(INVALID_PATTERN, NESTED_QUANTIFIER_DETAIL);
        }

        validateNoNestedQuantifiers(pattern);

        int groupCount = countUnescapedParens(pattern);
        if (groupCount > MAX_GROUPS) {
            throw new ConversionException(
                    INVALID_PATTERN,
                    String.format("regex contains %d capture groups, exceeds limit of %d",
                            groupCount, MAX_GROUPS));
        }

        if (QUANTIFIED_ALTERNATION.matcher(pattern).find()) {
            throw new ConversionException(
                    INVALID_PATTERN,
                    "regex contains quantified alternation that could cause ReDoS");
        }

        int maxDepth = computeMaxNestingDepth(pattern);
        if (maxDepth > MAX_NESTING_DEPTH) {
            throw new ConversionException(
                    INVALID_PATTERN,
                    String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
        }
    }

    /**
     * Validates that no quantified group contains an inner quantifier.
     * This detects patterns like {@code (a+)+} that can cause catastrophic backtracking.
     *
     * <p>The scan keeps one flag per currently open group recording whether a
     * quantifier character ({@code *}, {@code +}, {@code ?}, <code>{</code>) has
     * been seen inside it. When a group closes, its flag is propagated to the
     * enclosing group, and the pattern is rejected if the closing parenthesis
     * is itself followed by a quantifier character while the flag is set.</p>
     *
     * <p>A backslash consumes the character after it, so {@code \(} is a
     * literal parenthesis while in {@code \\(} the parenthesis opens a real
     * group (the backslash pair is a literal backslash).</p>
     *
     * <p>NOTE(review): bracket expressions are not treated specially, and a
     * {@code ?} directly after {@code (} (as in {@code (?:}) is counted as a
     * quantifier, so such groups are rejected when quantified.</p>
     *
     * @param pattern the regex pattern to scan
     * @throws ConversionException if a quantified group contains a quantifier
     */
    private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
        int length = pattern.length();
        // There can never be more open groups than characters, so this is always large enough.
        boolean[] groupHasQuantifier = new boolean[length + 1];
        int top = -1; // index of the innermost open group, -1 when none is open

        for (int i = 0; i < length; i++) {
            char ch = pattern.charAt(i);

            if (ch == '\\') {
                // The backslash and the character it escapes form one literal unit.
                i++;
                continue;
            }

            switch (ch) {
                case '(' -> groupHasQuantifier[++top] = false;
                case ')' -> {
                    // A ')' with no open group is ignored rather than underflowing the stack.
                    if (top >= 0) {
                        boolean innerQuantified = groupHasQuantifier[top];
                        if (innerQuantified && i + 1 < length && isQuantifierStart(pattern.charAt(i + 1))) {
                            throw new ConversionException(INVALID_PATTERN, NESTED_QUANTIFIER_DETAIL);
                        }
                        if (innerQuantified && top > 0) {
                            // Quantifiers inside a child also count as being inside the parent.
                            groupHasQuantifier[top - 1] = true;
                        }
                        top--;
                    }
                }
                case '*', '+', '?', '{' -> {
                    if (top >= 0) {
                        groupHasQuantifier[top] = true;
                    }
                }
                default -> {
                    // Any other character is irrelevant to this check.
                }
            }
        }
    }

    /**
     * Reports whether {@code ch} begins a quantifier: {@code *}, {@code +},
     * {@code ?} or <code>{</code>.
     */
    private static boolean isQuantifierStart(char ch) {
        return ch == '*' || ch == '+' || ch == '?' || ch == '{';
    }

    /**
     * Counts the number of unescaped opening parentheses in the pattern.
     *
     * <p>A backslash consumes the following character, so an escaped
     * backslash does not hide a parenthesis that comes right after it.
     * Every unescaped {@code (} is counted, whatever follows it.</p>
     *
     * @param pattern the regex pattern to scan
     * @return the number of unescaped {@code (} characters
     */
    private static int countUnescapedParens(String pattern) {
        int count = 0;
        for (int i = 0; i < pattern.length(); i++) {
            char ch = pattern.charAt(i);
            if (ch == '\\') {
                i++; // skip the escaped character
            } else if (ch == '(') {
                count++;
            }
        }
        return count;
    }

    /**
     * Computes the maximum nesting depth of parenthesized groups in the pattern.
     *
     * <p>Escapes are handled as backslash/character pairs. A closing
     * parenthesis with no matching open group is ignored so that stray
     * {@code )} characters cannot push the counter below zero and mask
     * nesting that appears later in the pattern.</p>
     *
     * @param pattern the regex pattern to scan
     * @return the deepest level of simultaneously open groups (0 if none)
     */
    private static int computeMaxNestingDepth(String pattern) {
        int maxDepth = 0;
        int currentDepth = 0;
        for (int i = 0; i < pattern.length(); i++) {
            char ch = pattern.charAt(i);
            if (ch == '\\') {
                i++; // skip the escaped character
            } else if (ch == '(') {
                currentDepth++;
                maxDepth = Math.max(maxDepth, currentDepth);
            } else if (ch == ')' && currentDepth > 0) {
                currentDepth--;
            }
        }
        return maxDepth;
    }
}
147 changes: 147 additions & 0 deletions src/main/java/com/spandigital/cel2sql/dialect/SqlEmitters.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package com.spandigital.cel2sql.dialect;

import com.spandigital.cel2sql.error.ConversionException;

import java.util.List;
import java.util.function.UnaryOperator;

/**
* Shared SQL-emission helpers for the recurring fragment shapes that several
* dialects render identically.
*
* <p>Each dialect still declares its own {@code Dialect} override (so the
* per-dialect behaviour stays explicit and greppable), but the dialects that
* happen to share a shape delegate the actual {@link StringBuilder} writing
* here instead of copy-pasting the body. Dialects whose output genuinely
* differs keep their own inline implementation.</p>
*/
public final class SqlEmitters {

    private SqlEmitters() {
    }

    /**
     * Emits {@code func(a, b)}: the function name, then both arguments
     * separated by {@code ", "} and wrapped in parentheses.
     *
     * @param w      buffer receiving the SQL text
     * @param func   function name written verbatim
     * @param writeA callback that renders the first argument
     * @param writeB callback that renders the second argument
     * @throws ConversionException if either callback fails
     */
    public static void writeBinaryCall(StringBuilder w, String func, SqlWriter writeA, SqlWriter writeB)
            throws ConversionException {
        w.append(func);
        w.append('(');
        writeA.write();
        w.append(", ");
        writeB.write();
        w.append(')');
    }

    /**
     * Emits an array-to-string join {@code func(array, delim)}. When
     * {@code writeDelim} is {@code null} the delimiter is rendered as the empty
     * string literal {@code ''}. If {@code trailingEmptyArg} is set, a third
     * {@code ''} argument is appended (PostgreSQL's {@code ARRAY_TO_STRING}
     * null-replacement parameter).
     *
     * @param w                buffer receiving the SQL text
     * @param func             function name written verbatim
     * @param writeArray       callback that renders the array expression
     * @param writeDelim       callback that renders the delimiter, or {@code null}
     * @param trailingEmptyArg whether to append a trailing {@code ''} argument
     * @throws ConversionException if a callback fails
     */
    public static void writeArrayJoin(StringBuilder w, String func, SqlWriter writeArray,
                                      SqlWriter writeDelim, boolean trailingEmptyArg)
            throws ConversionException {
        w.append(func);
        w.append('(');
        writeArray.write();
        w.append(", ");
        if (writeDelim == null) {
            w.append("''");
        } else {
            writeDelim.write();
        }
        if (trailingEmptyArg) {
            w.append(", ''");
        }
        w.append(')');
    }

    /**
     * Emits the {@code json_each} membership test shared by SQLite and DuckDB:
     * {@code EXISTS (SELECT 1 FROM json_each(array) WHERE value = elem)}.
     *
     * @param w          buffer receiving the SQL text
     * @param writeArray callback that renders the JSON array expression
     * @param writeElem  callback that renders the element being looked up
     * @throws ConversionException if either callback fails
     */
    public static void writeJsonEachMembership(StringBuilder w, SqlWriter writeArray, SqlWriter writeElem)
            throws ConversionException {
        w.append("EXISTS (SELECT 1 FROM json_each(");
        writeArray.write();
        w.append(") WHERE value = ");
        writeElem.write();
        w.append(')');
    }

    /**
     * Emits a JSON path probe {@code func(root, '$.seg.seg...')} and then
     * {@code suffix} (for example {@code " IS NOT NULL"}). Every segment is
     * passed through {@code escape} before being written.
     *
     * @param w            buffer receiving the SQL text
     * @param func         function name written verbatim
     * @param writeRoot    callback that renders the JSON root expression
     * @param pathSegments path components appended after {@code $}, each prefixed with {@code .}
     * @param suffix       text appended after the closing parenthesis
     * @param escape       escaping applied to each segment
     * @throws ConversionException if the root callback fails
     */
    public static void writeJsonPathProbe(StringBuilder w, String func, SqlWriter writeRoot,
                                          List<String> pathSegments, String suffix, UnaryOperator<String> escape)
            throws ConversionException {
        w.append(func);
        w.append('(');
        writeRoot.write();
        w.append(", '$");
        for (String part : pathSegments) {
            String escaped = escape.apply(part);
            w.append('.');
            w.append(escaped);
        }
        w.append("')");
        w.append(suffix);
    }

    /**
     * Emits an infix regex match {@code target <op> 'pattern'}. Single quotes
     * inside the pattern are doubled so it is a valid SQL string literal.
     * Intended for dialects whose regex operator is a binary infix token
     * ({@code ~}/{@code ~*}, {@code REGEXP}, {@code RLIKE}).
     *
     * @param w           buffer receiving the SQL text
     * @param writeTarget callback that renders the matched expression
     * @param op          operator text written verbatim (no spacing is added)
     * @param pattern     regex pattern to quote
     * @throws ConversionException if the target callback fails
     */
    public static void writeInfixRegex(StringBuilder w, SqlWriter writeTarget, String op, String pattern)
            throws ConversionException {
        writeTarget.write();
        w.append(op);
        w.append('\'');
        w.append(pattern.replace("'", "''"));
        w.append('\'');
    }

    /**
     * Emits a standard SQL {@code EXTRACT(part FROM expr [AT TIME ZONE tz])}
     * clause. The time-zone part is written only when {@code writeTZ} is
     * non-null. Any day-of-week adjustment is the caller's responsibility.
     *
     * @param w         buffer receiving the SQL text
     * @param part      date/time field name written verbatim
     * @param writeExpr callback that renders the source expression
     * @param writeTZ   callback that renders the time zone, or {@code null}
     * @throws ConversionException if a callback fails
     */
    public static void writeStandardExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlWriter writeTZ)
            throws ConversionException {
        w.append("EXTRACT(");
        w.append(part);
        w.append(" FROM ");
        writeExpr.write();
        if (writeTZ != null) {
            w.append(" AT TIME ZONE ");
            writeTZ.write();
        }
        w.append(')');
    }

    /**
     * Emits a standard {@code EXTRACT}; when {@code part} is exactly
     * {@code "DOW"} the result is wrapped as
     * {@code (EXTRACT(DOW FROM ...) + 6) % 7}, the PostgreSQL/DuckDB
     * day-of-week remapping.
     *
     * @param w         buffer receiving the SQL text
     * @param part      date/time field name written verbatim
     * @param writeExpr callback that renders the source expression
     * @param writeTZ   callback that renders the time zone, or {@code null}
     * @throws ConversionException if a callback fails
     */
    public static void writeExtractWithPostgresDow(StringBuilder w, String part, SqlWriter writeExpr, SqlWriter writeTZ)
            throws ConversionException {
        if (!"DOW".equals(part)) {
            // Every other field is a plain EXTRACT with no wrapping.
            writeStandardExtract(w, part, writeExpr, writeTZ);
            return;
        }
        w.append('(');
        writeStandardExtract(w, part, writeExpr, writeTZ);
        w.append(" + 6) % 7");
    }

    /**
     * Emits PostgreSQL/DuckDB arrow-operator JSON field access:
     * {@code base->>'field'} when {@code isFinal} is true (text extraction),
     * otherwise {@code base->'field'} (json extraction). The field name is
     * passed through {@code escape} before being written.
     *
     * @param w         buffer receiving the SQL text
     * @param writeBase callback that renders the expression being accessed
     * @param fieldName JSON field name
     * @param isFinal   selects {@code ->>} instead of {@code ->}
     * @param escape    escaping applied to the field name
     * @throws ConversionException if the base callback fails
     */
    public static void writeArrowJsonAccess(StringBuilder w, SqlWriter writeBase, String fieldName,
                                            boolean isFinal, UnaryOperator<String> escape)
            throws ConversionException {
        writeBase.write();
        if (isFinal) {
            w.append("->>'");
        } else {
            w.append("->'");
        }
        w.append(escape.apply(fieldName));
        w.append('\'');
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;

Expand Down Expand Up @@ -195,13 +196,7 @@ public void writeJSONArrayLength(StringBuilder w, SqlWriter writeExpr) throws Co

@Override
public void writeJSONExtractPath(StringBuilder w, List<String> pathSegments, SqlWriter writeRoot) throws ConversionException {
w.append("JSON_VALUE(");
writeRoot.write();
w.append(", '$");
for (String segment : pathSegments) {
w.append('.').append(escapeJSONFieldName(segment));
}
w.append("') IS NOT NULL");
SqlEmitters.writeJsonPathProbe(w, "JSON_VALUE", writeRoot, pathSegments, " IS NOT NULL", BigQueryDialect::escapeJSONFieldName);
}

@Override
Expand Down Expand Up @@ -246,13 +241,7 @@ public void writeExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlW
}
w.append(") - 1)");
} else {
w.append("EXTRACT(").append(part).append(" FROM ");
writeExpr.write();
if (writeTZ != null) {
w.append(" AT TIME ZONE ");
writeTZ.write();
}
w.append(')');
SqlEmitters.writeStandardExtract(w, part, writeExpr, writeTZ);
}
}

Expand Down Expand Up @@ -282,11 +271,7 @@ public void writeContains(StringBuilder w, SqlWriter writeHaystack, SqlWriter wr

@Override
public void writeSplit(StringBuilder w, SqlWriter writeStr, SqlWriter writeDelim) throws ConversionException {
w.append("SPLIT(");
writeStr.write();
w.append(", ");
writeDelim.write();
w.append(')');
SqlEmitters.writeBinaryCall(w, "SPLIT", writeStr, writeDelim);
}

@Override
Expand All @@ -300,15 +285,7 @@ public void writeSplitWithLimit(StringBuilder w, SqlWriter writeStr, SqlWriter w

@Override
public void writeJoin(StringBuilder w, SqlWriter writeArray, SqlWriter writeDelim) throws ConversionException {
w.append("ARRAY_TO_STRING(");
writeArray.write();
w.append(", ");
if (writeDelim != null) {
writeDelim.write();
} else {
w.append("''");
}
w.append(')');
SqlEmitters.writeArrayJoin(w, "ARRAY_TO_STRING", writeArray, writeDelim, false);
}

@Override
Expand Down
Loading
Loading