diff --git a/src/main/java/com/spandigital/cel2sql/dialect/RegexSafety.java b/src/main/java/com/spandigital/cel2sql/dialect/RegexSafety.java
new file mode 100644
index 0000000..a42be8d
--- /dev/null
+++ b/src/main/java/com/spandigital/cel2sql/dialect/RegexSafety.java
@@ -0,0 +1,183 @@
+package com.spandigital.cel2sql.dialect;
+
+import com.spandigital.cel2sql.error.ConversionException;
+
+import java.util.regex.Pattern;
+
+/**
+ * Shared ReDoS-safety validation for the RE2-style regex dialects
+ * (PostgreSQL, MySQL, DuckDB, BigQuery, Spark).
+ *
+ * <p>Every dialect that accepts regular expressions enforces the same structural
+ * limits to prevent catastrophic-backtracking attacks (CWE-1333): a maximum
+ * pattern length, capture-group count, and nesting depth, plus heuristics that
+ * reject nested quantifiers and quantified alternation. Those checks are
+ * dialect-agnostic — they operate on the RE2 source pattern before any
+ * dialect-specific conversion — so they live here once instead of being copied
+ * into each {@code XxxRegex} class.
+ *
+ * Dialect-specific concerns (unsupported-feature detection, the actual
+ * RE2-to-native conversion) remain in the per-dialect classes.
+ */
+public final class RegexSafety {
+
+    /** Maximum allowed regex pattern length, in characters. */
+    public static final int MAX_PATTERN_LENGTH = 500;
+
+    /** Maximum allowed number of unescaped opening parentheses in a pattern. */
+    public static final int MAX_GROUPS = 20;
+
+    /** Maximum allowed nesting depth of parenthesized groups. */
+    public static final int MAX_NESTING_DEPTH = 10;
+
+    private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");
+    private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");
+
+    private RegexSafety() {
+    }
+
+    /**
+     * Enforces the maximum pattern-length limit.
+     *
+     * @param pattern the RE2 regex pattern
+     * @throws ConversionException if the pattern exceeds {@link #MAX_PATTERN_LENGTH}
+     */
+    public static void checkLength(String pattern) throws ConversionException {
+        if (pattern.length() > MAX_PATTERN_LENGTH) {
+            throw new ConversionException(
+                    "Invalid pattern in expression",
+                    String.format("pattern length %d exceeds limit of %d characters",
+                            pattern.length(), MAX_PATTERN_LENGTH));
+        }
+    }
+
+    /**
+     * Runs the shared structural ReDoS checks against a pattern, in order:
+     * <ol>
+     *   <li>simple back-to-back quantifiers ({@code a*+}, {@code a++})</li>
+     *   <li>quantified groups that themselves contain inner quantifiers ({@code (a+)+})</li>
+     *   <li>capture-group count limit</li>
+     *   <li>quantified alternation ({@code (a|b)+})</li>
+     *   <li>group nesting-depth limit</li>
+     * </ol>
+     *
+     * @param pattern the RE2 source pattern, before any dialect-specific conversion
+     * @throws ConversionException if any limit is exceeded or a catastrophic construct is detected
+     */
+    public static void checkReDoS(String pattern) throws ConversionException {
+        if (NESTED_QUANTIFIERS.matcher(pattern).find()) {
+            throw new ConversionException(
+                    "Invalid pattern in expression",
+                    "regex contains catastrophic nested quantifiers that could cause ReDoS");
+        }
+
+        validateNoNestedQuantifiers(pattern);
+
+        int groupCount = countUnescapedParens(pattern);
+        if (groupCount > MAX_GROUPS) {
+            throw new ConversionException(
+                    "Invalid pattern in expression",
+                    String.format("regex contains %d capture groups, exceeds limit of %d",
+                            groupCount, MAX_GROUPS));
+        }
+
+        if (QUANTIFIED_ALTERNATION.matcher(pattern).find()) {
+            throw new ConversionException(
+                    "Invalid pattern in expression",
+                    "regex contains quantified alternation that could cause ReDoS");
+        }
+
+        int maxDepth = computeMaxNestingDepth(pattern);
+        if (maxDepth > MAX_NESTING_DEPTH) {
+            throw new ConversionException(
+                    "Invalid pattern in expression",
+                    String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
+        }
+    }
+
+    /**
+     * Validates that no quantified group contains an inner quantifier (nested quantifiers).
+     * This detects patterns like {@code (a+)+} that can cause catastrophic backtracking.
+     *
+     * <p>Escapes are resolved by backslash parity (see {@link #isEscaped}), so a literal
+     * backslash ({@code \\}) does not hide the group or quantifier that follows it.
+     * A {@code ?} right after {@code (} (as in {@code (?:...)}) and metacharacters inside
+     * character classes are still counted as quantifiers/groups, which errs on the side
+     * of rejecting the pattern.
+     */
+    private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
+        // groupHasQuantifier[0..top] is a stack holding one flag per currently open group.
+        boolean[] groupHasQuantifier = new boolean[pattern.length() + 1]; // oversized but safe
+        int top = -1;
+
+        for (int i = 0; i < pattern.length(); i++) {
+            if (isEscaped(pattern, i)) {
+                continue; // escaped literal, not a metacharacter
+            }
+
+            switch (pattern.charAt(i)) {
+                case '(' -> groupHasQuantifier[++top] = false;
+                case ')' -> {
+                    if (top >= 0) { // an unbalanced ')' is ignored
+                        boolean quantified = i + 1 < pattern.length()
+                                && "*+?{".indexOf(pattern.charAt(i + 1)) >= 0;
+                        if (quantified && groupHasQuantifier[top]) {
+                            throw new ConversionException(
+                                    "Invalid pattern in expression",
+                                    "regex contains catastrophic nested quantifiers that could cause ReDoS");
+                        }
+                        // An inner quantifier also counts against the enclosing group.
+                        if (top > 0 && groupHasQuantifier[top]) {
+                            groupHasQuantifier[top - 1] = true;
+                        }
+                        top--;
+                    }
+                }
+                case '*', '+', '?', '{' -> {
+                    if (top >= 0) {
+                        groupHasQuantifier[top] = true;
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Reports whether the character at {@code index} is escaped, i.e. preceded by an odd
+     * number of consecutive backslashes. Looking only at the previous character would
+     * misread {@code \\(} (literal backslash, then a real group) as an escaped paren.
+     */
+    private static boolean isEscaped(String pattern, int index) {
+        int backslashes = 0;
+        for (int j = index - 1; j >= 0 && pattern.charAt(j) == '\\'; j--) {
+            backslashes++;
+        }
+        return (backslashes & 1) == 1;
+    }
+
+    /** Counts the number of unescaped opening parentheses in the pattern. */
+    private static int countUnescapedParens(String pattern) {
+        int count = 0;
+        for (int i = 0; i < pattern.length(); i++) {
+            if (pattern.charAt(i) == '(' && !isEscaped(pattern, i)) {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /** Computes the maximum nesting depth of unescaped parenthesized groups in the pattern. */
+    private static int computeMaxNestingDepth(String pattern) {
+        int maxDepth = 0;
+        int currentDepth = 0;
+        for (int i = 0; i < pattern.length(); i++) {
+            char ch = pattern.charAt(i);
+            if ((ch != '(' && ch != ')') || isEscaped(pattern, i)) {
+                continue;
+            }
+            currentDepth += (ch == '(') ? 1 : -1;
+            maxDepth = Math.max(maxDepth, currentDepth);
+        }
+        return maxDepth;
+    }
+}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/SqlEmitters.java b/src/main/java/com/spandigital/cel2sql/dialect/SqlEmitters.java
new file mode 100644
index 0000000..188af4f
--- /dev/null
+++ b/src/main/java/com/spandigital/cel2sql/dialect/SqlEmitters.java
@@ -0,0 +1,147 @@
+package com.spandigital.cel2sql.dialect;
+
+import com.spandigital.cel2sql.error.ConversionException;
+
+import java.util.List;
+import java.util.function.UnaryOperator;
+
+/**
+ * Shared SQL-emission helpers for the recurring fragment shapes that several
+ * dialects render identically.
+ *
+ * Each dialect still declares its own {@code Dialect} override (so the
+ * per-dialect behaviour stays explicit and greppable), but the dialects that
+ * happen to share a shape delegate the actual {@link StringBuilder} writing
+ * here instead of copy-pasting the body. Dialects whose output genuinely
+ * differs keep their own inline implementation.
+ */
+public final class SqlEmitters {
+
+    private SqlEmitters() {
+    }
+
+    /**
+     * Writes a two-argument function call, {@code func(a, b)}, with {@code ", "} between the arguments.
+     */
+    public static void writeBinaryCall(StringBuilder w, String func, SqlWriter writeA, SqlWriter writeB)
+            throws ConversionException {
+        w.append(func).append('(');
+        writeA.write();
+        w.append(", ");
+        writeB.write();
+        w.append(')');
+    }
+
+    /**
+     * Writes an array-to-string join: {@code func(array, delim)}. When {@code writeDelim}
+     * is {@code null} the delimiter is rendered as the empty-string literal {@code ''}.
+     * When {@code trailingEmptyArg} is set, an extra {@code ''} argument is appended
+     * (PostgreSQL's {@code ARRAY_TO_STRING} null-replacement parameter).
+     */
+    public static void writeArrayJoin(StringBuilder w, String func, SqlWriter writeArray,
+                                      SqlWriter writeDelim, boolean trailingEmptyArg)
+            throws ConversionException {
+        w.append(func).append('(');
+        writeArray.write();
+        w.append(", ");
+        if (writeDelim != null) {
+            writeDelim.write();
+        } else {
+            w.append("''");
+        }
+        if (trailingEmptyArg) {
+            w.append(", ''");
+        }
+        w.append(')');
+    }
+
+    /**
+     * Writes the {@code json_each} membership idiom shared by SQLite and DuckDB:
+     * {@code EXISTS (SELECT 1 FROM json_each(array) WHERE value = elem)}.
+     */
+    public static void writeJsonEachMembership(StringBuilder w, SqlWriter writeArray, SqlWriter writeElem)
+            throws ConversionException {
+        w.append("EXISTS (SELECT 1 FROM json_each(");
+        writeArray.write();
+        w.append(") WHERE value = ");
+        writeElem.write();
+        w.append(')');
+    }
+
+    /**
+     * Writes a JSON path-existence probe: {@code func(root, '$.seg.seg...')}
+     * followed by {@code suffix} (e.g. {@code " IS NOT NULL"}, or {@code ""} for none).
+     * Each path segment is passed through {@code escape} before being appended.
+     */
+    public static void writeJsonPathProbe(StringBuilder w, String func, SqlWriter writeRoot,
+                                          List<String> pathSegments, String suffix, UnaryOperator<String> escape)
+            throws ConversionException {
+        w.append(func).append('(');
+        writeRoot.write();
+        w.append(", '$");
+        for (String segment : pathSegments) {
+            w.append('.').append(escape.apply(segment));
+        }
+        w.append("')").append(suffix);
+    }
+
+    /**
+     * Writes an infix regex match: {@code target op 'pattern'}, with the pattern's
+     * single quotes doubled for SQL-string escaping. {@code op} is appended verbatim,
+     * so it must carry its own surrounding spaces (e.g. {@code " ~ "}, {@code " ~* "},
+     * {@code " REGEXP "}).
+     */
+    public static void writeInfixRegex(StringBuilder w, SqlWriter writeTarget, String op, String pattern)
+            throws ConversionException {
+        writeTarget.write();
+        w.append(op);
+        w.append('\'').append(pattern.replace("'", "''")).append('\'');
+    }
+
+    /**
+     * Writes a standard SQL {@code EXTRACT(part FROM expr [AT TIME ZONE tz])} clause;
+     * the {@code AT TIME ZONE} part is omitted when {@code writeTZ} is {@code null}.
+     * Any day-of-week conversion wrapping is left to the caller.
+     */
+    public static void writeStandardExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlWriter writeTZ)
+            throws ConversionException {
+        w.append("EXTRACT(").append(part).append(" FROM ");
+        writeExpr.write();
+        if (writeTZ != null) {
+            w.append(" AT TIME ZONE ");
+            writeTZ.write();
+        }
+        w.append(')');
+    }
+
+    /**
+     * Writes a standard {@code EXTRACT}, applying the PostgreSQL/DuckDB
+     * day-of-week remapping {@code (EXTRACT(DOW FROM ...) + 6) % 7} when
+     * {@code part} is {@code "DOW"}. Both engines share this exact convention.
+     */
+    public static void writeExtractWithPostgresDow(StringBuilder w, String part, SqlWriter writeExpr, SqlWriter writeTZ)
+            throws ConversionException {
+        boolean isDOW = "DOW".equals(part);
+        if (isDOW) {
+            w.append('(');
+        }
+        writeStandardExtract(w, part, writeExpr, writeTZ);
+        if (isDOW) {
+            w.append(" + 6) % 7");
+        }
+    }
+
+    /**
+     * Writes PostgreSQL/DuckDB arrow-operator JSON field access:
+     * {@code base->>'field'} (final/text extraction) or {@code base->'field'}
+     * (intermediate/json extraction). The field name is escaped via
+     * {@code escape}.
+     */
+    public static void writeArrowJsonAccess(StringBuilder w, SqlWriter writeBase, String fieldName,
+                                            boolean isFinal, UnaryOperator<String> escape)
+            throws ConversionException {
+        writeBase.write();
+        w.append(isFinal ? "->>'" : "->'");
+        w.append(escape.apply(fieldName)).append('\'');
+    }
+}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryDialect.java b/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryDialect.java
index db5ec41..c4dfe41 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryDialect.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryDialect.java
@@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;
@@ -195,13 +196,7 @@ public void writeJSONArrayLength(StringBuilder w, SqlWriter writeExpr) throws Co
@Override
public void writeJSONExtractPath(StringBuilder w, List pathSegments, SqlWriter writeRoot) throws ConversionException {
- w.append("JSON_VALUE(");
- writeRoot.write();
- w.append(", '$");
- for (String segment : pathSegments) {
- w.append('.').append(escapeJSONFieldName(segment));
- }
- w.append("') IS NOT NULL");
+ SqlEmitters.writeJsonPathProbe(w, "JSON_VALUE", writeRoot, pathSegments, " IS NOT NULL", BigQueryDialect::escapeJSONFieldName);
}
@Override
@@ -246,13 +241,7 @@ public void writeExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlW
}
w.append(") - 1)");
} else {
- w.append("EXTRACT(").append(part).append(" FROM ");
- writeExpr.write();
- if (writeTZ != null) {
- w.append(" AT TIME ZONE ");
- writeTZ.write();
- }
- w.append(')');
+ SqlEmitters.writeStandardExtract(w, part, writeExpr, writeTZ);
}
}
@@ -282,11 +271,7 @@ public void writeContains(StringBuilder w, SqlWriter writeHaystack, SqlWriter wr
@Override
public void writeSplit(StringBuilder w, SqlWriter writeStr, SqlWriter writeDelim) throws ConversionException {
- w.append("SPLIT(");
- writeStr.write();
- w.append(", ");
- writeDelim.write();
- w.append(')');
+ SqlEmitters.writeBinaryCall(w, "SPLIT", writeStr, writeDelim);
}
@Override
@@ -300,15 +285,7 @@ public void writeSplitWithLimit(StringBuilder w, SqlWriter writeStr, SqlWriter w
@Override
public void writeJoin(StringBuilder w, SqlWriter writeArray, SqlWriter writeDelim) throws ConversionException {
- w.append("ARRAY_TO_STRING(");
- writeArray.write();
- w.append(", ");
- if (writeDelim != null) {
- writeDelim.write();
- } else {
- w.append("''");
- }
- w.append(')');
+ SqlEmitters.writeArrayJoin(w, "ARRAY_TO_STRING", writeArray, writeDelim, false);
}
@Override
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryRegex.java b/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryRegex.java
index fa3f397..d09488f 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryRegex.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/bigquery/BigQueryRegex.java
@@ -1,6 +1,7 @@
package com.spandigital.cel2sql.dialect.bigquery;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.RegexSafety;
import com.spandigital.cel2sql.error.ConversionException;
import java.util.regex.Pattern;
@@ -15,18 +16,6 @@
*/
final class BigQueryRegex {
- /** Maximum allowed regex pattern length. */
- static final int MAX_PATTERN_LENGTH = 500;
-
- /** Maximum allowed capture groups in a pattern. */
- static final int MAX_GROUPS = 20;
-
- /** Maximum allowed nesting depth of parenthesized groups. */
- static final int MAX_NESTING_DEPTH = 10;
-
- private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");
- private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");
-
private BigQueryRegex() {
}
@@ -57,12 +46,7 @@ private BigQueryRegex() {
*/
static RegexResult convertRE2ToBigQuery(String re2Pattern) throws ConversionException {
// 1. Check pattern length
- if (re2Pattern.length() > MAX_PATTERN_LENGTH) {
- throw ConversionException.of(
- "Invalid regex pattern",
- String.format("pattern length %d exceeds limit of %d characters",
- re2Pattern.length(), MAX_PATTERN_LENGTH));
- }
+ RegexSafety.checkLength(re2Pattern);
// 2. Validate pattern compiles
try {
@@ -90,39 +74,9 @@ static RegexResult convertRE2ToBigQuery(String re2Pattern) throws ConversionExce
"named capture groups (?P...) are not supported in BigQuery regex");
}
- // 4. Detect catastrophic nested quantifiers
- if (NESTED_QUANTIFIERS.matcher(re2Pattern).find()) {
- throw ConversionException.of(
- "Invalid regex pattern",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
-
- // 5. Check nested quantifiers in groups
- validateNoNestedQuantifiers(re2Pattern);
-
- // 6. Count and limit capture groups
- int groupCount = countUnescapedParens(re2Pattern);
- if (groupCount > MAX_GROUPS) {
- throw ConversionException.of(
- "Invalid regex pattern",
- String.format("regex contains %d capture groups, exceeds limit of %d",
- groupCount, MAX_GROUPS));
- }
-
- // 7. Detect exponential alternation patterns
- if (QUANTIFIED_ALTERNATION.matcher(re2Pattern).find()) {
- throw ConversionException.of(
- "Invalid regex pattern",
- "regex contains quantified alternation that could cause ReDoS");
- }
-
- // 8. Check nesting depth
- int maxDepth = computeMaxNestingDepth(re2Pattern);
- if (maxDepth > MAX_NESTING_DEPTH) {
- throw ConversionException.of(
- "Invalid regex pattern",
- String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
- }
+ // 4-8. Shared ReDoS safety checks (nested quantifiers, group count,
+ // quantified alternation, nesting depth)
+ RegexSafety.checkReDoS(re2Pattern);
// 9. Handle (?i) flag -> set caseInsensitive=true, strip prefix
boolean caseInsensitive = false;
@@ -146,97 +100,4 @@ static RegexResult convertRE2ToBigQuery(String re2Pattern) throws ConversionExce
return new RegexResult(pattern, caseInsensitive);
}
-
- /**
- * Validates that no quantified groups contain inner quantifiers (nested quantifiers).
- * This detects patterns like {@code (a+)+} that can cause catastrophic backtracking.
- */
- private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
- int depth = 0;
- boolean[] groupHasQuantifier = new boolean[pattern.length()]; // oversized but safe
- int stackTop = -1;
-
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
-
- // Skip escaped characters
- if (i > 0 && pattern.charAt(i - 1) == '\\') {
- continue;
- }
-
- switch (ch) {
- case '(' -> {
- depth++;
- stackTop++;
- groupHasQuantifier[stackTop] = false;
- }
- case ')' -> {
- if (depth > 0) {
- depth--;
- if (i + 1 < pattern.length()) {
- char next = pattern.charAt(i + 1);
- if (next == '*' || next == '+' || next == '?' || next == '{') {
- if (stackTop >= 0 && groupHasQuantifier[stackTop]) {
- throw ConversionException.of(
- "Invalid regex pattern",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
- }
- }
- if (stackTop > 0) {
- if (groupHasQuantifier[stackTop]) {
- groupHasQuantifier[stackTop - 1] = true;
- }
- }
- if (stackTop >= 0) {
- stackTop--;
- }
- }
- }
- case '*', '+', '?' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- case '{' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- }
- }
- }
-
- /**
- * Counts the number of unescaped opening parentheses in the pattern.
- */
- private static int countUnescapedParens(String pattern) {
- int count = 0;
- for (int i = 0; i < pattern.length(); i++) {
- if (pattern.charAt(i) == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- count++;
- }
- }
- return count;
- }
-
- /**
- * Computes the maximum nesting depth of parenthesized groups in the pattern.
- */
- private static int computeMaxNestingDepth(String pattern) {
- int maxDepth = 0;
- int currentDepth = 0;
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
- if (ch == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth++;
- if (currentDepth > maxDepth) {
- maxDepth = currentDepth;
- }
- } else if (ch == ')' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth--;
- }
- }
- return maxDepth;
- }
}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbDialect.java b/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbDialect.java
index b76557a..bf23194 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbDialect.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbDialect.java
@@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;
@@ -61,14 +62,7 @@ public void writeStringConcat(StringBuilder w, SqlWriter writeLHS, SqlWriter wri
@Override
public void writeRegexMatch(StringBuilder w, SqlWriter writeTarget, String pattern, boolean caseInsensitive) throws ConversionException {
- writeTarget.write();
- if (caseInsensitive) {
- w.append(" ~* ");
- } else {
- w.append(" ~ ");
- }
- String escaped = pattern.replace("'", "''");
- w.append('\'').append(escaped).append('\'');
+ SqlEmitters.writeInfixRegex(w, writeTarget, caseInsensitive ? " ~* " : " ~ ", pattern);
}
@Override
@@ -160,14 +154,7 @@ public void writeEmptyTypedArray(StringBuilder w, String typeName) {
@Override
public void writeJSONFieldAccess(StringBuilder w, SqlWriter writeBase, String fieldName, boolean isFinal) throws ConversionException {
- writeBase.write();
- String escapedField = escapeJSONFieldName(fieldName);
- if (isFinal) {
- w.append("->>'");
- } else {
- w.append("->'");
- }
- w.append(escapedField).append('\'');
+ SqlEmitters.writeArrowJsonAccess(w, writeBase, fieldName, isFinal, DuckDbDialect::escapeJSONFieldName);
}
@Override
@@ -194,31 +181,17 @@ public void writeJSONArrayLength(StringBuilder w, SqlWriter writeExpr) throws Co
@Override
public void writeJSONExtractPath(StringBuilder w, List pathSegments, SqlWriter writeRoot) throws ConversionException {
- w.append("json_exists(");
- writeRoot.write();
- w.append(", '$");
- for (String segment : pathSegments) {
- w.append('.').append(escapeJSONFieldName(segment));
- }
- w.append("')");
+ SqlEmitters.writeJsonPathProbe(w, "json_exists", writeRoot, pathSegments, "", DuckDbDialect::escapeJSONFieldName);
}
@Override
public void writeJSONArrayMembership(StringBuilder w, String jsonFunc, SqlWriter writeElem, SqlWriter writeArray) throws ConversionException {
- w.append("EXISTS (SELECT 1 FROM json_each(");
- writeArray.write();
- w.append(") WHERE value = ");
- writeElem.write();
- w.append(')');
+ SqlEmitters.writeJsonEachMembership(w, writeArray, writeElem);
}
@Override
public void writeNestedJSONArrayMembership(StringBuilder w, SqlWriter writeElem, SqlWriter writeArray) throws ConversionException {
- w.append("EXISTS (SELECT 1 FROM json_each(");
- writeArray.write();
- w.append(") WHERE value = ");
- writeElem.write();
- w.append(')');
+ SqlEmitters.writeJsonEachMembership(w, writeArray, writeElem);
}
// --- Timestamps ---
@@ -237,20 +210,7 @@ public void writeInterval(StringBuilder w, SqlWriter writeValue, String unit) th
@Override
public void writeExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlWriter writeTZ) throws ConversionException {
- boolean isDOW = "DOW".equals(part);
- if (isDOW) {
- w.append('(');
- }
- w.append("EXTRACT(").append(part).append(" FROM ");
- writeExpr.write();
- if (writeTZ != null) {
- w.append(" AT TIME ZONE ");
- writeTZ.write();
- }
- w.append(')');
- if (isDOW) {
- w.append(" + 6) % 7");
- }
+ SqlEmitters.writeExtractWithPostgresDow(w, part, writeExpr, writeTZ);
}
@Override
@@ -273,11 +233,7 @@ public void writeContains(StringBuilder w, SqlWriter writeHaystack, SqlWriter wr
@Override
public void writeSplit(StringBuilder w, SqlWriter writeStr, SqlWriter writeDelim) throws ConversionException {
- w.append("STRING_SPLIT(");
- writeStr.write();
- w.append(", ");
- writeDelim.write();
- w.append(')');
+ SqlEmitters.writeBinaryCall(w, "STRING_SPLIT", writeStr, writeDelim);
}
@Override
@@ -291,15 +247,7 @@ public void writeSplitWithLimit(StringBuilder w, SqlWriter writeStr, SqlWriter w
@Override
public void writeJoin(StringBuilder w, SqlWriter writeArray, SqlWriter writeDelim) throws ConversionException {
- w.append("ARRAY_TO_STRING(");
- writeArray.write();
- w.append(", ");
- if (writeDelim != null) {
- writeDelim.write();
- } else {
- w.append("''");
- }
- w.append(')');
+ SqlEmitters.writeArrayJoin(w, "ARRAY_TO_STRING", writeArray, writeDelim, false);
}
@Override
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbRegex.java b/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbRegex.java
index 3bfe38e..3ae028c 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbRegex.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/duckdb/DuckDbRegex.java
@@ -1,6 +1,7 @@
package com.spandigital.cel2sql.dialect.duckdb;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.RegexSafety;
import com.spandigital.cel2sql.error.ConversionException;
import java.util.regex.Pattern;
@@ -15,18 +16,6 @@
*/
final class DuckDbRegex {
- /** Maximum allowed regex pattern length. */
- static final int MAX_PATTERN_LENGTH = 500;
-
- /** Maximum allowed capture groups in a pattern. */
- static final int MAX_GROUPS = 20;
-
- /** Maximum allowed nesting depth of parenthesized groups. */
- static final int MAX_NESTING_DEPTH = 10;
-
- private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");
- private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");
-
private DuckDbRegex() {
}
@@ -56,12 +45,7 @@ private DuckDbRegex() {
*/
static RegexResult convertRE2ToDuckDB(String re2Pattern) throws ConversionException {
// 1. Check pattern length
- if (re2Pattern.length() > MAX_PATTERN_LENGTH) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("pattern length %d exceeds limit of %d characters",
- re2Pattern.length(), MAX_PATTERN_LENGTH));
- }
+ RegexSafety.checkLength(re2Pattern);
// 2. Validate pattern compiles
try {
@@ -89,39 +73,9 @@ static RegexResult convertRE2ToDuckDB(String re2Pattern) throws ConversionExcept
"named capture groups (?P...) are not supported in DuckDB regex");
}
- // 4. Detect catastrophic nested quantifiers
- if (NESTED_QUANTIFIERS.matcher(re2Pattern).find()) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
-
- // 5. Check for groups with quantifiers that are themselves quantified
- validateNoNestedQuantifiers(re2Pattern);
-
- // 6. Count and limit capture groups
- int groupCount = countUnescapedParens(re2Pattern);
- if (groupCount > MAX_GROUPS) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("regex contains %d capture groups, exceeds limit of %d",
- groupCount, MAX_GROUPS));
- }
-
- // 7. Detect exponential alternation patterns
- if (QUANTIFIED_ALTERNATION.matcher(re2Pattern).find()) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains quantified alternation that could cause ReDoS");
- }
-
- // 8. Check nesting depth
- int maxDepth = computeMaxNestingDepth(re2Pattern);
- if (maxDepth > MAX_NESTING_DEPTH) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
- }
+ // 4-8. Shared ReDoS safety checks (nested quantifiers, group count,
+ // quantified alternation, nesting depth)
+ RegexSafety.checkReDoS(re2Pattern);
// 9. Handle (?i) flag
boolean caseInsensitive = false;
@@ -143,97 +97,4 @@ static RegexResult convertRE2ToDuckDB(String re2Pattern) throws ConversionExcept
return new RegexResult(pattern, caseInsensitive);
}
-
- /**
- * Validates that no quantified groups contain inner quantifiers (nested quantifiers).
- * This detects patterns like {@code (a+)+} that can cause catastrophic backtracking.
- */
- private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
- int depth = 0;
- boolean[] groupHasQuantifier = new boolean[pattern.length()]; // oversized but safe
- int stackTop = -1;
-
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
-
- // Skip escaped characters
- if (i > 0 && pattern.charAt(i - 1) == '\\') {
- continue;
- }
-
- switch (ch) {
- case '(' -> {
- depth++;
- stackTop++;
- groupHasQuantifier[stackTop] = false;
- }
- case ')' -> {
- if (depth > 0) {
- depth--;
- if (i + 1 < pattern.length()) {
- char next = pattern.charAt(i + 1);
- if (next == '*' || next == '+' || next == '?' || next == '{') {
- if (stackTop >= 0 && groupHasQuantifier[stackTop]) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
- }
- }
- if (stackTop > 0) {
- if (groupHasQuantifier[stackTop]) {
- groupHasQuantifier[stackTop - 1] = true;
- }
- }
- if (stackTop >= 0) {
- stackTop--;
- }
- }
- }
- case '*', '+', '?' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- case '{' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- }
- }
- }
-
- /**
- * Counts the number of unescaped opening parentheses in the pattern.
- */
- private static int countUnescapedParens(String pattern) {
- int count = 0;
- for (int i = 0; i < pattern.length(); i++) {
- if (pattern.charAt(i) == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- count++;
- }
- }
- return count;
- }
-
- /**
- * Computes the maximum nesting depth of parenthesized groups in the pattern.
- */
- private static int computeMaxNestingDepth(String pattern) {
- int maxDepth = 0;
- int currentDepth = 0;
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
- if (ch == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth++;
- if (currentDepth > maxDepth) {
- maxDepth = currentDepth;
- }
- } else if (ch == ')' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth--;
- }
- }
- return maxDepth;
- }
}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlDialect.java b/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlDialect.java
index 2f2e838..0c84394 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlDialect.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlDialect.java
@@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;
@@ -63,10 +64,7 @@ public void writeStringConcat(StringBuilder w, SqlWriter writeLHS, SqlWriter wri
@Override
public void writeRegexMatch(StringBuilder w, SqlWriter writeTarget, String pattern, boolean caseInsensitive) throws ConversionException {
- writeTarget.write();
- w.append(" REGEXP ");
- String escaped = pattern.replace("'", "''");
- w.append('\'').append(escaped).append('\'');
+ SqlEmitters.writeInfixRegex(w, writeTarget, " REGEXP ", pattern);
}
@Override
@@ -247,13 +245,7 @@ public void writeExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlW
}
w.append(") + 5) % 7");
} else {
- w.append("EXTRACT(").append(part).append(" FROM ");
- writeExpr.write();
- if (writeTZ != null) {
- w.append(" AT TIME ZONE ");
- writeTZ.write();
- }
- w.append(')');
+ SqlEmitters.writeStandardExtract(w, part, writeExpr, writeTZ);
}
}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlRegex.java b/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlRegex.java
index fab8b50..b9da7f0 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlRegex.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/mysql/MySqlRegex.java
@@ -1,6 +1,7 @@
package com.spandigital.cel2sql.dialect.mysql;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.RegexSafety;
import com.spandigital.cel2sql.error.ConversionException;
import java.util.regex.Pattern;
@@ -14,18 +15,6 @@
*/
final class MySqlRegex {
- /** Maximum allowed regex pattern length. */
- static final int MAX_PATTERN_LENGTH = 500;
-
- /** Maximum allowed capture groups in a pattern. */
- static final int MAX_GROUPS = 20;
-
- /** Maximum allowed nesting depth of parenthesized groups. */
- static final int MAX_NESTING_DEPTH = 10;
-
- private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");
- private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");
-
private MySqlRegex() {
}
@@ -56,12 +45,7 @@ private MySqlRegex() {
*/
static RegexResult convertRE2ToMySQL(String re2Pattern) throws ConversionException {
// 1. Check pattern length
- if (re2Pattern.length() > MAX_PATTERN_LENGTH) {
- throw ConversionException.of(
- "Invalid regex pattern",
- String.format("pattern length %d exceeds limit of %d characters",
- re2Pattern.length(), MAX_PATTERN_LENGTH));
- }
+ RegexSafety.checkLength(re2Pattern);
// 2. Validate pattern compiles
try {
@@ -89,39 +73,9 @@ static RegexResult convertRE2ToMySQL(String re2Pattern) throws ConversionExcepti
"named capture groups (?P<name>...) are not supported in MySQL regex");
}
- // 4. Detect catastrophic nested quantifiers
- if (NESTED_QUANTIFIERS.matcher(re2Pattern).find()) {
- throw ConversionException.of(
- "Invalid regex pattern",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
-
- // 5. Check nested quantifiers in groups
- validateNoNestedQuantifiers(re2Pattern);
-
- // 6. Count and limit capture groups
- int groupCount = countUnescapedParens(re2Pattern);
- if (groupCount > MAX_GROUPS) {
- throw ConversionException.of(
- "Invalid regex pattern",
- String.format("regex contains %d capture groups, exceeds limit of %d",
- groupCount, MAX_GROUPS));
- }
-
- // 7. Detect exponential alternation patterns
- if (QUANTIFIED_ALTERNATION.matcher(re2Pattern).find()) {
- throw ConversionException.of(
- "Invalid regex pattern",
- "regex contains quantified alternation that could cause ReDoS");
- }
-
- // 8. Check nesting depth
- int maxDepth = computeMaxNestingDepth(re2Pattern);
- if (maxDepth > MAX_NESTING_DEPTH) {
- throw ConversionException.of(
- "Invalid regex pattern",
- String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
- }
+ // 4-8. Shared ReDoS safety checks (nested quantifiers, group count,
+ // quantified alternation, nesting depth)
+ RegexSafety.checkReDoS(re2Pattern);
// 9. Handle (?i) flag -> set caseInsensitive=true, strip prefix
boolean caseInsensitive = false;
@@ -146,97 +100,4 @@ static RegexResult convertRE2ToMySQL(String re2Pattern) throws ConversionExcepti
// 13. Return result
return new RegexResult(pattern, caseInsensitive);
}
-
- /**
- * Validates that no quantified groups contain inner quantifiers (nested quantifiers).
- * This detects patterns like {@code (a+)+} that can cause catastrophic backtracking.
- */
- private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
- int depth = 0;
- boolean[] groupHasQuantifier = new boolean[pattern.length()]; // oversized but safe
- int stackTop = -1;
-
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
-
- // Skip escaped characters
- if (i > 0 && pattern.charAt(i - 1) == '\\') {
- continue;
- }
-
- switch (ch) {
- case '(' -> {
- depth++;
- stackTop++;
- groupHasQuantifier[stackTop] = false;
- }
- case ')' -> {
- if (depth > 0) {
- depth--;
- if (i + 1 < pattern.length()) {
- char next = pattern.charAt(i + 1);
- if (next == '*' || next == '+' || next == '?' || next == '{') {
- if (stackTop >= 0 && groupHasQuantifier[stackTop]) {
- throw ConversionException.of(
- "Invalid regex pattern",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
- }
- }
- if (stackTop > 0) {
- if (groupHasQuantifier[stackTop]) {
- groupHasQuantifier[stackTop - 1] = true;
- }
- }
- if (stackTop >= 0) {
- stackTop--;
- }
- }
- }
- case '*', '+', '?' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- case '{' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- }
- }
- }
-
- /**
- * Counts the number of unescaped opening parentheses in the pattern.
- */
- private static int countUnescapedParens(String pattern) {
- int count = 0;
- for (int i = 0; i < pattern.length(); i++) {
- if (pattern.charAt(i) == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- count++;
- }
- }
- return count;
- }
-
- /**
- * Computes the maximum nesting depth of parenthesized groups in the pattern.
- */
- private static int computeMaxNestingDepth(String pattern) {
- int maxDepth = 0;
- int currentDepth = 0;
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
- if (ch == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth++;
- if (currentDepth > maxDepth) {
- maxDepth = currentDepth;
- }
- } else if (ch == ')' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth--;
- }
- }
- return maxDepth;
- }
}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresDialect.java b/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresDialect.java
index 7479882..d880f76 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresDialect.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresDialect.java
@@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;
@@ -61,14 +62,7 @@ public void writeStringConcat(StringBuilder w, SqlWriter writeLHS, SqlWriter wri
@Override
public void writeRegexMatch(StringBuilder w, SqlWriter writeTarget, String pattern, boolean caseInsensitive) throws ConversionException {
- writeTarget.write();
- if (caseInsensitive) {
- w.append(" ~* ");
- } else {
- w.append(" ~ ");
- }
- String escaped = pattern.replace("'", "''");
- w.append('\'').append(escaped).append('\'');
+ SqlEmitters.writeInfixRegex(w, writeTarget, caseInsensitive ? " ~* " : " ~ ", pattern);
}
@Override
@@ -162,14 +156,7 @@ public void writeEmptyTypedArray(StringBuilder w, String typeName) {
@Override
public void writeJSONFieldAccess(StringBuilder w, SqlWriter writeBase, String fieldName, boolean isFinal) throws ConversionException {
- writeBase.write();
- String escapedField = escapeJSONFieldName(fieldName);
- if (isFinal) {
- w.append("->>'");
- } else {
- w.append("->'");
- }
- w.append(escapedField).append('\'');
+ SqlEmitters.writeArrowJsonAccess(w, writeBase, fieldName, isFinal, PostgresDialect::escapeJSONFieldName);
}
@Override
@@ -243,20 +230,7 @@ public void writeInterval(StringBuilder w, SqlWriter writeValue, String unit) th
@Override
public void writeExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlWriter writeTZ) throws ConversionException {
- boolean isDOW = "DOW".equals(part);
- if (isDOW) {
- w.append('(');
- }
- w.append("EXTRACT(").append(part).append(" FROM ");
- writeExpr.write();
- if (writeTZ != null) {
- w.append(" AT TIME ZONE ");
- writeTZ.write();
- }
- w.append(')');
- if (isDOW) {
- w.append(" + 6) % 7");
- }
+ SqlEmitters.writeExtractWithPostgresDow(w, part, writeExpr, writeTZ);
}
@Override
@@ -279,11 +253,7 @@ public void writeContains(StringBuilder w, SqlWriter writeHaystack, SqlWriter wr
@Override
public void writeSplit(StringBuilder w, SqlWriter writeStr, SqlWriter writeDelim) throws ConversionException {
- w.append("STRING_TO_ARRAY(");
- writeStr.write();
- w.append(", ");
- writeDelim.write();
- w.append(')');
+ SqlEmitters.writeBinaryCall(w, "STRING_TO_ARRAY", writeStr, writeDelim);
}
@Override
@@ -297,15 +267,7 @@ public void writeSplitWithLimit(StringBuilder w, SqlWriter writeStr, SqlWriter w
@Override
public void writeJoin(StringBuilder w, SqlWriter writeArray, SqlWriter writeDelim) throws ConversionException {
- w.append("ARRAY_TO_STRING(");
- writeArray.write();
- w.append(", ");
- if (writeDelim != null) {
- writeDelim.write();
- } else {
- w.append("''");
- }
- w.append(", '')");
+ SqlEmitters.writeArrayJoin(w, "ARRAY_TO_STRING", writeArray, writeDelim, true);
}
@Override
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresRegex.java b/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresRegex.java
index edc3e1d..4f25d90 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresRegex.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/postgres/PostgresRegex.java
@@ -1,10 +1,9 @@
package com.spandigital.cel2sql.dialect.postgres;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.RegexSafety;
import com.spandigital.cel2sql.error.ConversionException;
-import java.util.regex.Pattern;
-
/**
* Converts RE2 regex patterns to POSIX ERE format for PostgreSQL.
* Performs security validation to prevent ReDoS attacks (CWE-1333).
@@ -13,18 +12,6 @@
*/
final class PostgresRegex {
- /** Maximum allowed regex pattern length. */
- static final int MAX_PATTERN_LENGTH = 500;
-
- /** Maximum allowed capture groups in a pattern. */
- static final int MAX_GROUPS = 20;
-
- /** Maximum allowed nesting depth of parenthesized groups. */
- static final int MAX_NESTING_DEPTH = 10;
-
- private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");
- private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");
-
private PostgresRegex() {
}
@@ -61,12 +48,7 @@ private PostgresRegex() {
*/
static RegexResult convertRE2ToPOSIX(String re2Pattern) throws ConversionException {
// 1. Check pattern length
- if (re2Pattern.length() > MAX_PATTERN_LENGTH) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("pattern length %d exceeds limit of %d characters",
- re2Pattern.length(), MAX_PATTERN_LENGTH));
- }
+ RegexSafety.checkLength(re2Pattern);
// 2. Extract case-insensitive flag
boolean caseInsensitive = false;
@@ -98,39 +80,9 @@ static RegexResult convertRE2ToPOSIX(String re2Pattern) throws ConversionExcepti
"inline flags other than (?i) are not supported in PostgreSQL POSIX regex");
}
- // 4. Detect catastrophic nested quantifiers
- if (NESTED_QUANTIFIERS.matcher(pattern).find()) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
-
- // Check for groups with quantifiers that are themselves quantified
- validateNoNestedQuantifiers(pattern);
-
- // 5. Count and limit capture groups
- int groupCount = countUnescapedParens(pattern);
- if (groupCount > MAX_GROUPS) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("regex contains %d capture groups, exceeds limit of %d",
- groupCount, MAX_GROUPS));
- }
-
- // 6. Detect exponential alternation patterns
- if (QUANTIFIED_ALTERNATION.matcher(pattern).find()) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains quantified alternation that could cause ReDoS");
- }
-
- // 7. Check nesting depth
- int maxDepth = computeMaxNestingDepth(pattern);
- if (maxDepth > MAX_NESTING_DEPTH) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
- }
+ // 4-7. Shared ReDoS safety checks (nested quantifiers, group count,
+ // quantified alternation, nesting depth)
+ RegexSafety.checkReDoS(pattern);
// 8. Convert RE2 to POSIX
String posix = pattern;
@@ -146,97 +98,4 @@ static RegexResult convertRE2ToPOSIX(String re2Pattern) throws ConversionExcepti
return new RegexResult(posix, caseInsensitive);
}
-
- /**
- * Validates that no quantified groups contain inner quantifiers (nested quantifiers).
- * This detects patterns like {@code (a+)+} that can cause catastrophic backtracking.
- */
- private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
- int depth = 0;
- boolean[] groupHasQuantifier = new boolean[pattern.length()]; // oversized but safe
- int stackTop = -1;
-
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
-
- // Skip escaped characters
- if (i > 0 && pattern.charAt(i - 1) == '\\') {
- continue;
- }
-
- switch (ch) {
- case '(' -> {
- depth++;
- stackTop++;
- groupHasQuantifier[stackTop] = false;
- }
- case ')' -> {
- if (depth > 0) {
- depth--;
- if (i + 1 < pattern.length()) {
- char next = pattern.charAt(i + 1);
- if (next == '*' || next == '+' || next == '?' || next == '{') {
- if (stackTop >= 0 && groupHasQuantifier[stackTop]) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
- }
- }
- if (stackTop > 0) {
- if (groupHasQuantifier[stackTop]) {
- groupHasQuantifier[stackTop - 1] = true;
- }
- }
- if (stackTop >= 0) {
- stackTop--;
- }
- }
- }
- case '*', '+', '?' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- case '{' -> {
- if (stackTop >= 0) {
- groupHasQuantifier[stackTop] = true;
- }
- }
- }
- }
- }
-
- /**
- * Counts the number of unescaped opening parentheses in the pattern.
- */
- private static int countUnescapedParens(String pattern) {
- int count = 0;
- for (int i = 0; i < pattern.length(); i++) {
- if (pattern.charAt(i) == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- count++;
- }
- }
- return count;
- }
-
- /**
- * Computes the maximum nesting depth of parenthesized groups in the pattern.
- */
- private static int computeMaxNestingDepth(String pattern) {
- int maxDepth = 0;
- int currentDepth = 0;
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
- if (ch == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth++;
- if (currentDepth > maxDepth) {
- maxDepth = currentDepth;
- }
- } else if (ch == ')' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth--;
- }
- }
- return maxDepth;
- }
}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkDialect.java b/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkDialect.java
index efeefe5..d6e8bd2 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkDialect.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkDialect.java
@@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;
@@ -80,11 +81,7 @@ public void writeStringConcat(StringBuilder w, SqlWriter writeLHS, SqlWriter wri
public void writeRegexMatch(StringBuilder w, SqlWriter writeTarget, String pattern, boolean caseInsensitive) throws ConversionException {
// Spark regex uses Java pattern syntax; (?i) inline flag is honoured by the
// engine, so caseInsensitive is folded into the pattern by SparkRegex.
- writeTarget.write();
- w.append(" RLIKE '");
- String escaped = pattern.replace("'", "''");
- w.append(escaped);
- w.append('\'');
+ SqlEmitters.writeInfixRegex(w, writeTarget, " RLIKE ", pattern);
}
@Override
@@ -215,13 +212,7 @@ public void writeJSONArrayLength(StringBuilder w, SqlWriter writeExpr) throws Co
@Override
public void writeJSONExtractPath(StringBuilder w, List<String> pathSegments, SqlWriter writeRoot) throws ConversionException {
- w.append("get_json_object(");
- writeRoot.write();
- w.append(", '$");
- for (String segment : pathSegments) {
- w.append('.').append(escapeJSONFieldName(segment));
- }
- w.append("') IS NOT NULL");
+ SqlEmitters.writeJsonPathProbe(w, "get_json_object", writeRoot, pathSegments, " IS NOT NULL", SparkDialect::escapeJSONFieldName);
}
/**
@@ -286,13 +277,7 @@ public void writeExtract(StringBuilder w, String part, SqlWriter writeExpr, SqlW
w.append(") - 1)");
return;
}
- w.append("EXTRACT(").append(part).append(" FROM ");
- writeExpr.write();
- if (writeTZ != null) {
- w.append(" AT TIME ZONE ");
- writeTZ.write();
- }
- w.append(')');
+ SqlEmitters.writeStandardExtract(w, part, writeExpr, writeTZ);
}
@Override
@@ -316,11 +301,7 @@ public void writeContains(StringBuilder w, SqlWriter writeHaystack, SqlWriter wr
@Override
public void writeSplit(StringBuilder w, SqlWriter writeStr, SqlWriter writeDelim) throws ConversionException {
- w.append("split(");
- writeStr.write();
- w.append(", ");
- writeDelim.write();
- w.append(')');
+ SqlEmitters.writeBinaryCall(w, "split", writeStr, writeDelim);
}
@Override
@@ -335,15 +316,7 @@ public void writeSplitWithLimit(StringBuilder w, SqlWriter writeStr, SqlWriter w
@Override
public void writeJoin(StringBuilder w, SqlWriter writeArray, SqlWriter writeDelim) throws ConversionException {
- w.append("array_join(");
- writeArray.write();
- w.append(", ");
- if (writeDelim != null) {
- writeDelim.write();
- } else {
- w.append("''");
- }
- w.append(')');
+ SqlEmitters.writeArrayJoin(w, "array_join", writeArray, writeDelim, false);
}
@Override
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkRegex.java b/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkRegex.java
index 9867807..60e28da 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkRegex.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/spark/SparkRegex.java
@@ -1,6 +1,7 @@
package com.spandigital.cel2sql.dialect.spark;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.RegexSafety;
import com.spandigital.cel2sql.error.ConversionException;
import java.util.regex.Pattern;
@@ -20,18 +21,6 @@
*/
final class SparkRegex {
- /** Maximum allowed regex pattern length. */
- static final int MAX_PATTERN_LENGTH = 500;
-
- /** Maximum allowed capture groups in a pattern. */
- static final int MAX_GROUPS = 20;
-
- /** Maximum allowed nesting depth of parenthesized groups. */
- static final int MAX_NESTING_DEPTH = 10;
-
- private static final Pattern NESTED_QUANTIFIERS = Pattern.compile("[*+][*+]");
- private static final Pattern QUANTIFIED_ALTERNATION = Pattern.compile("\\([^)]*\\|[^)]*\\)[*+]");
-
private SparkRegex() {}
/**
@@ -41,12 +30,7 @@ private SparkRegex() {}
* — the engine will honour the inline flag if present.
*/
static RegexResult convertRE2ToSpark(String re2Pattern) throws ConversionException {
- if (re2Pattern.length() > MAX_PATTERN_LENGTH) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("pattern length %d exceeds limit of %d characters",
- re2Pattern.length(), MAX_PATTERN_LENGTH));
- }
+ RegexSafety.checkLength(re2Pattern);
try {
Pattern.compile(re2Pattern);
} catch (PatternSyntaxException e) {
@@ -69,31 +53,10 @@ static RegexResult convertRE2ToSpark(String re2Pattern) throws ConversionExcepti
"Invalid pattern in expression",
"named capture groups (?P<name>...) are not supported in Spark regex");
}
- if (NESTED_QUANTIFIERS.matcher(re2Pattern).find()) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
- validateNoNestedQuantifiers(re2Pattern);
+ // Shared ReDoS safety checks (nested quantifiers, group count,
+ // quantified alternation, nesting depth)
+ RegexSafety.checkReDoS(re2Pattern);
- int groupCount = countUnescapedParens(re2Pattern);
- if (groupCount > MAX_GROUPS) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("regex contains %d capture groups, exceeds limit of %d",
- groupCount, MAX_GROUPS));
- }
- if (QUANTIFIED_ALTERNATION.matcher(re2Pattern).find()) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains quantified alternation that could cause ReDoS");
- }
- int maxDepth = computeMaxNestingDepth(re2Pattern);
- if (maxDepth > MAX_NESTING_DEPTH) {
- throw new ConversionException(
- "Invalid pattern in expression",
- String.format("nesting depth %d exceeds limit of %d", maxDepth, MAX_NESTING_DEPTH));
- }
if (re2Pattern.contains("(?m") || re2Pattern.contains("(?s") || re2Pattern.contains("(?-")) {
throw new ConversionException(
"Invalid pattern in expression",
@@ -101,68 +64,4 @@ static RegexResult convertRE2ToSpark(String re2Pattern) throws ConversionExcepti
}
return new RegexResult(re2Pattern, false);
}
-
- private static void validateNoNestedQuantifiers(String pattern) throws ConversionException {
- int depth = 0;
- boolean[] groupHasQuantifier = new boolean[pattern.length() + 1];
- int stackTop = -1;
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
- if (i > 0 && pattern.charAt(i - 1) == '\\') continue;
- switch (ch) {
- case '(' -> {
- depth++;
- stackTop++;
- groupHasQuantifier[stackTop] = false;
- }
- case ')' -> {
- if (depth > 0) {
- depth--;
- if (i + 1 < pattern.length()) {
- char next = pattern.charAt(i + 1);
- if (next == '*' || next == '+' || next == '?' || next == '{') {
- if (stackTop >= 0 && groupHasQuantifier[stackTop]) {
- throw new ConversionException(
- "Invalid pattern in expression",
- "regex contains catastrophic nested quantifiers that could cause ReDoS");
- }
- }
- }
- if (stackTop > 0 && groupHasQuantifier[stackTop]) {
- groupHasQuantifier[stackTop - 1] = true;
- }
- if (stackTop >= 0) stackTop--;
- }
- }
- case '*', '+', '?', '{' -> {
- if (stackTop >= 0) groupHasQuantifier[stackTop] = true;
- }
- }
- }
- }
-
- private static int countUnescapedParens(String pattern) {
- int count = 0;
- for (int i = 0; i < pattern.length(); i++) {
- if (pattern.charAt(i) == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- count++;
- }
- }
- return count;
- }
-
- private static int computeMaxNestingDepth(String pattern) {
- int maxDepth = 0;
- int currentDepth = 0;
- for (int i = 0; i < pattern.length(); i++) {
- char ch = pattern.charAt(i);
- if (ch == '(' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth++;
- if (currentDepth > maxDepth) maxDepth = currentDepth;
- } else if (ch == ')' && (i == 0 || pattern.charAt(i - 1) != '\\')) {
- currentDepth--;
- }
- }
- return maxDepth;
- }
}
diff --git a/src/main/java/com/spandigital/cel2sql/dialect/sqlite/SqliteDialect.java b/src/main/java/com/spandigital/cel2sql/dialect/sqlite/SqliteDialect.java
index d0cd8ab..98aac74 100644
--- a/src/main/java/com/spandigital/cel2sql/dialect/sqlite/SqliteDialect.java
+++ b/src/main/java/com/spandigital/cel2sql/dialect/sqlite/SqliteDialect.java
@@ -7,6 +7,7 @@
import com.spandigital.cel2sql.dialect.IndexRecommendation;
import com.spandigital.cel2sql.dialect.PatternType;
import com.spandigital.cel2sql.dialect.RegexResult;
+import com.spandigital.cel2sql.dialect.SqlEmitters;
import com.spandigital.cel2sql.dialect.SqlWriter;
import com.spandigital.cel2sql.error.ConversionException;
@@ -185,31 +186,17 @@ public void writeJSONArrayLength(StringBuilder w, SqlWriter writeExpr) throws Co
@Override
public void writeJSONExtractPath(StringBuilder w, List<String> pathSegments, SqlWriter writeRoot) throws ConversionException {
- w.append("json_type(");
- writeRoot.write();
- w.append(", '$");
- for (String segment : pathSegments) {
- w.append('.').append(escapeJSONFieldName(segment));
- }
- w.append("') IS NOT NULL");
+ SqlEmitters.writeJsonPathProbe(w, "json_type", writeRoot, pathSegments, " IS NOT NULL", SqliteDialect::escapeJSONFieldName);
}
@Override
public void writeJSONArrayMembership(StringBuilder w, String jsonFunc, SqlWriter writeElem, SqlWriter writeArray) throws ConversionException {
- w.append("EXISTS (SELECT 1 FROM json_each(");
- writeArray.write();
- w.append(") WHERE value = ");
- writeElem.write();
- w.append(')');
+ SqlEmitters.writeJsonEachMembership(w, writeArray, writeElem);
}
@Override
public void writeNestedJSONArrayMembership(StringBuilder w, SqlWriter writeElem, SqlWriter writeArray) throws ConversionException {
- w.append("EXISTS (SELECT 1 FROM json_each(");
- writeArray.write();
- w.append(") WHERE value = ");
- writeElem.write();
- w.append(')');
+ SqlEmitters.writeJsonEachMembership(w, writeArray, writeElem);
}
// --- Timestamps ---