diff --git a/src/parser/Parser.cpp b/src/parser/Parser.cpp index e999c777..201b6193 100644 --- a/src/parser/Parser.cpp +++ b/src/parser/Parser.cpp @@ -821,6 +821,9 @@ namespace OpenLogReplicator { transaction->commitSequence = redoLogRecord1->sequence; transaction->commitScn = redoLogRecord1->scn; transaction->commitTimestamp = redoLogRecord1->timestamp; + if ((redoLogRecord1->flg & OpCode::FLG_ROLLBACK_OP0504) != 0) + transaction->rollback = true; + if ((transaction->commitScn > metadata->firstDataScn && !transaction->system) || (transaction->commitScn > metadata->firstSchemaScn && transaction->system)) { if (transaction->begin) { diff --git a/src/parser/Transaction.cpp b/src/parser/Transaction.cpp index 33aaf7af..7ecf7337 100644 --- a/src/parser/Transaction.cpp +++ b/src/parser/Transaction.cpp @@ -140,11 +140,13 @@ namespace OpenLogReplicator { return; } - // In RAC online mode, Oracle generates phantom undo records for INSERTs on LOB tables. - // These appear as INSERT rollbacks with no preceding LOB index records on the stack. - // Legitimate rollbacks (out-of-line LOBs) always have LOB records stripped first. + // In RAC online mode, Oracle generates phantom undo records for DML on LOB tables. + // These appear as rollbacks with no preceding LOB index records on the stack. + // Legitimate LOB rollbacks always strip LOB index records (0x0A02 etc.) first. + // Covers INSERT->DELETE (0x0B02->0x0B03) and UPDATE->UPDATE (0x0B05->0x0B05). if (!lobStripped && transactionBuffer->deferCommittedTransactions && - lastRedoLogRecord2->opCode == 0x0B02 && redoLogRecord1->opCode == 0x0B03) { + ((lastRedoLogRecord2->opCode == 0x0B02 && redoLogRecord1->opCode == 0x0B03) || + (lastRedoLogRecord2->opCode == 0x0B05 && redoLogRecord1->opCode == 0x0B05))) { const DbTable* table = metadata->schema->checkTableDict(redoLogRecord1->obj); if (table != nullptr && !table->lobs.empty()) { return; @@ -493,6 +495,20 @@ namespace OpenLogReplicator { } else { metadata->ctx->warning(60017, "minimal supplemental log missing or redo log inconsistency for transaction " + xid.toString() + ", offset: " + redoLogRecord1->fileOffset.toString()); + // Pending redo1/redo2 contains an incomplete multi-piece DML from a different row/table. + // Discard the orphaned fragment and start fresh with the current record. + redo1.clear(); + redo2.clear(); + + if (op == 0x05010B02) + transactionType = Format::TRANSACTION_TYPE::INSERT; + else if (op == 0x05010B03) + transactionType = Format::TRANSACTION_TYPE::DELETE; + else + transactionType = Format::TRANSACTION_TYPE::UPDATE; + + redo1.push_back(redoLogRecord1); + redo2.push_back(redoLogRecord2); } } diff --git a/tests/KNOWN-LIMITATIONS.md b/tests/KNOWN-LIMITATIONS.md index 414e4ca7..8a86d394 100644 --- a/tests/KNOWN-LIMITATIONS.md +++ b/tests/KNOWN-LIMITATIONS.md @@ -4,8 +4,8 @@ Each entry includes evidence from source code, Oracle behavior, or test results. Claims without evidence should not be added. Entries are split into two categories: -- **External limitations** (L1-L7): Oracle LogMiner or Debezium behavior that - cannot be fixed in OLR. These require workarounds in test comparison scripts. +- **External limitations** (L1-L7, L13): Oracle LogMiner or Debezium behavior + that cannot be fixed in OLR. These require workarounds in test comparison scripts. - **OLR bugs** (L8-L12): Issues in OLR that should be fixed. Each has a corresponding GitHub issue. @@ -284,6 +284,36 @@ applies at DB creation, not pre-built). --- +## L13. LogMiner LOB After-Image Unavailable for Unchanged Columns + +Oracle LogMiner only includes LOB (CLOB/BLOB) column values in change events +when the column was **explicitly set or changed** by the SQL statement. +Unchanged LOB columns are not included in the redo data and appear as +`__debezium_unavailable_value` in Debezium output. + +This is documented behavior, not a bug. OLR reads LOB data directly from redo +log records and can deliver actual LOB content even when LogMiner cannot. + +**Evidence — Debezium documentation:** + +> Oracle only supplies column values for CLOB, NCLOB, and BLOB data types if +> they're explicitly set or changed in a SQL statement. As a result, change +> events never contain the value of an unchanged CLOB, NCLOB, or BLOB column. +> Instead, they contain placeholders as defined by the connector property, +> `unavailable.value.placeholder`. + +Source: [Debezium Connector for Oracle — LOB support](https://debezium.io/documentation/reference/stable/connectors/oracle.html), +[DBZ-4276](https://github.com/debezium/debezium/pull/2929) + +**Evidence — fuzz test (2026-03-29):** 776 value diffs on FUZZ_LOB table where +LogMiner has `__debezium_unavailable_value` and OLR has actual BLOB/CLOB data. +Zero reverse cases (OLR unavailable, LogMiner has value). + +**Test handling:** `validator.py` skips comparison when either side has +`__debezium_unavailable_value` (both before and after images). + +--- + ## Summary ### External Limitations (Oracle / Debezium — cannot be fixed in OLR) @@ -297,6 +327,7 @@ applies at DB creation, not pre-built). | L5 | NCHAR uses UNISTR() encoding | Oracle LogMiner | Decode in logminer2json.py | | L6 | LOB disabled by default | Debezium config | Set `lob.enabled=true` | | L7 | No mid-stream DDL | Debezium OLR adapter | Skip DDL in twin-test ([#13](https://github.com/rophy/olr/issues/13)) | +| L13 | LOB after-image unavailable for unchanged cols | Oracle LogMiner / [Debezium docs](https://debezium.io/documentation/reference/stable/connectors/oracle.html) | Skip unavailable in validator | ### OLR Bugs (should be fixed) diff --git a/tests/dbz-twin/rac/fuzz-test.sh b/tests/dbz-twin/rac/fuzz-test.sh index 70049258..54a567e6 100755 --- a/tests/dbz-twin/rac/fuzz-test.sh +++ b/tests/dbz-twin/rac/fuzz-test.sh @@ -183,8 +183,13 @@ action_up() { action_run() { local duration_min="${1:-30}" local duration_sec=$(( duration_min * 60 )) + local skip_lob="${SKIP_LOB:-0}" - echo "=== Running fuzz workload for ${duration_min} minutes ===" + if [[ "$skip_lob" == "1" ]]; then + echo "=== Running fuzz workload for ${duration_min} minutes (LOB skipped) ===" + else + echo "=== Running fuzz workload for ${duration_min} minutes ===" + fi local work_dir work_dir=$(mktemp -d /tmp/fuzz_rac_XXXXXX) @@ -202,12 +207,12 @@ SQL # Create runner scripts cat > "$work_dir/fuzz_node1.sql" < ${duration_sec}, p_seed => 42, p_node_id => 1); +EXEC FUZZ_WKL.run(p_duration_secs => ${duration_sec}, p_seed => 42, p_node_id => 1, p_skip_lob => ${skip_lob}); EXIT; SQL cat > "$work_dir/fuzz_node2.sql" < ${duration_sec}, p_seed => 137, p_node_id => 2); +EXEC FUZZ_WKL.run(p_duration_secs => ${duration_sec}, p_seed => 137, p_node_id => 2, p_skip_lob => ${skip_lob}); EXIT; SQL diff --git a/tests/dbz-twin/rac/perf/fuzz-workload.sql b/tests/dbz-twin/rac/perf/fuzz-workload.sql index 83ce5247..f3089ca7 100644 --- a/tests/dbz-twin/rac/perf/fuzz-workload.sql +++ b/tests/dbz-twin/rac/perf/fuzz-workload.sql @@ -149,7 +149,8 @@ CREATE OR REPLACE PACKAGE olr_test.FUZZ_WKL AS PROCEDURE run( p_duration_secs IN NUMBER DEFAULT 1800, p_seed IN NUMBER DEFAULT 1, - p_node_id IN NUMBER DEFAULT 1 + p_node_id IN NUMBER DEFAULT 1, + p_skip_lob IN NUMBER DEFAULT 0 -- 1 = skip LOB table operations ); END FUZZ_WKL; / @@ -170,6 +171,7 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS g_rollback_cnt PLS_INTEGER := 0; g_lob_cnt PLS_INTEGER := 0; g_total_ops PLS_INTEGER := 0; + g_skip_lob PLS_INTEGER := 0; -- 1 = skip LOB table operations -- Per-table ID tracking for UPDATE/DELETE targeting. -- Stores the last inserted ID for each table so UPDATE/DELETE can @@ -637,8 +639,12 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS v_count PLS_INTEGER; BEGIN -- Pick table (weighted) - -- 30% scalar, 10% wide, 15% lob, 10% partitioned, 10% nopk, - -- 10% maxstr, 5% interval, 10% null-heavy + -- 30% scalar, 10% wide, 15% lob, 10% part, 10% nopk, 10% maxstr, 5% interval, 10% null + -- When g_skip_lob=1, remap the 15% LOB range (41-55) to scalar (1-30) + IF g_skip_lob = 1 AND v_table_dice > 40 AND v_table_dice <= 55 THEN + -- Remap LOB range (41-55) to scalar + v_table_dice := rand_int(1, 30); + END IF; IF v_table_dice <= 30 THEN -- FUZZ_SCALAR v_count := rand_int(1, 20); @@ -707,7 +713,8 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS PROCEDURE run( p_duration_secs IN NUMBER DEFAULT 1800, p_seed IN NUMBER DEFAULT 1, - p_node_id IN NUMBER DEFAULT 1 + p_node_id IN NUMBER DEFAULT 1, + p_skip_lob IN NUMBER DEFAULT 0 ) IS v_start TIMESTAMP := SYSTIMESTAMP; v_deadline TIMESTAMP := SYSTIMESTAMP + NUMTODSINTERVAL(p_duration_secs, 'SECOND'); @@ -722,6 +729,7 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS g_event_seq := 0; g_insert_cnt := 0; g_update_cnt := 0; g_delete_cnt := 0; g_rollback_cnt := 0; g_lob_cnt := 0; g_total_ops := 0; + g_skip_lob := p_skip_lob; DBMS_RANDOM.SEED(p_seed); @@ -737,12 +745,14 @@ CREATE OR REPLACE PACKAGE BODY olr_test.FUZZ_WKL AS VALUES (v_seed_id, 'SEED', DBMS_RANDOM.STRING('x', 20), 0); track_id(g_scalar_ids, g_scalar_id_cnt, v_seed_id); END LOOP; - FOR i IN 1..5 LOOP - v_seed_id := next_id; - INSERT INTO olr_test.FUZZ_LOB (id, event_id, label, content) - VALUES (v_seed_id, 'SEED', 'seed', 'seed'); - track_id(g_lob_ids, g_lob_id_cnt, v_seed_id); - END LOOP; + IF g_skip_lob = 0 THEN + FOR i IN 1..5 LOOP + v_seed_id := next_id; + INSERT INTO olr_test.FUZZ_LOB (id, event_id, label, content) + VALUES (v_seed_id, 'SEED', 'seed', 'seed'); + track_id(g_lob_ids, g_lob_id_cnt, v_seed_id); + END LOOP; + END IF; FOR i IN 1..20 LOOP v_seed_id := next_id; v_seed_region := REGIONS(rand_int(1, 5)); diff --git a/tests/dbz-twin/rac/validator.py b/tests/dbz-twin/rac/validator.py index 370ca50a..a5160def 100644 --- a/tests/dbz-twin/rac/validator.py +++ b/tests/dbz-twin/rac/validator.py @@ -84,9 +84,11 @@ def merge_lob_records(records): def compare_values(lm_cols, olr_cols, table, section='after'): """Compare two normalized column dicts. Returns list of diff strings. - section: 'before' or 'after'. LOB unavailable markers are only skipped - in 'before' images — Oracle doesn't provide old LOB values in redo. - In 'after' images, unavailable markers indicate a real problem. + LOB unavailable markers are skipped in both before and after images. + Oracle LogMiner only includes LOB column values when they are explicitly + changed by the SQL statement — unchanged LOB columns are emitted as + __debezium_unavailable_value. This is documented Debezium behavior, not + a bug. See: KNOWN-LIMITATIONS.md L13. """ diffs = [] all_keys = set(lm_cols.keys()) | set(olr_cols.keys()) @@ -97,8 +99,8 @@ def compare_values(lm_cols, olr_cols, table, section='after'): vb = olr_cols.get(key) if key not in lm_cols or key not in olr_cols: continue # Supplemental logging differences - if section == 'before' and (is_unavailable(va) or is_unavailable(vb)): - continue # LOB before-image unavailable (Oracle limitation) + if is_unavailable(va) or is_unavailable(vb): + continue # LOB unavailable — Oracle/Debezium limitation (L1, L13) if va != vb: diffs.append(f" {key}: LM={va!r} OLR={vb!r}") return diffs @@ -119,12 +121,15 @@ def main(): conn.execute("PRAGMA journal_mode=WAL") cursor_by_node = {'N1': '', 'N2': ''} # Per-node watermark + safe_frontier = {} # Last frontier before idle-timeout widening total_validated = 0 total_matched = 0 total_mismatches = 0 total_lob_known = 0 # Known LOB issues (expected) total_missing_lm = 0 total_missing_olr = 0 + total_tail_olr = 0 # OLR ahead of LM at drain time (not a bug) + total_tail_lm = 0 # LM ahead of OLR at drain time (not a bug) last_new_events = time.time() prev_lm_count = 0 prev_olr_count = 0 @@ -170,6 +175,9 @@ def main(): if time.time() - last_new_events > IDLE_TIMEOUT: print(f"[validator] Idle timeout ({IDLE_TIMEOUT}s). " f"Final validation pass...", flush=True) + # Save safe frontier before widening — events beyond this + # are tail lag (OLR or LM ahead), not real mismatches. + safe_frontier = dict(node_frontiers) # Widen frontier to max of both sides per node to catch # truly missing events (one side never delivered them). for node_prefix in ('N1', 'N2'): @@ -214,6 +222,12 @@ def main(): in_lm = eid in lm_ids in_olr = eid in olr_ids + # Check if this event is beyond the safe frontier (tail lag) + node_prefix = eid[:2] + is_tail = (safe_frontier + and node_prefix in safe_frontier + and eid > safe_frontier[node_prefix]) + # Determine table from whichever side has the event if in_lm: tbl_row = conn.execute( @@ -228,7 +242,9 @@ def main(): if in_lm and not in_olr: total_missing_olr += 1 - if is_lob: + if is_tail: + total_tail_lm += 1 + elif is_lob: total_lob_known += 1 else: total_mismatches += 1 @@ -238,7 +254,9 @@ def main(): if in_olr and not in_lm: total_missing_lm += 1 - if is_lob: + if is_tail: + total_tail_olr += 1 + elif is_lob: total_lob_known += 1 else: total_mismatches += 1 @@ -338,9 +356,12 @@ def main(): # Progress report frontier_str = ','.join(f'{k}={v}' for k, v in sorted(cursor_by_node.items())) + tail_str = (f" tail_olr={total_tail_olr} tail_lm={total_tail_lm}" + if total_tail_olr or total_tail_lm else "") print(f"[validator] validated={total_validated} matched={total_matched} " f"mismatches={total_mismatches} lob_known={total_lob_known} " - f"missing_olr={total_missing_olr} extra_olr={total_missing_lm} " + f"missing_olr={total_missing_olr} extra_olr={total_missing_lm}" + f"{tail_str} " f"lm_total={lm_count} olr_total={olr_count} " f"frontier={frontier_str}", flush=True) @@ -359,15 +380,23 @@ def main(): print(f" LOB known issues: {total_lob_known}", flush=True) print(f" Missing from OLR: {total_missing_olr}", flush=True) print(f" Extra in OLR: {total_missing_lm}", flush=True) + if total_tail_olr or total_tail_lm: + print(f" Tail (OLR ahead): {total_tail_olr}", flush=True) + print(f" Tail (LM ahead): {total_tail_lm}", flush=True) if total_mismatches > 0: print(f"\n RESULT: FAIL ({total_mismatches} unexpected mismatches)", flush=True) sys.exit(1) else: - print(f"\n RESULT: PASS", flush=True) + print("\n RESULT: PASS", flush=True) + qualifiers = [] if total_lob_known > 0: - print(f" (with {total_lob_known} known LOB issues)", flush=True) + qualifiers.append(f"{total_lob_known} known LOB issues") + if total_tail_olr + total_tail_lm > 0: + qualifiers.append(f"{total_tail_olr + total_tail_lm} tail events") + if qualifiers: + print(f" ({', '.join(qualifiers)})", flush=True) sys.exit(0)