ActiveState · icanhasmath · Jun 23, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -50,6 +50,7 @@
 else:
     from io import StringIO
 
+import binascii
 import struct
 import zlib
 
@@ -169,25 +170,29 @@ def decode(data, decodeParms=None):
         :return: a string conversion in base-7 ASCII, where each of its values
             v is such that 0 <= ord(v) <= 127.
         """
-        retval = ""
-        hex_pair = ""
-        index = 0
-        while True:
-            if index >= len(data):
-                raise PdfStreamError("Unexpected EOD in ASCIIHexDecode")
-            char = data[index]
-            if char == ">":
-                break
-            elif char.isspace():
-                index += 1
-                continue
-            hex_pair += char
-            if len(hex_pair) == 2:
-                retval += chr(int(hex_pair, base=16))
-                hex_pair = ""
-            index += 1
-        assert hex_pair == ""
-        return retval
+        # CVE-2026-28804: the previous character-by-character accumulation
+        # (retval += ..., hex_pair += ...) is quadratic, so a large
+        # /ASCIIHexDecode stream caused excessive CPU time. Locate the EOD
+        # marker once, strip whitespace, and bulk-decode with binascii.
+        # Literals must match the input type: bytes.find(str) fails on Py3.
+        is_bytes = isinstance(data, bytes)
+        eod = data.find(b">" if is_bytes else ">")
+        if eod == -1:
+            raise PdfStreamError("Unexpected EOD in ASCIIHexDecode")
+        hex_str = (b"" if is_bytes else "").join(data[:eod].split())
+        # Per ISO 32000 section 7.4.2, a final odd hex digit is assumed to
+        # be followed by a "0".
+        if len(hex_str) % 2 == 1:
+            hex_str += b"0" if is_bytes else "0"
+        try:
+            return binascii.unhexlify(hex_str)
+        except (binascii.Error, TypeError, ValueError):
+            raise PdfStreamError("Invalid hexadecimal data in ASCIIHexDecode")
+
+
+# CVE-2025-62708 / CVE-2025-66019: bound LZWDecode output so a small stream
+# cannot amplify into gigabytes of memory. Set to 0 to disable (trusted input).
+LZW_MAX_OUTPUT_LENGTH = 75000000  # 75 MB
 
 
 class LZWDecode(object):
@@ -196,10 +201,11 @@ class LZWDecode(object):
     """
 
     class Decoder(object):
-        def __init__(self, data):
+        def __init__(self, data, max_output_length=LZW_MAX_OUTPUT_LENGTH):
             self.STOP = 257
             self.CLEARDICT = 256
             self.data = data
+            self.max_output_length = max_output_length
             self.bytepos = 0
             self.bitpos = 0
             self.dict = [""] * 4096
@@ -246,6 +252,11 @@ def decode(self):
             cW = self.CLEARDICT
             baos = ""
             while True:
+                if self.max_output_length and len(baos) > self.max_output_length:
+                    raise PdfReadError(
+                        "Output exceeds maximum allowed length (%d bytes) "
+                        "while decoding LZW stream." % self.max_output_length
+                    )
                 pW = cW
                 cW = self.next_code()
                 if cW == -1:

diff --git a/Tests/test_security_lzw_hex.py b/Tests/test_security_lzw_hex.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+Regression tests for the LZW and ASCIIHex decoder hardening backports:
+
+- CVE-2026-28804          : ASCIIHexDecode quadratic decoding -> bulk decode.
+- CVE-2025-62708/66019    : bound LZWDecode output (decompression bomb).
+"""
+import pytest
+
+from PyPDF2 import filters
+from PyPDF2.errors import PdfReadError, PdfStreamError
+
+
+# --- CVE-2026-28804: ASCIIHexDecode ---------------------------------------
+
+def test_asciihex_basic():
+    assert filters.ASCIIHexDecode.decode("48656c6c6f>") == b"Hello"
+
+
+def test_asciihex_ignores_whitespace():
+    assert filters.ASCIIHexDecode.decode("48 65 6c\n6c\t6f >") == b"Hello"
+
+
+def test_asciihex_odd_length_padded():
+    # ISO 32000 §7.4.2: a trailing odd digit is treated as followed by "0".
+    assert filters.ASCIIHexDecode.decode("4>") == b"@"  # 0x40
+
+
+def test_asciihex_missing_eod_raises():
+    with pytest.raises(PdfStreamError):
+        filters.ASCIIHexDecode.decode("48656c6c6f")  # no '>'
+
+
+# --- CVE-2025-62708 / CVE-2025-66019: LZWDecode output cap -----------------
+
+def _pack_lzw(codes, width=9):
+    """Pack a list of fixed-width LZW codes MSB-first into bytes.
+
+    Kept small so the code width stays at the initial 9 bits (dictlen < 511).
+    """
+    bits = "".join(format(c, "0%db" % width) for c in codes)
+    while len(bits) % 8:
+        bits += "0"
+    return bytes(bytearray(int(bits[i : i + 8], 2) for i in range(0, len(bits), 8)))
+
+
+def test_lzw_decodes_normally():
+    # Three literal 'A' (65) codes then STOP (257) -> "AAA".
+    data = _pack_lzw([65, 65, 65, 257])
+    assert filters.LZWDecode.Decoder(data).decode() == b"AAA"
+
+
+def test_lzw_output_is_capped():
+    # Terminated stream: only the cap can fail it, so also match its message.
+    data = _pack_lzw([65] * 200 + [257])
+    with pytest.raises(PdfReadError, match="maximum allowed length"):
+        filters.LZWDecode.Decoder(data, max_output_length=5).decode()