From fd8034c2be57fb494645ff0071e188510dbf5cc0 Mon Sep 17 00:00:00 2001
From: Dimitri Yatsenko <dimitri.yatsenko@gmail.com>
Date: Wed, 29 Apr 2026 11:27:29 -0500
Subject: [PATCH 1/2] fix(#1433): allow leading underscore in attribute names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The attribute_name parser in declare.py was pp.Word over [a-z] init chars
and [a-z0-9_] body chars, rejecting any name starting with `_`. But the
framework already treats names starting with `_` as hidden attributes
(Heading.attributes filters by is_hidden = name.startswith("_")), and
internal hidden columns like _job_start_time, _job_duration, _job_version,
and _singleton are injected programmatically, bypassing the parser.

User-defined hidden attributes — documented at
docs.datajoint.com/reference/specs/table-declaration/#34-hidden-attributes —
hit the parser and failed with a cryptic pyparsing ParseException.

Allow `_` in the init-chars set so user code like

    _params_hash: varchar(32)
    unique index (tool, _params_hash)

declares cleanly. Names starting with a digit are still rejected.

Fixes #1433
---
 src/datajoint/declare.py                    |  2 +-
 tests/unit/test_declare_hidden_attribute.py | 44 +++++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/test_declare_hidden_attribute.py

diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py
index 1370628bc..1021b9c48 100644
--- a/src/datajoint/declare.py
+++ b/src/datajoint/declare.py
@@ -147,7 +147,7 @@ def build_attribute_parser() -> pp.ParserElement:
     """
     quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
     colon = pp.Literal(":").suppress()
-    attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name")
+    attribute_name = pp.Word(pp.srange("[_a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name")
     data_type = (
         pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted))
         ^ pp.QuotedString("<", end_quote_char=">", unquote_results=False)
diff --git a/tests/unit/test_declare_hidden_attribute.py b/tests/unit/test_declare_hidden_attribute.py
new file mode 100644
index 000000000..33e1dd76f
--- /dev/null
+++ b/tests/unit/test_declare_hidden_attribute.py
@@ -0,0 +1,44 @@
+"""Unit tests for hidden attribute names (leading underscore) in table declarations.
+
+Regression coverage for issue #1433: the declaration parser previously rejected
+attribute names starting with ``_``, even though hidden-attribute semantics
+(``is_hidden = name.startswith("_")``) were already implemented in ``Heading``.
+"""
+
+import pytest
+
+from datajoint.declare import attribute_parser
+
+
+@pytest.mark.parametrize(
+    "line",
+    [
+        "_hidden: bool",
+        "_params_hash: varchar(32)",
+        "_job_start_time=null: datetime(3)",
+        "_a: int",
+    ],
+)
+def test_parser_accepts_leading_underscore(line):
+    match = attribute_parser.parse_string(line + "#", parse_all=True)
+    assert match["name"].startswith("_")
+
+
+def test_parser_still_accepts_plain_names():
+    match = attribute_parser.parse_string("name: varchar(40)#", parse_all=True)
+    assert match["name"] == "name"
+
+
+def test_parser_rejects_digit_start():
+    """Numeric leading char remains invalid (preserved behavior)."""
+    import pyparsing as pp
+
+    with pytest.raises(pp.ParseException):
+        attribute_parser.parse_string("1bad: int#", parse_all=True)
+
+
+def test_parser_extracts_hidden_name_for_is_hidden_dispatch():
+    """The parsed name is what Heading uses to set is_hidden via name.startswith('_')."""
+    match = attribute_parser.parse_string("_secret: int#", parse_all=True)
+    name = match["name"]
+    assert name.startswith("_")

From 46f4ad91abd6e9d14c4fd7add5bc0baa1a2e5f05 Mon Sep 17 00:00:00 2001
From: Dimitri Yatsenko <dimitri.yatsenko@gmail.com>
Date: Wed, 29 Apr 2026 12:01:55 -0500
Subject: [PATCH 2/2] fix(#1433): replace cryptic ParseException with clear
 DataJointError
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

User-defined hidden attributes (names starting with `_`) are intentionally
not supported. The framework filters hidden columns out of every public
API surface — fetch, dict restriction, insert, update1, describe — and
populates platform-managed hidden columns (`_job_*`, `_singleton`) via
raw SQL during the populate() lifecycle, not via the user-facing methods.
Allowing users to declare hidden columns produces a feature with no
public-API write path, no describe() round-trip, and silent dict-
restriction filtering. The right fix for cases users reach for hidden
attributes (e.g. an index-backing hash like `params_hash`) is a regular
attribute.

Add a pre-flight check in compile_attribute that detects a leading
underscore and raises DataJointError with a clear message pointing to
the alternative, instead of leaking pyparsing internals:

    Attribute name in line "_hidden: bool" starts with an underscore.
    Names with leading underscore are reserved for platform-managed
    columns (e.g. _job_start_time, _singleton). Use a regular attribute
    name; if you need to control visibility at the call site, use proj().

Platform code is unaffected: `_job_*` and `_singleton` are injected
programmatically *after* parsing, so they bypass compile_attribute.

Replaces 7 unit tests asserting "parser accepts _" with 4 asserting
"compile_attribute rejects _ with helpful message" and "parser remains
strict".

Fixes #1433
---
 src/datajoint/declare.py                    | 10 ++++-
 tests/unit/test_declare_hidden_attribute.py | 43 ++++++++++++---------
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py
index 1021b9c48..dfd4c85df 100644
--- a/src/datajoint/declare.py
+++ b/src/datajoint/declare.py
@@ -147,7 +147,7 @@ def build_attribute_parser() -> pp.ParserElement:
     """
     quoted = pp.QuotedString('"') ^ pp.QuotedString("'")
     colon = pp.Literal(":").suppress()
-    attribute_name = pp.Word(pp.srange("[_a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name")
+    attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name")
     data_type = (
         pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted))
         ^ pp.QuotedString("<", end_quote_char=">", unquote_results=False)
@@ -855,6 +855,14 @@ def compile_attribute(
     DataJointError
         If syntax is invalid, primary key is nullable, or blob has invalid default.
     """
+    if line.lstrip().startswith("_"):
+        raise DataJointError(
+            f'Attribute name in line "{line}" starts with an underscore. '
+            "Names with leading underscore are reserved for platform-managed "
+            "columns (e.g. _job_start_time, _singleton). Use a regular "
+            "attribute name; if you need to control visibility at the call "
+            "site, use proj()."
+        )
     try:
         match = attribute_parser.parse_string(line + "#", parse_all=True)
     except pp.ParseException as err:
diff --git a/tests/unit/test_declare_hidden_attribute.py b/tests/unit/test_declare_hidden_attribute.py
index 33e1dd76f..0a1db6555 100644
--- a/tests/unit/test_declare_hidden_attribute.py
+++ b/tests/unit/test_declare_hidden_attribute.py
@@ -1,13 +1,19 @@
-"""Unit tests for hidden attribute names (leading underscore) in table declarations.
+"""Unit tests for the leading-underscore guard in attribute declarations.
 
-Regression coverage for issue #1433: the declaration parser previously rejected
-attribute names starting with ``_``, even though hidden-attribute semantics
-(``is_hidden = name.startswith("_")``) were already implemented in ``Heading``.
+Regression coverage for issue #1433: declarations like ``_hidden: bool``
+previously failed with a cryptic ``pyparsing.ParseException``. The framework
+intentionally does not support user-defined hidden attributes — those names
+are reserved for platform-managed columns (e.g. ``_job_start_time``,
+``_singleton``) which DataJoint injects programmatically after parsing.
+
+This test ensures the user gets a clear ``DataJointError`` pointing to the
+right alternative, not a parser-internals error.
 """
 
 import pytest
 
-from datajoint.declare import attribute_parser
+from datajoint.declare import attribute_parser, compile_attribute
+from datajoint.errors import DataJointError
 
 
 @pytest.mark.parametrize(
@@ -15,13 +21,21 @@
     [
         "_hidden: bool",
         "_params_hash: varchar(32)",
-        "_job_start_time=null: datetime(3)",
-        "_a: int",
+        "  _leading_whitespace: int32",
     ],
 )
-def test_parser_accepts_leading_underscore(line):
-    match = attribute_parser.parse_string(line + "#", parse_all=True)
-    assert match["name"].startswith("_")
+def test_compile_attribute_rejects_leading_underscore(line):
+    """The leading-underscore guard fires before the parser, so adapter is unused."""
+    with pytest.raises(DataJointError, match="reserved for platform-managed"):
+        compile_attribute(line, in_key=False, foreign_key_sql=[], context={}, adapter=None)
+
+
+def test_parser_still_rejects_leading_underscore():
+    """Parser regex itself remains strict; the helpful error fires before the parser."""
+    import pyparsing as pp
+
+    with pytest.raises(pp.ParseException):
+        attribute_parser.parse_string("_hidden: bool#", parse_all=True)
 
 
 def test_parser_still_accepts_plain_names():
@@ -34,11 +48,4 @@ def test_parser_rejects_digit_start():
     import pyparsing as pp
 
     with pytest.raises(pp.ParseException):
-        attribute_parser.parse_string("1bad: int#", parse_all=True)
-
-
-def test_parser_extracts_hidden_name_for_is_hidden_dispatch():
-    """The parsed name is what Heading uses to set is_hidden via name.startswith('_')."""
-    match = attribute_parser.parse_string("_secret: int#", parse_all=True)
-    name = match["name"]
-    assert name.startswith("_")
+        attribute_parser.parse_string("1bad: int32#", parse_all=True)