From fd8034c2be57fb494645ff0071e188510dbf5cc0 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 29 Apr 2026 11:27:29 -0500 Subject: [PATCH 1/2] fix(#1433): allow leading underscore in attribute names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attribute_name parser in declare.py was pp.Word over [a-z] init chars and [a-z0-9_] body chars, rejecting any name starting with `_`. But the framework already treats names starting with `_` as hidden attributes (Heading.attributes filters by is_hidden = name.startswith("_")), and internal hidden columns like _job_start_time, _job_duration, _job_version, and _singleton are injected programmatically, bypassing the parser. User-defined hidden attributes — documented at docs.datajoint.com/reference/specs/table-declaration/#34-hidden-attributes — hit the parser and failed with a cryptic pyparsing ParseException. Allow `_` in the init-chars set so user code like _params_hash: varchar(32) unique index (tool, _params_hash) declares cleanly. Names starting with a digit are still rejected. Fixes #1433 --- src/datajoint/declare.py | 2 +- tests/unit/test_declare_hidden_attribute.py | 44 +++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_declare_hidden_attribute.py diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index 1370628bc..1021b9c48 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -147,7 +147,7 @@ def build_attribute_parser() -> pp.ParserElement: """ quoted = pp.QuotedString('"') ^ pp.QuotedString("'") colon = pp.Literal(":").suppress() - attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name") + attribute_name = pp.Word(pp.srange("[_a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name") data_type = ( pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted)) ^ pp.QuotedString("<", end_quote_char=">", unquote_results=False) diff --git a/tests/unit/test_declare_hidden_attribute.py b/tests/unit/test_declare_hidden_attribute.py new file mode 100644 index 000000000..33e1dd76f --- /dev/null +++ b/tests/unit/test_declare_hidden_attribute.py @@ -0,0 +1,44 @@ +"""Unit tests for hidden attribute names (leading underscore) in table declarations. + +Regression coverage for issue #1433: the declaration parser previously rejected +attribute names starting with ``_``, even though hidden-attribute semantics +(``is_hidden = name.startswith("_")``) were already implemented in ``Heading``. +""" + +import pytest + +from datajoint.declare import attribute_parser + + +@pytest.mark.parametrize( + "line", + [ + "_hidden: bool", + "_params_hash: varchar(32)", + "_job_start_time=null: datetime(3)", + "_a: int", + ], +) +def test_parser_accepts_leading_underscore(line): + match = attribute_parser.parse_string(line + "#", parse_all=True) + assert match["name"].startswith("_") + + +def test_parser_still_accepts_plain_names(): + match = attribute_parser.parse_string("name: varchar(40)#", parse_all=True) + assert match["name"] == "name" + + +def test_parser_rejects_digit_start(): + """Numeric leading char remains invalid (preserved behavior).""" + import pyparsing as pp + + with pytest.raises(pp.ParseException): + attribute_parser.parse_string("1bad: int#", parse_all=True) + + +def test_parser_extracts_hidden_name_for_is_hidden_dispatch(): + """The parsed name is what Heading uses to set is_hidden via name.startswith('_').""" + match = attribute_parser.parse_string("_secret: int#", parse_all=True) + name = match["name"] + assert name.startswith("_") From 46f4ad91abd6e9d14c4fd7add5bc0baa1a2e5f05 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 29 Apr 2026 12:01:55 -0500 Subject: [PATCH 2/2] fix(#1433): replace cryptic ParseException with clear DataJointError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-defined hidden attributes (names starting with `_`) are intentionally not supported. The framework filters hidden columns out of every public API surface — fetch, dict restriction, insert, update1, describe — and populates platform-managed hidden columns (`_job_*`, `_singleton`) via raw SQL during the populate() lifecycle, not via the user-facing methods. Allowing users to declare hidden columns produces a feature with no public-API write path, no describe() round-trip, and silent dict- restriction filtering. The right fix for cases users reach for hidden attributes (e.g. an index-backing hash like `params_hash`) is a regular attribute. Add a pre-flight check in compile_attribute that detects a leading underscore and raises DataJointError with a clear message pointing to the alternative, instead of leaking pyparsing internals: Attribute name in line "_hidden: bool" starts with an underscore. Names with leading underscore are reserved for platform-managed columns (e.g. _job_start_time, _singleton). Use a regular attribute name; if you need to control visibility at the call site, use proj(). Platform code is unaffected: `_job_*` and `_singleton` are injected programmatically *after* parsing, so they bypass compile_attribute. Replaces 7 unit tests asserting "parser accepts _" with 4 asserting "compile_attribute rejects _ with helpful message" and "parser remains strict". Fixes #1433 --- src/datajoint/declare.py | 10 ++++- tests/unit/test_declare_hidden_attribute.py | 43 ++++++++++++--------- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index 1021b9c48..dfd4c85df 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -147,7 +147,7 @@ def build_attribute_parser() -> pp.ParserElement: """ quoted = pp.QuotedString('"') ^ pp.QuotedString("'") colon = pp.Literal(":").suppress() - attribute_name = pp.Word(pp.srange("[_a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name") + attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]")).set_results_name("name") data_type = ( pp.Combine(pp.Word(pp.alphas) + pp.SkipTo("#", ignore=quoted)) ^ pp.QuotedString("<", end_quote_char=">", unquote_results=False) @@ -855,6 +855,14 @@ def compile_attribute( DataJointError If syntax is invalid, primary key is nullable, or blob has invalid default. """ + if line.lstrip().startswith("_"): + raise DataJointError( + f'Attribute name in line "{line}" starts with an underscore. ' + "Names with leading underscore are reserved for platform-managed " + "columns (e.g. _job_start_time, _singleton). Use a regular " + "attribute name; if you need to control visibility at the call " + "site, use proj()." + ) try: match = attribute_parser.parse_string(line + "#", parse_all=True) except pp.ParseException as err: diff --git a/tests/unit/test_declare_hidden_attribute.py b/tests/unit/test_declare_hidden_attribute.py index 33e1dd76f..0a1db6555 100644 --- a/tests/unit/test_declare_hidden_attribute.py +++ b/tests/unit/test_declare_hidden_attribute.py @@ -1,13 +1,19 @@ -"""Unit tests for hidden attribute names (leading underscore) in table declarations. +"""Unit tests for the leading-underscore guard in attribute declarations. -Regression coverage for issue #1433: the declaration parser previously rejected -attribute names starting with ``_``, even though hidden-attribute semantics -(``is_hidden = name.startswith("_")``) were already implemented in ``Heading``. +Regression coverage for issue #1433: declarations like ``_hidden: bool`` +previously failed with a cryptic ``pyparsing.ParseException``. The framework +intentionally does not support user-defined hidden attributes — those names +are reserved for platform-managed columns (e.g. ``_job_start_time``, +``_singleton``) which DataJoint injects programmatically after parsing. + +This test ensures the user gets a clear ``DataJointError`` pointing to the +right alternative, not a parser-internals error. """ import pytest -from datajoint.declare import attribute_parser +from datajoint.declare import attribute_parser, compile_attribute +from datajoint.errors import DataJointError @pytest.mark.parametrize( @@ -15,13 +21,21 @@ [ "_hidden: bool", "_params_hash: varchar(32)", - "_job_start_time=null: datetime(3)", - "_a: int", + " _leading_whitespace: int32", ], ) -def test_parser_accepts_leading_underscore(line): - match = attribute_parser.parse_string(line + "#", parse_all=True) - assert match["name"].startswith("_") +def test_compile_attribute_rejects_leading_underscore(line): + """The leading-underscore guard fires before the parser, so adapter is unused.""" + with pytest.raises(DataJointError, match="reserved for platform-managed"): + compile_attribute(line, in_key=False, foreign_key_sql=[], context={}, adapter=None) + + +def test_parser_still_rejects_leading_underscore(): + """Parser regex itself remains strict; the helpful error fires before the parser.""" + import pyparsing as pp + + with pytest.raises(pp.ParseException): + attribute_parser.parse_string("_hidden: bool#", parse_all=True) def test_parser_still_accepts_plain_names(): @@ -34,11 +48,4 @@ def test_parser_rejects_digit_start(): import pyparsing as pp with pytest.raises(pp.ParseException): - attribute_parser.parse_string("1bad: int#", parse_all=True) - - -def test_parser_extracts_hidden_name_for_is_hidden_dispatch(): - """The parsed name is what Heading uses to set is_hidden via name.startswith('_').""" - match = attribute_parser.parse_string("_secret: int#", parse_all=True) - name = match["name"] - assert name.startswith("_") + attribute_parser.parse_string("1bad: int32#", parse_all=True)