From a1194a08e39b595d034fde919f9bac112f606651 Mon Sep 17 00:00:00 2001 From: Anna Benke Date: Tue, 12 May 2026 16:08:01 +0200 Subject: [PATCH] feat(gooddata-pipelines): make workspace data filter fields optional Co-Authored-By: Claude Sonnet 4.6 --- .../latest/pipelines/ldm_extension/_index.md | 6 ++- .../ldm_extension/input_processor.py | 46 +++++++++++++------ .../models/custom_data_object.py | 16 ++++++- .../test_models/test_custom_data_object.py | 27 +++++++++++ 4 files changed, 76 insertions(+), 19 deletions(-) diff --git a/docs/content/en/latest/pipelines/ldm_extension/_index.md b/docs/content/en/latest/pipelines/ldm_extension/_index.md index 0a32d9c18..1a4de18f1 100644 --- a/docs/content/en/latest/pipelines/ldm_extension/_index.md +++ b/docs/content/en/latest/pipelines/ldm_extension/_index.md @@ -44,8 +44,8 @@ The custom dataset represents a new dataset appended to the child LDM. It is def | dataset_reference_source_column | string \| None | **Deprecated** — use `parent_dataset_references` instead. | | dataset_reference_source_column_data_type | [ColumnDataType](#columndatatype) \| None | **Deprecated** — use `parent_dataset_references` instead. | | parent_dataset_references | [ParentDatasetReference](#parentdatasetreference)[] \| None | List of references to the parent dataset. | -| workspace_data_filter_id | string | ID of the workspace data filter to use. | -| workspace_data_filter_column_name | string | Name of the column in custom dataset used for filtering. | +| workspace_data_filter_id | string \| None | ID of the workspace data filter to use. Optional; when omitted, the dataset is not bound to any workspace data filter. | +| workspace_data_filter_column_name | string \| None | Name of the column in the custom dataset used for filtering. Optional; must be set if and only if `workspace_data_filter_id` is set. | | dataset_description | string \| None | Optional declarative description on the custom dataset. 
| | dataset_tags | string[] \| None | Optional tag list; when omitted, defaults to a single tag derived from the dataset display name. | @@ -55,6 +55,8 @@ Either `dataset_source_table` or `dataset_source_sql` must be specified with a t `parent_dataset_references` must contain at least one entry. +`workspace_data_filter_id` and `workspace_data_filter_column_name` must be provided together or both left unset. Setting only one of them raises a `ValidationError`. When both are unset, the resulting dataset is emitted without a workspace data filter binding. + #### ParentDatasetReference Bundles one column of a (possibly composite) join to the parent dataset. Pass a list of these on `CustomDatasetDefinition.parent_dataset_references`, one entry per join column. diff --git a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py index 3c2d9afc2..04e8c4bc2 100644 --- a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py +++ b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/input_processor.py @@ -297,6 +297,35 @@ def datasets_to_ldm( dataset.definition ) + wdf_columns: ( + list[CatalogDeclarativeWorkspaceDataFilterColumn] | None + ) = None + wdf_references: ( + list[CatalogDeclarativeWorkspaceDataFilterReferences] | None + ) = None + wdf_id = dataset.definition.workspace_data_filter_id + wdf_column_name = ( + dataset.definition.workspace_data_filter_column_name + ) + # `check_wdf_pair` on the model guarantees both fields are set + # together or both omitted. 
+ if wdf_id is not None and wdf_column_name is not None: + wdf_columns = [ + CatalogDeclarativeWorkspaceDataFilterColumn( + name=wdf_column_name, + data_type=ColumnDataType.STRING.value, + ) + ] + wdf_references = [ + CatalogDeclarativeWorkspaceDataFilterReferences( + filter_id=CatalogDatasetWorkspaceDataFilterIdentifier( + id=wdf_id + ), + filter_column=wdf_column_name, + filter_column_data_type=ColumnDataType.STRING.value, + ) + ] + # Construct the declarative dataset object and append it to the list. declarative_datasets.append( CatalogDeclarativeDataset( @@ -318,21 +347,8 @@ def datasets_to_ldm( facts=facts, data_source_table_id=dataset_source_table_id, sql=dataset_sql, - workspace_data_filter_columns=[ - CatalogDeclarativeWorkspaceDataFilterColumn( - name=dataset.definition.workspace_data_filter_column_name, - data_type=ColumnDataType.STRING.value, - ) - ], - workspace_data_filter_references=[ - CatalogDeclarativeWorkspaceDataFilterReferences( - filter_id=CatalogDatasetWorkspaceDataFilterIdentifier( - id=dataset.definition.workspace_data_filter_id - ), - filter_column=dataset.definition.workspace_data_filter_column_name, - filter_column_data_type=ColumnDataType.STRING.value, - ) - ], + workspace_data_filter_columns=wdf_columns, + workspace_data_filter_references=wdf_references, tags=_effective_dataset_tags(dataset.definition), ) ) diff --git a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py index 48825212d..1d232d39c 100644 --- a/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py +++ b/packages/gooddata-pipelines/src/gooddata_pipelines/ldm_extension/models/custom_data_object.py @@ -115,8 +115,8 @@ class CustomDatasetDefinition(BaseModel): default=None, description="List of references to the parent dataset.", ) - workspace_data_filter_id: str - workspace_data_filter_column_name: 
str + workspace_data_filter_id: str | None = None + workspace_data_filter_column_name: str | None = None dataset_description: str | None = Field( default=None, description="Declarative description on the custom dataset.", @@ -164,6 +164,18 @@ def check_reference_form(self) -> "CustomDatasetDefinition": ) return self + @model_validator(mode="after") + def check_wdf_pair(self) -> "CustomDatasetDefinition": + """Workspace data filter id and column name must be provided together or both omitted.""" + has_id = self.workspace_data_filter_id is not None + has_col = self.workspace_data_filter_column_name is not None + if has_id != has_col: + raise ValueError( + "workspace_data_filter_id and workspace_data_filter_column_name " + "must both be set or both be omitted" + ) + return self + class CustomDataset(BaseModel): """Custom dataset with its definition and custom fields.""" diff --git a/packages/gooddata-pipelines/tests/test_ldm_extension/test_models/test_custom_data_object.py b/packages/gooddata-pipelines/tests/test_ldm_extension/test_models/test_custom_data_object.py index d45ee97c4..9d877ba55 100644 --- a/packages/gooddata-pipelines/tests/test_ldm_extension/test_models/test_custom_data_object.py +++ b/packages/gooddata-pipelines/tests/test_ldm_extension/test_models/test_custom_data_object.py @@ -179,3 +179,30 @@ def test_custom_dataset_definition_legacy_reference_fields_optional(): ds = CustomDatasetDefinition(**data) assert ds.dataset_reference_source_column is None assert ds.parent_dataset_references is not None + + +def test_custom_dataset_definition_wdf_optional_both_none(): + data = make_valid_dataset_def( + workspace_data_filter_id=None, workspace_data_filter_column_name=None + ) + ds = CustomDatasetDefinition(**data) + assert ds.workspace_data_filter_id is None + assert ds.workspace_data_filter_column_name is None + + +def test_custom_dataset_definition_wdf_only_id_raises(): + data = make_valid_dataset_def( + workspace_data_filter_id="wdf1", 
workspace_data_filter_column_name=None + ) + with pytest.raises(ValidationError) as exc: + CustomDatasetDefinition(**data) + assert "both be set or both be omitted" in str(exc.value) + + +def test_custom_dataset_definition_wdf_only_column_raises(): + data = make_valid_dataset_def( + workspace_data_filter_id=None, workspace_data_filter_column_name="col1" + ) + with pytest.raises(ValidationError) as exc: + CustomDatasetDefinition(**data) + assert "both be set or both be omitted" in str(exc.value)