Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/content/en/latest/pipelines/ldm_extension/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ The custom dataset represents a new dataset appended to the child LDM. It is def
| dataset_reference_source_column | string \| None | **Deprecated** — use `parent_dataset_references` instead. |
| dataset_reference_source_column_data_type | [ColumnDataType](#columndatatype) \| None | **Deprecated** — use `parent_dataset_references` instead. |
| parent_dataset_references | [ParentDatasetReference](#parentdatasetreference)[] \| None | List of references to the parent dataset. |
| workspace_data_filter_id | string | ID of the workspace data filter to use. |
| workspace_data_filter_column_name | string | Name of the column in custom dataset used for filtering. |
| workspace_data_filter_id | string \| None | ID of the workspace data filter to use. Optional; when omitted the dataset participates in no workspace data filter. |
| workspace_data_filter_column_name | string \| None | Name of the column in the custom dataset used for filtering. Optional; must be set whenever `workspace_data_filter_id` is set. |
| dataset_description | string \| None | Optional declarative description on the custom dataset. |
| dataset_tags | string[] \| None | Optional tag list; when omitted, defaults to a single tag derived from the dataset display name. |

Expand All @@ -55,6 +55,8 @@ Either `dataset_source_table` or `dataset_source_sql` must be specified with a t

`parent_dataset_references` must contain at least one entry.

`workspace_data_filter_id` and `workspace_data_filter_column_name` must be provided together or both left unset. Setting only one of them raises a `ValidationError`. When both are unset, the resulting dataset is emitted without a workspace data filter binding.

#### ParentDatasetReference

Bundles one column of a (possibly composite) join to the parent dataset. Pass a list of these on `CustomDatasetDefinition.parent_dataset_references`, one entry per join column.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,35 @@ def datasets_to_ldm(
dataset.definition
)

wdf_columns: (
list[CatalogDeclarativeWorkspaceDataFilterColumn] | None
) = None
wdf_references: (
list[CatalogDeclarativeWorkspaceDataFilterReferences] | None
) = None
wdf_id = dataset.definition.workspace_data_filter_id
wdf_column_name = (
dataset.definition.workspace_data_filter_column_name
)
# `check_wdf_pair` on the model guarantees both fields are set
# together or both omitted.
if wdf_id is not None and wdf_column_name is not None:
wdf_columns = [
CatalogDeclarativeWorkspaceDataFilterColumn(
name=wdf_column_name,
data_type=ColumnDataType.STRING.value,
)
]
wdf_references = [
CatalogDeclarativeWorkspaceDataFilterReferences(
filter_id=CatalogDatasetWorkspaceDataFilterIdentifier(
id=wdf_id
),
filter_column=wdf_column_name,
filter_column_data_type=ColumnDataType.STRING.value,
)
]

# Construct the declarative dataset object and append it to the list.
declarative_datasets.append(
CatalogDeclarativeDataset(
Expand All @@ -318,21 +347,8 @@ def datasets_to_ldm(
facts=facts,
data_source_table_id=dataset_source_table_id,
sql=dataset_sql,
workspace_data_filter_columns=[
CatalogDeclarativeWorkspaceDataFilterColumn(
name=dataset.definition.workspace_data_filter_column_name,
data_type=ColumnDataType.STRING.value,
)
],
workspace_data_filter_references=[
CatalogDeclarativeWorkspaceDataFilterReferences(
filter_id=CatalogDatasetWorkspaceDataFilterIdentifier(
id=dataset.definition.workspace_data_filter_id
),
filter_column=dataset.definition.workspace_data_filter_column_name,
filter_column_data_type=ColumnDataType.STRING.value,
)
],
workspace_data_filter_columns=wdf_columns,
workspace_data_filter_references=wdf_references,
tags=_effective_dataset_tags(dataset.definition),
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ class CustomDatasetDefinition(BaseModel):
default=None,
description="List of references to the parent dataset.",
)
workspace_data_filter_id: str
workspace_data_filter_column_name: str
workspace_data_filter_id: str | None = None
workspace_data_filter_column_name: str | None = None
dataset_description: str | None = Field(
default=None,
description="Declarative description on the custom dataset.",
Expand Down Expand Up @@ -164,6 +164,18 @@ def check_reference_form(self) -> "CustomDatasetDefinition":
)
return self

@model_validator(mode="after")
def check_wdf_pair(self) -> "CustomDatasetDefinition":
    """Reject definitions where only one workspace-data-filter field is set.

    `workspace_data_filter_id` and `workspace_data_filter_column_name`
    must be provided together or both omitted; a lone value in either
    field is a configuration error.

    Raises:
        ValueError: if exactly one of the two fields is set (surfaced by
            pydantic as a ``ValidationError``).
    """
    id_missing = self.workspace_data_filter_id is None
    column_missing = self.workspace_data_filter_column_name is None
    if id_missing != column_missing:
        raise ValueError(
            "workspace_data_filter_id and workspace_data_filter_column_name "
            "must both be set or both be omitted"
        )
    return self


class CustomDataset(BaseModel):
"""Custom dataset with its definition and custom fields."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,30 @@ def test_custom_dataset_definition_legacy_reference_fields_optional():
ds = CustomDatasetDefinition(**data)
assert ds.dataset_reference_source_column is None
assert ds.parent_dataset_references is not None


def test_custom_dataset_definition_wdf_optional_both_none():
    """Omitting both workspace-data-filter fields together is accepted."""
    payload = make_valid_dataset_def(
        workspace_data_filter_id=None,
        workspace_data_filter_column_name=None,
    )
    definition = CustomDatasetDefinition(**payload)
    assert definition.workspace_data_filter_id is None
    assert definition.workspace_data_filter_column_name is None


def test_custom_dataset_definition_wdf_only_id_raises():
    """Setting only the filter id without a column name must be rejected."""
    payload = make_valid_dataset_def(
        workspace_data_filter_id="wdf1",
        workspace_data_filter_column_name=None,
    )
    with pytest.raises(ValidationError) as exc_info:
        CustomDatasetDefinition(**payload)
    assert "both be set or both be omitted" in str(exc_info.value)


def test_custom_dataset_definition_wdf_only_column_raises():
    """Setting only the filter column without an id must be rejected."""
    payload = make_valid_dataset_def(
        workspace_data_filter_id=None,
        workspace_data_filter_column_name="col1",
    )
    with pytest.raises(ValidationError) as exc_info:
        CustomDatasetDefinition(**payload)
    assert "both be set or both be omitted" in str(exc_info.value)
Loading