Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/gooddata-sdk/src/gooddata_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CatalogAILakeOperation,
CatalogAILakeOperationError,
CatalogAILakeService,
CatalogDataSourceInfo,
)
from gooddata_sdk.catalog.appearance.entity_model.color_palette import (
CatalogColorPalette,
Expand Down
158 changes: 154 additions & 4 deletions packages/gooddata-sdk/src/gooddata_sdk/catalog/ai_lake/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
- `get_operation` and `wait_for_operation` cover the polling side of the
long-running operation contract that `analyze_statistics` returns.

The full AI Lake API surface (database provisioning, pipe-table
registration, service commands) is not yet wrapped here; consumers that
need those should call `client.ai_lake_api.<method>` directly until a
ticket adds typed wrappers.
The data-source management surface (multi-datasource CRUD on database
instances) is exposed through four additional methods:
- `list_database_data_sources` — list all data sources for an instance.
- `add_database_data_source` — attach a new data source to an instance.
- `remove_database_data_source` — detach a data source from an instance.
- `update_database_data_source` — rename or replace a data source on an
instance.
"""

from __future__ import annotations
Expand All @@ -24,7 +27,10 @@

from attrs import define
from gooddata_api_client.api.ai_lake_api import AILakeApi
from gooddata_api_client.api.ai_lake_databases_api import AILakeDatabasesApi
from gooddata_api_client.model.add_database_data_source_request import AddDatabaseDataSourceRequest
from gooddata_api_client.model.analyze_statistics_request import AnalyzeStatisticsRequest
from gooddata_api_client.model.update_database_data_source_request import UpdateDatabaseDataSourceRequest

from gooddata_sdk.catalog.base import Base
from gooddata_sdk.client import GoodDataApiClient
Expand Down Expand Up @@ -58,6 +64,28 @@ def is_failed(self) -> bool:
return self.status == "failed"


@define(kw_only=True)
class CatalogDataSourceInfo:
"""A single data source association for an AI Lake Database instance.

``id`` is the internal association-record identifier returned by
list/add operations. It is ``None`` when constructed from an update
response, which only returns ``data_source_id`` and ``data_source_name``.
"""

data_source_id: str
data_source_name: str
id: str | None = None

@classmethod
def _from_api_dict(cls, data: dict[str, Any]) -> CatalogDataSourceInfo:
return cls(
id=data.get("id"),
data_source_id=data["data_source_id"],
data_source_name=data["data_source_name"],
)


class CatalogAILakeOperationError(RuntimeError):
"""Raised when an AI Lake long-running operation finishes in `failed` state."""

Expand All @@ -75,6 +103,7 @@ class CatalogAILakeService:
def __init__(self, api_client: GoodDataApiClient) -> None:
self._client = api_client
self._ai_lake_api: AILakeApi = AILakeApi(api_client._api_client)
self._ai_lake_databases_api: AILakeDatabasesApi = AILakeDatabasesApi(api_client._api_client)

def analyze_statistics(
self,
Expand Down Expand Up @@ -142,3 +171,124 @@ def wait_for_operation(
f"AI Lake operation {operation_id} did not finish within {timeout_s}s (last status: {op.status})"
)
time.sleep(poll_s)

# ------------------------------------------------------------------
# Database data-source management
# ------------------------------------------------------------------

def list_database_data_sources(self, instance_id: str) -> list[CatalogDataSourceInfo]:
"""Return all data source associations for the given database instance.

Args:
instance_id: Database instance name (preferred) or UUID.

Returns:
List of :class:`CatalogDataSourceInfo` objects; empty list when
the instance has no data sources.
"""
response = self._ai_lake_databases_api.list_ai_lake_database_data_sources(instance_id, _check_return_type=False)
data = response.to_dict() if hasattr(response, "to_dict") else dict(response)
return [CatalogDataSourceInfo._from_api_dict(ds) for ds in data.get("data_sources", [])]

def add_database_data_source(
self,
instance_id: str,
data_source_id: str,
*,
data_source_name: str | None = None,
) -> CatalogDataSourceInfo:
"""Associate an additional data source with an AI Lake database instance.

The new data source reuses the same StarRocks connection details as
the instance's primary data source.

Args:
instance_id: Database instance name (preferred) or UUID.
data_source_id: Identifier for the new data source in metadata-api.
Must be unique within the organization.
data_source_name: Optional display name for the new data source.

Returns:
:class:`CatalogDataSourceInfo` describing the newly created
data source association.
"""
kwargs: dict[str, Any] = {}
if data_source_name is not None:
kwargs["data_source_name"] = data_source_name
request = AddDatabaseDataSourceRequest(
data_source_id=data_source_id,
_check_type=False,
**kwargs,
)
response = self._ai_lake_databases_api.add_ai_lake_database_data_source(
instance_id, request, _check_return_type=False
)
data = response.to_dict() if hasattr(response, "to_dict") else dict(response)
return CatalogDataSourceInfo._from_api_dict(data["data_source"])

def remove_database_data_source(
self,
instance_id: str,
data_source_id: str,
) -> str:
"""Remove a data source association from an AI Lake database instance.

This also deletes the corresponding data source from metadata-api.
Fails if removing the data source would leave the instance with no
data sources.

Args:
instance_id: Database instance name (preferred) or UUID.
data_source_id: Identifier of the data source to remove.

Returns:
The identifier of the removed data source.
"""
response = self._ai_lake_databases_api.remove_ai_lake_database_data_source(
instance_id, data_source_id, _check_return_type=False
)
data = response.to_dict() if hasattr(response, "to_dict") else dict(response)
return data["data_source_id"]

def update_database_data_source(
self,
instance_id: str,
old_data_source_id: str,
new_data_source_id: str,
*,
data_source_name: str | None = None,
) -> CatalogDataSourceInfo:
"""Update the data source ID (and optionally name) on a database instance.

Use this to recover from a wrong data source ID provisioned on an
existing database instance without deleting the underlying database.

Args:
instance_id: Database instance name (preferred) or UUID.
old_data_source_id: Identifier of the existing data source to replace.
new_data_source_id: New identifier for the data source in metadata-api.
Must be unique within the organization.
data_source_name: Optional new display name for the data source.

Returns:
:class:`CatalogDataSourceInfo` reflecting the updated state.
``id`` will be ``None`` because the update endpoint does not
return the association-record identifier.
"""
kwargs: dict[str, Any] = {}
if data_source_name is not None:
kwargs["data_source_name"] = data_source_name
request = UpdateDatabaseDataSourceRequest(
old_data_source_id=old_data_source_id,
data_source_id=new_data_source_id,
_check_type=False,
**kwargs,
)
response = self._ai_lake_databases_api.update_ai_lake_database_data_source(
instance_id, request, _check_return_type=False
)
data = response.to_dict() if hasattr(response, "to_dict") else dict(response)
return CatalogDataSourceInfo(
data_source_id=data["data_source_id"],
data_source_name=data["data_source_name"],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
interactions:
- request:
body:
dataSourceId: sdk-test-ds-secondary
dataSourceName: SDK Test Secondary DS
headers:
Accept:
- application/json
Accept-Encoding:
- br, gzip, deflate
Content-Type:
- application/json
X-GDC-VALIDATE-RELATIONS:
- 'true'
X-Requested-With:
- XMLHttpRequest
method: POST
uri: http://localhost:3000/api/v1/ailake/database/instances/demo-ai-lake/dataSources
response:
body:
string:
detail: Your organization does not have the necessary entitlement to use
AI Lake APIs.
status: 403
title: Forbidden
traceId: NORMALIZED_TRACE_ID_000000000000
headers:
Content-Type:
- application/problem+json
DATE: &id001
- PLACEHOLDER
Expires:
- '0'
Pragma:
- no-cache
X-Content-Type-Options:
- nosniff
X-GDC-TRACE-ID: *id001
status:
code: 403
message: Forbidden
- request:
body: null
headers:
Accept:
- application/json
Accept-Encoding:
- br, gzip, deflate
X-GDC-VALIDATE-RELATIONS:
- 'true'
X-Requested-With:
- XMLHttpRequest
method: DELETE
uri: http://localhost:3000/api/v1/ailake/database/instances/demo-ai-lake/dataSources/sdk-test-ds-secondary
response:
body:
string:
detail: Your organization does not have the necessary entitlement to use
AI Lake APIs.
status: 403
title: Forbidden
traceId: NORMALIZED_TRACE_ID_000000000000
headers:
Content-Type:
- application/problem+json
DATE: *id001
Expires:
- '0'
Pragma:
- no-cache
X-Content-Type-Options:
- nosniff
X-GDC-TRACE-ID: *id001
status:
code: 403
message: Forbidden
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
interactions:
- request:
body: null
headers:
Accept:
- application/json
Accept-Encoding:
- br, gzip, deflate
X-GDC-VALIDATE-RELATIONS:
- 'true'
X-Requested-With:
- XMLHttpRequest
method: GET
uri: http://localhost:3000/api/v1/ailake/database/instances/demo-ai-lake/dataSources
response:
body:
string:
detail: Your organization does not have the necessary entitlement to use
AI Lake APIs.
status: 403
title: Forbidden
traceId: NORMALIZED_TRACE_ID_000000000000
headers:
Content-Type:
- application/problem+json
DATE: &id001
- PLACEHOLDER
Expires:
- '0'
Pragma:
- no-cache
X-Content-Type-Options:
- nosniff
X-GDC-TRACE-ID: *id001
status:
code: 403
message: Forbidden
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
interactions:
- request:
body:
dataSourceId: sdk-test-ds-secondary
dataSourceName: SDK Test Secondary DS
headers:
Accept:
- application/json
Accept-Encoding:
- br, gzip, deflate
Content-Type:
- application/json
X-GDC-VALIDATE-RELATIONS:
- 'true'
X-Requested-With:
- XMLHttpRequest
method: POST
uri: http://localhost:3000/api/v1/ailake/database/instances/demo-ai-lake/dataSources
response:
body:
string:
detail: Your organization does not have the necessary entitlement to use
AI Lake APIs.
status: 403
title: Forbidden
traceId: NORMALIZED_TRACE_ID_000000000000
headers:
Content-Type:
- application/problem+json
DATE: &id001
- PLACEHOLDER
Expires:
- '0'
Pragma:
- no-cache
X-Content-Type-Options:
- nosniff
X-GDC-TRACE-ID: *id001
status:
code: 403
message: Forbidden
version: 1
Loading
Loading