diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index f93d2354..b8851525 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -538,12 +538,15 @@ ** xref:reference:sql/index.adoc[Redpanda SQL Reference] *** xref:reference:sql/sql-statements/index.adoc[Statements] **** xref:reference:sql/sql-statements/keywords.adoc[] +**** xref:reference:sql/sql-statements/alter-iceberg-catalog.adoc[] **** xref:reference:sql/sql-statements/alter-redpanda-catalog.adoc[] **** xref:reference:sql/sql-statements/alter-storage.adoc[] **** xref:reference:sql/sql-statements/alter-table.adoc[] +**** xref:reference:sql/sql-statements/create-iceberg-catalog.adoc[] **** xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[] **** xref:reference:sql/sql-statements/create-storage.adoc[] **** xref:reference:sql/sql-statements/create-table.adoc[] +**** xref:reference:sql/sql-statements/drop-iceberg-catalog.adoc[] **** xref:reference:sql/sql-statements/drop-redpanda-catalog.adoc[] **** xref:reference:sql/sql-statements/drop-storage.adoc[] **** xref:reference:sql/sql-statements/drop-table.adoc[] diff --git a/modules/reference/pages/sql/sql-statements/alter-iceberg-catalog.adoc b/modules/reference/pages/sql/sql-statements/alter-iceberg-catalog.adoc new file mode 100644 index 00000000..abdc6d45 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/alter-iceberg-catalog.adoc @@ -0,0 +1,40 @@ += ALTER ICEBERG CATALOG +:description: The ALTER ICEBERG CATALOG statement modifies connection properties of an existing Iceberg catalog. +:page-topic-type: reference + +The `ALTER ICEBERG CATALOG` statement modifies connection properties of an existing Iceberg catalog. You must repeat the `STORAGE` clause when altering, even if the storage connection is not changing. + +== Syntax + +[source,sql] +---- +ALTER ICEBERG CATALOG [IF EXISTS] catalog_name STORAGE storage_name +WITH (option = 'value' [, ...]); +---- + +* `catalog_name`: Name of the Iceberg catalog to modify. 
+* `IF EXISTS`: Optional. Prevents an error if the Iceberg catalog does not exist. +* `storage_name`: Name of the storage connection backing the catalog. +* `option = 'value'`: One or more connection options to update. See xref:reference:sql/sql-statements/create-iceberg-catalog.adoc[CREATE ICEBERG CATALOG] for the full list of options. + +== Examples + +Update the REST catalog URI for an existing Iceberg catalog: + +[source,sql] +---- +ALTER ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage +WITH (uri = 'https://new-catalog.example.com'); +---- + +Rotate the basic-authentication credentials on an existing Iceberg catalog: + +[source,sql] +---- +ALTER ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage +WITH ( + auth_type = 'basic', + username = '<username>', + password = '<password>' +); +---- diff --git a/modules/reference/pages/sql/sql-statements/create-iceberg-catalog.adoc b/modules/reference/pages/sql/sql-statements/create-iceberg-catalog.adoc new file mode 100644 index 00000000..a9d2da41 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/create-iceberg-catalog.adoc @@ -0,0 +1,208 @@ += CREATE ICEBERG CATALOG +:description: The CREATE ICEBERG CATALOG statement creates a named connection to an Iceberg REST catalog, enabling Redpanda SQL to query Iceberg-translated topic data. +:page-topic-type: reference + +The `CREATE ICEBERG CATALOG` statement creates a named connection to an Iceberg REST catalog. The Iceberg catalog can be queried directly or linked to a Redpanda catalog with `USING CATALOG` so that bridge queries return a unified view of live and Iceberg-committed records. See xref:sql:query-data/query-iceberg-topics.adoc[Query Iceberg topics] for the end-to-end workflow. + +The statement requires an existing xref:reference:sql/sql-statements/create-storage.adoc[storage connection] that holds the object-storage credentials for the Iceberg warehouse.
+ +== Syntax + +[source,sql] +---- +CREATE ICEBERG CATALOG [IF NOT EXISTS] catalog_name STORAGE storage_name + WITH (option = 'value' [, ...]); +---- + +* `catalog_name`: Name for the new Iceberg catalog. +* `IF NOT EXISTS`: Optional. Prevents an error if an Iceberg catalog with the same name already exists. +* `storage_name`: Name of an existing storage connection. Create it first with xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE]. + +== Options + +[cols="<30%,<15%,<10%,<45%",options="header"] +|=== +|Option |Type |Required |Description + +|`uri` +|STRING +|Yes +|REST catalog endpoint URI. + +|`warehouse` +|STRING +|No +|Iceberg warehouse identifier or location. + +|`auth_type` +|STRING +|No +|Authentication type for the REST catalog. One of `oauth2`, `basic`, or `aws_sigv4`. If omitted, the catalog connects without authentication. Providing an auth-specific option (such as `username` or `aws_region`) without `auth_type` is rejected. + +|`oauth2_client_id` +|STRING +|Required when `auth_type = 'oauth2'` +|OAuth2 client ID. + +|`oauth2_client_secret` +|STRING +|Required when `auth_type = 'oauth2'` +|OAuth2 client secret. + +|`oauth2_scope` +|STRING +|No +|OAuth2 scope to request. + +|`oauth2_token_endpoint_url` +|STRING +|No +|OAuth2 token endpoint URL. Use to override the catalog's default token endpoint. + +|`oauth2_token_refresh_margin_seconds` +|INTEGER +|No +|Number of seconds before token expiry to refresh. Must be between 0 and 2147483647. + +|`username` +|STRING +|Required when `auth_type = 'basic'` +|Basic authentication username. + +|`password` +|STRING +|Required when `auth_type = 'basic'` +|Basic authentication password. + +|`aws_region` +|STRING +|Required when `auth_type = 'aws_sigv4'` +|AWS region for SigV4 request signing (for example, `us-west-2`). + +|`aws_access_key_id` +|STRING +|No +|AWS access key ID for SigV4 signing. Must be set together with `aws_secret_access_key`. 
If both are omitted, the catalog uses the AWS default credential chain (environment variables, shared config, STS web identity, IMDSv2/ECS). + +|`aws_secret_access_key` +|STRING +|No +|AWS secret access key for SigV4 signing. See `aws_access_key_id` for credential-chain behavior. + +|`ssl_verify` +|STRING +|No +|`'true'` (default) or `'false'`. Whether to verify the REST catalog's TLS certificate. + +|`ssl_ca_info` +|STRING +|No +|Path to a CA certificate file used to verify the REST catalog's TLS certificate. + +|`ssl_ca_path` +|STRING +|No +|Path to a directory containing CA certificates. + +|`ssl_crl_file` +|STRING +|No +|Path to a certificate revocation list (CRL) file. +|=== + +== Examples + +=== Create a basic Iceberg catalog + +Connect to a REST catalog without authentication. The catalog uses TLS verification by default. + +[source,sql] +---- +CREATE ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage + WITH ( + uri = 'https://catalog.example.com', + warehouse = 's3://warehouse/' + ); +---- + +=== Create an Iceberg catalog with OAuth2 authentication + +[source,sql] +---- +CREATE ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage + WITH ( + uri = 'https://catalog.example.com', + warehouse = 's3://lakehouse-data/', + auth_type = 'oauth2', + oauth2_client_id = '<oauth2-client-id>', + oauth2_client_secret = '<oauth2-client-secret>', + oauth2_scope = 'PRINCIPAL_ROLE:ALL', + oauth2_token_endpoint_url = 'https://auth.example.com/token', + oauth2_token_refresh_margin_seconds = 300 + ); +---- + +=== Create an Iceberg catalog with basic authentication + +[source,sql] +---- +CREATE ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage + WITH ( + uri = 'https://catalog.example.com', + warehouse = 's3://warehouse/', + auth_type = 'basic', + username = '<username>', + password = '<password>' + ); +---- + +=== Create an Iceberg catalog with AWS SigV4 authentication + +Use for REST catalogs fronted by AWS services (such as AWS Glue).
+ +[source,sql] +---- +CREATE ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage + WITH ( + uri = 'https://catalog.example.com', + warehouse = 's3://warehouse/', + auth_type = 'aws_sigv4', + aws_region = 'us-west-2', + aws_access_key_id = '<aws-access-key-id>', + aws_secret_access_key = '<aws-secret-access-key>' + ); +---- + +To use the AWS default credential chain (for example, an EC2 instance-profile role), omit `aws_access_key_id` and `aws_secret_access_key`. They must be set together or omitted together. + +=== Create an Iceberg catalog with custom TLS settings + +[source,sql] +---- +CREATE ICEBERG CATALOG lakehouse_catalog STORAGE iceberg_storage + WITH ( + uri = 'https://catalog.example.com', + warehouse = 's3://warehouse/', + ssl_verify = 'true', + ssl_ca_info = '/etc/ssl/certs/catalog-ca.pem' + ); +---- + +== Related statements + +[cols="<40%,<60%",options="header"] +|=== +|Statement |Description + +|xref:reference:sql/sql-statements/alter-iceberg-catalog.adoc[ALTER ICEBERG CATALOG] +|Modify connection properties of an existing Iceberg catalog. + +|xref:reference:sql/sql-statements/drop-iceberg-catalog.adoc[DROP ICEBERG CATALOG] +|Remove an Iceberg catalog. + +|xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE] +|Create the storage connection that backs the Iceberg catalog. + +|xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG] +|Create a Redpanda catalog. Use `USING CATALOG` to link a Redpanda catalog to an Iceberg catalog for bridge queries.
+|=== diff --git a/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc b/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc index fef4446c..c28a059c 100644 --- a/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc +++ b/modules/reference/pages/sql/sql-statements/create-redpanda-catalog.adoc @@ -9,11 +9,13 @@ The `CREATE REDPANDA CATALOG` statement creates a named connection to a Redpanda [source,sql] ---- CREATE REDPANDA CATALOG [IF NOT EXISTS] catalog_name -WITH (option = 'value' [, ...]); + [USING CATALOG [schema.]iceberg_catalog_name] + WITH (option = 'value' [, ...]); ---- * `catalog_name`: Name for the new catalog connection. * `IF NOT EXISTS`: Optional. Prevents an error if a catalog with the same name already exists. +* `USING CATALOG iceberg_catalog_name`: Optional. Links the Redpanda catalog to an existing Iceberg catalog so that queries against tables in this catalog can return data from both the Redpanda topic and its corresponding Iceberg table in a single result. The Iceberg catalog must already exist. You can qualify the Iceberg catalog name with a schema prefix. == Options @@ -60,6 +62,11 @@ WITH (option = 'value' [, ...]); |STRING |No |Schema Registry authentication password. + +|`pandaproxy_url` +|STRING +|Conditional +|Base URL of the Redpanda HTTP Proxy REST API. Required when the catalog includes a `USING CATALOG` clause; Redpanda SQL uses this endpoint to fetch Iceberg translation state for queries that span the topic and its Iceberg table. |=== == Examples @@ -91,3 +98,18 @@ WITH ( schema_registry_password = 'sr_pass' ); ---- + +=== Create a catalog linked to an Iceberg catalog + +Use the `USING CATALOG` clause to link a Redpanda catalog to an existing Iceberg catalog. Queries against tables in the linked Redpanda catalog can then return a non-overlapping continuum of data from the Redpanda topic and its corresponding Iceberg table in a single result. 
+ +[source,sql] +---- +CREATE REDPANDA CATALOG redpanda_with_iceberg + USING CATALOG lakehouse_catalog + WITH ( + initial_brokers = 'broker1:9092', + schema_registry_url = 'http://schema-registry:8081', + pandaproxy_url = 'http://redpanda:8082' + ); +---- diff --git a/modules/reference/pages/sql/sql-statements/drop-iceberg-catalog.adoc b/modules/reference/pages/sql/sql-statements/drop-iceberg-catalog.adoc new file mode 100644 index 00000000..2a212579 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/drop-iceberg-catalog.adoc @@ -0,0 +1,31 @@ += DROP ICEBERG CATALOG +:description: The DROP ICEBERG CATALOG statement removes an Iceberg catalog connection. +:page-topic-type: reference + +The `DROP ICEBERG CATALOG` statement removes an Iceberg catalog connection. + +== Syntax + +[source,sql] +---- +DROP ICEBERG CATALOG [IF EXISTS] catalog_name; +---- + +* `catalog_name`: Name of the Iceberg catalog to remove. +* `IF EXISTS`: Optional. Prevents an error if the Iceberg catalog does not exist. + +== Examples + +Drop an Iceberg catalog: + +[source,sql] +---- +DROP ICEBERG CATALOG lakehouse_catalog; +---- + +Drop the catalog only if it exists: + +[source,sql] +---- +DROP ICEBERG CATALOG IF EXISTS lakehouse_catalog; +---- diff --git a/modules/sql/pages/query-data/query-iceberg-topics.adoc b/modules/sql/pages/query-data/query-iceberg-topics.adoc new file mode 100644 index 00000000..35842a49 --- /dev/null +++ b/modules/sql/pages/query-data/query-iceberg-topics.adoc @@ -0,0 +1,140 @@ += Query Iceberg topics +:description: Query the Iceberg-translated history of a Redpanda topic, and run a single SQL query that spans live records and historical Iceberg-committed records. 
+:page-topic-type: how-to +:personas: app_developer, data_engineer +:learning-objective-1: Set up the storage, Iceberg catalog, and Redpanda catalog needed to query an Iceberg-enabled topic +:learning-objective-2: Query the Iceberg-committed history of a topic +:learning-objective-3: Run a bridge query that returns live and historical records in a single result + +To query the Iceberg-translated history of a Redpanda topic, you create three objects: a storage connection, an Iceberg catalog, and a Redpanda catalog linked to that Iceberg catalog. The storage connection holds object-storage credentials, the Iceberg catalog exposes the cluster's REST-catalog endpoint, and the linked Redpanda catalog (`CREATE REDPANDA CATALOG ... USING CATALOG <iceberg_catalog_name>`) lets you run bridge queries that span live and historical records, including records that have aged out of the Redpanda topic under its retention settings. + +To query a topic that is not Iceberg-enabled, see xref:sql:query-data/query-streaming-topics.adoc[]. + +After completing these steps, you will be able to: + +* [ ] {learning-objective-1} +* [ ] {learning-objective-2} +* [ ] {learning-objective-3} + +== Prerequisites + +* A Redpanda BYOC cluster on AWS with Redpanda SQL enabled. See xref:sql:get-started/deploy-sql-cluster.adoc[Enable Redpanda SQL]. +* The cluster's xref:reference:properties/cluster-properties.adoc#iceberg_catalog_type[`iceberg_catalog_type`] property is set to `rest`. The `object_storage` catalog type does not support bridge queries from Redpanda SQL. +* The cluster's Iceberg REST catalog is configured. See xref:manage:iceberg/rest-catalog/index.adoc[Integrate with REST Catalogs] for the supported REST catalog options (AWS Glue, Snowflake, Databricks Unity, Polaris, and others) and their configuration steps. +* An Iceberg-enabled Redpanda topic with a schema (Protobuf, Avro, or JSON) registered in Schema Registry.
To enable Iceberg translation on a topic, see xref:manage:iceberg/about-iceberg-topics.adoc[]. +* Connect to Redpanda SQL with `psql` or another PostgreSQL client. See xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL]. + +== Set up the Iceberg query catalogs + +You create three objects, in this order: a storage connection, an Iceberg catalog, and a Redpanda catalog that links to the Iceberg catalog. The storage and Iceberg-catalog options must match the cluster's xref:manage:iceberg/rest-catalog/index.adoc[REST catalog configuration] (endpoint, credentials, region). + +. Create a storage connection that holds the object-storage credentials your cluster's REST catalog uses: ++ +[source,sql] +---- +CREATE STORAGE IF NOT EXISTS iceberg_storage TYPE = S3 WITH ( + endpoint = '<s3-endpoint>', + access_key_id = '<access-key-id>', + secret_access_key = '<secret-access-key>', + region = '<region>', + path_style = 'true' +); +---- + +. Create an Iceberg catalog that points at the cluster's REST catalog endpoint. Replace `<auth-type>` with the authentication type your REST catalog requires (for example, `aws_sigv4`, `oauth2`, or `basic`) and provide the matching credential options: ++ +[source,sql] +---- +CREATE ICEBERG CATALOG IF NOT EXISTS lakehouse_catalog STORAGE iceberg_storage WITH ( + uri = '<rest-catalog-uri>', + warehouse = '<warehouse>', + auth_type = '<auth-type>' +); +---- ++ +For the full option list and per-auth examples, see xref:reference:sql/sql-statements/create-iceberg-catalog.adoc[CREATE ICEBERG CATALOG]. + +. Create a Redpanda catalog linked to the Iceberg catalog with `USING CATALOG`. The `pandaproxy_url` option is required when `USING CATALOG` is set: ++ +[source,sql] +---- +CREATE REDPANDA CATALOG IF NOT EXISTS redpanda_with_iceberg +USING CATALOG lakehouse_catalog WITH ( + initial_brokers = '<broker-host>:<port>', + schema_registry_url = '<schema-registry-url>', + pandaproxy_url = '<pandaproxy-url>' +); +---- + +== Map a topic as a SQL table + +Define a SQL table against the Iceberg-enabled topic in the linked Redpanda catalog.
Replace `orders` and `orders-value` with your topic name and Schema Registry value subject: + +[source,sql] +---- +CREATE TABLE redpanda_with_iceberg=>orders WITH ( + topic = 'orders', + schema_subject = 'orders-value' +); +---- + +Redpanda SQL reads the registered schema from Schema Registry to map fields to SQL columns. + +When you query a table mapped from a Redpanda topic, Redpanda SQL also exposes two reserved metadata columns alongside your schema's columns: + +* `redpanda` (a struct with topic-level metadata such as partition, offset, timestamp, key, and headers) +* `redpanda_raw` (raw key and value bytes) + +These column names are reserved. A topic schema with a top-level `redpanda` or `redpanda_raw` field conflicts with the metadata columns. + +== Query Iceberg-committed data only + +To query only the records that have been translated to Iceberg, query the Iceberg catalog directly. Iceberg-translated topics appear under the `redpanda` namespace by default (the Iceberg data catalog namespace for Redpanda-translated tables): + +[source,sql] +---- +SELECT * FROM lakehouse_catalog=>redpanda.orders LIMIT 10; +---- + +This query does not include records that are still in the live Redpanda topic but have not yet been translated to Iceberg. + +NOTE: Iceberg-translated data persists independently of Redpanda topic retention. Records that have aged out of the Redpanda topic under its retention settings remain available through the Iceberg catalog. + +== Run a bridge query + +// "Bridge query" is the current working term; final naming TBC for v1 publication (qa-questions.md #20). +// TODO: SME — confirm when REFRESH must be run on the linked Iceberg table. Source shows that if the Iceberg table's schema isn't refreshed, the bridge query fails at planning time with: `Schema not found for Iceberg table '<table>'. Run: REFRESH <catalog>=><table>`. Confirm: +// - Is REFRESH required only the first time, or every time the Iceberg schema changes? +// - Is REFRESH required when new records are added to the Iceberg table (no schema change), or only on schema change? +// Once confirmed, add a prerequisite or step that tells users when to run REFRESH. +A bridge query returns a non-overlapping continuum of records from both the live Redpanda topic and its Iceberg-committed history in a single result, including records that have aged out of the Redpanda topic under its retention settings. Query the table in the linked Redpanda catalog (the one with `USING CATALOG` set) using standard `SELECT` syntax: + +[source,sql] +---- +SELECT * FROM redpanda_with_iceberg=>orders LIMIT 10; +---- + +You don't write a `UNION ALL` because Redpanda SQL plans the union for you, and rows aren't duplicated at the boundary between live and historical data. + +// TODO: Verify with engineering whether there are workload patterns that reliably trigger longer planning, and document them if so (qa-questions.md #22). +NOTE: Bridge queries require additional planning to combine the live Redpanda topic with the Iceberg-committed history, so they may take slightly longer to execute than queries against Iceberg-committed data alone. If you only need historical data, query the Iceberg catalog directly. + +== Schema compatibility between the topic and the Iceberg table + +// TODO: Verify use cases for schema divergence between the topic and the Iceberg table + +For a bridge query against a linked Redpanda catalog, the topic and the Iceberg table can have different schemas, within these rules: + +* The topic's Schema Registry value subject is canonical. The bridge query result's column order, count, and types come from the topic schema. +* The Iceberg table can be a name-subset of the topic schema. If the Iceberg table is missing a column that the topic schema has, Redpanda SQL returns `NULL` for that column on Iceberg-side rows.
+* The Iceberg table cannot have columns the topic schema does not. If it does, the bridge query fails at planning time with the error `Kafka schema must be a name-superset of Iceberg schema`. To resolve, either drop the extra Iceberg columns or add the corresponding fields to the topic's Schema Registry subject. + +== Next steps + +* xref:sql:query-data/query-streaming-topics.adoc[Query streaming topics]: query a topic that is not Iceberg-enabled. +* xref:manage:iceberg/rest-catalog/index.adoc[Integrate with REST Catalogs]: managed REST catalog options and their configuration. +* xref:manage:iceberg/use-iceberg-catalogs.adoc[Use Iceberg Catalogs]: more on how Redpanda exposes Iceberg-translated topic data. +* xref:reference:sql/sql-statements/create-storage.adoc[CREATE STORAGE]: full reference for the storage statement. +* xref:reference:sql/sql-statements/create-iceberg-catalog.adoc[CREATE ICEBERG CATALOG]: full reference for the Iceberg catalog statement. +* xref:reference:sql/sql-statements/create-redpanda-catalog.adoc[CREATE REDPANDA CATALOG]: full reference for the Redpanda catalog statement, including the `USING CATALOG` clause. +* xref:reference:sql/index.adoc[Redpanda SQL Reference]: supported SQL statements, clauses, data types, and functions.