apache · ayushtkn · May 25, 2026 · May 26, 2026 · May 28, 2026 · May 28, 2026
diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md
diff --git a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml
@@ -44,6 +44,70 @@ spec:
               hiveServer2:
                 description: HiveServer2 component configuration
                 properties:
+                  autoscaling:
+                    description: Autoscaling configuration (requires KEDA installed
+                      in the cluster)
+                    properties:
+                      activationCpuValue:
+                        description: CPU average value below which the trigger is
+                          inactive. Required if targetCpuValue is set.
+                        type: string
+                      cooldownSeconds:
+                        default: 600
+                        description: Cooldown period in seconds after all KEDA triggers
+                          are inactive before scaling from 1 to 0 (scale-to-zero delay)
+                        type: integer
+                      enabled:
+                        default: false
+                        description: Whether autoscaling is enabled for this component
+                        type: boolean
+                      gracePeriodSeconds:
+                        default: 3600
+                        description: Maximum time in seconds to wait for graceful
+                          drain during scale-down before the pod is forcibly terminated.
+                          The pod terminates immediately once sessions/connections
+                          drain to 0; this value is only the upper safety cap.
+                        type: integer
+                      metricsScrapeIntervalSeconds:
+                        default: 10
+                        description: Prometheus scrape interval in seconds for this
+                          component's metrics. Lower values make autoscaling react
+                          faster but increase Prometheus load.
+                        type: integer
+                      minReplicas:
+                        default: 0
+                        description: Minimum number of replicas (floor for scale-down).
+                          Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on
+                          for wake-from-zero)
+                        type: integer
+                      scaleDownStabilizationSeconds:
+                        default: 300
+                        description: Stabilization window in seconds for scale-down
+                          decisions. HPA picks the highest recommendation within this
+                          window to prevent premature scale-down.
+                        type: integer
+                      scaleDownThreshold:
+                        default: 20
+                        description: Threshold that triggers scale-down for Prometheus-based
+                          metrics
+                        type: integer
+                      scaleUpStabilizationSeconds:
+                        default: 60
+                        description: Stabilization window in seconds for scale-up
+                          decisions. HPA picks the lowest recommendation within this
+                          window to prevent flapping.
+                        type: integer
+                      scaleUpThreshold:
+                        default: 80
+                        description: "Threshold that triggers scale-up (component-specific:\
+                          \ sessions for HS2, connections for HMS, queue depth for\
+                          \ LLAP, pending tasks for TezAM)"
+                        type: integer
+                      targetCpuValue:
+                        description: "Target CPU average value for scaling (e.g.,\
+                          \ '1500m' or '1'). If omitted, CPU scaling is disabled."
+                        type: string
+                    type: object
                   configOverrides:
                     additionalProperties:
                       type: string
@@ -152,6 +216,70 @@ spec:
               llap:
                 description: LLAP daemon configuration. Enabled by default.
                 properties:
+                  autoscaling:
+                    description: Autoscaling configuration (requires KEDA installed
+                      in the cluster)
+                    properties:
+                      activationCpuValue:
+                        description: CPU average value below which the trigger is
+                          inactive. Required if targetCpuValue is set.
+                        type: string
+                      cooldownSeconds:
+                        default: 600
+                        description: Cooldown period in seconds after all KEDA triggers
+                          are inactive before scaling from 1 to 0 (scale-to-zero delay)
+                        type: integer
+                      enabled:
+                        default: false
+                        description: Whether autoscaling is enabled for this component
+                        type: boolean
+                      gracePeriodSeconds:
+                        default: 3600
+                        description: Maximum time in seconds to wait for graceful
+                          drain during scale-down before the pod is forcibly terminated.
+                          The pod terminates immediately once sessions/connections
+                          drain to 0; this value is only the upper safety cap.
+                        type: integer
+                      metricsScrapeIntervalSeconds:
+                        default: 10
+                        description: Prometheus scrape interval in seconds for this
+                          component's metrics. Lower values make autoscaling react
+                          faster but increase Prometheus load.
+                        type: integer
+                      minReplicas:
+                        default: 0
+                        description: Minimum number of replicas (floor for scale-down).
+                          Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on
+                          for wake-from-zero)
+                        type: integer
+                      scaleDownStabilizationSeconds:
+                        default: 300
+                        description: Stabilization window in seconds for scale-down
+                          decisions. HPA picks the highest recommendation within this
+                          window to prevent premature scale-down.
+                        type: integer
+                      scaleDownThreshold:
+                        default: 20
+                        description: Threshold that triggers scale-down for Prometheus-based
+                          metrics
+                        type: integer
+                      scaleUpStabilizationSeconds:
+                        default: 60
+                        description: Stabilization window in seconds for scale-up
+                          decisions. HPA picks the lowest recommendation within this
+                          window to prevent flapping.
+                        type: integer
+                      scaleUpThreshold:
+                        default: 80
+                        description: "Threshold that triggers scale-up (component-specific:\
+                          \ sessions for HS2, connections for HMS, queue depth for\
+                          \ LLAP, pending tasks for TezAM)"
+                        type: integer
+                      targetCpuValue:
+                        description: "Target CPU average value for scaling (e.g.,\
+                          \ '1500m' or '1'). If omitted, CPU scaling is disabled."
+                        type: string
+                    type: object
                   configOverrides:
                     additionalProperties:
                       type: string
@@ -235,6 +363,70 @@ spec:
               metastore:
                 description: Metastore component configuration
                 properties:
+                  autoscaling:
+                    description: Autoscaling configuration (requires KEDA installed
+                      in the cluster)
+                    properties:
+                      activationCpuValue:
+                        description: CPU average value below which the trigger is
+                          inactive. Required if targetCpuValue is set.
+                        type: string
+                      cooldownSeconds:
+                        default: 600
+                        description: Cooldown period in seconds after all KEDA triggers
+                          are inactive before scaling from 1 to 0 (scale-to-zero delay)
+                        type: integer
+                      enabled:
+                        default: false
+                        description: Whether autoscaling is enabled for this component
+                        type: boolean
+                      gracePeriodSeconds:
+                        default: 3600
+                        description: Maximum time in seconds to wait for graceful
+                          drain during scale-down before the pod is forcibly terminated.
+                          The pod terminates immediately once sessions/connections
+                          drain to 0; this value is only the upper safety cap.
+                        type: integer
+                      metricsScrapeIntervalSeconds:
+                        default: 10
+                        description: Prometheus scrape interval in seconds for this
+                          component's metrics. Lower values make autoscaling react
+                          faster but increase Prometheus load.
+                        type: integer
+                      minReplicas:
+                        default: 0
+                        description: Minimum number of replicas (floor for scale-down).
+                          Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on
+                          for wake-from-zero)
+                        type: integer
+                      scaleDownStabilizationSeconds:
+                        default: 300
+                        description: Stabilization window in seconds for scale-down
+                          decisions. HPA picks the highest recommendation within this
+                          window to prevent premature scale-down.
+                        type: integer
+                      scaleDownThreshold:
+                        default: 20
+                        description: Threshold that triggers scale-down for Prometheus-based
+                          metrics
+                        type: integer
+                      scaleUpStabilizationSeconds:
+                        default: 60
+                        description: Stabilization window in seconds for scale-up
+                          decisions. HPA picks the lowest recommendation within this
+                          window to prevent flapping.
+                        type: integer
+                      scaleUpThreshold:
+                        default: 80
+                        description: "Threshold that triggers scale-up (component-specific:\
+                          \ sessions for HS2, connections for HMS, queue depth for\
+                          \ LLAP, pending tasks for TezAM)"
+                        type: integer
+                      targetCpuValue:
+                        description: "Target CPU average value for scaling (e.g.,\
+                          \ '1500m' or '1'). If omitted, CPU scaling is disabled."
+                        type: string
+                    type: object
                   configOverrides:
                     additionalProperties:
                       type: string
@@ -371,6 +563,70 @@ spec:
               tezAm:
                 description: Tez Application Master configuration. Enabled by default.
                 properties:
+                  autoscaling:
+                    description: Autoscaling configuration (requires KEDA installed
+                      in the cluster)
+                    properties:
+                      activationCpuValue:
+                        description: CPU average value below which the trigger is
+                          inactive. Required if targetCpuValue is set.
+                        type: string
+                      cooldownSeconds:
+                        default: 600
+                        description: Cooldown period in seconds after all KEDA triggers
+                          are inactive before scaling from 1 to 0 (scale-to-zero delay)
+                        type: integer
+                      enabled:
+                        default: false
+                        description: Whether autoscaling is enabled for this component
+                        type: boolean
+                      gracePeriodSeconds:
+                        default: 3600
+                        description: Maximum time in seconds to wait for graceful
+                          drain during scale-down before the pod is forcibly terminated.
+                          The pod terminates immediately once sessions/connections
+                          drain to 0; this value is only the upper safety cap.
+                        type: integer
+                      metricsScrapeIntervalSeconds:
+                        default: 10
+                        description: Prometheus scrape interval in seconds for this
+                          component's metrics. Lower values make autoscaling react
+                          faster but increase Prometheus load.
+                        type: integer
+                      minReplicas:
+                        default: 0
+                        description: Minimum number of replicas (floor for scale-down).
+                          Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on
+                          for wake-from-zero)
+                        type: integer
+                      scaleDownStabilizationSeconds:
+                        default: 300
+                        description: Stabilization window in seconds for scale-down
+                          decisions. HPA picks the highest recommendation within this
+                          window to prevent premature scale-down.
+                        type: integer
+                      scaleDownThreshold:
+                        default: 20
+                        description: Threshold that triggers scale-down for Prometheus-based
+                          metrics
+                        type: integer
+                      scaleUpStabilizationSeconds:
+                        default: 60
+                        description: Stabilization window in seconds for scale-up
+                          decisions. HPA picks the lowest recommendation within this
+                          window to prevent flapping.
+                        type: integer
+                      scaleUpThreshold:
+                        default: 80
+                        description: "Threshold that triggers scale-up (component-specific:\
+                          \ sessions for HS2, connections for HMS, queue depth for\
+                          \ LLAP, pending tasks for TezAM)"
+                        type: integer
+                      targetCpuValue:
+                        description: "Target CPU average value for scaling (e.g.,\
+                          \ '1500m' or '1'). If omitted, CPU scaling is disabled."
+                        type: string
+                    type: object
                   configOverrides:
                     additionalProperties:
                       type: string

diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml
@@ -50,3 +50,15 @@ rules:
   - apiGroups: [""]
     resources: ["pods"]
     verbs: ["get", "list", "watch"]
+  # PodDisruptionBudgets for graceful autoscaling
+  - apiGroups: ["policy"]
+    resources: ["poddisruptionbudgets"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  # KEDA ScaledObjects and TriggerAuthentications for autoscaling
+  - apiGroups: ["keda.sh"]
+    resources: ["scaledobjects", "triggerauthentications"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  # KEDA HTTP Add-on for scale-to-zero (wake-from-zero on HTTP request)
+  - apiGroups: ["http.keda.sh"]
+    resources: ["httpscaledobjects", "interceptorroutes"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]