diff --git a/Makefile b/Makefile index de9d6ad..e5a4095 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,7 @@ generate-configuration: EXAMPLES := \ examples/gitopsstacks/minimal.yaml:: \ examples/gitopsstacks/standard.yaml:: \ + examples/gitopsstacks/nodepool.yaml:: \ examples/gitopsstacks/standard.yaml::examples/test/mocks/observed-resources/standard/steps/1/ \ examples/gitopsstacks/standard.yaml::examples/test/mocks/observed-resources/standard/steps/2/ \ examples/gitopsstacks/eso.yaml:: \ diff --git a/apis/gitopsstacks/definition.yaml b/apis/gitopsstacks/definition.yaml index 75796fb..132aff0 100644 --- a/apis/gitopsstacks/definition.yaml +++ b/apis/gitopsstacks/definition.yaml @@ -76,6 +76,34 @@ spec: enum: - ProviderConfig - ClusterProviderConfig + nodePool: + description: Optional dedicated Karpenter NodePool for GitOps workloads. When enabled, ArgoCD pods are scheduled with workload-type=gitops and tolerate gitops=true:NoSchedule. + type: object + properties: + enabled: + description: Whether to create a dedicated NodePool. Defaults to false. + type: boolean + default: false + name: + description: NodePool name on the target cluster. Defaults to "hops-gitops". + type: string + nodeClassName: + description: EKS Auto Mode NodeClass to reference. Defaults to "hops-default". + type: string + limits: + description: Karpenter NodePool resource limits. Defaults to nodes=10. + type: object + x-kubernetes-preserve-unknown-fields: true + requirements: + description: Karpenter scheduling requirements. Defaults to amd64/linux spot or on-demand nodes. + type: array + items: + type: object + x-kubernetes-preserve-unknown-fields: true + disruption: + description: Karpenter disruption settings. Defaults to WhenEmptyOrUnderutilized after 60s. + type: object + x-kubernetes-preserve-unknown-fields: true namespace: description: Namespace for ArgoCD. Defaults to argocd. type: string diff --git a/examples/gitopsstacks/nodepool.yaml b/examples/gitopsstacks/nodepool.yaml new file mode 100644 index 0000000..7710947 --- /dev/null +++ b/examples/gitopsstacks/nodepool.yaml @@ -0,0 +1,14 @@ +apiVersion: hops.ops.com.ai/v1alpha1 +kind: GitopsStack +metadata: + name: gitops + namespace: example-env +spec: + clusterName: example-cluster + labels: + team: platform + nodePool: + enabled: true + repository: + org: hops-ops + name: example-cluster-gitops diff --git a/functions/render/000-state-init.yaml.gotmpl b/functions/render/000-state-init.yaml.gotmpl index 61e404a..65cec23 100644 --- a/functions/render/000-state-init.yaml.gotmpl +++ b/functions/render/000-state-init.yaml.gotmpl @@ -43,6 +43,36 @@ "kind" ($ghProviderConfigRef.kind | default "ProviderConfig") }} +# ============================================================================== +# NodePool configuration +# ============================================================================== +{{- $nodePoolSpec := $spec.nodePool | default dict }} +{{- $nodePoolEnabled := false }} +{{- if hasKey $nodePoolSpec "enabled" }} + {{- $nodePoolEnabled = $nodePoolSpec.enabled }} +{{- end }} +{{- $nodePoolName := $nodePoolSpec.name | default "hops-gitops" }} +{{- $nodePoolNodeClassName := $nodePoolSpec.nodeClassName | default "hops-default" }} +{{- $nodePoolLimits := $nodePoolSpec.limits | default (dict "nodes" 10) }} +{{- $nodePoolRequirements := $nodePoolSpec.requirements | default (list + (dict "key" "karpenter.sh/capacity-type" "operator" "In" "values" (list "spot" "on-demand")) + (dict "key" "eks.amazonaws.com/instance-category" "operator" "In" "values" (list "c" "m" "r")) + (dict "key" "eks.amazonaws.com/instance-generation" "operator" "Gt" "values" (list "4")) + (dict "key" "eks.amazonaws.com/instance-memory" "operator" "Gt" "values" (list "7999")) + (dict "key" "eks.amazonaws.com/instance-cpu" "operator" "Gt" "values" (list "1")) + (dict "key" "kubernetes.io/arch" "operator" "In" "values" (list "amd64")) + (dict "key" "kubernetes.io/os" "operator" "In" "values" (list "linux")) +) }} +{{- $nodePoolDisruption := $nodePoolSpec.disruption | default (dict "consolidationPolicy" "WhenEmptyOrUnderutilized" "consolidateAfter" "60s") }} +{{- $nodePoolTaintKey := "gitops" }} +{{- $nodePoolTaintValue := "true" }} +{{- $nodePoolNodeSelector := dict }} +{{- $nodePoolTolerations := list }} +{{- if $nodePoolEnabled }} + {{- $nodePoolNodeSelector = dict "workload-type" "gitops" }} + {{- $nodePoolTolerations = list (dict "key" $nodePoolTaintKey "operator" "Equal" "value" $nodePoolTaintValue "effect" "NoSchedule") }} +{{- end }} + # ============================================================================== # ArgoCD configuration # ============================================================================== @@ -84,6 +114,18 @@ "helmProviderConfigRef" $helmProviderConfigRef "kubernetesProviderConfigRef" $k8sProviderConfigRef "githubProviderConfigRef" $ghProviderConfigRef + "nodePool" (dict + "enabled" $nodePoolEnabled + "name" $nodePoolName + "nodeClassName" $nodePoolNodeClassName + "limits" $nodePoolLimits + "requirements" $nodePoolRequirements + "disruption" $nodePoolDisruption + "taintKey" $nodePoolTaintKey + "taintValue" $nodePoolTaintValue + "nodeSelector" $nodePoolNodeSelector + "tolerations" $nodePoolTolerations + ) "argocd" (dict "name" ($argocdSpec.name | default "argocd") "namespace" ($argocdSpec.namespace | default $namespace) diff --git a/functions/render/155-nodepool.yaml.gotmpl b/functions/render/155-nodepool.yaml.gotmpl new file mode 100644 index 0000000..cc6dfce --- /dev/null +++ b/functions/render/155-nodepool.yaml.gotmpl @@ -0,0 +1,64 @@ +# code: language=yaml +# +# Optional Karpenter NodePool for GitOps workloads. +# + +{{- if $state.nodePool.enabled }} +--- +apiVersion: kubernetes.m.crossplane.io/v1alpha1 +kind: Object +metadata: + name: {{ $state.name }}-nodepool-gitops + annotations: + {{ setResourceNameAnnotation "nodepool-gitops" }} + labels: {{ $state.labels | toJson }} +spec: + managementPolicies: {{ $state.managementPolicies | toJson }} + forProvider: + manifest: + apiVersion: karpenter.sh/v1 + kind: NodePool + metadata: + name: {{ $state.nodePool.name }} + spec: + template: + metadata: + labels: + workload-type: gitops + spec: + nodeClassRef: + group: eks.amazonaws.com + kind: NodeClass + name: {{ $state.nodePool.nodeClassName }} + taints: + - key: {{ $state.nodePool.taintKey }} + value: {{ $state.nodePool.taintValue | quote }} + effect: NoSchedule + requirements: {{ $state.nodePool.requirements | toJson }} + limits: {{ $state.nodePool.limits | toJson }} + disruption: {{ $state.nodePool.disruption | toJson }} + providerConfigRef: + name: {{ $state.kubernetesProviderConfigRef.name }} + kind: {{ $state.kubernetesProviderConfigRef.kind }} + +--- +apiVersion: protection.crossplane.io/v1beta1 +kind: Usage +metadata: + name: {{ $state.name }}-delete-argocd-before-nodepool + annotations: + {{ setResourceNameAnnotation "usage-argocd-before-nodepool" }} + labels: {{ $state.labels | toJson }} +spec: + replayDeletion: true + of: + apiVersion: kubernetes.m.crossplane.io/v1alpha1 + kind: Object + resourceRef: + name: {{ $state.name }}-nodepool-gitops + by: + apiVersion: helm.m.crossplane.io/v1beta1 + kind: Release + resourceRef: + name: {{ $state.argocd.name }} +{{- end }} diff --git a/functions/render/200-helm-release-argocd.yaml.gotmpl b/functions/render/200-helm-release-argocd.yaml.gotmpl index e929d75..88e955d 100644 --- a/functions/render/200-helm-release-argocd.yaml.gotmpl +++ b/functions/render/200-helm-release-argocd.yaml.gotmpl @@ -35,37 +35,42 @@ spec: CPU request only, no limit so it can burst. The rest are Burstable restart-cheap controllers/webhooks/proxies. See references/karpenter-resource-best-practices §7. */}} - {{- /* application-controller P95 observed at 1047Mi on pat-local — needs - real headroom beyond the chart's "small cluster" sizing. 1.5Gi covers - the cluster-wide state cache for our app count with margin. */}} + {{- /* application-controller is sized from current VPA/Goldilocks + recommendations on pat-local. */}} {{- $controllerResources := dict - "requests" (dict "cpu" "250m" "memory" "1536Mi") - "limits" (dict "memory" "1536Mi") + "requests" (dict "cpu" "548m" "memory" "2062Mi") + "limits" (dict "memory" "2062Mi") }} {{- $repoServerResources := dict - "requests" (dict "cpu" "100m" "memory" "256Mi") + "requests" (dict "cpu" "15m" "memory" "100Mi") "limits" (dict "cpu" "500m" "memory" "512Mi") }} {{- $serverResources := dict - "requests" (dict "cpu" "50m" "memory" "128Mi") - "limits" (dict "cpu" "200m" "memory" "256Mi") + "requests" (dict "cpu" "15m" "memory" "100Mi") + "limits" (dict "cpu" "250m" "memory" "256Mi") }} {{- $smallControllerResources := dict - "requests" (dict "cpu" "25m" "memory" "64Mi") - "limits" (dict "cpu" "100m" "memory" "128Mi") + "requests" (dict "cpu" "15m" "memory" "100Mi") + "limits" (dict "cpu" "100m" "memory" "256Mi") }} {{- $redisResources := dict - "requests" (dict "cpu" "50m" "memory" "64Mi") - "limits" (dict "cpu" "100m" "memory" "128Mi") + "requests" (dict "cpu" "15m" "memory" "100Mi") + "limits" (dict "cpu" "100m" "memory" "256Mi") }} + {{- $globalValues := dict + "monitoring" (dict + "enabled" true + "serviceMonitor" (dict "enabled" true) + ) + "nodeSelector" (dict "kubernetes.io/os" "linux") + }} + {{- if $state.nodePool.enabled }} + {{- $_ := set $globalValues "nodeSelector" (mergeOverwrite (dict "kubernetes.io/os" "linux") $state.nodePool.nodeSelector) }} + {{- $_ := set $globalValues "tolerations" $state.nodePool.tolerations }} + {{- end }} {{- $chartDefaults := dict "crds" (dict "install" true) - "global" (dict - "monitoring" (dict - "enabled" true - "serviceMonitor" (dict "enabled" true) - ) - ) + "global" $globalValues "controller" (dict "resources" $controllerResources) "repoServer" (dict "resources" $repoServerResources) "server" (dict "resources" $serverResources) diff --git a/tests/test-render/main.k b/tests/test-render/main.k index 81c01e3..52fbe51 100644 --- a/tests/test-render/main.k +++ b/tests/test-render/main.k @@ -755,6 +755,81 @@ _items = [ ] } } + + # ========================================================================== + # Test 18: nodePool enables Argo CD placement and renders a Karpenter NodePool + # ========================================================================== + metav1alpha1.CompositionTest { + metadata.name = "nodepool-adds-argocd-placement-and-resources" + spec = { + compositionPath = "apis/gitopsstacks/composition.yaml" + xrdPath = "apis/gitopsstacks/definition.yaml" + timeoutSeconds = 60 + validate = False + xr = stacksv1alpha1.GitopsStack { + metadata.name = "nodepool-test" + spec = { + clusterName = "test-cluster" + repository.org = "hops-ops" + nodePool.enabled = True + } + } + assertResources = [ + { + apiVersion = "helm.m.crossplane.io/v1beta1" + kind = "Release" + metadata.name = "argocd" + spec.forProvider.values = { + global = { + nodeSelector = { + "kubernetes.io/os" = "linux" + "workload-type" = "gitops" + } + tolerations = [ + { + key = "gitops" + operator = "Equal" + value = "true" + effect = "NoSchedule" + } + ] + } + controller.resources.requests = { + cpu = "548m" + memory = "2062Mi" + } + repoServer.resources.requests = { + cpu = "15m" + memory = "100Mi" + } + } + } + { + apiVersion = "kubernetes.m.crossplane.io/v1alpha1" + kind = "Object" + metadata.name = "nodepool-test-nodepool-gitops" + spec.forProvider.manifest = { + apiVersion = "karpenter.sh/v1" + kind = "NodePool" + metadata.name = "hops-gitops" + spec.template.metadata.labels = {"workload-type" = "gitops"} + spec.template.spec.taints = [ + { + key = "gitops" + value = "true" + effect = "NoSchedule" + } + ] + } + } + { + apiVersion = "protection.crossplane.io/v1beta1" + kind = "Usage" + metadata.name = "nodepool-test-delete-argocd-before-nodepool" + } + ] + } + } ] items = _items