From 227e9394157bd31d6f85da3324ca80af4e393f7d Mon Sep 17 00:00:00 2001 From: Lyon Date: Thu, 14 May 2026 10:04:27 +0900 Subject: [PATCH] feat: add webhook filtering by failure type Introduce WEBHOOK_EXCLUDE_TYPES config option to exclude specific failure types from webhook/S3/CloudWatch output, regardless of category or severity. - WEBHOOK_EXCLUDE_TYPES: comma-separated list of failure types to exclude (e.g. ErrImagePull, ContainerCreating) This filter applies as AND logic with existing WEBHOOK_SKIP_CATEGORIES and WEBHOOK_MIN_SEVERITY filters. Unset values preserve existing behavior. Useful for filtering transient states that kubelet retries automatically (e.g. ErrImagePull transitions to ImagePullBackOff). Affected files: config/config.go, controller/pod_controller.go, examples/04-configmap.yaml, README.md Signed-off-by: Lyon --- containers/devops-agent-operator/README.md | 18 +++++++++++++--- .../examples/04-configmap.yaml | 6 ++++++ .../internal/config/config.go | 21 +++++++++++++++++-- .../internal/controller/pod_controller.go | 2 +- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/containers/devops-agent-operator/README.md b/containers/devops-agent-operator/README.md index 9eee2e2..c6879c4 100644 --- a/containers/devops-agent-operator/README.md +++ b/containers/devops-agent-operator/README.md @@ -110,10 +110,11 @@ ContainerCreating, Unschedulable 같은 일시적 상태는 설정된 대기 시 | `FAILURE_RECHECK_INTERVAL` | 타임아웃 재확인 간격 | `1m` | | `WEBHOOK_SKIP_CATEGORIES` | 웹훅/S3/CloudWatch 출력을 건너뛸 감지 레이어 (쉼표 구분) | - | | `WEBHOOK_MIN_SEVERITY` | 출력을 트리거할 최소 심각도 | - | +| `WEBHOOK_EXCLUDE_TYPES` | 웹훅/S3/CloudWatch 출력을 건너뛸 장애 유형 (쉼표 구분) | - | #### 출력 필터링 -`WEBHOOK_SKIP_CATEGORIES`와 `WEBHOOK_MIN_SEVERITY`는 AND 조건으로 동작합니다. 두 조건을 모두 통과해야 CloudWatch Logs, S3, Webhook 출력이 실행됩니다. 미설정 시 모든 장애에 대해 출력이 실행됩니다. +`WEBHOOK_SKIP_CATEGORIES`, `WEBHOOK_MIN_SEVERITY`, `WEBHOOK_EXCLUDE_TYPES`는 AND 조건으로 동작합니다. 세 조건을 모두 통과해야 CloudWatch Logs, S3, Webhook 출력이 실행됩니다. 미설정 시 모든 장애에 대해 출력이 실행됩니다. 
**WEBHOOK_SKIP_CATEGORIES** — 특정 감지 레이어의 장애를 출력에서 제외합니다. @@ -133,12 +134,23 @@ WEBHOOK_SKIP_CATEGORIES=PodPhase,PodCondition WEBHOOK_MIN_SEVERITY=HIGH ``` -두 옵션을 조합하면 더 세밀하게 제어할 수 있습니다. +**WEBHOOK_EXCLUDE_TYPES** — 특정 장애 유형을 카테고리나 심각도와 무관하게 출력에서 제외합니다. + +유효한 값: 장애 유형 문자열 (예: `ErrImagePull`, `ContainerCreating`, `Unschedulable`) + +``` +# ErrImagePull은 일시적 상태로 kubelet이 자동 재시도하므로 제외 +# (지속되면 ImagePullBackOff로 전환되며 이는 제외되지 않음) +WEBHOOK_EXCLUDE_TYPES=ErrImagePull +``` + +세 옵션을 조합하면 더 세밀하게 제어할 수 있습니다. ``` -# PodPhase는 무조건 제외 + 나머지는 HIGH 이상만 출력 +# PodPhase, PodCondition은 무조건 제외 + 나머지는 HIGH 이상만 출력 + ErrImagePull 제외 WEBHOOK_SKIP_CATEGORIES=PodPhase,PodCondition WEBHOOK_MIN_SEVERITY=HIGH +WEBHOOK_EXCLUDE_TYPES=ErrImagePull ``` ## IAM 권한 diff --git a/containers/devops-agent-operator/examples/04-configmap.yaml b/containers/devops-agent-operator/examples/04-configmap.yaml index 6fe679d..93b9604 100644 --- a/containers/devops-agent-operator/examples/04-configmap.yaml +++ b/containers/devops-agent-operator/examples/04-configmap.yaml @@ -29,3 +29,9 @@ data: # Empty means all severities trigger webhooks (default behavior). # Example: "HIGH" → only HIGH and CRITICAL failures trigger webhooks WEBHOOK_MIN_SEVERITY: "" + # Comma-separated list of failure types to exclude from webhook calls, + # regardless of category or severity. + # e.g., "ErrImagePull" - transient state that kubelet retries automatically; + # persistent failures transition to ImagePullBackOff which is not excluded. + # Empty means no failure types are excluded (default behavior). 
+ WEBHOOK_EXCLUDE_TYPES: "" diff --git a/containers/devops-agent-operator/internal/config/config.go b/containers/devops-agent-operator/internal/config/config.go index 4b19692..4c36546 100644 --- a/containers/devops-agent-operator/internal/config/config.go +++ b/containers/devops-agent-operator/internal/config/config.go @@ -90,6 +90,12 @@ type Config struct { // Valid values: LOW, MEDIUM, HIGH, CRITICAL // Empty means all severities trigger webhooks. WebhookMinSeverity string + + // WebhookExcludeTypes is a list of failure types that should not trigger webhook calls, + // regardless of category or severity. + // e.g., "ErrImagePull" - transient state that kubelet retries automatically. + // Empty means no failure types are excluded. + WebhookExcludeTypes []string } // DefaultConfig returns a Config with default values @@ -201,6 +207,10 @@ func LoadFromEnv() *Config { cfg.WebhookMinSeverity = strings.ToUpper(strings.TrimSpace(v)) } + if v := os.Getenv("WEBHOOK_EXCLUDE_TYPES"); v != "" { + cfg.WebhookExcludeTypes = splitAndTrim(v, ",") + } + return cfg } @@ -249,11 +259,12 @@ var severityLevel = map[string]int{ } // ShouldSendWebhook returns true if the failure should trigger a webhook call. -// Both conditions must pass (AND logic): +// All conditions must pass (AND logic): // - The failure category must not be in WebhookSkipCategories // - The failure severity must meet or exceed WebhookMinSeverity +// - The failure type must not be in WebhookExcludeTypes // -// If neither filter is configured, always returns true (default behavior preserved). +// If no filters are configured, always returns true (default behavior preserved). 
-func (c *Config) ShouldSendWebhook(category, severity string) bool { +func (c *Config) ShouldSendWebhook(category, severity, failureType string) bool { // Category filter: skip if category is in the skip list for _, cat := range c.WebhookSkipCategories { @@ -272,6 +283,13 @@ func (c *Config) ShouldSendWebhook(category, severity string) bool { } } + // Type filter: skip if failure type is in the exclude list + for _, t := range c.WebhookExcludeTypes { + if t == failureType { + return false + } + } + return true } diff --git a/containers/devops-agent-operator/internal/controller/pod_controller.go b/containers/devops-agent-operator/internal/controller/pod_controller.go index e096a86..02e0796 100644 --- a/containers/devops-agent-operator/internal/controller/pod_controller.go +++ b/containers/devops-agent-operator/internal/controller/pod_controller.go @@ -146,7 +146,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R // Evaluate filter once for all outputs severity := collector.DetermineSeverity(failure) - shouldSend := r.Config.ShouldSendWebhook(failure.Category, severity) + shouldSend := r.Config.ShouldSendWebhook(failure.Category, severity, failure.Type) if !shouldSend { logger.Info("Outputs skipped by filter",