Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions containers/devops-agent-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,11 @@ ContainerCreating, Unschedulable 같은 일시적 상태는 설정된 대기 시
| `FAILURE_RECHECK_INTERVAL` | 타임아웃 재확인 간격 | `1m` |
| `WEBHOOK_SKIP_CATEGORIES` | 웹훅/S3/CloudWatch 출력을 건너뛸 감지 레이어 (쉼표 구분) | - |
| `WEBHOOK_MIN_SEVERITY` | 출력을 트리거할 최소 심각도 | - |
| `WEBHOOK_EXCLUDE_TYPES` | 웹훅/S3/CloudWatch 출력을 건너뛸 장애 유형 (쉼표 구분) | - |

#### 출력 필터링

`WEBHOOK_SKIP_CATEGORIES`와 `WEBHOOK_MIN_SEVERITY`는 AND 조건으로 동작합니다. 조건을 모두 통과해야 CloudWatch Logs, S3, Webhook 출력이 실행됩니다. 미설정 시 모든 장애에 대해 출력이 실행됩니다.
`WEBHOOK_SKIP_CATEGORIES`, `WEBHOOK_MIN_SEVERITY`, `WEBHOOK_EXCLUDE_TYPES`는 AND 조건으로 동작합니다. 조건을 모두 통과해야 CloudWatch Logs, S3, Webhook 출력이 실행됩니다. 미설정 시 모든 장애에 대해 출력이 실행됩니다.

**WEBHOOK_SKIP_CATEGORIES** — 특정 감지 레이어의 장애를 출력에서 제외합니다.

Expand All @@ -133,12 +134,23 @@ WEBHOOK_SKIP_CATEGORIES=PodPhase,PodCondition
WEBHOOK_MIN_SEVERITY=HIGH
```

두 옵션을 조합하면 더 세밀하게 제어할 수 있습니다.
**WEBHOOK_EXCLUDE_TYPES** — 특정 장애 유형을 카테고리나 심각도와 무관하게 출력에서 제외합니다.

유효한 값: 장애 유형 문자열 (예: `ErrImagePull`, `ContainerCreating`, `Unschedulable`)

```
# ErrImagePull은 일시적 상태로 kubelet이 자동 재시도하므로 제외
# (지속되면 ImagePullBackOff로 전환되며 이는 제외되지 않음)
WEBHOOK_EXCLUDE_TYPES=ErrImagePull
```

세 옵션을 조합하면 더 세밀하게 제어할 수 있습니다.

```
# PodPhase, PodCondition은 무조건 제외 + 나머지는 HIGH 이상만 출력
# PodPhase, PodCondition은 무조건 제외 + 나머지는 HIGH 이상만 출력 + ErrImagePull 제외
WEBHOOK_SKIP_CATEGORIES=PodPhase,PodCondition
WEBHOOK_MIN_SEVERITY=HIGH
WEBHOOK_EXCLUDE_TYPES=ErrImagePull
```

## IAM 권한
Expand Down
6 changes: 6 additions & 0 deletions containers/devops-agent-operator/examples/04-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,9 @@ data:
# Empty means all severities trigger webhooks (default behavior).
# Example: "HIGH" → only HIGH and CRITICAL failures trigger webhooks
WEBHOOK_MIN_SEVERITY: ""
# Comma-separated list of failure types to exclude from webhook calls,
# regardless of category or severity.
# e.g., "ErrImagePull" - transient state that kubelet retries automatically;
# persistent failures transition to ImagePullBackOff which is not excluded.
# Empty means no failure types are excluded (default behavior).
WEBHOOK_EXCLUDE_TYPES: ""
21 changes: 19 additions & 2 deletions containers/devops-agent-operator/internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ type Config struct {
// Valid values: LOW, MEDIUM, HIGH, CRITICAL
// Empty means all severities trigger webhooks.
WebhookMinSeverity string

// WebhookExcludeTypes is a list of failure types that should not trigger webhook calls,
// regardless of category or severity.
// e.g., "ErrImagePull" - transient state that kubelet retries automatically.
// Empty means no failure types are excluded.
WebhookExcludeTypes []string
}

// DefaultConfig returns a Config with default values
Expand Down Expand Up @@ -201,6 +207,10 @@ func LoadFromEnv() *Config {
cfg.WebhookMinSeverity = strings.ToUpper(strings.TrimSpace(v))
}

if v := os.Getenv("WEBHOOK_EXCLUDE_TYPES"); v != "" {
cfg.WebhookExcludeTypes = splitAndTrim(v, ",")
}

return cfg
}

Expand Down Expand Up @@ -249,11 +259,11 @@ var severityLevel = map[string]int{
}

// ShouldSendWebhook returns true if the failure should trigger a webhook call.
// Both conditions must pass (AND logic):
// All conditions must pass (AND logic):
// - The failure category must not be in WebhookSkipCategories
// - The failure severity must meet or exceed WebhookMinSeverity
// - The failure type must not be in WebhookExcludeTypes
//
// If neither filter is configured, always returns true (default behavior preserved).
// If no filters are configured, always returns true (default behavior preserved).
func (c *Config) ShouldSendWebhook(category, severity string) bool {
// Category filter: skip if category is in the skip list
for _, cat := range c.WebhookSkipCategories {
Expand All @@ -272,6 +282,13 @@ func (c *Config) ShouldSendWebhook(category, severity string) bool {
}
}

// Type filter: skip if failure type is in the exclude list
for _, t := range c.WebhookExcludeTypes {
if t == failureType {
return false
}
}

return true
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R

// Evaluate filter once for all outputs
severity := collector.DetermineSeverity(failure)
shouldSend := r.Config.ShouldSendWebhook(failure.Category, severity)
shouldSend := r.Config.ShouldSendWebhook(failure.Category, severity, failure.Type)

if !shouldSend {
logger.Info("Outputs skipped by filter",
Expand Down