Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .cruft.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"name": "prometheus",
"slug": "prometheus",
"parameter_key": "prometheus",
"test_cases": "defaults multi rewrite-registries thanos cluster-monitoring additional_rules resourcequota grafana-ingress additional_nodeexporter_args additional_kubestatemetrics_args grafana-storage additional-netpols kubernetes_1.26 kubernetes_1.27 kubernetes_1.28 kubernetes_1.29 kubernetes_1.30 kubernetes_1.31 kubernetes_1.32 kubernetes_1.33 kubernetes_1.34 kubernetes_1.35",
"test_cases": "defaults multi rewrite-registries thanos cluster-monitoring additional_rules resourcequota grafana-ingress additional_nodeexporter_args additional_kubestatemetrics_args grafana-storage additional-netpols kubernetes_1.26 kubernetes_1.27 kubernetes_1.28 kubernetes_1.29 kubernetes_1.30 kubernetes_1.31 kubernetes_1.32 kubernetes_1.33 kubernetes_1.34 kubernetes_1.35 kubernetes_1.36",
"add_lib": "y",
"add_pp": "n",
"add_golden": "y",
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ jobs:
- kubernetes_1.33
- kubernetes_1.34
- kubernetes_1.35
- kubernetes_1.36
defaults:
run:
working-directory: ${{ env.COMPONENT_NAME }}
Expand Down Expand Up @@ -90,6 +91,7 @@ jobs:
- kubernetes_1.33
- kubernetes_1.34
- kubernetes_1.35
- kubernetes_1.36
defaults:
run:
working-directory: ${{ env.COMPONENT_NAME }}
Expand Down
2 changes: 1 addition & 1 deletion Makefile.vars.mk
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ KUBENT_IMAGE ?= ghcr.io/doitintl/kube-no-trouble:latest
KUBENT_DOCKER ?= $(DOCKER_CMD) $(DOCKER_ARGS) $(root_volume) --entrypoint=/app/kubent $(KUBENT_IMAGE)

instance ?= defaults
test_instances = tests/defaults.yml tests/multi.yml tests/rewrite-registries.yml tests/thanos.yml tests/cluster-monitoring.yml tests/additional_rules.yml tests/resourcequota.yml tests/grafana-ingress.yml tests/additional_nodeexporter_args.yml tests/additional_kubestatemetrics_args.yml tests/grafana-storage.yml tests/additional-netpols.yml tests/kubernetes_1.26.yml tests/kubernetes_1.27.yml tests/kubernetes_1.28.yml tests/kubernetes_1.29.yml tests/kubernetes_1.30.yml tests/kubernetes_1.31.yml tests/kubernetes_1.32.yml tests/kubernetes_1.33.yml tests/kubernetes_1.34.yml tests/kubernetes_1.35.yml
test_instances = tests/defaults.yml tests/multi.yml tests/rewrite-registries.yml tests/thanos.yml tests/cluster-monitoring.yml tests/additional_rules.yml tests/resourcequota.yml tests/grafana-ingress.yml tests/additional_nodeexporter_args.yml tests/additional_kubestatemetrics_args.yml tests/grafana-storage.yml tests/additional-netpols.yml tests/kubernetes_1.26.yml tests/kubernetes_1.27.yml tests/kubernetes_1.28.yml tests/kubernetes_1.29.yml tests/kubernetes_1.30.yml tests/kubernetes_1.31.yml tests/kubernetes_1.32.yml tests/kubernetes_1.33.yml tests/kubernetes_1.34.yml tests/kubernetes_1.35.yml tests/kubernetes_1.36.yml
2 changes: 2 additions & 0 deletions class/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ parameters:
"1.34": 68444c6b63b032a8761da09182ef63d89cc65cd5
# renovate: branch=release-0.16
"1.35": a4bfb4d4867bef9db56a168c60b11cc63b52f2dd
# renovate: branch=release-0.17
"1.36": d6d094d115093d81d3355bc970a93e4357d6ef05

namespaces: {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26829,15 +26829,15 @@ items:
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_receive_bytes_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Receive"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_transmit_bytes_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Transmit"
}
],
Expand Down Expand Up @@ -26898,15 +26898,15 @@ items:
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_receive_drop_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Receive"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_transmit_drop_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Transmit"
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,23 @@ spec:
rate(node_network_transmit_drop_total{job="nodeexporter-default-instance", device!="lo"}[5m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate5m
- expr: |
sum without (device) (
rate(node_network_receive_bytes_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_receive_bytes_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_transmit_bytes_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_transmit_bytes_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_receive_drop_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_receive_drop_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_transmit_drop_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_transmit_drop_physical:rate5m
Original file line number Diff line number Diff line change
Expand Up @@ -26829,15 +26829,15 @@ items:
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_receive_bytes_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Receive"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_transmit_bytes_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Transmit"
}
],
Expand Down Expand Up @@ -26898,15 +26898,15 @@ items:
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_receive_drop_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Receive"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_transmit_drop_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Transmit"
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,23 @@ spec:
rate(node_network_transmit_drop_total{job="nodeexporter-default-instance", device!="lo"}[5m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate5m
- expr: |
sum without (device) (
rate(node_network_receive_bytes_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_receive_bytes_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_transmit_bytes_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_transmit_bytes_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_receive_drop_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_receive_drop_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_transmit_drop_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_transmit_drop_physical:rate5m
Original file line number Diff line number Diff line change
Expand Up @@ -26829,15 +26829,15 @@ items:
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_receive_bytes_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Receive"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_transmit_bytes_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Transmit"
}
],
Expand Down Expand Up @@ -26898,15 +26898,15 @@ items:
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_receive_drop_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Receive"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"expr": "instance:node_network_transmit_drop_physical:rate5m{job=\"nodeexporter-default-instance\", instance=\"$instance\", cluster=~\"$cluster\"} != 0",
"legendFormat": "Transmit"
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,23 @@ spec:
rate(node_network_transmit_drop_total{job="nodeexporter-default-instance", device!="lo"}[5m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate5m
- expr: |
sum without (device) (
rate(node_network_receive_bytes_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_receive_bytes_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_transmit_bytes_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_transmit_bytes_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_receive_drop_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_receive_drop_physical:rate5m
- expr: |
sum without (device) (
rate(node_network_transmit_drop_total{job="nodeexporter-default-instance", device!~"lo|veth.+"}[5m])
)
record: instance:node_network_transmit_drop_physical:rate5m
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
spec:
ignoreDifferences:
- group: ''
jsonPointers:
- /imagePullSecrets
kind: ServiceAccount
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: Namespace
metadata:
annotations: {}
labels:
SYNMonitoring: main
name: syn-prometheus-operator
name: syn-prometheus-operator
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
annotations:
source: https://github.com/projectsyn/component-prometheus
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/managed-by: commodore
app.kubernetes.io/name: kube-prometheus
app.kubernetes.io/part-of: kube-prometheus
monitoring.syn.tools/enabled: 'true'
prometheus: default-instance
role: alert-rules
name: kubeprometheus-default-instance-rules
namespace: syn-prometheus
spec:
groups:
- name: general.rules
rules:
- alert: TargetDown
annotations:
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{
$labels.service }} targets in {{ $labels.namespace }} namespace are
down.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
BY (cluster, job, namespace, service)) > 10
for: 10m
labels:
severity: warning
- alert: Watchdog
annotations:
description: |
This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
summary: An alert that should always be firing to certify that Alertmanager
is working properly.
expr: vector(1)
labels:
severity: none
- alert: InfoInhibitor
annotations:
description: |
This is an alert that is used to inhibit info alerts.
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
other alerts.
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
severity of 'warning' or 'critical' starts firing on the same namespace.
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
expr: group by (namespace) (ALERTS{severity = "info"} == 1) unless on (namespace)
group by (namespace) (ALERTS{alertname != "InfoInhibitor", alertstate
= "firing", severity =~ "warning|critical"} == 1)
labels:
severity: none
- name: node-network
rules:
- alert: NodeNetworkInterfaceFlapping
annotations:
description: Network interface "{{ $labels.device }}" changing its up
status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod
}}
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping
summary: Network interface is often changing its status
expr: |
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m
labels:
severity: warning
- name: kube-prometheus-node-recording.rules
rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
BY (instance)
record: instance:node_cpu:rate:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY
(instance, cpu))
record: cluster:node_cpu:ratio
- name: kube-prometheus-general.rules
rules:
- expr: count without(instance, pod, node) (up == 1)
record: count:up1
- expr: count without(instance, pod, node) (up == 0)
record: count:up0
Loading
Loading