# Patch summary: rewrites the `expr` of four "service down" alerting templates
# (pmm_mongodb_down, pmm_postgresql_down, pmm_redis_down, pmm_valkey_down).
#
# Old expr: `sum by (service_name, node_name) (<up_metric>) == bool 0`, combined
# via `and on(node_name)` with the enabled exporters listed in
# pmm_managed_inventory_agents.
#
# New expr (same shape in every file; only agent_type and <up_metric> differ):
#   1. `max by (service_id, service_name, node_name)` over the enabled exporters
#      in pmm_managed_inventory_agents yields one series per watched service.
#   2. `unless on (service_id)` drops every service whose max <up_metric> is 1,
#      leaving the services whose metric is not 1 or is absent.
#   3. `or 0 * max by (...)` adds a 0-valued series for each enabled service
#      whose label set is not already present on the left-hand side.
#   Matching is now done on service_id instead of node_name.
#
# redis_down.yml and valkey_down.yml both select agent_type="valkey_exporter"
# and read redis_up, before and after this patch.
#
# NOTE(review): the inline comment says the left-hand side has "(value 1)", but
# the expression passes through whatever value pmm_managed_inventory_agents
# exports - confirm that value is always non-zero for enabled exporters.
# NOTE(review): `version: 1` is unchanged context in every hunk - confirm the
# template loader does not require a version bump when `expr` changes.
# NOTE(review): the line breaks of the patch text below appear to have been
# collapsed into spaces; restore the original newlines before applying it.
diff --git a/managed/data/alerting-templates/mongodb_down.yml b/managed/data/alerting-templates/mongodb_down.yml index 0e1c3402232..7a6ca7fc7fd 100644 --- a/managed/data/alerting-templates/mongodb_down.yml +++ b/managed/data/alerting-templates/mongodb_down.yml @@ -3,7 +3,20 @@ templates: - name: pmm_mongodb_down version: 1 summary: MongoDB down - expr: sum by (service_name, node_name) (mongodb_up) == bool 0 and on(node_name) pmm_managed_inventory_agents{agent_type="mongodb_exporter", disabled="0"} + expr: | + ( + # enabled exporters we should be watching (from inventory — always present) + max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="mongodb_exporter", disabled="0"} + ) + # drop the ones that are up → leaves down + missing (value 1) + unless on (service_id) (max by (service_id) (mongodb_up) == 1) + ) + or + # baseline 0 for every enabled service so healthy ones stay Normal (not MissingSeries) + 0 * max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="mongodb_exporter", disabled="0"} + ) for: 1m severity: critical annotations: diff --git a/managed/data/alerting-templates/postgresql_down.yml b/managed/data/alerting-templates/postgresql_down.yml index b5a4e96e2e8..8d93cb15fab 100644 --- a/managed/data/alerting-templates/postgresql_down.yml +++ b/managed/data/alerting-templates/postgresql_down.yml @@ -3,7 +3,20 @@ templates: - name: pmm_postgresql_down version: 1 summary: PostgreSQL down - expr: sum by (service_name, node_name) (pg_up) == bool 0 and on(node_name) pmm_managed_inventory_agents{agent_type="postgres_exporter", disabled="0"} + expr: | + ( + # enabled exporters we should be watching (from inventory — always present) + max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="postgres_exporter", disabled="0"} + ) + # drop the ones that are up → leaves down + missing (value 1) + unless on (service_id) (max by (service_id) (pg_up) == 1) + ) + or + # 
baseline 0 for every enabled service so healthy ones stay Normal (not MissingSeries) + 0 * max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="postgres_exporter", disabled="0"} + ) for: 1m severity: critical annotations: diff --git a/managed/data/alerting-templates/redis_down.yml b/managed/data/alerting-templates/redis_down.yml index 670e0758992..7c5b775847d 100644 --- a/managed/data/alerting-templates/redis_down.yml +++ b/managed/data/alerting-templates/redis_down.yml @@ -3,7 +3,20 @@ templates: - name: pmm_redis_down version: 1 summary: Redis down - expr: sum by (service_name, node_name) (redis_up) == bool 0 and on(node_name) pmm_managed_inventory_agents{agent_type="valkey_exporter", disabled="0"} + expr: | + ( + # enabled exporters we should be watching (from inventory — always present) + max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="valkey_exporter", disabled="0"} + ) + # drop the ones that are up → leaves down + missing (value 1) + unless on (service_id) (max by (service_id) (redis_up) == 1) + ) + or + # baseline 0 for every enabled service so healthy ones stay Normal (not MissingSeries) + 0 * max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="valkey_exporter", disabled="0"} + ) for: 1m severity: critical annotations: diff --git a/managed/data/alerting-templates/valkey_down.yml b/managed/data/alerting-templates/valkey_down.yml index 43caa90445f..9ee1502fed3 100644 --- a/managed/data/alerting-templates/valkey_down.yml +++ b/managed/data/alerting-templates/valkey_down.yml @@ -3,7 +3,20 @@ templates: - name: pmm_valkey_down version: 1 summary: Valkey down - expr: sum by (service_name, node_name) (redis_up) == bool 0 and on(node_name) pmm_managed_inventory_agents{agent_type="valkey_exporter", disabled="0"} + expr: | + ( + # enabled exporters we should be watching (from inventory — always present) + max by (service_id, service_name, node_name) ( 
+ pmm_managed_inventory_agents{agent_type="valkey_exporter", disabled="0"} + ) + # drop the ones that are up → leaves down + missing (value 1) + unless on (service_id) (max by (service_id) (redis_up) == 1) + ) + or + # baseline 0 for every enabled service so healthy ones stay Normal (not MissingSeries) + 0 * max by (service_id, service_name, node_name) ( + pmm_managed_inventory_agents{agent_type="valkey_exporter", disabled="0"} + ) for: 1m severity: critical annotations: