From f7aeef4b0e0902dc0d4ccf27d768870f7ccf2b17 Mon Sep 17 00:00:00 2001 From: Mimi Tantono Date: Thu, 25 Dec 2025 14:48:24 +0100 Subject: [PATCH 1/2] Add a new contrib plugin that writes total transaction recording rules --- examples/contrib-sli-total-amount-rules.yaml | 34 +++++++ .../sli_total_amount_rules_v1/README.md | 23 +++++ .../sli_total_amount_rules_v1/plugin.go | 93 +++++++++++++++++++ .../sli_total_amount_rules_v1/plugin_test.go | 70 ++++++++++++++ 4 files changed, 220 insertions(+) create mode 100644 examples/contrib-sli-total-amount-rules.yaml create mode 100644 internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md create mode 100644 internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go create mode 100644 internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go diff --git a/examples/contrib-sli-total-amount-rules.yaml b/examples/contrib-sli-total-amount-rules.yaml new file mode 100644 index 00000000..dfe24dd2 --- /dev/null +++ b/examples/contrib-sli-total-amount-rules.yaml @@ -0,0 +1,34 @@ +apiVersion: sloth.slok.dev/v1 +kind: PrometheusServiceLevel +metadata: + name: svc + namespace: test-ns +spec: + service: "svc01" + labels: + global01k1: global01v1 + sloPlugins: + chain: + - id: "sloth.dev/contrib/sli_total_amount/v1" + slos: + - name: "slo1" + objective: 99.9 + description: "This is SLO 01." 
+ labels: + global02k1: global02v1 + sli: + events: + errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) + totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) + alerting: + name: myServiceAlert + labels: + alert01k1: "alert01v1" + annotations: + alert02k1: "alert02k2" + pageAlert: + labels: + alert03k1: "alert03v1" + ticketAlert: + labels: + alert04k1: "alert04v1" diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md new file mode 100644 index 00000000..4da1e130 --- /dev/null +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md @@ -0,0 +1,23 @@ +# SLI Total Amount Rules Plugin for Sloth + +This plugin additionally generates Prometheus recording rules for the total SLI amount, preserving the `TotalQuery` from the SLO spec. It is designed to be used as an SLO plugin in Sloth's plugin chain, and records the results as `slo:sli_total:amount<window>` metrics, one for each alerting window plus the SLO time window. + +## Features +- Generates a Prometheus rule group for the SLI total amount per SLO. +- Ensures unique rule group names to avoid conflicts (e.g., `sloth-slo-sli-total-amount-<slo-id>`). +- Preserves the original `TotalQuery` from the SLO definition. + +## Usage example + +Add the plugin to the `sloPlugins.chain` section of your SLO YAML: + +```yaml + sloPlugins: + chain: + - id: "sloth.dev/contrib/sli_total_amount/v1" +``` + +## License + +This plugin is licensed under the Apache 2.0 License. See [LICENSE](../../../../LICENSE) for details. 
+ diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go new file mode 100644 index 00000000..8f458ee2 --- /dev/null +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go @@ -0,0 +1,93 @@ +package plugin + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "text/template" + + "github.com/prometheus/prometheus/model/rulefmt" + "github.com/slok/sloth/pkg/common/conventions" + "github.com/slok/sloth/pkg/common/model" + utilsdata "github.com/slok/sloth/pkg/common/utils/data" + promutils "github.com/slok/sloth/pkg/common/utils/prometheus" + pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1" +) + +const ( + PluginVersion = "prometheus/slo/v1" + PluginID = "sloth.dev/contrib/sli_total_amount/v1" + sliTotalAmountMetric = "slo:sli_total:amount" + sliTotalAmountGroupNamePrefix = "sloth-slo-sli-total-amount-" +) + +type PluginConfig struct{} + +func NewPlugin(c json.RawMessage, _ pluginslov1.AppUtils) (pluginslov1.Plugin, error) { + cfg := &PluginConfig{} + err := json.Unmarshal(c, cfg) + if err != nil { + return nil, err + } + + return plugin{cfg: *cfg}, nil +} + +type plugin struct { + cfg PluginConfig +} + +func (p plugin) ProcessSLO(ctx context.Context, request *pluginslov1.Request, result *pluginslov1.Result) error { + if request.SLO.SLI.Events == nil || request.SLO.SLI.Events.TotalQuery == "" { + return fmt.Errorf("SLI event type with TotalQuery required") + } + + rules, err := p.generateSLITotalRecordingRules(ctx, request.SLO, request.MWMBAlertGroup) + if err != nil { + return err + } + + customGroup := model.PromRuleGroup{ + Name: sliTotalAmountGroupNamePrefix + request.SLO.ID, + Interval: 0, // or set as needed + Rules: rules, + } + + result.SLORules.ExtraRules = append(result.SLORules.ExtraRules, customGroup) + return nil +} + +func (p plugin) generateSLITotalRecordingRules(ctx context.Context, slo model.PromSLO, alerts model.MWMBAlertGroup) 
([]rulefmt.Rule, error) { + windows := alerts.TimeDurationWindows() + windows = append(windows, slo.TimeWindow) + + labels := utilsdata.MergeLabels(conventions.GetSLOIDPromLabels(slo), slo.Labels) + rules := make([]rulefmt.Rule, 0, len(windows)) + + for _, window := range windows { + windowStr := promutils.TimeDurationToPromStr(window) + recordName := sliTotalAmountMetric + windowStr + + tpl, err := template.New("totalQuery").Option("missingkey=error").Parse(slo.SLI.Events.TotalQuery) + if err != nil { + return nil, fmt.Errorf("could not create template for %s: %w", recordName, err) + } + + var buf bytes.Buffer + err = tpl.Execute(&buf, map[string]string{ + conventions.TplSLIQueryWindowVarName: windowStr, + }) + if err != nil { + return nil, fmt.Errorf("could not render TotalQuery for %s: %w", recordName, err) + } + + rule := rulefmt.Rule{ + Record: recordName, + Expr: buf.String(), + Labels: labels, + } + rules = append(rules, rule) + } + return rules, nil +} diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go new file mode 100644 index 00000000..5557e789 --- /dev/null +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go @@ -0,0 +1,70 @@ +package plugin_test + +import ( + "encoding/json" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + plugin "github.com/slok/sloth/internal/plugin/slo/contrib/sli_total_amount_rules_v1" + "github.com/slok/sloth/pkg/common/model" + pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1" +) + +func baseAlertGroup() model.MWMBAlertGroup { + return model.MWMBAlertGroup{ + PageQuick: model.MWMBAlert{ + ShortWindow: 5 * time.Minute, + LongWindow: 1 * time.Hour, + }, + } +} + +func baseSLO() model.PromSLO { + return model.PromSLO{ + ID: "svc01-slo1", + Name: "slo1", + Service: "svc01", + TimeWindow: 30 * 24 * time.Hour, + SLI: model.PromSLI{ + Events: 
&model.PromSLIEvents{ + ErrorQuery: `sum(rate(http_requests_total{job="api",status=~"5.."}[{{.window}}]))`, + TotalQuery: `sum(rate(http_requests_total{job="api"}[{{.window}}]))`, + }, + }, + Labels: map[string]string{ + "global01k1": "global01v1", + "global02k1": "global02v1", + }, + } +} + +func TestProcessSLO_AppendsCustomRuleGroup(t *testing.T) { + cfgBytes, err := json.Marshal(plugin.PluginConfig{}) + require.NoError(t, err) + + plug, err := plugin.NewPlugin(cfgBytes, pluginslov1.AppUtils{}) + require.NoError(t, err) + + req := &pluginslov1.Request{ + SLO: baseSLO(), + MWMBAlertGroup: baseAlertGroup(), + } + result := &pluginslov1.Result{} + + err = plug.ProcessSLO(t.Context(), req, result) + require.NoError(t, err) + + myAssert := assert.New(t) + if myAssert.NotEmpty(result.SLORules.ExtraRules, "expected at least one rule group in ExtraRules") { + group := result.SLORules.ExtraRules[0] + myAssert.Equal("sloth-slo-sli-total-amount-svc01-slo1", group.Name) + myAssert.NotEmpty(group.Rules, "expected at least one rule in the group") + // Optionally, check the first rule's Record and Expr. 
+ rule := group.Rules[0] + myAssert.Contains(rule.Record, "slo:sli_total:amount") + myAssert.Contains(rule.Expr, "sum(rate(http_requests_total") + } +} From 14cfa28c992e6c23811b11ca4a76addea0f0802c Mon Sep 17 00:00:00 2001 From: Mimi Tantono Date: Thu, 25 Dec 2025 18:05:25 +0100 Subject: [PATCH 2/2] Add test case for when TotalQuery is missing --- .../sli_total_amount_rules_v1/plugin_test.go | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go index 5557e789..2052d8aa 100644 --- a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go @@ -22,8 +22,10 @@ func baseAlertGroup() model.MWMBAlertGroup { } } -func baseSLO() model.PromSLO { - return model.PromSLO{ +type SLOOption func(*model.PromSLO) + +func baseSLO(opts ...SLOOption) model.PromSLO { + slo := model.PromSLO{ ID: "svc01-slo1", Name: "slo1", Service: "svc01", @@ -31,7 +33,6 @@ func baseSLO() model.PromSLO { SLI: model.PromSLI{ Events: &model.PromSLIEvents{ ErrorQuery: `sum(rate(http_requests_total{job="api",status=~"5.."}[{{.window}}]))`, - TotalQuery: `sum(rate(http_requests_total{job="api"}[{{.window}}]))`, }, }, Labels: map[string]string{ @@ -39,9 +40,21 @@ func baseSLO() model.PromSLO { "global02k1": "global02v1", }, } + + for _, opt := range opts { + opt(&slo) + } + + return slo } -func TestProcessSLO_AppendsCustomRuleGroup(t *testing.T) { +func withTotalQuery() SLOOption { + return func(slo *model.PromSLO) { + slo.SLI.Events.TotalQuery = `sum(rate(http_requests_total{job="api"}[{{.window}}]))` + } +} + +func TestProcessSLO_NoRules(t *testing.T) { cfgBytes, err := json.Marshal(plugin.PluginConfig{}) require.NoError(t, err) @@ -54,6 +67,26 @@ func TestProcessSLO_AppendsCustomRuleGroup(t *testing.T) { } result := &pluginslov1.Result{} + err = 
plug.ProcessSLO(t.Context(), req, result) + require.Error(t, err) + + myAssert := assert.New(t) + myAssert.Empty(result.SLORules.ExtraRules, "expected no rule groups in ExtraRules when TotalQuery is missing") +} + +func TestProcessSLO_AppendsCustomRuleGroup(t *testing.T) { + cfgBytes, err := json.Marshal(plugin.PluginConfig{}) + require.NoError(t, err) + + plug, err := plugin.NewPlugin(cfgBytes, pluginslov1.AppUtils{}) + require.NoError(t, err) + + req := &pluginslov1.Request{ + SLO: baseSLO(withTotalQuery()), + MWMBAlertGroup: baseAlertGroup(), + } + result := &pluginslov1.Result{} + err = plug.ProcessSLO(t.Context(), req, result) require.NoError(t, err)