diff --git a/examples/contrib-sli-total-amount-rules.yaml b/examples/contrib-sli-total-amount-rules.yaml new file mode 100644 index 00000000..dfe24dd2 --- /dev/null +++ b/examples/contrib-sli-total-amount-rules.yaml @@ -0,0 +1,34 @@ +apiVersion: sloth.slok.dev/v1 +kind: PrometheusServiceLevel +metadata: + name: svc + namespace: test-ns +spec: + service: "svc01" + labels: + global01k1: global01v1 + sloPlugins: + chain: + - id: "sloth.dev/contrib/sli_total_amount/v1" + slos: + - name: "slo1" + objective: 99.9 + description: "This is SLO 01." + labels: + global02k1: global02v1 + sli: + events: + errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) + totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) + alerting: + name: myServiceAlert + labels: + alert01k1: "alert01v1" + annotations: + alert02k1: "alert02k2" + pageAlert: + labels: + alert03k1: "alert03v1" + ticketAlert: + labels: + alert04k1: "alert04v1" diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md new file mode 100644 index 00000000..4da1e130 --- /dev/null +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md @@ -0,0 +1,23 @@ +# SLI Total Amount Rules Plugin for Sloth + +This plugin generates additional Prometheus recording rules for the total SLI amount by rendering the `TotalQuery` from the SLO spec once per time window. It is designed to be used as an SLO plugin in Sloth's plugin chain, and records one rule per window under the name `slo:sli_total:amount` followed by the window duration. + +## Features +- Generates a Prometheus rule group for the SLI total amount per SLO. +- Ensures unique rule group names to avoid conflicts by appending the SLO ID to a fixed prefix (e.g., `sloth-slo-sli-total-amount-svc01-slo1`). +- Preserves the original `TotalQuery` from the SLO definition. 
+ ## Usage example + + Add the plugin to the `sloPlugins.chain` section of your SLO YAML: + +```yaml + sloPlugins: + chain: + - id: "sloth.dev/contrib/sli_total_amount/v1" +``` + +## License + +This plugin is licensed under the Apache 2.0 License. See [LICENSE](../../../../LICENSE) for details. + diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go new file mode 100644 index 00000000..8f458ee2 --- /dev/null +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go @@ -0,0 +1,112 @@ +package plugin + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "text/template" + + "github.com/prometheus/prometheus/model/rulefmt" + "github.com/slok/sloth/pkg/common/conventions" + "github.com/slok/sloth/pkg/common/model" + utilsdata "github.com/slok/sloth/pkg/common/utils/data" + promutils "github.com/slok/sloth/pkg/common/utils/prometheus" + pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1" +) + +const ( + // PluginVersion is the version string declared by this plugin package (presumably read by Sloth's plugin loader; confirm). + PluginVersion = "prometheus/slo/v1" + // PluginID is the identifier used to reference this plugin in an `sloPlugins.chain` entry. + PluginID = "sloth.dev/contrib/sli_total_amount/v1" + // sliTotalAmountMetric is the record name prefix; the window string is appended to it for every rule. + sliTotalAmountMetric = "slo:sli_total:amount" + // sliTotalAmountGroupNamePrefix is prepended to the SLO ID to build the rule group name. + sliTotalAmountGroupNamePrefix = "sloth-slo-sli-total-amount-" +) + +// PluginConfig is the plugin configuration; it currently has no fields. +type PluginConfig struct{} + +// NewPlugin builds the plugin from its raw JSON configuration. An empty +// configuration is accepted and yields the zero PluginConfig; malformed JSON +// is returned as a wrapped error. +func NewPlugin(c json.RawMessage, _ pluginslov1.AppUtils) (pluginslov1.Plugin, error) { + cfg := PluginConfig{} + // The plugin has no required settings, so an absent config must not fail JSON decoding. + if len(c) > 0 { + if err := json.Unmarshal(c, &cfg); err != nil { + return nil, fmt.Errorf("could not load plugin config: %w", err) + } + } + + return plugin{cfg: cfg}, nil +} + +// plugin is the pluginslov1.Plugin implementation returned by NewPlugin. +type plugin struct { + cfg PluginConfig +} + +// ProcessSLO appends to result.SLORules.ExtraRules a rule group holding the +// SLI total amount recording rules of request.SLO. It returns an error when +// the SLO has no events SLI or its TotalQuery is empty. +func (p plugin) ProcessSLO(ctx context.Context, request *pluginslov1.Request, result *pluginslov1.Result) error { + if request.SLO.SLI.Events == nil || request.SLO.SLI.Events.TotalQuery == "" { + return fmt.Errorf("SLI event type with TotalQuery required") + } + + rules, err := p.generateSLITotalRecordingRules(ctx, request.SLO, request.MWMBAlertGroup) + if err != nil { + return err + } + + customGroup := model.PromRuleGroup{ + Name: 
sliTotalAmountGroupNamePrefix + request.SLO.ID, + Interval: 0, // or set as needed + Rules: rules, + } + + result.SLORules.ExtraRules = append(result.SLORules.ExtraRules, customGroup) + return nil +} + +// generateSLITotalRecordingRules renders the SLO's TotalQuery template once per +// window (the windows returned by alerts.TimeDurationWindows plus +// slo.TimeWindow) and returns one recording rule per window, named +// sliTotalAmountMetric followed by the window string. +func (p plugin) generateSLITotalRecordingRules(ctx context.Context, slo model.PromSLO, alerts model.MWMBAlertGroup) ([]rulefmt.Rule, error) { + windows := alerts.TimeDurationWindows() + windows = append(windows, slo.TimeWindow) + + // The query template does not depend on the window, so parse it once instead of on every iteration. + tpl, err := template.New("totalQuery").Option("missingkey=error").Parse(slo.SLI.Events.TotalQuery) + if err != nil { + return nil, fmt.Errorf("could not create template for %s: %w", sliTotalAmountMetric, err) + } + + labels := utilsdata.MergeLabels(conventions.GetSLOIDPromLabels(slo), slo.Labels) + rules := make([]rulefmt.Rule, 0, len(windows)) + + for _, window := range windows { + windowStr := promutils.TimeDurationToPromStr(window) + recordName := sliTotalAmountMetric + windowStr + + var buf bytes.Buffer + err = tpl.Execute(&buf, map[string]string{ + conventions.TplSLIQueryWindowVarName: windowStr, + }) + if err != nil { + return nil, fmt.Errorf("could not render TotalQuery for %s: %w", recordName, err) + } + + rule := rulefmt.Rule{ + Record: recordName, + Expr: buf.String(), + Labels: labels, + } + rules = append(rules, rule) + } + return rules, nil +} diff --git a/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go new file mode 100644 index 00000000..2052d8aa --- /dev/null +++ b/internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go @@ -0,0 +1,103 @@ +package plugin_test + +import ( + "encoding/json" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + plugin "github.com/slok/sloth/internal/plugin/slo/contrib/sli_total_amount_rules_v1" + "github.com/slok/sloth/pkg/common/model" + pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1" +) + +func baseAlertGroup() 
model.MWMBAlertGroup { + return model.MWMBAlertGroup{ + PageQuick: model.MWMBAlert{ + ShortWindow: 5 * time.Minute, + LongWindow: 1 * time.Hour, + }, + } +} + +type SLOOption func(*model.PromSLO) + +func baseSLO(opts ...SLOOption) model.PromSLO { + slo := model.PromSLO{ + ID: "svc01-slo1", + Name: "slo1", + Service: "svc01", + TimeWindow: 30 * 24 * time.Hour, + SLI: model.PromSLI{ + Events: &model.PromSLIEvents{ + ErrorQuery: `sum(rate(http_requests_total{job="api",status=~"5.."}[{{.window}}]))`, + }, + }, + Labels: map[string]string{ + "global01k1": "global01v1", + "global02k1": "global02v1", + }, + } + + for _, opt := range opts { + opt(&slo) + } + + return slo +} + +func withTotalQuery() SLOOption { + return func(slo *model.PromSLO) { + slo.SLI.Events.TotalQuery = `sum(rate(http_requests_total{job="api"}[{{.window}}]))` + } +} + +func TestProcessSLO_NoRules(t *testing.T) { + cfgBytes, err := json.Marshal(plugin.PluginConfig{}) + require.NoError(t, err) + + plug, err := plugin.NewPlugin(cfgBytes, pluginslov1.AppUtils{}) + require.NoError(t, err) + + req := &pluginslov1.Request{ + SLO: baseSLO(), // the base SLO deliberately has no TotalQuery + MWMBAlertGroup: baseAlertGroup(), + } + result := &pluginslov1.Result{} + + err = plug.ProcessSLO(t.Context(), req, result) + require.Error(t, err) + + myAssert := assert.New(t) + myAssert.Empty(result.SLORules.ExtraRules, "expected no rule groups in ExtraRules when TotalQuery is missing") +} + +func TestProcessSLO_AppendsCustomRuleGroup(t *testing.T) { + cfgBytes, err := json.Marshal(plugin.PluginConfig{}) + require.NoError(t, err) + + plug, err := plugin.NewPlugin(cfgBytes, pluginslov1.AppUtils{}) + require.NoError(t, err) + + req := &pluginslov1.Request{ + SLO: baseSLO(withTotalQuery()), + MWMBAlertGroup: baseAlertGroup(), + } + result := &pluginslov1.Result{} + + err = plug.ProcessSLO(t.Context(), req, result) + require.NoError(t, err) + + myAssert := assert.New(t) + if myAssert.NotEmpty(result.SLORules.ExtraRules, "expected at least one rule group in ExtraRules") { + group := 
result.SLORules.ExtraRules[0] + myAssert.Equal("sloth-slo-sli-total-amount-svc01-slo1", group.Name) + myAssert.NotEmpty(group.Rules, "expected at least one rule in the group") + // Optionally, check the first rule's Record and Expr. + rule := group.Rules[0] + myAssert.Contains(rule.Record, "slo:sli_total:amount") + myAssert.Contains(rule.Expr, "sum(rate(http_requests_total") + } +}