diff --git a/internal/lm/resource.go b/internal/lm/resource.go index f85f38f20..1fdd77c2c 100644 --- a/internal/lm/resource.go +++ b/internal/lm/resource.go @@ -21,6 +21,7 @@ import ( "regexp" "strings" + "k8s.io/apimachinery/pkg/api/validate/content" "k8s.io/klog/v2" spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1" @@ -202,7 +203,43 @@ func (rl resourceLabeler) getProductName(parts ...string) string { if rl.isShared() && !rl.isRenamed() { strippedParts = append(strippedParts, "SHARED") } - return strings.Join(strippedParts, "-") + return limitProductName(strippedParts) +} + +// limitProductName joins the product name parts and keeps the result within the +// Kubernetes label-value length limit. parts[0] is the GPU model; every part +// after it (e.g. "MIG", the profile, "SHARED") is discriminating and preserved, +// so only the model is truncated. The truncated value has any trailing +// separator removed so it remains a valid label value. +func limitProductName(parts []string) string { + full := strings.Join(parts, "-") + if len(full) <= content.LabelValueMaxLength { + return full + } + + suffix := strings.Join(parts[1:], "-") + if suffix == "" { + return strings.TrimRight(truncate(parts[0], content.LabelValueMaxLength), "-._") + } + + // The +1 accounts for the "-" that rejoins the model to the suffix. + maxModel := content.LabelValueMaxLength - (len(suffix) + 1) + if maxModel <= 0 { + // The non-model parts alone exceed the limit. This does not occur with + // real MIG profiles; truncate the whole value as a last resort. + return strings.TrimRight(truncate(full, content.LabelValueMaxLength), "-._") + } + + model := strings.TrimRight(truncate(parts[0], maxModel), "-._") + return model + "-" + suffix +} + +// truncate returns the first limit bytes of s. +func truncate(s string, limit int) string { + if len(s) <= limit { + return s + } + return s[:limit] } func (rl resourceLabeler) getReplicas() int { diff --git a/internal/lm/resource_test.go b/internal/lm/resource_test.go index c2e3b3e5c..06b7c4046 100644 --- a/internal/lm/resource_test.go +++ b/internal/lm/resource_test.go @@ -17,9 +17,11 @@ package lm import ( + "strings" "testing" "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/validate/content" spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1" rt "github.com/NVIDIA/k8s-device-plugin/internal/resource/testing" @@ -216,6 +218,78 @@ func TestGPUResourceLabeler(t *testing.T) { } +func TestGetProductName(t *testing.T) { + // A sanitised product name that is already close to the 63-character limit. + const longModel = "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstation-Edition" // 55 chars + + shared := &spec.Sharing{ + TimeSlicing: spec.ReplicatedResources{ + Resources: []spec.ReplicatedResource{ + {Name: "nvidia.com/gpu", Replicas: 2}, + }, + }, + } + + testCases := []struct { + description string + sharing *spec.Sharing + parts []string + expected string + }{ + { + description: "short value is returned unchanged", + parts: []string{"MOCKMODEL", "MIG", "1g.300gb"}, + expected: "MOCKMODEL-MIG-1g.300gb", + }, + { + description: "short bare model is returned unchanged", + parts: []string{"MOCKMODEL"}, + expected: "MOCKMODEL", + }, + { + description: "long model with mig profile truncates the model and preserves the profile", + parts: []string{longModel, "MIG", "1g.24gb"}, + expected: "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstation-Edi-MIG-1g.24gb", + }, + { + description: "media-extension profile is preserved and trailing separator is trimmed", + parts: []string{longModel, "MIG", "1g.24gb.me"}, + expected: "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstation-MIG-1g.24gb.me", + }, + { + description: "trailing dot from truncation is trimmed", + parts: []string{strings.Repeat("A", 51) + "." + strings.Repeat("B", 10), "MIG", "1g.5gb"}, + expected: strings.Repeat("A", 51) + "-MIG-1g.5gb", + }, + { + description: "long bare model is truncated to the limit", + parts: []string{strings.Repeat("A", 80)}, + expected: strings.Repeat("A", 63), + }, + { + description: "shared suffix is preserved when truncating", + sharing: shared, + parts: []string{longModel, "MIG", "1g.24gb"}, + expected: "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstat-MIG-1g.24gb-SHARED", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + rl := resourceLabeler{ + resourceName: "nvidia.com/gpu", + sharing: tc.sharing, + } + + result := rl.getProductName(tc.parts...) + + require.Equal(t, tc.expected, result) + require.LessOrEqual(t, len(result), content.LabelValueMaxLength) + require.Empty(t, content.IsLabelValue(result)) + }) + } +} + func TestSanitise(t *testing.T) { testCases := []struct { input string