Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion internal/lm/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"regexp"
"strings"

"k8s.io/apimachinery/pkg/api/validate/content"
"k8s.io/klog/v2"

spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
Expand Down Expand Up @@ -202,7 +203,43 @@ func (rl resourceLabeler) getProductName(parts ...string) string {
if rl.isShared() && !rl.isRenamed() {
strippedParts = append(strippedParts, "SHARED")
}
return strings.Join(strippedParts, "-")
return limitProductName(strippedParts)
}

// limitProductName builds the product name from parts, joined with "-", and
// guarantees the result is no longer than content.LabelValueMaxLength bytes.
//
// parts[0] is treated as the GPU model and is the only part that is shortened;
// the remaining parts (for example "MIG", the profile name, "SHARED") carry the
// distinguishing information and are kept intact whenever they fit. After any
// cut, trailing "-", "." and "_" characters are dropped so the value does not
// end in a separator.
func limitProductName(parts []string) string {
	const separators = "-._"
	limit := content.LabelValueMaxLength

	joined := strings.Join(parts, "-")
	if len(joined) <= limit {
		return joined
	}

	// clip shortens s to at most n bytes and removes separators left dangling
	// at the cut point.
	clip := func(s string, n int) string {
		return strings.TrimRight(truncate(s, n), separators)
	}

	// Everything after the model, and the room left for the model once that
	// tail and the "-" in front of it have been reserved.
	tail := strings.Join(parts[1:], "-")
	budget := limit - len(tail) - 1

	switch {
	case tail == "":
		// Only the model is present: shorten it on its own.
		return clip(parts[0], limit)
	case budget <= 0:
		// The non-model parts already fill the limit by themselves. This is
		// not expected with real MIG profiles; fall back to cutting the whole
		// joined value.
		return clip(joined, limit)
	default:
		return clip(parts[0], budget) + "-" + tail
	}
}

// truncate returns a prefix of s that is at most limit bytes long.
//
// If s already fits within limit it is returned unchanged. A limit of zero or
// less yields the empty string instead of panicking. When the cut would fall
// in the middle of a multi-byte UTF-8 character, the cut is moved back to the
// start of that character, so the result may be slightly shorter than limit
// but never ends in a partial character. For ASCII input the result is exactly
// the first limit bytes of s.
func truncate(s string, limit int) string {
	if limit >= len(s) {
		return s
	}
	if limit <= 0 {
		return ""
	}

	// s[limit] is the first byte that would be dropped. A UTF-8 continuation
	// byte has the bit pattern 10xxxxxx; finding one there means the cut splits
	// a character, so step back until the cut sits on a character start.
	for limit > 0 && s[limit]&0xC0 == 0x80 {
		limit--
	}
	return s[:limit]
}

func (rl resourceLabeler) getReplicas() int {
Expand Down
74 changes: 74 additions & 0 deletions internal/lm/resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
package lm

import (
"strings"
"testing"

"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/api/validate/content"

spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
rt "github.com/NVIDIA/k8s-device-plugin/internal/resource/testing"
Expand Down Expand Up @@ -216,6 +218,78 @@ func TestGPUResourceLabeler(t *testing.T) {

}

// TestGetProductName exercises resourceLabeler.getProductName with short and
// over-long inputs. Every case checks the exact value returned, that it stays
// within content.LabelValueMaxLength, and that content.IsLabelValue reports no
// problems with it.
func TestGetProductName(t *testing.T) {
	// Sanitised product name of 55 characters, which leaves little headroom
	// below the label-value limit once further parts are appended.
	const longModel = "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstation-Edition"

	// Time-slicing configuration with two replicas of nvidia.com/gpu, used by
	// the case that expects a "SHARED" suffix.
	timeSliced := &spec.Sharing{
		TimeSlicing: spec.ReplicatedResources{
			Resources: []spec.ReplicatedResource{
				{Name: "nvidia.com/gpu", Replicas: 2},
			},
		},
	}

	type scenario struct {
		name    string
		sharing *spec.Sharing
		parts   []string
		want    string
	}

	scenarios := []scenario{
		{
			name:  "short value is returned unchanged",
			parts: []string{"MOCKMODEL", "MIG", "1g.300gb"},
			want:  "MOCKMODEL-MIG-1g.300gb",
		},
		{
			name:  "short bare model is returned unchanged",
			parts: []string{"MOCKMODEL"},
			want:  "MOCKMODEL",
		},
		{
			name:  "long model with mig profile truncates the model and preserves the profile",
			parts: []string{longModel, "MIG", "1g.24gb"},
			want:  "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstation-Edi-MIG-1g.24gb",
		},
		{
			name:  "media-extension profile is preserved and trailing separator is trimmed",
			parts: []string{longModel, "MIG", "1g.24gb.me"},
			want:  "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstation-MIG-1g.24gb.me",
		},
		{
			name:  "trailing dot from truncation is trimmed",
			parts: []string{strings.Repeat("A", 51) + "." + strings.Repeat("B", 10), "MIG", "1g.5gb"},
			want:  strings.Repeat("A", 51) + "-MIG-1g.5gb",
		},
		{
			name:  "long bare model is truncated to the limit",
			parts: []string{strings.Repeat("A", 80)},
			want:  strings.Repeat("A", 63),
		},
		{
			name:    "shared suffix is preserved when truncating",
			sharing: timeSliced,
			parts:   []string{longModel, "MIG", "1g.24gb"},
			want:    "NVIDIA-RTX-PRO-6000-Blackwell-Max-Q-Workstat-MIG-1g.24gb-SHARED",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			labeler := resourceLabeler{
				resourceName: "nvidia.com/gpu",
				sharing:      sc.sharing,
			}

			got := labeler.getProductName(sc.parts...)

			require.Equal(t, sc.want, got)
			// Independently of the exact value, the result must be usable as
			// a label value.
			require.LessOrEqual(t, len(got), content.LabelValueMaxLength)
			require.Empty(t, content.IsLabelValue(got))
		})
	}
}

func TestSanitise(t *testing.T) {
testCases := []struct {
input string
Expand Down
Loading