From 5b3283b1a054b3ce5ee3438462ea4e75ff29a3e6 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Wed, 1 Jul 2026 14:52:57 +0300 Subject: [PATCH 01/46] feat(vmpool): add VirtualMachinePool CRD and feature gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the VirtualMachinePool API type (namespaced, group virtualization.deckhouse.io/v1alpha2) with the scale and status subresources, generated deepcopy/client/lister/informer code and the CRD manifest. Gate the resource behind the VirtualMachinePool module feature gate (EE/SE+, default off; locked off in CE). No controller behaviour yet — the type and gate are the scaffold for the pool controller. Part of the VirtualMachinePool implementation (ADR: architecture-decision-records dvp/2026-06-29-vmpool.md). Signed-off-by: Pavel Tishkov --- .../typed/core/v1alpha2/core_client.go | 5 + .../core/v1alpha2/fake/fake_core_client.go | 4 + .../v1alpha2/fake/fake_virtualmachinepool.go | 52 + .../core/v1alpha2/generated_expansion.go | 2 + .../typed/core/v1alpha2/virtualmachinepool.go | 70 + .../core/v1alpha2/interface.go | 7 + .../core/v1alpha2/virtualmachinepool.go | 102 ++ .../informers/externalversions/generic.go | 2 + .../core/v1alpha2/expansion_generated.go | 8 + .../core/v1alpha2/virtualmachinepool.go | 70 + api/core/v1alpha2/register.go | 5 + api/core/v1alpha2/virtual_machine_pool.go | 129 ++ .../v1alpha2/vmpoolcondition/condition.go | 72 + api/core/v1alpha2/zz_generated.deepcopy.go | 124 ++ api/scripts/update-codegen.sh | 3 +- crds/virtualmachinepools.yaml | 1460 +++++++++++++++++ .../pkg/featuregates/featuregate.go | 6 + openapi/config-values.yaml | 2 + openapi/doc-ru-config-values.yaml | 1 + 19 files changed, 2123 insertions(+), 1 deletion(-) create mode 100644 api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_virtualmachinepool.go create mode 100644 api/client/generated/clientset/versioned/typed/core/v1alpha2/virtualmachinepool.go create mode 
100644 api/client/generated/informers/externalversions/core/v1alpha2/virtualmachinepool.go create mode 100644 api/client/generated/listers/core/v1alpha2/virtualmachinepool.go create mode 100644 api/core/v1alpha2/virtual_machine_pool.go create mode 100644 api/core/v1alpha2/vmpoolcondition/condition.go create mode 100644 crds/virtualmachinepools.yaml diff --git a/api/client/generated/clientset/versioned/typed/core/v1alpha2/core_client.go b/api/client/generated/clientset/versioned/typed/core/v1alpha2/core_client.go index 95d9f09723..9bde1c921a 100644 --- a/api/client/generated/clientset/versioned/typed/core/v1alpha2/core_client.go +++ b/api/client/generated/clientset/versioned/typed/core/v1alpha2/core_client.go @@ -42,6 +42,7 @@ type VirtualizationV1alpha2Interface interface { VirtualMachineMACAddressesGetter VirtualMachineMACAddressLeasesGetter VirtualMachineOperationsGetter + VirtualMachinePoolsGetter VirtualMachineSnapshotsGetter VirtualMachineSnapshotOperationsGetter } @@ -107,6 +108,10 @@ func (c *VirtualizationV1alpha2Client) VirtualMachineOperations(namespace string return newVirtualMachineOperations(c, namespace) } +func (c *VirtualizationV1alpha2Client) VirtualMachinePools(namespace string) VirtualMachinePoolInterface { + return newVirtualMachinePools(c, namespace) +} + func (c *VirtualizationV1alpha2Client) VirtualMachineSnapshots(namespace string) VirtualMachineSnapshotInterface { return newVirtualMachineSnapshots(c, namespace) } diff --git a/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_core_client.go b/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_core_client.go index 3e4d10a08e..b4498e50fd 100644 --- a/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_core_client.go +++ b/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_core_client.go @@ -84,6 +84,10 @@ func (c *FakeVirtualizationV1alpha2) VirtualMachineOperations(namespace string) return 
newFakeVirtualMachineOperations(c, namespace) } +func (c *FakeVirtualizationV1alpha2) VirtualMachinePools(namespace string) v1alpha2.VirtualMachinePoolInterface { + return newFakeVirtualMachinePools(c, namespace) +} + func (c *FakeVirtualizationV1alpha2) VirtualMachineSnapshots(namespace string) v1alpha2.VirtualMachineSnapshotInterface { return newFakeVirtualMachineSnapshots(c, namespace) } diff --git a/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_virtualmachinepool.go b/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_virtualmachinepool.go new file mode 100644 index 0000000000..66f8670201 --- /dev/null +++ b/api/client/generated/clientset/versioned/typed/core/v1alpha2/fake/fake_virtualmachinepool.go @@ -0,0 +1,52 @@ +/* +Copyright Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + corev1alpha2 "github.com/deckhouse/virtualization/api/client/generated/clientset/versioned/typed/core/v1alpha2" + v1alpha2 "github.com/deckhouse/virtualization/api/core/v1alpha2" + gentype "k8s.io/client-go/gentype" +) + +// fakeVirtualMachinePools implements VirtualMachinePoolInterface +type fakeVirtualMachinePools struct { + *gentype.FakeClientWithList[*v1alpha2.VirtualMachinePool, *v1alpha2.VirtualMachinePoolList] + Fake *FakeVirtualizationV1alpha2 +} + +func newFakeVirtualMachinePools(fake *FakeVirtualizationV1alpha2, namespace string) corev1alpha2.VirtualMachinePoolInterface { + return &fakeVirtualMachinePools{ + gentype.NewFakeClientWithList[*v1alpha2.VirtualMachinePool, *v1alpha2.VirtualMachinePoolList]( + fake.Fake, + namespace, + v1alpha2.SchemeGroupVersion.WithResource("virtualmachinepools"), + v1alpha2.SchemeGroupVersion.WithKind("VirtualMachinePool"), + func() *v1alpha2.VirtualMachinePool { return &v1alpha2.VirtualMachinePool{} }, + func() *v1alpha2.VirtualMachinePoolList { return &v1alpha2.VirtualMachinePoolList{} }, + func(dst, src *v1alpha2.VirtualMachinePoolList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha2.VirtualMachinePoolList) []*v1alpha2.VirtualMachinePool { + return gentype.ToPointerSlice(list.Items) + }, + func(list *v1alpha2.VirtualMachinePoolList, items []*v1alpha2.VirtualMachinePool) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/api/client/generated/clientset/versioned/typed/core/v1alpha2/generated_expansion.go b/api/client/generated/clientset/versioned/typed/core/v1alpha2/generated_expansion.go index 3032ee8501..8a24c86510 100644 --- a/api/client/generated/clientset/versioned/typed/core/v1alpha2/generated_expansion.go +++ b/api/client/generated/clientset/versioned/typed/core/v1alpha2/generated_expansion.go @@ -44,6 +44,8 @@ type VirtualMachineMACAddressLeaseExpansion interface{} type VirtualMachineOperationExpansion interface{} +type 
VirtualMachinePoolExpansion interface{} + type VirtualMachineSnapshotExpansion interface{} type VirtualMachineSnapshotOperationExpansion interface{} diff --git a/api/client/generated/clientset/versioned/typed/core/v1alpha2/virtualmachinepool.go b/api/client/generated/clientset/versioned/typed/core/v1alpha2/virtualmachinepool.go new file mode 100644 index 0000000000..fb0fc5839b --- /dev/null +++ b/api/client/generated/clientset/versioned/typed/core/v1alpha2/virtualmachinepool.go @@ -0,0 +1,70 @@ +/* +Copyright Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + context "context" + + scheme "github.com/deckhouse/virtualization/api/client/generated/clientset/versioned/scheme" + corev1alpha2 "github.com/deckhouse/virtualization/api/core/v1alpha2" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// VirtualMachinePoolsGetter has a method to return a VirtualMachinePoolInterface. +// A group's client should implement this interface. +type VirtualMachinePoolsGetter interface { + VirtualMachinePools(namespace string) VirtualMachinePoolInterface +} + +// VirtualMachinePoolInterface has methods to work with VirtualMachinePool resources. 
+type VirtualMachinePoolInterface interface { + Create(ctx context.Context, virtualMachinePool *corev1alpha2.VirtualMachinePool, opts v1.CreateOptions) (*corev1alpha2.VirtualMachinePool, error) + Update(ctx context.Context, virtualMachinePool *corev1alpha2.VirtualMachinePool, opts v1.UpdateOptions) (*corev1alpha2.VirtualMachinePool, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, virtualMachinePool *corev1alpha2.VirtualMachinePool, opts v1.UpdateOptions) (*corev1alpha2.VirtualMachinePool, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*corev1alpha2.VirtualMachinePool, error) + List(ctx context.Context, opts v1.ListOptions) (*corev1alpha2.VirtualMachinePoolList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *corev1alpha2.VirtualMachinePool, err error) + VirtualMachinePoolExpansion +} + +// virtualMachinePools implements VirtualMachinePoolInterface +type virtualMachinePools struct { + *gentype.ClientWithList[*corev1alpha2.VirtualMachinePool, *corev1alpha2.VirtualMachinePoolList] +} + +// newVirtualMachinePools returns a VirtualMachinePools +func newVirtualMachinePools(c *VirtualizationV1alpha2Client, namespace string) *virtualMachinePools { + return &virtualMachinePools{ + gentype.NewClientWithList[*corev1alpha2.VirtualMachinePool, *corev1alpha2.VirtualMachinePoolList]( + "virtualmachinepools", + c.RESTClient(), + scheme.ParameterCodec, + namespace, + func() *corev1alpha2.VirtualMachinePool { return &corev1alpha2.VirtualMachinePool{} }, + func() *corev1alpha2.VirtualMachinePoolList { return &corev1alpha2.VirtualMachinePoolList{} }, + ), + 
} +} diff --git a/api/client/generated/informers/externalversions/core/v1alpha2/interface.go b/api/client/generated/informers/externalversions/core/v1alpha2/interface.go index ed97a49b94..c98d319363 100644 --- a/api/client/generated/informers/externalversions/core/v1alpha2/interface.go +++ b/api/client/generated/informers/externalversions/core/v1alpha2/interface.go @@ -52,6 +52,8 @@ type Interface interface { VirtualMachineMACAddressLeases() VirtualMachineMACAddressLeaseInformer // VirtualMachineOperations returns a VirtualMachineOperationInformer. VirtualMachineOperations() VirtualMachineOperationInformer + // VirtualMachinePools returns a VirtualMachinePoolInformer. + VirtualMachinePools() VirtualMachinePoolInformer // VirtualMachineSnapshots returns a VirtualMachineSnapshotInformer. VirtualMachineSnapshots() VirtualMachineSnapshotInformer // VirtualMachineSnapshotOperations returns a VirtualMachineSnapshotOperationInformer. @@ -139,6 +141,11 @@ func (v *version) VirtualMachineOperations() VirtualMachineOperationInformer { return &virtualMachineOperationInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} } +// VirtualMachinePools returns a VirtualMachinePoolInformer. +func (v *version) VirtualMachinePools() VirtualMachinePoolInformer { + return &virtualMachinePoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} + // VirtualMachineSnapshots returns a VirtualMachineSnapshotInformer. 
func (v *version) VirtualMachineSnapshots() VirtualMachineSnapshotInformer { return &virtualMachineSnapshotInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} diff --git a/api/client/generated/informers/externalversions/core/v1alpha2/virtualmachinepool.go b/api/client/generated/informers/externalversions/core/v1alpha2/virtualmachinepool.go new file mode 100644 index 0000000000..de1f406bac --- /dev/null +++ b/api/client/generated/informers/externalversions/core/v1alpha2/virtualmachinepool.go @@ -0,0 +1,102 @@ +/* +Copyright Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by informer-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + context "context" + time "time" + + versioned "github.com/deckhouse/virtualization/api/client/generated/clientset/versioned" + internalinterfaces "github.com/deckhouse/virtualization/api/client/generated/informers/externalversions/internalinterfaces" + corev1alpha2 "github.com/deckhouse/virtualization/api/client/generated/listers/core/v1alpha2" + apicorev1alpha2 "github.com/deckhouse/virtualization/api/core/v1alpha2" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// VirtualMachinePoolInformer provides access to a shared informer and lister for +// VirtualMachinePools. 
+type VirtualMachinePoolInformer interface { + Informer() cache.SharedIndexInformer + Lister() corev1alpha2.VirtualMachinePoolLister +} + +type virtualMachinePoolInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewVirtualMachinePoolInformer constructs a new informer for VirtualMachinePool type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewVirtualMachinePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredVirtualMachinePoolInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredVirtualMachinePoolInformer constructs a new informer for VirtualMachinePool type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredVirtualMachinePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtualizationV1alpha2().VirtualMachinePools(namespace).List(context.Background(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtualizationV1alpha2().VirtualMachinePools(namespace).Watch(context.Background(), options) + }, + ListWithContextFunc: func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtualizationV1alpha2().VirtualMachinePools(namespace).List(ctx, options) + }, + WatchFuncWithContext: func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtualizationV1alpha2().VirtualMachinePools(namespace).Watch(ctx, options) + }, + }, + &apicorev1alpha2.VirtualMachinePool{}, + resyncPeriod, + indexers, + ) +} + +func (f *virtualMachinePoolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredVirtualMachinePoolInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *virtualMachinePoolInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&apicorev1alpha2.VirtualMachinePool{}, f.defaultInformer) +} + +func (f *virtualMachinePoolInformer) Lister() corev1alpha2.VirtualMachinePoolLister { + return 
corev1alpha2.NewVirtualMachinePoolLister(f.Informer().GetIndexer()) +} diff --git a/api/client/generated/informers/externalversions/generic.go b/api/client/generated/informers/externalversions/generic.go index 8b16f3d58a..093bdb6a29 100644 --- a/api/client/generated/informers/externalversions/generic.go +++ b/api/client/generated/informers/externalversions/generic.go @@ -82,6 +82,8 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource return &genericInformer{resource: resource.GroupResource(), informer: f.Virtualization().V1alpha2().VirtualMachineMACAddressLeases().Informer()}, nil case v1alpha2.SchemeGroupVersion.WithResource("virtualmachineoperations"): return &genericInformer{resource: resource.GroupResource(), informer: f.Virtualization().V1alpha2().VirtualMachineOperations().Informer()}, nil + case v1alpha2.SchemeGroupVersion.WithResource("virtualmachinepools"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Virtualization().V1alpha2().VirtualMachinePools().Informer()}, nil case v1alpha2.SchemeGroupVersion.WithResource("virtualmachinesnapshots"): return &genericInformer{resource: resource.GroupResource(), informer: f.Virtualization().V1alpha2().VirtualMachineSnapshots().Informer()}, nil case v1alpha2.SchemeGroupVersion.WithResource("virtualmachinesnapshotoperations"): diff --git a/api/client/generated/listers/core/v1alpha2/expansion_generated.go b/api/client/generated/listers/core/v1alpha2/expansion_generated.go index e47e2ae835..f7da265496 100644 --- a/api/client/generated/listers/core/v1alpha2/expansion_generated.go +++ b/api/client/generated/listers/core/v1alpha2/expansion_generated.go @@ -110,6 +110,14 @@ type VirtualMachineOperationListerExpansion interface{} // VirtualMachineOperationNamespaceLister. type VirtualMachineOperationNamespaceListerExpansion interface{} +// VirtualMachinePoolListerExpansion allows custom methods to be added to +// VirtualMachinePoolLister. 
+type VirtualMachinePoolListerExpansion interface{} + +// VirtualMachinePoolNamespaceListerExpansion allows custom methods to be added to +// VirtualMachinePoolNamespaceLister. +type VirtualMachinePoolNamespaceListerExpansion interface{} + // VirtualMachineSnapshotListerExpansion allows custom methods to be added to // VirtualMachineSnapshotLister. type VirtualMachineSnapshotListerExpansion interface{} diff --git a/api/client/generated/listers/core/v1alpha2/virtualmachinepool.go b/api/client/generated/listers/core/v1alpha2/virtualmachinepool.go new file mode 100644 index 0000000000..2bc93b5adb --- /dev/null +++ b/api/client/generated/listers/core/v1alpha2/virtualmachinepool.go @@ -0,0 +1,70 @@ +/* +Copyright Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + corev1alpha2 "github.com/deckhouse/virtualization/api/core/v1alpha2" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" +) + +// VirtualMachinePoolLister helps list VirtualMachinePools. +// All objects returned here must be treated as read-only. +type VirtualMachinePoolLister interface { + // List lists all VirtualMachinePools in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*corev1alpha2.VirtualMachinePool, err error) + // VirtualMachinePools returns an object that can list and get VirtualMachinePools. 
+ VirtualMachinePools(namespace string) VirtualMachinePoolNamespaceLister + VirtualMachinePoolListerExpansion +} + +// virtualMachinePoolLister implements the VirtualMachinePoolLister interface. +type virtualMachinePoolLister struct { + listers.ResourceIndexer[*corev1alpha2.VirtualMachinePool] +} + +// NewVirtualMachinePoolLister returns a new VirtualMachinePoolLister. +func NewVirtualMachinePoolLister(indexer cache.Indexer) VirtualMachinePoolLister { + return &virtualMachinePoolLister{listers.New[*corev1alpha2.VirtualMachinePool](indexer, corev1alpha2.Resource("virtualmachinepool"))} +} + +// VirtualMachinePools returns an object that can list and get VirtualMachinePools. +func (s *virtualMachinePoolLister) VirtualMachinePools(namespace string) VirtualMachinePoolNamespaceLister { + return virtualMachinePoolNamespaceLister{listers.NewNamespaced[*corev1alpha2.VirtualMachinePool](s.ResourceIndexer, namespace)} +} + +// VirtualMachinePoolNamespaceLister helps list and get VirtualMachinePools. +// All objects returned here must be treated as read-only. +type VirtualMachinePoolNamespaceLister interface { + // List lists all VirtualMachinePools in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*corev1alpha2.VirtualMachinePool, err error) + // Get retrieves the VirtualMachinePool from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*corev1alpha2.VirtualMachinePool, error) + VirtualMachinePoolNamespaceListerExpansion +} + +// virtualMachinePoolNamespaceLister implements the VirtualMachinePoolNamespaceLister +// interface. 
+type virtualMachinePoolNamespaceLister struct { + listers.ResourceIndexer[*corev1alpha2.VirtualMachinePool] +} diff --git a/api/core/v1alpha2/register.go b/api/core/v1alpha2/register.go index 821755d18f..ec9c569262 100644 --- a/api/core/v1alpha2/register.go +++ b/api/core/v1alpha2/register.go @@ -38,6 +38,9 @@ var VirtualImageGVK = schema.GroupVersionKind{Group: SchemeGroupVersion.Group, V // VirtualDiskGVK is group version kind for VirtualDisk var VirtualDiskGVK = schema.GroupVersionKind{Group: SchemeGroupVersion.Group, Version: SchemeGroupVersion.Version, Kind: VirtualDiskKind} +// VirtualMachinePoolGVK is group version kind for VirtualMachinePool +var VirtualMachinePoolGVK = schema.GroupVersionKind{Group: SchemeGroupVersion.Group, Version: SchemeGroupVersion.Version, Kind: VirtualMachinePoolKind} + // Kind takes an unqualified kind and returns back a Group qualified GroupKind func Kind(kind string) schema.GroupKind { return SchemeGroupVersion.WithKind(kind).GroupKind() @@ -70,6 +73,8 @@ func addKnownTypes(scheme *runtime.Scheme) error { &VirtualDiskList{}, &VirtualMachine{}, &VirtualMachineList{}, + &VirtualMachinePool{}, + &VirtualMachinePoolList{}, &VirtualMachineBlockDeviceAttachment{}, &VirtualMachineBlockDeviceAttachmentList{}, &VirtualMachineClass{}, diff --git a/api/core/v1alpha2/virtual_machine_pool.go b/api/core/v1alpha2/virtual_machine_pool.go new file mode 100644 index 0000000000..edee170f42 --- /dev/null +++ b/api/core/v1alpha2/virtual_machine_pool.go @@ -0,0 +1,129 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha2 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + VirtualMachinePoolKind = "VirtualMachinePool" + VirtualMachinePoolResource = "virtualmachinepools" +) + +// VirtualMachinePool declaratively manages a group of identical virtual machines: +// it keeps the requested number of replicas, scales via the standard `scale` +// subresource, and reuses "heavy" disks across replica generations. +// +// The resource is available only in paid editions (EE/SE+) and is gated behind +// the `VirtualMachinePool` module feature gate. +// +// +kubebuilder:object:root=true +// +kubebuilder:metadata:labels={heritage=deckhouse,module=virtualization} +// +kubebuilder:subresource:status +// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector +// +kubebuilder:resource:categories={virtualization},scope=Namespaced,shortName={vmpool,vmpools},singular=virtualmachinepool +// +kubebuilder:storageversion +// +kubebuilder:printcolumn:name="Replicas",type="integer",JSONPath=".status.replicas",description="Current number of pool members (including Terminating)." +// +kubebuilder:printcolumn:name="Ready",type="integer",JSONPath=".status.readyReplicas",description="Number of members ready to serve." +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time of resource creation." +// +genclient +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type VirtualMachinePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec VirtualMachinePoolSpec `json:"spec"` + Status VirtualMachinePoolStatus `json:"status,omitempty"` +} + +// VirtualMachinePoolList contains a list of VirtualMachinePool resources. 
+// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type VirtualMachinePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata"` + + Items []VirtualMachinePool `json:"items"` +} + +// VirtualMachinePoolSpec is the desired state of a VirtualMachinePool. +type VirtualMachinePoolSpec struct { + // Replicas is the desired number of virtual machines in the pool. + // + // The field is written only by its owner — an autoscaler or a human via the + // `scale` subresource, or by the addressed scale-down handler. The controller + // never writes it. Bounds are held by the autoscaler; the hard ceiling is the + // namespace ResourceQuota. + // + // +kubebuilder:validation:Minimum=0 + // +optional + Replicas *int32 `json:"replicas,omitempty"` + + // VirtualMachineTemplate is the template every replica is stamped from. Its + // `spec` is an ordinary VirtualMachineSpec, so a replica is no different from a + // manually created virtual machine. + VirtualMachineTemplate VirtualMachineTemplateSpec `json:"virtualMachineTemplate"` +} + +// VirtualMachineTemplateSpec describes the metadata and spec a pool replica is +// created with. +type VirtualMachineTemplateSpec struct { + // Standard object metadata applied to every replica. Arbitrary user labels and + // annotations are allowed; the controller adds its managed pool labels on top. + // + // +optional + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Spec of the virtual machine that backs each replica. + // + // +optional + Spec VirtualMachineSpec `json:"spec,omitempty"` +} + +// VirtualMachinePoolStatus is the observed state of a VirtualMachinePool. +type VirtualMachinePoolStatus struct { + // ObservedGeneration is the generation of the spec the controller has processed. 
+ // + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // Replicas is the number of existing members, including those in Terminating: + // such a machine still occupies resources, so it is real capacity, not a phantom. + // + // +optional + Replicas int32 `json:"replicas,omitempty"` + + // ReadyReplicas is the number of members ready to serve (Terminating excluded). + // + // +optional + ReadyReplicas int32 `json:"readyReplicas,omitempty"` + + // Selector is the label selector the controller publishes for the `scale` + // subresource; HPA/KEDA read it themselves. + // + // +optional + Selector string `json:"selector,omitempty"` + + // Conditions describe the current state of the pool. + // + // +optional + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` +} diff --git a/api/core/v1alpha2/vmpoolcondition/condition.go b/api/core/v1alpha2/vmpoolcondition/condition.go new file mode 100644 index 0000000000..5f6bb9f9b6 --- /dev/null +++ b/api/core/v1alpha2/vmpoolcondition/condition.go @@ -0,0 +1,72 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vmpoolcondition + +// Type is a type of VirtualMachinePool condition. +type Type string + +func (t Type) String() string { + return string(t) +} + +const ( + // TypeAvailable indicates whether the pool has enough ready replicas. 
+ TypeAvailable Type = "Available" + // TypeProgressing indicates that a self-converging rollout is in progress + // (scaling, creation, migration). + TypeProgressing Type = "Progressing" + // TypeSynced indicates whether every live replica is effectively on the + // current virtualMachineTemplate. + TypeSynced Type = "Synced" +) + +// AvailableReason is a reason for the Available condition. +type AvailableReason string + +func (r AvailableReason) String() string { + return string(r) +} + +const ( + ReasonMinimumReplicasAvailable AvailableReason = "MinimumReplicasAvailable" + ReasonMinimumReplicasUnavailable AvailableReason = "MinimumReplicasUnavailable" +) + +// ProgressingReason is a reason for the Progressing condition. +type ProgressingReason string + +func (r ProgressingReason) String() string { + return string(r) +} + +const ( + ReasonPoolStable ProgressingReason = "PoolStable" + ReasonScaling ProgressingReason = "Scaling" +) + +// SyncedReason is a reason for the Synced condition. +type SyncedReason string + +func (r SyncedReason) String() string { + return string(r) +} + +const ( + ReasonPoolSynced SyncedReason = "PoolSynced" + ReasonRolloutInProgress SyncedReason = "RolloutInProgress" + ReasonRestartPendingApproval SyncedReason = "RestartPendingApproval" +) diff --git a/api/core/v1alpha2/zz_generated.deepcopy.go b/api/core/v1alpha2/zz_generated.deepcopy.go index 58d00cb82b..7e7ba946c7 100644 --- a/api/core/v1alpha2/zz_generated.deepcopy.go +++ b/api/core/v1alpha2/zz_generated.deepcopy.go @@ -3069,6 +3069,112 @@ func (in *VirtualMachinePod) DeepCopy() *VirtualMachinePod { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *VirtualMachinePool) DeepCopyInto(out *VirtualMachinePool) {
+	*out = *in // shallow copy of every field; nested values are deep-copied below
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePool.
+func (in *VirtualMachinePool) DeepCopy() *VirtualMachinePool {
+	if in == nil { // a nil receiver copies to nil
+		return nil
+	}
+	out := new(VirtualMachinePool)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *VirtualMachinePool) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil { // never wrap a nil *VirtualMachinePool in a non-nil interface value
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VirtualMachinePoolList) DeepCopyInto(out *VirtualMachinePoolList) {
+	*out = *in // shallow copy of every field; Items is re-allocated below
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items // shadow in/out with pointers to the two slices
+		*out = make([]VirtualMachinePool, len(*in)) // new backing array, so the copy does not share elements
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePoolList.
+func (in *VirtualMachinePoolList) DeepCopy() *VirtualMachinePoolList {
+	if in == nil { // a nil receiver copies to nil
+		return nil
+	}
+	out := new(VirtualMachinePoolList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *VirtualMachinePoolList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil { // never wrap a nil *VirtualMachinePoolList in a non-nil interface value
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VirtualMachinePoolSpec) DeepCopyInto(out *VirtualMachinePoolSpec) {
+	*out = *in // shallow copy of every field; pointer and nested values are fixed up below
+	if in.Replicas != nil {
+		in, out := &in.Replicas, &out.Replicas // shadow in/out with pointers to the two *int32 fields
+		*out = new(int32) // own allocation, so the copy does not alias the source pointer
+		**out = **in
+	}
+	in.VirtualMachineTemplate.DeepCopyInto(&out.VirtualMachineTemplate)
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePoolSpec.
+func (in *VirtualMachinePoolSpec) DeepCopy() *VirtualMachinePoolSpec {
+	if in == nil { // a nil receiver copies to nil
+		return nil
+	}
+	out := new(VirtualMachinePoolSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VirtualMachinePoolStatus) DeepCopyInto(out *VirtualMachinePoolStatus) {
+	*out = *in // shallow copy of every field; Conditions is re-allocated below
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions // shadow in/out with pointers to the two slices
+		*out = make([]v1.Condition, len(*in)) // new backing array, so the copy does not share elements
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePoolStatus.
+func (in *VirtualMachinePoolStatus) DeepCopy() *VirtualMachinePoolStatus {
+	if in == nil { // a nil receiver copies to nil
+		return nil
+	}
+	out := new(VirtualMachinePoolStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *VirtualMachineSnapshot) DeepCopyInto(out *VirtualMachineSnapshot) {
 	*out = *in
@@ -3472,6 +3578,24 @@ func (in *VirtualMachineStatus) DeepCopy() *VirtualMachineStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *VirtualMachineTemplateSpec) DeepCopyInto(out *VirtualMachineTemplateSpec) {
+	*out = *in // shallow copy of every field; nested values are deep-copied below
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachineTemplateSpec.
+func (in *VirtualMachineTemplateSpec) DeepCopy() *VirtualMachineTemplateSpec {
+	if in == nil { // a nil receiver copies to nil
+		return nil
+	}
+	out := new(VirtualMachineTemplateSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *WeightedVirtualMachineAndPodAffinityTerm) DeepCopyInto(out *WeightedVirtualMachineAndPodAffinityTerm) {
 	*out = *in
diff --git a/api/scripts/update-codegen.sh b/api/scripts/update-codegen.sh index b07d310fc5..7139c7d674 100755 --- a/api/scripts/update-codegen.sh +++ b/api/scripts/update-codegen.sh @@ -41,7 +41,8 @@ function source::settings { "VirtualImage" "ClusterVirtualImage" "NodeUSBDevice" - "USBDevice") + "USBDevice" + "VirtualMachinePool") # shellcheck source=/dev/null source "${CODEGEN_PKG}/kube_codegen.sh" diff --git a/crds/virtualmachinepools.yaml b/crds/virtualmachinepools.yaml new file mode 100644 index 0000000000..0f72d8c408 --- /dev/null +++ b/crds/virtualmachinepools.yaml @@ -0,0 +1,1460 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + labels: + heritage: deckhouse + module: virtualization + name: virtualmachinepools.virtualization.deckhouse.io +spec: + group: virtualization.deckhouse.io + names: + categories: + - virtualization + kind: VirtualMachinePool + listKind: VirtualMachinePoolList + plural: virtualmachinepools + shortNames: + - vmpool + - vmpools + singular: virtualmachinepool + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Current number of pool members (including Terminating).
+ jsonPath: .status.replicas + name: Replicas + type: integer + - description: Number of members ready to serve. + jsonPath: .status.readyReplicas + name: Ready + type: integer + - description: Time of resource creation. + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha2 + schema: + openAPIV3Schema: + description: |- + VirtualMachinePool declaratively manages a group of identical virtual machines: + it keeps the requested number of replicas, scales via the standard `scale` + subresource, and reuses "heavy" disks across replica generations. + + The resource is available only in paid editions (EE/SE+) and is gated behind + the `VirtualMachinePool` module feature gate. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VirtualMachinePoolSpec is the desired state of a VirtualMachinePool. + properties: + replicas: + description: |- + Replicas is the desired number of virtual machines in the pool. + + The field is written only by its owner — an autoscaler or a human via the + `scale` subresource, or by the addressed scale-down handler. The controller + never writes it. Bounds are held by the autoscaler; the hard ceiling is the + namespace ResourceQuota. 
+ format: int32 + minimum: 0 + type: integer + virtualMachineTemplate: + description: |- + VirtualMachineTemplate is the template every replica is stamped from. Its + `spec` is an ordinary VirtualMachineSpec, so a replica is no different from a + manually created virtual machine. + properties: + metadata: + description: |- + Standard object metadata applied to every replica. Arbitrary user labels and + annotations are allowed; the controller adds its managed pool labels on top. + type: object + spec: + description: Spec of the virtual machine that backs each replica. + properties: + affinity: + description: |- + VMAffinity [The same](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) as in the pods `spec.affinity` parameter in Kubernetes; + + The affinity setting is completely similar to the above documentation, the only difference is in the names of some parameters. In fact, the following analogs are used: + * podAffinity -> virtualMachineAndPodAffinity + * podAffinityTerm -> virtualMachineAndPodAffinityTerm + properties: + nodeAffinity: + description: + Node affinity is a group of node affinity + scheduling rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. 
+ items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: + A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: + A list of node selector requirements + by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: + The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: + A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: + The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: + Weight associated with matching + the corresponding nodeSelectorTerm, in the + range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: + Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: + A list of node selector requirements + by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: + The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. 
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: + A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: + The label key that the + selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + virtualMachineAndPodAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + virtualMachineAndPodAffinityTerm: + description: + Required. A vm affinity term, associated + with the corresponding weight. + properties: + labelSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + mismatchLabelKeys: + items: + type: string + type: array + namespaceSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding vmAndPodAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - virtualMachineAndPodAffinityTerm + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + mismatchLabelKeys: + items: + type: string + type: array + namespaceSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + type: object + virtualMachineAndPodAntiAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + virtualMachineAndPodAffinityTerm: + description: + Required. A vm affinity term, associated + with the corresponding weight. + properties: + labelSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + mismatchLabelKeys: + items: + type: string + type: array + namespaceSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding vmAndPodAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - virtualMachineAndPodAffinityTerm + - weight + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + mismatchLabelKeys: + items: + type: string + type: array + namespaceSelector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: + matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + type: object + type: object + blockDeviceRefs: + description: |- + List of block devices that can be mounted by disks belonging to the virtual machine. + The order of booting is determined by the order in the list. + items: + properties: + bootOrder: + description: |- + Boot order of the block device. A smaller value means a higher priority. + If the parameter is not set for any device, the boot order follows the device position in the list (starting from 1). + If the parameter is set for at least one device, the boot order is determined by the specified values. + minimum: 1 + type: integer + kind: + description: |- + The BlockDeviceKind is a type of the block device. Options are: + + * `ClusterVirtualImage` — Use `ClusterVirtualImage` as the disk. This type is always mounted in RO mode. If the image is an iso-image, it will be mounted as a CDROM device. + * `VirtualImage` — Use `VirtualImage` as the disk. This type is always mounted in RO mode. If the image is an iso-image, it will be mounted as a CDROM device. + * `VirtualDisk` — Use `VirtualDisk` as the disk. This type is always mounted in RW mode. + enum: + - ClusterVirtualImage + - VirtualImage + - VirtualDisk + type: string + name: + description: The name of attached resource. + type: string + required: + - kind + - name + type: object + maxItems: 16 + minItems: 1 + type: array + bootloader: + default: BIOS + description: |- + The BootloaderType defines bootloader for VM. + * BIOS - use legacy BIOS. + * EFI - use Unified Extensible Firmware (EFI/UEFI). + * EFIWithSecureBoot - use UEFI/EFI with SecureBoot support. + enum: + - BIOS + - EFI + - EFIWithSecureBoot + type: string + cpu: + description: CPUSpec specifies the CPU settings for the VM. 
+ properties: + coreFraction: + description: |- + Guaranteed share of CPU that will be allocated to the VM. Specified as a percentage. + The range of available values is defined in the VirtualMachineClass sizing policy. + If not specified, the default value from the VirtualMachineClass will be used. + pattern: ^(100|[1-9][0-9]?|[1-9])%$ + type: string + cores: + description: + Specifies the number of cores inside the + VM. The value must be greater or equal 1. + format: int32 + minimum: 1 + type: integer + required: + - cores + type: object + disruptions: + default: + restartApprovalMode: Manual + description: |- + Disruptions describes the policy for applying changes that require rebooting the VM + Changes to some VM configuration settings require a reboot of the VM to apply them. This policy allows you to specify the behavior of how the VM will respond to such changes. + properties: + restartApprovalMode: + description: + "RestartApprovalMode defines a restart approving + mode: Manual or Automatic." + enum: + - Manual + - Automatic + type: string + type: object + enableParavirtualization: + default: true + description: |- + Use the `virtio` bus to connect virtual devices of the VM. Set false to disable `virtio` for this VM. + Note: To use paravirtualization mode, some operating systems require the appropriate drivers to be installed. + type: boolean + liveMigrationPolicy: + description: Live migration policy type. + enum: + - Manual + - Never + - AlwaysSafe + - PreferSafe + - AlwaysForced + - PreferForced + type: string + memory: + description: + MemorySpec specifies the memory settings for + the VM. 
+ properties: + size: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - size + type: object + networks: + items: + properties: + id: + type: integer + name: + type: string + type: + type: string + virtualMachineMACAddressName: + type: string + required: + - type + type: object + type: array + nodeSelector: + additionalProperties: + type: string + description: |- + NodeSelector must match a node's labels for the VM to be scheduled on that node. + [The same](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes/) as in the pods `spec.nodeSelector` parameter in Kubernetes. + type: object + osType: + default: Generic + description: |- + The OsType parameter allows you to select the type of used OS, for which a VM with an optimal set of required virtual devices and parameters will be created. + + * Windows - for Microsoft Windows family operating systems. + * Generic - for other types of OS. + enum: + - Windows + - Generic + type: string + priorityClassName: + description: + PriorityClassName [The same](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/) as + in the pods `spec.priorityClassName` parameter in Kubernetes. + type: string + provisioning: + description: + Provisioning is a block that allows you to configure + the provisioning script for the VM. + properties: + sysprepRef: + description: |- + SysprepRef is a reference to an existing Windows sysprep automation. + Resource structure for the SysprepRef type: + * `.data.autounattend.xml`. + * `.data.unattend.xml`. + properties: + kind: + default: Secret + description: |- + The kind of existing Windows sysprep automation resource.
+ The following options are supported: + - Secret + enum: + - Secret + type: string + name: + type: string + required: + - name + type: object + type: + description: |- + ProvisioningType parameter defines the type of provisioning script: + + Parameters supported for using the provisioning script: + * UserData - use the cloud-init in the .spec.provisioning.UserData section. + * UserDataRef - use a cloud-init script that resides in a different resource. + * SysprepRef - Use a Windows Automation script that resides in a different resource. + More information: https://cloudinit.readthedocs.io/en/latest/reference/examples.html + type: string + userData: + description: Inline cloud-init userdata script. + type: string + userDataRef: + description: |- + UserDataRef is reference to an existing resource with a cloud-init script. + Resource structure for userDataRef type: + * `.data.userData`. + properties: + kind: + default: Secret + description: |- + The kind of existing cloud-init automation resource. + The following options are supported: + - Secret + enum: + - Secret + type: string + name: + type: string + required: + - name + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: UserData cannot have userDataRef or sysprepRef. + rule: + "self.type == 'UserData' ? has(self.userData) && + !has(self.userDataRef) && !has(self.sysprepRef) : true" + - message: UserDataRef cannot have userData or sysprepRef. + rule: + "self.type == 'UserDataRef' ? has(self.userDataRef) + && !has(self.userData) && !has(self.sysprepRef) : true" + - message: SysprepRef cannot have userData or userDataRef. + rule: + "self.type == 'SysprepRef' ? has(self.sysprepRef) + && !has(self.userData) && !has(self.userDataRef) : true" + runPolicy: + default: AlwaysOnUnlessStoppedManually + description: |- + RunPolicy parameter defines the VM startup policy + * `AlwaysOn` - after creation the VM is always in a running state, even in case of its shutdown by OS means. 
+ * `AlwaysOff` - after creation the VM is always in the off state. + * `Manual` - after creation the VM is switched off, the VM state (switching on/off) is controlled via sub-resources or OS means. + * `AlwaysOnUnlessStoppedManually` - after creation the VM is always in a running state. The VM can be shutdown by means of the OS or use the d8 utility: `d8 v stop <vm-name>`. + enum: + - AlwaysOn + - AlwaysOff + - Manual + - AlwaysOnUnlessStoppedManually + type: string + terminationGracePeriodSeconds: + default: 60 + description: + Grace period observed after signalling a VM to + stop after which the VM is force terminated. + format: int64 + type: integer + tolerations: + description: |- + Tolerations define rules to tolerate node taints. + [The same](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) as in the pods `spec.tolerations` parameter in Kubernetes. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple <key,value,effect> using the matching operator <operator>. + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint.
By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + topologySpreadConstraints: + items: + description: + TopologySpreadConstraint specifies how to spread + matching pods among the given topology. + properties: + labelSelector: + description: |- + LabelSelector is used to find matching pods. + Pods that match this label selector are counted to determine the number of pods + in their corresponding topology domain. + properties: + matchExpressions: + description: + matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: + key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select the pods over which + spreading will be calculated. The keys are used to lookup values from the + incoming pod labels, those key-value labels are ANDed with labelSelector + to select the group of existing pods over which spreading will be calculated + for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. + MatchLabelKeys cannot be set when LabelSelector isn't set. + Keys that don't exist in the incoming pod labels will + be ignored. A null or empty list means only match against labelSelector. + + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: + description: |- + MaxSkew describes the degree to which pods may be unevenly distributed. + When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference + between the number of matching pods in the target topology and the global minimum. + The global minimum is the minimum number of matching pods in an eligible domain + or zero if the number of eligible domains is less than MinDomains. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 2/2/1: + In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | + | P P | P P | P | + - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; + scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) + violate MaxSkew(1). + - if MaxSkew is 2, incoming pod can be scheduled onto any zone. 
+ When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence + to topologies that satisfy it. + It's a required field. Default value is 1 and 0 is not allowed. + format: int32 + type: integer + minDomains: + description: |- + MinDomains indicates a minimum number of eligible domains. + When the number of eligible domains with matching topology keys is less than minDomains, + Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. + And when the number of eligible domains with matching topology keys equals or greater than minDomains, + this value has no effect on scheduling. + As a result, when the number of eligible domains is less than minDomains, + scheduler won't schedule more than maxSkew Pods to those domains. + If value is nil, the constraint behaves as if MinDomains is equal to 1. + Valid values are integers greater than 0. + When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same + labelSelector spread as 2/2/2: + | zone1 | zone2 | zone3 | + | P P | P P | P P | + The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. + In this situation, new pod with the same labelSelector cannot be scheduled, + because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. + format: int32 + type: integer + nodeAffinityPolicy: + description: |- + NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector + when calculating pod topology spread skew. Options are: + - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. + - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + + If this value is nil, the behavior is equivalent to the Honor policy. 
+ type: string + nodeTaintsPolicy: + description: |- + NodeTaintsPolicy indicates how we will treat node taints when calculating + pod topology spread skew. Options are: + - Honor: nodes without taints, along with tainted nodes for which the incoming pod + has a toleration, are included. + - Ignore: node taints are ignored. All nodes are included. + + If this value is nil, the behavior is equivalent to the Ignore policy. + type: string + topologyKey: + description: |- + TopologyKey is the key of node labels. Nodes that have a label with this key + and identical values are considered to be in the same topology. + We consider each <key, value> as a "bucket", and try to put balanced number + of pods into each bucket. + We define a domain as a particular instance of a topology. + Also, we define an eligible domain as a domain whose nodes meet the requirements of + nodeAffinityPolicy and nodeTaintsPolicy. + e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. + And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. + It's a required field. + type: string + whenUnsatisfiable: + description: |- + WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy + the spread constraint. + - DoNotSchedule (default) tells the scheduler not to schedule it. + - ScheduleAnyway tells the scheduler to schedule the pod in any location, + but giving higher precedence to topologies that would help reduce the + skew. + A constraint is considered "Unsatisfiable" for an incoming pod + if and only if every possible node assignment for that pod would violate + "MaxSkew" on some topology.
+ For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 3/1/1: + | zone1 | zone2 | zone3 | + | P P P | P | P | + If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled + to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies + MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler + won't make it *more* imbalanced. + It's a required field. + type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array + usbDevices: + description: |- + List of USB devices to attach to the virtual machine. + Devices are referenced by name of USBDevice resource in the same namespace. + items: + description: + USBDeviceSpecRef references a USB device by + name. + properties: + name: + description: + The name of USBDevice resource in the same + namespace. + type: string + required: + - name + type: object + maxItems: 8 + type: array + virtualMachineClassName: + description: + Name of the `VirtualMachineClass` resource describing + the requirements for a virtual CPU, memory and the resource + allocation policy and node placement policies for virtual + machines. + type: string + virtualMachineIPAddressName: + description: |- + Name for the associated `virtualMachineIPAddress` resource. + Specified when it is necessary to use a previously created IP address of the VM. + If not explicitly specified, by default a `virtualMachineIPAddress` resource is created for the VM with a name similar to the VM resource (`.metadata.name`). + type: string + required: + - blockDeviceRefs + - cpu + - liveMigrationPolicy + - memory + - virtualMachineClassName + type: object + type: object + required: + - virtualMachineTemplate + type: object + status: + description: VirtualMachinePoolStatus is the observed state of a VirtualMachinePool. + properties: + conditions: + description: Conditions describe the current state of the pool. 
+ items: + description: + Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + observedGeneration: + description: + ObservedGeneration is the generation of the spec the + controller has processed. + format: int64 + type: integer + readyReplicas: + description: + ReadyReplicas is the number of members ready to serve + (Terminating excluded). + format: int32 + type: integer + replicas: + description: |- + Replicas is the number of existing members, including those in Terminating: + such a machine still occupies resources, so it is real capacity, not a phantom. + format: int32 + type: integer + selector: + description: |- + Selector is the label selector the controller publishes for the `scale` + subresource; HPA/KEDA read it themselves. + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + scale: + labelSelectorPath: .status.selector + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/images/virtualization-artifact/pkg/featuregates/featuregate.go b/images/virtualization-artifact/pkg/featuregates/featuregate.go index fc8e081bc7..5504b11958 100644 --- a/images/virtualization-artifact/pkg/featuregates/featuregate.go +++ b/images/virtualization-artifact/pkg/featuregates/featuregate.go @@ -33,6 +33,7 @@ const ( HotplugCPUWithLiveMigration featuregate.Feature = "HotplugCPUWithLiveMigration" HotplugMemoryWithLiveMigration featuregate.Feature = "HotplugMemoryWithLiveMigration" HotplugCPUAndMemoryWithInPlaceResize featuregate.Feature = "HotplugCPUAndMemoryWithInPlaceResize" + VirtualMachinePool featuregate.Feature = "VirtualMachinePool" ) var featureSpecs = map[featuregate.Feature]featuregate.FeatureSpec{ @@ -75,6 
+76,11 @@ var featureSpecs = map[featuregate.Feature]featuregate.FeatureSpec{ LockToDefault: version.GetEdition() == version.EditionCE, PreRelease: featuregate.Alpha, }, + VirtualMachinePool: { + Default: false, + LockToDefault: version.GetEdition() == version.EditionCE, + PreRelease: featuregate.Alpha, + }, } var ( diff --git a/openapi/config-values.yaml b/openapi/config-values.yaml index 2b2aa2b814..1bad3bcf25 100644 --- a/openapi/config-values.yaml +++ b/openapi/config-values.yaml @@ -226,9 +226,11 @@ properties: - `HotplugCPUWithLiveMigration` — enable live changing of cpu cores number via LiveMigration. (Not available in CE); - `HotplugMemoryWithLiveMigration` — enable live changing of memory size via LiveMigration. (Not available in CE); - `HotplugCPUAndMemoryWithInPlaceResize` - enable live changing of cpu cores number or memory size via InPlaceResize. (Not available in CE); + - `VirtualMachinePool` — enable the VirtualMachinePool resource for group management of virtual machines. (Not available in CE); items: type: string enum: - "HotplugCPUWithLiveMigration" - "HotplugMemoryWithLiveMigration" - "HotplugCPUAndMemoryWithInPlaceResize" + - "VirtualMachinePool" diff --git a/openapi/doc-ru-config-values.yaml b/openapi/doc-ru-config-values.yaml index 2e29677fb7..846db8a4f3 100644 --- a/openapi/doc-ru-config-values.yaml +++ b/openapi/doc-ru-config-values.yaml @@ -156,5 +156,6 @@ properties: - `HotplugCPUWithLiveMigration` — включить изменение количества ядер процессора без перезагрузки через живую миграцию. (Не доступно в CE); - `HotplugMemoryWithLiveMigration` — включить изменение размера памяти без перезагрузки через живую миграцию. (Не доступно в CE); - `HotplugCPUAndMemoryWithInPlaceResize` - включить изменение количества ядер процессора или размера памяти без перезагрузки через InPlaceResize (Не доступно в CE) + - `VirtualMachinePool` — включить ресурс VirtualMachinePool для группового управления виртуальными машинами. 
(Не доступно в CE) items: type: string From 66c361b29f9bcbdcbe0e90fa2c0f5ecddf63e2a5 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 11:14:27 +0300 Subject: [PATCH 02/46] feat(vmpool): scaffold enterprise-only pool controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the VirtualMachinePool controller skeleton behind the EE build tag (//go:build EE) and the VirtualMachinePool feature gate: handler-chain reconciler with an empty chain and a primary watch on the resource. It is wired into the controller manager through build-tagged enterprise shims (setup_enterprise_{ee,ce}.go); the CE build compiles a no-op. No reconcile behaviour yet — replica maintenance, template propagation and reusable disks land in the follow-up slices. Signed-off-by: Pavel Tishkov --- .../cmd/virtualization-controller/main.go | 6 ++ .../setup_enterprise_ce.go | 38 ++++++++ .../setup_enterprise_ee.go | 32 +++++++ .../vmpool/internal/watcher/vmpool.go | 39 ++++++++ .../controller/vmpool/vmpool_controller.go | 67 ++++++++++++++ .../controller/vmpool/vmpool_reconciler.go | 89 +++++++++++++++++++ 6 files changed, 271 insertions(+) create mode 100644 images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ce.go create mode 100644 images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vmpool.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go diff --git a/images/virtualization-artifact/cmd/virtualization-controller/main.go b/images/virtualization-artifact/cmd/virtualization-controller/main.go index 1a6e0f1263..6c2f78a92d 100644 --- a/images/virtualization-artifact/cmd/virtualization-controller/main.go +++ 
b/images/virtualization-artifact/cmd/virtualization-controller/main.go @@ -487,6 +487,12 @@ func main() { os.Exit(1) } + // Enterprise-only controllers (compiled in EE builds only, see setup_enterprise_{ee,ce}.go). + if err = setupEnterpriseControllers(ctx, mgr, logLevel, logOutput, logDebugVerbosity, logDebugControllerList); err != nil { + log.Error(err.Error()) + os.Exit(1) + } + vmmacLogger := logger.NewControllerLogger(vmmac.ControllerName, logLevel, logOutput, logDebugVerbosity, logDebugControllerList) if _, err = vmmac.NewController(ctx, mgr, vmmacLogger, clusterUUID, virtClient); err != nil { log.Error(err.Error()) diff --git a/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ce.go b/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ce.go new file mode 100644 index 0000000000..5e4391e617 --- /dev/null +++ b/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ce.go @@ -0,0 +1,38 @@ +//go:build !EE +// +build !EE + +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// setupEnterpriseControllers is a no-op in CE builds: enterprise-only +// controllers (e.g. VirtualMachinePool) are not compiled here. 
+func setupEnterpriseControllers( + _ context.Context, + _ manager.Manager, + _, _ string, + _ int, + _ []string, +) error { + return nil +} diff --git a/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go b/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go new file mode 100644 index 0000000000..2e757a6add --- /dev/null +++ b/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go @@ -0,0 +1,32 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package main + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/manager" + + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool" + "github.com/deckhouse/virtualization-controller/pkg/logger" +) + +// setupEnterpriseControllers wires controllers that ship only in paid editions +// (EE/SE+). It is compiled into EE builds; the CE build uses the no-op stub in +// setup_enterprise_ce.go. +func setupEnterpriseControllers( + ctx context.Context, + mgr manager.Manager, + logLevel, logOutput string, + logDebugVerbosity int, + logDebugControllerList []string, +) error { + vmpoolLogger := logger.NewControllerLogger(vmpool.ControllerName, logLevel, logOutput, logDebugVerbosity, logDebugControllerList) + return vmpool.SetupController(ctx, mgr, vmpoolLogger) +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vmpool.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vmpool.go new file mode 100644 index 0000000000..a00754a487 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vmpool.go @@ -0,0 +1,39 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. 
See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package watcher + +import ( + "fmt" + + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +type VirtualMachinePoolWatcher struct{} + +func NewVirtualMachinePoolWatcher() *VirtualMachinePoolWatcher { + return &VirtualMachinePoolWatcher{} +} + +func (w *VirtualMachinePoolWatcher) Watch(mgr manager.Manager, ctr controller.Controller) error { + if err := ctr.Watch( + source.Kind( + mgr.GetCache(), + &v1alpha2.VirtualMachinePool{}, + &handler.TypedEnqueueRequestForObject[*v1alpha2.VirtualMachinePool]{}, + ), + ); err != nil { + return fmt.Errorf("error setting watch on VirtualMachinePool: %w", err) + } + return nil +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go new file mode 100644 index 0000000000..ff8bbe642c --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go @@ -0,0 +1,67 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package vmpool + +import ( + "context" + "time" + + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/manager" + + "github.com/deckhouse/deckhouse/pkg/log" + "github.com/deckhouse/virtualization-controller/pkg/featuregates" + "github.com/deckhouse/virtualization-controller/pkg/logger" +) + +const ( + ControllerName = "vmpool-controller" +) + +// SetupController wires the VirtualMachinePool controller into the manager. 
+// +// The resource is gated behind the VirtualMachinePool feature gate: while the +// gate is off the controller is not set up at all (the CRD is still installed, +// so objects can be created — they simply are not reconciled). See ADR +// "VirtualMachinePool", section "Feature gate". +func SetupController( + ctx context.Context, + mgr manager.Manager, + log *log.Logger, +) error { + if !featuregates.Default().Enabled(featuregates.VirtualMachinePool) { + return nil + } + + client := mgr.GetClient() + + // Handlers are added by the follow-up slices (replica maintenance, template + // propagation, reuse disks). The scaffold wires an empty chain. + handlers := []Handler{} + r := NewReconciler(client, handlers) + + c, err := controller.New(ControllerName, mgr, controller.Options{ + Reconciler: r, + RecoverPanic: ptr.To(true), + LogConstructor: logger.NewConstructor(log), + CacheSyncTimeout: 10 * time.Minute, + UsePriorityQueue: ptr.To(true), + }) + if err != nil { + return err + } + + if err = r.SetupController(ctx, mgr, c); err != nil { + return err + } + + log.Info("Initialized VirtualMachinePool controller") + return nil +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go new file mode 100644 index 0000000000..b9929d5397 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go @@ -0,0 +1,89 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. 
See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package vmpool + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/deckhouse/virtualization-controller/pkg/controller/reconciler" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/watcher" + "github.com/deckhouse/virtualization-controller/pkg/logger" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +type Handler interface { + Handle(ctx context.Context, pool *v1alpha2.VirtualMachinePool) (reconcile.Result, error) + Name() string +} + +type Watcher interface { + Watch(mgr manager.Manager, ctr controller.Controller) error +} + +func NewReconciler(client client.Client, handlers []Handler) *Reconciler { + return &Reconciler{ + client: client, + handlers: handlers, + } +} + +type Reconciler struct { + client client.Client + handlers []Handler +} + +func (r *Reconciler) SetupController(_ context.Context, mgr manager.Manager, ctr controller.Controller) error { + for _, w := range []Watcher{ + watcher.NewVirtualMachinePoolWatcher(), + } { + if err := w.Watch(mgr, ctr); err != nil { + return err + } + } + return nil +} + +func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { + log := logger.FromContext(ctx) + + pool := reconciler.NewResource(req.NamespacedName, r.client, r.factory, r.statusGetter) + + err := pool.Fetch(ctx) + if err != nil { + return reconcile.Result{}, err + } + + if pool.IsEmpty() { + log.Info("Reconcile observe an absent VirtualMachinePool: it may be deleted") + return reconcile.Result{}, nil + } + + rec := reconciler.NewBaseReconciler[Handler](r.handlers) + rec.SetHandlerExecutor(func(ctx context.Context, h Handler) (reconcile.Result, error) { + return h.Handle(ctx, pool.Current()) + }) + 
rec.SetResourceUpdater(func(ctx context.Context) error { + return pool.Update(ctx) + }) + + return rec.Reconcile(ctx) +} + +func (r *Reconciler) factory() *v1alpha2.VirtualMachinePool { + return &v1alpha2.VirtualMachinePool{} +} + +func (r *Reconciler) statusGetter(obj *v1alpha2.VirtualMachinePool) v1alpha2.VirtualMachinePoolStatus { + return obj.Status +} From 8c71361a585b5be288657e8f05c9abaa366e0e5e Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 11:39:04 +0300 Subject: [PATCH 03/46] chore(ci): run virtualization-controller unit tests with the EE build tag EE is the default shipped edition (werf.inc.yaml builds with -tags $MODULE_EDITION, default EE), but the unit-test task ran ginkgo without a build tag, so //go:build EE code was never exercised by the unit suite. Run ginkgo with --tags EE so enterprise code and its tests are covered. Signed-off-by: Pavel Tishkov --- images/virtualization-artifact/Taskfile.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/images/virtualization-artifact/Taskfile.yaml b/images/virtualization-artifact/Taskfile.yaml index fb254e3ddd..3a8edfcaad 100644 --- a/images/virtualization-artifact/Taskfile.yaml +++ b/images/virtualization-artifact/Taskfile.yaml @@ -52,7 +52,9 @@ tasks: desc: "Run go unit tests" cmds: - | - go tool ginkgo -v -r pkg/ + # Build with the EE tag: EE is the default shipped edition (see werf.inc.yaml, + # MODULE_EDITION defaults to EE), so unit tests must cover //go:build EE code too. + go tool ginkgo -v --tags EE -r pkg/ lint: desc: "Run linters locally" From a23c08c920698b27852094562dd0ee2a80c582eb Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 11:39:04 +0300 Subject: [PATCH 04/46] feat(vmpool): add expectations tracker for cache-lag-safe scaling Add an in-memory, thread-safe expectations tracker (EE) modelled on the Kubernetes ReplicaSet UIDTrackingControllerExpectations: creations are counted, deletions tracked by UID, with a TTL safety valve. 
The pool reconciler will use it to avoid double-creating anonymous replicas while the informer cache lags behind a Create/Delete. Covered by unit tests (race-clean). Signed-off-by: Pavel Tishkov --- .../internal/expectations/expectations.go | 164 +++++++++++++++++ .../expectations/expectations_suite_test.go | 21 +++ .../expectations/expectations_test.go | 166 ++++++++++++++++++ 3 files changed, 351 insertions(+) create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_suite_test.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations.go new file mode 100644 index 0000000000..766e922130 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations.go @@ -0,0 +1,164 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +// Package expectations provides an in-memory, thread-safe tracker of pending +// child-object creations and deletions for the pool controller, modelled on the +// battle-tested Kubernetes ReplicaSet UIDTrackingControllerExpectations. +// +// A controller that creates anonymous children (via GenerateName) cannot rely +// on its informer cache being up to date within a single reconcile: right after +// Create/Delete the cache still shows the old set, so the next reconcile would +// recompute the same diff and act again, overshooting. 
Expectations close that +// gap: after acting, the controller records how many creations/deletions it +// expects to observe; it does not act again for the same key until those +// expectations are Satisfied — either observed through the informer, or expired +// by TTL as a safety valve against a lost watch event. +// +// Creations are tracked as a counter because the child UID is unknown until the +// API server assigns it. Deletions are tracked by UID so a duplicate delete +// event (or a delete of an object we did not expect) cannot wrongly satisfy an +// expectation. +package expectations + +import ( + "sync" + "time" + + "k8s.io/apimachinery/pkg/types" +) + +// DefaultTTL is how long an unmet expectation is honoured before it is treated +// as satisfied. It mirrors the Kubernetes ExpectationsTimeout: long enough to +// ride out normal informer lag, short enough that a lost watch event cannot +// wedge the controller forever. +const DefaultTTL = 5 * time.Minute + +// Expectations tracks, per controller key, the number of child creations and +// the set of child deletions the controller is still waiting to observe. +// +// All methods are safe for concurrent use. +type Expectations struct { + mu sync.Mutex + items map[string]*item + ttl time.Duration + // now is injectable so tests can control TTL expiry deterministically. + now func() time.Time +} + +type item struct { + creations int + deletions map[types.UID]struct{} + timestamp time.Time +} + +// New returns an Expectations tracker with the default TTL. +func New() *Expectations { + return NewWithTTL(DefaultTTL) +} + +// NewWithTTL returns an Expectations tracker with a custom TTL. +func NewWithTTL(ttl time.Duration) *Expectations { + return &Expectations{ + items: make(map[string]*item), + ttl: ttl, + now: time.Now, + } +} + +// getOrCreate must be called with the mutex held. 
+func (e *Expectations) getOrCreate(key string) *item {
+	it, ok := e.items[key]
+	if !ok {
+		it = &item{deletions: make(map[types.UID]struct{})}
+		e.items[key] = it
+		return it
+	}
+	// Satisfied treats an entry older than the TTL as satisfied, so whatever is
+	// still pending in such an entry is no longer waited for. Drop those
+	// leftovers before new expectations are added: otherwise they would be
+	// merged into the fresh expectation, keep it unsatisfied after all of its
+	// own events were observed, and stall the key for another full TTL.
+	if e.now().Sub(it.timestamp) >= e.ttl {
+		it.creations = 0
+		it.deletions = make(map[types.UID]struct{})
+	}
+	return it
+}
+
+// ExpectCreations records that the controller has just created (or is about to
+// create) n children for key and expects to observe n creation events. A
+// non-positive n is ignored. The count is added to whatever is still pending
+// for key, except that leftovers of an expectation that already outlived the
+// TTL are discarded first. It resets the expectation's timestamp.
+func (e *Expectations) ExpectCreations(key string, n int) {
+	if n <= 0 {
+		return
+	}
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	it := e.getOrCreate(key)
+	it.creations += n
+	it.timestamp = e.now()
+}
+
+// ExpectDeletions records that the controller has just deleted the children
+// with the given UIDs for key and expects to observe their deletion events. An
+// empty uids slice is ignored. The UIDs are added to the set still pending for
+// key, except that leftovers of an expectation that already outlived the TTL
+// are discarded first. It resets the expectation's timestamp.
+func (e *Expectations) ExpectDeletions(key string, uids []types.UID) {
+	if len(uids) == 0 {
+		return
+	}
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	it := e.getOrCreate(key)
+	for _, uid := range uids {
+		it.deletions[uid] = struct{}{}
+	}
+	it.timestamp = e.now()
+}
+
+// CreationObserved records that one expected creation for key has been observed
+// through the informer. Surplus observations (more than expected) are ignored,
+// keeping the counter from going negative. An unknown key is a no-op.
+func (e *Expectations) CreationObserved(key string) {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	it, ok := e.items[key]
+	if !ok {
+		return
+	}
+	if it.creations > 0 {
+		it.creations--
+	}
+}
+
+// DeletionObserved records that the child with the given UID has been observed
+// deleted through the informer. Only UIDs the controller expected are cleared,
+// so duplicate or unrelated delete events do not satisfy an expectation. An
+// unknown key is a no-op.
+func (e *Expectations) DeletionObserved(key string, uid types.UID) {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	it, ok := e.items[key]
+	if !ok {
+		return
+	}
+	delete(it.deletions, uid)
+}
+
+// Satisfied reports whether the controller may act on key again.
It is true +// when there is no tracked expectation, when all expected creations and +// deletions have been observed, or when the expectation has outlived the TTL +// (the safety valve against a lost watch event). +func (e *Expectations) Satisfied(key string) bool { + e.mu.Lock() + defer e.mu.Unlock() + it, ok := e.items[key] + if !ok { + return true + } + if it.creations <= 0 && len(it.deletions) == 0 { + return true + } + return e.now().Sub(it.timestamp) >= e.ttl +} + +// Forget drops all expectations for key. Call it when the controlled object is +// deleted so its entry does not leak. +func (e *Expectations) Forget(key string) { + e.mu.Lock() + defer e.mu.Unlock() + delete(e.items, key) +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_suite_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_suite_test.go new file mode 100644 index 0000000000..3294bbf43b --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_suite_test.go @@ -0,0 +1,21 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package expectations + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestExpectations(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "VirtualMachinePool Expectations Suite") +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go new file mode 100644 index 0000000000..9dc9660b65 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go @@ -0,0 +1,166 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package expectations + +import ( + "sync" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/types" +) + +const key = "ci/web" + +var _ = Describe("Expectations", func() { + Context("an unknown key", func() { + It("is satisfied (nothing expected yet)", func() { + e := New() + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) + + Context("creations", func() { + It("is unsatisfied until every expected creation is observed", func() { + e := New() + e.ExpectCreations(key, 2) + Expect(e.Satisfied(key)).To(BeFalse()) + + e.CreationObserved(key) + Expect(e.Satisfied(key)).To(BeFalse()) + + e.CreationObserved(key) + Expect(e.Satisfied(key)).To(BeTrue()) + }) + + It("does not bank surplus observations below zero", func() { + e := New() + e.ExpectCreations(key, 1) + // Observe more than expected — the extra observations must be ignored. + e.CreationObserved(key) + e.CreationObserved(key) + e.CreationObserved(key) + Expect(e.Satisfied(key)).To(BeTrue()) + + // A fresh expectation must not be pre-satisfied by earlier surplus. 
+ e.ExpectCreations(key, 1) + Expect(e.Satisfied(key)).To(BeFalse()) + }) + + It("ignores non-positive counts", func() { + e := New() + e.ExpectCreations(key, 0) + e.ExpectCreations(key, -3) + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) + + Context("deletions", func() { + uidA := types.UID("a") + uidB := types.UID("b") + + It("is unsatisfied until every expected UID is observed deleted", func() { + e := New() + e.ExpectDeletions(key, []types.UID{uidA, uidB}) + Expect(e.Satisfied(key)).To(BeFalse()) + + e.DeletionObserved(key, uidA) + Expect(e.Satisfied(key)).To(BeFalse()) + + e.DeletionObserved(key, uidB) + Expect(e.Satisfied(key)).To(BeTrue()) + }) + + It("is not fooled by duplicate or unrelated deletion events", func() { + e := New() + e.ExpectDeletions(key, []types.UID{uidA}) + + // An unrelated UID must not satisfy the expectation. + e.DeletionObserved(key, types.UID("unrelated")) + Expect(e.Satisfied(key)).To(BeFalse()) + + e.DeletionObserved(key, uidA) + Expect(e.Satisfied(key)).To(BeTrue()) + + // A duplicate delete event must not underflow anything. + e.DeletionObserved(key, uidA) + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) + + Context("creations and deletions together", func() { + It("requires both to be cleared", func() { + e := New() + e.ExpectCreations(key, 1) + e.ExpectDeletions(key, []types.UID{"x"}) + + e.CreationObserved(key) + Expect(e.Satisfied(key)).To(BeFalse()) // deletion still pending + + e.DeletionObserved(key, "x") + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) + + Context("TTL safety valve", func() { + It("becomes satisfied once the expectation outlives the TTL", func() { + e := NewWithTTL(time.Minute) + now := time.Unix(1_700_000_000, 0) + e.now = func() time.Time { return now } + + e.ExpectCreations(key, 1) + Expect(e.Satisfied(key)).To(BeFalse()) + + // Just under the TTL — still honoured. 
+ now = now.Add(59 * time.Second) + Expect(e.Satisfied(key)).To(BeFalse()) + + // Past the TTL — treated as satisfied even without observation. + now = now.Add(2 * time.Second) + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) + + Context("Forget", func() { + It("drops the tracked expectation", func() { + e := New() + e.ExpectCreations(key, 3) + Expect(e.Satisfied(key)).To(BeFalse()) + + e.Forget(key) + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) + + Context("concurrent access", func() { + It("is race-free under parallel expect/observe", func() { + e := New() + const workers = 16 + const perWorker = 200 + + var wg sync.WaitGroup + for w := 0; w < workers; w++ { + wg.Add(1) + go func() { + defer GinkgoRecover() + defer wg.Done() + for i := 0; i < perWorker; i++ { + e.ExpectCreations(key, 1) + e.CreationObserved(key) + } + }() + } + wg.Wait() + + // Every creation was observed, so the tracker must settle satisfied. + Expect(e.Satisfied(key)).To(BeTrue()) + }) + }) +}) From 51fcb3ba89921b6507cb045072f40d5f661ab7d8 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 11:55:36 +0300 Subject: [PATCH 05/46] feat(vmpool): reconcile replica count with cache-lag-safe create/delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the pool's core reconcile: list members by the managed pool-uid label + controllerRef, create missing replicas from the template (managed labels + controller ownerReference, GenerateName naming) and remove surplus ones, then publish status (replicas, readyReplicas, selector, Available/Progressing conditions). Every create/delete is guarded by the expectations tracker, and a member VirtualMachine watcher re-enqueues the owning pool and records observed creations/deletions — so a lagging informer cache cannot double-create anonymous replicas. Terminating members count toward a scale-down (invariant 2), so a replica already leaving is not over-replaced. 
Covered by unit tests (fake client, race-clean). The controller stays behind //go:build EE and the feature gate. Signed-off-by: Pavel Tishkov --- .../internal/handler/handler_suite_test.go | 21 ++ .../vmpool/internal/handler/sync.go | 249 ++++++++++++++++++ .../vmpool/internal/handler/sync_test.go | 206 +++++++++++++++ .../vmpool/internal/poollabels/poollabels.go | 55 ++++ .../controller/vmpool/internal/watcher/vm.go | 96 +++++++ .../controller/vmpool/vmpool_controller.go | 15 +- .../controller/vmpool/vmpool_reconciler.go | 8 +- 7 files changed, 644 insertions(+), 6 deletions(-) create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/handler/handler_suite_test.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vm.go diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/handler_suite_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/handler_suite_test.go new file mode 100644 index 0000000000..6cd3380145 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/handler_suite_test.go @@ -0,0 +1,21 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package handler + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestHandler(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "VirtualMachinePool Handlers Suite") +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go new file mode 100644 index 0000000000..f2c36d0f78 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go @@ -0,0 +1,249 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package handler + +import ( + "context" + "errors" + "fmt" + "sort" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/expectations" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/poollabels" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "github.com/deckhouse/virtualization/api/core/v1alpha2/vmpoolcondition" +) + +const syncHandlerName = "sync" + +// expectationsRecheck is how soon reconcile retries while it waits for pending +// creations/deletions to settle in the informer cache. It is a safety net: the +// member watcher normally re-enqueues the pool as soon as the events arrive. +const expectationsRecheck = 15 * time.Second + +// SyncHandler keeps the number of pool members equal to spec.replicas: it +// creates missing replicas from the template and removes surplus ones, guarding +// every action with expectations so a lagging cache cannot cause double-acting. 
+type SyncHandler struct {
+	// client lists, creates and deletes the member VirtualMachines.
+	client client.Client
+	// exp tracks creations/deletions that were issued but not yet observed.
+	exp *expectations.Expectations
+}
+
+// NewSyncHandler builds a SyncHandler that works through the given client and
+// records its pending actions in the given expectations tracker.
+func NewSyncHandler(c client.Client, exp *expectations.Expectations) *SyncHandler {
+	h := &SyncHandler{}
+	h.client = c
+	h.exp = exp
+	return h
+}
+
+// Name returns the name of this handler (syncHandlerName).
+func (h *SyncHandler) Name() string {
+	return syncHandlerName
+}
+
+// Handle brings the number of pool members to spec.replicas. It lists the
+// members, schedules a status update from that list, and — only when the
+// expectations for the pool are satisfied — creates the missing replicas or
+// removes the surplus ones. While expectations are pending it asks for a
+// requeue after expectationsRecheck instead of acting.
+func (h *SyncHandler) Handle(ctx context.Context, pool *v1alpha2.VirtualMachinePool) (reconcile.Result, error) {
+	poolKey := types.NamespacedName{Namespace: pool.GetNamespace(), Name: pool.GetName()}.String()
+
+	// A pool that is being deleted is not scaled any more: its members carry a
+	// controller ownerReference and are left to garbage collection. Only the
+	// expectations entry is dropped here so that it does not leak.
+	if pool.GetDeletionTimestamp() != nil {
+		h.exp.Forget(poolKey)
+		return reconcile.Result{}, nil
+	}
+
+	members, listErr := h.listMembers(ctx, pool)
+	if listErr != nil {
+		return reconcile.Result{}, fmt.Errorf("list pool members: %w", listErr)
+	}
+
+	// From here on every return path publishes status from the listed set,
+	// regardless of whether this pass acts.
+	defer h.updateStatus(pool, members)
+
+	// Acting again before the previous creations/deletions were observed (or
+	// expired) is exactly what would over-create anonymous replicas on a
+	// lagging cache, so wait.
+	if !h.exp.Satisfied(poolKey) {
+		return reconcile.Result{RequeueAfter: expectationsRecheck}, nil
+	}
+
+	want := int(ptr.Deref(pool.Spec.Replicas, 0))
+	// Every listed member counts, Terminating and Stopped ones included: a
+	// Terminating replica still holds capacity, so counting it avoids a
+	// premature replacement (invariant 2).
+	have := len(members)
+
+	if have < want {
+		return reconcile.Result{}, h.scaleUp(ctx, pool, poolKey, want-have)
+	}
+	if have > want {
+		return reconcile.Result{}, h.scaleDown(ctx, pool, poolKey, members, have-want)
+	}
+	return reconcile.Result{}, nil
+}
+
+// listMembers returns the VirtualMachines in the pool's namespace that match the
+// pool member selector and whose controller ownerReference points at this pool.
+func (h *SyncHandler) listMembers(ctx context.Context, pool *v1alpha2.VirtualMachinePool) ([]v1alpha2.VirtualMachine, error) {
+	var vms v1alpha2.VirtualMachineList
+	err := h.client.List(ctx, &vms, client.InNamespace(pool.GetNamespace()), poollabels.MemberSelector(pool))
+	if err != nil {
+		return nil, err
+	}
+
+	// The label selector narrows the list; the controllerRef UID comparison is
+	// the authoritative ownership check.
+	owned := make([]v1alpha2.VirtualMachine, 0, len(vms.Items))
+	for idx := range vms.Items {
+		vm := &vms.Items[idx]
+		owner := metav1.GetControllerOf(vm)
+		if owner == nil || owner.UID != pool.GetUID() {
+			continue
+		}
+		owned = append(owned, *vm)
+	}
+	return owned, nil
+}
+
+// scaleUp creates n new members for the pool. All failed creations are reported
+// together in the returned error; the remaining creations are still attempted.
+func (h *SyncHandler) scaleUp(ctx context.Context, pool *v1alpha2.VirtualMachinePool, key string, n int) error {
+	// The expectation is registered up front, so a creation event can never
+	// arrive before the handler has started waiting for it.
+	h.exp.ExpectCreations(key, n)
+
+	var joined error
+	for left := n; left > 0; left-- {
+		createErr := h.client.Create(ctx, h.newMember(pool))
+		if createErr == nil {
+			continue
+		}
+		// A failed creation produces no event — take it back out of the
+		// expectation.
+		h.exp.CreationObserved(key)
+		joined = errors.Join(joined, fmt.Errorf("create replica: %w", createErr))
+	}
+	return joined
+}
+
+// scaleDown removes surplus members. Members that are already Terminating are
+// credited against surplus first (invariant 2); only the remainder is deleted
+// from the non-terminating members, chosen by pickVictims. Deletion failures
+// other than NotFound are reported together in the returned error.
+func (h *SyncHandler) scaleDown(ctx context.Context, pool *v1alpha2.VirtualMachinePool, key string, members []v1alpha2.VirtualMachine, surplus int) error {
+	alive := make([]v1alpha2.VirtualMachine, 0, len(members))
+	for idx := range members {
+		if members[idx].GetDeletionTimestamp() == nil {
+			alive = append(alive, members[idx])
+		}
+	}
+	leaving := len(members) - len(alive)
+
+	remaining := surplus - leaving
+	if remaining <= 0 {
+		return nil
+	}
+
+	victims := pickVictims(alive, remaining)
+	expected := make([]types.UID, len(victims))
+	for idx := range victims {
+		expected[idx] = victims[idx].GetUID()
+	}
+	h.exp.ExpectDeletions(key, expected)
+
+	var joined error
+	for idx := range victims {
+		victim := &victims[idx]
+		deleteErr := h.client.Delete(ctx, victim)
+		if deleteErr == nil {
+			continue
+		}
+		// Whether the replica was already gone or the call failed, this delete
+		// request will not be followed by its event — stop waiting for it.
+		h.exp.DeletionObserved(key, victim.GetUID())
+		if apierrors.IsNotFound(deleteErr) {
+			continue
+		}
+		joined = errors.Join(joined, fmt.Errorf("delete replica %s: %w", victim.GetName(), deleteErr))
+	}
+	return joined
+}
+
+// pickVictims chooses which replicas to remove during anonymous scale-down. The
+// first version deletes the youngest first (least accumulated state); the
+// configurable scaleDownPolicy is introduced in a later slice.
+func pickVictims(candidates []v1alpha2.VirtualMachine, n int) []v1alpha2.VirtualMachine { + sort.SliceStable(candidates, func(i, j int) bool { + return candidates[i].GetCreationTimestamp().After(candidates[j].GetCreationTimestamp().Time) + }) + if n > len(candidates) { + n = len(candidates) + } + return candidates[:n] +} + +func (h *SyncHandler) newMember(pool *v1alpha2.VirtualMachinePool) *v1alpha2.VirtualMachine { + tmpl := pool.Spec.VirtualMachineTemplate + + labels := make(map[string]string, len(tmpl.Labels)+2) + for k, v := range tmpl.Labels { + labels[k] = v + } + for k, v := range poollabels.Member(pool) { + labels[k] = v + } + + var annotations map[string]string + if len(tmpl.Annotations) > 0 { + annotations = make(map[string]string, len(tmpl.Annotations)) + for k, v := range tmpl.Annotations { + annotations[k] = v + } + } + + return &v1alpha2.VirtualMachine{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: pool.GetName() + "-", + Namespace: pool.GetNamespace(), + Labels: labels, + Annotations: annotations, + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(pool, v1alpha2.VirtualMachinePoolGVK)}, + }, + Spec: *tmpl.Spec.DeepCopy(), + } +} + +func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members []v1alpha2.VirtualMachine) { + ready := 0 + for i := range members { + if members[i].GetDeletionTimestamp() == nil && members[i].Status.Phase == v1alpha2.MachineRunning { + ready++ + } + } + desired := int(ptr.Deref(pool.Spec.Replicas, 0)) + + pool.Status.ObservedGeneration = pool.GetGeneration() + pool.Status.Replicas = int32(len(members)) + pool.Status.ReadyReplicas = int32(ready) + pool.Status.Selector = poollabels.StatusSelector(pool) + + availableStatus := metav1.ConditionFalse + availableReason := vmpoolcondition.ReasonMinimumReplicasUnavailable + if ready >= desired { + availableStatus = metav1.ConditionTrue + availableReason = vmpoolcondition.ReasonMinimumReplicasAvailable + } + 
meta.SetStatusCondition(&pool.Status.Conditions, metav1.Condition{ + Type: vmpoolcondition.TypeAvailable.String(), + Status: availableStatus, + Reason: availableReason.String(), + ObservedGeneration: pool.GetGeneration(), + Message: fmt.Sprintf("VirtualMachinePool has %d of %d ready replicas.", ready, desired), + }) + + progressingStatus := metav1.ConditionFalse + progressingReason := vmpoolcondition.ReasonPoolStable + progressingMessage := "No scaling or creation in progress." + if len(members) != desired { + progressingStatus = metav1.ConditionTrue + progressingReason = vmpoolcondition.ReasonScaling + progressingMessage = fmt.Sprintf("Scaling VirtualMachinePool from %d to %d replicas.", len(members), desired) + } + meta.SetStatusCondition(&pool.Status.Conditions, metav1.Condition{ + Type: vmpoolcondition.TypeProgressing.String(), + Status: progressingStatus, + Reason: progressingReason.String(), + ObservedGeneration: pool.GetGeneration(), + Message: progressingMessage, + }) +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go new file mode 100644 index 0000000000..54c7b2c337 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go @@ -0,0 +1,206 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package handler + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/virtualization-controller/pkg/common/testutil" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/expectations" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/poollabels" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "github.com/deckhouse/virtualization/api/core/v1alpha2/vmpoolcondition" +) + +const ( + poolNamespace = "ci" + poolName = "web" + poolUID = types.UID("pool-uid-0001") +) + +func newPool(replicas int32) *v1alpha2.VirtualMachinePool { + return &v1alpha2.VirtualMachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: poolNamespace, + UID: poolUID, + Generation: 1, + }, + Spec: v1alpha2.VirtualMachinePoolSpec{ + Replicas: ptr.To(replicas), + }, + } +} + +// newMemberVM builds a VM that belongs to pool: the managed labels and the +// controller ownerReference are what listMembers keys on. 
+func newMemberVM(pool *v1alpha2.VirtualMachinePool, name string, phase v1alpha2.MachinePhase, createdAt time.Time, terminating bool) *v1alpha2.VirtualMachine { + vm := &v1alpha2.VirtualMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: pool.Namespace, + UID: types.UID(name + "-uid"), + Labels: poollabels.Member(pool), + CreationTimestamp: metav1.NewTime(createdAt), + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(pool, v1alpha2.VirtualMachinePoolGVK)}, + }, + Status: v1alpha2.VirtualMachineStatus{Phase: phase}, + } + if terminating { + ts := metav1.NewTime(createdAt.Add(time.Hour)) + vm.DeletionTimestamp = &ts + vm.Finalizers = []string{"test.local/keep"} + } + return vm +} + +func listMemberNames(ctx context.Context, c client.Client, pool *v1alpha2.VirtualMachinePool) []string { + var list v1alpha2.VirtualMachineList + Expect(c.List(ctx, &list, client.InNamespace(pool.Namespace), poollabels.MemberSelector(pool))).To(Succeed()) + names := make([]string, 0, len(list.Items)) + for i := range list.Items { + names = append(names, list.Items[i].Name) + } + return names +} + +var _ = Describe("SyncHandler", func() { + var ( + ctx context.Context + exp *expectations.Expectations + clock time.Time + ) + + BeforeEach(func() { + ctx = context.Background() + exp = expectations.New() + clock = time.Unix(1_700_000_000, 0) + }) + + Context("scale up", func() { + It("creates the missing replicas from the template", func() { + pool := newPool(3) + c, err := testutil.NewFakeClientWithObjects(pool) + Expect(err).NotTo(HaveOccurred()) + + h := NewSyncHandler(c, exp) + _, err = h.Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + members := listMemberNames(ctx, c, pool) + Expect(members).To(HaveLen(3)) + }) + + It("stamps managed labels and a controller ownerReference on each replica", func() { + pool := newPool(1) + c, err := testutil.NewFakeClientWithObjects(pool) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, 
exp).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + var list v1alpha2.VirtualMachineList + Expect(c.List(ctx, &list, client.InNamespace(pool.Namespace))).To(Succeed()) + Expect(list.Items).To(HaveLen(1)) + vm := list.Items[0] + Expect(vm.Name).To(HavePrefix(poolName + "-")) + Expect(vm.Labels).To(HaveKeyWithValue(poollabels.PoolUID, string(poolUID))) + Expect(vm.Labels).To(HaveKeyWithValue(poollabels.Pool, poolName)) + ref := metav1.GetControllerOf(&vm) + Expect(ref).NotTo(BeNil()) + Expect(ref.UID).To(Equal(poolUID)) + Expect(ref.Kind).To(Equal(v1alpha2.VirtualMachinePoolKind)) + }) + + It("does not create again while creations are unobserved (cache-lag guard)", func() { + pool := newPool(3) + c, err := testutil.NewFakeClientWithObjects(pool) + Expect(err).NotTo(HaveOccurred()) + h := NewSyncHandler(c, exp) + + // First pass creates 3 and records expectations. + _, err = h.Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + Expect(listMemberNames(ctx, c, pool)).To(HaveLen(3)) + + // Second pass: cache now shows 3, but expectations are unmet — the + // handler must NOT create 3 more. It requeues instead. 
+ res, err := h.Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RequeueAfter).To(BeNumerically(">", 0)) + Expect(listMemberNames(ctx, c, pool)).To(HaveLen(3)) + }) + }) + + Context("steady state", func() { + It("neither creates nor deletes when live == desired", func() { + pool := newPool(2) + m1 := newMemberVM(pool, "web-aaaaa", v1alpha2.MachineRunning, clock, false) + m2 := newMemberVM(pool, "web-bbbbb", v1alpha2.MachineRunning, clock, false) + c, err := testutil.NewFakeClientWithObjects(pool, m1, m2) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + Expect(listMemberNames(ctx, c, pool)).To(HaveLen(2)) + Expect(pool.Status.Replicas).To(Equal(int32(2))) + Expect(pool.Status.ReadyReplicas).To(Equal(int32(2))) + Expect(pool.Status.Selector).To(ContainSubstring(string(poolUID))) + Expect(meta.IsStatusConditionTrue(pool.Status.Conditions, vmpoolcondition.TypeAvailable.String())).To(BeTrue()) + Expect(meta.IsStatusConditionFalse(pool.Status.Conditions, vmpoolcondition.TypeProgressing.String())).To(BeTrue()) + }) + }) + + Context("scale down", func() { + It("deletes the youngest surplus replicas", func() { + pool := newPool(1) + older := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, clock, false) + newer := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + c, err := testutil.NewFakeClientWithObjects(pool, older, newer) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + remaining := listMemberNames(ctx, c, pool) + Expect(remaining).To(ConsistOf("web-old")) // newest removed first + }) + }) + + Context("Terminating accounting (invariant 2)", func() { + It("counts a Terminating member toward the reduction and deletes fewer healthy ones", func() { + pool := newPool(1) + // live=3, desired=1 => surplus 2; one member already Terminating counts + // as one 
of those two, so only ONE healthy replica should be deleted. + terminating := newMemberVM(pool, "web-term", v1alpha2.MachineRunning, clock, true) + healthyOld := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + healthyNew := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, clock.Add(2*time.Minute), false) + c, err := testutil.NewFakeClientWithObjects(pool, terminating, healthyOld, healthyNew) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + remaining := listMemberNames(ctx, c, pool) + // web-new (youngest healthy) deleted; web-term still present (Terminating, + // held by finalizer); web-old kept. + Expect(remaining).To(ConsistOf("web-term", "web-old")) + }) + }) +}) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go new file mode 100644 index 0000000000..0dd08c0615 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go @@ -0,0 +1,55 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +// Package poollabels defines the controller-managed labels that mark a +// VirtualMachine as a member of a VirtualMachinePool and the selectors used to +// list members. +package poollabels + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +const ( + // PoolUID marks a replica with the metadata.uid of its pool. It is unique + // per pool instance, so a manually created VirtualMachine can never match it + // — membership cannot be hijacked. 
The controller lists members by this label + // and publishes it in status.selector for the scale subresource. Analogous to + // batch.kubernetes.io/controller-uid on Job pods. + PoolUID = "vmpool.virtualization.deckhouse.io/pool-uid" + + // Pool is a human-readable label with the pool name, predictable from the + // pool and handy for kubectl/observability. Analogous to job-name on Job pods. + Pool = "vmpool.virtualization.deckhouse.io/pool" +) + +// Member returns the managed labels stamped on every replica of the pool. +func Member(pool *v1alpha2.VirtualMachinePool) map[string]string { + return map[string]string{ + PoolUID: string(pool.GetUID()), + Pool: pool.GetName(), + } +} + +// MemberSelector is the label selector the controller uses to list the members +// it owns. It contains only the hash-independent pool-uid, so it stays stable +// across template changes. +func MemberSelector(pool *v1alpha2.VirtualMachinePool) client.MatchingLabels { + return client.MatchingLabels{PoolUID: string(pool.GetUID())} +} + +// StatusSelector is the string form published in status.selector for the scale +// subresource (HPA/KEDA read it themselves). +func StatusSelector(pool *v1alpha2.VirtualMachinePool) string { + return metav1.FormatLabelSelector(&metav1.LabelSelector{ + MatchLabels: map[string]string{PoolUID: string(pool.GetUID())}, + }) +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vm.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vm.go new file mode 100644 index 0000000000..55381b4a6c --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/watcher/vm.go @@ -0,0 +1,96 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. 
See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package watcher + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/expectations" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +// VirtualMachineWatcher watches pool members (VirtualMachines) and, for each +// event, re-enqueues the owning pool and updates its expectations so a lagging +// cache cannot make the pool over-create or over-delete replicas. +type VirtualMachineWatcher struct { + exp *expectations.Expectations +} + +func NewVirtualMachineWatcher(exp *expectations.Expectations) *VirtualMachineWatcher { + return &VirtualMachineWatcher{exp: exp} +} + +func (w *VirtualMachineWatcher) Watch(mgr manager.Manager, ctr controller.Controller) error { + if err := ctr.Watch( + source.Kind( + mgr.GetCache(), + &v1alpha2.VirtualMachine{}, + &memberEventHandler{exp: w.exp}, + ), + ); err != nil { + return fmt.Errorf("error setting watch on pool member VirtualMachines: %w", err) + } + return nil +} + +// memberEventHandler enqueues the pool that owns a member VM and records +// observed creations/deletions against its expectations. +type memberEventHandler struct { + exp *expectations.Expectations +} + +// ownerKey returns the NamespacedName of the pool that controls vm, or nil if +// the VM is not controlled by a VirtualMachinePool. 
+func ownerKey(vm *v1alpha2.VirtualMachine) *types.NamespacedName { + ref := metav1.GetControllerOf(vm) + if ref == nil || ref.Kind != v1alpha2.VirtualMachinePoolKind || ref.APIVersion != v1alpha2.SchemeGroupVersion.String() { + return nil + } + return &types.NamespacedName{Namespace: vm.GetNamespace(), Name: ref.Name} +} + +func (m *memberEventHandler) Create(_ context.Context, e event.TypedCreateEvent[*v1alpha2.VirtualMachine], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + key := ownerKey(e.Object) + if key == nil { + return + } + m.exp.CreationObserved(key.String()) + q.Add(reconcile.Request{NamespacedName: *key}) +} + +func (m *memberEventHandler) Delete(_ context.Context, e event.TypedDeleteEvent[*v1alpha2.VirtualMachine], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + key := ownerKey(e.Object) + if key == nil { + return + } + m.exp.DeletionObserved(key.String(), e.Object.GetUID()) + q.Add(reconcile.Request{NamespacedName: *key}) +} + +func (m *memberEventHandler) Update(_ context.Context, e event.TypedUpdateEvent[*v1alpha2.VirtualMachine], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + if key := ownerKey(e.ObjectNew); key != nil { + q.Add(reconcile.Request{NamespacedName: *key}) + } +} + +func (m *memberEventHandler) Generic(_ context.Context, e event.TypedGenericEvent[*v1alpha2.VirtualMachine], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + if key := ownerKey(e.Object); key != nil { + q.Add(reconcile.Request{NamespacedName: *key}) + } +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go index ff8bbe642c..24e320032d 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go @@ -17,6 +17,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" 
"github.com/deckhouse/deckhouse/pkg/log" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/expectations" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/handler" "github.com/deckhouse/virtualization-controller/pkg/featuregates" "github.com/deckhouse/virtualization-controller/pkg/logger" ) @@ -42,10 +44,15 @@ func SetupController( client := mgr.GetClient() - // Handlers are added by the follow-up slices (replica maintenance, template - // propagation, reuse disks). The scaffold wires an empty chain. - handlers := []Handler{} - r := NewReconciler(client, handlers) + // exp guards against a lagging informer cache causing double create/delete of + // anonymous replicas. It is shared between the reconcile handlers and the + // member watcher that observes creations/deletions. + exp := expectations.New() + + handlers := []Handler{ + handler.NewSyncHandler(client, exp), + } + r := NewReconciler(client, exp, handlers) c, err := controller.New(ControllerName, mgr, controller.Options{ Reconciler: r, diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go index b9929d5397..e2654a0a5e 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_reconciler.go @@ -17,6 +17,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/deckhouse/virtualization-controller/pkg/controller/reconciler" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/expectations" "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/watcher" "github.com/deckhouse/virtualization-controller/pkg/logger" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -31,21 +32,24 @@ type Watcher interface { Watch(mgr manager.Manager, ctr controller.Controller) error } -func 
NewReconciler(client client.Client, handlers []Handler) *Reconciler { +func NewReconciler(client client.Client, exp *expectations.Expectations, handlers []Handler) *Reconciler { return &Reconciler{ client: client, + exp: exp, handlers: handlers, } } type Reconciler struct { client client.Client + exp *expectations.Expectations handlers []Handler } func (r *Reconciler) SetupController(_ context.Context, mgr manager.Manager, ctr controller.Controller) error { for _, w := range []Watcher{ watcher.NewVirtualMachinePoolWatcher(), + watcher.NewVirtualMachineWatcher(r.exp), } { if err := w.Watch(mgr, ctr); err != nil { return err @@ -71,7 +75,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco rec := reconciler.NewBaseReconciler[Handler](r.handlers) rec.SetHandlerExecutor(func(ctx context.Context, h Handler) (reconcile.Result, error) { - return h.Handle(ctx, pool.Current()) + return h.Handle(ctx, pool.Changed()) }) rec.SetResourceUpdater(func(ctx context.Context) error { return pool.Update(ctx) From 6756dc4ab7d3e305da3aac3b4b7a551853d4b908 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:07:05 +0300 Subject: [PATCH 06/46] feat(vmpool): add scaleDownPolicy for anonymous scale-down Add the required spec.scaleDownPolicy enum (NewestFirst / OldestFirst / Explicit) and honour it when the pool is scaled down anonymously via the scale subresource: NewestFirst removes the youngest replicas first, OldestFirst the oldest, and Explicit removes nothing anonymously (such pools shrink only by addressed removal). The scale-subresource guard that rejects anonymous shrink under Explicit is added next. Covered by unit tests. 
Signed-off-by: Pavel Tishkov --- api/core/v1alpha2/virtual_machine_pool.go | 25 +++++++++++++++ crds/virtualmachinepools.yaml | 20 ++++++++++++ .../vmpool/internal/handler/sync.go | 27 ++++++++++++---- .../vmpool/internal/handler/sync_test.go | 32 ++++++++++++++++++- 4 files changed, 97 insertions(+), 7 deletions(-) diff --git a/api/core/v1alpha2/virtual_machine_pool.go b/api/core/v1alpha2/virtual_machine_pool.go index edee170f42..66ebc813be 100644 --- a/api/core/v1alpha2/virtual_machine_pool.go +++ b/api/core/v1alpha2/virtual_machine_pool.go @@ -73,12 +73,37 @@ type VirtualMachinePoolSpec struct { // +optional Replicas *int32 `json:"replicas,omitempty"` + // ScaleDownPolicy chooses how a replica is picked when the pool is scaled down + // anonymously through the `scale` subresource. It is required and has no + // default, forcing a conscious choice between "any replica may be killed" and + // "only addressed removal is allowed". + // + // - `NewestFirst` — anonymous scale-down is allowed; the youngest replicas + // (least accumulated state) are removed first. + // - `OldestFirst` — anonymous scale-down is allowed; the oldest replicas are + // removed first (faster rotation). + // - `Explicit` — anonymous scale-down through `scale` is rejected by a + // webhook; replicas can be removed only by address. For "busy" workloads + // such as CI runners and VDI. + // + // +kubebuilder:validation:Enum=NewestFirst;OldestFirst;Explicit + ScaleDownPolicy ScaleDownPolicy `json:"scaleDownPolicy"` + // VirtualMachineTemplate is the template every replica is stamped from. Its // `spec` is an ordinary VirtualMachineSpec, so a replica is no different from a // manually created virtual machine. VirtualMachineTemplate VirtualMachineTemplateSpec `json:"virtualMachineTemplate"` } +// ScaleDownPolicy selects which replica is removed on anonymous scale-down. 
+type ScaleDownPolicy string + +const ( + ScaleDownPolicyNewestFirst ScaleDownPolicy = "NewestFirst" + ScaleDownPolicyOldestFirst ScaleDownPolicy = "OldestFirst" + ScaleDownPolicyExplicit ScaleDownPolicy = "Explicit" +) + // VirtualMachineTemplateSpec describes the metadata and spec a pool replica is // created with. type VirtualMachineTemplateSpec struct { diff --git a/crds/virtualmachinepools.yaml b/crds/virtualmachinepools.yaml index 0f72d8c408..deb1844980 100644 --- a/crds/virtualmachinepools.yaml +++ b/crds/virtualmachinepools.yaml @@ -77,6 +77,25 @@ spec: format: int32 minimum: 0 type: integer + scaleDownPolicy: + description: |- + ScaleDownPolicy chooses how a replica is picked when the pool is scaled down + anonymously through the `scale` subresource. It is required and has no + default, forcing a conscious choice between "any replica may be killed" and + "only addressed removal is allowed". + + - `NewestFirst` — anonymous scale-down is allowed; the youngest replicas + (least accumulated state) are removed first. + - `OldestFirst` — anonymous scale-down is allowed; the oldest replicas are + removed first (faster rotation). + - `Explicit` — anonymous scale-down through `scale` is rejected by a + webhook; replicas can be removed only by address. For "busy" workloads + such as CI runners and VDI. + enum: + - NewestFirst + - OldestFirst + - Explicit + type: string virtualMachineTemplate: description: |- VirtualMachineTemplate is the template every replica is stamped from. 
Its @@ -1357,6 +1376,7 @@ spec: type: object type: object required: + - scaleDownPolicy - virtualMachineTemplate type: object status: diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go index f2c36d0f78..52f0252014 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go @@ -139,7 +139,13 @@ func (h *SyncHandler) scaleDown(ctx context.Context, pool *v1alpha2.VirtualMachi return nil } - victims := pickVictims(candidates, toDelete) + victims := pickVictims(pool.Spec.ScaleDownPolicy, candidates, toDelete) + if len(victims) == 0 { + // Explicit policy: anonymous scale-down is not allowed here — replicas are + // removed only by address (scaleDownWith). The /scale path is additionally + // blocked by an admission webhook. + return nil + } uids := make([]types.UID, 0, len(victims)) for i := range victims { uids = append(uids, victims[i].GetUID()) @@ -159,12 +165,21 @@ func (h *SyncHandler) scaleDown(ctx context.Context, pool *v1alpha2.VirtualMachi return errs } -// pickVictims chooses which replicas to remove during anonymous scale-down. The -// first version deletes the youngest first (least accumulated state); the -// configurable scaleDownPolicy is introduced in a later slice. -func pickVictims(candidates []v1alpha2.VirtualMachine, n int) []v1alpha2.VirtualMachine { +// pickVictims chooses which replicas to remove during anonymous scale-down, +// honouring the pool's scaleDownPolicy. Explicit forbids anonymous removal, so +// it returns nothing — such pools shrink only through addressed removal. 
+func pickVictims(policy v1alpha2.ScaleDownPolicy, candidates []v1alpha2.VirtualMachine, n int) []v1alpha2.VirtualMachine { + if n <= 0 || policy == v1alpha2.ScaleDownPolicyExplicit { + return nil + } + oldestFirst := policy == v1alpha2.ScaleDownPolicyOldestFirst sort.SliceStable(candidates, func(i, j int) bool { - return candidates[i].GetCreationTimestamp().After(candidates[j].GetCreationTimestamp().Time) + ti := candidates[i].GetCreationTimestamp().Time + tj := candidates[j].GetCreationTimestamp().Time + if oldestFirst { + return ti.Before(tj) + } + return tj.Before(ti) // NewestFirst: youngest removed first }) if n > len(candidates) { n = len(candidates) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go index 54c7b2c337..608ab6bce3 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go @@ -42,7 +42,8 @@ func newPool(replicas int32) *v1alpha2.VirtualMachinePool { Generation: 1, }, Spec: v1alpha2.VirtualMachinePoolSpec{ - Replicas: ptr.To(replicas), + Replicas: ptr.To(replicas), + ScaleDownPolicy: v1alpha2.ScaleDownPolicyNewestFirst, }, } } @@ -181,6 +182,35 @@ var _ = Describe("SyncHandler", func() { remaining := listMemberNames(ctx, c, pool) Expect(remaining).To(ConsistOf("web-old")) // newest removed first }) + + It("deletes the oldest surplus replicas under OldestFirst", func() { + pool := newPool(1) + pool.Spec.ScaleDownPolicy = v1alpha2.ScaleDownPolicyOldestFirst + older := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, clock, false) + newer := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + c, err := testutil.NewFakeClientWithObjects(pool, older, newer) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + 
Expect(err).NotTo(HaveOccurred()) + + Expect(listMemberNames(ctx, c, pool)).To(ConsistOf("web-new")) // oldest removed first + }) + + It("removes nothing anonymously under Explicit", func() { + pool := newPool(1) + pool.Spec.ScaleDownPolicy = v1alpha2.ScaleDownPolicyExplicit + m1 := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, clock, false) + m2 := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + c, err := testutil.NewFakeClientWithObjects(pool, m1, m2) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + // Explicit forbids anonymous scale-down: both replicas stay. + Expect(listMemberNames(ctx, c, pool)).To(ConsistOf("web-a", "web-b")) + }) }) Context("Terminating accounting (invariant 2)", func() { From a8074e2d1e0a4af07f0f4aeffcbb951714a29832 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:11:03 +0300 Subject: [PATCH 07/46] feat(vmpool): guard anonymous scale-down for Explicit pools Add a validating webhook on the virtualmachinepools/scale subresource that rejects a replicas decrease when the pool's scaleDownPolicy is Explicit, pointing the user to scaleDownWith for addressed removal. Growth and no-op scale updates are always allowed. The webhook is registered only in EE builds and self-gates on the VirtualMachinePool feature gate; its ValidatingWebhookConfiguration entry is rendered only when the gate is enabled. Covered by unit tests. 
Signed-off-by: Pavel Tishkov --- .../setup_enterprise_ee.go | 8 +- .../controller/vmpool/vmpool_scale_webhook.go | 88 ++++++++++++++++++ .../vmpool/vmpool_scale_webhook_test.go | 89 +++++++++++++++++++ .../controller/vmpool/vmpool_suite_test.go | 21 +++++ .../validation-webhook.yaml | 20 +++++ 5 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook_test.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/vmpool_suite_test.go diff --git a/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go b/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go index 2e757a6add..d96e45fb4f 100644 --- a/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go +++ b/images/virtualization-artifact/cmd/virtualization-controller/setup_enterprise_ee.go @@ -28,5 +28,11 @@ func setupEnterpriseControllers( logDebugControllerList []string, ) error { vmpoolLogger := logger.NewControllerLogger(vmpool.ControllerName, logLevel, logOutput, logDebugVerbosity, logDebugControllerList) - return vmpool.SetupController(ctx, mgr, vmpoolLogger) + if err := vmpool.SetupController(ctx, mgr, vmpoolLogger); err != nil { + return err + } + // Guards anonymous scale-down for scaleDownPolicy: Explicit. Self-gated by + // the VirtualMachinePool feature gate. 
+ vmpool.SetupScaleWebhook(mgr) + return nil } diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook.go new file mode 100644 index 0000000000..25a52387e1 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook.go @@ -0,0 +1,88 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package vmpool + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + admissionv1 "k8s.io/api/admission/v1" + autoscalingv1 "k8s.io/api/autoscaling/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + "github.com/deckhouse/virtualization-controller/pkg/featuregates" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +// ScaleWebhookPath is where the scale-subresource guard is served. It must match +// the ValidatingWebhookConfiguration entry for virtualmachinepools/scale. +const ScaleWebhookPath = "/validate-virtualization-deckhouse-io-v1alpha2-virtualmachinepool-scale" + +// SetupScaleWebhook registers the guard that rejects anonymous scale-down via +// the scale subresource for pools with scaleDownPolicy: Explicit. +func SetupScaleWebhook(mgr manager.Manager) { + // Gated like the controller: while the feature gate is off the guard is not + // registered (the CRD's scale subresource is still served, just unguarded — + // there is no controller to act on it either). 
+ if !featuregates.Default().Enabled(featuregates.VirtualMachinePool) { + return + } + mgr.GetWebhookServer().Register(ScaleWebhookPath, &webhook.Admission{ + Handler: &scaleValidator{client: mgr.GetClient()}, + }) +} + +type scaleValidator struct { + client client.Client +} + +func (v *scaleValidator) Handle(ctx context.Context, req admission.Request) admission.Response { + // Only UPDATE of the scale subresource carries a replicas change to guard. + if req.SubResource != "scale" || req.Operation != admissionv1.Update { + return admission.Allowed("") + } + + var newScale, oldScale autoscalingv1.Scale + if err := json.Unmarshal(req.Object.Raw, &newScale); err != nil { + return admission.Errored(http.StatusBadRequest, fmt.Errorf("decode new Scale: %w", err)) + } + if err := json.Unmarshal(req.OldObject.Raw, &oldScale); err != nil { + return admission.Errored(http.StatusBadRequest, fmt.Errorf("decode old Scale: %w", err)) + } + + // Only a decrease is anonymous scale-down; growth and no-ops are always fine. + if newScale.Spec.Replicas >= oldScale.Spec.Replicas { + return admission.Allowed("") + } + + pool := &v1alpha2.VirtualMachinePool{} + if err := v.client.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: req.Name}, pool); err != nil { + if apierrors.IsNotFound(err) { + return admission.Allowed("") + } + return admission.Errored(http.StatusInternalServerError, fmt.Errorf("get VirtualMachinePool %s/%s: %w", req.Namespace, req.Name, err)) + } + + if pool.Spec.ScaleDownPolicy == v1alpha2.ScaleDownPolicyExplicit { + return admission.Denied(fmt.Sprintf( + "VirtualMachinePool %q uses scaleDownPolicy Explicit: decreasing replicas through the scale subresource is not allowed. 
"+ + "Remove specific virtual machines with the scaleDownWith subresource instead.", + req.Name, + )) + } + + return admission.Allowed("") +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook_test.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook_test.go new file mode 100644 index 0000000000..329339ca22 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_scale_webhook_test.go @@ -0,0 +1,89 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package vmpool + +import ( + "context" + "encoding/json" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + admissionv1 "k8s.io/api/admission/v1" + autoscalingv1 "k8s.io/api/autoscaling/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + "github.com/deckhouse/virtualization-controller/pkg/common/testutil" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +func scaleRaw(replicas int32) runtime.RawExtension { + raw, err := json.Marshal(&autoscalingv1.Scale{Spec: autoscalingv1.ScaleSpec{Replicas: replicas}}) + Expect(err).NotTo(HaveOccurred()) + return runtime.RawExtension{Raw: raw} +} + +func scaleUpdateRequest(oldReplicas, newReplicas int32) admission.Request { + return admission.Request{AdmissionRequest: admissionv1.AdmissionRequest{ + Operation: admissionv1.Update, + SubResource: "scale", + Namespace: "ci", + Name: "web", + Object: scaleRaw(newReplicas), + OldObject: scaleRaw(oldReplicas), + }} +} + +func poolWithPolicy(policy v1alpha2.ScaleDownPolicy) *v1alpha2.VirtualMachinePool { + return &v1alpha2.VirtualMachinePool{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "ci"}, + Spec: v1alpha2.VirtualMachinePoolSpec{ScaleDownPolicy: policy}, + } +} + +var 
_ = Describe("scaleValidator", func() { + var ctx context.Context + + BeforeEach(func() { ctx = context.Background() }) + + validatorFor := func(pool *v1alpha2.VirtualMachinePool) *scaleValidator { + c, err := testutil.NewFakeClientWithObjects(pool) + Expect(err).NotTo(HaveOccurred()) + return &scaleValidator{client: c} + } + + It("denies a decrease for an Explicit pool", func() { + resp := validatorFor(poolWithPolicy(v1alpha2.ScaleDownPolicyExplicit)).Handle(ctx, scaleUpdateRequest(5, 3)) + Expect(resp.Allowed).To(BeFalse()) + Expect(string(resp.Result.Message)).To(ContainSubstring("scaleDownWith")) + }) + + It("allows a decrease for a NewestFirst pool", func() { + resp := validatorFor(poolWithPolicy(v1alpha2.ScaleDownPolicyNewestFirst)).Handle(ctx, scaleUpdateRequest(5, 3)) + Expect(resp.Allowed).To(BeTrue()) + }) + + It("allows an increase even for an Explicit pool", func() { + resp := validatorFor(poolWithPolicy(v1alpha2.ScaleDownPolicyExplicit)).Handle(ctx, scaleUpdateRequest(3, 5)) + Expect(resp.Allowed).To(BeTrue()) + }) + + It("allows a no-op (equal replicas)", func() { + resp := validatorFor(poolWithPolicy(v1alpha2.ScaleDownPolicyExplicit)).Handle(ctx, scaleUpdateRequest(3, 3)) + Expect(resp.Allowed).To(BeTrue()) + }) + + It("ignores non-scale subresource requests", func() { + req := scaleUpdateRequest(5, 3) + req.SubResource = "" + resp := validatorFor(poolWithPolicy(v1alpha2.ScaleDownPolicyExplicit)).Handle(ctx, req) + Expect(resp.Allowed).To(BeTrue()) + }) +}) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_suite_test.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_suite_test.go new file mode 100644 index 0000000000..27a3bb777a --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_suite_test.go @@ -0,0 +1,21 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. 
See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package vmpool + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestVMPool(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "VirtualMachinePool Controller Suite") +} diff --git a/templates/virtualization-controller/validation-webhook.yaml b/templates/virtualization-controller/validation-webhook.yaml index a06879cfdb..5d188442d5 100644 --- a/templates/virtualization-controller/validation-webhook.yaml +++ b/templates/virtualization-controller/validation-webhook.yaml @@ -250,4 +250,24 @@ webhooks: - name: 'match-virtualization' expression: 'request.name == "virtualization"' {{- end }} + {{- if has "VirtualMachinePool" .Values.virtualization.internal.moduleConfig.featureGates }} + - name: "vmpool-scale.virtualization-controller.validate.d8-virtualization" + rules: + - apiGroups: ["virtualization.deckhouse.io"] + apiVersions: ["v1alpha2"] + operations: ["UPDATE"] + resources: ["virtualmachinepools/scale"] + scope: "Namespaced" + clientConfig: + service: + namespace: d8-{{ .Chart.Name }} + name: virtualization-controller + path: /validate-virtualization-deckhouse-io-v1alpha2-virtualmachinepool-scale + port: 443 + caBundle: | + {{ .Values.virtualization.internal.controller.cert.ca | b64enc }} + admissionReviewVersions: ["v1"] + sideEffects: None + failurePolicy: Fail + {{- end }} {{- end }} From 96011f17f3778ff8103ddfd3a144158cb2e042e5 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:21:44 +0300 Subject: [PATCH 08/46] feat(vmpool): add scaleDownWith subresource API types Add the VirtualMachinePool meta object and the VirtualMachinePoolScaleDownWith body type (targets to remove) to the subresources.virtualization.deckhouse.io API group, with generated deepcopy/conversion/openapi. This is the type surface for the addressed scale-down handle; the aggregated-apiserver REST storage and wiring follow. 
Signed-off-by: Pavel Tishkov --- api/subresources/register.go | 2 + api/subresources/types.go | 16 ++++ api/subresources/v1alpha2/register.go | 2 + api/subresources/v1alpha2/types.go | 19 ++++ .../v1alpha2/zz_generated.conversion.go | 88 ++++++++++++++++++ .../v1alpha2/zz_generated.deepcopy.go | 61 ++++++++++++ api/subresources/zz_generated.deepcopy.go | 61 ++++++++++++ .../generated/openapi/zz_generated.openapi.go | 92 +++++++++++++++++++ 8 files changed, 341 insertions(+) diff --git a/api/subresources/register.go b/api/subresources/register.go index 872ad19a8f..e839a66c17 100644 --- a/api/subresources/register.go +++ b/api/subresources/register.go @@ -59,6 +59,8 @@ func addKnownTypes(scheme *runtime.Scheme) error { &VirtualMachineCancelEvacuation{}, &VirtualMachineAddResourceClaim{}, &VirtualMachineRemoveResourceClaim{}, + &VirtualMachinePool{}, + &VirtualMachinePoolScaleDownWith{}, ) return nil } diff --git a/api/subresources/types.go b/api/subresources/types.go index 90ce98abaf..7b416efd3a 100644 --- a/api/subresources/types.go +++ b/api/subresources/types.go @@ -109,3 +109,19 @@ type VirtualMachineRemoveResourceClaim struct { Name string DryRun []string } + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type VirtualMachinePool struct { + metav1.TypeMeta + metav1.ObjectMeta +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type VirtualMachinePoolScaleDownWith struct { + metav1.TypeMeta + + Targets []string + DryRun []string +} diff --git a/api/subresources/v1alpha2/register.go b/api/subresources/v1alpha2/register.go index d978d3f4f3..39da230e19 100644 --- a/api/subresources/v1alpha2/register.go +++ b/api/subresources/v1alpha2/register.go @@ -61,6 +61,8 @@ func addKnownTypes(scheme *runtime.Scheme) error { &VirtualMachineCancelEvacuation{}, &VirtualMachineAddResourceClaim{}, &VirtualMachineRemoveResourceClaim{}, + &VirtualMachinePool{}, + &VirtualMachinePoolScaleDownWith{}, ) return nil } diff --git 
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// VirtualMachinePool is the pool meta object of the subresources API group.
// It carries only type and object metadata.
type VirtualMachinePool struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +k8s:conversion-gen:explicit-from=net/url.Values

// VirtualMachinePoolScaleDownWith is the scaleDownWith request: it names the
// pool members to remove. It can be populated from a JSON body or, through the
// generated url.Values conversion, from the "targets" and "dryRun" query
// parameters.
type VirtualMachinePoolScaleDownWith struct {
	metav1.TypeMeta `json:",inline"`

	// Targets are the names of the pool member VirtualMachines to remove.
	Targets []string `json:"targets"`

	// DryRun carries the values of the dryRun request parameter.
	DryRun []string `json:"dryRun,omitempty"`
}
s.AddGeneratedConversionFunc((*VirtualMachinePoolScaleDownWith)(nil), (*subresources.VirtualMachinePoolScaleDownWith)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha2_VirtualMachinePoolScaleDownWith_To_subresources_VirtualMachinePoolScaleDownWith(a.(*VirtualMachinePoolScaleDownWith), b.(*subresources.VirtualMachinePoolScaleDownWith), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*subresources.VirtualMachinePoolScaleDownWith)(nil), (*VirtualMachinePoolScaleDownWith)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_subresources_VirtualMachinePoolScaleDownWith_To_v1alpha2_VirtualMachinePoolScaleDownWith(a.(*subresources.VirtualMachinePoolScaleDownWith), b.(*VirtualMachinePoolScaleDownWith), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*VirtualMachinePortForward)(nil), (*subresources.VirtualMachinePortForward)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha2_VirtualMachinePortForward_To_subresources_VirtualMachinePortForward(a.(*VirtualMachinePortForward), b.(*subresources.VirtualMachinePortForward), scope) }); err != nil { @@ -173,6 +193,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*url.Values)(nil), (*VirtualMachinePoolScaleDownWith)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_url_Values_To_v1alpha2_VirtualMachinePoolScaleDownWith(a.(*url.Values), b.(*VirtualMachinePoolScaleDownWith), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*url.Values)(nil), (*VirtualMachinePortForward)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_url_Values_To_v1alpha2_VirtualMachinePortForward(a.(*url.Values), b.(*VirtualMachinePortForward), scope) }); err != nil { @@ -468,6 +493,69 @@ func 
Convert_url_Values_To_v1alpha2_VirtualMachineFreeze(in *url.Values, out *Vi return autoConvert_url_Values_To_v1alpha2_VirtualMachineFreeze(in, out, s) } +func autoConvert_v1alpha2_VirtualMachinePool_To_subresources_VirtualMachinePool(in *VirtualMachinePool, out *subresources.VirtualMachinePool, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + return nil +} + +// Convert_v1alpha2_VirtualMachinePool_To_subresources_VirtualMachinePool is an autogenerated conversion function. +func Convert_v1alpha2_VirtualMachinePool_To_subresources_VirtualMachinePool(in *VirtualMachinePool, out *subresources.VirtualMachinePool, s conversion.Scope) error { + return autoConvert_v1alpha2_VirtualMachinePool_To_subresources_VirtualMachinePool(in, out, s) +} + +func autoConvert_subresources_VirtualMachinePool_To_v1alpha2_VirtualMachinePool(in *subresources.VirtualMachinePool, out *VirtualMachinePool, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + return nil +} + +// Convert_subresources_VirtualMachinePool_To_v1alpha2_VirtualMachinePool is an autogenerated conversion function. +func Convert_subresources_VirtualMachinePool_To_v1alpha2_VirtualMachinePool(in *subresources.VirtualMachinePool, out *VirtualMachinePool, s conversion.Scope) error { + return autoConvert_subresources_VirtualMachinePool_To_v1alpha2_VirtualMachinePool(in, out, s) +} + +func autoConvert_v1alpha2_VirtualMachinePoolScaleDownWith_To_subresources_VirtualMachinePoolScaleDownWith(in *VirtualMachinePoolScaleDownWith, out *subresources.VirtualMachinePoolScaleDownWith, s conversion.Scope) error { + out.Targets = *(*[]string)(unsafe.Pointer(&in.Targets)) + out.DryRun = *(*[]string)(unsafe.Pointer(&in.DryRun)) + return nil +} + +// Convert_v1alpha2_VirtualMachinePoolScaleDownWith_To_subresources_VirtualMachinePoolScaleDownWith is an autogenerated conversion function. 
+func Convert_v1alpha2_VirtualMachinePoolScaleDownWith_To_subresources_VirtualMachinePoolScaleDownWith(in *VirtualMachinePoolScaleDownWith, out *subresources.VirtualMachinePoolScaleDownWith, s conversion.Scope) error { + return autoConvert_v1alpha2_VirtualMachinePoolScaleDownWith_To_subresources_VirtualMachinePoolScaleDownWith(in, out, s) +} + +func autoConvert_subresources_VirtualMachinePoolScaleDownWith_To_v1alpha2_VirtualMachinePoolScaleDownWith(in *subresources.VirtualMachinePoolScaleDownWith, out *VirtualMachinePoolScaleDownWith, s conversion.Scope) error { + out.Targets = *(*[]string)(unsafe.Pointer(&in.Targets)) + out.DryRun = *(*[]string)(unsafe.Pointer(&in.DryRun)) + return nil +} + +// Convert_subresources_VirtualMachinePoolScaleDownWith_To_v1alpha2_VirtualMachinePoolScaleDownWith is an autogenerated conversion function. +func Convert_subresources_VirtualMachinePoolScaleDownWith_To_v1alpha2_VirtualMachinePoolScaleDownWith(in *subresources.VirtualMachinePoolScaleDownWith, out *VirtualMachinePoolScaleDownWith, s conversion.Scope) error { + return autoConvert_subresources_VirtualMachinePoolScaleDownWith_To_v1alpha2_VirtualMachinePoolScaleDownWith(in, out, s) +} + +func autoConvert_url_Values_To_v1alpha2_VirtualMachinePoolScaleDownWith(in *url.Values, out *VirtualMachinePoolScaleDownWith, s conversion.Scope) error { + // WARNING: Field TypeMeta does not have json tag, skipping. + + if values, ok := map[string][]string(*in)["targets"]; ok && len(values) > 0 { + out.Targets = *(*[]string)(unsafe.Pointer(&values)) + } else { + out.Targets = nil + } + if values, ok := map[string][]string(*in)["dryRun"]; ok && len(values) > 0 { + out.DryRun = *(*[]string)(unsafe.Pointer(&values)) + } else { + out.DryRun = nil + } + return nil +} + +// Convert_url_Values_To_v1alpha2_VirtualMachinePoolScaleDownWith is an autogenerated conversion function. 
+func Convert_url_Values_To_v1alpha2_VirtualMachinePoolScaleDownWith(in *url.Values, out *VirtualMachinePoolScaleDownWith, s conversion.Scope) error { + return autoConvert_url_Values_To_v1alpha2_VirtualMachinePoolScaleDownWith(in, out, s) +} + func autoConvert_v1alpha2_VirtualMachinePortForward_To_subresources_VirtualMachinePortForward(in *VirtualMachinePortForward, out *subresources.VirtualMachinePortForward, s conversion.Scope) error { out.Protocol = in.Protocol out.Port = in.Port diff --git a/api/subresources/v1alpha2/zz_generated.deepcopy.go b/api/subresources/v1alpha2/zz_generated.deepcopy.go index 6554e97509..e83a39f0f3 100644 --- a/api/subresources/v1alpha2/zz_generated.deepcopy.go +++ b/api/subresources/v1alpha2/zz_generated.deepcopy.go @@ -192,6 +192,67 @@ func (in *VirtualMachineFreeze) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VirtualMachinePool) DeepCopyInto(out *VirtualMachinePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePool. +func (in *VirtualMachinePool) DeepCopy() *VirtualMachinePool { + if in == nil { + return nil + } + out := new(VirtualMachinePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VirtualMachinePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *VirtualMachinePoolScaleDownWith) DeepCopyInto(out *VirtualMachinePoolScaleDownWith) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Targets != nil { + in, out := &in.Targets, &out.Targets + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.DryRun != nil { + in, out := &in.DryRun, &out.DryRun + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePoolScaleDownWith. +func (in *VirtualMachinePoolScaleDownWith) DeepCopy() *VirtualMachinePoolScaleDownWith { + if in == nil { + return nil + } + out := new(VirtualMachinePoolScaleDownWith) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VirtualMachinePoolScaleDownWith) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VirtualMachinePortForward) DeepCopyInto(out *VirtualMachinePortForward) { *out = *in diff --git a/api/subresources/zz_generated.deepcopy.go b/api/subresources/zz_generated.deepcopy.go index 8268bde57f..dc9fa4f7a8 100644 --- a/api/subresources/zz_generated.deepcopy.go +++ b/api/subresources/zz_generated.deepcopy.go @@ -192,6 +192,67 @@ func (in *VirtualMachineFreeze) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VirtualMachinePool) DeepCopyInto(out *VirtualMachinePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePool. 
+func (in *VirtualMachinePool) DeepCopy() *VirtualMachinePool { + if in == nil { + return nil + } + out := new(VirtualMachinePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VirtualMachinePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VirtualMachinePoolScaleDownWith) DeepCopyInto(out *VirtualMachinePoolScaleDownWith) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Targets != nil { + in, out := &in.Targets, &out.Targets + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.DryRun != nil { + in, out := &in.DryRun, &out.DryRun + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachinePoolScaleDownWith. +func (in *VirtualMachinePoolScaleDownWith) DeepCopy() *VirtualMachinePoolScaleDownWith { + if in == nil { + return nil + } + out := new(VirtualMachinePoolScaleDownWith) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *VirtualMachinePoolScaleDownWith) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *VirtualMachinePortForward) DeepCopyInto(out *VirtualMachinePortForward) { *out = *in diff --git a/images/virtualization-artifact/pkg/apiserver/api/generated/openapi/zz_generated.openapi.go b/images/virtualization-artifact/pkg/apiserver/api/generated/openapi/zz_generated.openapi.go index da9da79aef..9318cbfce9 100644 --- a/images/virtualization-artifact/pkg/apiserver/api/generated/openapi/zz_generated.openapi.go +++ b/images/virtualization-artifact/pkg/apiserver/api/generated/openapi/zz_generated.openapi.go @@ -48,6 +48,8 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachineCancelEvacuation": schema_virtualization_api_subresources_v1alpha2_VirtualMachineCancelEvacuation(ref), "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachineConsole": schema_virtualization_api_subresources_v1alpha2_VirtualMachineConsole(ref), "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachineFreeze": schema_virtualization_api_subresources_v1alpha2_VirtualMachineFreeze(ref), + "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachinePool": schema_virtualization_api_subresources_v1alpha2_VirtualMachinePool(ref), + "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachinePoolScaleDownWith": schema_virtualization_api_subresources_v1alpha2_VirtualMachinePoolScaleDownWith(ref), "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachinePortForward": schema_virtualization_api_subresources_v1alpha2_VirtualMachinePortForward(ref), "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachineRemoveResourceClaim": schema_virtualization_api_subresources_v1alpha2_VirtualMachineRemoveResourceClaim(ref), "github.com/deckhouse/virtualization/api/subresources/v1alpha2.VirtualMachineRemoveVolume": 
schema_virtualization_api_subresources_v1alpha2_VirtualMachineRemoveVolume(ref), @@ -951,6 +953,96 @@ func schema_virtualization_api_subresources_v1alpha2_VirtualMachineFreeze(ref co } } +func schema_virtualization_api_subresources_v1alpha2_VirtualMachinePool(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "kind": { + SchemaProps: spec.SchemaProps{ + Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", + Type: []string{"string"}, + Format: "", + }, + }, + "apiVersion": { + SchemaProps: spec.SchemaProps{ + Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", + Type: []string{"string"}, + Format: "", + }, + }, + "metadata": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), + }, + }, + }, + }, + }, + Dependencies: []string{ + "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, + } +} + +func schema_virtualization_api_subresources_v1alpha2_VirtualMachinePoolScaleDownWith(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "kind": { + SchemaProps: spec.SchemaProps{ + Description: "Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", + Type: []string{"string"}, + Format: "", + }, + }, + "apiVersion": { + SchemaProps: spec.SchemaProps{ + Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", + Type: []string{"string"}, + Format: "", + }, + }, + "targets": { + SchemaProps: spec.SchemaProps{ + Description: "Targets are the names of the pool member VirtualMachines to remove.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + "dryRun": { + SchemaProps: spec.SchemaProps{ + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + }, + Required: []string{"targets"}, + }, + }, + } +} + func schema_virtualization_api_subresources_v1alpha2_VirtualMachinePortForward(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ From f9fcfaaaa6e42aff221f8b62f088535461529bd7 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:28:02 +0300 Subject: [PATCH 09/46] feat(vmpool): serve scaleDownWith via the aggregated apiserver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register the virtualmachinepools resource and its scaleDownWith subresource in the existing aggregated apiserver (group subresources.virtualization.deckhouse.io). 
The handler validates that every target belongs to the pool, deletes them and atomically decrements spec.replicas on the main resource — bypassing the /scale guard, which is what lets Explicit pools shrink by address. The meta-object itself is not served (Get returns NotFound). Enterprise-only: the REST/storage live under //go:build EE and are wired into the apiserver group through a build-tagged hook; the CE build adds nothing. A write-capable client is threaded from the apiserver config. Covered by unit tests. Signed-off-by: Pavel Tishkov --- .../pkg/apiserver/api/install.go | 9 +- .../apiserver/api/install_enterprise_ce.go | 29 ++++ .../apiserver/api/install_enterprise_ee.go | 24 +++ .../registry/vmpool/rest/rest_suite_test.go | 21 +++ .../registry/vmpool/rest/scaledownwith.go | 142 ++++++++++++++++++ .../vmpool/rest/scaledownwith_test.go | 115 ++++++++++++++ .../registry/vmpool/storage/storage.go | 77 ++++++++++ .../pkg/apiserver/server/config.go | 16 ++ 8 files changed, 431 insertions(+), 2 deletions(-) create mode 100644 images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ce.go create mode 100644 images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ee.go create mode 100644 images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/rest_suite_test.go create mode 100644 images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith.go create mode 100644 images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith_test.go create mode 100644 images/virtualization-artifact/pkg/apiserver/registry/vmpool/storage/storage.go diff --git a/images/virtualization-artifact/pkg/apiserver/api/install.go b/images/virtualization-artifact/pkg/apiserver/api/install.go index 627194b783..e6f30bac1a 100644 --- a/images/virtualization-artifact/pkg/apiserver/api/install.go +++ b/images/virtualization-artifact/pkg/apiserver/api/install.go @@ -23,6 +23,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/serializer" 
"k8s.io/apiserver/pkg/registry/rest" genericapiserver "k8s.io/apiserver/pkg/server" + "sigs.k8s.io/controller-runtime/pkg/client" vmrest "github.com/deckhouse/virtualization-controller/pkg/apiserver/registry/vm/rest" "github.com/deckhouse/virtualization-controller/pkg/apiserver/registry/vm/storage" @@ -53,7 +54,7 @@ func init() { ) } -func Build(store *storage.VirtualMachineStorage) genericapiserver.APIGroupInfo { +func Build(store *storage.VirtualMachineStorage, client client.Client) genericapiserver.APIGroupInfo { apiGroupInfo := genericapiserver.NewDefaultAPIGroupInfo(subresources.GroupName, Scheme, ParameterCodec, Codecs) resourcesV1alpha2 := map[string]rest.Storage{ "virtualmachines": store, @@ -68,6 +69,9 @@ func Build(store *storage.VirtualMachineStorage) genericapiserver.APIGroupInfo { "virtualmachines/addresourceclaim": store.AddResourceClaimREST(), "virtualmachines/removeresourceclaim": store.RemoveResourceClaimREST(), } + // Enterprise-only resources (e.g. virtualmachinepools/scaledownwith) are added + // by the EE build; the CE build leaves the map untouched. 
+ installEnterpriseResources(resourcesV1alpha2, client) apiGroupInfo.VersionedResourcesStorageMap[subv1alpha2.SchemeGroupVersion.Version] = resourcesV1alpha2 return apiGroupInfo } @@ -77,12 +81,13 @@ func Install( server *genericapiserver.GenericAPIServer, kubevirt vmrest.KubevirtAPIServerConfig, proxyCertManager certmanager.CertificateManager, + client client.Client, ) error { vmStorage := storage.NewStorage( vmLister, kubevirt, proxyCertManager, ) - info := Build(vmStorage) + info := Build(vmStorage, client) return server.InstallAPIGroup(&info) } diff --git a/images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ce.go b/images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ce.go new file mode 100644 index 0000000000..267678fa9d --- /dev/null +++ b/images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ce.go @@ -0,0 +1,29 @@ +//go:build !EE +// +build !EE + +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package api + +import ( + "k8s.io/apiserver/pkg/registry/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// installEnterpriseResources is a no-op in CE builds: paid-edition subresources +// (e.g. virtualmachinepools/scaledownwith) are not compiled here. 
+func installEnterpriseResources(_ map[string]rest.Storage, _ client.Client) {} diff --git a/images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ee.go b/images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ee.go new file mode 100644 index 0000000000..69c2ee18df --- /dev/null +++ b/images/virtualization-artifact/pkg/apiserver/api/install_enterprise_ee.go @@ -0,0 +1,24 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package api + +import ( + "k8s.io/apiserver/pkg/registry/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + + vmpoolstorage "github.com/deckhouse/virtualization-controller/pkg/apiserver/registry/vmpool/storage" +) + +// installEnterpriseResources registers paid-edition subresources into the +// aggregated apiserver group. Compiled only in EE builds. +func installEnterpriseResources(resources map[string]rest.Storage, c client.Client) { + poolStorage := vmpoolstorage.NewStorage(c) + resources["virtualmachinepools"] = poolStorage + resources["virtualmachinepools/scaledownwith"] = poolStorage.ScaleDownWithREST() +} diff --git a/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/rest_suite_test.go b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/rest_suite_test.go new file mode 100644 index 0000000000..ff78f332b2 --- /dev/null +++ b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/rest_suite_test.go @@ -0,0 +1,21 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package rest + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestVMPoolREST(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "VirtualMachinePool subresources REST Suite") +} diff --git a/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith.go b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith.go new file mode 100644 index 0000000000..b53bce24c7 --- /dev/null +++ b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith.go @@ -0,0 +1,142 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package rest + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + genericapirequest "k8s.io/apiserver/pkg/endpoints/request" + "k8s.io/apiserver/pkg/registry/rest" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "github.com/deckhouse/virtualization/api/subresources" +) + +// ScaleDownWithREST serves the addressed scale-down handle: +// +// POST .../virtualmachinepools//scaledownwith {"targets": [...]} +// +// It validates that every target belongs to the pool, deletes them and +// atomically decrements spec.replicas by the number removed. The decrement is +// done on the main resource (server-side, from the apiserver's own identity), +// so it bypasses the /scale guard — that is what makes addressed removal work +// for Explicit pools. 
+type ScaleDownWithREST struct { + client client.Client +} + +var ( + _ rest.Storage = &ScaleDownWithREST{} + _ rest.Connecter = &ScaleDownWithREST{} +) + +func NewScaleDownWithREST(c client.Client) *ScaleDownWithREST { + return &ScaleDownWithREST{client: c} +} + +func (r *ScaleDownWithREST) New() runtime.Object { + return &subresources.VirtualMachinePoolScaleDownWith{} +} + +func (r *ScaleDownWithREST) Destroy() {} + +// NewConnectOptions implements rest.Connecter. +func (r *ScaleDownWithREST) NewConnectOptions() (runtime.Object, bool, string) { + return &subresources.VirtualMachinePoolScaleDownWith{}, false, "" +} + +// ConnectMethods implements rest.Connecter. +func (r *ScaleDownWithREST) ConnectMethods() []string { + return []string{http.MethodPost} +} + +func (r *ScaleDownWithREST) Connect(ctx context.Context, name string, _ runtime.Object, responder rest.Responder) (http.Handler, error) { + namespace := genericapirequest.NamespaceValue(ctx) + + return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + var body subresources.VirtualMachinePoolScaleDownWith + if req.Body != nil { + if err := json.NewDecoder(req.Body).Decode(&body); err != nil { + responder.Error(apierrors.NewBadRequest(fmt.Sprintf("decode scaleDownWith body: %v", err))) + return + } + } + if len(body.Targets) == 0 { + responder.Error(apierrors.NewBadRequest("scaleDownWith requires a non-empty targets list")) + return + } + + if err := r.scaleDown(req.Context(), namespace, name, body.Targets); err != nil { + responder.Error(err) + return + } + responder.Object(http.StatusOK, &metav1.Status{Status: metav1.StatusSuccess}) + }), nil +} + +func (r *ScaleDownWithREST) scaleDown(ctx context.Context, namespace, poolName string, targets []string) error { + pool := &v1alpha2.VirtualMachinePool{} + if err := r.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: poolName}, pool); err != nil { + if apierrors.IsNotFound(err) { + return 
apierrors.NewNotFound(v1alpha2.Resource(v1alpha2.VirtualMachinePoolResource), poolName) + } + return apierrors.NewInternalError(err) + } + + // Validate all targets up front: every one must be a member of this pool. + // Fail the whole request if any is not, so we never partially delete. + for _, target := range targets { + vm := &v1alpha2.VirtualMachine{} + if err := r.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: target}, vm); err != nil { + if apierrors.IsNotFound(err) { + return apierrors.NewBadRequest(fmt.Sprintf("target VirtualMachine %q not found in namespace %q", target, namespace)) + } + return apierrors.NewInternalError(err) + } + if ref := metav1.GetControllerOf(vm); ref == nil || ref.UID != pool.GetUID() { + return apierrors.NewBadRequest(fmt.Sprintf("target VirtualMachine %q does not belong to VirtualMachinePool %q", target, poolName)) + } + } + + // Delete the targets. A target already gone still counts toward the decrement. + for _, target := range targets { + vm := &v1alpha2.VirtualMachine{ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: target}} + if err := r.client.Delete(ctx, vm); err != nil && !apierrors.IsNotFound(err) { + return apierrors.NewInternalError(fmt.Errorf("delete target %q: %w", target, err)) + } + } + + // Atomically shrink the pool by the number of removed replicas. 
+ removed := int32(len(targets)) + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + current := &v1alpha2.VirtualMachinePool{} + if err := r.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: poolName}, current); err != nil { + return err + } + desired := int32(0) + if current.Spec.Replicas != nil { + desired = *current.Spec.Replicas + } + desired -= removed + if desired < 0 { + desired = 0 + } + current.Spec.Replicas = &desired + return r.client.Update(ctx, current) + }) +} diff --git a/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith_test.go b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith_test.go new file mode 100644 index 0000000000..4657d1f294 --- /dev/null +++ b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/rest/scaledownwith_test.go @@ -0,0 +1,115 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package rest + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/virtualization-controller/pkg/common/testutil" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +const ( + ns = "ci" + poolName = "web" + poolUID = types.UID("pool-uid-1") +) + +func pool(replicas int32) *v1alpha2.VirtualMachinePool { + return &v1alpha2.VirtualMachinePool{ + ObjectMeta: metav1.ObjectMeta{Name: poolName, Namespace: ns, UID: poolUID}, + Spec: v1alpha2.VirtualMachinePoolSpec{Replicas: ptr.To(replicas)}, + } +} + +func memberOf(p *v1alpha2.VirtualMachinePool, name string) *v1alpha2.VirtualMachine { + return &v1alpha2.VirtualMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + UID: types.UID(name + "-uid"), + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(p, v1alpha2.VirtualMachinePoolGVK)}, + }, + } +} + +// foreignVM belongs to no pool. 
+func foreignVM(name string) *v1alpha2.VirtualMachine { + return &v1alpha2.VirtualMachine{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns, UID: types.UID(name + "-uid")}} +} + +func getReplicas(ctx context.Context, c client.Client) int32 { + p := &v1alpha2.VirtualMachinePool{} + Expect(c.Get(ctx, types.NamespacedName{Namespace: ns, Name: poolName}, p)).To(Succeed()) + return ptr.Deref(p.Spec.Replicas, -1) +} + +func vmExists(ctx context.Context, c client.Client, name string) bool { + err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, &v1alpha2.VirtualMachine{}) + return err == nil +} + +var _ = Describe("ScaleDownWith", func() { + var ctx context.Context + BeforeEach(func() { ctx = context.Background() }) + + It("deletes the targets and decrements replicas", func() { + p := pool(3) + c, err := testutil.NewFakeClientWithObjects(p, memberOf(p, "web-a"), memberOf(p, "web-b"), memberOf(p, "web-c")) + Expect(err).NotTo(HaveOccurred()) + + r := NewScaleDownWithREST(c) + Expect(r.scaleDown(ctx, ns, poolName, []string{"web-a", "web-b"})).To(Succeed()) + + Expect(vmExists(ctx, c, "web-a")).To(BeFalse()) + Expect(vmExists(ctx, c, "web-b")).To(BeFalse()) + Expect(vmExists(ctx, c, "web-c")).To(BeTrue()) + Expect(getReplicas(ctx, c)).To(Equal(int32(1))) + }) + + It("rejects a target that does not belong to the pool and deletes nothing", func() { + p := pool(2) + c, err := testutil.NewFakeClientWithObjects(p, memberOf(p, "web-a"), foreignVM("intruder")) + Expect(err).NotTo(HaveOccurred()) + + err = NewScaleDownWithREST(c).scaleDown(ctx, ns, poolName, []string{"web-a", "intruder"}) + Expect(apierrors.IsBadRequest(err)).To(BeTrue()) + + // Validation happens up front, so no target is deleted and replicas stay. 
+ Expect(vmExists(ctx, c, "web-a")).To(BeTrue()) + Expect(vmExists(ctx, c, "intruder")).To(BeTrue()) + Expect(getReplicas(ctx, c)).To(Equal(int32(2))) + }) + + It("rejects a missing target", func() { + p := pool(1) + c, err := testutil.NewFakeClientWithObjects(p, memberOf(p, "web-a")) + Expect(err).NotTo(HaveOccurred()) + + err = NewScaleDownWithREST(c).scaleDown(ctx, ns, poolName, []string{"ghost"}) + Expect(apierrors.IsBadRequest(err)).To(BeTrue()) + }) + + It("floors replicas at zero", func() { + p := pool(1) + c, err := testutil.NewFakeClientWithObjects(p, memberOf(p, "web-a"), memberOf(p, "web-b")) + Expect(err).NotTo(HaveOccurred()) + + Expect(NewScaleDownWithREST(c).scaleDown(ctx, ns, poolName, []string{"web-a", "web-b"})).To(Succeed()) + Expect(getReplicas(ctx, c)).To(Equal(int32(0))) + }) +}) diff --git a/images/virtualization-artifact/pkg/apiserver/registry/vmpool/storage/storage.go b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/storage/storage.go new file mode 100644 index 0000000000..9274d7a47d --- /dev/null +++ b/images/virtualization-artifact/pkg/apiserver/registry/vmpool/storage/storage.go @@ -0,0 +1,77 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package storage + +import ( + "context" + + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apiserver/pkg/registry/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + + vmpoolrest "github.com/deckhouse/virtualization-controller/pkg/apiserver/registry/vmpool/rest" + "github.com/deckhouse/virtualization/api/subresources" + subv1alpha2 "github.com/deckhouse/virtualization/api/subresources/v1alpha2" +) + +// VirtualMachinePoolStorage is the meta-object storage for VirtualMachinePool in +// the subresources API group. 
The meta-object itself is not served (Get returns +// NotFound); clients always address a subresource such as scaleDownWith. +type VirtualMachinePoolStorage struct { + scaleDownWith *vmpoolrest.ScaleDownWithREST +} + +var ( + _ rest.Storage = &VirtualMachinePoolStorage{} + _ rest.Scoper = &VirtualMachinePoolStorage{} + _ rest.KindProvider = &VirtualMachinePoolStorage{} + _ rest.Getter = &VirtualMachinePoolStorage{} + _ rest.SingularNameProvider = &VirtualMachinePoolStorage{} +) + +func NewStorage(c client.Client) *VirtualMachinePoolStorage { + return &VirtualMachinePoolStorage{ + scaleDownWith: vmpoolrest.NewScaleDownWithREST(c), + } +} + +func (store VirtualMachinePoolStorage) ScaleDownWithREST() *vmpoolrest.ScaleDownWithREST { + return store.scaleDownWith +} + +// New implements rest.Storage. +func (store VirtualMachinePoolStorage) New() runtime.Object { + return &subv1alpha2.VirtualMachinePool{} +} + +// Destroy implements rest.Storage. +func (store VirtualMachinePoolStorage) Destroy() {} + +// Kind implements rest.KindProvider. +func (store VirtualMachinePoolStorage) Kind() string { + return "VirtualMachinePool" +} + +// NamespaceScoped implements rest.Scoper. +func (store VirtualMachinePoolStorage) NamespaceScoped() bool { + return true +} + +// GetSingularName implements rest.SingularNameProvider. +func (store VirtualMachinePoolStorage) GetSingularName() string { + return "virtualmachinepool" +} + +// Get implements rest.Getter. The meta-object is intentionally not served — the +// client must address a subresource (see package doc / ADR). 
+func (store VirtualMachinePoolStorage) Get(_ context.Context, name string, _ *metav1.GetOptions) (runtime.Object, error) { + return nil, k8serrors.NewNotFound(subresources.Resource("virtualmachinepools"), name) +} diff --git a/images/virtualization-artifact/pkg/apiserver/server/config.go b/images/virtualization-artifact/pkg/apiserver/server/config.go index 291dc99643..b3d9230d41 100644 --- a/images/virtualization-artifact/pkg/apiserver/server/config.go +++ b/images/virtualization-artifact/pkg/apiserver/server/config.go @@ -20,12 +20,15 @@ import ( "errors" "fmt" + "k8s.io/apimachinery/pkg/runtime" genericapiserver "k8s.io/apiserver/pkg/server" "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/deckhouse/virtualization-controller/pkg/apiserver/api" vmrest "github.com/deckhouse/virtualization-controller/pkg/apiserver/registry/vm/rest" "github.com/deckhouse/virtualization-controller/pkg/tls/certmanager/filesystem" + "github.com/deckhouse/virtualization/api/core/v1alpha2" ) var ErrConfigInvalid = errors.New("configuration is invalid") @@ -80,10 +83,23 @@ func (c Config) Complete() (*Server, error) { return nil, err } + // Write-capable client used by enterprise subresources (e.g. scaleDownWith) + // to delete pool members and adjust spec.replicas from the apiserver's own + // identity. 
+ crScheme := runtime.NewScheme() + if err = v1alpha2.AddToScheme(crScheme); err != nil { + return nil, err + } + crClient, err := client.New(c.Rest, client.Options{Scheme: crScheme}) + if err != nil { + return nil, err + } + err = api.Install(vmInformer.Lister(), genericServer, c.Kubevirt, proxyCertManager, + crClient, ) if err != nil { return nil, err From e4148f34f24e6613ff9e95351b046fe6c3e1b57d Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:31:04 +0300 Subject: [PATCH 10/46] feat(vmpool): grant RBAC for VirtualMachinePool and scaleDownWith Let the aggregated apiserver's service account get/update VirtualMachinePool (the scaleDownWith handler decrements spec.replicas) and reach the pool subresources. Grant the Editor cluster role management of VirtualMachinePool, its scale subresource (kubectl scale / HPA) and the scaleDownWith handle for addressed removal. Signed-off-by: Pavel Tishkov --- templates/user-authz-cluster-roles.yaml | 15 +++++++++++++++ templates/virtualization-api/rbac-for-us.yaml | 3 +++ 2 files changed, 18 insertions(+) diff --git a/templates/user-authz-cluster-roles.yaml b/templates/user-authz-cluster-roles.yaml index 6460800b4a..1d2f851140 100644 --- a/templates/user-authz-cluster-roles.yaml +++ b/templates/user-authz-cluster-roles.yaml @@ -81,12 +81,27 @@ rules: - virtualmachinemacaddresses - virtualmachineoperations - virtualmachinesnapshotoperations + - virtualmachinepools verbs: - create - delete - deletecollection - patch - update +- apiGroups: + - virtualization.deckhouse.io + resources: + - virtualmachinepools/scale + verbs: + - get + - patch + - update +- apiGroups: + - subresources.virtualization.deckhouse.io + resources: + - virtualmachinepools/scaledownwith + verbs: + - create --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/templates/virtualization-api/rbac-for-us.yaml b/templates/virtualization-api/rbac-for-us.yaml index de7e903c93..9034e4c400 100644 --- 
a/templates/virtualization-api/rbac-for-us.yaml +++ b/templates/virtualization-api/rbac-for-us.yaml @@ -41,6 +42,7 @@ rules: - virtualization.deckhouse.io resources: - virtualmachines + - virtualmachinepools verbs: - get - list @@ -108,11 +109,13 @@ rules: - virtualmachines/removeresourceclaim - virtualmachines/unfreeze - virtualmachines/vnc + - virtualmachinepools + - virtualmachinepools/scaledownwith verbs: - get - patch - update - create --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding From af4ebfcf359c0f9d02d729040048b21a29ec6fd0 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:37:32 +0300 Subject: [PATCH 11/46] feat(vmpool): track template revision in status Add the template-hash label (revision marker, not part of the member selector) stamped on every created replica, and report the rollout in status: desiredTemplateHash, updatedReplicas and the Synced condition (True once all live replicas are on the current virtualMachineTemplate). This makes the rollout observable at pool level. In-place patching of existing replicas on a template change follows. Covered by unit tests. 
Signed-off-by: Pavel Tishkov --- api/core/v1alpha2/virtual_machine_pool.go | 19 ++++++++++ crds/virtualmachinepools.yaml | 18 +++++++++ .../vmpool/internal/handler/sync.go | 33 +++++++++++++++- .../vmpool/internal/handler/sync_test.go | 38 +++++++++++++++++++ .../vmpool/internal/poollabels/poollabels.go | 19 ++++++++++ 5 files changed, 126 insertions(+), 1 deletion(-) diff --git a/api/core/v1alpha2/virtual_machine_pool.go b/api/core/v1alpha2/virtual_machine_pool.go index 66ebc813be..f95b4388b8 100644 --- a/api/core/v1alpha2/virtual_machine_pool.go +++ b/api/core/v1alpha2/virtual_machine_pool.go @@ -137,6 +137,25 @@ type VirtualMachinePoolStatus struct { // +optional ReadyReplicas int32 `json:"readyReplicas,omitempty"` + // DesiredTemplateHash is the hash of the current virtualMachineTemplate — the + // revision the controller is converging replicas to (cf. updateRevision on a + // StatefulSet). + // + // +optional + DesiredTemplateHash string `json:"desiredTemplateHash,omitempty"` + + // UpdatedReplicas is the number of replicas effectively on DesiredTemplateHash + // (fully synced). + // + // +optional + UpdatedReplicas int32 `json:"updatedReplicas,omitempty"` + + // RestartPendingReplicas is the number of replicas patched to the new template + // whose disruptive part still awaits a restart. + // + // +optional + RestartPendingReplicas int32 `json:"restartPendingReplicas,omitempty"` + // Selector is the label selector the controller publishes for the `scale` // subresource; HPA/KEDA read it themselves. // diff --git a/crds/virtualmachinepools.yaml b/crds/virtualmachinepools.yaml index deb1844980..dc0435ca39 100644 --- a/crds/virtualmachinepools.yaml +++ b/crds/virtualmachinepools.yaml @@ -1443,6 +1443,12 @@ spec: x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map + desiredTemplateHash: + description: |- + DesiredTemplateHash is the hash of the current virtualMachineTemplate — the + revision the controller is converging replicas to (cf. 
updateRevision on a + StatefulSet). + type: string observedGeneration: description: ObservedGeneration is the generation of the spec the @@ -1461,11 +1467,23 @@ spec: such a machine still occupies resources, so it is real capacity, not a phantom. format: int32 type: integer + restartPendingReplicas: + description: |- + RestartPendingReplicas is the number of replicas patched to the new template + whose disruptive part still awaits a restart. + format: int32 + type: integer selector: description: |- Selector is the label selector the controller publishes for the `scale` subresource; HPA/KEDA read it themselves. type: string + updatedReplicas: + description: |- + UpdatedReplicas is the number of replicas effectively on DesiredTemplateHash + (fully synced). + format: int32 + type: integer type: object required: - spec diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go index 52f0252014..6034bda4bc 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go @@ -197,6 +197,8 @@ func (h *SyncHandler) newMember(pool *v1alpha2.VirtualMachinePool) *v1alpha2.Vir for k, v := range poollabels.Member(pool) { labels[k] = v } + // Stamp the revision the replica is created on. 
+ labels[poollabels.TemplateHash] = poollabels.ComputeTemplateHash(pool) var annotations map[string]string if len(tmpl.Annotations) > 0 { @@ -219,17 +221,30 @@ func (h *SyncHandler) newMember(pool *v1alpha2.VirtualMachinePool) *v1alpha2.Vir } func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members []v1alpha2.VirtualMachine) { + desiredHash := poollabels.ComputeTemplateHash(pool) + ready := 0 + liveNonTerminating := 0 + updated := 0 for i := range members { - if members[i].GetDeletionTimestamp() == nil && members[i].Status.Phase == v1alpha2.MachineRunning { + if members[i].GetDeletionTimestamp() != nil { + continue + } + liveNonTerminating++ + if members[i].Status.Phase == v1alpha2.MachineRunning { ready++ } + if members[i].GetLabels()[poollabels.TemplateHash] == desiredHash { + updated++ + } } desired := int(ptr.Deref(pool.Spec.Replicas, 0)) pool.Status.ObservedGeneration = pool.GetGeneration() pool.Status.Replicas = int32(len(members)) pool.Status.ReadyReplicas = int32(ready) + pool.Status.UpdatedReplicas = int32(updated) + pool.Status.DesiredTemplateHash = desiredHash pool.Status.Selector = poollabels.StatusSelector(pool) availableStatus := metav1.ConditionFalse @@ -261,4 +276,20 @@ func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members [] ObservedGeneration: pool.GetGeneration(), Message: progressingMessage, }) + + syncedStatus := metav1.ConditionTrue + syncedReason := vmpoolcondition.ReasonPoolSynced + syncedMessage := "All replicas are on the current virtualMachineTemplate." 
+ if updated < liveNonTerminating { + syncedStatus = metav1.ConditionFalse + syncedReason = vmpoolcondition.ReasonRolloutInProgress + syncedMessage = fmt.Sprintf("%d of %d replicas are on the current virtualMachineTemplate.", updated, liveNonTerminating) + } + meta.SetStatusCondition(&pool.Status.Conditions, metav1.Condition{ + Type: vmpoolcondition.TypeSynced.String(), + Status: syncedStatus, + Reason: syncedReason.String(), + ObservedGeneration: pool.GetGeneration(), + Message: syncedMessage, + }) } diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go index 608ab6bce3..e3498b9668 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go @@ -122,6 +122,7 @@ var _ = Describe("SyncHandler", func() { Expect(vm.Name).To(HavePrefix(poolName + "-")) Expect(vm.Labels).To(HaveKeyWithValue(poollabels.PoolUID, string(poolUID))) Expect(vm.Labels).To(HaveKeyWithValue(poollabels.Pool, poolName)) + Expect(vm.Labels).To(HaveKeyWithValue(poollabels.TemplateHash, poollabels.ComputeTemplateHash(pool))) ref := metav1.GetControllerOf(&vm) Expect(ref).NotTo(BeNil()) Expect(ref.UID).To(Equal(poolUID)) @@ -168,6 +169,43 @@ var _ = Describe("SyncHandler", func() { }) }) + Context("template revision", func() { + It("reports Synced when every replica is on the current template hash", func() { + pool := newPool(2) + hash := poollabels.ComputeTemplateHash(pool) + m1 := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, clock, false) + m2 := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, clock, false) + m1.Labels[poollabels.TemplateHash] = hash + m2.Labels[poollabels.TemplateHash] = hash + c, err := testutil.NewFakeClientWithObjects(pool, m1, m2) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + 
Expect(err).NotTo(HaveOccurred()) + + Expect(pool.Status.DesiredTemplateHash).To(Equal(hash)) + Expect(pool.Status.UpdatedReplicas).To(Equal(int32(2))) + Expect(meta.IsStatusConditionTrue(pool.Status.Conditions, vmpoolcondition.TypeSynced.String())).To(BeTrue()) + }) + + It("reports Synced=False when a replica lags on an old hash", func() { + pool := newPool(2) + hash := poollabels.ComputeTemplateHash(pool) + current := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, clock, false) + lagging := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, clock, false) + current.Labels[poollabels.TemplateHash] = hash + lagging.Labels[poollabels.TemplateHash] = "stale" + c, err := testutil.NewFakeClientWithObjects(pool, current, lagging) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewSyncHandler(c, exp).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + Expect(pool.Status.UpdatedReplicas).To(Equal(int32(1))) + Expect(meta.IsStatusConditionFalse(pool.Status.Conditions, vmpoolcondition.TypeSynced.String())).To(BeTrue()) + }) + }) + Context("scale down", func() { It("deletes the youngest surplus replicas", func() { pool := newPool(1) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go index 0dd08c0615..283dfec8af 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go @@ -12,6 +12,10 @@ Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https package poollabels import ( + "encoding/json" + "fmt" + "hash/fnv" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -29,8 +33,23 @@ const ( // Pool is a human-readable label with the pool name, predictable from the // pool and handy for kubectl/observability. 
Analogous to job-name on Job pods. Pool = "vmpool.virtualization.deckhouse.io/pool" + + // TemplateHash marks the template revision a replica is effectively on (cf. + // pod-template-hash / currentRevision). It is NOT part of the member selector, + // so changing the template does not orphan existing replicas. + TemplateHash = "vmpool.virtualization.deckhouse.io/template-hash" ) +// ComputeTemplateHash returns a stable short hash of the pool's +// virtualMachineTemplate — the desired revision replicas converge to. +func ComputeTemplateHash(pool *v1alpha2.VirtualMachinePool) string { + // encoding/json sorts map keys, so the marshalling is deterministic. + data, _ := json.Marshal(pool.Spec.VirtualMachineTemplate) + h := fnv.New32a() + _, _ = h.Write(data) + return fmt.Sprintf("%x", h.Sum32()) +} + // Member returns the managed labels stamped on every replica of the pool. func Member(pool *v1alpha2.VirtualMachinePool) map[string]string { return map[string]string{ From acedc2f07e098b9f3b4ef94dcb604b90724e5ee4 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:43:37 +0300 Subject: [PATCH 12/46] feat(vmpool): propagate template changes to live replicas in place Add a template handler that patches each live replica's spec to the current virtualMachineTemplate and marks it on the new revision once applied. Re-patching is avoided with a patched-template-hash annotation (not a spec diff, which the apiserver mutates by defaulting), and the template-hash label is advanced only when the replica is not awaiting a restart, so status.updatedReplicas / restartPendingReplicas and the Synced condition (RolloutInProgress vs RestartPendingApproval) reflect what has effectively landed. Hot/cold is decided by the VM layer. Covered by unit tests. 
Signed-off-by: Pavel Tishkov --- .../vmpool/internal/handler/sync.go | 26 ++-- .../vmpool/internal/handler/template.go | 107 ++++++++++++++++ .../vmpool/internal/handler/template_test.go | 116 ++++++++++++++++++ .../vmpool/internal/poollabels/poollabels.go | 24 ++++ .../controller/vmpool/vmpool_controller.go | 1 + 5 files changed, 261 insertions(+), 13 deletions(-) create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template.go create mode 100644 images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go index 6034bda4bc..2701e12528 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync.go @@ -91,19 +91,7 @@ func (h *SyncHandler) Handle(ctx context.Context, pool *v1alpha2.VirtualMachineP } func (h *SyncHandler) listMembers(ctx context.Context, pool *v1alpha2.VirtualMachinePool) ([]v1alpha2.VirtualMachine, error) { - var list v1alpha2.VirtualMachineList - if err := h.client.List(ctx, &list, client.InNamespace(pool.GetNamespace()), poollabels.MemberSelector(pool)); err != nil { - return nil, err - } - // Keep only VMs actually controlled by this pool. The pool-uid label already - // scopes the list, but the controllerRef check is the authoritative guard. 
- members := make([]v1alpha2.VirtualMachine, 0, len(list.Items)) - for i := range list.Items { - if ref := metav1.GetControllerOf(&list.Items[i]); ref != nil && ref.UID == pool.GetUID() { - members = append(members, list.Items[i]) - } - } - return members, nil + return poollabels.ListMembers(ctx, h.client, pool) } func (h *SyncHandler) scaleUp(ctx context.Context, pool *v1alpha2.VirtualMachinePool, key string, n int) error { @@ -226,6 +214,7 @@ func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members [] ready := 0 liveNonTerminating := 0 updated := 0 + restartPending := 0 for i := range members { if members[i].GetDeletionTimestamp() != nil { continue @@ -237,6 +226,10 @@ func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members [] if members[i].GetLabels()[poollabels.TemplateHash] == desiredHash { updated++ } + // Patched to the desired revision but the disruptive part awaits a restart. + if members[i].GetAnnotations()[poollabels.PatchedTemplateHash] == desiredHash && awaitingRestart(&members[i]) { + restartPending++ + } } desired := int(ptr.Deref(pool.Spec.Replicas, 0)) @@ -244,6 +237,7 @@ func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members [] pool.Status.Replicas = int32(len(members)) pool.Status.ReadyReplicas = int32(ready) pool.Status.UpdatedReplicas = int32(updated) + pool.Status.RestartPendingReplicas = int32(restartPending) pool.Status.DesiredTemplateHash = desiredHash pool.Status.Selector = poollabels.StatusSelector(pool) @@ -284,6 +278,12 @@ func (h *SyncHandler) updateStatus(pool *v1alpha2.VirtualMachinePool, members [] syncedStatus = metav1.ConditionFalse syncedReason = vmpoolcondition.ReasonRolloutInProgress syncedMessage = fmt.Sprintf("%d of %d replicas are on the current virtualMachineTemplate.", updated, liveNonTerminating) + if restartPending > 0 { + // Some replicas are patched but wait for a restart that will not happen + // on its own under restartApprovalMode: Manual. 
+ syncedReason = vmpoolcondition.ReasonRestartPendingApproval + syncedMessage = fmt.Sprintf("%d of %d replicas await a restart to apply configuration.", restartPending, liveNonTerminating) + } } meta.SetStatusCondition(&pool.Status.Conditions, metav1.Condition{ Type: vmpoolcondition.TypeSynced.String(), diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template.go new file mode 100644 index 0000000000..e8806e23da --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template.go @@ -0,0 +1,107 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package handler + +import ( + "context" + "errors" + "fmt" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/poollabels" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "github.com/deckhouse/virtualization/api/core/v1alpha2/vmcondition" +) + +const templateHandlerName = "template" + +// TemplateHandler propagates virtualMachineTemplate changes to live replicas +// in place (the pool owns the member spec): it patches each replica's spec to +// the desired revision, then marks the replica as effectively on that revision +// once the change has taken effect. Whether a change is applied hot or needs a +// restart is decided by the VM layer — the pool does not duplicate that. 
+type TemplateHandler struct { + client client.Client +} + +func NewTemplateHandler(c client.Client) *TemplateHandler { + return &TemplateHandler{client: c} +} + +func (h *TemplateHandler) Name() string { return templateHandlerName } + +func (h *TemplateHandler) Handle(ctx context.Context, pool *v1alpha2.VirtualMachinePool) (reconcile.Result, error) { + if pool.GetDeletionTimestamp() != nil { + return reconcile.Result{}, nil + } + + members, err := poollabels.ListMembers(ctx, h.client, pool) + if err != nil { + return reconcile.Result{}, fmt.Errorf("list pool members: %w", err) + } + + desiredHash := poollabels.ComputeTemplateHash(pool) + tmplSpec := pool.Spec.VirtualMachineTemplate.Spec + + var errs error + for i := range members { + m := &members[i] + if m.GetDeletionTimestamp() != nil { + continue + } + + // Step 1: bring the spec to the desired revision. Keyed on an annotation, + // not a spec diff, because the apiserver mutates the spec (defaulting, + // id allocation) and a diff would re-patch forever. In this slice a + // replica has no per-replica spec, so the whole spec follows the template + // (per-replica disk refs are merged in a later slice). + if m.GetAnnotations()[poollabels.PatchedTemplateHash] != desiredHash { + patched := m.DeepCopy() + patched.Spec = *tmplSpec.DeepCopy() + if patched.Annotations == nil { + patched.Annotations = map[string]string{} + } + patched.Annotations[poollabels.PatchedTemplateHash] = desiredHash + if err := h.client.Update(ctx, patched); err != nil { + errs = errors.Join(errs, fmt.Errorf("patch replica %s to template: %w", m.GetName(), err)) + } + continue + } + + // Step 2: the spec is on the desired revision. Mark the revision label as + // effectively applied only once the disruptive part is no longer pending; + // while the VM awaits a restart the label stays on the old revision. 
+ if awaitingRestart(m) { + continue + } + if m.GetLabels()[poollabels.TemplateHash] != desiredHash { + updated := m.DeepCopy() + if updated.Labels == nil { + updated.Labels = map[string]string{} + } + updated.Labels[poollabels.TemplateHash] = desiredHash + if err := h.client.Update(ctx, updated); err != nil { + errs = errors.Join(errs, fmt.Errorf("mark replica %s on current template: %w", m.GetName(), err)) + } + } + } + + return reconcile.Result{}, errs +} + +// awaitingRestart reports whether the VM has pending disruptive changes waiting +// for a restart to apply. +func awaitingRestart(vm *v1alpha2.VirtualMachine) bool { + c := meta.FindStatusCondition(vm.Status.Conditions, vmcondition.TypeAwaitingRestartToApplyConfiguration.String()) + return c != nil && c.Status == metav1.ConditionTrue +} diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go new file mode 100644 index 0000000000..f720e6946b --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go @@ -0,0 +1,116 @@ +//go:build EE +// +build EE + +/* +Copyright 2026 Flant JSC +Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https://github.com/deckhouse/deckhouse/blob/main/ee/LICENSE +*/ + +package handler + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/virtualization-controller/pkg/common/testutil" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmpool/internal/poollabels" + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "github.com/deckhouse/virtualization/api/core/v1alpha2/vmcondition" +) + +func getVM(ctx context.Context, c client.Client, name string) *v1alpha2.VirtualMachine { + vm := &v1alpha2.VirtualMachine{} + Expect(c.Get(ctx, types.NamespacedName{Namespace: poolNamespace, Name: name}, vm)).To(Succeed()) + return vm +} + +var _ = Describe("TemplateHandler", func() { + var ctx context.Context + var when time.Time + BeforeEach(func() { + ctx = context.Background() + when = time.Unix(1_700_000_000, 0) + }) + + poolWithRunPolicy := func(p v1alpha2.RunPolicy) *v1alpha2.VirtualMachinePool { + pool := newPool(1) + pool.Spec.VirtualMachineTemplate.Spec.RunPolicy = p + return pool + } + + It("patches a lagging replica's spec and records the patched revision", func() { + pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m.Spec.RunPolicy = v1alpha2.AlwaysOnUnlessStoppedManually // differs from template + c, err := testutil.NewFakeClientWithObjects(pool, m) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewTemplateHandler(c).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + got := getVM(ctx, c, "web-a") + Expect(got.Spec.RunPolicy).To(Equal(v1alpha2.AlwaysOnPolicy)) + Expect(got.Annotations).To(HaveKeyWithValue(poollabels.PatchedTemplateHash, poollabels.ComputeTemplateHash(pool))) + // The effectively-applied label is only set on a subsequent pass. 
+ Expect(got.Labels).NotTo(HaveKeyWithValue(poollabels.TemplateHash, poollabels.ComputeTemplateHash(pool))) + }) + + It("marks the replica on the current template once patched and not awaiting restart", func() { + pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) + hash := poollabels.ComputeTemplateHash(pool) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m.Annotations = map[string]string{poollabels.PatchedTemplateHash: hash} + m.Labels[poollabels.TemplateHash] = "old" + m.Spec.RunPolicy = v1alpha2.AlwaysOnPolicy + c, err := testutil.NewFakeClientWithObjects(pool, m) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewTemplateHandler(c).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + Expect(getVM(ctx, c, "web-a").Labels).To(HaveKeyWithValue(poollabels.TemplateHash, hash)) + }) + + It("keeps the old revision label while the replica awaits a restart", func() { + pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) + hash := poollabels.ComputeTemplateHash(pool) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m.Annotations = map[string]string{poollabels.PatchedTemplateHash: hash} + m.Labels[poollabels.TemplateHash] = "old" + m.Spec.RunPolicy = v1alpha2.AlwaysOnPolicy + m.Status.Conditions = []metav1.Condition{{ + Type: vmcondition.TypeAwaitingRestartToApplyConfiguration.String(), + Status: metav1.ConditionTrue, + Reason: "PendingRestart", + }} + c, err := testutil.NewFakeClientWithObjects(pool, m) + Expect(err).NotTo(HaveOccurred()) + + _, err = NewTemplateHandler(c).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + + Expect(getVM(ctx, c, "web-a").Labels[poollabels.TemplateHash]).To(Equal("old")) + }) + + It("does not re-patch or relabel a stable replica", func() { + pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) + hash := poollabels.ComputeTemplateHash(pool) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m.Annotations = map[string]string{poollabels.PatchedTemplateHash: 
hash} + m.Labels[poollabels.TemplateHash] = hash + m.Spec.RunPolicy = v1alpha2.AlwaysOnPolicy + c, err := testutil.NewFakeClientWithObjects(pool, m) + Expect(err).NotTo(HaveOccurred()) + + before := getVM(ctx, c, "web-a").ResourceVersion + _, err = NewTemplateHandler(c).Handle(ctx, pool) + Expect(err).NotTo(HaveOccurred()) + Expect(getVM(ctx, c, "web-a").ResourceVersion).To(Equal(before)) // no write happened + }) +}) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go index 283dfec8af..6786633704 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/poollabels/poollabels.go @@ -12,6 +12,7 @@ Licensed under the Deckhouse Platform Enterprise Edition (EE) license. See https package poollabels import ( + "context" "encoding/json" "fmt" "hash/fnv" @@ -38,8 +39,31 @@ const ( // pod-template-hash / currentRevision). It is NOT part of the member selector, // so changing the template does not orphan existing replicas. TemplateHash = "vmpool.virtualization.deckhouse.io/template-hash" + + // PatchedTemplateHash (annotation) records the revision a replica's spec was + // last patched to. It is distinct from TemplateHash so a re-patch is avoided + // even while the disruptive part of the change waits for a restart, and it + // does not depend on comparing specs (which the apiserver mutates by + // defaulting/allocation). + PatchedTemplateHash = "vmpool.virtualization.deckhouse.io/patched-template-hash" ) +// ListMembers returns the VirtualMachines controlled by the pool. The pool-uid +// label scopes the list; the controllerRef check is the authoritative guard. 
+func ListMembers(ctx context.Context, c client.Client, pool *v1alpha2.VirtualMachinePool) ([]v1alpha2.VirtualMachine, error) { + var list v1alpha2.VirtualMachineList + if err := c.List(ctx, &list, client.InNamespace(pool.GetNamespace()), MemberSelector(pool)); err != nil { + return nil, err + } + members := make([]v1alpha2.VirtualMachine, 0, len(list.Items)) + for i := range list.Items { + if ref := metav1.GetControllerOf(&list.Items[i]); ref != nil && ref.UID == pool.GetUID() { + members = append(members, list.Items[i]) + } + } + return members, nil +} + // ComputeTemplateHash returns a stable short hash of the pool's // virtualMachineTemplate — the desired revision replicas converge to. func ComputeTemplateHash(pool *v1alpha2.VirtualMachinePool) string { diff --git a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go index 24e320032d..b1ffd5a90a 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/vmpool_controller.go @@ -50,6 +50,7 @@ func SetupController( exp := expectations.New() handlers := []Handler{ + handler.NewTemplateHandler(client), handler.NewSyncHandler(client, exp), } r := NewReconciler(client, exp, handlers) From 6c9513fdda1c433002f5141dce32a5625f0c6da7 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:44:49 +0300 Subject: [PATCH 13/46] test(vmpool): use a readable fixed date instead of a raw unix timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace time.Unix(1_700_000_000, 0) with time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) in the pool tests — same deterministic clock, but self-explanatory. 
Signed-off-by: Pavel Tishkov --- .../vmpool/internal/expectations/expectations_test.go | 2 +- .../pkg/controller/vmpool/internal/handler/sync_test.go | 2 +- .../pkg/controller/vmpool/internal/handler/template_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go index 9dc9660b65..421ae77bef 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go @@ -112,7 +112,7 @@ var _ = Describe("Expectations", func() { Context("TTL safety valve", func() { It("becomes satisfied once the expectation outlives the TTL", func() { e := NewWithTTL(time.Minute) - now := time.Unix(1_700_000_000, 0) + now := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) e.now = func() time.Time { return now } e.ExpectCreations(key, 1) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go index e3498b9668..1dd73fdb24 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go @@ -90,7 +90,7 @@ var _ = Describe("SyncHandler", func() { BeforeEach(func() { ctx = context.Background() exp = expectations.New() - clock = time.Unix(1_700_000_000, 0) + clock = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) }) Context("scale up", func() { diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go index f720e6946b..5e9d55e434 100644 --- 
a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go @@ -35,7 +35,7 @@ var _ = Describe("TemplateHandler", func() { var when time.Time BeforeEach(func() { ctx = context.Background() - when = time.Unix(1_700_000_000, 0) + when = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) }) poolWithRunPolicy := func(p v1alpha2.RunPolicy) *v1alpha2.VirtualMachinePool { From 367ac5ecaaafcfb0cb4636ed0c8ef2d19bbd4c13 Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:48:50 +0300 Subject: [PATCH 14/46] test(vmpool): name the fixed test clock referenceTime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the inline dates with a single documented package-level referenceTime var per test package, and drop the clock/when aliases. A comment states the value is arbitrary — tests use only relative offsets and never read the wall clock — so the real-world date is irrelevant. Signed-off-by: Pavel Tishkov --- .../expectations/expectations_test.go | 6 ++- .../vmpool/internal/handler/sync_test.go | 41 ++++++++++--------- .../vmpool/internal/handler/template_test.go | 11 ++--- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go index 421ae77bef..9aecd141f1 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/expectations/expectations_test.go @@ -19,6 +19,10 @@ import ( const key = "ci/web" +// referenceTime is an arbitrary fixed clock; the TTL test advances it by hand +// via the injected now func, so the real-world date is irrelevant. 
+var referenceTime = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + var _ = Describe("Expectations", func() { Context("an unknown key", func() { It("is satisfied (nothing expected yet)", func() { @@ -112,7 +116,7 @@ var _ = Describe("Expectations", func() { Context("TTL safety valve", func() { It("becomes satisfied once the expectation outlives the TTL", func() { e := NewWithTTL(time.Minute) - now := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + now := referenceTime e.now = func() time.Time { return now } e.ExpectCreations(key, 1) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go index 1dd73fdb24..b40ad711d4 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/sync_test.go @@ -33,6 +33,11 @@ const ( poolUID = types.UID("pool-uid-0001") ) +// referenceTime is an arbitrary fixed clock for the tests. Only relative offsets +// from it matter (e.g. which replica is older); the wall clock is never read, so +// the value — and the real-world date — is irrelevant. 
+var referenceTime = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + func newPool(replicas int32) *v1alpha2.VirtualMachinePool { return &v1alpha2.VirtualMachinePool{ ObjectMeta: metav1.ObjectMeta{ @@ -82,15 +87,13 @@ func listMemberNames(ctx context.Context, c client.Client, pool *v1alpha2.Virtua var _ = Describe("SyncHandler", func() { var ( - ctx context.Context - exp *expectations.Expectations - clock time.Time + ctx context.Context + exp *expectations.Expectations ) BeforeEach(func() { ctx = context.Background() exp = expectations.New() - clock = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) }) Context("scale up", func() { @@ -152,8 +155,8 @@ var _ = Describe("SyncHandler", func() { Context("steady state", func() { It("neither creates nor deletes when live == desired", func() { pool := newPool(2) - m1 := newMemberVM(pool, "web-aaaaa", v1alpha2.MachineRunning, clock, false) - m2 := newMemberVM(pool, "web-bbbbb", v1alpha2.MachineRunning, clock, false) + m1 := newMemberVM(pool, "web-aaaaa", v1alpha2.MachineRunning, referenceTime, false) + m2 := newMemberVM(pool, "web-bbbbb", v1alpha2.MachineRunning, referenceTime, false) c, err := testutil.NewFakeClientWithObjects(pool, m1, m2) Expect(err).NotTo(HaveOccurred()) @@ -173,8 +176,8 @@ var _ = Describe("SyncHandler", func() { It("reports Synced when every replica is on the current template hash", func() { pool := newPool(2) hash := poollabels.ComputeTemplateHash(pool) - m1 := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, clock, false) - m2 := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, clock, false) + m1 := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) + m2 := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, referenceTime, false) m1.Labels[poollabels.TemplateHash] = hash m2.Labels[poollabels.TemplateHash] = hash c, err := testutil.NewFakeClientWithObjects(pool, m1, m2) @@ -191,8 +194,8 @@ var _ = Describe("SyncHandler", func() { It("reports Synced=False when a replica lags on an 
old hash", func() { pool := newPool(2) hash := poollabels.ComputeTemplateHash(pool) - current := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, clock, false) - lagging := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, clock, false) + current := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) + lagging := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, referenceTime, false) current.Labels[poollabels.TemplateHash] = hash lagging.Labels[poollabels.TemplateHash] = "stale" c, err := testutil.NewFakeClientWithObjects(pool, current, lagging) @@ -209,8 +212,8 @@ var _ = Describe("SyncHandler", func() { Context("scale down", func() { It("deletes the youngest surplus replicas", func() { pool := newPool(1) - older := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, clock, false) - newer := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + older := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, referenceTime, false) + newer := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, referenceTime.Add(time.Minute), false) c, err := testutil.NewFakeClientWithObjects(pool, older, newer) Expect(err).NotTo(HaveOccurred()) @@ -224,8 +227,8 @@ var _ = Describe("SyncHandler", func() { It("deletes the oldest surplus replicas under OldestFirst", func() { pool := newPool(1) pool.Spec.ScaleDownPolicy = v1alpha2.ScaleDownPolicyOldestFirst - older := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, clock, false) - newer := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + older := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, referenceTime, false) + newer := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, referenceTime.Add(time.Minute), false) c, err := testutil.NewFakeClientWithObjects(pool, older, newer) Expect(err).NotTo(HaveOccurred()) @@ -238,8 +241,8 @@ var _ = Describe("SyncHandler", func() { It("removes nothing anonymously under Explicit", 
func() { pool := newPool(1) pool.Spec.ScaleDownPolicy = v1alpha2.ScaleDownPolicyExplicit - m1 := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, clock, false) - m2 := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, clock.Add(time.Minute), false) + m1 := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) + m2 := newMemberVM(pool, "web-b", v1alpha2.MachineRunning, referenceTime.Add(time.Minute), false) c, err := testutil.NewFakeClientWithObjects(pool, m1, m2) Expect(err).NotTo(HaveOccurred()) @@ -256,9 +259,9 @@ var _ = Describe("SyncHandler", func() { pool := newPool(1) // live=3, desired=1 => surplus 2; one member already Terminating counts // as one of those two, so only ONE healthy replica should be deleted. - terminating := newMemberVM(pool, "web-term", v1alpha2.MachineRunning, clock, true) - healthyOld := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, clock.Add(time.Minute), false) - healthyNew := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, clock.Add(2*time.Minute), false) + terminating := newMemberVM(pool, "web-term", v1alpha2.MachineRunning, referenceTime, true) + healthyOld := newMemberVM(pool, "web-old", v1alpha2.MachineRunning, referenceTime.Add(time.Minute), false) + healthyNew := newMemberVM(pool, "web-new", v1alpha2.MachineRunning, referenceTime.Add(2*time.Minute), false) c, err := testutil.NewFakeClientWithObjects(pool, terminating, healthyOld, healthyNew) Expect(err).NotTo(HaveOccurred()) diff --git a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go index 5e9d55e434..5f1fc79977 100644 --- a/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go +++ b/images/virtualization-artifact/pkg/controller/vmpool/internal/handler/template_test.go @@ -10,7 +10,6 @@ package handler import ( "context" - "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -32,10 +31,8 @@ func getVM(ctx context.Context, c client.Client, name string) *v1alpha2.VirtualM var _ = Describe("TemplateHandler", func() { var ctx context.Context - var when time.Time BeforeEach(func() { ctx = context.Background() - when = time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) }) poolWithRunPolicy := func(p v1alpha2.RunPolicy) *v1alpha2.VirtualMachinePool { @@ -46,7 +43,7 @@ var _ = Describe("TemplateHandler", func() { It("patches a lagging replica's spec and records the patched revision", func() { pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) - m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) m.Spec.RunPolicy = v1alpha2.AlwaysOnUnlessStoppedManually // differs from template c, err := testutil.NewFakeClientWithObjects(pool, m) Expect(err).NotTo(HaveOccurred()) @@ -64,7 +61,7 @@ var _ = Describe("TemplateHandler", func() { It("marks the replica on the current template once patched and not awaiting restart", func() { pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) hash := poollabels.ComputeTemplateHash(pool) - m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) m.Annotations = map[string]string{poollabels.PatchedTemplateHash: hash} m.Labels[poollabels.TemplateHash] = "old" m.Spec.RunPolicy = v1alpha2.AlwaysOnPolicy @@ -80,7 +77,7 @@ var _ = Describe("TemplateHandler", func() { It("keeps the old revision label while the replica awaits a restart", func() { pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) hash := poollabels.ComputeTemplateHash(pool) - m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) m.Annotations = map[string]string{poollabels.PatchedTemplateHash: hash} m.Labels[poollabels.TemplateHash] = "old" 
m.Spec.RunPolicy = v1alpha2.AlwaysOnPolicy @@ -101,7 +98,7 @@ var _ = Describe("TemplateHandler", func() { It("does not re-patch or relabel a stable replica", func() { pool := poolWithRunPolicy(v1alpha2.AlwaysOnPolicy) hash := poollabels.ComputeTemplateHash(pool) - m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, when, false) + m := newMemberVM(pool, "web-a", v1alpha2.MachineRunning, referenceTime, false) m.Annotations = map[string]string{poollabels.PatchedTemplateHash: hash} m.Labels[poollabels.TemplateHash] = hash m.Spec.RunPolicy = v1alpha2.AlwaysOnPolicy From c53fdbe0da2f8f1d5e994c57aa3dbb2d3069f7db Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 12:53:26 +0300 Subject: [PATCH 15/46] feat(vmpool): add virtualDiskTemplates API for per-replica disks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add spec.virtualDiskTemplates: each entry describes a per-replica disk with a reclaim policy — Delete (default; the disk belongs to its VirtualMachine and is removed with it) or Retain (the disk belongs to the pool, outlives the replica and is reused on scale-up), plus keep (warm buffer) and ttl for Retain disks. This is the schema for reusable disks; the reconcile behaviour (creation, reuse selection, GC) follows. Signed-off-by: Pavel Tishkov --- api/core/v1alpha2/virtual_machine_pool.go | 64 ++++++ api/core/v1alpha2/zz_generated.deepcopy.go | 46 ++++ crds/virtualmachinepools.yaml | 236 +++++++++++++++++++++ 3 files changed, 346 insertions(+) diff --git a/api/core/v1alpha2/virtual_machine_pool.go b/api/core/v1alpha2/virtual_machine_pool.go index f95b4388b8..0b9b781816 100644 --- a/api/core/v1alpha2/virtual_machine_pool.go +++ b/api/core/v1alpha2/virtual_machine_pool.go @@ -93,6 +93,70 @@ type VirtualMachinePoolSpec struct { // `spec` is an ordinary VirtualMachineSpec, so a replica is no different from a // manually created virtual machine. 
VirtualMachineTemplate VirtualMachineTemplateSpec `json:"virtualMachineTemplate"` + + // VirtualDiskTemplates describes the per-replica disks. A disk with reclaim + // Delete belongs to its VirtualMachine and is removed with it; a disk with + // reclaim Retain belongs to the pool, outlives the replica and is reused on a + // later scale-up. + // + // +optional + // +listType=map + // +listMapKey=name + VirtualDiskTemplates []VirtualDiskTemplateSpec `json:"virtualDiskTemplates,omitempty"` +} + +// VirtualDiskTemplateSpec describes a per-replica disk. +type VirtualDiskTemplateSpec struct { + // Name identifies the disk template within the pool. It is a DNS-1123 label + // (no dots), because it is embedded into VirtualDisk names. + // + // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` + // +kubebuilder:validation:MaxLength=63 + Name string `json:"name"` + + // Reclaim controls what happens to the disk when its replica is removed. + // + // +optional + Reclaim VirtualDiskReclaim `json:"reclaim,omitempty"` + + // Spec is the desired state of the disk (an ordinary VirtualDiskSpec). + Spec VirtualDiskSpec `json:"spec"` +} + +// VirtualDiskReclaimPolicy selects the fate of a per-replica disk on scale-down. +type VirtualDiskReclaimPolicy string + +const ( + // VirtualDiskReclaimDelete removes the disk together with its replica (owner + // is the VirtualMachine). This is the default. + VirtualDiskReclaimDelete VirtualDiskReclaimPolicy = "Delete" + // VirtualDiskReclaimRetain keeps the disk (owner is the pool); it is reused on + // the next scale-up. + VirtualDiskReclaimRetain VirtualDiskReclaimPolicy = "Retain" +) + +// VirtualDiskReclaim is the reclaim policy and warm-buffer settings of a disk +// template. +type VirtualDiskReclaim struct { + // OnScaleDown is Delete (default) or Retain. 
+ // + // +kubebuilder:validation:Enum=Delete;Retain + // +kubebuilder:default=Delete + // +optional + OnScaleDown VirtualDiskReclaimPolicy `json:"onScaleDown,omitempty"` + + // Keep is the number of free (Retain) disks always kept warm for fast + // scale-up; these are immune to the ttl. Only meaningful with Retain. + // + // +kubebuilder:validation:Minimum=0 + // +optional + Keep int32 `json:"keep,omitempty"` + + // TTL is how long a free disk lives beyond the warm buffer before it is + // garbage-collected. Only meaningful with Retain. + // + // +optional + TTL *metav1.Duration `json:"ttl,omitempty"` } // ScaleDownPolicy selects which replica is removed on anonymous scale-down. diff --git a/api/core/v1alpha2/zz_generated.deepcopy.go b/api/core/v1alpha2/zz_generated.deepcopy.go index 7e7ba946c7..a9114737e6 100644 --- a/api/core/v1alpha2/zz_generated.deepcopy.go +++ b/api/core/v1alpha2/zz_generated.deepcopy.go @@ -1472,6 +1472,27 @@ func (in *VirtualDiskPersistentVolumeClaim) DeepCopy() *VirtualDiskPersistentVol return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VirtualDiskReclaim) DeepCopyInto(out *VirtualDiskReclaim) { + *out = *in + if in.TTL != nil { + in, out := &in.TTL, &out.TTL + *out = new(v1.Duration) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualDiskReclaim. +func (in *VirtualDiskReclaim) DeepCopy() *VirtualDiskReclaim { + if in == nil { + return nil + } + out := new(VirtualDiskReclaim) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *VirtualDiskSnapshot) DeepCopyInto(out *VirtualDiskSnapshot) { *out = *in @@ -1698,6 +1719,24 @@ func (in *VirtualDiskStatus) DeepCopy() *VirtualDiskStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VirtualDiskTemplateSpec) DeepCopyInto(out *VirtualDiskTemplateSpec) { + *out = *in + in.Reclaim.DeepCopyInto(&out.Reclaim) + in.Spec.DeepCopyInto(&out.Spec) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualDiskTemplateSpec. +func (in *VirtualDiskTemplateSpec) DeepCopy() *VirtualDiskTemplateSpec { + if in == nil { + return nil + } + out := new(VirtualDiskTemplateSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VirtualImage) DeepCopyInto(out *VirtualImage) { *out = *in @@ -3139,6 +3178,13 @@ func (in *VirtualMachinePoolSpec) DeepCopyInto(out *VirtualMachinePoolSpec) { **out = **in } in.VirtualMachineTemplate.DeepCopyInto(&out.VirtualMachineTemplate) + if in.VirtualDiskTemplates != nil { + in, out := &in.VirtualDiskTemplates, &out.VirtualDiskTemplates + *out = make([]VirtualDiskTemplateSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } return } diff --git a/crds/virtualmachinepools.yaml b/crds/virtualmachinepools.yaml index dc0435ca39..fbbff40568 100644 --- a/crds/virtualmachinepools.yaml +++ b/crds/virtualmachinepools.yaml @@ -96,6 +96,242 @@ spec: - OldestFirst - Explicit type: string + virtualDiskTemplates: + description: |- + VirtualDiskTemplates describes the per-replica disks. A disk with reclaim + Delete belongs to its VirtualMachine and is removed with it; a disk with + reclaim Retain belongs to the pool, outlives the replica and is reused on a + later scale-up. + items: + description: VirtualDiskTemplateSpec describes a per-replica disk. 
+ properties: + name: + description: |- + Name identifies the disk template within the pool. It is a DNS-1123 label + (no dots), because it is embedded into VirtualDisk names. + maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + reclaim: + description: + Reclaim controls what happens to the disk when + its replica is removed. + properties: + keep: + description: |- + Keep is the number of free (Retain) disks always kept warm for fast + scale-up; these are immune to the ttl. Only meaningful with Retain. + format: int32 + minimum: 0 + type: integer + onScaleDown: + default: Delete + description: OnScaleDown is Delete (default) or Retain. + enum: + - Delete + - Retain + type: string + ttl: + description: |- + TTL is how long a free disk lives beyond the warm buffer before it is + garbage-collected. Only meaningful with Retain. + type: string + type: object + spec: + description: + Spec is the desired state of the disk (an ordinary + VirtualDiskSpec). + properties: + dataSource: + properties: + containerImage: + description: + Use an image stored in an external container + registry. Only registries with enabled TLS are supported. + To provide a custom Certificate Authority (CA) chain, + use the `caBundle` field. + properties: + caBundle: + description: + CA chain in Base64 format to verify + the container registry. + example: YWFhCg== + format: byte + type: string + image: + description: + Path to the image in the container + registry. + example: registry.example.com/images/slackware:15 + pattern: ^(?P<name>(?:(?P<domain>(?:(?:localhost|[\w-]+(?:\.[\w-]+)+)(?::\d+)?)|[\w]+:\d+)/)?(?P<image>[a-z0-9_.-]+(?:/[a-z0-9_.-]+)*))(?::(?P<tag>[\w][\w.-]{0,127}))?(?:@(?P<digest>[A-Za-z][A-Za-z0-9]*(?:[+.-_][A-Za-z][A-Za-z0-9]*)*:[0-9a-fA-F]{32,}))?$ + type: string + imagePullSecret: + properties: + name: + description: + Name of the secret keeping container + registry credentials, which must be located + in the same namespace.
+ type: string + type: object + required: + - image + type: object + http: + description: |- + Fill the image with data from an external URL. The following schemas are supported: + + * HTTP + * HTTPS + + For HTTPS schema, there is an option to skip the TLS verification. + properties: + caBundle: + description: + CA chain in Base64 format to verify + the URL. + example: YWFhCg== + format: byte + type: string + checksum: + description: + Checksum to verify integrity and consistency + of the downloaded file. The file must match all + specified checksums. + properties: + md5: + example: f3b59bed9f91e32fac1210184fcff6f5 + maxLength: 32 + minLength: 32 + pattern: ^[0-9a-fA-F]{32}$ + type: string + sha256: + example: 78be890d71dde316c412da2ce8332ba47b9ce7a29d573801d2777e01aa20b9b5 + maxLength: 64 + minLength: 64 + pattern: ^[0-9a-fA-F]{64}$ + type: string + type: object + url: + description: |- + URL of the file for creating an image. The following file formats are supported: + * qcow2 + * vmdk + * vdi + * iso + * raw + The file can be compressed into an archive in one of the following formats: + * gz + * xz + example: https://mirror.example.com/images/slackware-15.qcow.gz + pattern: ^http[s]?:\/\/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$ + type: string + required: + - url + type: object + objectRef: + description: + Use an existing VirtualImage, ClusterVirtualImage, + or VirtualDiskSnapshot resource to create a disk. + properties: + kind: + description: + Kind of the existing VirtualImage, + ClusterVirtualImage, or VirtualDiskSnapshot resource. + enum: + - ClusterVirtualImage + - VirtualImage + - VirtualDiskSnapshot + type: string + name: + description: + Name of the existing VirtualImage, + ClusterVirtualImage, or VirtualDiskSnapshot resource. 
+ minLength: 1 + type: string + required: + - kind + - name + type: object + type: + description: |- + The following image sources are available for creating an image: + + * `HTTP`: From a file published on an HTTP/HTTPS service at a given URL. + * `ContainerImage`: From another image stored in a container registry. + * `ObjectRef`: From an existing resource. + * `Upload`: From data uploaded by the user via a special interface. + enum: + - HTTP + - ContainerImage + - ObjectRef + - Upload + type: string + type: object + x-kubernetes-validations: + - message: + HTTP requires http and cannot have ContainerImage + or ObjectRef. + rule: + "self.type == 'HTTP' ? has(self.http) && !has(self.containerImage) + && !has(self.objectRef) : true" + - message: + ContainerImage requires containerImage and cannot + have HTTP or ObjectRef. + rule: + "self.type == 'ContainerImage' ? has(self.containerImage) + && !has(self.http) && !has(self.objectRef) : true" + - message: + ObjectRef requires objectRef and cannot have + HTTP or ContainerImage. + rule: + "self.type == 'ObjectRef' ? has(self.objectRef) + && !has(self.http) && !has(self.containerImage) : true" + persistentVolumeClaim: + description: Settings for creating PVCs to store the disk. + properties: + size: + anyOf: + - type: integer + - type: string + description: |- + Desired size for PVC to store the disk. If the disk is created from an image, the size must be at least as large as the original unpacked image. + + This parameter can be omitted if the `.spec.dataSource` section is filled out. In this case, the controller will determine the disk size automatically, based on the size of the extracted image from the source specified in `.spec.dataSource`. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + storageClassName: + description: |- + StorageClass name required by the claim. 
For details on using StorageClass for PVC, refer to https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1. + + When creating disks, the user can specify the required StorageClass. If not specified, the default StorageClass will be used. + + The disk features and virtual machine behavior depend on the selected StorageClass. + + The `VolumeBindingMode` parameter in the StorageClass affects the disk creation process. The following values are allowed: + - `Immediate`: The disk will be created and becomes available for use immediately after creation. + - `WaitForFirstConsumer`: The disk will be created when first used on the node where the virtual machine will be started. + + StorageClass supports multiple storage settings: + - Creating a block device (`Block`) or file system (`FileSystem`). + - Multiple access (`ReadWriteMany`) or single access (`ReadWriteOnce`). The `ReadWriteMany` disks support multiple access, which enables a "live" migration of virtual machines. In contrast, the `ReadWriteOnce` disks, which can be accessed from only one node, don't have this feature. + + For known storage types, Deckhouse automatically determines the most efficient settings when creating disks (by priority, in descending order): + 1. `Block` + `ReadWriteMany` + 2. `FileSystem` + `ReadWriteMany` + 3. `Block` + `ReadWriteOnce` + 4. `FileSystem` + `ReadWriteOnce` + type: string + type: object + type: object + required: + - name + - spec + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map virtualMachineTemplate: description: |- VirtualMachineTemplate is the template every replica is stamped from. 
Its From 18e40be8a16574d580949623129e609c18ec726e Mon Sep 17 00:00:00 2001 From: Pavel Tishkov Date: Thu, 2 Jul 2026 13:02:17 +0300 Subject: [PATCH 16/46] feat(vmpool): create and attach Delete-policy per-replica disks Add an idempotent, self-healing disks handler: for every live member it ensures each Delete-policy virtualDiskTemplate disk exists (owned by the VirtualMachine, named -