Skip to content

Commit

Permalink
🌱 Improve KCP scale up when using failure domains (#11598)
Browse files Browse the repository at this point in the history
* Improve KCP scale up when using failure domains

* Address comments

* Address feedback
  • Loading branch information
fabriziopandini authored Dec 20, 2024
1 parent 1fc0d67 commit 963fbff
Show file tree
Hide file tree
Showing 4 changed files with 822 additions and 74 deletions.
27 changes: 17 additions & 10 deletions controlplane/kubeadm/internal/control_plane.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ func (c *ControlPlane) FailureDomains() clusterv1.FailureDomains {
}

// MachineInFailureDomainWithMostMachines returns the first matching failure domain with machines that has the most control-plane machines on it.
func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) (*clusterv1.Machine, error) {
fd := c.FailureDomainWithMostMachines(ctx, machines)
machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fd))
// Note: if there are eligibleMachines machines in failure domain that do not exists anymore, getting rid of those machines take precedence.
func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, eligibleMachines collections.Machines) (*clusterv1.Machine, error) {
fd := c.FailureDomainWithMostMachines(ctx, eligibleMachines)
machinesInFailureDomain := eligibleMachines.Filter(collections.InFailureDomains(fd))
machineToMark := machinesInFailureDomain.Oldest()
if machineToMark == nil {
return nil, errors.New("failed to pick control plane Machine to mark for deletion")
Expand All @@ -171,11 +172,11 @@ func (c *ControlPlane) MachineWithDeleteAnnotation(machines collections.Machines
return annotatedMachines
}

// FailureDomainWithMostMachines returns a fd which exists both in machines and control-plane machines and has the most
// control-plane machines on it.
func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) *string {
// FailureDomainWithMostMachines returns the fd with most machines in it and at least one eligible machine in it.
// Note: if there are eligibleMachines machines in failure domain that do not exist anymore, cleaning up those failure domains takes precedence.
func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, eligibleMachines collections.Machines) *string {
// See if there are any Machines that are not in currently defined failure domains first.
notInFailureDomains := machines.Filter(
notInFailureDomains := eligibleMachines.Filter(
collections.Not(collections.InFailureDomains(c.FailureDomains().FilterControlPlane().GetIDs()...)),
)
if len(notInFailureDomains) > 0 {
Expand All @@ -184,15 +185,21 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin
// in the cluster status.
return notInFailureDomains.Oldest().Spec.FailureDomain
}
return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)

// Pick the failure domain with most machines in it and at least one eligible machine in it.
return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, eligibleMachines)
}

// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines.
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines
// (the ultimate goal is to achieve ideal spreading of machines at stable state/when only up-to-date machines will exist).
//
// In case of tie (more failure domain with the same number of up-to-date, not deleted machines) the failure domain with the fewest number of
// machine overall is picked to ensure a better spreading of machines while the rollout is performed.
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) {
if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
return nil, nil
}
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.Machines, c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
}

// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
Expand Down
28 changes: 21 additions & 7 deletions controlplane/kubeadm/internal/controllers/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,17 +248,31 @@ func preflightCheckCondition(kind string, obj conditions.Getter, condition clust
return nil
}

// selectMachineForScaleDown select a machine candidate for scaling down. The selection is a two phase process:
//
// In the first phase it selects a subset of machines eligible for deletion:
// - if there are outdated machines with the delete machine annotation, use them as eligible subset (priority to user requests, part 1)
// - if there are machines (also not outdated) with the delete machine annotation, use them (priority to user requests, part 2)
// - if there are outdated machines with unhealthy control plane components, use them (priority to restore control plane health)
// - if there are outdated machines consider all the outdated machines as eligible subset (rollout)
// - otherwise consider all the machines
//
// Once the subset of machines eligible for deletion is identified, one machine is picked out of this subset by
// selecting the machine in the failure domain with most machines (including both eligible and not eligible machines).
func selectMachineForScaleDown(ctx context.Context, controlPlane *internal.ControlPlane, outdatedMachines collections.Machines) (*clusterv1.Machine, error) {
machines := controlPlane.Machines
// Select the subset of machines eligible for scale down.
eligibleMachines := controlPlane.Machines
switch {
case controlPlane.MachineWithDeleteAnnotation(outdatedMachines).Len() > 0:
machines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines)
case controlPlane.MachineWithDeleteAnnotation(machines).Len() > 0:
machines = controlPlane.MachineWithDeleteAnnotation(machines)
eligibleMachines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines)
case controlPlane.MachineWithDeleteAnnotation(eligibleMachines).Len() > 0:
eligibleMachines = controlPlane.MachineWithDeleteAnnotation(eligibleMachines)
case controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines).Len() > 0:
machines = controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines)
eligibleMachines = controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines)
case outdatedMachines.Len() > 0:
machines = outdatedMachines
eligibleMachines = outdatedMachines
}
return controlPlane.MachineInFailureDomainWithMostMachines(ctx, machines)

// Pick an eligible machine from the failure domain with most machines in (including both eligible and not eligible machines)
return controlPlane.MachineInFailureDomainWithMostMachines(ctx, eligibleMachines)
}
112 changes: 74 additions & 38 deletions util/failuredomains/failure_domains.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ import (
)

type failureDomainAggregation struct {
id string
count int
id string
countPriority int
countAll int
}
type failureDomainAggregations []failureDomainAggregation

Expand All @@ -43,67 +44,87 @@ func (f failureDomainAggregations) Len() int {
// Less reports whether the element with
// index i should sort before the element with index j.
func (f failureDomainAggregations) Less(i, j int) bool {
return f[i].count < f[j].count
// If a failure domain has less priority machines then the other, it goes first
if f[i].countPriority < f[j].countPriority {
return true
}
if f[i].countPriority > f[j].countPriority {
return false
}

// If a failure domain has the same number of priority machines then the other,
// use the number of overall machines to pick which one goes first.
if f[i].countAll < f[j].countAll {
return true
}
if f[i].countAll > f[j].countAll {
return false
}

// If both failure domain have the same number of priority machines and overall machines, we keep the order
// in the list which ensure a certain degree of randomness because the list originates from a map.
// This helps to spread machines e.g. when concurrently working on many clusters.
return i < j
}

// Swap swaps the elements with indexes i and j.
func (f failureDomainAggregations) Swap(i, j int) {
f[i], f[j] = f[j], f[i]
}

// PickMost returns a failure domain that is in machines and has most of the group of machines on.
func PickMost(ctx context.Context, failureDomains clusterv1.FailureDomains, groupMachines, machines collections.Machines) *string {
// orderDescending sorts failure domains according to all machines belonging to the group.
fds := orderDescending(ctx, failureDomains, groupMachines)
for _, fd := range fds {
for _, m := range machines {
if m.Spec.FailureDomain == nil {
continue
}
if *m.Spec.FailureDomain == fd.id {
return &fd.id
}
}
}
return nil
}

// orderDescending returns the sorted failure domains in decreasing order.
func orderDescending(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) failureDomainAggregations {
aggregations := pick(ctx, failureDomains, machines)
// PickMost returns the failure domain from which we have to delete a control plane machine, which is the failure domain with most machines and at least one eligible machine in it.
func PickMost(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, eligibleMachines collections.Machines) *string {
aggregations := countByFailureDomain(ctx, failureDomains, allMachines, eligibleMachines)
if len(aggregations) == 0 {
return nil
}
sort.Sort(sort.Reverse(aggregations))
return aggregations
if len(aggregations) > 0 && aggregations[0].countPriority > 0 {
return ptr.To(aggregations[0].id)
}
return nil
}

// PickFewest returns the failure domain with the fewest number of machines.
func PickFewest(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) *string {
aggregations := pick(ctx, failureDomains, machines)
// PickFewest returns the failure domain that will be used for placement of a new control plane machine, which is the failure domain with the fewest
// number of up-to-date, not deleted machines.
//
// Ensuring proper spreading of up-to-date, not deleted machines, is the highest priority to achieve ideal spreading of machines
// at stable state/when only up-to-date machines will exist.
//
// In case of tie (more failure domain with the same number of up-to-date, not deleted machines) the failure domain with the fewest number of
// machine overall is picked to ensure a better spreading of machines while the rollout is performed.
func PickFewest(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, upToDateMachines collections.Machines) *string {
aggregations := countByFailureDomain(ctx, failureDomains, allMachines, upToDateMachines)
if len(aggregations) == 0 {
return nil
}
sort.Sort(aggregations)
return ptr.To(aggregations[0].id)
}

func pick(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) failureDomainAggregations {
// countByFailureDomain returns failure domains with the number of machines in it.
// Note: countByFailureDomain computes both the number of machines as well as the number of a subset of machines with higher priority.
// E.g. for deletion out of date machines have higher priority vs other machines.
func countByFailureDomain(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, priorityMachines collections.Machines) failureDomainAggregations {
log := ctrl.LoggerFrom(ctx)

if len(failureDomains) == 0 {
return failureDomainAggregations{}
}

counters := map[string]int{}
counters := map[string]failureDomainAggregation{}

// Initialize the known failure domain keys to find out if an existing machine is in an unsupported failure domain.
for fd := range failureDomains {
counters[fd] = 0
for id := range failureDomains {
counters[id] = failureDomainAggregation{
id: id,
countPriority: 0,
countAll: 0,
}
}

// Count how many machines are in each failure domain.
for _, m := range machines {
for _, m := range allMachines {
if m.Spec.FailureDomain == nil {
continue
}
Expand All @@ -116,15 +137,30 @@ func pick(ctx context.Context, failureDomains clusterv1.FailureDomains, machines
log.Info(fmt.Sprintf("Unknown failure domain %q for Machine %s (known failure domains: %v)", id, m.GetName(), knownFailureDomains))
continue
}
counters[id]++
a := counters[id]
a.countAll++
counters[id] = a
}

aggregations := make(failureDomainAggregations, 0)

// Gather up tuples of failure domains ids and counts
for fd, count := range counters {
aggregations = append(aggregations, failureDomainAggregation{id: fd, count: count})
for _, m := range priorityMachines {
if m.Spec.FailureDomain == nil {
continue
}
id := *m.Spec.FailureDomain
if _, ok := failureDomains[id]; !ok {
continue
}
a := counters[id]
a.countPriority++
counters[id] = a
}

// Collect failure domain aggregations.
// Note: by creating the list from a map, we get a certain degree of randomness that helps to spread machines
// e.g. when concurrently working on many clusters.
aggregations := make(failureDomainAggregations, 0)
for _, count := range counters {
aggregations = append(aggregations, count)
}
return aggregations
}
Loading

0 comments on commit 963fbff

Please sign in to comment.