diff --git a/pkg/test/pods.go b/pkg/test/pods.go
index 1c41de3c53..de58be3d29 100644
--- a/pkg/test/pods.go
+++ b/pkg/test/pods.go
@@ -18,6 +18,7 @@ package test
 
 import (
 	"fmt"
+	"math/rand"
 
 	"github.com/imdario/mergo"
 	"github.com/samber/lo"
@@ -321,3 +322,114 @@ func buildNodeAffinity(nodeRequirements []v1.NodeSelectorRequirement, nodePrefer
 	}
 	return nodeAffinity
 }
+
+func MakePodAntiAffinityPodOptions(key string) PodOptions {
+	// all of these pods have anti-affinity to each other
+	labels := map[string]string{
+		"app": "nginx",
+	}
+	return PodOptions{
+		ObjectMeta: metav1.ObjectMeta{Labels: lo.Assign(labels, map[string]string{DiscoveryLabel: "owned"})},
+		PodAntiRequirements: []v1.PodAffinityTerm{
+			{
+				LabelSelector: &metav1.LabelSelector{MatchLabels: labels},
+				TopologyKey:   key,
+			},
+		},
+		ResourceRequirements: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceCPU:    RandomCPU(),
+				v1.ResourceMemory: RandomMemory(),
+			},
+		}}
+}
+func MakePodAffinityPodOptions(key string) PodOptions {
+	affinityLabels := RandomAffinityLabels()
+	return PodOptions{
+		ObjectMeta: metav1.ObjectMeta{Labels: lo.Assign(affinityLabels, map[string]string{DiscoveryLabel: "owned"})},
+		PodRequirements: []v1.PodAffinityTerm{
+			{
+				LabelSelector: &metav1.LabelSelector{MatchLabels: affinityLabels},
+				TopologyKey:   key,
+			},
+		},
+		ResourceRequirements: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceCPU:    RandomCPU(),
+				v1.ResourceMemory: RandomMemory(),
+			},
+		}}
+}
+
+func MakeTopologySpreadPodOptions(key string) PodOptions {
+	return PodOptions{
+		ObjectMeta: metav1.ObjectMeta{Labels: lo.Assign(RandomLabels(), map[string]string{DiscoveryLabel: "owned"})},
+		TopologySpreadConstraints: []v1.TopologySpreadConstraint{
+			{
+				MaxSkew:           1,
+				TopologyKey:       key,
+				WhenUnsatisfiable: v1.DoNotSchedule,
+				LabelSelector: &metav1.LabelSelector{
+					MatchLabels: RandomLabels(),
+				},
+			},
+		},
+		ResourceRequirements: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceCPU:    RandomCPU(),
+				v1.ResourceMemory: RandomMemory(),
+			},
+		}}
+}
+
+func MakeGenericPodOptions() PodOptions {
+	return PodOptions{
+		ObjectMeta: metav1.ObjectMeta{Labels: lo.Assign(RandomLabels(), map[string]string{DiscoveryLabel: "owned"})},
+		ResourceRequirements: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceCPU:    RandomCPU(),
+				v1.ResourceMemory: RandomMemory(),
+			},
+		}}
+}
+
+func MakeDiversePodOptions() []PodOptions {
+	var pods []PodOptions
+	pods = append(pods, MakeGenericPodOptions())
+	pods = append(pods, MakeTopologySpreadPodOptions(v1.LabelTopologyZone))
+	pods = append(pods, MakeTopologySpreadPodOptions(v1.LabelHostname))
+	pods = append(pods, MakePodAffinityPodOptions(v1.LabelHostname))
+	pods = append(pods, MakePodAffinityPodOptions(v1.LabelTopologyZone))
+	pods = append(pods, MakePodAntiAffinityPodOptions(v1.LabelHostname))
+	return pods
+}
+
+func RandomAffinityLabels() map[string]string {
+	return map[string]string{
+		"my-affinity": RandomLabelValue(),
+	}
+}
+
+func RandomLabels() map[string]string {
+	return map[string]string{
+		"my-label": RandomLabelValue(),
+	}
+}
+
+//nolint:gosec
+var r = rand.New(rand.NewSource(42))
+
+func RandomLabelValue() string {
+	labelValues := []string{"a", "b", "c", "d", "e", "f", "g"}
+	return labelValues[r.Intn(len(labelValues))]
+}
+
+func RandomMemory() resource.Quantity {
+	mem := []int{100, 256, 512, 1024, 2048, 4096}
+	return resource.MustParse(fmt.Sprintf("%dMi", mem[r.Intn(len(mem))]))
+}
+
+func RandomCPU() resource.Quantity {
+	cpu := []int{100, 250, 500, 1000, 1500}
+	return resource.MustParse(fmt.Sprintf("%dm", cpu[r.Intn(len(cpu))]))
+}
diff --git a/test/pkg/debug/collector.go b/test/pkg/debug/collector.go
new file mode 100644
index 0000000000..cb9cc487c1
--- /dev/null
+++ b/test/pkg/debug/collector.go
@@ -0,0 +1,172 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package debug
+
+import (
+	"encoding/csv"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/samber/lo"
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
+)
+
+const (
+	StageE2E        = "E2E"
+	StageBeforeEach = "BeforeEach"
+	StageAfterEach  = "AfterEach"
+)
+
+type TimeIntervalCollector struct {
+	starts map[string]time.Time
+	ends   map[string]time.Time
+	// used for ordering on Collect
+	Stages             []string
+	suiteTimeIntervals map[string][]TimeInterval
+}
+
+func NewTimestampCollector() *TimeIntervalCollector {
+	return &TimeIntervalCollector{
+		starts:             map[string]time.Time{},
+		ends:               map[string]time.Time{},
+		Stages:             []string{},
+		suiteTimeIntervals: map[string][]TimeInterval{},
+	}
+}
+
+func (t *TimeIntervalCollector) Reset() {
+	t.starts = map[string]time.Time{}
+	t.ends = map[string]time.Time{}
+	t.Stages = []string{}
+}
+
+// Record adds the current starts/ends/stages as a list of time intervals,
+// and adds it to the existingTimestamps, then resets the starts/ends/stages.
+func (t *TimeIntervalCollector) Record(name string) {
+	intervals := t.translate()
+	caser := cases.Title(language.AmericanEnglish)
+	sanitized := strings.ReplaceAll(caser.String(name), " ", "")
+	t.suiteTimeIntervals[sanitized] = intervals
+	t.Reset()
+}
+
+// Start will add a timestamp with the given stage and add it to the list.
+// If there is no End associated with a Start, the interval's inferred End
+// is at the start of the AfterEach.
+func (t *TimeIntervalCollector) Start(stage string) time.Time {
+	t.starts[stage] = time.Now()
+	t.Stages = append(t.Stages, stage)
+	return time.Now()
+}
+
+// Finalize will automatically add End time entries for Start entries
+// without a corresponding set End. This is useful for when the test
+// fails, since deferring time recording is tough to do.
+func (t *TimeIntervalCollector) Finalize() {
+	for stage := range t.starts {
+		// If it's one of the enum stages, don't add, as these are added automatically.
+		if stage == StageE2E || stage == StageBeforeEach || stage == StageAfterEach {
+			continue
+		}
+		_, ok := t.ends[stage]
+		if ok {
+			continue
+		}
+		t.ends[stage] = time.Now()
+	}
+}
+
+// End will mark the interval's end time.
+// If there is no End associated with a Start, the interval's inferred End
+// is at the start of the AfterEach.
+func (t *TimeIntervalCollector) End(stage string) {
+	t.ends[stage] = time.Now()
+}
+
+// translate takes the starts and ends in the existing TimeIntervalCollector
+// and adds the lists of intervals into the suiteTimeIntervals to be used
+// later for csv printing.
+func (t *TimeIntervalCollector) translate() []TimeInterval {
+	intervals := []TimeInterval{}
+	for _, stage := range t.Stages {
+		end, ok := t.ends[stage]
+		if !ok {
+			end = time.Now()
+		}
+		intervals = append(intervals, TimeInterval{
+			Start: t.starts[stage],
+			End:   end,
+			Stage: stage,
+		})
+	}
+	return intervals
+}
+
+type TimeInterval struct {
+	Start time.Time
+	End   time.Time
+	Stage string
+}
+
+func (t TimeInterval) String() []string {
+	return []string{t.Stage, t.Start.UTC().Format(time.RFC3339), t.End.UTC().Format(time.RFC3339)}
+}
+
+// PrintTestTimes returns a list of tables.
+// Each table has a list of Timestamps, where each timestamp is a list of strings.
+func PrintTestTimes(times map[string][]TimeInterval) map[string][][]string {
+	ret := map[string][][]string{}
+	for name, interval := range times {
+		ret[name] = lo.Map(interval, func(t TimeInterval, _ int) []string {
+			return t.String()
+		})
+	}
+	return ret
+}
+
+// WriteTimestamps will create a temp directory and a .csv file for each suite test
+func WriteTimestamps(path string, timestamps *TimeIntervalCollector) error {
+	directory, err := os.MkdirTemp("/tmp", "")
+	if err != nil {
+		return err
+	}
+	for name, table := range PrintTestTimes(timestamps.suiteTimeIntervals) {
+		file, err := os.CreateTemp(directory, fmt.Sprintf("*-%s.csv", name))
+		if err != nil {
+			return err
+		}
+		defer file.Close()
+
+		w := csv.NewWriter(file)
+
+		// Write the header
+		header := []string{"Stage", "Start", "End"}
+		if err := w.Write(header); err != nil {
+			return fmt.Errorf("failed to write header: %w", err)
+		}
+		if err := w.WriteAll(table); err != nil { // calls Flush internally
+			return fmt.Errorf("failed to flush writer: %w", err)
+		}
+
+		fmt.Println("-------- SUCCESS ---------")
+		fmt.Printf("Printed CSV TO %s\n", file.Name())
+	}
+	return nil
+}
diff --git a/test/pkg/environment/common/environment.go b/test/pkg/environment/common/environment.go
index 51559d977c..71ba7392b9 100644
--- a/test/pkg/environment/common/environment.go
+++ b/test/pkg/environment/common/environment.go
@@ -25,6 +25,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/awslabs/operatorpkg/status"
 	"github.com/onsi/gomega"
 	"github.com/samber/lo"
 	v1 "k8s.io/api/core/v1"
@@ -39,6 +40,7 @@ import (
 	"sigs.k8s.io/karpenter/kwok/apis/v1alpha1"
 	"sigs.k8s.io/karpenter/pkg/test"
 	. "sigs.k8s.io/karpenter/pkg/utils/testing" //nolint:stylecheck
+	"sigs.k8s.io/karpenter/test/pkg/debug"
 
 	"knative.dev/pkg/system"
 	controllerruntime "sigs.k8s.io/controller-runtime"
@@ -60,10 +62,11 @@ type Environment struct {
 	context.Context
 	cancel context.CancelFunc
 
-	Client     client.Client
-	Config     *rest.Config
-	KubeClient kubernetes.Interface
-	Monitor    *Monitor
+	TimeIntervalCollector *debug.TimeIntervalCollector
+	Client                client.Client
+	Config                *rest.Config
+	KubeClient            kubernetes.Interface
+	Monitor               *Monitor
 
 	StartingNodeCount int
 }
@@ -82,12 +85,13 @@ func NewEnvironment(t *testing.T) *Environment {
 	gomega.SetDefaultEventuallyTimeout(5 * time.Minute)
 	gomega.SetDefaultEventuallyPollingInterval(1 * time.Second)
 	return &Environment{
-		Context:    ctx,
-		cancel:     cancel,
-		Config:     config,
-		Client:     client,
-		KubeClient: kubernetes.NewForConfigOrDie(config),
-		Monitor:    NewMonitor(ctx, client),
+		Context:               ctx,
+		cancel:                cancel,
+		Config:                config,
+		Client:                client,
+		KubeClient:            kubernetes.NewForConfigOrDie(config),
+		Monitor:               NewMonitor(ctx, client),
+		TimeIntervalCollector: debug.NewTimestampCollector(),
 	}
 }
@@ -129,6 +133,12 @@ func NewClient(ctx context.Context, config *rest.Config) client.Client {
 		})
 		return []string{t.Value}
 	}))
+	lo.Must0(cache.IndexField(ctx, &v1beta1.NodeClaim{}, "status.conditions[*].type", func(o client.Object) []string {
+		nodeClaim := o.(*v1beta1.NodeClaim)
+		return lo.Map(nodeClaim.Status.Conditions, func(c status.Condition, _ int) string {
+			return c.Type
+		})
+	}))
 
 	c := lo.Must(client.New(config, client.Options{Scheme: scheme, Cache: &client.CacheOptions{Reader: cache}}))
diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go
index f403285db9..09ed4acb12 100644
--- a/test/pkg/environment/common/expectations.go
+++ b/test/pkg/environment/common/expectations.go
@@ -42,11 +42,11 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
-	corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
+	v1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
 	pscheduling "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
 	"sigs.k8s.io/karpenter/pkg/scheduling"
 	"sigs.k8s.io/karpenter/pkg/test"
-	coreresources "sigs.k8s.io/karpenter/pkg/utils/resources"
+	"sigs.k8s.io/karpenter/pkg/utils/resources"
 )
 
 func (env *Environment) ExpectCreated(objects ...client.Object) {
@@ -459,13 +459,13 @@ func (env *Environment) ExpectNodeCount(comparator string, count int) {
 
 func (env *Environment) ExpectNodeClaimCount(comparator string, count int) {
 	GinkgoHelper()
-	nodeClaimList := &corev1beta1.NodeClaimList{}
+	nodeClaimList := &v1beta1.NodeClaimList{}
 	Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
 	Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count))
 }
 
-func NodeClaimNames(nodeClaims []*corev1beta1.NodeClaim) []string {
-	return lo.Map(nodeClaims, func(n *corev1beta1.NodeClaim, index int) string {
+func NodeClaimNames(nodeClaims []*v1beta1.NodeClaim) []string {
+	return lo.Map(nodeClaims, func(n *v1beta1.NodeClaim, index int) string {
 		return n.Name
 	})
 }
@@ -499,7 +499,7 @@ func (env *Environment) ConsistentlyExpectDisruptionsWithNodeCount(disruptingNod
 	nodes := []v1.Node{}
 	Consistently(func(g Gomega) {
 		// Ensure we don't change our NodeClaims
-		nodeClaimList := &corev1beta1.NodeClaimList{}
+		nodeClaimList := &v1beta1.NodeClaimList{}
 		g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
 		g.Expect(nodeClaimList.Items).To(HaveLen(totalNodes))
@@ -509,7 +509,7 @@ func (env *Environment) ConsistentlyExpectDisruptionsWithNodeCount(disruptingNod
 
 		nodes = lo.Filter(nodeList.Items, func(n v1.Node, _ int) bool {
 			_, ok := lo.Find(n.Spec.Taints, func(t v1.Taint) bool {
-				return corev1beta1.IsDisruptingTaint(t)
+				return v1beta1.IsDisruptingTaint(t)
 			})
 			return ok
 		})
@@ -541,10 +541,10 @@ func (env *Environment) EventuallyExpectNodesUntaintedWithTimeout(timeout time.D
 	}).WithTimeout(timeout).Should(Succeed())
 }
 
-func (env *Environment) EventuallyExpectNodeClaimCount(comparator string, count int) []*corev1beta1.NodeClaim {
+func (env *Environment) EventuallyExpectNodeClaimCount(comparator string, count int) []*v1beta1.NodeClaim {
 	GinkgoHelper()
 	By(fmt.Sprintf("waiting for nodes to be %s to %d", comparator, count))
-	nodeClaimList := &corev1beta1.NodeClaimList{}
+	nodeClaimList := &v1beta1.NodeClaimList{}
 	Eventually(func(g Gomega) {
 		g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
 		g.Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count),
@@ -623,75 +623,75 @@ func (env *Environment) EventuallyExpectInitializedNodeCount(comparator string,
 	Eventually(func(g Gomega) {
 		nodes = env.Monitor.CreatedNodes()
 		nodes = lo.Filter(nodes, func(n *v1.Node, _ int) bool {
-			return n.Labels[corev1beta1.NodeInitializedLabelKey] == "true"
+			return n.Labels[v1beta1.NodeInitializedLabelKey] == "true"
 		})
 		g.Expect(len(nodes)).To(BeNumerically(comparator, count))
 	}).Should(Succeed())
 	return nodes
 }
 
-func (env *Environment) EventuallyExpectCreatedNodeClaimCount(comparator string, count int) []*corev1beta1.NodeClaim {
+func (env *Environment) EventuallyExpectCreatedNodeClaimCount(comparator string, count int) []*v1beta1.NodeClaim {
 	GinkgoHelper()
 	By(fmt.Sprintf("waiting for created nodeclaims to be %s to %d", comparator, count))
-	nodeClaimList := &corev1beta1.NodeClaimList{}
+	nodeClaimList := &v1beta1.NodeClaimList{}
 	Eventually(func(g Gomega) {
 		g.Expect(env.Client.List(env.Context, nodeClaimList)).To(Succeed())
 		g.Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count))
 	}).Should(Succeed())
-	return lo.Map(nodeClaimList.Items, func(nc corev1beta1.NodeClaim, _ int) *corev1beta1.NodeClaim {
+	return lo.Map(nodeClaimList.Items, func(nc v1beta1.NodeClaim, _ int) *v1beta1.NodeClaim {
 		return &nc
 	})
 }
 
-func (env *Environment) EventuallyExpectNodeClaimsReady(nodeClaims ...*corev1beta1.NodeClaim) {
+func (env *Environment) EventuallyExpectNodeClaimsReady(nodeClaims ...*v1beta1.NodeClaim) {
 	GinkgoHelper()
 	Eventually(func(g Gomega) {
 		for _, nc := range nodeClaims {
-			temp := &corev1beta1.NodeClaim{}
+			temp := &v1beta1.NodeClaim{}
 			g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), temp)).Should(Succeed())
 			g.Expect(temp.StatusConditions().Root().IsTrue()).To(BeTrue())
 		}
 	}).Should(Succeed())
 }
 
-func (env *Environment) EventuallyExpectExpired(nodeClaims ...*corev1beta1.NodeClaim) {
+func (env *Environment) EventuallyExpectExpired(nodeClaims ...*v1beta1.NodeClaim) {
 	GinkgoHelper()
 	Eventually(func(g Gomega) {
 		for _, nc := range nodeClaims {
 			g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed())
-			g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeExpired).IsTrue()).To(BeTrue())
+			g.Expect(nc.StatusConditions().Get(v1beta1.ConditionTypeExpired).IsTrue()).To(BeTrue())
 		}
 	}).Should(Succeed())
 }
 
-func (env *Environment) EventuallyExpectDrifted(nodeClaims ...*corev1beta1.NodeClaim) {
+func (env *Environment) EventuallyExpectDrifted(nodeClaims ...*v1beta1.NodeClaim) {
 	GinkgoHelper()
 	Eventually(func(g Gomega) {
 		for _, nc := range nodeClaims {
 			g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed())
-			g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeDrifted).IsTrue()).To(BeTrue())
+			g.Expect(nc.StatusConditions().Get(v1beta1.ConditionTypeDrifted).IsTrue()).To(BeTrue())
 		}
 	}).Should(Succeed())
 }
 
-func (env *Environment) ConsistentlyExpectNodeClaimsNotDrifted(duration time.Duration, nodeClaims ...*corev1beta1.NodeClaim) {
+func (env *Environment) ConsistentlyExpectNodeClaimsNotDrifted(duration time.Duration, nodeClaims ...*v1beta1.NodeClaim) {
 	GinkgoHelper()
-	nodeClaimNames := lo.Map(nodeClaims, func(nc *corev1beta1.NodeClaim, _ int) string { return nc.Name })
+	nodeClaimNames := lo.Map(nodeClaims, func(nc *v1beta1.NodeClaim, _ int) string { return nc.Name })
 	By(fmt.Sprintf("consistently expect nodeclaims %s not to be drifted for %s", nodeClaimNames, duration))
 	Consistently(func(g Gomega) {
 		for _, nc := range nodeClaims {
 			g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed())
-			g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeDrifted)).To(BeNil())
+			g.Expect(nc.StatusConditions().Get(v1beta1.ConditionTypeDrifted)).To(BeNil())
 		}
 	}, duration).Should(Succeed())
 }
 
-func (env *Environment) EventuallyExpectEmpty(nodeClaims ...*corev1beta1.NodeClaim) {
+func (env *Environment) EventuallyExpectEmpty(nodeClaims ...*v1beta1.NodeClaim) {
 	GinkgoHelper()
 	Eventually(func(g Gomega) {
 		for _, nc := range nodeClaims {
 			g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed())
-			g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeEmpty).IsTrue()).To(BeTrue())
+			g.Expect(nc.StatusConditions().Get(v1beta1.ConditionTypeEmpty).IsTrue()).To(BeTrue())
 		}
 	}).Should(Succeed())
 }
@@ -860,7 +860,7 @@ func (env *Environment) ExpectCABundle() string {
 	return base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData)
 }
 
-func (env *Environment) GetDaemonSetCount(np *corev1beta1.NodePool) int {
+func (env *Environment) GetDaemonSetCount(np *v1beta1.NodePool) int {
 	GinkgoHelper()
 
 	// Performs the same logic as the scheduler to get the number of daemonset
@@ -881,7 +881,7 @@ func (env *Environment) GetDaemonSetCount(np *corev1beta1.NodePool) int {
 	})
 }
 
-func (env *Environment) GetDaemonSetOverhead(np *corev1beta1.NodePool) v1.ResourceList {
+func (env *Environment) GetDaemonSetOverhead(np *v1beta1.NodePool) v1.ResourceList {
 	GinkgoHelper()
 
 	// Performs the same logic as the scheduler to get the number of daemonset
@@ -889,7 +889,7 @@ func (env *Environment) GetDaemonSetOverhead(np *corev1beta1.NodePool) v1.Resour
 	daemonSetList := &appsv1.DaemonSetList{}
 	Expect(env.Client.List(env.Context, daemonSetList)).To(Succeed())
 
-	return coreresources.RequestsForPods(lo.FilterMap(daemonSetList.Items, func(ds appsv1.DaemonSet, _ int) (*v1.Pod, bool) {
+	return resources.RequestsForPods(lo.FilterMap(daemonSetList.Items, func(ds appsv1.DaemonSet, _ int) (*v1.Pod, bool) {
 		p := &v1.Pod{Spec: ds.Spec.Template.Spec}
 		nodeClaimTemplate := pscheduling.NewNodeClaimTemplate(np)
 		if err := scheduling.Taints(nodeClaimTemplate.Spec.Taints).Tolerates(p); err != nil {
diff --git a/test/pkg/environment/common/setup.go b/test/pkg/environment/common/setup.go
index 471c3c34bc..aeaa5106b0 100644
--- a/test/pkg/environment/common/setup.go
+++ b/test/pkg/environment/common/setup.go
@@ -33,6 +33,8 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/karpenter/kwok/apis/v1alpha1" + "sigs.k8s.io/karpenter/test/pkg/debug" "sigs.k8s.io/karpenter/pkg/apis/v1beta1" @@ -59,16 +61,16 @@ var ( &schedulingv1.PriorityClass{}, &v1.Node{}, &v1beta1.NodeClaim{}, - // TODO @njtran add KWOKNodeClass + &v1alpha1.KWOKNodeClass{}, } ) // nolint:gocyclo func (env *Environment) BeforeEach() { debug.BeforeEach(env.Context, env.Config, env.Client) - // Expect this cluster to be clean for test runs to execute successfully env.ExpectCleanCluster() + env.TimeIntervalCollector.Start(debug.StageE2E) env.Monitor.Reset() env.StartingNodeCount = env.Monitor.NodeCountAtReset() @@ -91,7 +93,7 @@ func (env *Environment) ExpectCleanCluster() { fmt.Sprintf("expected no pods in the `default` namespace, found %s/%s", pods.Items[i].Namespace, pods.Items[i].Name)) } // TODO @njtran add KWOKNodeClass - for _, obj := range []client.Object{&v1beta1.NodePool{}} { + for _, obj := range []client.Object{&v1beta1.NodePool{}, &v1alpha1.KWOKNodeClass{}} { metaList := &metav1.PartialObjectMetadataList{} gvk := lo.Must(apiutil.GVKForObject(obj, env.Client.Scheme())) metaList.SetGroupVersionKind(gvk) @@ -101,13 +103,16 @@ func (env *Environment) ExpectCleanCluster() { } func (env *Environment) Cleanup() { + env.TimeIntervalCollector.Start(debug.StageAfterEach) env.CleanupObjects(CleanableObjects...) env.eventuallyExpectScaleDown() env.ExpectNoCrashes() + env.TimeIntervalCollector.End(debug.StageAfterEach) } func (env *Environment) AfterEach() { debug.AfterEach(env.Context) + env.TimeIntervalCollector.Record(CurrentSpecReport().LeafNodeText) env.printControllerLogs(&v1.PodLogOptions{Container: "controller"}) } diff --git a/test/suites/perf/scheduling_test.go b/test/suites/perf/scheduling_test.go index c5ded68789..d6bedb0ebe 100644 --- a/test/suites/perf/scheduling_test.go +++ b/test/suites/perf/scheduling_test.go @@ -17,22 +17,46 @@ limitations under the License. package perf_test import ( - "fmt" "time" . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/samber/lo" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/test" ) -var _ = Describe("Performance Benchmark", func() { +var replicas int = 100 + +var _ = Describe("Performance", func() { Context("Provisioning", func() { It("should do simple provisioning", func() { deployment := test.Deployment(test.DeploymentOptions{ - Replicas: 100, + Replicas: int32(replicas), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: testLabels, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + }, + }, + }}) + env.ExpectCreated(deployment) + env.ExpectCreated(nodePool, nodeClass) + env.EventuallyExpectHealthyPodCount(labelSelector, replicas) + }) + It("should do simple provisioning and simple drift", func() { + deployment := test.Deployment(test.DeploymentOptions{ + Replicas: int32(replicas), PodOptions: test.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: testLabels, @@ -44,18 +68,83 @@ var _ = Describe("Performance Benchmark", func() { }, }}) env.ExpectCreated(deployment) - start := time.Now() env.ExpectCreated(nodePool, nodeClass) - env.EventuallyExpectHealthyPodCount(labelSelector, 100) - // Need a way to respond to the last pod healthy event or look at the pod status conditions after the fact to get the exact measurements here. - // would also be good to just pull the metrics directly from the Karpenter pod to get the scheduling simulation metrics. + env.EventuallyExpectHealthyPodCount(labelSelector, replicas) + + env.TimeIntervalCollector.Start("Drift") + nodePool.Spec.Template.ObjectMeta.Labels = lo.Assign(nodePool.Spec.Template.ObjectMeta.Labels, map[string]string{ + "test-drift": "true", + }) + env.ExpectUpdated(nodePool) + // Eventually expect one node to be drifted + Eventually(func(g Gomega) { + nodeClaims := &v1beta1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaims, client.MatchingFields{"status.conditions[*].type": v1beta1.ConditionTypeDrifted})).To(Succeed()) + g.Expect(len(nodeClaims.Items)).ToNot(Equal(0)) + }).WithTimeout(5 * time.Second).Should(Succeed()) + // Then eventually expect no nodes to be drifted + Eventually(func(g Gomega) { + nodeClaims := &v1beta1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaims, client.MatchingFields{"status.conditions[*].type": v1beta1.ConditionTypeDrifted})).To(Succeed()) + g.Expect(len(nodeClaims.Items)).To(Equal(0)) + }).WithTimeout(300 * time.Second).Should(Succeed()) + env.TimeIntervalCollector.End("Drift") + }) + It("should do complex provisioning", func() { + deployments := []*appsv1.Deployment{} + podOptions := test.MakeDiversePodOptions() + for _, option := range podOptions { + deployments = append(deployments, test.Deployment( + test.DeploymentOptions{ + PodOptions: option, + Replicas: int32(replicas / len(podOptions)), + }, + )) + } + for _, dep := range deployments { + env.ExpectCreated(dep) + } + env.TimeIntervalCollector.Start("PostDeployment") + env.ExpectCreated(nodePool, nodeClass) + env.EventuallyExpectHealthyPodCountWithTimeout(10*time.Minute, labelSelector, len(deployments)*replicas) + env.TimeIntervalCollector.End("PostDeployment") + }) + It("should do complex provisioning and complex drift", func() { + deployments := []*appsv1.Deployment{} + podOptions := 
test.MakeDiversePodOptions() + for _, option := range podOptions { + deployments = append(deployments, test.Deployment( + test.DeploymentOptions{ + PodOptions: option, + Replicas: int32(replicas / len(podOptions)), + }, + )) + } + for _, dep := range deployments { + env.ExpectCreated(dep) + } - // env.Monitor.GetLastPodSchedulingEvent() - duration := time.Since(start) + env.ExpectCreated(nodePool, nodeClass) + env.EventuallyExpectHealthyPodCountWithTimeout(10*time.Minute, labelSelector, len(deployments)*replicas) - fmt.Println("--------- RESULTS ---------") - fmt.Printf("This is the duration: %s\n", duration) + env.TimeIntervalCollector.Start("Drift") + nodePool.Spec.Template.ObjectMeta.Labels = lo.Assign(nodePool.Spec.Template.ObjectMeta.Labels, map[string]string{ + "test-drift": "true", + }) + env.ExpectUpdated(nodePool) + // Eventually expect one node to be drifted + Eventually(func(g Gomega) { + nodeClaims := &v1beta1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaims, client.MatchingFields{"status.conditions[*].type": v1beta1.ConditionTypeDrifted})).To(Succeed()) + g.Expect(len(nodeClaims.Items)).ToNot(Equal(0)) + }).WithTimeout(5 * time.Second).Should(Succeed()) + // Then eventually expect no nodes to be drifted + Eventually(func(g Gomega) { + nodeClaims := &v1beta1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaims, client.MatchingFields{"status.conditions[*].type": v1beta1.ConditionTypeDrifted})).To(Succeed()) + g.Expect(len(nodeClaims.Items)).To(Equal(0)) + }).WithTimeout(10 * time.Minute).Should(Succeed()) + env.TimeIntervalCollector.End("Drift") }) }) - }) diff --git a/test/suites/perf/suite_test.go b/test/suites/perf/suite_test.go index eeb3bda969..c6fa41228f 100644 --- a/test/suites/perf/suite_test.go +++ b/test/suites/perf/suite_test.go @@ -17,14 +17,19 @@ limitations under the License. package perf_test import ( + "fmt" "testing" "time" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/karpenter/kwok/apis/v1alpha1" "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/test" + "sigs.k8s.io/karpenter/test/pkg/debug" "sigs.k8s.io/karpenter/test/pkg/environment/common" . 
"github.com/onsi/ginkgo/v2" @@ -35,6 +40,7 @@ import ( var nodePool *v1beta1.NodePool var nodeClass *v1alpha1.KWOKNodeClass var env *common.Environment + var testLabels = map[string]string{ test.DiscoveryLabel: "owned", } @@ -46,6 +52,10 @@ func TestPerf(t *testing.T) { env = common.NewEnvironment(t) }) AfterSuite(func() { + // Write out the timestamps from our tests + if err := debug.WriteTimestamps("path", env.TimeIntervalCollector); err != nil { + log.FromContext(env).Info(fmt.Sprintf("Failed to write timestamps to files, %s", err)) + } env.Stop() }) RunSpecs(t, "Perf") @@ -55,7 +65,18 @@ var _ = BeforeEach(func() { env.BeforeEach() nodeClass = env.DefaultNodeClass() nodePool = env.DefaultNodePool(nodeClass) - nodePool.Spec.Disruption.ExpireAfter = v1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 30)} + test.ReplaceRequirements(nodePool, v1beta1.NodeSelectorRequirementWithMinValues{ + NodeSelectorRequirement: v1.NodeSelectorRequirement{ + Key: v1alpha1.InstanceSizeLabelKey, + Operator: v1.NodeSelectorOpLt, + Values: []string{"32"}, + }, + }) + nodePool.Spec.Disruption.ExpireAfter = v1beta1.NillableDuration{Duration: lo.ToPtr(30 * time.Minute)} +}) + +var _ = AfterEach(func() { + env.TimeIntervalCollector.Finalize() + env.Cleanup() + env.AfterEach() }) -var _ = AfterEach(func() { env.Cleanup() }) -var _ = AfterEach(func() { env.AfterEach() })