Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 42 additions & 13 deletions test/extended/imagepolicy/imagepolicy.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,7 @@ func updateImageConfig(oc *exutil.CLI, allowedRegistries []string) {
return err
})
o.Expect(err).NotTo(o.HaveOccurred(), "error updating image config")
WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
}

func cleanupImageConfig(oc *exutil.CLI) error {
Expand All @@ -238,8 +237,7 @@ func cleanupImageConfig(oc *exutil.CLI) error {
return err
})
o.Expect(err).NotTo(o.HaveOccurred(), "error cleaning up image config")
WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
return nil
}

Expand Down Expand Up @@ -285,8 +283,7 @@ func createClusterImagePolicy(oc *exutil.CLI, policy configv1.ClusterImagePolicy
_, err := oc.AdminConfigClient().ConfigV1().ClusterImagePolicies().Create(context.TODO(), &policy, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred())

WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
}

func deleteClusterImagePolicy(oc *exutil.CLI, policyName string) error {
Expand All @@ -296,8 +293,7 @@ func deleteClusterImagePolicy(oc *exutil.CLI, policyName string) error {
if err := oc.AdminConfigClient().ConfigV1().ClusterImagePolicies().Delete(context.TODO(), policyName, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("failed to delete cluster image policy %s: %v", policyName, err)
}
WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
return nil
}

Expand All @@ -312,8 +308,7 @@ func createImagePolicy(oc *exutil.CLI, policy configv1.ImagePolicy, namespace st

// Wait until each pool's Spec.Configuration.Name changes from the initial value
// and the pool reports Updated=true
WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
}

func deleteImagePolicy(oc *exutil.CLI, policyName string, namespace string) error {
Expand All @@ -323,8 +318,7 @@ func deleteImagePolicy(oc *exutil.CLI, policyName string, namespace string) erro
if err := oc.AdminConfigClient().ConfigV1().ImagePolicies(namespace).Delete(context.TODO(), policyName, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("failed to delete image policy %s in namespace %s: %v", policyName, namespace, err)
}
WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
return nil
}

Expand Down Expand Up @@ -707,7 +701,42 @@ func WaitForMCPConfigSpecChangeAndUpdated(oc *exutil.CLI, pool string, initialSp
return false
}
return machineconfighelper.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated)
}, 20*time.Minute, 10*time.Second).Should(o.BeTrue())
}, 15*time.Minute, 10*time.Second).Should(o.BeTrue())
}

func WaitForMCPsConfigSpecChangeAndUpdated(oc *exutil.CLI, workerInitialSpec, masterInitialSpec string) {
e2e.Logf("Waiting for worker and master pools to complete")
clientSet, err := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
o.Expect(err).NotTo(o.HaveOccurred())

o.Eventually(func() bool {
workerMCP, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), "worker", metav1.GetOptions{})
if err != nil {
return false
}
masterMCP, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), "master", metav1.GetOptions{})
if err != nil {
return false
}

workerReady := workerMCP.Status.Configuration.Name != workerInitialSpec &&
workerMCP.Spec.Configuration.Name == workerMCP.Status.Configuration.Name &&
machineconfighelper.IsMachineConfigPoolConditionTrue(workerMCP.Status.Conditions, mcfgv1.MachineConfigPoolUpdated)

masterReady := masterMCP.Status.Configuration.Name != masterInitialSpec &&
masterMCP.Spec.Configuration.Name == masterMCP.Status.Configuration.Name &&
machineconfighelper.IsMachineConfigPoolConditionTrue(masterMCP.Status.Conditions, mcfgv1.MachineConfigPoolUpdated)

if !workerReady {
e2e.Logf("Worker MCP not ready yet")
}
if !masterReady {
e2e.Logf("Master MCP not ready yet")
}

return workerReady && masterReady
}, 15*time.Minute, 10*time.Second).Should(o.BeTrue())
e2e.Logf("Both worker and master pools completed successfully")
}

func isDisconnectedCluster(oc *exutil.CLI) bool {
Expand Down
9 changes: 3 additions & 6 deletions test/extended/node/criocredentialprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ func updateCRIOCredentialProviderConfig(oc *exutil.CLI, matchImages []string, ex
return
}

imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
imagepolicy.WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
}

func getWorkerNodes(oc *exutil.CLI) ([]corev1.Node, error) {
Expand Down Expand Up @@ -289,8 +288,7 @@ func createIDMSResources(oc *exutil.CLI) {

e2e.Logf("Created ImageDigestMirrorSet %q", idms.Name)

imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
imagepolicy.WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
}

func cleanupIDMSResources(oc *exutil.CLI) {
Expand All @@ -302,8 +300,7 @@ func cleanupIDMSResources(oc *exutil.CLI) {

e2e.Logf("Deleted ImageDigestMirrorSet %q", "digest-mirror")

imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, workerPool, initialWorkerSpec)
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, masterPool, initialMasterSpec)
imagepolicy.WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)
}

func createNamespaceRBAC(f *e2e.Framework, namespace string) {
Expand Down
4 changes: 3 additions & 1 deletion test/extended/node/image_volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,15 @@ func describeImageVolumeTests(config imageVolumeTestConfig) bool {
podName = config.frameworkName + "-test"
)

g.BeforeEach(func() {
g.BeforeEach(func(ctx context.Context) {
// Microshift doesn't inherit OCP feature gates, and ImageVolume won't work either
isMicroshift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred())
if isMicroshift {
g.Skip("Not supported on Microshift")
}

EnsureNodesReady(ctx, oc)
})

g.It("should succeed with pod and pull policy of Always", func(ctx context.Context) {
Expand Down
6 changes: 3 additions & 3 deletions test/extended/node/kubelet_secret_pulled_images.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ var _ = g.Describe("[sig-node][Suite:openshift/disruptive-longrunning][Disruptiv

g.DeferCleanup(func() {
_ = deleteKC(oc, kcName)
_ = waitForMCP(ctx, mcClient, "worker", 30*time.Minute)
_ = waitForMCP(ctx, mcClient, "worker", 15*time.Minute)
})
Comment on lines 207 to 210

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟠 Major | ⚡ Quick win

Don't swallow rollback failures in this ordered cleanup.

This DeferCleanup drops both the deleteKC and waitForMCP errors, so Case 4 can leave the worker pool mid-rollout and still pass cleanup. In an g.Ordered suite, that leaks mutated cluster state into later cases.

Suggested fix
 		g.DeferCleanup(func() {
-			_ = deleteKC(oc, kcName)
-			_ = waitForMCP(ctx, mcClient, "worker", 15*time.Minute)
+			if err := deleteKC(oc, kcName); err != nil {
+				framework.Failf("cleanup: failed to delete KubeletConfig %s: %v", kcName, err)
+			}
+			if err := waitForMCP(ctx, mcClient, "worker", 15*time.Minute); err != nil {
+				framework.Failf("cleanup: worker MCP did not recover after deleting %s: %v", kcName, err)
+			}
 		})

As per path instructions, **/*.go: Never ignore error returns.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
g.DeferCleanup(func() {
_ = deleteKC(oc, kcName)
_ = waitForMCP(ctx, mcClient, "worker", 30*time.Minute)
_ = waitForMCP(ctx, mcClient, "worker", 15*time.Minute)
})
g.DeferCleanup(func() {
if err := deleteKC(oc, kcName); err != nil {
framework.Failf("cleanup: failed to delete KubeletConfig %s: %v", kcName, err)
}
if err := waitForMCP(ctx, mcClient, "worker", 15*time.Minute); err != nil {
framework.Failf("cleanup: worker MCP did not recover after deleting %s: %v", kcName, err)
}
})
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended/node/kubelet_secret_pulled_images.go` around lines 207 - 210,
The ordered cleanup in the kubelet secret pulled images test is swallowing
rollback failures because both deleteKC and waitForMCP errors are ignored.
Update the DeferCleanup registered in the Case 4 setup to handle and surface
errors from deleteKC and waitForMCP instead of discarding them, so the g.Ordered
suite does not continue with a partially rolled back worker pool. Use the
existing cleanup block near the Case 4 setup to keep the cluster state
consistent for later cases.

Source: Path instructions


g.By("Pre-caching private image on the node with a valid secret")
Expand All @@ -215,7 +215,7 @@ var _ = g.Describe("[sig-node][Suite:openshift/disruptive-longrunning][Disruptiv
g.By("Applying NeverVerify policy and waiting for MCO rollout")
credVerifyApplyPolicy(ctx, mcClient, kcName, `{"imagePullCredentialsVerificationPolicy":"NeverVerify"}`)
credVerifyWaitForMCPUpdating(ctx, mcClient, "worker")
err = waitForMCP(ctx, mcClient, "worker", 30*time.Minute)
err = waitForMCP(ctx, mcClient, "worker", 15*time.Minute)
o.Expect(err).NotTo(o.HaveOccurred())

g.By("Verifying NeverVerify policy allows pod without secret to use cached image")
Expand All @@ -224,7 +224,7 @@ var _ = g.Describe("[sig-node][Suite:openshift/disruptive-longrunning][Disruptiv
g.By("Switching to AlwaysVerify policy and waiting for MCO rollout")
credVerifyApplyPolicy(ctx, mcClient, kcName, `{"imagePullCredentialsVerificationPolicy":"AlwaysVerify"}`)
credVerifyWaitForMCPUpdating(ctx, mcClient, "worker")
err = waitForMCP(ctx, mcClient, "worker", 30*time.Minute)
err = waitForMCP(ctx, mcClient, "worker", 15*time.Minute)
o.Expect(err).NotTo(o.HaveOccurred())

// This pod also re-caches the image after MCO rollout since pull records are cleared
Expand Down
2 changes: 1 addition & 1 deletion test/extended/node/kubeletconfig_tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
"Timed out waiting for MachineConfigPool %q to start updating", testMCPName)

g.By(fmt.Sprintf("Waiting for MachineConfigPool %s to complete rollout", testMCPName))
err = waitForMCP(ctx, mcClient, testMCPName, 30*time.Minute)
err = waitForMCP(ctx, mcClient, testMCPName, 15*time.Minute)
o.Expect(err).NotTo(o.HaveOccurred(), "Error waiting for MachineConfigPool %q to become ready", testMCPName)
framework.Logf("MachineConfigPool %s has completed rollout", testMCPName)

Expand Down
12 changes: 7 additions & 5 deletions test/extended/node/node_e2e/container_runtime_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
oc = exutil.NewCLIWithoutNamespace("ctrcfg")
)

g.BeforeEach(func() {
g.BeforeEach(func(ctx context.Context) {
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred(), "failed to detect MicroShift cluster")
if isMicroShift {
g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
}

nodeutils.EnsureNodesReady(ctx, oc)
})

// Validates that ContainerRuntimeConfig pidsLimit setting is correctly applied
Expand All @@ -51,12 +53,12 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
workerNode := workers[0].Name

g.By("Make a manual change to crio.conf on worker node")
_, err = nodeutils.ExecOnNodeWithChroot(oc, workerNode,
_, err = nodeutils.ExecOnNodeWithChroot(ctx, oc, workerNode,
"/bin/bash", "-c", `sed -i '/^\[crio\.runtime\]/a log_level = "debug"' /etc/crio/crio.conf`)
o.Expect(err).NotTo(o.HaveOccurred(), "failed to edit crio.conf on node %s", workerNode)

g.By("Verify the manual crio.conf edit took effect")
editedConf, err := nodeutils.ExecOnNodeWithChroot(oc, workerNode, "cat", "/etc/crio/crio.conf")
editedConf, err := nodeutils.ExecOnNodeWithChroot(ctx, oc, workerNode, "cat", "/etc/crio/crio.conf")
o.Expect(err).NotTo(o.HaveOccurred(), "failed to read crio.conf on node %s", workerNode)
o.Expect(editedConf).To(o.ContainSubstring(`log_level = "debug"`),
"sed edit did not apply: expected log_level = debug in crio.conf")
Expand Down Expand Up @@ -100,7 +102,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
var crioConfig string
o.Eventually(func() error {
var execErr error
crioConfig, execErr = nodeutils.ExecOnNodeWithChroot(oc, workerNode,
crioConfig, execErr = nodeutils.ExecOnNodeWithChroot(ctx, oc, workerNode,
"/bin/bash", "-c", "crio config 2>/dev/null")
return execErr
}, 30*time.Second, 5*time.Second).Should(o.Succeed(), "failed to get crio config on node %s", workerNode)
Expand Down Expand Up @@ -163,7 +165,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
e2e.Logf("Worker node rolled out successfully")

g.By("Check overlaySize takes effect in storage.conf on worker node")
storageConf, err := nodeutils.ExecOnNodeWithChroot(oc, workerNode,
storageConf, err := nodeutils.ExecOnNodeWithChroot(ctx, oc, workerNode,
"/bin/bash", "-c", "head -n 7 /etc/containers/storage.conf | grep size")
o.Expect(err).NotTo(o.HaveOccurred(), "failed to read storage.conf on node %s", workerNode)
e2e.Logf("storage.conf size line: %s", storageConf)
Expand Down
14 changes: 7 additions & 7 deletions test/extended/node/node_e2e/image_registry_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
oc = exutil.NewCLIWithoutNamespace("imgcfg")
)

g.BeforeEach(func() {
g.BeforeEach(func(ctx context.Context) {
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred(), "failed to detect cluster type")
if isMicroShift {
g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
}

nodeutils.EnsureNodesReady(ctx, oc)
})

// Verifies that updating image.config.openshift.io/cluster with a new search
Expand Down Expand Up @@ -62,8 +64,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv

cleanupWorkerSpec := imagepolicy.GetMCPCurrentSpecConfigName(oc, "worker")
cleanupMasterSpec := imagepolicy.GetMCPCurrentSpecConfigName(oc, "master")
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, "worker", cleanupWorkerSpec)
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, "master", cleanupMasterSpec)
imagepolicy.WaitForMCPsConfigSpecChangeAndUpdated(oc, cleanupWorkerSpec, cleanupMasterSpec)

e2e.Logf("Cleanup: waiting for all cluster operators to settle")
waitErr := operator.WaitForOperatorsToSettle(ctx, oc.AdminConfigClient(), 10)
Expand All @@ -90,8 +91,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
o.Expect(err).NotTo(o.HaveOccurred(), "failed to update image.config.openshift.io/cluster")

g.By("Wait for worker and master MCP rollout to complete")
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, "worker", initialWorkerSpec)
imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, "master", initialMasterSpec)
imagepolicy.WaitForMCPsConfigSpecChangeAndUpdated(oc, initialWorkerSpec, initialMasterSpec)

g.By("Verify search registries config on a worker node")
workers, err := exutil.GetReadySchedulableWorkerNodes(ctx, oc.AdminKubeClient())
Expand All @@ -101,7 +101,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
var registriesConf string
o.Eventually(func() error {
var execErr error
registriesConf, execErr = nodeutils.ExecOnNodeWithChroot(oc, workers[0].Name,
registriesConf, execErr = nodeutils.ExecOnNodeWithChroot(ctx, oc, workers[0].Name,
"cat", "/etc/containers/registries.conf.d/01-image-searchRegistries.conf")
if execErr != nil {
return execErr
Expand All @@ -115,7 +115,7 @@ var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptiv
e2e.Logf("Registries config on %s:\n%s", workers[0].Name, registriesConf)

g.By("Verify policy.json is updated with allowed registries")
policyJSON, err := nodeutils.ExecOnNodeWithChroot(oc, workers[0].Name,
policyJSON, err := nodeutils.ExecOnNodeWithChroot(ctx, oc, workers[0].Name,
"cat", "/etc/containers/policy.json")
o.Expect(err).NotTo(o.HaveOccurred(), "failed to read policy.json on node %s", workers[0].Name)
e2e.Logf("policy.json on %s:\n%s", workers[0].Name, policyJSON)
Expand Down
6 changes: 4 additions & 2 deletions test/extended/node/node_e2e/initcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] NODE initContainer policy,vol
)

// Skip all tests on MicroShift clusters as MachineConfig resources are not available
g.BeforeEach(func() {
g.BeforeEach(func(ctx context.Context) {
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred())
if isMicroShift {
g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
}

nodeutils.EnsureNodesReady(ctx, oc)
})

//author: bgudi@redhat.com
Expand Down Expand Up @@ -127,7 +129,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] NODE initContainer policy,vol
actualContainerID := matches[1]

g.By("Delete init container from node")
output, err := nodeutils.ExecOnNodeWithChroot(oc, nodeName, "crictl", "rm", actualContainerID)
output, err := nodeutils.ExecOnNodeWithChroot(ctx, oc, nodeName, "crictl", "rm", actualContainerID)
o.Expect(err).NotTo(o.HaveOccurred(), "fail to delete container")
e2e.Logf("Container deletion output: %s", output)

Expand Down
Loading