From 4f49139057c0ee02f1000d0bbe4c963fe12af29e Mon Sep 17 00:00:00 2001 From: Jason Cho Date: Wed, 24 Apr 2024 11:14:30 -0700 Subject: [PATCH 1/2] OCPBUGS-18534: monitor test fix to wait before connecting to a non-existent dns Signed-off-by: Jason Cho go fmt fix Signed-off-by: Jason Cho add timeout to checkHostnameReady function Signed-off-by: Jason Cho switch to use oc Signed-off-by: Jason Cho --- .../monitortest.go | 41 ++++++++++++++++++ test/extended/util/client.go | 42 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go b/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go index 41d0de3d4584..2c3f91742811 100644 --- a/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go +++ b/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go @@ -9,6 +9,7 @@ import ( "net/http" "os" "strconv" + "strings" "time" "github.com/openshift/origin/pkg/monitortestframework" @@ -203,6 +204,15 @@ func (w *availability) StartCollection(ctx context.Context, adminRESTConfig *res return fmt.Errorf("error creating PDB: %w", err) } + // On certain platforms hitting the hostname before it is ready leads to a blackhole, this code checks + // the host from the cluster's context + if infra.Spec.PlatformSpec.Type == configv1.PowerVSPlatformType || infra.Spec.PlatformSpec.Type == configv1.IBMCloudPlatformType { + nodeTgt := "node/" + nodeList.Items[0].ObjectMeta.Name + if err := checkHostnameReady(tcpService, nodeTgt); err != nil { + return err + } + } + // Hit it once before considering ourselves ready fmt.Fprintf(os.Stderr, "hitting pods through the service's LoadBalancer\n") timeout := 10 * time.Minute @@ -344,3 +354,34 @@ func httpGetNoConnectionPoolTimeout(url string, timeout time.Duration) (*http.Re return client.Get(url) } + +// Uses the first node in the cluster to verify the LoadBalancer host is active before returning +func checkHostnameReady(tcpService *corev1.Service, nodeTgt string) error { + oc := exutil.NewCLIForMonitorTest(tcpService.GetObjectMeta().GetNamespace()) + + var ( + stdOut string + err error + ) + + for i := 0; i < 60; i++ { + fmt.Printf("Checking load balancer host is active \n") + wait.Poll(3*time.Second, 60*time.Second, func() (bool, error) { + stdOut, _, err = oc.AsAdmin().WithoutNamespace().RunInMonitorTest("debug").Args(nodeTgt, "--", "dig", "+short", tcpService.Status.LoadBalancer.Ingress[0].Hostname).Outputs() + if err != nil { + return false, nil + } + return true, nil + }) + + output := strings.TrimSpace(stdOut) + if output == "" { + fmt.Println("waiting for the LB to come active") + time.Sleep(1 * time.Minute) + continue + } else { + break + } + } + return nil +} diff --git a/test/extended/util/client.go b/test/extended/util/client.go index 053f2b86f014..2a9d573563a3 100644 --- a/test/extended/util/client.go +++ b/test/extended/util/client.go @@ -179,6 +179,32 @@ func NewCLIWithoutNamespace(project string) *CLI { return cli } +// NewCLIForMonitorTest initializes the CLI and Kube framework helpers +// without a namespace. Should be called outside of a Ginkgo .It() +// function. +func NewCLIForMonitorTest(project string) *CLI { + cli := &CLI{ + kubeFramework: &framework.Framework{ + SkipNamespaceCreation: true, + BaseName: project, + Options: framework.Options{ + ClientQPS: 20, + ClientBurst: 50, + }, + Timeouts: framework.NewTimeoutContext(), + }, + username: "admin", + execPath: "oc", + adminConfigPath: KubeConfigPath(), + staticConfigManifestDir: StaticConfigManifestDir(), + withoutNamespace: true, + } + + // Called only once (assumed the objects will never get modified) + cli.setupStaticConfigsFromManifests() + return cli +} + // NewHypershiftManagementCLI returns a CLI that interacts with the Hypershift management cluster. // Contrary to a normal CLI it does not perform any cleanup, and it must not be used for any mutating // operations. Also, contrary to a normal CLI it must be constructed inside an `It` block. This is @@ -813,6 +839,22 @@ func (c *CLI) Run(commands ...string) *CLI { return nc.setOutput(c.stdout) } +// Executes with the kubeconfig specified from the environment +func (c *CLI) RunInMonitorTest(commands ...string) *CLI { + in, out, errout := &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{} + nc := &CLI{ + execPath: c.execPath, + verb: commands[0], + kubeFramework: c.KubeFramework(), + adminConfigPath: c.adminConfigPath, + configPath: c.configPath, + username: c.username, + globalArgs: commands, + } + nc.stdin, nc.stdout, nc.stderr = in, out, errout + return nc.setOutput(c.stdout) +} + // InputString adds expected input to the command func (c *CLI) InputString(input string) *CLI { c.stdin.WriteString(input) From 60e8717b47ff2cb5bf6c7d62842fa44602fd019c Mon Sep 17 00:00:00 2001 From: Jason Cho Date: Mon, 6 May 2024 11:11:39 -0700 Subject: [PATCH 2/2] Use PollUntilContextTimeout instead of deprecated Poll Signed-off-by: Jason Cho --- .../monitortest.go | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go b/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go index 2c3f91742811..a1bdf6c11d9b 100644 --- a/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go +++ b/pkg/monitortests/network/disruptionserviceloadbalancer/monitortest.go @@ -208,7 +208,7 @@ func (w *availability) StartCollection(ctx context.Context, adminRESTConfig *res // the host from the cluster's context if infra.Spec.PlatformSpec.Type == configv1.PowerVSPlatformType || infra.Spec.PlatformSpec.Type == configv1.IBMCloudPlatformType { nodeTgt := "node/" + nodeList.Items[0].ObjectMeta.Name - if err := checkHostnameReady(tcpService, nodeTgt); err != nil { + if err := checkHostnameReady(ctx, tcpService, nodeTgt); err != nil { return err } } @@ -356,7 +356,7 @@ func httpGetNoConnectionPoolTimeout(url string, timeout time.Duration) (*http.Re } // Uses the first node in the cluster to verify the LoadBalancer host is active before returning -func checkHostnameReady(tcpService *corev1.Service, nodeTgt string) error { +func checkHostnameReady(ctx context.Context, tcpService *corev1.Service, nodeTgt string) error { oc := exutil.NewCLIForMonitorTest(tcpService.GetObjectMeta().GetNamespace()) var ( @@ -364,24 +364,19 @@ func checkHostnameReady(tcpService *corev1.Service, nodeTgt string) error { err error ) - for i := 0; i < 60; i++ { - fmt.Printf("Checking load balancer host is active \n") - wait.Poll(3*time.Second, 60*time.Second, func() (bool, error) { - stdOut, _, err = oc.AsAdmin().WithoutNamespace().RunInMonitorTest("debug").Args(nodeTgt, "--", "dig", "+short", tcpService.Status.LoadBalancer.Ingress[0].Hostname).Outputs() - if err != nil { - return false, nil - } - return true, nil - }) - + wait.PollUntilContextTimeout(ctx, 15*time.Second, 60*time.Minute, true, func(ctx context.Context) (bool, error) { + logrus.Debug("Checking load balancer host is active \n") + stdOut, _, err = oc.AsAdmin().WithoutNamespace().RunInMonitorTest("debug").Args(nodeTgt, "--", "dig", "+short", "+notcp", tcpService.Status.LoadBalancer.Ingress[0].Hostname).Outputs() + if err != nil { + return false, nil + } output := strings.TrimSpace(stdOut) if output == "" { - fmt.Println("waiting for the LB to come active") - time.Sleep(1 * time.Minute) - continue - } else { - break + logrus.Debug("Waiting for the load balancer to become active") + return false, nil } - } - return nil + logrus.Debug("Load balancer active") + return true, nil + }) + return err }