From cf332b798d6e0b3faa08289a3bdca4611672d357 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 08:08:23 -0600 Subject: [PATCH 01/38] dev img and lifespan in tmplt --- chart/infra-server/static/workflow-gke-default.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index e656408c2..e19a0ef7a 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -60,7 +60,7 @@ spec: valueFrom: path: /outputs/cluster_name container: - image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.5.3 + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-1-g1d384d4d42-snapshot imagePullPolicy: Always command: - /usr/bin/entrypoint @@ -71,6 +71,7 @@ spec: - "--machine-type={{workflow.parameters.machine-type}}" - --gcp-project=srox-temp-dev-test - --creation-source=infra + - --labels="lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}," - --k8s-version={{workflow.parameters.k8s-version}} - --pod-security-policy={{workflow.parameters.pod-security-policy}} - --gcp-image-type={{workflow.parameters.gcp-image-type}} From 946058f4661f5724c6874ec125077b4463da8ceb Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 08:14:58 -0600 Subject: [PATCH 02/38] generated srcs --- generated/api/v1/service.pb.go | 1 + 1 file changed, 1 insertion(+) diff --git a/generated/api/v1/service.pb.go b/generated/api/v1/service.pb.go index 19f0a919e..8286c25f0 100644 --- a/generated/api/v1/service.pb.go +++ b/generated/api/v1/service.pb.go @@ -264,6 +264,7 @@ type WhoamiResponse struct { // Principal represents a possible type of authenticated principal. // // Types that are valid to be assigned to Principal: + // // *WhoamiResponse_User // *WhoamiResponse_ServiceAccount Principal isWhoamiResponse_Principal `protobuf_oneof:"principal"` From 2db4b5b55897213c58f6ea38ff2a6c6bc0d27266 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 08:15:55 -0600 Subject: [PATCH 03/38] error name conflict existing cluster --- service/cluster/cluster.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index b565960dd..f672b43fc 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -333,6 +333,16 @@ func (s *clusterImpl) create(req *v1.CreateClusterRequest, owner, eventID string return nil, fmt.Errorf("parameter 'name' was not provided") } + existingWorkflow, _ := s.getMostRecentArgoWorkflowFromClusterID(req.ID) + if existingWorkflow != nil { + return nil, status.Errorf( + codes.AlreadyExists, + "An infra cluster ID %q already exists in state %s.", + req.ID, workflowStatus(existingWorkflow.Status).String(), + ) + } + + // Make sure there is no running argo workflow for infra cluster with the same ID existingWorkflow, _ := s.getMostRecentArgoWorkflowFromClusterID(clusterID) if existingWorkflow != nil { From b0af7821d7659f7cac4c50448abf99c5affa78b4 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 08:42:17 -0600 Subject: [PATCH 04/38] lint --- service/cluster/cluster.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index f672b43fc..36033bff4 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -342,9 +342,8 @@ func (s *clusterImpl) create(req *v1.CreateClusterRequest, owner, eventID string ) } - // Make sure there is no running argo workflow for infra cluster with the same ID - existingWorkflow, _ := s.getMostRecentArgoWorkflowFromClusterID(clusterID) + existingWorkflow, _ = s.getMostRecentArgoWorkflowFromClusterID(clusterID) if existingWorkflow != nil { switch workflowStatus(existingWorkflow.Status) { case v1.Status_FAILED, v1.Status_FINISHED: From f67f340ee191738755b33397fc57e99c547997b0 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 09:44:17 -0600 Subject: [PATCH 05/38] remove quotes --- chart/infra-server/static/workflow-gke-default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index e19a0ef7a..11708fc12 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -71,7 +71,7 @@ spec: - "--machine-type={{workflow.parameters.machine-type}}" - --gcp-project=srox-temp-dev-test - --creation-source=infra - - --labels="lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}," + - --labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}} - --k8s-version={{workflow.parameters.k8s-version}} - --pod-security-policy={{workflow.parameters.pod-security-policy}} - --gcp-image-type={{workflow.parameters.gcp-image-type}} From e47ce0c64920f6c7585038a928d43e0117985f68 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 09:51:44 -0600 Subject: [PATCH 06/38] update image --- chart/infra-server/static/workflow-gke-default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 11708fc12..359ce8b7e 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -60,7 +60,7 @@ spec: valueFrom: path: /outputs/cluster_name container: - image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-1-g1d384d4d42-snapshot + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot imagePullPolicy: Always command: - /usr/bin/entrypoint From ead21677a0e480685c3ae375c36af8da8129cd57 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:24:06 -0600 Subject: [PATCH 07/38] test sidecar during suspend --- .../static/workflow-gke-default.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 359ce8b7e..4f8b7ebbd 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -82,6 +82,25 @@ spec: - name: wait suspend: {} + sidecars: + - name: timekeeper + container: + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot + imagePullPolicy: Always + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /tmp/google-credentials.json + command: + - /usr/bin/gcloud + args: + - container clusters update + - {{workflow.parameters.name}} + - --project=srox-temp-dev-test + - --zone={{workflow.parameters.gcp-zone}} + - --update-labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}} + volumeMounts: + - name: credentials + mountPath: /tmp - name: destroy activeDeadlineSeconds: 3600 From 770c84be3594d0a6429c2870fb05a56d0b00fcfc Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:41:27 -0600 Subject: [PATCH 08/38] default annot lifespan zero --- .../static/workflow-gke-default.yaml | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 4f8b7ebbd..e2d19f992 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -2,6 +2,8 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: generateName: gke-default- + annotations: + infra.stackrox.com/lifespan: 0s spec: entrypoint: start arguments: @@ -84,23 +86,22 @@ spec: suspend: {} sidecars: - name: timekeeper - container: - image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot - imagePullPolicy: Always - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /tmp/google-credentials.json - command: - - /usr/bin/gcloud - args: - - container clusters update - - {{workflow.parameters.name}} - - --project=srox-temp-dev-test - - --zone={{workflow.parameters.gcp-zone}} - - --update-labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}} - volumeMounts: - - name: credentials - mountPath: /tmp + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot + imagePullPolicy: Always + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /tmp/google-credentials.json + command: + - /usr/bin/gcloud + args: + - container clusters update + - "{{workflow.parameters.name}}" + - --project=srox-temp-dev-test + - --zone={{workflow.parameters.gcp-zone}} + - --update-labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}} + volumeMounts: + - name: credentials + mountPath: /tmp - name: destroy activeDeadlineSeconds: 3600 From cfede6be29966a6727ebb6b2bf6115c80ea14124 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 12 Jul 2023 15:42:42 -0600 Subject: [PATCH 09/38] try stop with onExit --- service/cluster/cluster.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 84a227f3e..0b8852e11 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -536,6 +536,19 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. "workflow-name", req.GetId(), "error", err, ) + log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + NodeFieldSelector: "", + Message: "Seeking end", + }) + if err != nil { + log.Warnw("failed to stop workflow, this is OK if the workflow is not running", + "workflow-name", req.GetId(), + "error", err, + ) + } } return &empty.Empty{}, nil From 816301110786b7b208a23599395a2601e001b3fe Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Thu, 13 Jul 2023 08:21:33 -0600 Subject: [PATCH 10/38] gke destroy as onExit --- .../static/workflow-gke-default.yaml | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index e2d19f992..12ddc9817 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -32,12 +32,13 @@ spec: template: create - - name: wait template: wait - - - name: destroy - template: destroy - arguments: - parameters: - - name: name - value: "{{steps.create.outputs.parameters.cluster_name}}" + hooks: + exit: + template: destroy + arguments: + parameters: + - name: name + value: "{{steps.create.outputs.parameters.cluster_name}}" - name: create activeDeadlineSeconds: 3600 @@ -83,26 +84,39 @@ spec: mountPath: /tmp - name: wait - suspend: {} - sidecars: - - name: timekeeper + steps: + - - name: timeupdate + template: timeupdate + - - name: delay + template: delay + - - name: loop + template: wait + + - name: delay + suspend: + duration: "120" + + - name: timeupdate + script: image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot imagePullPolicy: Always env: - name: GOOGLE_APPLICATION_CREDENTIALS value: /tmp/google-credentials.json - command: - - /usr/bin/gcloud - args: - - container clusters update - - "{{workflow.parameters.name}}" - - --project=srox-temp-dev-test - - --zone={{workflow.parameters.gcp-zone}} - - --update-labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}} + command: [bash, -x] + source: | + gcloud auth activate-service-account --key-file /tmp/google-credentials.json; + gcloud auth list; + gcloud config set compute/zone "{{workflow.parameters.gcp-zone}}"; + gcloud config set core/disable_prompts True; + gcloud container clusters update \ + "{{workflow.parameters.name}}"\ + --project=srox-temp-dev-test\ + --update-labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}; volumeMounts: - name: credentials mountPath: /tmp - + - name: destroy activeDeadlineSeconds: 3600 container: From 17c572a167abd52ec5de3272051383f9475c3b54 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Thu, 13 Jul 2023 09:06:57 -0600 Subject: [PATCH 11/38] needsExit flag --- chart/infra-server/static/workflow-gke-default.yaml | 2 ++ service/cluster/cluster.go | 3 +++ 2 files changed, 5 insertions(+) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 12ddc9817..089e9c37d 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -2,6 +2,8 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: generateName: gke-default- + labels: + needsExit: true annotations: infra.stackrox.com/lifespan: 0s spec: diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 0b8852e11..ea1fcb176 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -536,6 +536,9 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. "workflow-name", req.GetId(), "error", err, ) + } + if value, exists := v.GetLabels()["needsExit"]; exists { + log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), From 76502977b2e3924a59ce8e092a5601a5d375eabc Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Thu, 13 Jul 2023 09:09:02 -0600 Subject: [PATCH 12/38] fix name --- service/cluster/cluster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index ea1fcb176..01f009f17 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -537,7 +537,7 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. "error", err, ) } - if value, exists := v.GetLabels()["needsExit"]; exists { + if value, exists := workflow.GetLabels()["needsExit"]; exists { log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ From c5b056b8e8f2bc2e56092af0c3016f268007789a Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Fri, 14 Jul 2023 13:24:49 -0600 Subject: [PATCH 13/38] recreate dev server --- .../static/workflow-gke-default.yaml | 200 ++++++++++++++---- 1 file changed, 157 insertions(+), 43 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 089e9c37d..3ef75fef2 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -6,8 +6,10 @@ metadata: needsExit: true annotations: infra.stackrox.com/lifespan: 0s + infra.stackrox.com/lifespanlast: 1s spec: entrypoint: start + onExit: stop arguments: parameters: - name: name @@ -31,17 +33,163 @@ spec: - name: start steps: - - name: create - template: create - - - name: wait + template: echo + arguments: + parameters: + - name: message + value: "create" + - name: wait template: wait - hooks: - exit: - template: destroy - arguments: - parameters: - - name: name - value: "{{steps.create.outputs.parameters.cluster_name}}" + arguments: + parameters: + - name: oldlifespan + value: "3h0m0s" + - name: stop + steps: + - - name: destroy + template: echo + arguments: + parameters: + - name: message + value: "{{workflow.parameters.name}}" + + - name: wait + inputs: + parameters: + - name: oldlifespan + steps: + - - name: echo1 + template: echo + arguments: + parameters: + - name: message + value: "{{inputs.parameters.oldlifespan}} =? {{workflow.annotations.infra.stackrox.com/lifespan}}" + - - name: echoD + template: echo + arguments: + parameters: + - name: message + value: "{{=sprig.dateModify(workflow.annotations.infra.stackrox.com/lifespan, workflow.creationTimestamp)}}" + - - name: echoD2 + template: echo + arguments: + parameters: + - name: message + value: "{{=sprig.dateModify('30h',workflow.creationTimestamp)}}" + - - name: echoD3 + template: echo + arguments: + parameters: + - name: message + value: "{{sprig.dateModify(workflow.annotations.infra.stackrox.com/lifespan, workflow.creationTimestamp)}}" + - - name: delay + template: delay + - - name: echo2 + template: echo + when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + arguments: + parameters: + - name: message + value: "{{inputs.parameters.oldlifespan}} =~ {{workflow.annotations.infra.stackrox.com/lifespan}}" + - - name: echo3 + template: echo + when: "'{{inputs.parameters.oldlifespan}}' !~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + arguments: + parameters: + - name: message + value: "{{inputs.parameters.oldlifespan}} !~ {{workflow.annotations.infra.stackrox.com/lifespan}}" + - - name: echo4 + template: echo + arguments: + parameters: + - name: message + value: "{{inputs.parameters.oldlifespan}}, {{workflow.annotations.infra.stackrox.com/lifespan}}" + - - name: shortloop + template: wait + when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + arguments: + parameters: + - name: oldlifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + - - name: timeupdate + template: timeupdate + - - name: echoE + template: echo + arguments: + parameters: + - name: message + value: "{{inputs.parameters.oldlifespan}} =? {{workflow.annotations.infra.stackrox.com/lifespan}}" + - - name: echoE2 + template: argosay + arguments: + parameters: + - name: foo + value: 5 + - name: message + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + - - name: loop + template: wait + arguments: + parameters: + - name: oldlifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + + - name: argosay + inputs: + parameters: + - name: foo + - name: message + container: + image: argoproj/argosay:v2 + # in this example, we use `asInt` to cast a parameter (which are ALWAYS strings) to an int so we can + # multiply by 10 + args: + - echo + - | + hello {{=asInt(inputs.parameters.foo) * 10}} @ {{=sprig.date('2006', workflow.creationTimestamp)}} + {{inputs.parameters.message}} + + - name: echo + inputs: + parameters: + - name: message + container: + image: alpine:3.7 + command: [echo, "{{inputs.parameters.message}}"] + + - name: delay + suspend: + duration: "4" + + - name: timeupdate + script: + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot + imagePullPolicy: Always + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /tmp/google-credentials.json + command: [bash, -x] + source: | + lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}; + gcloud auth activate-service-account --key-file /tmp/google-credentials.json; + gcloud auth list; + gcloud config set compute/zone "{{workflow.parameters.gcp-zone}}"; + gcloud config set core/disable_prompts True; + gcloud container clusters update \ + "{{workflow.parameters.name}}"\ + --project=srox-temp-dev-test\ + "--update-labels=lifespan=${lifespan}"; + echo "$lifespan" | tee /outputs/value.txt + volumeMounts: + - name: credentials + mountPath: /tmp + outputs: + parameters: + - name: value + valueFrom: + path: /outputs/value.txt + - name: create activeDeadlineSeconds: 3600 outputs: @@ -85,40 +233,6 @@ spec: - name: credentials mountPath: /tmp - - name: wait - steps: - - - name: timeupdate - template: timeupdate - - - name: delay - template: delay - - - name: loop - template: wait - - - name: delay - suspend: - duration: "120" - - - name: timeupdate - script: - image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot - imagePullPolicy: Always - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /tmp/google-credentials.json - command: [bash, -x] - source: | - gcloud auth activate-service-account --key-file /tmp/google-credentials.json; - gcloud auth list; - gcloud config set compute/zone "{{workflow.parameters.gcp-zone}}"; - gcloud config set core/disable_prompts True; - gcloud container clusters update \ - "{{workflow.parameters.name}}"\ - --project=srox-temp-dev-test\ - --update-labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}; - volumeMounts: - - name: credentials - mountPath: /tmp - - name: destroy activeDeadlineSeconds: 3600 container: From ae9bd9f022e3533e8422be4484a8da136a8fb609 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 17 Jul 2023 08:30:04 -0600 Subject: [PATCH 14/38] stop, or resume if suspended --- service/cluster/cluster.go | 63 +++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 01f009f17..eeb1b2ee5 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -523,28 +523,27 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. return nil, err } - log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) - - // Resume the workflow so that it may move to the destroy phase without - // waiting for cleanupExpiredClusters() to kick in. - _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ - Name: workflow.GetName(), - Namespace: s.workflowNamespace, - }) - if err != nil { - log.Warnw("failed to resume workflow, this is OK if the workflow is not waiting", - "workflow-name", req.GetId(), - "error", err, - ) - } - if value, exists := workflow.GetLabels()["needsExit"]; exists { - log.Infow("argo workflow requires exit to stop looping", "needsExit", value) + if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { + // Resume the workflow so that it may move to the destroy phase without + // waiting for cleanupExpiredClusters() to kick in. + log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + }) + if err != nil { + log.Warnw("failed to resume workflow, this is OK if the workflow is not waiting", + "workflow-name", req.GetId(), + "error", err, + ) + } + } else { log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), Namespace: s.workflowNamespace, NodeFieldSelector: "", - Message: "Seeking end", + Message: "Destroying cluster. End workflow loop.", }) if err != nil { log.Warnw("failed to stop workflow, this is OK if the workflow is not running", @@ -663,13 +662,29 @@ func (s *clusterImpl) cleanupExpiredClusters() { continue } - log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName()) - _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ - Name: workflow.GetName(), - Namespace: s.workflowNamespace, - }) - if err != nil { - log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err) + if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { + log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + }) + if err != nil { + log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err) + } + } else { + log.Infow("stopping argo workflow that expired", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + NodeFieldSelector: "", + Message: "Destroying cluster. End workflow loop.", + }) + if err != nil { + log.Warnw("failed to stop workflow, this is OK if the workflow is not running", + "workflow-name", req.GetId(), + "error", err, + ) + } } } From f1debcdbfbf0fb31fcdd84c6ae4240d74d6f7966 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:16:20 -0600 Subject: [PATCH 15/38] use argo util to check suspend --- service/cluster/cluster.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index eeb1b2ee5..db49cbe0e 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -17,6 +17,7 @@ import ( workflowpkg "github.com/argoproj/argo-workflows/v3/pkg/apiclient/workflow" "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" workflowv1 "github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned/typed/workflow/v1alpha1" + workflowutil "github.com/argoproj/argo-workflows/workflow/util" "github.com/golang/protobuf/ptypes" "github.com/golang/protobuf/ptypes/duration" "github.com/golang/protobuf/ptypes/empty" @@ -523,7 +524,7 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. return nil, err } - if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { + if workflowutil.IsWorkflowSuspended(workflow) { // Resume the workflow so that it may move to the destroy phase without // waiting for cleanupExpiredClusters() to kick in. log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) @@ -662,7 +663,7 @@ func (s *clusterImpl) cleanupExpiredClusters() { continue } - if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { + if workflowutil.IsWorkflowSuspended(workflow) { log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ Name: workflow.GetName(), From e63a36c4d0c747b1db5a9bac1a6511c99cc6ec2f Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:23:45 -0600 Subject: [PATCH 16/38] fix wf lookup --- service/cluster/cluster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index db49cbe0e..6decda5ab 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -682,7 +682,7 @@ func (s *clusterImpl) cleanupExpiredClusters() { }) if err != nil { log.Warnw("failed to stop workflow, this is OK if the workflow is not running", - "workflow-name", req.GetId(), + "workflow-name", workflow.GetName(), "error", err, ) } From d9cc1688da00a14b0b8e41395f8f59df425c56dd Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 17 Jul 2023 16:46:51 -0600 Subject: [PATCH 17/38] correct import --- service/cluster/cluster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 6decda5ab..0ff06b9f0 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -17,7 +17,7 @@ import ( workflowpkg "github.com/argoproj/argo-workflows/v3/pkg/apiclient/workflow" "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" workflowv1 "github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned/typed/workflow/v1alpha1" - workflowutil "github.com/argoproj/argo-workflows/workflow/util" + workflowutil "github.com/argoproj/argo-workflows/v3/workflow/util" "github.com/golang/protobuf/ptypes" "github.com/golang/protobuf/ptypes/duration" "github.com/golang/protobuf/ptypes/empty" From ee1ecc05f9acaf6f079d17976e938c4aa38e57de Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 17 Jul 2023 16:59:35 -0600 Subject: [PATCH 18/38] latest test --- .../static/workflow-gke-default.yaml | 83 ++++++++----------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 3ef75fef2..ea689c678 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -43,7 +43,7 @@ spec: arguments: parameters: - name: oldlifespan - value: "3h0m0s" + value: "0h0m0s" - name: stop steps: @@ -65,26 +65,18 @@ spec: parameters: - name: message value: "{{inputs.parameters.oldlifespan}} =? {{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: echoD + - - name: echoDS template: echo arguments: parameters: - name: message - value: "{{=sprig.dateModify(workflow.annotations.infra.stackrox.com/lifespan, workflow.creationTimestamp)}}" - - - name: echoD2 + value: "{{= sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339) }}" + - - name: echoDlife template: echo arguments: parameters: - name: message - value: "{{=sprig.dateModify('30h',workflow.creationTimestamp)}}" - - - name: echoD3 - template: echo - arguments: - parameters: - - name: message - value: "{{sprig.dateModify(workflow.annotations.infra.stackrox.com/lifespan, workflow.creationTimestamp)}}" - - - name: delay - template: delay + value: "expiration={{= sprig.dateModify(inputs.parameters.lifespan, sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339)) }}" - - name: echo2 template: echo when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" @@ -92,19 +84,14 @@ spec: parameters: - name: message value: "{{inputs.parameters.oldlifespan}} =~ {{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: echo3 - template: echo - when: "'{{inputs.parameters.oldlifespan}}' !~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" - arguments: - parameters: - - name: message - value: "{{inputs.parameters.oldlifespan}} !~ {{workflow.annotations.infra.stackrox.com/lifespan}}" - - name: echo4 template: echo arguments: parameters: - name: message value: "{{inputs.parameters.oldlifespan}}, {{workflow.annotations.infra.stackrox.com/lifespan}}" + - - name: delay + template: delay - - name: shortloop template: wait when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" @@ -114,20 +101,10 @@ spec: value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference - - name: timeupdate template: timeupdate - - - name: echoE - template: echo arguments: parameters: - - name: message - value: "{{inputs.parameters.oldlifespan}} =? {{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: echoE2 - template: argosay - arguments: - parameters: - - name: foo - value: 5 - - name: message - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + - name: lifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference - - name: loop template: wait arguments: @@ -135,21 +112,6 @@ spec: - name: oldlifespan value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference - - name: argosay - inputs: - parameters: - - name: foo - - name: message - container: - image: argoproj/argosay:v2 - # in this example, we use `asInt` to cast a parameter (which are ALWAYS strings) to an int so we can - # multiply by 10 - args: - - echo - - | - hello {{=asInt(inputs.parameters.foo) * 10}} @ {{=sprig.date('2006', workflow.creationTimestamp)}} - {{inputs.parameters.message}} - - name: echo inputs: parameters: @@ -163,6 +125,27 @@ spec: duration: "4" - name: timeupdate + inputs: + parameters: + - name: lifespan + steps: + - - name: echo1 + template: echo + arguments: + parameters: + - name: message + value: "expiration={{= sprig.dateModify(inputs.parameters.lifespan, sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339)) }}" + - - name: setlabels + template: label + arguments: + parameters: + - name: labels + value: "expiration={{= sprig.dateModify(inputs.parameters.lifespan, sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339)) }}" + + - name: label + inputs: + parameters: + - name: labels script: image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot imagePullPolicy: Always @@ -171,7 +154,7 @@ spec: value: /tmp/google-credentials.json command: [bash, -x] source: | - lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}; + labels="{{inputs.parameters.labels}}" gcloud auth activate-service-account --key-file /tmp/google-credentials.json; gcloud auth list; gcloud config set compute/zone "{{workflow.parameters.gcp-zone}}"; @@ -179,8 +162,8 @@ spec: gcloud container clusters update \ "{{workflow.parameters.name}}"\ --project=srox-temp-dev-test\ - "--update-labels=lifespan=${lifespan}"; - echo "$lifespan" | tee /outputs/value.txt + "--update-labels=${labels}"; + echo "$labels" | tee /outputs/value.txt volumeMounts: - name: credentials mountPath: /tmp From 4d5ca929255466bb6f921aa6ecfbd015ca1e66b8 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 17 Jul 2023 17:05:32 -0600 Subject: [PATCH 19/38] Revert "use argo util to check suspend" This reverts commit f1debcdbfbf0fb31fcdd84c6ae4240d74d6f7966. --- service/cluster/cluster.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 0ff06b9f0..6b3c63804 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -17,7 +17,6 @@ import ( workflowpkg "github.com/argoproj/argo-workflows/v3/pkg/apiclient/workflow" "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" workflowv1 "github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned/typed/workflow/v1alpha1" - workflowutil "github.com/argoproj/argo-workflows/v3/workflow/util" "github.com/golang/protobuf/ptypes" "github.com/golang/protobuf/ptypes/duration" "github.com/golang/protobuf/ptypes/empty" @@ -524,7 +523,7 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. return nil, err } - if workflowutil.IsWorkflowSuspended(workflow) { + if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { // Resume the workflow so that it may move to the destroy phase without // waiting for cleanupExpiredClusters() to kick in. log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) @@ -663,7 +662,7 @@ func (s *clusterImpl) cleanupExpiredClusters() { continue } - if workflowutil.IsWorkflowSuspended(workflow) { + if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ Name: workflow.GetName(), From f37f7e2d7cd0a1cd84feecd0af90c3747736ca8e Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 09:08:01 -0600 Subject: [PATCH 20/38] latest test --- .../static/workflow-gke-default.yaml | 76 +++++-------------- 1 file changed, 19 insertions(+), 57 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index ea689c678..89537e75f 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -3,10 +3,9 @@ kind: Workflow metadata: generateName: gke-default- labels: - needsExit: true + needsExit: "true" annotations: infra.stackrox.com/lifespan: 0s - infra.stackrox.com/lifespanlast: 1s spec: entrypoint: start onExit: stop @@ -38,12 +37,12 @@ spec: parameters: - name: message value: "create" - - name: wait + - - name: wait template: wait arguments: parameters: - name: oldlifespan - value: "0h0m0s" + value: "" # Force lookup and copy of lifespan - name: stop steps: @@ -59,58 +58,29 @@ spec: parameters: - name: oldlifespan steps: - - - name: echo1 - template: echo - arguments: - parameters: - - name: message - value: "{{inputs.parameters.oldlifespan}} =? {{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: echoDS - template: echo - arguments: - parameters: - - name: message - value: "{{= sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339) }}" - - - name: echoDlife - template: echo - arguments: - parameters: - - name: message - value: "expiration={{= sprig.dateModify(inputs.parameters.lifespan, sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339)) }}" - - - name: echo2 - template: echo - when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" - arguments: - parameters: - - name: message - value: "{{inputs.parameters.oldlifespan}} =~ {{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: echo4 - template: echo - arguments: - parameters: - - name: message - value: "{{inputs.parameters.oldlifespan}}, {{workflow.annotations.infra.stackrox.com/lifespan}}" - - name: delay - template: delay + inline: + suspend: + duration: "30" - - name: shortloop template: wait when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" arguments: parameters: - name: oldlifespan - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - - name: timeupdate template: timeupdate arguments: parameters: - name: lifespan - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - - name: loop template: wait arguments: parameters: - name: oldlifespan - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" # preserve value; prevent pass by reference + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - name: echo inputs: @@ -120,27 +90,25 @@ spec: image: alpine:3.7 command: [echo, "{{inputs.parameters.message}}"] - - name: delay - suspend: - duration: "4" - - name: timeupdate inputs: parameters: - name: lifespan steps: - - - name: echo1 - template: echo - arguments: - parameters: - - name: message - value: "expiration={{= sprig.dateModify(inputs.parameters.lifespan, sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339)) }}" - - name: setlabels template: label arguments: parameters: - name: labels - value: "expiration={{= sprig.dateModify(inputs.parameters.lifespan, sprig.toDate('2006-01-02T15:04:05Z07:00', workflow.creationTimestamp.RFC3339)) }}" + value: >- + flavor={{workflow.annotations.infra.stackrox.com/flavor}}, + owner={{workflow.annotations.infra.stackrox.com/owner}}, + lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}, + expiration={{= sprig.date('2006-01-02T15:04:05Z07:00', + sprig.dateModify(inputs.parameters.lifespan, + sprig.toDate('2006-01-02T15:04:05Z07:00', + workflow.creationTimestamp.RFC3339))) + }} - name: label inputs: @@ -162,16 +130,10 @@ spec: gcloud container clusters update \ "{{workflow.parameters.name}}"\ --project=srox-temp-dev-test\ - "--update-labels=${labels}"; - echo "$labels" | tee /outputs/value.txt + "--update-labels=${labels}" || true volumeMounts: - name: credentials mountPath: /tmp - outputs: - parameters: - - name: value - valueFrom: - path: /outputs/value.txt - name: create activeDeadlineSeconds: 3600 From 48f1e6cc73daee370ea42a6b918adcb708832f18 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 09:17:30 -0600 Subject: [PATCH 21/38] no fail fast --- chart/infra-server/static/workflow-gke-default.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 89537e75f..d2ada8e8b 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -57,6 +57,7 @@ spec: inputs: parameters: - name: oldlifespan + failFast: false steps: - - name: delay inline: From d1d7a1566dd763554904fa6fcf7d3d2b784fb2d0 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 09:23:09 -0600 Subject: [PATCH 22/38] default annotations --- chart/infra-server/static/workflow-gke-default.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index d2ada8e8b..9e906a2f0 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -6,6 +6,8 @@ metadata: needsExit: "true" annotations: infra.stackrox.com/lifespan: 0s + infra.stackrox.com/flavor: unknown + infra.stackrox.com/owner: unknown spec: entrypoint: start onExit: stop @@ -170,7 +172,15 @@ spec: - "--machine-type={{workflow.parameters.machine-type}}" - --gcp-project=srox-temp-dev-test - --creation-source=infra - - --labels=lifespan={{workflow.annotations.infra.stackrox.com/lifespan}} + - >- + --labels=flavor={{workflow.annotations.infra.stackrox.com/flavor}}, + owner={{workflow.annotations.infra.stackrox.com/owner}}, + lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}, + expiration={{= sprig.date('2006-01-02T15:04:05Z07:00', + sprig.dateModify(inputs.parameters.lifespan, + sprig.toDate('2006-01-02T15:04:05Z07:00', + workflow.creationTimestamp.RFC3339))) + }} - --k8s-version={{workflow.parameters.k8s-version}} - --pod-security-policy={{workflow.parameters.pod-security-policy}} - --gcp-image-type={{workflow.parameters.gcp-image-type}} From a51577e05ed6f41c7e828958be058ac7350374b8 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:10:49 -0600 Subject: [PATCH 23/38] not suspended != destroying --- service/cluster/helpers.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/service/cluster/helpers.go b/service/cluster/helpers.go index 24f98ce81..63d2b2388 100644 --- a/service/cluster/helpers.go +++ b/service/cluster/helpers.go @@ -204,14 +204,17 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status { } } else if node.Type == v1alpha1.NodeTypeSuspend { switch node.Phase { - case v1alpha1.NodeSucceeded: - return v1.Status_DESTROYING case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped: panic("a suspend should not be able to fail?") case v1alpha1.NodeRunning, v1alpha1.NodePending: return v1.Status_READY } } + if node.GetName() == "destroy" || node.IsExitNode() { + return v1.Status_DESTROYING + } else { + return v1.Status_READY + } } // No suspend node was found, which means one hasn't been run yet, which means that this cluster is still creating. From b42e4a78a93e7fe9b183b137bb8675f32829b949 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:20:54 -0600 Subject: [PATCH 24/38] remove wrong dupe check --- service/cluster/cluster.go | 41 +++++++++++++++----------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 6b3c63804..c5622821a 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -335,17 +335,8 @@ func (s *clusterImpl) create(req *v1.CreateClusterRequest, owner, eventID string return nil, fmt.Errorf("parameter 'name' was not provided") } - existingWorkflow, _ := s.getMostRecentArgoWorkflowFromClusterID(req.ID) - if existingWorkflow != nil { - return nil, status.Errorf( - codes.AlreadyExists, - "An infra cluster ID %q already exists in state %s.", - req.ID, workflowStatus(existingWorkflow.Status).String(), - ) - } - // Make sure there is no running argo workflow for infra cluster with the same ID - existingWorkflow, _ = s.getMostRecentArgoWorkflowFromClusterID(clusterID) + existingWorkflow, _ := s.getMostRecentArgoWorkflowFromClusterID(clusterID) if existingWorkflow != nil { switch workflowStatus(existingWorkflow.Status) { case v1.Status_FAILED, v1.Status_FINISHED: @@ -523,21 +514,21 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. return nil, err } - if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { - // Resume the workflow so that it may move to the destroy phase without - // waiting for cleanupExpiredClusters() to kick in. - log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) - _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ - Name: workflow.GetName(), - Namespace: s.workflowNamespace, - }) - if err != nil { - log.Warnw("failed to resume workflow, this is OK if the workflow is not waiting", - "workflow-name", req.GetId(), - "error", err, - ) - } - } else { + // Resume the workflow so that it may move to the destroy phase without + // waiting for cleanupExpiredClusters() to kick in. + log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + }) + if err != nil { + log.Warnw("failed to resume workflow, this is OK if the workflow is not waiting", + "workflow-name", req.GetId(), + "error", err, + ) + } + if value, exists := workflow.GetLabels()["needsExit"]; exists { + log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), From a17e4f1314b542f7c369b9ef2f7abfbd7f6b24ef Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:24:07 -0600 Subject: [PATCH 25/38] ?needsExit in expire check also --- service/cluster/cluster.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index c5622821a..cca99c7c0 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -514,9 +514,10 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. return nil, err } + log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) + // Resume the workflow so that it may move to the destroy phase without // waiting for cleanupExpiredClusters() to kick in. - log.Infow("resuming argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ Name: workflow.GetName(), Namespace: s.workflowNamespace, @@ -662,7 +663,9 @@ func (s *clusterImpl) cleanupExpiredClusters() { if err != nil { log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err) } - } else { + } + if value, exists := workflow.GetLabels()["needsExit"]; exists { + log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow that expired", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), From aae3d10bb41739ebdcb2e0b1674ca1534c3509ca Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:51:24 -0600 Subject: [PATCH 26/38] lint --- service/cluster/helpers.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/service/cluster/helpers.go b/service/cluster/helpers.go index 63d2b2388..84eee6b30 100644 --- a/service/cluster/helpers.go +++ b/service/cluster/helpers.go @@ -212,9 +212,8 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status { } if node.GetName() == "destroy" || node.IsExitNode() { return v1.Status_DESTROYING - } else { - return v1.Status_READY } + return v1.Status_READY } // No suspend node was found, which means one hasn't been run yet, which means that this cluster is still creating. From 5ec5d9f523e5710c9a489dae75cbf90aae879406 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:52:58 -0600 Subject: [PATCH 27/38] latest workflow --- .../static/workflow-gke-default.yaml | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 9e906a2f0..00d31105e 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -103,15 +103,15 @@ spec: arguments: parameters: - name: labels - value: >- - flavor={{workflow.annotations.infra.stackrox.com/flavor}}, - owner={{workflow.annotations.infra.stackrox.com/owner}}, - lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}, - expiration={{= sprig.date('2006-01-02T15:04:05Z07:00', - sprig.dateModify(inputs.parameters.lifespan, - sprig.toDate('2006-01-02T15:04:05Z07:00', - workflow.creationTimestamp.RFC3339))) - }} + value: "\ + flavor={{workflow.annotations.infra.stackrox.com/flavor}},\ + owner={{workflow.annotations.infra.stackrox.com/owner}},\ + lifespan={{workflow.annotations.infra.stackrox.com/lifespan}},\ + expiration={{= sprig.date('2006-01-02T15:04:05Z07:00',\ + sprig.dateModify(inputs.parameters.lifespan,\ + sprig.toDate('2006-01-02T15:04:05Z07:00',\ + workflow.creationTimestamp.RFC3339)))\ + }}" - name: label inputs: @@ -172,15 +172,15 @@ spec: - "--machine-type={{workflow.parameters.machine-type}}" - --gcp-project=srox-temp-dev-test - --creation-source=infra - - >- - --labels=flavor={{workflow.annotations.infra.stackrox.com/flavor}}, - owner={{workflow.annotations.infra.stackrox.com/owner}}, - lifespan={{workflow.annotations.infra.stackrox.com/lifespan}}, - expiration={{= sprig.date('2006-01-02T15:04:05Z07:00', - sprig.dateModify(inputs.parameters.lifespan, - sprig.toDate('2006-01-02T15:04:05Z07:00', - workflow.creationTimestamp.RFC3339))) - }} + - "--labels=\ + flavor={{workflow.annotations.infra.stackrox.com/flavor}},\ + owner={{workflow.annotations.infra.stackrox.com/owner}},\ + lifespan={{workflow.annotations.infra.stackrox.com/lifespan}},\ + expiration={{= sprig.date('2006-01-02T15:04:05Z07:00',\ + sprig.dateModify(inputs.parameters.lifespan,\ + sprig.toDate('2006-01-02T15:04:05Z07:00',\ + workflow.creationTimestamp.RFC3339)))\ + }}" - --k8s-version={{workflow.parameters.k8s-version}} - --pod-security-policy={{workflow.parameters.pod-security-policy}} - --gcp-image-type={{workflow.parameters.gcp-image-type}} From bafebbc2f1bcd0ba75f1d3e4c86744c2b380b2a3 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 18:32:03 -0600 Subject: [PATCH 28/38] check for create start --- service/cluster/helpers.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/service/cluster/helpers.go b/service/cluster/helpers.go index 84eee6b30..d6e42b1ca 100644 --- a/service/cluster/helpers.go +++ b/service/cluster/helpers.go @@ -213,7 +213,9 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status { if node.GetName() == "destroy" || node.IsExitNode() { return v1.Status_DESTROYING } - return v1.Status_READY + if node.GetName() != "create" { + return v1.Status_READY + } } // No suspend node was found, which means one hasn't been run yet, which means that this cluster is still creating. From 66ba509e247eabf5b51b416ee80b1ae88dfcdb29 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 19:05:51 -0600 Subject: [PATCH 29/38] only create CREATING --- service/cluster/helpers.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/service/cluster/helpers.go b/service/cluster/helpers.go index d6e42b1ca..a0efd0c65 100644 --- a/service/cluster/helpers.go +++ b/service/cluster/helpers.go @@ -214,12 +214,17 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status { return v1.Status_DESTROYING } if node.GetName() != "create" { - return v1.Status_READY + switch node.Phase { + case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped: + return v1.Status_FAILED + case v1alpha1.NodeRunning, v1alpha1.NodePending: + return v1.Status_CREATING + } } } - // No suspend node was found, which means one hasn't been run yet, which means that this cluster is still creating. - return v1.Status_CREATING + // If no "create" or "destroy"/onExit node active, then we're ready. + return v1.Status_READY case "": return v1.Status_CREATING From 742a8df580a89b25c9f4479e2b21778961945fd1 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 18 Jul 2023 22:28:54 -0600 Subject: [PATCH 30/38] invert loop to prevent missed updates --- .../static/workflow-gke-default.yaml | 87 ++++++++++--------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 00d31105e..19ba5ba20 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -40,11 +40,11 @@ spec: - name: message value: "create" - - name: wait - template: wait + template: running arguments: parameters: - - name: oldlifespan - value: "" # Force lookup and copy of lifespan + - name: lifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - name: stop steps: @@ -55,65 +55,75 @@ spec: - name: message value: "{{workflow.parameters.name}}" - - name: wait + - name: running inputs: parameters: - - name: oldlifespan + - name: lifespan failFast: false + steps: + - - name: label + template: label + arguments: + parameters: + - name: lifespan + value: "{{inputs.parameters.lifespan}}" + - - name: wait + template: wait + arguments: + parameters: + - name: lifespan + value: "{{inputs.parameters.lifespan}}" + + - name: wait + inputs: + parameters: + - name: lifespan steps: - - name: delay inline: suspend: duration: "30" - - - name: shortloop - template: wait - when: "'{{inputs.parameters.oldlifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + - - name: echo + template: echo arguments: parameters: - - name: oldlifespan - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: timeupdate - template: timeupdate + - name: message + value: "'{{inputs.parameters.lifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + - - name: loop + template: wait + when: "'{{inputs.parameters.lifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" arguments: parameters: - name: lifespan - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - - - name: loop - template: wait + value: "{{inputs.parameters.lifespan}}" # If we send the annotation, it could change between the 'when' check and sending. + - - name: break + template: running arguments: parameters: - - name: oldlifespan + - name: lifespan value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - - name: echo - inputs: - parameters: - - name: message - container: - image: alpine:3.7 - command: [echo, "{{inputs.parameters.message}}"] - - - name: timeupdate + - name: label inputs: parameters: - name: lifespan steps: - - - name: setlabels - template: label + - - name: label + template: label-gke arguments: parameters: - name: labels value: "\ flavor={{workflow.annotations.infra.stackrox.com/flavor}},\ owner={{workflow.annotations.infra.stackrox.com/owner}},\ - lifespan={{workflow.annotations.infra.stackrox.com/lifespan}},\ + lifespan={{inputs.parameters.lifespan}},\ expiration={{= sprig.date('2006-01-02T15:04:05Z07:00',\ sprig.dateModify(inputs.parameters.lifespan,\ sprig.toDate('2006-01-02T15:04:05Z07:00',\ workflow.creationTimestamp.RFC3339)))\ }}" - - name: label + - name: label-gke inputs: parameters: - name: labels @@ -138,6 +148,14 @@ spec: - name: credentials mountPath: /tmp + - name: echo + inputs: + parameters: + - name: message + container: + image: alpine:3.7 + command: [echo, "{{inputs.parameters.message}}"] + - name: create activeDeadlineSeconds: 3600 outputs: @@ -161,7 +179,7 @@ spec: valueFrom: path: /outputs/cluster_name container: - image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.5.3 imagePullPolicy: Always command: - /usr/bin/entrypoint @@ -172,15 +190,6 @@ spec: - "--machine-type={{workflow.parameters.machine-type}}" - --gcp-project=srox-temp-dev-test - --creation-source=infra - - "--labels=\ - flavor={{workflow.annotations.infra.stackrox.com/flavor}},\ - owner={{workflow.annotations.infra.stackrox.com/owner}},\ - lifespan={{workflow.annotations.infra.stackrox.com/lifespan}},\ - expiration={{= sprig.date('2006-01-02T15:04:05Z07:00',\ - sprig.dateModify(inputs.parameters.lifespan,\ - sprig.toDate('2006-01-02T15:04:05Z07:00',\ - workflow.creationTimestamp.RFC3339)))\ - }}" - --k8s-version={{workflow.parameters.k8s-version}} - --pod-security-policy={{workflow.parameters.pod-security-policy}} - --gcp-image-type={{workflow.parameters.gcp-image-type}} From 95248f7ef39e3e6fefb745859fe5b4818a80ae09 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 19 Jul 2023 10:57:29 -0600 Subject: [PATCH 31/38] log hasExitHook --- service/cluster/cluster.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index cca99c7c0..5cc8e7c86 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -664,6 +664,9 @@ func (s *clusterImpl) cleanupExpiredClusters() { log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err) } } + if workflow.Spec.HasExitHook() { + log.Infow("argo workflow has exit hook") + } if value, exists := workflow.GetLabels()["needsExit"]; exists { log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow that expired", "workflow-name", workflow.GetName()) From ffb59abac2d0902623af4b5beefcba684526c734 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Thu, 20 Jul 2023 12:25:16 -0600 Subject: [PATCH 32/38] log hasexithook in delete --- service/cluster/cluster.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 5cc8e7c86..f98c775df 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -528,6 +528,9 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. "error", err, ) } + if workflow.Spec.HasExitHook() { + log.Infow("argo workflow has exit hook") + } if value, exists := workflow.GetLabels()["needsExit"]; exists { log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) From 64cc1c41c20f50bf1a976e5e48958dd183dfefe0 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Fri, 21 Jul 2023 09:47:55 -0600 Subject: [PATCH 33/38] fast loop - stack limit? --- chart/infra-server/static/workflow-gke-default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 19ba5ba20..723fd8a97 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -82,7 +82,7 @@ spec: - - name: delay inline: suspend: - duration: "30" + duration: "1" - - name: echo template: echo arguments: From bb104a8cef752582016a54467d7f62dc1fa705b8 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Fri, 21 Jul 2023 15:52:33 -0600 Subject: [PATCH 34/38] if create, creating --- service/cluster/helpers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/cluster/helpers.go b/service/cluster/helpers.go index a0efd0c65..0d1ae3836 100644 --- a/service/cluster/helpers.go +++ b/service/cluster/helpers.go @@ -213,7 +213,7 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status { if node.GetName() == "destroy" || node.IsExitNode() { return v1.Status_DESTROYING } - if node.GetName() != "create" { + if node.GetName() == "create" { switch node.Phase { case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped: return v1.Status_FAILED From e7f3431917c7bcc506509fc0972119aaf443cade Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Fri, 21 Jul 2023 16:36:57 -0600 Subject: [PATCH 35/38] log labels --- service/cluster/cluster.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index f98c775df..06a609121 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -531,8 +531,9 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. if workflow.Spec.HasExitHook() { log.Infow("argo workflow has exit hook") } + log.Infow("labels:", "labels", workflow.GetLabels(), "workflow-name", workflow.GetName()) if value, exists := workflow.GetLabels()["needsExit"]; exists { - log.Infow("argo workflow requires exit to stop looping", "needsExit", value) + log.Infow("argo workflow requires exit to stop looping", "needsExit", value, "workflow-name", workflow.GetName()) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), From 610d5d21994e29a6ecd8ef6c3689d8cdd78aad1c Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Fri, 21 Jul 2023 17:06:47 -0600 Subject: [PATCH 36/38] fix label lookup --- service/cluster/cluster.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 06a609121..584891bed 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -529,10 +529,10 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. ) } if workflow.Spec.HasExitHook() { - log.Infow("argo workflow has exit hook") + log.Infow("argo workflow has exit hook", "workflow-name", workflow.GetName()) } - log.Infow("labels:", "labels", workflow.GetLabels(), "workflow-name", workflow.GetName()) - if value, exists := workflow.GetLabels()["needsExit"]; exists { + log.Infow("labels:", "labels", workflow.Spec.WorkflowMetadata.Labels, "workflow-name", workflow.GetName()) + if value, exists := workflow.Spec.WorkflowMetadata.Labels["needsExit"]; exists { log.Infow("argo workflow requires exit to stop looping", "needsExit", value, "workflow-name", workflow.GetName()) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ @@ -669,9 +669,10 @@ func (s *clusterImpl) cleanupExpiredClusters() { } } if workflow.Spec.HasExitHook() { - log.Infow("argo workflow has exit hook") + log.Infow("argo workflow has exit hook", "workflow-name", workflow.GetName()) } - if value, exists := workflow.GetLabels()["needsExit"]; exists { + log.Infow("labels:", "labels", workflow.Spec.WorkflowMetadata.Labels, "workflow-name", workflow.GetName()) + if value, exists := workflow.Spec.WorkflowMetadata.Labels["needsExit"]; exists { log.Infow("argo workflow requires exit to stop looping", "needsExit", value) log.Infow("stopping argo workflow that expired", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ From 32873275e1694f1b4db5b2aeb06592d99dcb4fe7 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Fri, 21 Jul 2023 17:27:36 -0600 Subject: [PATCH 37/38] exit if exit hook --- service/cluster/cluster.go | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index 584891bed..653149c99 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -529,11 +529,6 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. ) } if workflow.Spec.HasExitHook() { - log.Infow("argo workflow has exit hook", "workflow-name", workflow.GetName()) - } - log.Infow("labels:", "labels", workflow.Spec.WorkflowMetadata.Labels, "workflow-name", workflow.GetName()) - if value, exists := workflow.Spec.WorkflowMetadata.Labels["needsExit"]; exists { - log.Infow("argo workflow requires exit to stop looping", "needsExit", value, "workflow-name", workflow.GetName()) log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), @@ -669,12 +664,7 @@ func (s *clusterImpl) cleanupExpiredClusters() { } } if workflow.Spec.HasExitHook() { - log.Infow("argo workflow has exit hook", "workflow-name", workflow.GetName()) - } - log.Infow("labels:", "labels", workflow.Spec.WorkflowMetadata.Labels, "workflow-name", workflow.GetName()) - if value, exists := workflow.Spec.WorkflowMetadata.Labels["needsExit"]; exists { - log.Infow("argo workflow requires exit to stop looping", "needsExit", value) - log.Infow("stopping argo workflow that expired", "workflow-name", workflow.GetName()) + log.Infow("stopping argo workflow with exit hook", "workflow-name", workflow.GetName()) _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ Name: workflow.GetName(), Namespace: s.workflowNamespace, From f4fdcf80c2fd76c82f664f9ae4a17380d80fa7f2 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 2 Aug 2023 20:59:53 -0600 Subject: [PATCH 38/38] test indent for yaml lint --- chart/infra-server/static/workflow-gke-default.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index 723fd8a97..c9636b359 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -37,14 +37,14 @@ spec: template: echo arguments: parameters: - - name: message - value: "create" + - name: message + value: "create" - - name: wait template: running arguments: parameters: - - name: lifespan - value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" + - name: lifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" - name: stop steps: