diff --git a/chart/infra-server/static/workflow-gke-default.yaml b/chart/infra-server/static/workflow-gke-default.yaml index e656408c2..c9636b359 100644 --- a/chart/infra-server/static/workflow-gke-default.yaml +++ b/chart/infra-server/static/workflow-gke-default.yaml @@ -2,8 +2,15 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: generateName: gke-default- + labels: + needsExit: "true" + annotations: + infra.stackrox.com/lifespan: 0s + infra.stackrox.com/flavor: unknown + infra.stackrox.com/owner: unknown spec: entrypoint: start + onExit: stop arguments: parameters: - name: name @@ -27,15 +34,127 @@ spec: - name: start steps: - - name: create - template: create + template: echo + arguments: + parameters: + - name: message + value: "create" - - name: wait - template: wait + template: running + arguments: + parameters: + - name: lifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" + + - name: stop + steps: - - name: destroy - template: destroy + template: echo + arguments: + parameters: + - name: message + value: "{{workflow.parameters.name}}" + + - name: running + inputs: + parameters: + - name: lifespan + failFast: false + steps: + - - name: label + template: label + arguments: + parameters: + - name: lifespan + value: "{{inputs.parameters.lifespan}}" + - - name: wait + template: wait + arguments: + parameters: + - name: lifespan + value: "{{inputs.parameters.lifespan}}" + + - name: wait + inputs: + parameters: + - name: lifespan + steps: + - - name: delay + inline: + suspend: + duration: "1" + - - name: echo + template: echo + arguments: + parameters: + - name: message + value: "'{{inputs.parameters.lifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + - - name: loop + template: wait + when: "'{{inputs.parameters.lifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'" + arguments: + parameters: + - name: lifespan + value: "{{inputs.parameters.lifespan}}" # If we send the annotation, it could change between the 'when' check and sending. + - - name: break + template: running + arguments: + parameters: + - name: lifespan + value: "{{workflow.annotations.infra.stackrox.com/lifespan}}" + + - name: label + inputs: + parameters: + - name: lifespan + steps: + - - name: label + template: label-gke arguments: parameters: - - name: name - value: "{{steps.create.outputs.parameters.cluster_name}}" + - name: labels + value: "\ + flavor={{workflow.annotations.infra.stackrox.com/flavor}},\ + owner={{workflow.annotations.infra.stackrox.com/owner}},\ + lifespan={{inputs.parameters.lifespan}},\ + expiration={{= sprig.date('2006-01-02T15:04:05Z07:00',\ + sprig.dateModify(inputs.parameters.lifespan,\ + sprig.toDate('2006-01-02T15:04:05Z07:00',\ + workflow.creationTimestamp.RFC3339)))\ + }}" + + - name: label-gke + inputs: + parameters: + - name: labels + script: + image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot + imagePullPolicy: Always + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /tmp/google-credentials.json + command: [bash, -x] + source: | + labels="{{inputs.parameters.labels}}" + gcloud auth activate-service-account --key-file /tmp/google-credentials.json; + gcloud auth list; + gcloud config set compute/zone "{{workflow.parameters.gcp-zone}}"; + gcloud config set core/disable_prompts True; + gcloud container clusters update \ + "{{workflow.parameters.name}}"\ + --project=srox-temp-dev-test\ + "--update-labels=${labels}" || true + volumeMounts: + - name: credentials + mountPath: /tmp + + - name: echo + inputs: + parameters: + - name: message + container: + image: alpine:3.7 + command: [echo, "{{inputs.parameters.message}}"] - name: create activeDeadlineSeconds: 3600 @@ -79,9 +198,6 @@ spec: - name: credentials mountPath: /tmp - - name: wait - suspend: {} - - name: destroy activeDeadlineSeconds: 3600 container: diff --git a/service/cluster/cluster.go b/service/cluster/cluster.go index cb84cab41..653149c99 100644 --- a/service/cluster/cluster.go +++ b/service/cluster/cluster.go @@ -528,6 +528,21 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty. "error", err, ) } + if workflow.Spec.HasExitHook() { + log.Infow("stopping argo workflow", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + NodeFieldSelector: "", + Message: "Destroying cluster. End workflow loop.", + }) + if err != nil { + log.Warnw("failed to stop workflow, this is OK if the workflow is not running", + "workflow-name", req.GetId(), + "error", err, + ) + } + } return &empty.Empty{}, nil } @@ -638,13 +653,30 @@ func (s *clusterImpl) cleanupExpiredClusters() { continue } - log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName()) - _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ - Name: workflow.GetName(), - Namespace: s.workflowNamespace, - }) - if err != nil { - log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err) + if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend { + log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + }) + if err != nil { + log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err) + } + } + if workflow.Spec.HasExitHook() { + log.Infow("stopping argo workflow with exit hook", "workflow-name", workflow.GetName()) + _, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{ + Name: workflow.GetName(), + Namespace: s.workflowNamespace, + NodeFieldSelector: "", + Message: "Destroying cluster. End workflow loop.", + }) + if err != nil { + log.Warnw("failed to stop workflow, this is OK if the workflow is not running", + "workflow-name", workflow.GetName(), + "error", err, + ) + } } } diff --git a/service/cluster/helpers.go b/service/cluster/helpers.go index 24f98ce81..0d1ae3836 100644 --- a/service/cluster/helpers.go +++ b/service/cluster/helpers.go @@ -204,18 +204,27 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status { } } else if node.Type == v1alpha1.NodeTypeSuspend { switch node.Phase { - case v1alpha1.NodeSucceeded: - return v1.Status_DESTROYING case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped: panic("a suspend should not be able to fail?") case v1alpha1.NodeRunning, v1alpha1.NodePending: return v1.Status_READY } } + if node.GetName() == "destroy" || node.IsExitNode() { + return v1.Status_DESTROYING + } + if node.GetName() == "create" { + switch node.Phase { + case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped: + return v1.Status_FAILED + case v1alpha1.NodeRunning, v1alpha1.NodePending: + return v1.Status_CREATING + } + } } - // No suspend node was found, which means one hasn't been run yet, which means that this cluster is still creating. - return v1.Status_CREATING + // If no "create" or "destroy"/onExit node active, then we're ready. + return v1.Status_READY case "": return v1.Status_CREATING