Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/PR.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
with:
flavor: gke-default
name: infra-pr-${{ github.event.pull_request.number }}
args: machine-type=e2-standard-4,nodes=3,gcp-image-type=ubuntu_containerd
args: machine-type=e2-standard-4,nodes=3,gcp-image-type=ubuntu_containerd,has-infra-server=true
lifespan: ${{ github.actor == 'dependabot[bot]' && '1h' || '24h' }}
wait: true
token: ${{ secrets.INFRA_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ push:

.PHONY: argo-workflow-lint
argo-workflow-lint:
@argo lint ./chart/infra-server/static/workflow*.yaml
@argo lint --offline ./chart/infra-server/static/workflow*.yaml ./chart/infra-server/templates/workflowtemplates/*.yaml

.PHONY: shellcheck
shellcheck:
Expand Down
5 changes: 5 additions & 0 deletions chart/infra-server/static/flavors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@
value: false
kind: optional

- name: has-infra-server
description: Whether this cluster runs an infra-server instance. Enables pre-destroy cleanup of managed clusters.
value: false
kind: optional

artifacts:
- name: kubeconfig
description: Kube config for connecting to this cluster
Expand Down
22 changes: 22 additions & 0 deletions chart/infra-server/static/workflow-gke-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
generateName: gke-default-
namespace: default
spec:
entrypoint: start
onExit: stop
Expand All @@ -20,6 +21,8 @@ spec:
value: ""
- name: set-ssd-storage-default
value: ""
- name: has-infra-server
value: "false"

volumes:
- name: credentials
Expand All @@ -36,6 +39,23 @@ spec:

- name: stop
steps:
# Stops all Argo workflows on a target cluster before it is destroyed,
# giving each workflow's onExit handler time to clean up cloud resources.
# Only relevant for GKE clusters that run the infra server.
- - name: cleanup-infra-clusters
templateRef:
name: cleanup-infra-clusters
template: cleanup
Comment thread
coderabbitai[bot] marked this conversation as resolved.
arguments:
artifacts:
- name: kubeconfig
from: '{{ "{{" }}workflow.outputs.artifacts.global-kubeconfig{{ "}}" }}'
optional: true
when: '{{ "{{" }}workflow.parameters.has-infra-server{{ "}}" }} == true'
continueOn:
failed: true
error: true

- - name: destroy
template: destroy
arguments:
Expand All @@ -48,8 +68,10 @@ spec:
outputs:
artifacts:
- name: kubeconfig
globalName: global-kubeconfig
path: /outputs/kubeconfig
mode: 0644
optional: true
archive:
none: {}
- name: connect
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
---
# WorkflowTemplate that drains infra-managed Argo workflows on a target
# cluster before that cluster is destroyed.
#
# Input:
#   kubeconfig (artifact, optional) - mounted at /tmp/kubeconfig. When it is
#   absent the script exits 0 without doing anything.
#
# Exit status of the `cleanup` script:
#   0 - no kubeconfig, nothing to stop, or every workflow finished in time.
#   1 - the initial workflow listing failed, or workflows were still active
#       after the wait budget elapsed.
apiVersion: argoproj.io/v1alpha1
kind: WorkflowTemplate
metadata:
  name: cleanup-infra-clusters
  namespace: default
spec:
  templates:
    # Stops all infra-managed Argo workflows on a target cluster before it
    # is destroyed, giving each workflow's onExit handler time to clean up
    # cloud resources.
    - name: cleanup
      inputs:
        artifacts:
          - name: kubeconfig
            path: /tmp/kubeconfig
            optional: true
      # Hard ceiling for the whole template; the script's own wait budget
      # (3000s) is shorter so it can report the timeout itself.
      activeDeadlineSeconds: 3600
      script:
        # NOTE(review): `latest` is a floating tag, and this script needs
        # `bash` inside the image - confirm the image ships a shell and
        # consider pinning a version.
        image: quay.io/argoproj/argocli:latest
        command: [bash]
        source: |
          # No `-e`: failures are handled explicitly below so a single
          # failing command cannot abort the cleanup half-way.
          set -uo pipefail

          if [ ! -f /tmp/kubeconfig ]; then
            echo "No kubeconfig artifact available. Skipping cleanup."
            exit 0
          fi

          export KUBECONFIG=/tmp/kubeconfig
          export ARGO_NAMESPACE=default

          # Prints the names of Running/Pending workflows that carry the
          # infra.stackrox.com/cluster-id label. Returns the exit status of
          # `argo list`, so callers can tell "no workflows" from "listing
          # failed".
          get_active_workflows() {
            argo list --status Running,Pending -l infra.stackrox.com/cluster-id -o name
          }

          # An empty result is only trustworthy when the listing succeeded;
          # otherwise an unreachable API would look like "nothing to do".
          if ! ACTIVE=$(get_active_workflows); then
            echo "Failed to list workflows on the target cluster. Cannot perform cleanup." >&2
            exit 1
          fi

          if [ -z "$ACTIVE" ]; then
            echo "No active workflows found. Skipping cleanup."
            exit 0
          fi

          echo "Stopping all active workflows to trigger their destroy phases."
          # Unquoted on purpose: one workflow name per whitespace-separated word.
          for wf in $ACTIVE; do
            echo "Stopping workflow: ${wf}"
            # Best effort: keep stopping the others, but leave a trace.
            argo stop "$wf" || echo "Warning: failed to stop workflow: ${wf}" >&2
          done

          # SECONDS is bash's built-in elapsed-time counter.
          DEADLINE=$((SECONDS + 3000))
          while [ "$SECONDS" -lt "$DEADLINE" ]; do
            if REMAINING=$(get_active_workflows); then
              if [ -z "$REMAINING" ]; then
                echo "All managed workflows completed."
                exit 0
              fi
              COUNT=$(echo "$REMAINING" | wc -l | tr -d ' ')
              echo "Waiting for ${COUNT} workflow(s) to complete..."
            else
              # A failed listing says nothing about the workflows; keep
              # waiting instead of declaring success.
              echo "Failed to list workflows. Retrying." >&2
            fi
            sleep 30
          done

          echo "Timed out waiting for managed workflows. Proceeding with cluster destroy."
          exit 1
Loading