Skip to content

Commit 0d72d56

Browse files
authored
Merge pull request #3890 from justinsb/clusterloader_pod_startup_latency
clusterloader2: create pod-startup-latency test
2 parents 86342ef + a73919f commit 0d72d56

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
# ASSUMPTIONS:
# - This test is designed for a 1-node cluster.
# - Pods take less than 1 second to start, so we can launch them at a rate of
#   1 pod per second without creating a backlog.

# Constants (each can be overridden through the matching test parameter).
{{$POD_COUNT := DefaultParam .POD_COUNT 30}}
# We expect a pod to launch in less than one second, so launching more than
# 1 pod per second would create a backlog and test a different behaviour.
{{$POD_THROUGHPUT := DefaultParam .POD_THROUGHPUT 1}}
{{$CONTAINER_IMAGE := DefaultParam .CONTAINER_IMAGE "registry.k8s.io/pause:3.9"}}
{{$POD_STARTUP_LATENCY_THRESHOLD := DefaultParam .POD_STARTUP_LATENCY_THRESHOLD "5s"}}
{{$OPERATION_TIMEOUT := DefaultParam .OPERATION_TIMEOUT "15m"}}

name: pod-startup-latency
# One namespace per pod: every namespace receives a single RC with one replica.
namespace:
  number: {{$POD_COUNT}}
tuningSets:
  - name: UniformQPS
    qpsLoad:
      qps: {{$POD_THROUGHPUT}}
steps:
  # Start the measurements that span the whole test.
  - name: Starting measurements
    measurements:
      - Identifier: APIResponsivenessPrometheusSimple
        Method: APIResponsivenessPrometheus
        Params:
          action: start
      - Identifier: PodStartupLatency
        Method: PodStartupLatency
        Params:
          action: start
          labelSelector: group = latency
          threshold: {{$POD_STARTUP_LATENCY_THRESHOLD}}
  # Start watching the ReplicationControllers labelled group=latency.
  - name: Starting pods measurements
    measurements:
      - Identifier: WaitForRunningLatencyRCs
        Method: WaitForControlledPodsRunning
        Params:
          action: start
          apiVersion: v1
          kind: ReplicationController
          labelSelector: group = latency
          operationTimeout: {{$OPERATION_TIMEOUT}}
  # Create one single-replica RC in each namespace, paced by UniformQPS.
  - name: Creating pods
    phases:
      - namespaceRange:
          min: 1
          max: {{$POD_COUNT}}
        replicasPerNamespace: 1
        tuningSet: UniformQPS
        objectBundle:
          - basename: latency-pod-rc
            objectTemplatePath: rc.yaml
            templateFillMap:
              Replicas: 1
              Group: latency
              Image: {{$CONTAINER_IMAGE}}
  - name: Waiting for pods to be running
    measurements:
      - Identifier: WaitForRunningLatencyRCs
        Method: WaitForControlledPodsRunning
        Params:
          action: gather
  # Same bundle as in "Creating pods", now with zero replicas per namespace.
  - name: Deleting pods
    phases:
      - namespaceRange:
          min: 1
          max: {{$POD_COUNT}}
        replicasPerNamespace: 0
        tuningSet: UniformQPS
        objectBundle:
          - basename: latency-pod-rc
            objectTemplatePath: rc.yaml
  # Reuses the WaitForRunningLatencyRCs measurement started earlier.
  - name: Waiting for pods to be deleted
    measurements:
      - Identifier: WaitForRunningLatencyRCs
        Method: WaitForControlledPodsRunning
        Params:
          action: gather
  # Collect measurements
  - name: Collecting pods measurements
    measurements:
      - Identifier: PodStartupLatency
        Method: PodStartupLatency
        Params:
          action: gather
  - name: Collecting measurements
    measurements:
      - Identifier: APIResponsivenessPrometheusSimple
        Method: APIResponsivenessPrometheus
        Params:
          action: gather
          enableViolations: true
          useSimpleLatencyQuery: true
          summaryName: APIResponsivenessPrometheus_simple
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
# ReplicationController template for the pod-startup-latency test.
# Template inputs: .Name, .Group, .Replicas, .Image.
apiVersion: v1
kind: ReplicationController
metadata:
  name: {{.Name}}
  labels:
    group: {{.Group}}
spec:
  replicas: {{.Replicas}}
  selector:
    name: {{.Name}}
  template:
    metadata:
      labels:
        name: {{.Name}}
        group: {{.Group}}
    spec:
      # Do automount default service account, to be more representative of real workloads.
      automountServiceAccountToken: true
      containers:
        - image: {{.Image}}
          # Image is expected to be already present on the node, so we can
          # measure pod startup latency without image pull time.
          imagePullPolicy: Never
          name: {{.Name}}
      # Add not-ready/unreachable tolerations for 15 minutes so that node
      # failure doesn't trigger pod deletion.
      tolerations:
        - key: "node.kubernetes.io/not-ready"
          operator: "Exists"
          effect: "NoExecute"
          tolerationSeconds: 900
        - key: "node.kubernetes.io/unreachable"
          operator: "Exists"
          effect: "NoExecute"
          tolerationSeconds: 900

0 commit comments

Comments
 (0)