You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
// The platform must prove that at least one complex AI operator with a CRD (e.g., Ray, Kubeflow) can be installed and functions reliably. This includes verifying that the operator's pods run correctly, its webhooks are operational, and its custom resources can be reconciled.
32
+
33
+
h:=validators.NewValidatorHarness(t)
34
+
35
+
if!h.HasCRD("rayjobs.ray.io") {
36
+
h.Skip("Ray CRDs not found, skipping test")
37
+
}
38
+
39
+
h.Logf("# Robust Controller (with KubeRay)")
40
+
41
+
h.Logf("## Verify KubeRay with a sample RayJob")
42
+
{
43
+
// This is based on https://docs.ray.io/en/latest/cluster/kubernetes/getting-started/rayjob-quick-start.html#kuberay-rayjob-quickstart
# shutdownAfterJobFinishes specifies whether the RayCluster should be deleted after the RayJob finishes. Default is false.
14
+
# shutdownAfterJobFinishes: false
15
+
16
+
# ttlSecondsAfterFinished specifies the number of seconds after which the RayCluster will be deleted after the RayJob finishes.
17
+
# ttlSecondsAfterFinished: 10
18
+
19
+
# activeDeadlineSeconds is the duration in seconds that the RayJob may be active before
20
+
# KubeRay actively tries to terminate the RayJob; value must be positive integer.
21
+
# activeDeadlineSeconds: 120
22
+
23
+
# RuntimeEnvYAML represents the runtime environment configuration provided as a multi-line YAML string.
24
+
# See https://docs.ray.io/en/latest/ray-core/handling-dependencies.html for details.
25
+
# (New in KubeRay version 1.0.)
26
+
runtimeEnvYAML: |
27
+
pip:
28
+
- requests==2.26.0
29
+
- pendulum==2.1.2
30
+
env_vars:
31
+
counter_name: "test_counter"
32
+
33
+
# Suspend specifies whether the RayJob controller should create a RayCluster instance.
34
+
# If a job is applied with the suspend field set to true, the RayCluster will not be created and we will wait for the transition to false.
35
+
# If the RayCluster is already created, it will be deleted. In the case of transition to false, a new RayCluster will be created.
36
+
# suspend: false
37
+
38
+
# rayClusterSpec specifies the RayCluster instance to be created by the RayJob controller.
39
+
rayClusterSpec:
40
+
rayVersion: '2.46.0'  # should match the Ray version in the image of the containers
41
+
# Ray head pod template
42
+
headGroupSpec:
43
+
# The `rayStartParams` are used to configure the `ray start` command.
44
+
# See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
45
+
# See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
46
+
rayStartParams: {}
47
+
# Pod template
48
+
template:
49
+
spec:
50
+
containers:
51
+
- name: ray-head
52
+
image: rayproject/ray:2.46.0
53
+
ports:
54
+
- containerPort: 6379
55
+
name: gcs-server
56
+
- containerPort: 8265  # Ray dashboard
57
+
name: dashboard
58
+
- containerPort: 10001
59
+
name: client
60
+
resources:
61
+
limits:
62
+
cpu: "1"
63
+
requests:
64
+
cpu: "200m"
65
+
volumeMounts:
66
+
- mountPath: /home/ray/samples
67
+
name: code-sample
68
+
volumes:
69
+
# You set volumes at the Pod level, then mount them into containers inside that Pod
70
+
- name: code-sample
71
+
configMap:
72
+
# Provide the name of the ConfigMap you want to mount.
73
+
name: ray-job-code-sample
74
+
# An array of keys from the ConfigMap to create as files
75
+
items:
76
+
- key: sample_code.py
77
+
path: sample_code.py
78
+
workerGroupSpecs:
79
+
# Number of pod replicas in this worker group
80
+
- replicas: 1
81
+
minReplicas: 1
82
+
maxReplicas: 5
83
+
# Logical group name (here, small-group); the name can also describe the group's function
84
+
groupName: small-group
85
+
# The `rayStartParams` are used to configure the `ray start` command.
86
+
# See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
87
+
# See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
88
+
rayStartParams: {}
89
+
# Pod template
90
+
template:
91
+
spec:
92
+
containers:
93
+
- name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
94
+
image: rayproject/ray:2.46.0
95
+
resources:
96
+
limits:
97
+
cpu: "1"
98
+
requests:
99
+
cpu: "200m"
100
+
101
+
# SubmitterPodTemplate is the template for the pod that will run the `ray job submit` command against the RayCluster.
102
+
# If SubmitterPodTemplate is specified, the first container is assumed to be the submitter container.
103
+
# submitterPodTemplate:
104
+
# spec:
105
+
# restartPolicy: Never
106
+
# containers:
107
+
# - name: my-custom-rayjob-submitter-pod
108
+
# image: rayproject/ray:2.46.0
109
+
# # If Command is not specified, the correct command will be supplied at runtime using the RayJob spec `entrypoint` field.
0 commit comments