File tree Expand file tree Collapse file tree 1 file changed +4
-12
lines changed
tests/e2e/scenarios/ai-conformance Expand file tree Collapse file tree 1 file changed +4
-12
lines changed Original file line number Diff line number Diff line change 9898EOF
9999
100100${KOPS} update cluster --name " ${CLUSTER_NAME} " --yes --admin
101+
102+ # TODO: Can we delay this until later?
101103${KOPS} validate cluster --wait=10m
102104
103105echo " ----------------------------------------------------------------"
@@ -129,15 +131,6 @@ helm upgrade -i nvidia-gpu-operator --wait \
129131# Uses the driver installed by GPU Operator at /run/nvidia/driver (the default).
130132echo " Installing NVIDIA DRA Driver..."
131133
132- cat > values.yaml << EOF
133- # The driver daemonset needs a toleration for the nvidia.com/gpu taint
134- kubeletPlugin:
135- tolerations:
136- - key: nvidia.com/gpu
137- operator: Exists
138- effect: NoSchedule
139- EOF
140-
141134helm upgrade -i nvidia-dra-driver-gpu nvidia/nvidia-dra-driver-gpu \
142135 --version=" 25.12.0" \
143136 --create-namespace \
@@ -157,10 +150,9 @@ kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases
157150# Wait for webhooks to be ready (so we can apply other objects)
158151kubectl rollout status deployment -n kueue-system kueue-controller-manager --timeout=5m
159152
160- # 3. Robust Controller ( KubeRay)
153+ # KubeRay
161154echo " Installing KubeRay Operator..."
162- # KubeRay 1.3.0
163- kubectl apply -k " github.com/ray-project/kuberay/ray-operator/config/default?ref=v1.5.0"
155+ kubectl apply --server-side -k " github.com/ray-project/kuberay/ray-operator/config/default-with-webhooks?ref=v1.5.0"
164156
165157echo " ----------------------------------------------------------------"
166158echo " Verifying Cluster and Components"
You can’t perform that action at this time.
0 commit comments