File tree Expand file tree Collapse file tree
tests/e2e/scenarios/ai-conformance Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -182,31 +182,43 @@ kind: ResourceClaim
182182metadata:
183183 name: test-gpu-claim
184184spec:
185- resourceClassName: nvidia-gpu
185+ devices:
186+ requests:
187+ - name: single-gpu
188+ exactly:
189+ deviceClassName: gpu.nvidia.com
190+ allocationMode: ExactCount
191+ count: 1
186192---
187- apiVersion: v1
188- kind: Pod
193+ apiVersion: batch/v1
194+ kind: Job
189195metadata:
190196 name: test-gpu-pod
191197spec:
192- restartPolicy: Never
193- containers:
194- - name: test
195- image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04
196- command: ["/bin/sh", "-c"]
197- args: ["/cuda-samples/vectorAdd"]
198- resources:
199- claims:
198+ template:
199+ spec:
200+ restartPolicy: Never
201+ tolerations:
202+ - key: "nvidia.com/gpu"
203+ operator: "Exists"
204+ effect: "NoSchedule"
205+ containers:
206+ - name: test
207+ image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04
208+ command: ["/bin/sh", "-c"]
209+ args: ["/cuda-samples/vectorAdd"]
210+ resources:
211+ claims:
212+ - name: gpu
213+ resourceClaims:
200214 - name: gpu
201- resourceClaims:
202- - name: gpu
203- resourceClaimName: test-gpu-claim
215+ resourceClaimName: test-gpu-claim
204216EOF
205217
206218echo " Waiting for Sample Workload to Complete..."
207219# Wait for the pod to succeed
208- kubectl wait --for=condition=Ready pod/test-gpu-pod --timeout=5m || true
209- kubectl logs test-gpu-pod || echo " Failed to get logs"
220+ kubectl wait --for=condition=complete job/test-gpu-pod --timeout=5m || true
221+ kubectl logs job/test-gpu-pod || echo " Failed to get logs"
210222
211223# Note: The actual AI conformance test suite (e.g., k8s-ai-conformance binary)
212224# would be executed here. For this scenario, we establish the compliant environment.
You can’t perform that action at this time.
0 commit comments