Skip to content

Commit 614c2c3

Browse files
authored
Merge pull request #18009 from justinsb/aiconform_custom_tests
tests/ai-conformance: create first tests for AI Conformance
2 parents 775adb2 + e29eb9a commit 614c2c3

File tree

9 files changed

+580
-3
lines changed

9 files changed

+580
-3
lines changed

tests/e2e/scenarios/ai-conformance/run-test.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ echo "Waiting for Sample Workload to Complete..."
216216
kubectl wait --for=condition=complete job/test-gpu-pod --timeout=5m || true
217217
kubectl logs job/test-gpu-pod || echo "Failed to get logs"
218218

219-
# Note: The actual AI conformance test suite (e.g., k8s-ai-conformance binary)
220-
# would be executed here. For this scenario, we establish the compliant environment.
221-
222219
echo "AI Conformance Environment Setup Complete."
220+
221+
# Now run the actual AI conformance tests
222+
cd "${REPO_ROOT}/tests/e2e/scenarios/ai-conformance/validators"
223+
go test -v ./... -timeout=60m
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
Copyright The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package dra_support
18+
19+
import (
20+
"fmt"
21+
"strings"
22+
"testing"
23+
24+
"k8s.io/apimachinery/pkg/runtime/schema"
25+
"k8s.io/kops/tests/e2e/scenarios/ai-conformance/validators"
26+
)
27+
28+
// TestAcceleratorsDRASupport corresponds to the accelerators/dra_support scenario
29+
func TestAcceleratorsDRASupport(t *testing.T) {
30+
// Description:
31+
// Support Dynamic Resource Allocation (DRA) APIs to enable more flexible and fine-grained resource requests beyond simple counts.
32+
h := validators.NewValidatorHarness(t)
33+
34+
h.Logf("# DRA API Availability")
35+
36+
gv := schema.GroupVersion{Group: "resource.k8s.io", Version: "v1"}
37+
38+
// Check resource.k8s.io/v1 is registered
39+
h.Logf("## Checking for DRA API version v1")
40+
{
41+
result := h.ShellExec(fmt.Sprintf("kubectl api-versions | grep %s", gv.Group+"/"+gv.Version))
42+
if !strings.Contains(result.Stdout(), "resource.k8s.io/v1\n") {
43+
h.Fatalf("Expected DRA API version group %s version %s, but it was not found", gv.Group, gv.Version)
44+
}
45+
h.Success("DRA API version %s is available.", gv.String())
46+
}
47+
48+
// Check all expected DRA API resources are registered
49+
for _, resource := range []string{"deviceclasses", "resourceclaims", "resourceclaimtemplates", "resourceslices"} {
50+
h.Logf("## Checking for %s", resource)
51+
result := h.ShellExec(fmt.Sprintf("kubectl api-resources --api-group=%s | grep %s", gv.Group, resource))
52+
if !strings.Contains(result.Stdout(), "resource.k8s.io/v1") {
53+
h.Fatalf("Expected DRA API resource %s to be available in group %s version %s, but it was not found", resource, gv.Group, gv.Version)
54+
}
55+
h.Success("DRA API resource %s is available.", resource)
56+
}
57+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
Copyright The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package dra_support
18+
19+
import (
20+
"fmt"
21+
"testing"
22+
23+
"k8s.io/kops/tests/e2e/scenarios/ai-conformance/validators"
24+
)
25+
26+
// TestDRAWorks is an additional test to verify that DRA is not only available, but also functional.
27+
// This is not currently required by AI conformance.
28+
func TestDRAWorks(t *testing.T) {
29+
h := validators.NewValidatorHarness(t)
30+
31+
h.Logf("# DRA functional tests")
32+
33+
h.Logf("## Listing device classes")
34+
deviceClasses := h.ListDeviceClasses()
35+
for _, deviceClass := range deviceClasses {
36+
h.Logf("* %s", deviceClass.Name())
37+
}
38+
39+
h.Logf("## Listing resource slices")
40+
resourceSlices := h.ListResourceSlices()
41+
for _, resourceSlice := range resourceSlices {
42+
h.Logf("* %s", resourceSlice.Name())
43+
}
44+
45+
if !h.HasDeviceClass("gpu.nvidia.com") {
46+
t.Skipf("gpu.nvidia.com device class not found; skipping")
47+
}
48+
49+
h.Logf("## Run cuda-smoketest")
50+
ns := "default"
51+
h.ShellExec(fmt.Sprintf("kubectl apply --namespace %s -f testdata/cuda-smoketest.yaml", ns))
52+
h.ShellExec(fmt.Sprintf("kubectl wait --for=condition=complete --namespace %s job/cuda-smoketest --timeout=5m", ns))
53+
h.ShellExec(fmt.Sprintf("kubectl logs --namespace %s job/cuda-smoketest", ns))
54+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Create a ResourceClaim and Job to test DRA.
# The ResourceClaim below is named cuda-smoketest and contains one request,
# "single-gpu", for exactly one device (allocationMode ExactCount, count 1)
# of the gpu.nvidia.com device class.
2+
apiVersion: resource.k8s.io/v1
3+
kind: ResourceClaim
4+
metadata:
5+
name: cuda-smoketest
6+
spec:
7+
devices:
8+
requests:
9+
- name: single-gpu
10+
exactly:
11+
deviceClassName: gpu.nvidia.com
12+
allocationMode: ExactCount
13+
count: 1
14+
15+
---
16+
# TODO: Use Deployment+HPA
# Job named cuda-smoketest: runs /cuda-samples/vectorAdd from the NVIDIA
# cuda-sample image, with restartPolicy Never.
17+
apiVersion: batch/v1
18+
kind: Job
19+
metadata:
20+
name: cuda-smoketest
21+
spec:
22+
template:
23+
spec:
24+
restartPolicy: Never
25+
# Tolerates any nvidia.com/gpu taint with effect NoSchedule
# (presumably so the pod can land on tainted GPU nodes - confirm against the cluster setup).
tolerations:
26+
- key: "nvidia.com/gpu"
27+
operator: "Exists"
28+
effect: "NoSchedule"
29+
containers:
30+
- name: test
31+
image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04
32+
command: ["/bin/sh", "-c"]
33+
args: ["/cuda-samples/vectorAdd"]
34+
resources:
35+
# The container's claim name "gpu" matches the resourceClaims entry below.
claims:
36+
- name: gpu
37+
# The "gpu" entry refers, by name, to the ResourceClaim cuda-smoketest.
resourceClaims:
38+
- name: gpu
39+
resourceClaimName: cuda-smoketest
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
Copyright The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package validators
18+
19+
import (
20+
"bytes"
21+
"os/exec"
22+
)
23+
24+
// ShellExec executes the given shell command and returns the result. If the command fails, it fails the test.
25+
func (h *ValidatorHarness) ShellExec(shellCommand string) *CommandResult {
26+
ctx := h.Context()
27+
cmd := exec.CommandContext(ctx, "sh", "-c", shellCommand)
28+
var stdout bytes.Buffer
29+
cmd.Stdout = &stdout
30+
var stderr bytes.Buffer
31+
cmd.Stderr = &stderr
32+
33+
h.Logf("ShellExec(%q)", shellCommand)
34+
err := cmd.Run()
35+
36+
result := &CommandResult{
37+
stdout: stdout.String(),
38+
stderr: stderr.String(),
39+
err: err,
40+
}
41+
42+
h.output.OnShellExec(shellCommand, result)
43+
44+
if err != nil {
45+
h.Logf("Command failed: %q", shellCommand)
46+
h.Logf("Stdout: %s", result.Stdout())
47+
h.Logf("Stderr: %s", result.Stderr())
48+
h.Fatalf("Command execution %q failed with error: %v", shellCommand, err)
49+
}
50+
51+
return result
52+
}
53+
54+
// CommandResult holds everything captured from one shell command execution:
// the text written to stdout, the text written to stderr, and the error
// returned by running the command (nil on success).
type CommandResult struct {
	stdout, stderr string
	err            error
}

// Stdout returns the captured standard output.
func (res *CommandResult) Stdout() string { return res.stdout }

// Stderr returns the captured standard error output.
func (res *CommandResult) Stderr() string { return res.stderr }

// Err returns the error from running the command, or nil if there was none.
func (res *CommandResult) Err() error { return res.err }
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
Copyright The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package validators
18+
19+
import (
20+
"context"
21+
"os"
22+
"path/filepath"
23+
"testing"
24+
25+
"k8s.io/client-go/dynamic"
26+
"k8s.io/client-go/rest"
27+
"k8s.io/client-go/tools/clientcmd"
28+
)
29+
30+
// ValidatorHarness provides a common context and utilities for AI conformance validation.
31+
type ValidatorHarness struct {
32+
t *testing.T
33+
34+
output OutputSink
35+
36+
dynamicClient dynamic.Interface
37+
restConfig *rest.Config
38+
}
39+
40+
// NewValidatorHarness creates a new ValidatorHarness.
41+
func NewValidatorHarness(t *testing.T) *ValidatorHarness {
42+
h := &ValidatorHarness{t: t}
43+
44+
h.output = createMarkdownOutput(t)
45+
h.t.Cleanup(func() {
46+
if err := h.output.Close(); err != nil {
47+
h.t.Errorf("failed to close output: %v", err)
48+
}
49+
})
50+
51+
// use the current context in kubeconfig
52+
kubeconfig := os.Getenv("KUBECONFIG")
53+
if kubeconfig == "" {
54+
home, err := os.UserHomeDir()
55+
if err != nil {
56+
h.Fatalf("failed to get user home directory: %v", err)
57+
}
58+
kubeconfig = filepath.Join(home, ".kube", "config")
59+
}
60+
restConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
61+
if err != nil {
62+
h.Fatalf("failed to build get kubeconfig: %v", err)
63+
}
64+
h.restConfig = restConfig
65+
66+
return h
67+
}
68+
69+
// Context returns the context associated with the test, which can be used for command execution and API calls.
70+
func (h *ValidatorHarness) Context() context.Context {
71+
return h.t.Context()
72+
}

0 commit comments

Comments
 (0)