|
| 1 | +/* |
| 2 | +Copyright The Kubernetes Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package secure_accelerator_access |
| 18 | + |
| 19 | +import ( |
| 20 | + "fmt" |
| 21 | + "strings" |
| 22 | + "testing" |
| 23 | + |
| 24 | + "k8s.io/kops/tests/e2e/scenarios/ai-conformance/validators" |
| 25 | +) |
| 26 | + |
| 27 | +// TestSecurity_SecureAcceleratorAccess corresponds to the security/secure_accelerator_access conformance requirement |
| 28 | +func TestSecurity_SecureAcceleratorAccess(t *testing.T) { |
| 29 | + // Description: |
| 30 | + // Ensure that access to accelerators from within containers is properly isolated and mediated by the Kubernetes resource management framework (device plugin or DRA) and container runtime, preventing unauthorized access or interference between workloads. |
| 31 | + h := validators.NewValidatorHarness(t) |
| 32 | + |
| 33 | + h.Logf("# Secure Accelerator Access") |
| 34 | + |
| 35 | + score := 0 |
| 36 | + |
| 37 | + h.Logf("## Checking that GPUs are available if requested") |
| 38 | + |
| 39 | + h.Run("accelerator-requested", func(h *validators.ValidatorHarness) { |
| 40 | + ns := h.TestNamespace() |
| 41 | + |
| 42 | + h.ApplyManifest(ns, "testdata/accelerator-requested.yaml") |
| 43 | + h.ShellExec(fmt.Sprintf("kubectl wait -n %s --for=condition=complete job/accelerator-requested --timeout=60s", ns)) |
| 44 | + |
| 45 | + logs := h.ShellExec(fmt.Sprintf("kubectl logs -n %s job/accelerator-requested", ns)) |
| 46 | + if !strings.Contains(logs.Stdout(), "<product_brand>NVIDIA</product_brand>") { |
| 47 | + h.Errorf("Expected to find nvidia GPUs available when requested, but did not find them in the logs: %s", logs.Stdout()) |
| 48 | + } else { |
| 49 | + h.Success("GPUs were requested, and nvidia-smi reported available GPUs.") |
| 50 | + score++ |
| 51 | + } |
| 52 | + }) |
| 53 | + |
| 54 | + h.Logf("## Checking that GPUs are not available if not requested") |
| 55 | + h.Run("accelerator-not-requested", func(h *validators.ValidatorHarness) { |
| 56 | + ns := h.TestNamespace() |
| 57 | + |
| 58 | + h.ApplyManifest(ns, "testdata/accelerator-not-requested.yaml") |
| 59 | + h.ShellExec(fmt.Sprintf("kubectl wait -n %s --for=condition=complete job/accelerator-not-requested --timeout=60s", ns)) |
| 60 | + |
| 61 | + logs := h.ShellExec(fmt.Sprintf("kubectl logs -n %s job/accelerator-not-requested", ns)) |
| 62 | + if !strings.Contains(logs.Stdout(), "nvidia-smi failed (as expected)") { |
| 63 | + h.Errorf("Expected nvidia-smi to fail when GPUs are not requested, but found them in the logs: %s", logs.Stdout()) |
| 64 | + } else { |
| 65 | + h.Success("No GPUs were requested, and nvidia-smi did not report any GPUs.") |
| 66 | + } |
| 67 | + }) |
| 68 | + |
| 69 | + h.Logf("## Pods with GPU requests should be isolated from each other") |
| 70 | + h.Run("accelerator-isolation", func(h *validators.ValidatorHarness) { |
| 71 | + ns := h.TestNamespace() |
| 72 | + |
| 73 | + h.ApplyManifest(ns, "testdata/accelerator-isolation.yaml") |
| 74 | + h.ShellExec(fmt.Sprintf("kubectl wait -n %s --for=condition=available deployment/accelerator-isolation-1 --timeout=60s", ns)) |
| 75 | + h.ShellExec(fmt.Sprintf("kubectl wait -n %s --for=condition=available deployment/accelerator-isolation-2 --timeout=60s", ns)) |
| 76 | + |
| 77 | + logs1 := h.ShellExec(fmt.Sprintf("kubectl logs -n %s deployment/accelerator-isolation-1", ns)) |
| 78 | + logs2 := h.ShellExec(fmt.Sprintf("kubectl logs -n %s deployment/accelerator-isolation-2", ns)) |
| 79 | + |
| 80 | + uuid1 := extractGPUUUID(logs1.Stdout()) |
| 81 | + uuid2 := extractGPUUUID(logs2.Stdout()) |
| 82 | + |
| 83 | + if uuid1 == "" { |
| 84 | + h.Errorf("Failed to extract GPU UUID from logs of accelerator-isolation-1:\n%s", logs1.Stdout()) |
| 85 | + } else if uuid2 == "" { |
| 86 | + h.Errorf("Failed to extract GPU UUID from logs of accelerator-isolation-2:\n%s", logs2.Stdout()) |
| 87 | + } else if uuid1 == uuid2 { |
| 88 | + h.Errorf("Expected that pods with GPU requests would be isolated from each other, but both pods saw the same GPU UUID: %s", uuid1) |
| 89 | + } else { |
| 90 | + h.Success("Pods with GPU requests were isolated from each other as expected.") |
| 91 | + score++ |
| 92 | + } |
| 93 | + }) |
| 94 | + |
| 95 | + if score == 3 { |
| 96 | + h.RecordConformance("security/secure_accelerator_access") |
| 97 | + } |
| 98 | +} |
| 99 | + |
// extractGPUUUID scans nvidia-smi XML output line by line and returns the
// contents of the first <uuid>...</uuid> element found, or "" if none exists.
func extractGPUUUID(logs string) string {
	const openTag, closeTag = "<uuid>", "</uuid>"
	for _, raw := range strings.Split(logs, "\n") {
		trimmed := strings.TrimSpace(raw)
		if !strings.HasPrefix(trimmed, openTag) || !strings.HasSuffix(trimmed, closeTag) {
			continue
		}
		return strings.TrimSuffix(strings.TrimPrefix(trimmed, openTag), closeTag)
	}
	return ""
}
0 commit comments