Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions assets/state-driver/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ spec:
- name: run-mellanox-drivers
mountPath: /run/mellanox/drivers
mountPropagation: HostToContainer
- name: sysfs-memory-online
mountPath: /sys/devices/system/memory/auto_online_blocks
- name: firmware-search-path
mountPath: /sys/module/firmware_class/parameters/path
- name: nv-firmware
Expand Down Expand Up @@ -320,9 +318,6 @@ spec:
- name: firmware-search-path
hostPath:
path: /sys/module/firmware_class/parameters/path
- name: sysfs-memory-online
hostPath:
path: /sys/devices/system/memory/auto_online_blocks
- name: nv-firmware
hostPath:
path: /run/nvidia/driver/lib/firmware
Expand Down
66 changes: 66 additions & 0 deletions controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,8 @@ func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n C
}
}

applyMemoryHotplugAutoOnlineMount(&obj.Spec.Template.Spec, n.memoryHotplugAutoOnline)

// Compute driver configuration digest after all transformations are complete.
// This digest enables fast-path driver installation by detecting when configuration
// hasn't changed, avoiding unnecessary driver reinstalls and pod evictions.
Expand Down Expand Up @@ -3569,6 +3571,70 @@ func applyLicensingConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec
podSpec.Volumes = append(podSpec.Volumes, licensingConfigVol)
}

// applyMemoryHotplugAutoOnlineMount adds or removes the "sysfs-memory-online"
// hostPath volume and its mount on the given pod spec.
//
// When enabled is true, the "nvidia-driver-ctr" container (if one is found)
// gets a volume mount for /sys/devices/system/memory/auto_online_blocks and the
// pod gets a matching hostPath volume; entries that already exist under that
// name are left untouched. When enabled is false, every volume mount and volume
// carrying that name is removed from all containers and from the pod.
func applyMemoryHotplugAutoOnlineMount(podSpec *corev1.PodSpec, enabled bool) {
	const (
		volumeName = "sysfs-memory-online"
		mountPath  = "/sys/devices/system/memory/auto_online_blocks"
	)

	if !enabled {
		// Removal path: filter each slice in place, reusing its backing array.
		for idx := range podSpec.Containers {
			ctr := &podSpec.Containers[idx]
			keptMounts := ctr.VolumeMounts[:0]
			for _, vm := range ctr.VolumeMounts {
				if vm.Name != volumeName {
					keptMounts = append(keptMounts, vm)
				}
			}
			ctr.VolumeMounts = keptMounts
		}

		keptVolumes := podSpec.Volumes[:0]
		for _, vol := range podSpec.Volumes {
			if vol.Name != volumeName {
				keptVolumes = append(keptVolumes, vol)
			}
		}
		podSpec.Volumes = keptVolumes
		return
	}

	// Addition path: only the driver container receives the mount.
	ctr := findContainerByName(podSpec.Containers, "nvidia-driver-ctr")
	if ctr != nil && !hasVolumeMount(ctr.VolumeMounts, volumeName) {
		mount := corev1.VolumeMount{
			Name:      volumeName,
			MountPath: mountPath,
		}
		ctr.VolumeMounts = append(ctr.VolumeMounts, mount)
	}

	// The volume is added even when the driver container was not found.
	if !hasVolume(podSpec.Volumes, volumeName) {
		hostPathVolume := corev1.Volume{
			Name: volumeName,
			VolumeSource: corev1.VolumeSource{
				HostPath: &corev1.HostPathVolumeSource{Path: mountPath},
			},
		}
		podSpec.Volumes = append(podSpec.Volumes, hostPathVolume)
	}
}

// hasVolumeMount reports whether volumeMounts contains an entry whose Name
// equals name.
func hasVolumeMount(volumeMounts []corev1.VolumeMount, name string) bool {
	for i := range volumeMounts {
		if volumeMounts[i].Name == name {
			return true
		}
	}
	return false
}

// hasVolume reports whether volumes contains an entry whose Name equals name.
func hasVolume(volumes []corev1.Volume, name string) bool {
	for i := range volumes {
		if volumes[i].Name == name {
			return true
		}
	}
	return false
}

func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
podSpec := &obj.Spec.Template.Spec
driverContainer := findContainerByName(podSpec.Containers, "nvidia-driver-ctr")
Expand Down
70 changes: 39 additions & 31 deletions controllers/state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,31 +39,32 @@ import (
)

const (
commonGPULabelKey = "nvidia.com/gpu.present"
commonGPULabelValue = "true"
commonOperandsLabelKey = "nvidia.com/gpu.deploy.operands"
commonOperandsLabelValue = "true"
migManagerLabelKey = "nvidia.com/gpu.deploy.mig-manager"
migManagerLabelValue = "true"
migCapableLabelKey = "nvidia.com/mig.capable"
migCapableLabelValue = "true"
migConfigLabelKey = "nvidia.com/mig.config"
migConfigDisabledValue = "all-disabled"
vgpuHostDriverLabelKey = "nvidia.com/vgpu.host-driver-version"
gpuProductLabelKey = "nvidia.com/gpu.product"
nfdLabelPrefix = "feature.node.kubernetes.io/"
nfdKernelLabelKey = "feature.node.kubernetes.io/kernel-version.full"
nfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
nfdOSReleaseIDLabelKey = "feature.node.kubernetes.io/system-os_release.ID"
nfdOSVersionIDLabelKey = "feature.node.kubernetes.io/system-os_release.VERSION_ID"
ocpDriverToolkitVersionLabel = "openshift.driver-toolkit.rhcos"
ocpDriverToolkitIdentificationLabel = "openshift.driver-toolkit"
appLabelKey = "app"
ocpDriverToolkitIdentificationValue = "true"
ocpNamespaceMonitoringLabelKey = "openshift.io/cluster-monitoring"
ocpNamespaceMonitoringLabelValue = "true"
precompiledIdentificationLabelKey = "nvidia.com/precompiled"
precompiledIdentificationLabelValue = "true"
commonGPULabelKey = "nvidia.com/gpu.present"
commonGPULabelValue = "true"
commonOperandsLabelKey = "nvidia.com/gpu.deploy.operands"
commonOperandsLabelValue = "true"
migManagerLabelKey = "nvidia.com/gpu.deploy.mig-manager"
migManagerLabelValue = "true"
migCapableLabelKey = "nvidia.com/mig.capable"
migCapableLabelValue = "true"
migConfigLabelKey = "nvidia.com/mig.config"
migConfigDisabledValue = "all-disabled"
vgpuHostDriverLabelKey = "nvidia.com/vgpu.host-driver-version"
gpuProductLabelKey = "nvidia.com/gpu.product"
nfdLabelPrefix = "feature.node.kubernetes.io/"
nfdKernelLabelKey = "feature.node.kubernetes.io/kernel-version.full"
nfdKernelConfigMemoryHotplugLabelKey = "feature.node.kubernetes.io/kernel-config.MEMORY_HOTPLUG"
nfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
nfdOSReleaseIDLabelKey = "feature.node.kubernetes.io/system-os_release.ID"
nfdOSVersionIDLabelKey = "feature.node.kubernetes.io/system-os_release.VERSION_ID"
ocpDriverToolkitVersionLabel = "openshift.driver-toolkit.rhcos"
ocpDriverToolkitIdentificationLabel = "openshift.driver-toolkit"
appLabelKey = "app"
ocpDriverToolkitIdentificationValue = "true"
ocpNamespaceMonitoringLabelKey = "openshift.io/cluster-monitoring"
ocpNamespaceMonitoringLabelValue = "true"
precompiledIdentificationLabelKey = "nvidia.com/precompiled"
precompiledIdentificationLabelValue = "true"
// see bundle/manifests/gpu-operator.clusterserviceversion.yaml
// --> ClusterServiceVersion.metadata.annotations.operatorframework.io/suggested-namespace
ocpSuggestedNamespace = "nvidia-gpu-operator"
Expand Down Expand Up @@ -164,12 +165,13 @@ type ClusterPolicyController struct {
openshift string
ocpDriverToolkit OpenShiftDriverToolkit

runtime gpuv1.Runtime
gpuNodeOSTag string
gpuNodeOSRelease string
hasGPUNodes bool
hasNFDLabels bool
sandboxEnabled bool
runtime gpuv1.Runtime
gpuNodeOSTag string
gpuNodeOSRelease string
hasGPUNodes bool
hasNFDLabels bool
memoryHotplugAutoOnline bool
sandboxEnabled bool
}

func addState(n *ClusterPolicyController, path string) {
Expand Down Expand Up @@ -529,6 +531,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
}
clusterHasNFDLabels := false
gpuNodesTotal := 0
memoryHotplugAutoOnline := true
for _, node := range list.Items {
node := node
updateLabels := false
Expand Down Expand Up @@ -569,6 +572,10 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
}

if hasCommonGPULabel(labels) {
if labels[nfdKernelConfigMemoryHotplugLabelKey] != "true" {
memoryHotplugAutoOnline = false
}

// If node has GPU, then add state labels as per the workload type
n.logger.Info("Checking GPU state labels on the node", "NodeName", node.Name)
if gpuWorkloadConfig.updateGPUStateLabels(labels) {
Expand Down Expand Up @@ -616,6 +623,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
}
} // end node loop

n.memoryHotplugAutoOnline = gpuNodesTotal > 0 && memoryHotplugAutoOnline
n.logger.Info("Number of nodes with GPU label", "NodeCount", gpuNodesTotal)
n.operatorMetrics.gpuNodesTotal.Set(float64(gpuNodesTotal))
return clusterHasNFDLabels, gpuNodesTotal, nil
Expand Down
3 changes: 3 additions & 0 deletions deployments/gpu-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,9 @@ node-feature-discovery:
effect: NoSchedule
config:
sources:
kernel:
configOpts:
- "MEMORY_HOTPLUG"
pci:
deviceClassWhitelist:
- "02"
Expand Down
20 changes: 11 additions & 9 deletions internal/state/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,16 @@ type additionalConfigs struct {
}

type driverRenderData struct {
Driver *driverSpec
GDS *gdsDriverSpec
GPUDirectRDMA *nvidiav1alpha1.GPUDirectRDMASpec
GDRCopy *gdrcopyDriverSpec
Runtime *driverRuntimeSpec
Openshift *openshiftSpec
Precompiled *precompiledSpec
AdditionalConfigs *additionalConfigs
HostRoot string
Driver *driverSpec
GDS *gdsDriverSpec
GPUDirectRDMA *nvidiav1alpha1.GPUDirectRDMASpec
GDRCopy *gdrcopyDriverSpec
Runtime *driverRuntimeSpec
Openshift *openshiftSpec
Precompiled *precompiledSpec
AdditionalConfigs *additionalConfigs
HostRoot string
MemoryHotplugAutoOnline bool
}

// ConfigDigest computes a hash of all driver-install-relevant fields.
Expand Down Expand Up @@ -301,6 +302,7 @@ func (s *stateDriver) getManifestObjects(ctx context.Context, cr *nvidiav1alpha1
return nil, fmt.Errorf("failed to construct driver spec: %w", err)
}
renderData.Driver = driverSpec
renderData.MemoryHotplugAutoOnline = nodePool.memoryHotplugAutoOnline

if cr.Spec.UsePrecompiledDrivers() {
renderData.Precompiled = &precompiledSpec{
Expand Down
25 changes: 24 additions & 1 deletion internal/state/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,28 @@ func TestDriverRenderMinimal(t *testing.T) {
require.Equal(t, string(o), actual)
}

// TestDriverRenderSkipsMemoryHotplugMountWhenUnsupported renders the driver
// manifests from the minimal render data with MemoryHotplugAutoOnline set to
// false and checks that the rendered YAML mentions neither the
// "sysfs-memory-online" volume name nor the auto_online_blocks sysfs path.
func TestDriverRenderSkipsMemoryHotplugMountWhenUnsupported(t *testing.T) {
	s, err := NewStateDriver(nil, "", nil, manifestDir)
	require.Nil(t, err)
	driver, ok := s.(*stateDriver)
	require.True(t, ok)

	data := getMinimalDriverRenderData()
	data.MemoryHotplugAutoOnline = false

	templating := &render.TemplatingData{Data: data}
	rendered, err := driver.renderer.RenderObjects(templating)
	require.Nil(t, err)
	require.NotEmpty(t, rendered)

	yamlOut, err := getYAMLString(rendered)
	require.Nil(t, err)

	// Neither the volume name nor its host path may appear in the output.
	forbidden := []string{
		"sysfs-memory-online",
		"/sys/devices/system/memory/auto_online_blocks",
	}
	for _, needle := range forbidden {
		require.NotContains(t, yamlOut, needle)
	}
}

func TestDriverHostNetwork(t *testing.T) {
const (
testName = "driver-hostnetwork"
Expand Down Expand Up @@ -816,7 +838,8 @@ func getMinimalDriverRenderData() *driverRenderData {
Runtime: &driverRuntimeSpec{
Namespace: "test-operator",
},
HostRoot: "",
HostRoot: "",
MemoryHotplugAutoOnline: true,
}
}

Expand Down
31 changes: 19 additions & 12 deletions internal/state/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,22 @@ import (
)

const (
nfdKernelLabelKey = "feature.node.kubernetes.io/kernel-version.full"
nfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
nfdKernelLabelKey = "feature.node.kubernetes.io/kernel-version.full"
nfdKernelConfigMemoryHotplugLabelKey = "feature.node.kubernetes.io/kernel-config.MEMORY_HOTPLUG"
nfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
)

// TODO: move this code to its own module?
// TODO: add unit tests
type nodePool struct {
name string
osRelease string
osVersion string
osTag string
rhcosVersion string
kernel string
nodeSelector map[string]string
name string
osRelease string
osVersion string
osTag string
rhcosVersion string
kernel string
nodeSelector map[string]string
memoryHotplugAutoOnline bool
}

// getNodePools partitions nodes into one or more node pools. The list of nodes to partition
Expand Down Expand Up @@ -80,6 +82,7 @@ func getNodePools(ctx context.Context, k8sClient client.Client, selector map[str
nodePool := nodePool{}
nodePool.nodeSelector = make(map[string]string)
maps.Copy(nodePool.nodeSelector, nodeSelector)
nodePool.memoryHotplugAutoOnline = nodeLabels[nfdKernelConfigMemoryHotplugLabelKey] == "true"

osID, ok := nodeLabels[nfdOSReleaseIDLabelKey]
if !ok {
Expand Down Expand Up @@ -126,10 +129,14 @@ func getNodePools(ctx context.Context, k8sClient client.Client, selector map[str
nodePool.name = rhcosVersion
}

if _, exists := nodePoolMap[nodePool.name]; !exists {
logger.Info("Detected new node pool", "NodePool", nodePool)
nodePoolMap[nodePool.name] = nodePool
if existing, exists := nodePoolMap[nodePool.name]; exists {
existing.memoryHotplugAutoOnline = existing.memoryHotplugAutoOnline && nodePool.memoryHotplugAutoOnline
nodePoolMap[nodePool.name] = existing
continue
}

logger.Info("Detected new node pool", "NodePool", nodePool)
nodePoolMap[nodePool.name] = nodePool
}

var nodePools []nodePool
Expand Down
Loading
Loading