Skip to content

Commit fa9f875

Browse files
authored
NodeTopologySyncer: populate zone information for NodeTopology CR (#1004)
* initial commit * simplified error scenario * combined zone and subnet to one update * removed commented code * fix update condition * changed subnet update condition * fix unit test and format
1 parent cb7e572 commit fa9f875

File tree

2 files changed

+438
-15
lines changed

2 files changed

+438
-15
lines changed

pkg/controller/nodeipam/ipam/node_topology_syncer.go

Lines changed: 97 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
v1 "k8s.io/api/core/v1"
1111
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1212
"k8s.io/apimachinery/pkg/labels"
13+
"k8s.io/apimachinery/pkg/util/sets"
1314
corelisters "k8s.io/client-go/listers/core/v1"
1415
"k8s.io/client-go/tools/cache"
1516
utilnode "k8s.io/cloud-provider-gcp/pkg/util/node"
@@ -109,15 +110,23 @@ func (syncer *NodeTopologySyncer) reconcile() error {
109110
}
110111
updatedNodeTopologyCR.Status.Subnets = updatedSubnets
111112

112-
if updatedNodeTopologyCR.Status.Zones == nil {
113-
updatedNodeTopologyCR.Status.Zones = []string{}
113+
zoneSet := sets.NewString()
114+
for _, node := range allNodes {
115+
zone, err := getZoneFromNode(context.TODO(), syncer, node)
116+
if err != nil {
117+
return err
118+
}
119+
zoneSet.Insert(zone)
114120
}
121+
updatedNodeTopologyCR.Status.Zones = zoneSet.List()
122+
115123
_, updateErr := syncer.nodeTopologyClient.NetworkingV1().NodeTopologies().UpdateStatus(context.TODO(), updatedNodeTopologyCR, metav1.UpdateOptions{})
116124
if updateErr != nil {
117125
klog.ErrorS(updateErr, "Error updating nodeTopology CR", "nodetopologyCR", nodeTopologyCRName)
118126
return updateErr
119127
}
120128
klog.InfoS("Successfully reconciled nodeTopolody CR")
129+
121130
return nil
122131
}
123132

@@ -136,6 +145,13 @@ func (syncer *NodeTopologySyncer) updateNodeTopology(node *v1.Node) error {
136145
return err
137146
}
138147

148+
// Check for zone update
149+
zone, zoneErr := getZoneFromNode(context.TODO(), syncer, node)
150+
shouldUpdateZone := false
151+
if zoneErr == nil {
152+
shouldUpdateZone = !isZoneInCR(zone, nodeTopologyCR)
153+
}
154+
139155
crSubnets := nodeTopologyCR.Status.Subnets
140156
// We will always add the default subnet to the CR.
141157
// We do not let additional subnetworks to be added during cluster creation.
@@ -167,6 +183,10 @@ func (syncer *NodeTopologySyncer) updateNodeTopology(node *v1.Node) error {
167183
if updatedCR.Status.Zones == nil {
168184
updatedCR.Status.Zones = []string{}
169185
}
186+
if shouldUpdateZone {
187+
updatedCR.Status.Zones = append(updatedCR.Status.Zones, zone)
188+
}
189+
170190
_, updateErr := syncer.nodeTopologyClient.NetworkingV1().NodeTopologies().UpdateStatus(context.TODO(), updatedCR, metav1.UpdateOptions{})
171191
if updateErr != nil {
172192
klog.Errorf("Error updating the CR: %v, err: %v", nodeTopologyCRName, updateErr)
@@ -175,42 +195,69 @@ func (syncer *NodeTopologySyncer) updateNodeTopology(node *v1.Node) error {
175195

176196
klog.Infof("Successfully added the default subnet %v to nodetopology CR", defaultSubnet)
177197

198+
if zoneErr != nil {
199+
klog.ErrorS(zoneErr, "Error updating zone for nodeTopology CR", "nodetopologyCR", nodeTopologyCRName, "node", node.Name)
200+
return zoneErr
201+
}
178202
return nil
179203
}
180204

181-
if !hasSubnetLabel {
205+
// Check if we need to update subnet in CR
206+
shouldUpdateSubnet := false
207+
if hasSubnetLabel {
208+
// Check if subnet already exists in the CR
209+
if !isSubnetInCR(nodeSubnet, nodeTopologyCR) {
210+
shouldUpdateSubnet = true
211+
} else {
212+
klog.V(2).InfoS("Subnet already exists in the node topology CR", "subnet", nodeSubnet)
213+
}
214+
} else {
182215
klog.V(2).Infof("No additional subnet detected. Default subnetwork is added to the CR.")
183-
return nil
184216
}
185217

186-
// Check if subnet already exists in the CR
187-
for _, subnet := range crSubnets {
188-
if subnet.Name == nodeSubnet {
189-
klog.V(2).Infof("The subnet %s already exists in the node topology CR", nodeSubnet)
190-
return nil
218+
// Nothing to update, skip
219+
if !shouldUpdateSubnet && !shouldUpdateZone {
220+
if zoneErr != nil {
221+
klog.V(4).InfoS("Waiting for zone info to be populated, forcing retry", "node", node.Name)
222+
return zoneErr
191223
}
224+
klog.V(2).InfoS("Both subnet and zone are already up to date, skipping", "node", node.Name)
225+
return nil
192226
}
193227

194228
// We have a new subnet that should be added to the CR
195229
// We assume all the subnets are in the same project and region
196230
updatedCR := nodeTopologyCR.DeepCopy()
197-
updatedCR.Status.Subnets = append(updatedCR.Status.Subnets, nodetopologyv1.SubnetConfig{
198-
Name: nodeSubnet,
199-
SubnetPath: subnetPrefix + nodeSubnet,
200-
})
231+
if shouldUpdateSubnet {
232+
updatedCR.Status.Subnets = append(updatedCR.Status.Subnets, nodetopologyv1.SubnetConfig{
233+
Name: nodeSubnet,
234+
SubnetPath: subnetPrefix + nodeSubnet,
235+
})
236+
}
201237
// We always expect zones field in the status.
202238
if updatedCR.Status.Zones == nil {
203239
updatedCR.Status.Zones = []string{}
204240
}
241+
if shouldUpdateZone {
242+
updatedCR.Status.Zones = append(updatedCR.Status.Zones, zone)
243+
}
205244

206245
_, updateErr := syncer.nodeTopologyClient.NetworkingV1().NodeTopologies().UpdateStatus(context.TODO(), updatedCR, metav1.UpdateOptions{})
207246
if updateErr != nil {
208247
klog.Errorf("Error updating the CR: %v, err: %v", nodeTopologyCRName, updateErr)
209248
return updateErr
210249
}
211250

212-
klog.V(2).Infof("Successfully add the subnet %v to the nodetopology CR", nodeSubnet)
213-
251+
if shouldUpdateSubnet {
252+
klog.V(2).Infof("Successfully add the subnet %v to the nodetopology CR", nodeSubnet)
253+
}
254+
if zoneErr != nil {
255+
klog.ErrorS(zoneErr, "Error updating zone for nodeTopology CR", "nodetopologyCR", nodeTopologyCRName, "node", node.Name)
256+
return zoneErr
257+
}
258+
if shouldUpdateZone {
259+
klog.V(2).Infof("Successfully add zone %v to the nodetopology CR", zone)
260+
}
214261
return nil
215262

216263
}
@@ -249,3 +296,38 @@ func getSubnetWithPrefixFromURL(url string) (subnetName string, subnetPrefix str
249296
subnetPrefix = strings.Join(subnetPrefixParts, "/") + "/"
250297
return
251298
}
299+
300+
func isZoneInCR(nodeZone string, nodeTopologyCR *nodetopologyv1.NodeTopology) bool {
301+
for _, zone := range nodeTopologyCR.Status.Zones {
302+
if zone == nodeZone {
303+
return true
304+
}
305+
}
306+
return false
307+
}
308+
309+
func isSubnetInCR(nodeSubnet string, nodeTopologyCR *nodetopologyv1.NodeTopology) bool {
310+
for _, subnet := range nodeTopologyCR.Status.Subnets {
311+
if subnet.Name == nodeSubnet {
312+
return true
313+
}
314+
}
315+
return false
316+
}
317+
318+
func getZoneFromNode(ctx context.Context, syncer *NodeTopologySyncer, node *v1.Node) (string, error) {
319+
providerID := node.Spec.ProviderID
320+
if providerID == "" {
321+
err := fmt.Errorf("node doesn't have providerID")
322+
klog.ErrorS(err, "node doesn't have providerID", "node", node.Name)
323+
return "", err
324+
}
325+
326+
nodeZoneConfig, err := syncer.cloud.GetZoneByProviderID(ctx, providerID)
327+
if err != nil {
328+
klog.ErrorS(err, "Failed to get zone information for node", "node", node.Name, "providerID", providerID)
329+
return "", err
330+
}
331+
332+
return nodeZoneConfig.FailureDomain, nil
333+
}

0 commit comments

Comments
 (0)