Skip to content

Commit 43d0fc3

Browse files
mtuliokmala
authored and committed
e2e/debug: increase data collection on e2e failures
1 parent 18d1ae5 commit 43d0fc3

1 file changed

Lines changed: 124 additions & 1 deletion

File tree

tests/e2e/loadbalancer.go

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,23 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() {
213213

214214
By("creating backend server pods")
215215
_, err = e2e.LBJig.Run(e2e.buildReplicationController(tc.requireAffinity))
216-
framework.ExpectNoError(err)
216+
if err != nil {
217+
serviceName := e2e.LBJig.Name
218+
if e2e.svc != nil {
219+
serviceName = e2e.svc.Name
220+
}
221+
framework.Logf("ERROR: LoadBalancer provisioning failed for service %q: %v", serviceName, err)
222+
framework.Logf("ERROR: LoadBalancer provisioning timeout reached after %v", loadBalancerCreateTimeout)
223+
224+
// Ensure we have detailed debugging information before failing
225+
framework.Logf("=== LoadBalancer Provisioning Failure Debug Information ===")
226+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
227+
framework.Logf("=== End of LoadBalancer Provisioning Failure Debug Information ===")
228+
229+
// Fail the test immediately to prevent further execution
230+
framework.ExpectNoError(err, "LoadBalancer provisioning failed - check debug information above")
231+
}
232+
217233
framework.Logf("[K8S] Backend pods created, affinity required: %t", tc.requireAffinity)
218234

219235
if tc.hookPostServiceCreate != nil {
@@ -222,16 +238,39 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() {
222238
}
223239

224240
By("collecting service and load balancer information")
241+
if e2e.svc == nil {
242+
framework.Logf("=== Service Validation Error Debug Information ===")
243+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
244+
framework.Logf("=== End of Service Validation Error Debug Information ===")
245+
framework.Failf("Service is nil after LoadBalancer provisioning for service %s", e2e.LBJig.Name)
246+
}
247+
225248
if len(e2e.svc.Spec.Ports) == 0 {
249+
framework.Logf("=== Service Ports Error Debug Information ===")
250+
framework.Logf("Service spec: %+v", e2e.svc.Spec)
251+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
252+
framework.Logf("=== End of Service Ports Error Debug Information ===")
226253
framework.Failf("No ports found in service spec for service %s/%s", e2e.svc.Namespace, e2e.svc.Name)
227254
}
228255
if len(e2e.svc.Status.LoadBalancer.Ingress) == 0 {
256+
framework.Logf("=== LoadBalancer Ingress Error Debug Information ===")
257+
framework.Logf("Service status: %+v", e2e.svc.Status)
258+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
259+
framework.Logf("=== End of LoadBalancer Ingress Error Debug Information ===")
229260
framework.Failf("No ingress found in LoadBalancer status for service %s/%s", e2e.svc.Namespace, e2e.svc.Name)
230261
}
231262
svcPort := int(e2e.svc.Spec.Ports[0].Port)
232263
ingressAddress := e2eservice.GetIngressPoint(&e2e.svc.Status.LoadBalancer.Ingress[0])
233264
framework.Logf("[LB-INFO] Ingress address: %s, port: %d", ingressAddress, svcPort)
234265

266+
if ingressAddress == "" {
267+
framework.Logf("=== Empty Ingress Address Debug Information ===")
268+
framework.Logf("LoadBalancer ingress[0]: %+v", e2e.svc.Status.LoadBalancer.Ingress[0])
269+
gatherEventosOnFailure(e2e.ctx, e2e.kubeClient, e2e.LBJig.Namespace, e2e.LBJig.Name)
270+
framework.Logf("=== End of Empty Ingress Address Debug Information ===")
271+
framework.Failf("LoadBalancer ingress address is empty for service %s/%s", e2e.svc.Namespace, e2e.svc.Name)
272+
}
273+
235274
if tc.hookPreTest != nil {
236275
By("executing pre-test hook")
237276
tc.hookPreTest(e2e)
@@ -737,3 +776,87 @@ func inClusterTestReachableHTTP(cs clientset.Interface, namespace, nodeName, tar
737776

738777
return nil
739778
}
779+
780+
// Gather information from the cluster to help debug failures.
781+
// - Resource events
782+
// - All namespace events
783+
// - Cloud controller manager logs
784+
// - Service status
785+
func gatherResourceEvents(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
786+
framework.Logf("=== Collecting resource events for debugging ===")
787+
events, err := cs.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
788+
FieldSelector: "involvedObject.name=" + resourceName,
789+
})
790+
if err != nil {
791+
framework.Logf("Error getting events for resource %q: %v", resourceName, err)
792+
} else {
793+
framework.Logf("Resource events for %q:", resourceName)
794+
for _, event := range events.Items {
795+
framework.Logf(" [%s] %s/%s: %s - %s", event.Type, event.Reason, event.InvolvedObject.Name, event.Message, event.FirstTimestamp)
796+
}
797+
}
798+
}
799+
800+
func gatherAllEvents(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
801+
framework.Logf("=== Collecting all namespace events ===")
802+
allEvents, err := cs.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{})
803+
if err != nil {
804+
framework.Logf("Error getting all namespace events: %v", err)
805+
} else {
806+
framework.Logf("All events in namespace %q:", namespace)
807+
for _, event := range allEvents.Items {
808+
if strings.Contains(event.Message, "loadbalancer") || strings.Contains(event.Message, "LoadBalancer") ||
809+
strings.Contains(event.Reason, "LoadBalancer") || strings.Contains(event.Source.Component, "cloud-controller-manager") {
810+
framework.Logf(" [%s] %s/%s/%s: %s - %s", event.Type, event.Source.Component, event.Reason, event.InvolvedObject.Name, event.Message, event.FirstTimestamp)
811+
}
812+
}
813+
}
814+
}
815+
816+
func gatherControllerLogs(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
817+
framework.Logf("=== Collecting cloud controller manager logs ===")
818+
ccmPods, err := cs.CoreV1().Pods("").List(ctx, metav1.ListOptions{
819+
LabelSelector: "app=cloud-controller-manager",
820+
})
821+
if err != nil {
822+
framework.Logf("Error listing cloud controller manager pods: %v", err)
823+
} else {
824+
for _, pod := range ccmPods.Items {
825+
framework.Logf("Found CCM pod: %s/%s (phase: %s)", pod.Namespace, pod.Name, pod.Status.Phase)
826+
827+
// Get recent logs (last 50 lines)
828+
tailLines := int64(50)
829+
logOpts := &v1.PodLogOptions{
830+
TailLines: &tailLines,
831+
Previous: false,
832+
}
833+
logs, err1 := cs.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, logOpts).DoRaw(ctx)
834+
if err1 != nil {
835+
framework.Logf("Error getting logs for CCM pod %s/%s: %v", pod.Namespace, pod.Name, err)
836+
} else {
837+
framework.Logf("Recent logs from CCM pod %s/%s:", pod.Namespace, pod.Name)
838+
framework.Logf("%s", string(logs))
839+
}
840+
}
841+
}
842+
}
843+
844+
func gatherServiceStatus(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
845+
framework.Logf("=== Service Status ===")
846+
currentSvc, err := cs.CoreV1().Services(namespace).Get(ctx, resourceName, metav1.GetOptions{})
847+
if err != nil {
848+
framework.Logf("Error getting current service status: %v", err)
849+
} else {
850+
framework.Logf("Service %s status:", currentSvc.Name)
851+
framework.Logf(" Annotations: %+v", currentSvc.Annotations)
852+
framework.Logf(" LoadBalancer status: %+v", currentSvc.Status.LoadBalancer)
853+
framework.Logf(" Conditions: %+v", currentSvc.Status.Conditions)
854+
}
855+
}
856+
857+
func gatherEventosOnFailure(ctx context.Context, cs clientset.Interface, namespace, resourceName string) {
858+
gatherResourceEvents(ctx, cs, namespace, resourceName)
859+
gatherAllEvents(ctx, cs, namespace, resourceName)
860+
gatherControllerLogs(ctx, cs, namespace, resourceName)
861+
gatherServiceStatus(ctx, cs, namespace, resourceName)
862+
}

0 commit comments

Comments
 (0)