[root@node1 ~]# kubectl describe pod -n xxx cam1-78b6fc6bc8-cjsw5   // nothing unusual in the output, so it is not pasted here
Nothing obviously abnormal shows up in the Events, so the next step is the log of the kubelet, the component that actually tears the Pod down on the node. Only the key lines are kept below.
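They could be pulled with something like this (a sketch — it assumes kubelet runs as a systemd unit and logs to journald; the exact unit name may differ on this node):

[root@node1 ~]# journalctl -u kubelet | grep cam1-78b6fc6bc8-cjsw5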
I0728 16:24:57.339295 9744 kubelet.go:1904] SyncLoop (DELETE, "api"): "cam1-78b6fc6bc8-cjsw5_cam(5c948341-c030-4996-b888-f032577d97b0)"
I0728 16:24:57.339720 9744 kuberuntime_container.go:581] Killing container "docker://a73082a4a9a4cec174bb0d1c256cc11d804d93137551b9bfd3e6fa1522e98589" with 60 second grace period
I0728 16:25:18.259418 9744 kubelet.go:1904] SyncLoop (DELETE, "api"): "cam1-78b6fc6bc8-cjsw5_cam(5c948341-c030-4996-b888-f032577d97b0)"
2021-07-28 16:25:19.247 [INFO][394011] ipam.go 1173: Releasing all IPs with handle 'cam.cam1-78b6fc6bc8-cjsw5'
2021-07-28 16:25:19.254 [INFO][393585] k8s.go 498: Teardown processing complete.
// Suspicious point 1: the pod IP could not be read
W0728 16:25:19.303513 9744 docker_sandbox.go:384] failed to read pod IP from plugin/docker: NetworkPlugin cni failed on the status hook for pod "cam1-78b6fc6bc8-cjsw5_cam": Unexpected command output Device "eth0" does not exist. with error: exit status 1
I0728 16:25:19.341068 9744 kubelet.go:1933] SyncLoop (PLEG): "cam1-78b6fc6bc8-cjsw5_cam(5c948341-c030-4996-b888-f032577d97b0)", event: &pleg.PodLifecycleEvent{ID:"5c948341-c030-4996-b888-f032577d97b0", Type:"ContainerDied", Data:"a73082a4a9a4cec174bb0d1c256cc11d804d93137551b9bfd3e6fa1522e98589"}
I0728 16:25:20.578095 9744 kubelet.go:1933] SyncLoop (PLEG): "cam1-78b6fc6bc8-cjsw5_cam(5c948341-c030-4996-b888-f032577d97b0)", event: &pleg.PodLifecycleEvent{ID:"5c948341-c030-4996-b888-f032577d97b0", Type:"ContainerDied", Data:"c3b992465cd2085300995066526a36665664558446ff6e1756135c3a5b6df2e6"}
I0728 16:25:20.711967 9744 kubelet_pods.go:1090] Killing unwanted pod "cam1-78b6fc6bc8-cjsw5"
// Suspicious point 2: Unmount failed
E0728 16:25:20.939400 9744 nestedpendingoperations.go:301] Operation for "{volumeName:kubernetes.io/glusterfs/5c948341-c030-4996-b888-f032577d97b0-cam-pv-50g podName:5c948341-c030-4996-b888-f032577d97b0 nodeName:}" failed. No retries permitted until 2021-07-28 16:25:21.439325811 +0800 CST m=+199182.605079651 (durationBeforeRetry 500ms). Error: "UnmountVolume.TearDown failed for volume \"diag-log\" (UniqueName: \"kubernetes.io/glusterfs/5c948341-c030-4996-b888-f032577d97b0-cam-pv-50g\") pod \"5c948341-c030-4996-b888-f032577d97b0\" (UID: \"5c948341-c030-4996-b888-f032577d97b0\") : Unmount failed: exit status 32\nUnmounting arguments: /var/lib/kubelet/pods/5c948341-c030-4996-b888-f032577d97b0/volumes/kubernetes.io~glusterfs/cam-pv-50g\nOutput: umount: /var/lib/kubelet/pods/5c948341-c030-4996-b888-f032577d97b0/volumes/kubernetes.io~glusterfs/cam-pv-50g: target is busy.\n (In some cases useful info about processes that\n use the device is found by lsof(8) or fuser(1).)\n\n"
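The unmount error is the classic "target is busy" case: some process on the node still has files open under the mount point, so umount exits with status 32 and kubelet keeps retrying the teardown. Following the hint in the error output itself, the holder can be located roughly like this (a sketch — the mount path is copied from the log above; these commands are not part of the original troubleshooting transcript):

[root@node1 ~]# fuser -mv /var/lib/kubelet/pods/5c948341-c030-4996-b888-f032577d97b0/volumes/kubernetes.io~glusterfs/cam-pv-50g   // list PIDs holding files open on the glusterfs mount
[root@node1 ~]# lsof /var/lib/kubelet/pods/5c948341-c030-4996-b888-f032577d97b0/volumes/kubernetes.io~glusterfs/cam-pv-50g        // same information via lsof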
pkg/kubelet/dockershim/docker_sandbox.go:348
func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.ContainerJSON) string {
    if sandbox.NetworkSettings == nil {
        return ""
    }
    if networkNamespaceMode(sandbox) == runtimeapi.NamespaceMode_NODE {
        // For sandboxes using host network, the shim is not responsible for
        // reporting the IP.
        return ""
    }

    // Don't bother getting IP if the pod is known and networking isn't ready
    ready, ok := ds.getNetworkReady(podSandboxID)
    if ok && !ready {
        return ""
    }

    // Ask the CNI plugin first; fall back to the docker-reported addresses if that fails
    ip, err := ds.getIPFromPlugin(sandbox)
    if err == nil {
        return ip
    }
    if sandbox.NetworkSettings.IPAddress != "" {
        return sandbox.NetworkSettings.IPAddress
    }
    if sandbox.NetworkSettings.GlobalIPv6Address != "" {
        return sandbox.NetworkSettings.GlobalIPv6Address
    }

    // The warning seen in the kubelet log above is emitted here
    klog.Warningf("failed to read pod IP from plugin/docker: %v", err)
    return ""
}
    // Parse the timestamps.
    createdAt, _, _, err := getContainerTimestamps(r)
    if err != nil {
        return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", podSandboxID, err)
    }
    ct := createdAt.UnixNano()

    // Translate container to sandbox state.
    state := runtimeapi.PodSandboxState_SANDBOX_NOTREADY
    if r.State.Running {
        state = runtimeapi.PodSandboxState_SANDBOX_READY
    }

    // This is where getIP is called while building the sandbox status
    var IP string
    if IP = ds.determinePodIPBySandboxID(podSandboxID); IP == "" {
        IP = ds.getIP(podSandboxID, r)
    }
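For context, the Device "eth0" does not exist. fragment in suspicious point 1 is not kubelet's own wording: getIPFromPlugin asks the CNI plugin for the pod IP, and on this dockershim code path the status hook effectively runs the ip tool inside the pod's network namespace. Once the sandbox has been torn down there is no eth0 left, so ip fails with exactly that message and exit status 1. It can be reproduced on any node (a sketch purely to show where the text comes from; the namespace name demo-ns is made up for illustration):

[root@node1 ~]# ip netns add demo-ns
[root@node1 ~]# ip netns exec demo-ns ip addr show dev eth0   // prints: Device "eth0" does not exist.  (exit status 1)
[root@node1 ~]# ip netns del demo-ns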
pkg/kubelet/kubelet_pods.go:900
func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool {
    ...
    // Here kubelet checks whether the pod's volumes have been unmounted
    if kl.podVolumesExist(pod.UID) && !kl.keepTerminatedPodVolumes {
        // We shouldnt delete pods whose volumes have not been cleaned up if we are not keeping terminated pod volumes
        klog.V(3).Infof("Pod %q is terminated, but some volumes have not been cleaned up", format.Pod(pod))
        return false
    }
    if kl.kubeletConfiguration.CgroupsPerQOS {
        pcm := kl.containerManager.NewPodContainerManager()
        if pcm.Exists(pod) {
            klog.V(3).Infof("Pod %q is terminated, but pod cgroup sandbox has not been cleaned up", format.Pod(pod))
            return false
        }
    }
    return true
}
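This check is what ties suspicious point 2 to the stuck deletion: as long as the glusterfs volume cannot be unmounted, podVolumesExist(pod.UID) keeps returning true, PodResourcesAreReclaimed returns false, and kubelet will not finish removing the Pod, so it stays in Terminating. Whether that is the case can be confirmed on the node (a sketch — the UID and path are taken from the unmount error above):

[root@node1 ~]# ls /var/lib/kubelet/pods/5c948341-c030-4996-b888-f032577d97b0/volumes/kubernetes.io~glusterfs/   // cam-pv-50g still listed => the volume has not been cleaned up
[root@node1 ~]# mount | grep 5c948341-c030-4996-b888-f032577d97b0                                                // the glusterfs mount is still active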