[root@node01 ~]# kubectl get svc -A
NAMESPACE     NAME                 TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)    AGE
default       kubernetes           ClusterIP   10.96.0.1      <none>        443/TCP    2d
kube-system   grafana-service      ClusterIP   10.96.78.163   <none>        3000/TCP   2d
kube-system   grafana-service111   ClusterIP   10.96.52.101   <none>        3000/TCP   13s
[root@node01 ~]# kubectl get ep -A
NAMESPACE     NAME                 ENDPOINTS                           AGE
default       kubernetes           10.10.72.15:6443                    2d
kube-system   grafana-service      10.78.104.6:3000,10.78.135.5:3000   2d
kube-system   grafana-service111   <none>                              18s
进入一个业务 Pod,请求 grafana-service111 的 ClusterIP(该 Service 没有任何后端 Endpoint),结果请求一直卡住,直到超时才终止:
1 2 3 4 5 6 7
[root@node01 ~]# kubectl exec -it -n kube-system influxdb-rs1-5bdc67f4cb-lnfgt bash
root@influxdb-rs1-5bdc67f4cb-lnfgt:/# time curl http://10.96.52.101:3000
curl: (7) Failed to connect to 10.96.52.101 port 3000: Connection timed out
有问题的环境:
[root@node4 ~]# iptables -t filter -S cali-FORWARD
-N cali-FORWARD
-A cali-FORWARD -m comment --comment "cali:vjrMJCRpqwy5oRoX" -j MARK --set-xmark 0x0/0xe0000
-A cali-FORWARD -m comment --comment "cali:A_sPAO0mcxbT9mOV" -m mark --mark 0x0/0x10000 -j cali-from-hep-forward
-A cali-FORWARD -i cali+ -m comment --comment "cali:8ZoYfO5HKXWbB3pk" -j cali-from-wl-dispatch
-A cali-FORWARD -o cali+ -m comment --comment "cali:jdEuaPBe14V2hutn" -j cali-to-wl-dispatch
-A cali-FORWARD -m comment --comment "cali:12bc6HljsMKsmfr-" -j cali-to-hep-forward
-A cali-FORWARD -m comment --comment "cali:MH9kMp5aNICL-Olv" -m comment --comment "Policy explicitly accepted packet." -m mark --mark 0x10000/0x10000 -j ACCEPT
// 问题出在最后这一条规则:新版本的 Calico 把这条规则移到了 FORWARD 链。
正常的环境:
[root@node01 ~]# iptables -t filter -S cali-FORWARD
-N cali-FORWARD
-A cali-FORWARD -m comment --comment "cali:vjrMJCRpqwy5oRoX" -j MARK --set-xmark 0x0/0xe0000
-A cali-FORWARD -m comment --comment "cali:A_sPAO0mcxbT9mOV" -m mark --mark 0x0/0x10000 -j cali-from-hep-forward
-A cali-FORWARD -i cali+ -m comment --comment "cali:8ZoYfO5HKXWbB3pk" -j cali-from-wl-dispatch
-A cali-FORWARD -o cali+ -m comment --comment "cali:jdEuaPBe14V2hutn" -j cali-to-wl-dispatch
-A cali-FORWARD -m comment --comment "cali:12bc6HljsMKsmfr-" -j cali-to-hep-forward
-A cali-FORWARD -m comment --comment "cali:NOSxoaGx8OIstr1z" -j cali-cidr-block
下面是在最新的 K8S 集群上做相同测试的记录,可以与异常环境进行对比。
模拟一个业务请求pod:
1 2 3 4 5
[root@node01 home]# kubectl run busybox --image=busybox-curl:v1.0 --image-pull-policy=IfNotPresent -- sleep 300000
pod/busybox created
[root@node01 home]# kubectl get pod -A -owide
NAMESPACE   NAME      READY   STATUS    RESTARTS   AGE   IP             NODE
default     busybox   1/1     Running   0          14h   10.78.153.73   10.10.11.49