#!/bin/bash

# Add a local user.
# Use LOCAL_USER_ID when it is passed in at runtime; otherwise fall back
# to UID 9001.
USER_ID=${LOCAL_USER_ID:-9001}

# When RUN_AS_ROOT is "true", do not create a user: replace this shell with
# the requested command right away.
if [[ "${RUN_AS_ROOT}" == "true" ]]; then
  exec "$@"
fi

echo "Starting with UID : $USER_ID" 1>&2

# Do not create a mail box.
/bin/sed -i 's/^CREATE_MAIL_SPOOL=yes/CREATE_MAIL_SPOOL=no/' /etc/default/useradd

# Don't pass "-m" to useradd if the home directory already exists (which can
# occur if it was volume mounted in), otherwise it will fail.
# USER_ID is quoted so an unexpected value cannot be word-split into extra
# useradd arguments.
if [[ ! -d "/home/user" ]]; then
  /usr/sbin/useradd -m -U -s /bin/bash -u "$USER_ID" user
else
  /usr/sbin/useradd -U -s /bin/bash -u "$USER_ID" user
fi
... docker build --pull -t calico/node:latest-amd64 . --build-arg BIRD_IMAGE=calico/bird:v0.3.3-151-g767b5389-amd64 --build-arg QEMU_IMAGE=calico/go-build:v0.40 --build-arg GIT_VERSION= -f ./Dockerfile.amd64 Sending build context to Docker daemon 66.3MB Step 1/41 : ARG ARCH=x86_64 Step 2/41 : ARG GIT_VERSION=unknown Step 3/41 : ARG IPTABLES_VER=1.8.2-16 Step 4/41 : ARG RUNIT_VER=2.1.2 Step 5/41 : ARG BIRD_IMAGE=calico/bird:latest Step 6/41 : FROM calico/bpftool:v5.3-amd64 as bpftool ... Step 12/41 : ARG CENTOS_MIRROR_BASE_URL=https://mirrors.aliyun.com/centos-vault/8.1.1911 ---> Using cache ---> a96f716928d7 ... Step 17/41 : RUN mv /etc/yum.repos.d /etc/yum.repo.d-bk && mkdir -p /etc/yum.repos.d && mv /centos.repo /etc/yum.repos.d && yum clean all && yum makecache && dnf install -y 'dnf-command(config-manager)' && yum install -y rpm-build yum-utils make && yum install -y wget glibc-static gcc && yum -y update-minimal --security --sec-severity=Important --sec-severity=Critical ---> Using cache ---> a9ffd418a7a4 ... Step 24/41 : FROM registry.access.redhat.com/ubi8/ubi-minimal:8.1-407 8.1-407: Pulling from ubi8/ubi-minimal Digest: sha256:01b8fb7b3ad16a575651a4e007e8f4d95b68f727b3a41fc57996be9a790dc4fa Status: Image is up to date for registry.access.redhat.com/ubi8/ubi-minimal:8.1-407 ---> 6ce38bb5210c ... Step 39/41 : COPY dist/bin/calico-node-amd64 /bin/calico-node ---> Using cache ---> 916fbf133fb0 Step 40/41 : COPY --from=bpftool /bpftool /bin ---> Using cache ---> f797db5c4eb4 Step 41/41 : CMD ["start_runit"] ---> Using cache ---> fe6496ded4a6 [Warning] One or more build-args [QEMU_IMAGE] were not consumed Successfully built fe6496ded4a6 Successfully tagged calico/node:latest-amd64 touch .calico_node.created-amd64 make: Leaving directory `/home/go/gopath/src/github.com/projectcalico/node'
查看编译的镜像:
1 2 3 4
[root@node01 github.com]# docker images REPOSITORY TAG IMAGE ID CREATED SIZE calico/node latest-amd64 77f4ca933207 7 hours ago 264MB <none> <none> 420e5252b060 7 hours ago 633MB
[root@node01 ~]# kubectl get svc -A NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE default kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 2d kube-system grafana-service ClusterIP 10.96.78.163 <none> 3000/TCP 2d kube-system grafana-service111 ClusterIP 10.96.52.101 <none> 3000/TCP 13s
[root@node01 ~]# kubectl get ep -A NAMESPACE NAME ENDPOINTS AGE default kubernetes 10.10.72.15:6443 2d kube-system grafana-service 10.78.104.6:3000,10.78.135.5:3000 2d kube-system grafana-service111 <none> 18s
进入一个业务Pod,并请求grafana-service111,结果请求卡住并超时终止:
1 2 3 4 5 6 7
[root@node01 ~]# kubectl exec -it -n kube-system influxdb-rs1-5bdc67f4cb-lnfgt bash root@influxdb-rs1-5bdc67f4cb-lnfgt:/# time curl http://10.96.52.101:3000 curl: (7) Failed to connect to 10.96.52.101 port 3000: Connection timed out
有问题的环境: [root@node4 ~]# iptables -t filter -S cali-FORWARD -N cali-FORWARD -A cali-FORWARD -m comment --comment "cali:vjrMJCRpqwy5oRoX" -j MARK --set-xmark 0x0/0xe0000 -A cali-FORWARD -m comment --comment "cali:A_sPAO0mcxbT9mOV" -m mark --mark 0x0/0x10000 -j cali-from-hep-forward -A cali-FORWARD -i cali+ -m comment --comment "cali:8ZoYfO5HKXWbB3pk" -j cali-from-wl-dispatch -A cali-FORWARD -o cali+ -m comment --comment "cali:jdEuaPBe14V2hutn" -j cali-to-wl-dispatch -A cali-FORWARD -m comment --comment "cali:12bc6HljsMKsmfr-" -j cali-to-hep-forward -A cali-FORWARD -m comment --comment "cali:MH9kMp5aNICL-Olv" -m comment --comment "Policy explicitly accepted packet." -m mark --mark 0x10000/0x10000 -j ACCEPT // 问题出在最后这一条规则:新版本的calico把这条规则移到了FORWARD链
正常的环境: [root@node01 ~]# iptables -t filter -S cali-FORWARD -N cali-FORWARD -A cali-FORWARD -m comment --comment "cali:vjrMJCRpqwy5oRoX" -j MARK --set-xmark 0x0/0xe0000 -A cali-FORWARD -m comment --comment "cali:A_sPAO0mcxbT9mOV" -m mark --mark 0x0/0x10000 -j cali-from-hep-forward -A cali-FORWARD -i cali+ -m comment --comment "cali:8ZoYfO5HKXWbB3pk" -j cali-from-wl-dispatch -A cali-FORWARD -o cali+ -m comment --comment "cali:jdEuaPBe14V2hutn" -j cali-to-wl-dispatch -A cali-FORWARD -m comment --comment "cali:12bc6HljsMKsmfr-" -j cali-to-hep-forward -A cali-FORWARD -m comment --comment "cali:NOSxoaGx8OIstr1z" -j cali-cidr-block
下面是在最新的K8S集群上做相同的测试记录,可以跟异常环境做对比。
模拟一个业务请求pod:
1 2 3 4 5
[root@node01 home]# kubectl run busybox --image=busybox-curl:v1.0 --image-pull-policy=IfNotPresent -- sleep 300000 pod/busybox created
[root@node01 home]# kubectl get pod -A -owide NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE default busybox 1/1 Running 0 14h 10.78.153.73 10.10.11.49
[root@node01 k8s]# ./ezdown -S 2023-03-22 13:39:40 INFO Action begin: start_kubeasz_docker 2023-03-22 13:39:41 INFO try to run kubeasz in a container 2023-03-22 13:39:41 DEBUG get host IP: 10.10.11.49 2023-03-22 13:39:41 DEBUG generate ssh key pair # 10.10.11.49 SSH-2.0-OpenSSH_6.6.1 f1b442b7fdaf757c7787536b17d12d76208a2dd7884d56fbd1d35817dc2e94ca 2023-03-22 13:39:41 INFO Action successed: start_kubeasz_docker
[root@node01 k8s]# docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES f1b442b7fdaf easzlab/kubeasz:3.5.0 "sleep 36000" 15 seconds ago Up 14 seconds kubeasz
执行后看不出是成功还是失败。根据文档说明,进入容器内手动执行命令:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
[root@node01 ~]# docker exec -it kubeasz ezctl start-aio 2023-03-22 06:15:05 INFO get local host ipadd: 10.10.11.49 2023-03-22 06:15:05 DEBUG generate custom cluster files in /etc/kubeasz/clusters/default 2023-03-22 06:15:05 DEBUG set versions 2023-03-22 06:15:05 DEBUG disable registry mirrors 2023-03-22 06:15:05 DEBUG cluster default: files successfully created. 2023-03-22 06:15:05 INFO next steps 1: to config '/etc/kubeasz/clusters/default/hosts' 2023-03-22 06:15:05 INFO next steps 2: to config '/etc/kubeasz/clusters/default/config.yml' ansible-playbook -i clusters/default/hosts -e @clusters/default/config.yml playbooks/90.setup.yml 2023-03-22 06:15:05 INFO cluster:default setup step:all begins in 5s, press any key to abort:
PLAY [kube_master,kube_node,etcd,ex_lb,chrony] **********************************************************************************************************************************************************
TASK [Gathering Facts] ********************************************************************************************************************************************************************************** fatal: [10.10.11.49]: UNREACHABLE! => {"changed": false, "msg": "Failed to connect to the host via ssh: root@10.10.11.49: Permission denied (publickey,gssapi-keyex,gssapi-with-mic,password).", "unreachable": true}
bash-5.1# ssh-keygen Generating public/private rsa key pair. Enter file in which to save the key (/root/.ssh/id_rsa): /root/.ssh/id_rsa already exists. Overwrite (y/n)? bash-5.1# ssh-copy-id root@10.10.11.49 /usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub" /usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed expr: warning: '^ERROR: ': using '^' as the first character of a basic regular expression is not portable; it is ignored /usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys root@10.10.11.49's password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh 'root@10.10.11.49'" and check to make sure that only the key(s) you wanted were added.
[root@node01 kubeasz]# docker exec -it kubeasz ezctl setup default all ansible-playbook -i clusters/default/hosts -e @clusters/default/config.yml playbooks/90.setup.yml 2023-03-22 07:35:46 INFO cluster:default setup step:all begins in 5s, press any key to abort:
PLAY [kube_master,kube_node,etcd,ex_lb,chrony] **********************************************************************************************************************************************************
TASK [Gathering Facts] ********************************************************************************************************************************************************************************** fatal: [10.10.11.49]: FAILED! => {"msg": "to use the 'ssh' connection type with passwords, you must install the sshpass program"}
Events: Type Reason Age From Message ---- ------ ---- ---- ------- Normal Scheduled 41s default-scheduler Successfully assigned kube-system/calico-node-rqpjm to 10.10.11.49 Normal Pulling 20s (x2 over 31s) kubelet Pulling image "easzlab.io.local:5000/calico/cni:v3.23.5" Warning Failed 19s (x2 over 31s) kubelet Failed to pull image "easzlab.io.local:5000/calico/cni:v3.23.5": rpc error: code = Unknown desc = failed to pull and unpack image "easzlab.io.local:5000/calico/cni:v3.23.5": failed to resolve reference "easzlab.io.local:5000/calico/cni:v3.23.5": failed to do request: Head "https://easzlab.io.local:5000/v2/calico/cni/manifests/v3.23.5": http: server gave HTTP response to HTTPS client Warning Failed 19s (x2 over 31s) kubelet Error: ErrImagePull Normal BackOff 5s (x2 over 30s) kubelet Back-off pulling image "easzlab.io.local:5000/calico/cni:v3.23.5" Warning Failed 5s (x2 over 30s) kubelet Error: ImagePullBackOff
[root@node01 log]# docker pull easzlab.io.local:5000/calico/cni:v3.23.5 v3.23.5: Pulling from calico/cni Digest: sha256:9c5055a2b5bc0237ab160aee058135ca9f2a8f3c3eee313747a02edcec482f29 Status: Image is up to date for easzlab.io.local:5000/calico/cni:v3.23.5 easzlab.io.local:5000/calico/cni:v3.23.5
[root@node01 ~]# kubectl get pod -A NAMESPACE NAME READY STATUS RESTARTS AGE kube-system calico-kube-controllers-89b744d6c-klzwh 1/1 Running 0 5m35s kube-system calico-node-wmvff 1/1 Running 0 5m35s kube-system coredns-6665999d97-mp7xc 0/1 ContainerCreating 0 5m35s kube-system dashboard-metrics-scraper-57566685b4-8q5fm 0/1 ContainerCreating 0 5m35s kube-system kubernetes-dashboard-57db9bfd5b-h6jp4 0/1 ContainerCreating 0 5m35s kube-system metrics-server-6bd9f986fc-njpnj 0/1 ContainerCreating 0 5m35s kube-system node-local-dns-wz9bg 1/1 Running 0 5m31s
选择一个describe查看:
1 2 3 4 5 6 7
Events: Type Reason Age From Message ---- ------ ---- ---- ------- Warning FailedScheduling 6m7s default-scheduler 0/1 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.. Normal Scheduled 5m47s default-scheduler Successfully assigned kube-system/coredns-6665999d97-mp7xc to 10.10.11.49 Warning FailedCreatePodSandBox 5m46s kubelet Failed to create pod sandbox: rpc error: code = Unknown desc = failed to setup network for sandbox "072c164d79f4874a8d851d36115ea04b75a2155dae3cecdc764e923c9f38f86b": plugin type="calico" failed (add): failed to find plugin "calico" in path [/opt/cni/bin] Normal SandboxChanged 33s (x25 over 5m46s) kubelet Pod sandbox changed, it will be killed and re-created.
projector-user@storage:~/go/src/github.com$ git clone https://github.com/kubernetes/kubernetes.git Cloning into 'kubernetes'... fatal: unable to access 'https://github.com/kubernetes/kubernetes.git/': server certificate verification failed. CAfile: none CRLfile: none
拉取失败,提示CA证书校验问题,可通过以下命令临时关闭证书校验来解决(注意:关闭校验存在安全风险,仅建议在受信任的网络环境中临时使用):
1
git config --global http.sslVerify false
再次拉取,仍然失败:
1 2 3 4 5
projector-user@storage:~/go/src/github.com$ git clone https://github.com/kubernetes/kubernetes.git Cloning into 'kubernetes'... fatal: unable to update url base from redirection: asked for: https://github.com/kubernetes/kubernetes.git/info/refs?service=git-upload-pack redirect: http://x.x.x.x/proxy.html?template=default&tabs=pwd&vlanid=0&url=https://github.com%2Fkubernetes%2Fkubernetes.git%2Finfo%2Frefs%3Fservice%3Dgit-upload-pack
通过查看启动日志确认SSL是否配置成功:如下日志所示,出现 "WebSocket SSL is enabled: /root/ssl/ssl.properties" 即表示配置成功,此时在浏览器中用 https://xxx:8887/?wss 访问即可。
1 2 3 4 5 6 7 8 9 10 11 12 13
Found IDE: goland OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [DEBUG] :: IdeState :: Starting attempts to Init ProjectorClassLoader [DEBUG] :: IdeState :: Starting attempts to attach IJ injector agent [DEBUG] :: IdeState :: Starting attempts to initialize IDEA: fix AA and disable smooth scrolling (at start) [DEBUG] :: IdeState :: Starting attempts to Getting IDE colors [DEBUG] :: ProjectorServer :: Daemon thread starts [DEBUG] :: IdeState :: Starting attempts to search for editors [INFO] :: ProjectorServer :: ProjectorServer is starting on host 0.0.0.0/0.0.0.0 and port 8887 [INFO] :: HttpWsServerBuilder :: WebSocket SSL is enabled: /root/ssl/ssl.properties [INFO] :: HttpWsServer :: Server started on host 0.0.0.0/0.0.0.0 and port 8887 [DEBUG] :: IdeState :: "Init ProjectorClassLoader" is done [DEBUG] :: IdeState :: "search for editors" is done
nmap.exe -sV -Pn --script ssl-dh-params 443 192.168.1.10 Starting Nmap 7.92 ( https://nmap.org ) at 2022-07-09 11:14 Nmap scan report for 192.168.1.10 Host is up (0.0033s latency). Not shown: 996 closed tcp ports (reset) … | ssl-dh-params: | VULNERABLE: | Diffie-Hellman Key Exchange Insufficient Group Strength | State: VULNERABLE | Transport Layer Security (TLS) services that use Diffie-Hellman groups | of insufficient strength, especially those using one of a few commonly | shared groups, may be susceptible to passive eavesdropping attacks. | Check results: | WEAK DH GROUP 1 | Cipher Suite: TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 | Modulus Type: Safe prime | Modulus Source: RFC2409/Oakley Group 2 | Modulus Length: 1024 | Generator Length: 8 | Public Key Length: 1024 | References: |_ https://weakdh.org
修复方案
参考[3,4],修改方案如下:
1 2 3 4
[root@node1 etc]# cat org.ops4j.pax.web.cfg ... # Excluded SSL/TLS Cipher Suites comma-separated list of Regular Expressions org.ops4j.pax.web.ssl.ciphersuites.excluded=.*NULL.*,.*RC4.*,.*MD5.*,.*DES.*,.*DSS.*,TLS_DHE.*,SSL.*,.*anon.*,.*EXPORT.*
修改后,再次使用 nmap -sV -Pn --script ssl-dh-params -p <port> <ip> 扫描并查看结果,漏洞已解决:
1 2 3 4 5 6 7 8 9 10 11
nmap.exe -sV -Pn --script ssl-dh-params 443 192.168.1.10(主机IP) Starting Nmap 7.92 ( https://nmap.org ) at 2022-07-07 11:53 Nmap scan report for 192.168.1.10 Host is up (0.0032s latency). Not shown: 997 closed tcp ports (reset) PORT STATE SERVICE VERSION 22/tcp open ssh OpenSSH 7.4 (protocol 2.0) 111/tcp open rpcbind 2-4 (RPC #100000) ... Service detection performed. Please report any incorrect results at https://nmap.org/submit/ . Nmap done: 1 IP address (1 host up) scanned in 18.74 seconds