kubectl top node报错处理
- 解决步骤
- 环境说明
- 问题现象
- 初次排查
- 问题解决
- 版本兼容性
- metrics-server.yaml
- 问题验证
解决步骤
因项目要求,需在k8s集群中使用 kubectl top node 命令,但是一直报 error: metrics not available yet 错误。为了更好地复现问题,我们将本次解决问题的步骤整理如下。
环境说明
k8s版本:v1.23.5
k8s部署方式:kubeadm
##版本说明,当前的k8s版本为v1.23.5
[root@XXXX ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
test-worker1 Ready <none> 160d v1.23.5
问题现象
#1 查看node出错
[root@XXXX ~]# kubectl top node
error: metrics not available yet
#2 查看pod没问题
[root@XXXXX ~]# kubectl top pod
NAME CPU(cores) MEMORY(bytes)
XXX-48jgq 16m 75Mi
XXX-55g2z 12m 82Mi
XXX-6tj6c 8m 143Mi
初次排查
#1 查看metrics.k8s.io的api情况
kubectl get --raw /apis/metrics.k8s.io/v1beta1 | python -m json.tool
{"apiVersion": "v1","groupVersion": "metrics.k8s.io/v1beta1","kind": "APIResourceList","resources": [{"kind": "NodeMetrics","name": "nodes","namespaced": false,"singularName": "","verbs": ["get","list"]},{"kind": "PodMetrics","name": "pods","namespaced": true,"singularName": "","verbs": ["get","list"]}]
}
#2 查看apiservices 服务
kubectl describe apiservices v1beta1.metrics.k8s.io
Status:
  Conditions:
    Last Transition Time:  2023-11-07T16:16:24Z
    Message:               all checks passed
    Reason:                Passed
    Status:                True
    Type:                  Available
问题解决
部署metrics-server,使用yaml方式进行部署。因为git下载不下来,我提前把文件下载好,然后换成了阿里的镜像源。要注意metrics-server和k8s版本的兼容性:
版本兼容性
根据metrics-server官方兼容性表(0.6.x 支持 Kubernetes 1.19+),我们的k8s版本是1.23,因此我们使用metrics-server:v0.6.4
metrics-server.yaml
# ServiceAccount in kube-system. It is the identity named by the Deployment's
# serviceAccountName and by the subjects of the role bindings in this manifest.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
# ClusterRole granting read access (get/list/watch) to pods and nodes in the
# metrics.k8s.io API group. The aggregate-to-* labels opt this role into the
# built-in admin, edit and view aggregated ClusterRoles.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
# ClusterRole for the core ("") API group: get on nodes/metrics, and
# get/list/watch on pods and nodes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
# RoleBinding in kube-system that binds the Role
# extension-apiserver-authentication-reader to the metrics-server ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
# ClusterRoleBinding that binds the ClusterRole system:auth-delegator to the
# metrics-server ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
# ClusterRoleBinding that binds the ClusterRole system:metrics-server to the
# metrics-server ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
# Service in kube-system: port 443/TCP is forwarded to the container port
# named "https" on pods labelled k8s-app=metrics-server.
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
# Deployment of metrics-server v0.6.4 in kube-system, running as the
# metrics-server ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        # Skips verification of the kubelet serving certificates.
        # NOTE(review): acceptable for test clusters; confirm before production use.
        - --kubelet-insecure-tls
        # Image is pulled from an Aliyun registry mirror.
        image: registry.cn-hangzhou.aliyuncs.com/rainux/metrics-server:v0.6.4
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        # Must match --secure-port above; the Service targets this port by name.
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        # Root filesystem is read-only, so --cert-dir=/tmp needs a writable volume.
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
# APIService registering group metrics.k8s.io, version v1beta1, backed by the
# metrics-server Service in kube-system. insecureSkipTLSVerify is enabled, so
# the backend's serving certificate is not verified.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
问题验证
#1 运行yaml文件
[root@k8s-master][~]
$kubectl create -f metrics-server.yaml
#2 问题验证
[root@XXXX][~]
$kubectl top node
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
k8s-master 194m 9% 1689Mi 35%
[root@XXXX][~]
$kubectl top pod
NAME CPU(cores) MEMORY(bytes)
cm-deploy-XXXX 0m 3Mi
nfs-XXXXX 0m 13Mi