一、通过daemonset 安装cadvisor
下载cadvisor并上传到harbor
docker tag gcr.io/cadvisor/cadvisor:v0.45.0 harbor.magedu.net/baseimages/cadvisor:v0.45.0
docker push harbor.magedu.net/baseimages/cadvisor:v0.45.0
root@easzlab-deploy:~/20220911# kubectl create ns monitor
namespace/monitor created
root@easzlab-deploy:~/20220911#
cat case1-daemonset-deploy-cadvisor.yaml
---
# cAdvisor DaemonSet: one cAdvisor pod per node in the "monitor" namespace,
# running on the host network and listening on container port 8080.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: monitor
spec:
  selector:
    matchLabels:
      app: cAdvisor
  template:
    metadata:
      labels:
        app: cAdvisor
    spec:
      # Tolerate the master NoSchedule taint so master nodes get a pod too.
      # "operator: Exists" matches the taint key regardless of its value.
      tolerations:
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      hostNetwork: true
      restartPolicy: Always  # restart policy
      containers:
        - name: cadvisor
          image: harbor.magedu.net/baseimages/cadvisor:v0.45.0
          imagePullPolicy: IfNotPresent  # image pull policy
          ports:
            - containerPort: 8080
          # Host paths are mounted read-only: cAdvisor only needs to read
          # them, and a writable host "/" inside a container is a needless
          # risk.
          volumeMounts:
            - name: root
              mountPath: /rootfs
              readOnly: true
            - name: run
              mountPath: /var/run
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: containerd
              mountPath: /var/lib/containerd
              readOnly: true
      volumes:
        - name: root
          hostPath:
            path: /
        - name: run
          hostPath:
            path: /var/run
        - name: sys
          hostPath:
            path: /sys
        # Named after the path it exposes (the original called it "docker").
        - name: containerd
          hostPath:
            path: /var/lib/containerd
验证cadvisor
二、部署node-exporter
cat case2-daemonset-deploy-node-exporter.yaml
---
# node-exporter DaemonSet: one node-exporter pod per node, sharing the host
# network and PID namespaces and serving metrics on host port 9100.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor
  labels:
    k8s-app: node-exporter
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      # Allow scheduling onto master nodes carrying the NoSchedule taint.
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
      hostNetwork: true
      hostPID: true
      containers:
        - name: prometheus-node-exporter
          image: prom/node-exporter:v1.3.1
          imagePullPolicy: IfNotPresent
          # Point the exporter at the host filesystems mounted below.
          args:
            - --path.procfs=/host/proc
            - --path.sysfs=/host/sys
            - --path.rootfs=/host
          ports:
            - name: metrics
              containerPort: 9100
              hostPort: 9100
              protocol: TCP
          # The exporter only reads host state, so every host path is
          # mounted read-only.
          volumeMounts:
            - name: proc
              mountPath: /host/proc
              readOnly: true
            - name: sys
              mountPath: /host/sys
              readOnly: true
            - name: rootfs
              mountPath: /host
              readOnly: true
              # Let mounts made on the host after pod start become visible
              # inside the container.
              mountPropagation: HostToContainer
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
---
# NodePort Service in front of the node-exporter pods. The
# prometheus.io/scrape annotation marks it for annotation-based discovery.
apiVersion: v1
kind: Service
metadata:
  name: node-exporter
  namespace: monitor
  labels:
    k8s-app: node-exporter
  annotations:
    prometheus.io/scrape: "true"
spec:
  type: NodePort
  selector:
    k8s-app: node-exporter
  ports:
    - name: http
      protocol: TCP
      port: 9100
      # NOTE(review): 39100 is outside Kubernetes' default NodePort range
      # (30000-32767); this is only accepted if the API server's
      # --service-node-port-range has been widened - confirm.
      nodePort: 39100
验证node_exporter数据
三、deployment部署Prometheus server
cat case3-1-prometheus-cfg.yaml
---
# ConfigMap carrying prometheus.yml for the Prometheus server in the
# "monitor" namespace.
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitor
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
      # Node discovery: rewrite the discovered kubelet address (port 10250)
      # to port 9100 and copy node labels onto the target.
      - job_name: 'kubernetes-node'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:9100'
            target_label: __address__
            action: replace
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
      # cAdvisor metrics fetched through the API server's node proxy path.
      - job_name: 'kubernetes-node-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      # API server: keep only the https port of the "kubernetes" service in
      # the "default" namespace.
      - job_name: 'kubernetes-apiserver'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
      # Annotation-driven discovery: scrape endpoints of services annotated
      # prometheus.io/scrape=true, honouring the optional scheme/path/port
      # annotations.
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            # Quoted so the YAML parser treats it as a string, not a boolean.
            regex: 'true'
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_service_name
指定 172.16.88.156(node-01)节点存放 Prometheus 数据,并在该节点上创建数据目录:
mkdir -p /data/prometheusdata
root@easzlab-k8s-master-03:~# chmod 777 /data/prometheusdata
root@easzlab-k8s-master-03:~# chown 65534.65534 /data/prometheusdata -R
# 数据目录的属主必须是 65534(容器内 Prometheus 以 nobody 用户运行),否则 Prometheus pod 会因无法写入数据目录而报错
cat case3-2-prometheus-deployment.yaml
---
# Prometheus server Deployment: a single replica pinned to one node, reading
# its configuration from the prometheus-config ConfigMap and storing TSDB
# data in a hostPath directory on that node.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
    # matchExpressions:
    #   - {key: app, operator: In, values: [prometheus]}
    #   - {key: component, operator: In, values: [server]}
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      # Pin the pod to the node that holds the hostPath data directory.
      nodeName: '172.16.88.156'
      serviceAccountName: monitor
      containers:
        - name: prometheus
          image: prom/prometheus:v2.31.2
          imagePullPolicy: IfNotPresent
          command:
            - prometheus
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus
            # --storage.tsdb.retention is deprecated; retention.time is the
            # current flag for the same setting.
            - --storage.tsdb.retention.time=720h
          ports:
            - containerPort: 9090
              protocol: TCP
          volumeMounts:
            # subPath mounts only the single config file, not a directory.
            - mountPath: /etc/prometheus/prometheus.yml
              name: prometheus-config
              subPath: prometheus.yml
            - mountPath: /prometheus/
              name: prometheus-storage-volume
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
            items:
              - key: prometheus.yml
                path: prometheus.yml
                # Octal file mode (decimal 420) as read by YAML 1.1 parsers.
                mode: 0644
        - name: prometheus-storage-volume
          hostPath:
            path: /data/prometheusdata
            # "Directory" requires the path to exist on the node already.
            type: Directory
root@easzlab-deploy:~/20220911# kubectl create serviceaccount monitor -n monitor
serviceaccount/monitor created
root@easzlab-deploy:~/20220911# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitor --clusterrole=cluster-admin --serviceaccount=monitor:monitor
clusterrolebinding.rbac.authorization.k8s.io/monitor-clusterrolebinding created
root@easzlab-deploy:~/20220911#
root@easzlab-deploy:~/20220911# kubectl apply -f case3-2-prometheus-deployment.yaml
deployment.apps/prometheus-server created
root@easzlab-deploy:~/20220911#
创建Prometheus service
cat case3-3-prometheus-svc.yaml
---
# NodePort Service exposing the Prometheus server (port 9090) on node port
# 30090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
    - port: 9090
      targetPort: 9090
      nodePort: 30090
      protocol: TCP
  selector:
    app: prometheus
    component: server
# NOTE(review): the line below appeared directly after the selector in the
# original notes. It is not one of the pod labels set by the Prometheus
# Deployment, so as a selector entry it would stop the Service from matching
# any pod. It looks like an annotation left over from a manifest whose header
# was lost (presumably a service-account-token Secret - confirm). Kept as a
# comment so the content is not lost:
#   kubernetes.io/service-account.name: "prometheus"
---
# ClusterRole granting the read-only access that Prometheus service
# discovery and scraping need.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Core API group ("") objects used for discovery and node proxy scraping.
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  # Ingresses: "extensions" is kept for old clusters; current clusters serve
  # Ingress from networking.k8s.io.
  - apiGroups:
      - "extensions"
      - "networking.k8s.io"
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  # Non-resource endpoint, e.g. the API server's own /metrics.
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount in
# the "monitor" namespace.
# Previous API version, kept for reference:
# apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  # NOTE(review): the Prometheus Deployment in these notes runs as
  # ServiceAccount "monitor", and no "prometheus" ServiceAccount is created
  # in the visible steps - confirm this subject is the intended one.
  - kind: ServiceAccount
    name: prometheus
    namespace: monitor
四、安装kube-state-metrics
cat case5-kube-state-metrics-deploy.yaml
---
# kube-state-metrics Deployment: a single replica in kube-system, running
# under the kube-state-metrics ServiceAccount and listening on 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: kube-system
  name: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kube-state-metrics
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - image: registry.cn-hangzhou.aliyuncs.com/zhangshijie/kube-state-metrics:v2.6.0
          name: kube-state-metrics
          ports:
            - containerPort: 8080
---
# ServiceAccount that the kube-state-metrics pod runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kube-system
  name: kube-state-metrics
---
# ClusterRole giving kube-state-metrics list/watch access to the objects it
# reports on.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs: ["list", "watch"]
  # Legacy group, kept so the role stays valid for old clusters.
  - apiGroups: ["extensions"]
    resources: ["daemonsets", "deployments", "replicasets"]
    verbs: ["list", "watch"]
  # Workload objects are served from the "apps" group on current clusters;
  # without this rule, listing daemonsets/deployments/replicasets is denied.
  - apiGroups: ["apps"]
    resources: ["statefulsets", "daemonsets", "deployments", "replicasets"]
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources: ["cronjobs", "jobs"]
    verbs: ["list", "watch"]
  - apiGroups: ["autoscaling"]
    resources: ["horizontalpodautoscalers"]
    verbs: ["list", "watch"]
  # NOTE(review): the default kube-state-metrics collectors may list further
  # resource types (e.g. configmaps, ingresses, storageclasses); compare
  # with the upstream cluster-role manifest for the deployed version.
---
# Grants the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: kube-system
    name: kube-state-metrics
roleRef:
  kind: ClusterRole
  name: kube-state-metrics
  apiGroup: rbac.authorization.k8s.io
---
# NodePort Service for kube-state-metrics (port 8080, node port 31666),
# annotated with prometheus.io/scrape for annotation-based discovery.
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    app: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
spec:
  type: NodePort
  selector:
    app: kube-state-metrics
  ports:
    - name: kube-state-metrics
      protocol: TCP
      port: 8080
      targetPort: 8080
      nodePort: 31666