
Java implementation of AES encryption/decryption

Add the dependency


<dependencies>
    <dependency>
        <groupId>commons-codec</groupId>
        <artifactId>commons-codec</artifactId>
        <version>1.15</version>
    </dependency>
</dependencies>

Code

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import javax.crypto.spec.SecretKeySpec;
import java.nio.charset.StandardCharsets;
import java.security.NoSuchAlgorithmException;
import java.security.Provider;
import java.security.SecureRandom;
import java.util.Base64;
import java.util.Optional;
import java.util.UUID;

public class SecretUtil {
private static final Logger log = LoggerFactory.getLogger(SecretUtil.class);
private static final String AES = "AES";

private SecretUtil() {
}

/**
* Generate an AES key.
* The key is returned Base64-encoded; decode it with Base64.getDecoder().decode(keyStr) before use.
*
* @return key (Base64)
*/
public static String generateAesKey() {
return generateAesKey(null);
}

/**
* Generate an AES key.
* The key is returned Base64-encoded; decode it with Base64.getDecoder().decode(keyStr) before use.
*
* @param word optional seed material mixed into the SecureRandom seed
* @return key (Base64)
*/
public static String generateAesKey(String word) {
try {
// create an AES key generator
KeyGenerator gen = KeyGenerator.getInstance(AES);
// seed the SecureRandom with the optional word plus a random UUID
gen.init(128, new SecureRandom((Optional.ofNullable(word).orElse("") + UUID.randomUUID()).getBytes()));
// SecureRandom only seeds key generation here; the resulting key must be stored and shared for decryption
// generate the key
SecretKey secretKey = gen.generateKey();
// return the key in its primary encoding, Base64-encoded; returns null if generation fails
return Base64.getEncoder().encodeToString(secretKey.getEncoded());
} catch (NoSuchAlgorithmException e) {
log.error("生成 AES key 失败:{}", e.getMessage(), e);
}
return null;
}

/**
* AES encryption
*
* @param content plaintext to encrypt
* @param key     key (Base64)
* @return ciphertext (Base64)
*/
public static String aesEncrypt(String content, String key) {
return encrypt(content, Base64.getDecoder().decode(key), AES, null);
}


/**
* AES decryption
*
* @param content ciphertext (Base64)
* @param key     key (Base64)
* @return decrypted plaintext
*/
public static String aesDecrypt(String content, String key) {
return decrypt(content, Base64.getDecoder().decode(key), AES, null);
}

public static String encrypt(String content, byte[] key, String algorithm, Provider provider) {
try {
Cipher cipher = provider == null
? Cipher.getInstance(algorithm)
: Cipher.getInstance(algorithm, provider);
cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(key, algorithm));
byte[] encryptStr = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
return Base64.getEncoder().encodeToString(encryptStr);
} catch (Exception e) {
log.error("{} 数据加密失败:{}", algorithm, e.getMessage(), e);
return null;
}
}

public static String decrypt(String content, byte[] key, String algorithm, Provider provider) {
try {
byte[] encryptByte = Base64.getDecoder().decode(content);
Cipher cipher = provider == null
? Cipher.getInstance(algorithm)
: Cipher.getInstance(algorithm, provider);
cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(key, algorithm));
byte[] decryptBytes = cipher.doFinal(encryptByte);
return new String(decryptBytes, StandardCharsets.UTF_8);
} catch (Exception e) {
log.error("{} 数据解密失败:{}", algorithm, e.getMessage(), e);
return null;
}
}
}
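A minimal usage sketch of the class above (the demo class name is illustrative only; note that the bare "AES" transformation usually resolves to AES/ECB/PKCS5Padding, so no IV is involved):

public class SecretUtilAesDemo {
    public static void main(String[] args) {
        // generate a Base64-encoded 128-bit AES key
        String key = SecretUtil.generateAesKey("my-password");
        // round trip: encrypt, then decrypt with the same key
        String cipherText = SecretUtil.aesEncrypt("hello aes", key);
        String plainText = SecretUtil.aesDecrypt(cipherText, key);
        System.out.println("key        = " + key);
        System.out.println("cipherText = " + cipherText);
        System.out.println("plainText  = " + plainText);
    }
}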

Original post: https://github.com/maxzhao-it/blog/post/e89f3b77/

SM2 is an elliptic-curve public-key cryptography algorithm published by the Chinese State Cryptography Administration on December 17, 2010.

Java implementation of SM2 encryption/decryption

Add the dependencies


<dependencies>
    <dependency>
        <groupId>org.bouncycastle</groupId>
        <artifactId>bcprov-jdk15to18</artifactId>
        <version>1.71</version>
    </dependency>
    <dependency>
        <groupId>commons-codec</groupId>
        <artifactId>commons-codec</artifactId>
        <version>1.15</version>
    </dependency>
</dependencies>

Code

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.bouncycastle.jce.provider.BouncyCastleProvider;

import javax.crypto.Cipher;
import java.nio.charset.StandardCharsets;
import java.security.*;
import java.security.spec.EncodedKeySpec;
import java.security.spec.PKCS8EncodedKeySpec;
import java.security.spec.X509EncodedKeySpec;
import java.util.Base64;

public class SecretUtil {
private static final Logger log = LoggerFactory.getLogger(SecretUtil.class);
private static final Provider BOUNCY_CASTLE_PROVIDER = new BouncyCastleProvider();
private static final String SM2 = "SM2";
private static final String SM2_ALGORITHM = "EC";
private static final int SM2_KEY_SIZE = 256;

private SecretUtil() {
}


/**
* Generate an SM2 key pair
*
* @return [publicKey, privateKey]
*/
public static String[] generateSm2Key() {
return generatePairKey(SM2_ALGORITHM, SM2_KEY_SIZE, BOUNCY_CASTLE_PROVIDER);
}

private static String[] generatePairKey(String algorithm, int keySize, Provider provider) {
KeyPairGenerator keyPairGenerator = null;
try {
/* KeyPairGenerator creates public/private key pairs for the given algorithm (EC here) */
keyPairGenerator = provider == null
? KeyPairGenerator.getInstance(algorithm)
: KeyPairGenerator.getInstance(algorithm, provider);
/* initialize the key pair generator */
keyPairGenerator.initialize(keySize, new SecureRandom());
} catch (NoSuchAlgorithmException e) {
log.warn("{} key pair generation failed", algorithm, e);
}
/* check whether the generator was created successfully */
if (keyPairGenerator == null) {
/* return an empty array */
return new String[0];
}
/* generate the key pair */
KeyPair keyPair = keyPairGenerator.generateKeyPair();
PublicKey publicKey = keyPair.getPublic();
byte[] publicKeyEncoded = publicKey.getEncoded();
String publicKeyString = Base64.getEncoder().encodeToString(publicKeyEncoded);
PrivateKey privateKey = keyPair.getPrivate();
byte[] privateKeyEncoded = privateKey.getEncoded();
String privateKeyString = Base64.getEncoder().encodeToString(privateKeyEncoded);
return new String[]{publicKeyString, privateKeyString};
}

/**
* SM2 encryption
*
* @param content plaintext to encrypt
* @param key     public key (Base64)
* @return ciphertext (Base64)
*/
public static String sm2Encrypt(String content, String key) {
return encryptPk(content, key, SM2, BOUNCY_CASTLE_PROVIDER);
}

public static String encryptPk(String content, String key, String algorithm, Provider provider) {
try {
EncodedKeySpec keySpec = new X509EncodedKeySpec(Base64.getDecoder().decode(key));
KeyFactory keyFactory = KeyFactory.getInstance(SM2.equalsIgnoreCase(algorithm) ? SM2_ALGORITHM : algorithm);
Cipher cipher = provider == null
? Cipher.getInstance(algorithm)
: Cipher.getInstance(algorithm, provider);
cipher.init(Cipher.ENCRYPT_MODE, keyFactory.generatePublic(keySpec));
byte[] encryptStr = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
return Base64.getEncoder().encodeToString(encryptStr);
} catch (Exception e) {
log.error("{} 数据加密失败:{}", algorithm, e.getMessage(), e);
return null;
}
}

/**
* SM2 decryption
*
* @param content ciphertext (Base64)
* @param key     private key (Base64)
* @return decrypted plaintext
*/
public static String sm2Decrypt(String content, String key) {
return decryptPk(content, key, SM2, BOUNCY_CASTLE_PROVIDER);
}

public static String decryptPk(String content, String key, String algorithm, Provider provider) {
try {
EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(Base64.getDecoder().decode(key));
KeyFactory keyFactory = KeyFactory.getInstance(SM2.equalsIgnoreCase(algorithm) ? SM2_ALGORITHM : algorithm);
Cipher cipher = provider == null
? Cipher.getInstance(algorithm)
: Cipher.getInstance(algorithm, provider);
cipher.init(Cipher.DECRYPT_MODE, keyFactory.generatePrivate(keySpec));
byte[] decryptBytes = cipher.doFinal(Base64.getDecoder().decode(content));
return new String(decryptBytes, StandardCharsets.UTF_8);
} catch (Exception e) {
log.error("{} 数据解密失败:{}", algorithm, e.getMessage(), e);
return null;
}
}
}
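A minimal usage sketch (the demo class name is illustrative only). One caveat: generatePairKey initializes the generic "EC" generator with a key size of 256, so the provider picks the curve; to pin the SM2 curve explicitly one would typically initialize the generator with new ECGenParameterSpec("sm2p256v1") instead of a key size.

public class Sm2Demo {
    public static void main(String[] args) {
        // keys[0] = Base64 public key (X.509), keys[1] = Base64 private key (PKCS#8)
        String[] keys = SecretUtil.generateSm2Key();
        String cipherText = SecretUtil.sm2Encrypt("hello sm2", keys[0]);
        String plainText = SecretUtil.sm2Decrypt(cipherText, keys[1]);
        System.out.println("cipherText = " + cipherText);
        System.out.println("plainText  = " + plainText);
    }
}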

Original post: https://github.com/maxzhao-it/blog/post/a97a9b/

SM4 is a block cipher algorithm published by the Chinese State Cryptography Administration on March 21, 2012.

Java implementation of SM4 encryption/decryption

Add the dependencies


<dependencies>
    <dependency>
        <groupId>org.bouncycastle</groupId>
        <artifactId>bcprov-jdk15to18</artifactId>
        <version>1.71</version>
    </dependency>
    <dependency>
        <groupId>commons-codec</groupId>
        <artifactId>commons-codec</artifactId>
        <version>1.15</version>
    </dependency>
</dependencies>

Code

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.bouncycastle.jce.provider.BouncyCastleProvider;

import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.spec.SecretKeySpec;
import java.nio.charset.StandardCharsets;
import java.security.NoSuchAlgorithmException;
import java.security.Provider;
import java.security.SecureRandom;
import java.util.Base64;

public class SecretUtil {
private static final Logger log = LoggerFactory.getLogger(SecretUtil.class);
private static final Provider BOUNCY_CASTLE_PROVIDER = new BouncyCastleProvider();
private static final String SM4 = "SM4";

private SecretUtil() {
}

/**
* Generate an SM4 key
*
* @return key (hex-encoded)
*/
public static String generateSm4Key() {
try {
KeyGenerator kg = KeyGenerator.getInstance(SM4, BOUNCY_CASTLE_PROVIDER);
kg.init(128, new SecureRandom());
byte[] encoded = kg.generateKey().getEncoded();
return new String(Hex.encodeHex(encoded));
} catch (NoSuchAlgorithmException e) {
log.error("生成 SM4 key 失败:{}", e.getMessage(), e);
return null;
}
}

/**
* SM4 encryption
*
* @param content plaintext to encrypt
* @param key     key (hex-encoded)
* @return ciphertext (Base64)
*/
public static String sm4Encrypt(String content, String key) {
try {
return encrypt(content, Hex.decodeHex(key), SM4, BOUNCY_CASTLE_PROVIDER);
} catch (DecoderException e) {
log.error("SM4 数据加密失败,key 无法解析:{}", e.getMessage(), e);
return null;
}
}


/**
* SM4 decryption
*
* @param content ciphertext (Base64)
* @param key     key (hex-encoded)
* @return decrypted plaintext
*/
public static String sm4Decrypt(String content, String key) {
try {
return decrypt(content, Hex.decodeHex(key), SM4, BOUNCY_CASTLE_PROVIDER);
} catch (DecoderException e) {
log.error("SM4 数据解密失败,key 无法解析:{}", e.getMessage(), e);
return null;
}
}

public static String encrypt(String content, byte[] key, String algorithm, Provider provider) {
try {
Cipher cipher = provider == null
? Cipher.getInstance(algorithm)
: Cipher.getInstance(algorithm, provider);
cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(key, algorithm));
byte[] encryptStr = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
return Base64.getEncoder().encodeToString(encryptStr);
} catch (Exception e) {
log.error("{} 数据加密失败:{}", algorithm, e.getMessage(), e);
return null;
}
}

public static String decrypt(String content, byte[] key, String algorithm, Provider provider) {
try {
byte[] encryptByte = Base64.getDecoder().decode(content);
Cipher cipher = provider == null
? Cipher.getInstance(algorithm)
: Cipher.getInstance(algorithm, provider);
cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(key, algorithm));
byte[] decryptBytes = cipher.doFinal(encryptByte);
return new String(decryptBytes, StandardCharsets.UTF_8);
} catch (Exception e) {
log.error("{} 数据解密失败:{}", algorithm, e.getMessage(), e);
return null;
}
}
}
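A minimal usage sketch (the demo class name is illustrative only; like the AES version, the bare "SM4" transformation uses the provider's default mode and padding):

public class Sm4Demo {
    public static void main(String[] args) {
        // generate a hex-encoded 128-bit SM4 key
        String key = SecretUtil.generateSm4Key();
        String cipherText = SecretUtil.sm4Encrypt("hello sm4", key);
        String plainText = SecretUtil.sm4Decrypt(cipherText, key);
        System.out.println("key        = " + key);
        System.out.println("cipherText = " + cipherText);
        System.out.println("plainText  = " + plainText);
    }
}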

Original post: https://github.com/maxzhao-it/blog/post/848ae74e/

Introduction

etcd is an open-source distributed key-value store for the most critical data of distributed systems. It replicates data across multiple machines, so it remains highly available in the face of single points of failure. Using the Raft consensus algorithm, etcd gracefully handles network partitions and machine failures, including leader failures. etcd is widely used in production: CoreOS, Kubernetes, YouTube Doorman, and more.

The hosts used here:

  • 192.168.2.158 etcd-158
  • 192.168.2.159 etcd-159
  • 192.168.2.160 etcd-160

etcd cluster guide

CA

Generate the CA

Synchronize the clocks

yum install  ntpdate -y 
ntpdate time1.aliyun.com

etcd GitHub releases

rm -f ~/etcd-v3.5.4-linux-amd64.tar.gz
rm -rf /opt/etcd && mkdir -p /opt/etcd
curl -L https://github.com/etcd-io/etcd/releases/download/v3.5.4/etcd-v3.5.4-linux-amd64.tar.gz -o ~/etcd-v3.5.4-linux-amd64.tar.gz
tar xzvf ~/etcd-v3.5.4-linux-amd64.tar.gz -C /opt/etcd --strip-components=1
# rm -f ~/etcd-v3.5.4-linux-amd64.tar.gz
/opt/etcd/etcd --version
/opt/etcd/etcdctl version
/opt/etcd/etcdutl version
# copy the etcd binaries to the cluster nodes (192.168.2.161 is included for a later member-add)
scp /opt/etcd/etcd root@192.168.2.158:/usr/local/bin/
scp /usr/local/bin/etcd root@192.168.2.159:/usr/local/bin/
scp /usr/local/bin/etcd root@192.168.2.160:/usr/local/bin/
scp /usr/local/bin/etcd root@192.168.2.161:/usr/local/bin/
scp /opt/etcd/etcdctl root@192.168.2.158:/usr/local/bin/
scp /usr/local/bin/etcdctl root@192.168.2.159:/usr/local/bin/
scp /usr/local/bin/etcdctl root@192.168.2.160:/usr/local/bin/
scp /usr/local/bin/etcdctl root@192.168.2.161:/usr/local/bin/
# start a local etcd server
#/opt/etcd/etcd
# write,read to etcd
#/opt/etcd/etcdctl --endpoints=localhost:2379 put foo bar
#/opt/etcd/etcdctl --endpoints=localhost:2379 get foo

etcd example configuration page

Generating self-signed TLS certificates with cfssl

Run on host158

Generate the self-signed root CA certificate

#rm -f /opt/cfssl* 
rm -rf /opt/certs
mkdir -p /opt/certs
cd /opt/certs
curl -L https://github.com/cloudflare/cfssl/releases/download/v1.6.1/cfssl_1.6.1_linux_amd64 -o /usr/local/bin/cfssl
chmod +x /usr/local/bin/cfssl
curl -L https://github.com/cloudflare/cfssl/releases/download/v1.6.1/cfssljson_1.6.1_linux_amd64 -o /usr/local/bin/cfssljson
chmod +x /usr/local/bin/cfssljson
curl -L https://github.com/cloudflare/cfssl/releases/download/v1.5.0/cfssl-certinfo_1.5.0_linux_amd64 -o /usr/local/bin/cfssl-certinfo
chmod +x /usr/local/bin/cfssl-certinfo
# check the versions
/usr/local/bin/cfssl version
/usr/local/bin/cfssljson -h

Generate the signing request and configuration

# create the root CA certificate signing request (CSR) file
cat > /opt/certs/ca-csr.json <<EOF
{
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"O": "maxzhao-ca",
"OU": "etcd Security",
"L": "NanJing",
"ST": "Jiang Su",
"C": "CN"
}
],
"CN": "maxzhao"
}
EOF
# CN (Common Name): kube-apiserver extracts this field from the certificate as the request user name
# O (Organization): kube-apiserver extracts this field as the group the requesting user belongs to
# kube-apiserver uses the extracted User and Group as identities for RBAC authorization

# certificate signing configuration file
cat > /opt/certs/ca-config.json <<EOF
{
"signing": {
"default": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "175200h"
},
"profiles": {
"kubernetes": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "175200h"
},
"etcd": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "175200h"
}
}
}
}
EOF
# signing: this certificate may sign other certificates (CA=TRUE in the generated ca.pem)
# server auth: clients may use this CA to verify certificates presented by servers
# client auth: servers may use this CA to verify certificates presented by clients
# "expiry": "175200h" means the certificates are valid for 20 years

Generate the CA certificate and private key

# generate
cfssl gencert --initca /opt/certs/ca-csr.json | cfssljson --bare /opt/certs/ca

# verify
openssl x509 -in /opt/certs/ca.pem -text -noout

Result

# CSR configuration
/opt/certs/ca-csr.json
# certificate signing request
/opt/certs/ca.csr
# self-signed root CA certificate (other documents call this ca.crt)
/opt/certs/ca.pem
# self-signed root CA private key
/opt/certs/ca-key.pem
# signing configuration used for the other TLS assets
/opt/certs/ca-config.json

Issue node certificates signed by the CA

# peer/server certificate signing request
cat > /opt/certs/etcd-158-ca-csr.json <<EOF
{
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"O": "maxzhao-ca",
"OU": "etcd Security",
"L": "NanJing",
"ST": "Jiang Su",
"C": "CN"
}
],
"CN": "etcd-158",
"hosts": [
"127.0.0.1",
"192.168.2.158",
"192.168.2.159",
"192.168.2.160",
"192.168.2.161",
"kubernetes",
"kubernetes.default",
"kubernetes.default.svc",
"kubernetes.default.svc.cluster",
"kubernetes.default.svc.cluster.local"
]
}
EOF
# generate the certificate files used by etcd (peer/server/client)
cfssl gencert \
--ca /opt/certs/ca.pem \
--ca-key /opt/certs/ca-key.pem \
--config /opt/certs/ca-config.json \
-profile=etcd \
/opt/certs/etcd-158-ca-csr.json | cfssljson --bare /opt/certs/etcd-158
# the etcd profile enables both server auth and client auth, so the same certificate can be used for mutual TLS
# verify
openssl x509 -in /opt/certs/etcd-158.pem -text -noout

After generation

# distribute the certificates to every node
rm -rf /etc/certs/etcd
mkdir -p /etc/certs/etcd
\cp /opt/certs/ca.pem /etc/certs/etcd/ca.pem
\cp /opt/certs/etcd-158-key.pem /etc/certs/etcd/etcd-158-key.pem
\cp /opt/certs/etcd-158.pem /etc/certs/etcd/etcd-158.pem
# copy ca.pem, etcd-158.pem and etcd-158-key.pem
ssh root@192.168.2.159 "mkdir -p /etc/certs/etcd"
ssh root@192.168.2.160 "mkdir -p /etc/certs/etcd"
ssh root@192.168.2.161 "mkdir -p /etc/certs/etcd"
scp -r /etc/certs/etcd/* root@192.168.2.159:/etc/certs/etcd/
scp -r /etc/certs/etcd/* root@192.168.2.160:/etc/certs/etcd/
scp -r /etc/certs/etcd/* root@192.168.2.161:/etc/certs/etcd/

Create the etcd user and group

Run on host158

groupadd etcd && useradd -g etcd etcd && echo '1' | passwd --stdin etcd
ssh root@192.168.2.159 "groupadd etcd && useradd -g etcd etcd && echo '1' | passwd --stdin etcd"
ssh root@192.168.2.160 "groupadd etcd && useradd -g etcd etcd && echo '1' | passwd --stdin etcd"

Run (foreground test)

host158

rm -rf /opt/etcd/etcd_data-158
# --listen-client-urls          : client-facing listen address; must be an IP, not a hostname
# --advertise-client-urls       : client URL advertised to the cluster; a hostname is allowed
# --listen-peer-urls            : peer-to-peer listen address, different on every node; must be an IP
# --initial-advertise-peer-urls : peer URL announced to the other cluster members
# --initial-cluster             : all cluster members, as name=peer-URL pairs
/usr/local/bin/etcd --name etcd-158 \
--data-dir /opt/etcd/etcd_data-158 \
--listen-client-urls https://192.168.2.158:2379 \
--advertise-client-urls https://192.168.2.158:2379 \
--listen-peer-urls https://192.168.2.158:2380 \
--initial-advertise-peer-urls https://192.168.2.158:2380 \
--initial-cluster etcd-158=https://192.168.2.158:2380,etcd-159=https://192.168.2.159:2380,etcd-160=https://192.168.2.160:2380 \
--initial-cluster-token etcd-k8s-158 \
--initial-cluster-state new \
--client-cert-auth \
--trusted-ca-file /etc/certs/etcd/ca.pem \
--cert-file /etc/certs/etcd/etcd-158.pem \
--key-file /etc/certs/etcd/etcd-158-key.pem \
--peer-client-cert-auth \
--peer-trusted-ca-file /etc/certs/etcd/ca.pem \
--peer-cert-file /etc/certs/etcd/etcd-158.pem \
--peer-key-file /etc/certs/etcd/etcd-158-key.pem

host159

rm -rf /opt/etcd/etcd_data-159
/opt/etcd/etcd --name etcd-159 \
--data-dir /opt/etcd/etcd_data-159 \
--listen-client-urls https://192.168.2.159:2379 \
--advertise-client-urls https://192.168.2.159:2379 \
--listen-peer-urls https://192.168.2.159:2380 \
--initial-advertise-peer-urls https://192.168.2.159:2380 \
--initial-cluster etcd-158=https://192.168.2.158:2380,etcd-159=https://192.168.2.159:2380,etcd-160=https://192.168.2.160:2380 \
--initial-cluster-token etcd-k8s-158 \
--initial-cluster-state new \
--client-cert-auth \
--trusted-ca-file /etc/certs/etcd/ca.pem \
--cert-file /etc/certs/etcd/etcd-158.pem \
--key-file /etc/certs/etcd/etcd-158-key.pem \
--peer-client-cert-auth \
--peer-trusted-ca-file /etc/certs/etcd/ca.pem \
--peer-cert-file /etc/certs/etcd/etcd-158.pem \
--peer-key-file /etc/certs/etcd/etcd-158-key.pem

host160

rm -rf /opt/etcd/etcd_data-160
/opt/etcd/etcd --name etcd-160 \
--data-dir /opt/etcd/etcd_data-160 \
--listen-client-urls https://192.168.2.160:2379 \
--advertise-client-urls https://192.168.2.160:2379 \
--listen-peer-urls https://192.168.2.160:2380 \
--initial-advertise-peer-urls https://192.168.2.160:2380 \
--initial-cluster etcd-158=https://192.168.2.158:2380,etcd-159=https://192.168.2.159:2380,etcd-160=https://192.168.2.160:2380 \
--initial-cluster-token etcd-k8s-158 \
--initial-cluster-state new \
--client-cert-auth \
--trusted-ca-file /etc/certs/etcd/ca.pem \
--cert-file /etc/certs/etcd/etcd-158.pem \
--key-file /etc/certs/etcd/etcd-158-key.pem \
--peer-client-cert-auth \
--peer-trusted-ca-file /etc/certs/etcd/ca.pem \
--peer-cert-file /etc/certs/etcd/etcd-158.pem \
--peer-key-file /etc/certs/etcd/etcd-158-key.pem

Check cluster health

ETCDCTL_API=3 /opt/etcd/etcdctl \
--endpoints 192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379 \
--cacert /etc/certs/etcd/ca.pem \
--cert /etc/certs/etcd/etcd-158.pem \
--key /etc/certs/etcd/etcd-158-key.pem \
endpoint health

Configure the systemd service

Note: the service only starts successfully after all three nodes have been configured and started.

host158

rm -rf /opt/etcd/etcd_data
cat > /etc/systemd/system/etcd.service <<EOF
[Unit]
Description=etcd
Documentation=https://github.com/coreos/etcd
Conflicts=etcd.service
Conflicts=etcd2.service

[Service]
Type=notify
Restart=always
RestartSec=5s
LimitNOFILE=40000
TimeoutStartSec=0

ExecStart=/usr/local/bin/etcd --name etcd-158 \
--data-dir /opt/etcd/etcd_data \
--listen-client-urls https://192.168.2.158:2379 \
--advertise-client-urls https://192.168.2.158:2379 \
--listen-peer-urls https://192.168.2.158:2380 \
--initial-advertise-peer-urls https://192.168.2.158:2380 \
--initial-cluster etcd-158=https://192.168.2.158:2380,etcd-159=https://192.168.2.159:2380,etcd-160=https://192.168.2.160:2380 \
--initial-cluster-token etcd-k8s-158 \
--initial-cluster-state new \
--client-cert-auth \
--trusted-ca-file /etc/certs/etcd/ca.pem \
--cert-file /etc/certs/etcd/etcd-158.pem \
--key-file /etc/certs/etcd/etcd-158-key.pem \
--peer-client-cert-auth \
--peer-trusted-ca-file /etc/certs/etcd/ca.pem \
--peer-cert-file /etc/certs/etcd/etcd-158.pem \
--peer-key-file /etc/certs/etcd/etcd-158-key.pem

[Install]
WantedBy=multi-user.target
EOF
# to start service
sudo systemctl daemon-reload
#sudo systemctl cat etcd.service
sudo systemctl enable etcd.service
sudo systemctl start etcd.service
sudo systemctl status etcd.service

host159

rm -rf /opt/etcd/etcd_data
cat > /etc/systemd/system/etcd.service <<EOF
[Unit]
Description=etcd
Documentation=https://github.com/coreos/etcd
Conflicts=etcd.service
Conflicts=etcd2.service

[Service]
Type=notify
Restart=always
RestartSec=5s
LimitNOFILE=40000
TimeoutStartSec=0

ExecStart=/usr/local/bin/etcd --name etcd-159 \
--data-dir /opt/etcd/etcd_data \
--listen-client-urls https://192.168.2.159:2379 \
--advertise-client-urls https://192.168.2.159:2379 \
--listen-peer-urls https://192.168.2.159:2380 \
--initial-advertise-peer-urls https://192.168.2.159:2380 \
--initial-cluster etcd-158=https://192.168.2.158:2380,etcd-159=https://192.168.2.159:2380,etcd-160=https://192.168.2.160:2380 \
--initial-cluster-token etcd-k8s-158 \
--initial-cluster-state new \
--client-cert-auth \
--trusted-ca-file /etc/certs/etcd/ca.pem \
--cert-file /etc/certs/etcd/etcd-158.pem \
--key-file /etc/certs/etcd/etcd-158-key.pem \
--peer-client-cert-auth \
--peer-trusted-ca-file /etc/certs/etcd/ca.pem \
--peer-cert-file /etc/certs/etcd/etcd-158.pem \
--peer-key-file /etc/certs/etcd/etcd-158-key.pem

[Install]
WantedBy=multi-user.target
EOF
# to start service
sudo systemctl daemon-reload
#sudo systemctl cat etcd.service
sudo systemctl enable etcd.service
sudo systemctl start etcd.service
sudo systemctl status etcd.service

host160

rm -rf /opt/etcd/etcd_data
cat > /etc/systemd/system/etcd.service <<EOF
[Unit]
Description=etcd
Documentation=https://github.com/coreos/etcd
Conflicts=etcd.service
Conflicts=etcd2.service

[Service]
Type=notify
Restart=always
RestartSec=5s
LimitNOFILE=40000
TimeoutStartSec=0

ExecStart=/usr/local/bin/etcd --name etcd-160 \
--data-dir /opt/etcd/etcd_data \
--listen-client-urls https://192.168.2.160:2379 \
--advertise-client-urls https://192.168.2.160:2379 \
--listen-peer-urls https://192.168.2.160:2380 \
--initial-advertise-peer-urls https://192.168.2.160:2380 \
--initial-cluster etcd-158=https://192.168.2.158:2380,etcd-159=https://192.168.2.159:2380,etcd-160=https://192.168.2.160:2380 \
--initial-cluster-token etcd-k8s-158 \
--initial-cluster-state new \
--client-cert-auth \
--trusted-ca-file /etc/certs/etcd/ca.pem \
--cert-file /etc/certs/etcd/etcd-158.pem \
--key-file /etc/certs/etcd/etcd-158-key.pem \
--peer-client-cert-auth \
--peer-trusted-ca-file /etc/certs/etcd/ca.pem \
--peer-cert-file /etc/certs/etcd/etcd-158.pem \
--peer-key-file /etc/certs/etcd/etcd-158-key.pem

[Install]
WantedBy=multi-user.target
EOF
# to start service
sudo systemctl daemon-reload
#sudo systemctl cat etcd.service
sudo systemctl enable etcd.service
sudo systemctl start etcd.service
sudo systemctl status etcd.service

Check service status

# to get logs from service
sudo systemctl status etcd.service -l --no-pager
sudo journalctl -u etcd.service -l --no-pager|less
sudo journalctl -f -u etcd.service
sudo journalctl -xe -u etcd.service
# to stop service
sudo systemctl stop etcd.service
sudo systemctl disable etcd.service

ETCDCTL_API=3 /opt/etcd/etcdctl \
--endpoints 192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379 \
--cacert /etc/certs/etcd/ca.pem \
--cert /etc/certs/etcd/etcd-158.pem \
--key /etc/certs/etcd/etcd-158-key.pem \
endpoint health


List members

ENDPOINTS=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379
ETCD_AUTH='--cacert /etc/certs/etcd/ca.pem --cert /etc/certs/etcd/etcd-158.pem --key /etc/certs/etcd/etcd-158-key.pem '
etcdctl --endpoints=${ENDPOINTS} member list
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} member list

Create users and enable authentication

export ETCDCTL_API=3
ENDPOINTS=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379
ETCD_AUTH='--cacert /etc/certs/etcd/ca.pem --cert /etc/certs/etcd/etcd-158.pem --key /etc/certs/etcd/etcd-158-key.pem '
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} role add root
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} role get root
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} user add root
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} user grant-role root root
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} user get root
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} auth enable
# fails: authentication is now enabled, so credentials are required
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} user list
# works (assuming the root password was set to "1" when the user was added)
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 user list

# now all client requests go through auth
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 put foo bar
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 get foo
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 --write-out="json" get foo
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 put foo1 bar
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 put foo2 bar
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 get fo --prefix
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 del foo1 --prefix
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 get foo
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 get foo1
# disable authentication
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 auth disable

Access

Note: every etcdctl call below needs the endpoint list and the TLS options:

export ETCDCTL_API=3
ENDPOINTS=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379
ETCD_AUTH='--cacert /etc/certs/etcd/ca.pem --cert /etc/certs/etcd/etcd-158.pem --key /etc/certs/etcd/etcd-158-key.pem'
# each command below is then invoked as: etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} ...

Put key-value pairs

export ETCDCTL_API=3
ENDPOINTS=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put foo "Hello World!"
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} get foo
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} --write-out="json" get foo

Get keys by prefix

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put web1 value1
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put web2 value2
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put web3 value3

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} get web --prefix

Multiple writes in a transaction

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put user1 bad
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} txn --interactive

compares:
value("user1") = "bad"

success requests (get, put, delete):
del user1

failure requests (get, put, delete):
put user1 good

Watch keys

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} watch stock1
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put stock1 1000

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} watch stock --prefix
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put stock1 10
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put stock2 20

Create a lease

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} lease grant 300
# lease 2be7547fbc6a5afa granted with TTL(300s)

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} put sample value --lease=2be7547fbc6a5afa
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} get sample

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} lease keep-alive 2be7547fbc6a5afa
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} lease revoke 2be7547fbc6a5afa
# or after 300 seconds
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} get sample

Create locks

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} lock mutex1

# another client with the same name blocks
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} lock mutex1

Leader election in the etcd cluster

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} elect one p1

# another client with the same name blocks
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} elect one p2

Check endpoint status

etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} endpoint health
etcdctl --write-out=table --endpoints=${ENDPOINTS} ${ETCD_AUTH} endpoint status

Save a database snapshot

Snapshot can only be requested from one etcd node, so --endpoints flag should contain only one endpoint.

ENDPOINTS=192.168.2.158:2379
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} snapshot save my.db

Snapshot saved at my.db
etcdctl --write-out=table --endpoints=${ENDPOINTS} ${ETCD_AUTH} snapshot status my.db

+---------+----------+------------+------------+
| HASH | REVISION | TOTAL KEYS | TOTAL SIZE |
+---------+----------+------------+------------+
| c55e8b8 | 9 | 13 | 25 kB |
+---------+----------+------------+------------+

Add and remove members

export ETCDCTL_API=3
ENDPOINTS=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379
# get member ID
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} member list

# remove the member
MEMBER_ID=278c654c9a6dfd3b
etcdctl --endpoints=${ENDPOINTS} ${ETCD_AUTH} \
member remove ${MEMBER_ID}

# add a new member (node 4); the peer URL must use the same scheme the cluster uses (https here)
export ETCDCTL_API=3
# new member
etcdctl --endpoints=192.168.2.158:2379,192.168.2.159:2379 ${ETCD_AUTH} \
member add etcd-161 \
--peer-urls=https://192.168.2.161:2380

Fault tolerance

For an etcd cluster it is recommended to run an odd number of nodes. The table below shows how many failed nodes a cluster of a given size can tolerate (the majority, or quorum, is floor(n/2) + 1):

Cluster size   Majority   Max failures tolerated
1              1          0
2              2          0
3              2          1
4              3          1
5              3          2
6              4          2
7              4          3
8              5          3
9              5          4
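A small sketch of the arithmetic behind the table (plain Java, names are illustrative): the majority (quorum) is floor(n/2) + 1, and the tolerable failures are the cluster size minus the majority.

public class EtcdQuorum {
    // quorum needed for the cluster to make progress: floor(n / 2) + 1
    static int majority(int clusterSize) {
        return clusterSize / 2 + 1;
    }

    // number of members that may fail while the cluster stays available
    static int faultTolerance(int clusterSize) {
        return clusterSize - majority(clusterSize);
    }

    public static void main(String[] args) {
        for (int n = 1; n <= 9; n++) {
            System.out.printf("%d nodes -> majority %d, tolerates %d failures%n",
                    n, majority(n), faultTolerance(n));
        }
    }
}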

Original post: https://github.com/maxzhao-it/blog/post/ebede6de/

Add a user

useradd yy

Install Prometheus:

prometheus-2.36.2.windows-amd64.zip
prometheus-2.36.2.linux-amd64.tar.gz
prometheus-web-ui-2.36.2.tar.gz

mkdir ~/tools 
cd ~/tools
wget https://github.com/prometheus/prometheus/releases/download/v2.36.2/prometheus-2.36.2.linux-amd64.tar.gz
tar xfz ~/tools/prometheus-2.36.2.linux-amd64.tar.gz -C ~/
cd ~/
mv prometheus-2.36.2.linux-amd64 prometheus
cd prometheus

Configuration

No changes are needed.

cp ~/prometheus/prometheus.yml ~/prometheus/prometheus.yml-bak
cat ~/prometheus/prometheus.yml

Start

n9e_home=/home/yy/prometheus
nohup ${n9e_home}/prometheus --config.file=${n9e_home}/prometheus.yml --storage.tsdb.path=${n9e_home}/data --web.enable-lifecycle --enable-feature=remote-write-receiver --query.lookback-delta=2m &

Nightingale (n9e)

Download

mkdir ~/tools 
cd ~/tools
wget https://github.com/ccfos/nightingale/releases/download/v5.12.0/n9e-v5.12.0-linux-amd64.tar.gz
mkdir ~/n9e
tar zxf ~/tools/n9e-v5.12.0-linux-amd64.tar.gz -C ~/n9e/

n9e-fe-5.6.0.tar.gz
n9e-v5.12.0-linux-amd64.tar.gz

Configuration

# configure the Redis and DB connections in these files
~/n9e/etc/server.conf
~/n9e/etc/webapi.conf

System limits

vim /etc/security/limits.conf

Append at the end:

*    soft    nproc     65535
*    hard    nproc     65535
*    soft    nofile    65535
*    hard    nofile    65535

Start

cd /home/yy/n9e
nohup /home/yy/n9e/n9e server &> server.log &
nohup /home/yy/n9e/n9e webapi &> webapi.log &

The web UI listens on http://127.0.0.1:18000

Default login: root / root.2020

Collector: Telegraf

Install

Official download page

mkdir ~/tools && cd ~/tools
version=1.24.1
tarball=telegraf-${version}_linux_amd64.tar.gz
wget https://dl.influxdata.com/telegraf/releases/${tarball}
tar xzvf $tarball
mv telegraf-${version} ~/ && cd ~/
mv telegraf-${version} telegraf

Configuration

Full configuration reference

cp ~/telegraf/etc/telegraf/telegraf.conf  ~/telegraf/etc/telegraf/telegraf.conf-bak

Replace the configuration with:

cat <<EOF >~/telegraf/etc/telegraf/telegraf.conf
[global_tags]
# user = "$USER"
[agent]
interval = "10s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = "0s"
# IP of the current node
hostname = "192.168.14.122"
omit_hostname = false
# OUTPUT PLUGINS #
[[outputs.opentsdb]]
# IP of the monitoring (n9e) server
host = "http://192.168.14.122"
# port of the monitoring server
port = 19000
http_batch_size = 50
http_path = "/opentsdb/put"
debug = false
separator = "_"
# INPUT PLUGINS #
[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = true
core_tags = false
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
[[inputs.diskio]]
[[inputs.kernel]]
[[inputs.mem]]
[[inputs.processes]]
[[inputs.swap]]
[[inputs.system]]
fielddrop = ["uptime_format"]
[[inputs.net]]
ignore_protocol_stats = true
EOF

Start

nohup ~/telegraf/usr/bin/telegraf --config ~/telegraf/etc/telegraf/telegraf.conf &> telegraf.log &

Collector: Categraf

Documentation

Install

categraf-v0.2.13-linux-amd64.tar.gz

cd ~/
mkdir package
cd package
wget https://github.com/flashcatcloud/categraf/releases/download/v0.2.13/categraf-v0.2.13-linux-amd64.tar.gz
tar -zxf categraf-v0.2.13-linux-amd64.tar.gz -C ../
cd ~/
mv categraf-v0.2.13-linux-amd64 categraf

Configuration

vim ~/categraf/conf/config.toml
[writer_opt]
# default: 2000
batch = 2000
# channel(as queue) size
chan_size = 10000

[[writers]]
url = "http://192.168.14.122:19000/prometheus/v1/write"
# Basic auth username
basic_auth_user = ""
# Basic auth password
basic_auth_pass = ""
# timeout settings, unit: ms
timeout = 5000
dial_timeout = 2500
max_idle_conns_per_host = 100

MySQL collection

vim ~/categraf/conf/input.mysql/mysql.toml

MySQL TCP connectivity

Configuration
cat <<EOF >~/categraf/conf/input.net_response/net_response.toml
interval = 15
[[instances]]
targets = [
"192.168.0.137:3306",
"192.168.14.122:56379"
]
labels = { region="MySQL-137", product="skytech" }
# # interval = global.interval * interval_times
# interval_times = 1
## tcp/udp
protocol = "tcp"
timeout = "5s"
## UDP options:
# send = "ssh"
# expect = "ssh"
# read_timeout = "1s"
EOF
Dashboard JSON
[
{
"name": "TCP探测",
"tags": "",
"configs": "",
"chart_groups": [
{
"name": "Default chart group",
"weight": 0,
"charts": [
{
"configs": "{\"targets\":[{\"refId\":\"A\",\"expr\":\"max(net_response_result_code) by (target)\",\"legend\":\"UP?\"},{\"expr\":\"max(net_response_response_time) by (target)\",\"refId\":\"C\",\"legend\":\"latency(s)\"}],\"name\":\"Targets\",\"custom\":{\"showHeader\":true,\"calc\":\"lastNotNull\",\"displayMode\":\"labelValuesToRows\",\"aggrDimension\":\"target\"},\"options\":{\"valueMappings\":[],\"standardOptions\":{}},\"overrides\":[{\"properties\":{\"valueMappings\":[{\"type\":\"special\",\"match\":{\"special\":0},\"result\":{\"text\":\"UP\",\"color\":\"#417505\"}},{\"type\":\"range\",\"match\":{\"special\":1,\"from\":1},\"result\":{\"text\":\"DOWN\",\"color\":\"#e90f0f\"}}],\"standardOptions\":{}},\"matcher\":{\"value\":\"A\"}}],\"version\":\"2.0.0\",\"type\":\"table\",\"layout\":{\"h\":4,\"w\":24,\"x\":0,\"y\":0,\"i\":\"0\"}}",
"weight": 0
}
]
}
]
}
]

Redis collection

Configuration
cat <<EOF >~/categraf/conf/input.redis/redis.toml
interval = 15
[[instances]]
address = "192.168.14.122:56379"
# username = ""
password = "maxzhao"
# pool_size = 2
# # Optional. Specify redis commands to retrieve values
# commands = [
# {command = ["get", "sample-key1"], metric = "custom_metric_name1"},
# {command = ["get", "sample-key2"], metric = "custom_metric_name2"}
# ]
# interval_times = 1
# important! use global unique string to specify instance
labels = { instance="redis-192.168.14.122:56379" }
## Optional TLS Config
# use_tls = false
# tls_min_version = "1.2"
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = true
EOF
Dashboard JSON

The Redis address in the JSON needs to be adjusted to match your instance.

[
{
"name": "Redis Overview - 模板",
"tags": "Redis Prometheus",
"configs": "{\"var\":[{\"name\":\"instance\",\"definition\":\"label_values(redis_uptime_in_seconds,instance)\",\"selected\":\"192.168.14.122:56379\"}]}",
"chart_groups": [
{
"name": "Basic Info",
"weight": 0,
"charts": [
{
"configs": "{\"targets\":[{\"expr\":\"min(redis_uptime_in_seconds{instance=~\\\"$instance\\\"})\"}],\"name\":\"Redis Uptime\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{}},\"options\":{\"standardOptions\":{\"util\":\"humantimeSeconds\"}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"sum(redis_connected_clients{instance=~\\\"$instance\\\"})\"}],\"name\":\"Connected Clients\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{}},\"options\":{\"standardOptions\":{}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"redis_used_memory{instance=~\\\"$instance\\\"}\"}],\"name\":\"Memory Used\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{}},\"options\":{\"valueMappings\":[{\"type\":\"range\",\"match\":{\"to\":128000000},\"result\":{\"color\":\"#079e05\"}},{\"type\":\"range\",\"match\":{\"from\":128000000},\"result\":{\"color\":\"#f10909\"}}],\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":0}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"redis_maxmemory{instance=~\\\"$instance\\\"}\"}],\"name\":\"Max Memory Limit\",\"custom\":{\"textMode\":\"value\",\"colorMode\":\"value\",\"calc\":\"lastNotNull\",\"colSpan\":1,\"textSize\":{}},\"options\":{\"standardOptions\":{\"util\":\"bytesIEC\"}},\"version\":\"2.0.0\",\"type\":\"stat\",\"layout\":{\"h\":1,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
"weight": 0
}
]
},
{
"name": "Commands",
"weight": 1,
"charts": [
{
"configs": "{\"targets\":[{\"expr\":\"rate(redis_total_commands_processed{instance=~\\\"$instance\\\"}[5m])\"}],\"name\":\"Commands Executed / sec\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"irate(redis_keyspace_hits{instance=~\\\"$instance\\\"}[5m])\",\"legend\":\"hits\"},{\"expr\":\"irate(redis_keyspace_misses{instance=~\\\"$instance\\\"}[5m])\",\"legend\":\"misses\"}],\"name\":\"Hits / Misses per Sec\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"noraml\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"topk(5, irate(redis_cmdstat_calls{instance=~\\\"$instance\\\"} [1m]))\",\"legend\":\"{{command}}\"}],\"name\":\"Top Commands\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}",
"weight": 0
}
]
},
{
"name": "Keys",
"weight": 2,
"charts": [
{
"configs": "{\"targets\":[{\"expr\":\"sum (redis_keyspace_keys{instance=~\\\"$instance\\\"}) by (db)\",\"legend\":\"{{db}}\"}],\"name\":\"Total Items per DB\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"sum(rate(redis_expired_keys{instance=~\\\"$instance\\\"}[5m])) by (instance)\",\"legend\":\"expired\"},{\"expr\":\"sum(rate(redis_evicted_keys{instance=~\\\"$instance\\\"}[5m])) by (instance)\",\"legend\":\"evicted\"}],\"name\":\"Expired / Evicted\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}",
"weight": 0
},
{
"configs": "{\"targets\":[{\"expr\":\"sum(redis_keyspace_keys{instance=~\\\"$instance\\\"}) - sum(redis_keyspace_expires{instance=~\\\"$instance\\\"}) \",\"legend\":\"not expiring\"},{\"expr\":\"sum(redis_keyspace_expires{instance=~\\\"$instance\\\"}) \",\"legend\":\"expiring\"}],\"name\":\"Expiring vs Not-Expiring Keys\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"none\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"noraml\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}",
"weight": 0
}
]
},
{
"name": "Network",
"weight": 3,
"charts": [
{
"configs": "{\"targets\":[{\"expr\":\"sum(rate(redis_total_net_input_bytes{instance=~\\\"$instance\\\"}[5m]))\",\"legend\":\"input\"},{\"expr\":\"sum(rate(redis_total_net_output_bytes{instance=~\\\"$instance\\\"}[5m]))\",\"legend\":\"output\"}],\"name\":\"Network I/O\",\"options\":{\"tooltip\":{\"mode\":\"all\",\"sort\":\"desc\"},\"legend\":{\"displayMode\":\"hidden\"},\"standardOptions\":{\"util\":\"bytesIEC\",\"decimals\":2},\"thresholds\":{}},\"custom\":{\"drawStyle\":\"lines\",\"lineInterpolation\":\"smooth\",\"fillOpacity\":0.5,\"stack\":\"off\"},\"version\":\"2.0.0\",\"type\":\"timeseries\",\"layout\":{\"h\":2,\"w\":24,\"x\":0,\"y\":0,\"i\":\"0\"}}",
"weight": 0
}
]
}
]
}
]

Start

nohup ~/categraf/categraf -configs ~/categraf/conf/ &> categraf.log &

Restart script

#!/bin/bash
t=$(ps -ef|grep "/home/yy/categraf/categraf" |grep -v grep |awk '{print $2}')
if [ -z "$t" ]
then
echo "categraf stop"
else
kill -9 $t
echo "categraf shutdown"
fi
nohup ~/categraf/categraf -configs ~/categraf/conf/ &> categraf.log &
echo "categraf start"

Application monitoring

Telegraf collection

vim ~/telegraf/etc/telegraf/telegraf.conf

HTTP application availability and response-time collection

[[inputs.http_response]]
address = "http://127.0.0.1:30001/seed-auth/index.html"
interval = "60s"
response_timeout = "5s"
method = "GET"
[inputs.http_response.tags]
app = "统一认证平台服务端-122"
group = "统一认证平台"

TCP application availability checks

[[inputs.net_response]]
protocol = "tcp"
# application ip:port
address = "192.168.0.137:1521"
timeout = "60s"
tagexclude = ["result"]
[inputs.net_response.tags]
# application name
app = "Oracle"
# optional group label; grouping makes it easier to target different alert rules per group in Nightingale
group = "skytech"

[[inputs.net_response]]
protocol = "tcp"
address = "192.168.0.137:3306"
timeout = "60s"
tagexclude = ["result"]
[inputs.net_response.tags]
app = "MySQL"
group = "skytech"

MySQL collection

[[inputs.mysql]]
interval = "30"
servers = ["seed_demo_auth_server:Seed@tcp(192.168.0.137:3306)/?tls=false"]
perf_events_statements_digest_text_limit = 120
perf_events_statements_limit = 250
perf_events_statements_time_limit = 86400
table_schema_databases = [""]
gather_table_schema = false
gather_process_list = true
gather_info_schema_auto_inc = true
gather_slave_status = true
gather_binary_logs = false
gather_table_io_waits = false
gather_table_lock_waits = false
gather_index_io_waits = false
gather_event_waits = false
gather_file_events_stats = false
interval_slow = "30m"

Redis collection

[[inputs.redis]]
## [protocol://][:password]@address[:port]
servers = ['tcp://:maxzhao@192.168.14.122:56379']

Nightingale dashboards

Server resource dashboard

Under Monitoring > Dashboards, choose More operations > Import dashboard > Import built-in dashboards, then import linux_by_telegraf.

TCP monitoring dashboard

Under More operations, choose Import dashboard > Import dashboard JSON:

{
"name": "TCP监测",
"tags": "",
"configs": {
"var": [],
"panels": [
{
"targets": [
{
"refId": "A",
"expr": "net_response_result_code{group='tcpDemo'}",
"legend": "{{app}}【{{server}}:{{port}}】"
}
],
"name": "TCP连接",
"custom": {
"showHeader": true,
"calc": "lastNotNull",
"displayMode": "seriesToRows",
"sortColumn": "value",
"sortOrder": "descend"
},
"options": {
"standardOptions": {}
},
"overrides": [
{}
],
"version": "2.0.0",
"type": "table",
"layout": {
"h": 16,
"w": 24,
"x": 0,
"y": 0,
"i": "8c2e31ab-6c82-486d-814a-c1b20ca30bb8",
"isResizable": true
},
"id": "8c2e31ab-6c82-486d-814a-c1b20ca30bb8"
}
],
"version": "2.0.0"
}
}

HTTP monitoring dashboard

{
"name": "HTTP监测",
"tags": "",
"configs": {
"var": [],
"panels": [
{
"targets": [
{
"refId": "A",
"expr": "http_response_http_response_code{group='httpDemo'}",
"legend": "{{app}}【{{server}} 】"
}
],
"name": "WEB应用响应值",
"custom": {
"showHeader": true,
"calc": "lastNotNull",
"displayMode": "seriesToRows",
"sortColumn": "value",
"sortOrder": "descend"
},
"options": {
"standardOptions": {}
},
"overrides": [
{}
],
"version": "2.0.0",
"type": "table",
"layout": {
"h": 16,
"w": 24,
"x": 0,
"y": 0,
"i": "a61b2107-efff-4ef0-b4d6-2555e3d13978",
"isResizable": true
},
"id": "a61b2107-efff-4ef0-b4d6-2555e3d13978"
}
],
"version": "2.0.0"
}
}

Redis dashboard metrics

Related documentation on the Redis website

redis_active_defrag_running:活动碎片整理是否运行[lw]

redis_allocator_active_bytes:分配器活动字节[lw]
redis_active_allocated_bytes:活动分配的字节[lw]
redis_assocator_frag_bytes:关联碎片字节[lw]
redis_allocator_frag_ratio:分配器碎片比率[lw]
redis_allocator_resident_bytes:分配器常驻字节[lw]
redis_allocator_rss_bytes:分配器RSS字节[lw]
redis_allocator_rss_ratio:分配器RSS比率[lw]


redis_aof_current_rewrite_duration_sec:aof当前重写持续时间sec[lw]
redis_aof_enabled:是否启用aof[lw]
redis_aof_last_bgrewrite_status:最近一次AOF重写操作是否执行成功[lw]
redis_aof_last_cow_size_bytes:在执行AOF重写期间,分配给COW的大小[lw]
redis_aof_last_rewrite_duration_sec:最近一次AOF重写操作消耗的时间[lw]
redis_aof_last_write_status:aof上次写入状态[lw]
redis_aof_rewrite_in_progress:是否在进行AOF的重写操作[lw]
redis_aof_rewrite_scheduled:是否有AOF操作等待执行[lw]

redis_blocked_clients:被阻止的客户[lw]

redis_client_recent_max_input_buffer_bytes:客户端最近最大输入缓冲区字节[lw]
redis_client_recent_max_output_buffer_bytes:客户端最近最大输出缓冲区字节[lw]
redis_cluster_enabled:是否启用集群[lw]

redis_commands_duration_seconds_total:命令持续时间总秒数[lw]
redis_commands_processed_total:命令处理总数[lw]
redis_commands_total:命令总数[lw]

redis_config_maxclients:配置最大客户端[lw]
redis_config_maxmemory:配置最大内存[lw]

redis_connected_clients:连接的客户[lw]
redis_connected_slave_lag_seconds:连接的从节点延迟秒[lw]
redis_connected_slave_offset_bytes:连接的从节点偏移字节[lw]
redis_connected_slaves:连接的从节点[lw]
redis_connections_received_total:收到的连接总数[lw]

redis_cpu_sys_children_seconds_total:由后台进程消耗的系统CPU[lw]
redis_cpu_sys_seconds_total:由Redis服务器消耗的用户CPU[lw]
redis_cpu_user_children_seconds_total:由后台进程消耗的用户CPU[lw]
redis_cpu_user_seconds_total:由Redis服务消耗的用户CPU[lw]

redis_db_avg_ttl_seconds:db平均ttl秒[lw]
redis_db_keys:数据库key的数量[lw]
redis_db_keys_expiring:即将过期的key[lw]

redis_defrag_hits:碎片整理命中[lw]
redis_defrag_key_hits:碎片整理命中key[lw]
redis_defrag_key_misses:碎片整理未命中key[lw]
redis_evicted_keys_total:被驱逐的key总数[lw]

redis_expired_keys_total:过期key总数[lw]
redis_expired_stale_percentage:过期陈旧key占百分比[lw]
redis_expired_time_cap_reached_total:已达到总时间上限[lw]

redis_exporter_build_infor:redis_exporter信息[lw]
redis_exporter_last_scrape_connect_time_seconds:redis_exporter最后一次采集时间[lw]
redis_exporter_last_scrape_duration_seconds:redis_exporter次抓取持续时间秒[lw]
redis_exporter_last_scrape_error:redis_exporter次抓取错误[lw]
redis_exporter_scrape_duration_seconds_count:redis_exporter采集续时间秒数[lw]
redis_exporter_scrape_duration_seconds_sum:redis_exporter持续时间秒总和[lw]
redis_exporter_scrapes_total:redis_exporter抓取总数[lw]

redis_instance_info:实例信息[lw]
redis_keyspace_hits_total:键空间命中总数[lw]
redis_keyspace_misses_total:键空间未命中总数[lw]

redis_last_key_groups_scrape_duration_milliseconds:最后一个键组抓取持续时间毫秒[lw]
redis_last_slow_execution_duration_seconds:最后一个慢执行持续时间秒[lw]
redis_latest_fork_seconds:最新fork时间[lw]
redis_lazyfree_pending_objects:惰性删除或延迟释放的对象[lw]
redis_loading_dump_file:加载转储文件[lw]

redis_master_last_io_seconds_ago:master最后io过去时间[lw]
redis_master_repl_offset:主节点累加偏移量(判断主从是否同步)[lw]
redis_master_sync_in_progress:正在进行主同步[lw]

redis_mem_clients_normal:[lw]
redis_mem_clients_slaves:[lw]
redis_mem_fragmentation_bytes:内存碎片字节[lw]
redis_mem_fragmentation_ratio:内存碎片率[lw]
redis_mem_not_counted_for_eviction_bytes:内存不计入驱逐的字节数[lw]
redis_memory_max_bytes:内存最大字节[lw]
redis_memory_used_lua_bytes:lua脚本使用内存字节数[lw]
redis_memory_used_overhead_bytes:维护数据集的内部机制所需的内存开销[lw]
redis_memory_used_peak_bytes:内存使用峰值[lw]
redis_memory_used_rss_bytes:rss占用内存的字节数[lw]
redis_memory_used_scripts_bytes:脚本占用内存的字节数[lw]
redis_memory_used_startup_bytes:启动占用内存的字节数[lw]
redis_migrate_cached_sockets_total:[lw]
redis_net_input_bytes_total:网络input总数[lw]
redis_net_output_bytes_total:网络output总数[lw]
reids_process_id:进程号[lw]
redis_pubsub_channels:发布订阅频道[lw]
redis_pubsub_patterns:发布订阅模式[lw]

redis_rdb_bgsave_in_progress:[lw]
redis_rdb_changes_since_last_save:自上次保存以来的rdb更改[lw]
redis_rdb_current_bgsave_duration_sec:rdb当前bgsave持续时间[lw]
redis_rdb_last_bgsave_duration_sec:rdb上次bgsave持续时间[lw]
redis_rdb_last_bgsave_status:rdb上次bgsave状态[lw]
redis_rdb_last_cow_size_bytes:rdb上次cow的大小[lw]
redis_rdb_last_save_timestamp_seconds:rdb最后保存时间戳[lw]

redis_rejected_connections_total:拒绝的连接总数[lw]
redis_repl_backlog_first_byte_offset:复制起始偏移量[lw]
redis_repl_backlog_history_bytes:repl_backlog历史数据大小[lw]
redis_repl_backlog_is_active:repl_backlog是否开启[lw]
redis_replica_partial_resync_accepted:[lw]
redis_replica_partial_resync_denied:[lw]
redis_replica_resyncs_full:[lw]
redis_replication_backlog_bytes:[lw]
redis_second_repl_offset:[lw]
redis_slave_expires_tracked_keys:[lw]
redis_slave_info:从节点信息[lw]
redis_slave_priority:从节点优先级[lw]
redis_slave_repl_offset:从节点累加偏移量(判断主从是否同步)[lw]
redis_slowlog_last_id:慢查询日志最后一个的id[lw]
redis_slowlog_length:慢查询日志长度[lw]
redis_start_time_seconds:开始时间秒[lw]
redis_target_scrape_request_errors_total:[lw]
redis_up:运行时间[lw]
redis_uptime_in_seconds:正常运行时间[lw]

Redis dashboard JSON


Original post: https://github.com/maxzhao-it/blog/post/ac3f2297/

Introduction

This guide uses Kubernetes with containerd.io.

The Kubernetes dashboard deployment was not successful.

The hosts used here:

  • 192.168.2.158 master-158
  • 192.168.2.159 master-159
  • 192.168.2.160 master-160
  • 192.168.2.161 node-161
  • 192.168.2.240 nfs

You may also need to:

Install an etcd cluster

Install Docker / containerd (covered below)

System configuration

Check the port

su root
sudo yum install -y netcat
nc 127.0.0.1 6443

Virtual machines need the installation ISO mounted.

Disable the firewall

sudo systemctl stop firewalld
sudo systemctl disable firewalld
# disable SELinux
setenforce 0
# permanently
sed -i 's/enforcing/disabled/' /etc/selinux/config
# disable swap (kubelet requires swap to be off)
swapoff -a
# permanently
sed -ri 's/.*swap.*/#&/' /etc/fstab
# add host entries on all nodes
cat >> /etc/hosts << EOF
192.168.2.158 host158
192.168.2.159 host159
192.168.2.160 host160
192.168.2.161 host161
192.168.2.240 host240
192.168.2.240 host241
192.168.2.158 master-158
192.168.2.159 master-159
192.168.2.160 master-160
192.168.2.161 node-161
EOF
cat /etc/hosts

Docker yum repository

# the installation ISO must be mounted
sudo mkdir /mnt/cdrom
sudo mount /dev/cdrom /mnt/cdrom/
sudo yum install -y yum-utils
sudo yum-config-manager \
--add-repo \
https://download.docker.com/linux/centos/docker-ce.repo

# or the Aliyun mirror
sudo yum-config-manager \
--add-repo \
http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# or the Tsinghua University mirror
sudo yum-config-manager \
--add-repo \
https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/centos/docker-ce.repo

Configure the Kubernetes yum repository

Configuration file: /etc/yum.repos.d/kubernetes.repo

Using the Aliyun mirror

cat > /etc/yum.repos.d/kubernetes.repo <<EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

Refresh the cache

# the upstream repo is only mirrored, so GPG index checks may occasionally fail
# sudo yum clean all && sudo yum makecache

Or use the Huawei Cloud mirror

cat > /etc/yum.repos.d/kubernetes.repo <<EOF
[kubernetes]
name=Kubernetes
baseurl=https://repo.huaweicloud.com/kubernetes/yum/repos/kubernetes-el7-\$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=0
gpgkey=https://repo.huaweicloud.com/kubernetes/yum/doc/yum-key.gpg https://repo.huaweicloud.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

Installing with kubeadm

When in doubt, restart; if restarting does not fix it, tear it down and start over.

systemctl restart kubelet
systemctl restart containerd
# re-upload the cluster configuration
kubeadm init phase upload-config all --config=/root/kubeadm-config-init.yaml --v=5
# regenerate the kubeconfig files
kubeadm init phase kubeconfig all --config=/root/kubeadm-config-init.yaml --v=5
# refresh the kube-proxy addon
kubeadm init phase addon kube-proxy --config=/root/kubeadm-config-init.yaml --v=5

1. Install dependencies

Kubernetes packages

# remove any existing docker and kubernetes packages
sudo yum remove -y docker-ce docker-ce-cli docker-compose-plugin
sudo yum remove -y kubelet kubeadm kubectl
# set SELinux to permissive mode (effectively disable it)
sudo setenforce 0
# permanently
sed -i 's/enforcing/disabled/' /etc/selinux/config
# install the kubernetes packages on all nodes
sudo yum install -y kubelet kubeadm kubectl
# if installation fails with "signature could not be verified for kubernetes", skip the gpg check:
# sudo yum install -y --nogpgcheck kubelet kubeadm kubectl
sudo systemctl daemon-reload
sudo systemctl disable kubelet
sudo systemctl enable kubelet
sudo systemctl start kubelet
# kubelet keeps restarting until /var/lib/kubelet/config.yaml exists; that is expected at this point
sudo systemctl status kubelet -l
# check the installed version
kubectl version --client
kubectl version --client --output=yaml
  • kubelet: the node agent that communicates with the control plane
  • kubeadm: the tool that bootstraps the cluster
  • kubectl: the cluster management CLI

The officially recommended approach for Linux

IPVS dependencies

# install
sudo yum install -y ipset ipvsadm
# check
lsmod|grep ip_vs
# load the modules
modprobe ip_vs
modprobe ip_vs_rr
modprobe ip_vs_wrr
modprobe ip_vs_sh
modprobe nf_conntrack_ipv4

Install kubectl command completion (optional)

sudo yum install -y bash-completion
cat /usr/share/bash-completion/bash_completion
# reload the shell and check (exit and reconnect over ssh)
type _init_completion
# if it is still missing after reloading, add the following to ~/.bashrc
source /usr/share/bash-completion/bash_completion
# enable kubectl auto-completion
sudo kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl > /dev/null
# add a kubectl alias
echo 'alias k=kubectl' >>~/.bashrc
echo 'complete -F __start_kubectl k' >>~/.bashrc

二、安装kubeadm

全部节点

允许 iptables 检查桥接流量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 确保 br_netfilter 模块被加载
lsmod | grep br_netfilter
# 显式加载该模块
sudo modprobe br_netfilter
#为了让你的 Linux 节点上的 iptables 能够正确地查看桥接流量
echo "1" > /proc/sys/net/bridge/bridge-nf-call-iptables
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF

cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system

确保MAC地址和product_uuid的唯一

  • 你可以使用命令 ip link 或 ifconfig -a 来获取网络接口的 MAC 地址
  • 可以使用 sudo cat /sys/class/dmi/id/product_uuid 命令对 product_uuid 校验(校验示例见下)
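
一个简单的校验草案,在每个节点上执行并对比结果:

# 查看各网卡的 MAC 地址
ip link | grep ether
# 查看 product_uuid,各节点之间不应重复
sudo cat /sys/class/dmi/id/product_uuid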

三、容器运行时

全部节点

安装containerd

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#关闭swap
swapoff -a
#永久
sed -ri 's/.*swap.*/#&/' /etc/fstab
#
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter
#验证 br_netfilter 模块是否已加载
lsmod | grep br_netfilter
# 设置必需的 sysctl 参数,这些参数在重新启动后仍然存在。
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF

# 应用 sysctl 参数而无需重新启动
sudo sysctl --system

安装 Containerd.io

配置Docker 镜像

1
2
3
4
5
6
7
8
9
10
11
# 所有节点安装 containerd
sudo yum install -y containerd.io
# 配置 containerd:
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
# 使用 `systemd cgroup` 驱动程序
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
sudo cat /etc/containerd/config.toml |grep SystemdCgroup
sudo systemctl restart containerd
sudo systemctl enable containerd
sudo systemctl status containerd

github

结果:

1
2
3
4
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
# ...
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true

当使用 kubeadm 时,请手动配置 kubelet 的 cgroup 驱动。
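
下面是一个最小的参考写法(草案):在 kubeadm 的 init 配置文件(见下文“配置文件创建 master 节点”)末尾追加一段 KubeletConfiguration,显式指定 kubelet 使用 systemd cgroup 驱动(kubeadm 1.22+ 未指定时默认即为 systemd):

---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd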

四、创建集群

CA证书(可以跳过)

生成CA证书

| 路径 | 默认 CN | 描述 |
| --- | --- | --- |
| ca.crt,key | kubernetes-ca | Kubernetes 通用 CA |
| etcd/ca.crt,key | etcd-ca | 与 etcd 相关的所有功能 |
| front-proxy-ca.crt,key | kubernetes-front-proxy-ca | 用于前端代理 |

除上面的 CA 之外,还需要获取用于服务账户管理的密钥对,也就是 sa.key 和 sa.pub。

下面的例子说明了上表中所示的 CA 密钥和证书文件。

1
2
3
4
5
6
/etc/kubernetes/pki/ca.crt
/etc/kubernetes/pki/ca.key
/etc/kubernetes/pki/etcd/ca.crt
/etc/kubernetes/pki/etcd/ca.key
/etc/kubernetes/pki/front-proxy-ca.crt
/etc/kubernetes/pki/front-proxy-ca.key

PKI证书和要求

etcd集群(含CA)

安装etcd集群(含CA)

列出镜像版本

1
2
kubeadm config images list
kubeadm config images pull
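
如果无法访问默认的 k8s.gcr.io,可以参考下面的写法(草案)指定镜像仓库与版本,与下文配置文件中的 imageRepository 保持一致:

kubeadm config images list --kubernetes-version v1.24.1 \
  --image-repository registry.aliyuncs.com/google_containers
kubeadm config images pull --kubernetes-version v1.24.1 \
  --image-repository registry.aliyuncs.com/google_containers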

配置文件创建 master 节点(强烈建议)

通过配置文件创建集群节点

写入配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# 写入配置
cat > ~/kubeadm-config-init.yaml <<EOF
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
bootstrapTokens:
  - groups:
      - system:bootstrappers:kubeadm:default-node-token
    token: fk3wpg.gs0mcv4twx3tz2mc
    ttl: 240h0m0s
    usages:
      - signing
      - authentication
    description: "描述设置了一个人性化的消息,为什么这个令牌存在以及它的用途"
# NodeRegistration 包含与将新控制平面节点注册到集群相关的字段
nodeRegistration:
  name: master-158
  criSocket: unix:///var/run/containerd/containerd.sock
  ignorePreflightErrors:
    - IsPrivilegedUser
# LocalAPIEndpoint 表示部署在这个控制平面节点上的 API 服务器实例的端点
localAPIEndpoint:
  advertiseAddress: 192.168.2.158
  bindPort: 6443
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
clusterName: k8s-cluster
etcd:
  external:
    endpoints:
      - "https://192.168.2.158:2379"
      - "https://192.168.2.159:2379"
      - "https://192.168.2.160:2379"
    caFile: "/etc/certs/etcd/ca.pem"
    certFile: "/etc/certs/etcd/etcd-158.pem"
    keyFile: "/etc/certs/etcd/etcd-158-key.pem"
# networking 持有集群的网络拓扑结构的配置
networking:
  dnsDomain: cluster.158
  serviceSubnet: 10.96.0.0/16
  podSubnet: "10.244.0.0/16"
kubernetesVersion: 1.24.1
controlPlaneEndpoint: "192.168.2.158:6443"
# apiServer 包含 API 服务器控制平面组件的额外设置
apiServer:
  extraArgs:
    bind-address: 0.0.0.0
    authorization-mode: "Node,RBAC"
    #service-cluster-ip-range: 10.96.0.0/16
    #service-node-port-range: 30000-32767
  timeoutForControlPlane: 4m0s
  certSANs:
    - "localhost"
    - "cluster.158"
    - "127.0.0.1"
    - "master-158"
    - "master-159"
    - "master-160"
    - "node-161"
    - "10.96.0.1"
    - "10.244.0.1"
    - "192.168.2.158"
    - "192.168.2.159"
    - "192.168.2.160"
    - "192.168.2.161"
    - "host158"
    - "host159"
    - "host160"
    - "host161"
# controllerManager 包含控制器管理器控制平面组件的额外设置
controllerManager:
  extraArgs:
    bind-address: 0.0.0.0
    #"node-cidr-mask-size": "20"
    # 这里与 KubeProxyConfiguration.clusterCIDR 一致
    #cluster-cidr: 10.244.0.0/16
    #service-cluster-ip-range: 10.96.0.0/16
  #config: /etc/kubernetes/scheduler-config.yaml
  #extraVolumes:
  #  - name: schedulerconfig
  #    hostPath: /home/johndoe/schedconfig.yaml
  #    mountPath: /etc/kubernetes/scheduler-config.yaml
  #    readOnly: true
  #    pathType: "File"
# scheduler 包含调度程序控制平面组件的额外设置
scheduler:
  extraArgs:
    bind-address: 0.0.0.0
  #config: /etc/kubernetes/kubescheduler-config.yaml
  #extraVolumes:
  #  - hostPath: /etc/kubernetes/kubescheduler-config.yaml
  #    mountPath: /etc/kubernetes/kubescheduler-config.yaml
  #    name: kubescheduler-config
  #    readOnly: true
# dns 定义集群中安装的 DNS 插件的选项
dns: {}
certificatesDir: /etc/kubernetes/pki
#imageRepository: k8s.gcr.io
imageRepository: registry.aliyuncs.com/google_containers
# 用户启用的 FeatureGates
#featureGates:
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
bindAddress: 0.0.0.0
bindAddressHardFail: false
clientConnection:
  acceptContentTypes: ""
  burst: 0
  contentType: ""
  kubeconfig: /var/lib/kube-proxy/kubeconfig.conf
  qps: 0
clusterCIDR: 10.244.0.0/16
configSyncPeriod: 2s
conntrack:
  maxPerCore: null
  min: null
  tcpCloseWaitTimeout: 60s
  tcpEstablishedTimeout: 2s
# 默认 LocalModeClusterCIDR
detectLocalMode: ""
detectLocal:
  bridgeInterface: ""
  interfaceNamePrefix: ""
enableProfiling: false
healthzBindAddress: "0.0.0.0:10256"
hostnameOverride: "kube-proxy-158"
ipvs:
  excludeCIDRs: null
  minSyncPeriod: 1m
  scheduler: ""
  strictARP: true
  syncPeriod: 1m
  tcpFinTimeout: 0s
  tcpTimeout: 0s
  udpTimeout: 0s
metricsBindAddress: "127.0.0.1:10249"
mode: "ipvs"
nodePortAddresses: null
oomScoreAdj: null
portRange: ""
showHiddenMetricsForVersion: ""
udpIdleTimeout: 0s
winkernel:
  enableDSR: false
  forwardHealthCheckVip: false
  networkName: ""
  rootHnsEndpointName: ""
  sourceVip: ""
EOF

创建

1
kubeadm init --config=kubeadm-config-init.yaml  --v=5
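
正式执行前可以先做一次预检(草案),--dry-run 只做校验和渲染,不会真正改动节点:

# 预检 + 渲染,不落盘
kubeadm init --config=kubeadm-config-init.yaml --dry-run --v=5
# 或者只执行 preflight 阶段
kubeadm init phase preflight --config=kubeadm-config-init.yaml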

直接创建 master节点(不建议)

1
2
3
4
5
6
7
8
9
10
11
12
# --apiserver-advertise-address 用于指定 kube-apiserver 监听的 ip 地址,就是 master 本机 IP 地址
# --image-repository 指定阿里云镜像仓库地址
# --kubernetes-version 用于指定 k8s 版本(kubeadm config images list 查看的)
# --service-cidr 用于指定 SVC 的网络范围
# --pod-network-cidr 用于指定 Pod 的网络范围:10.244.0.0/16
# 注意:续行符 \ 之后不能插入注释行,否则命令会被截断
sudo kubeadm init \
--node-name master-158 \
--apiserver-advertise-address=192.168.2.158 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.24.1 \
--service-cidr=10.96.0.0/16 \
--pod-network-cidr=10.244.0.0/16

image-20220519114218846

非Root用户运行kubectl配置

1
2
3
4
5
6
# 普通用户
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# root 可以直接执行
export KUBECONFIG=/etc/kubernetes/admin.conf

加入当前集群

host158上生成加入节点的脚本:

1
kubeadm token create --print-join-command

host159加入

证书

1
2
3
# 在 158 上执行
ssh root@192.168.2.159 "mkdir -p /etc/kubernetes/pki"
scp -r /etc/kubernetes/pki/* root@192.168.2.159:/etc/kubernetes/pki

加入

1
2
3
4
# 在159上执行:添加加 master-159 节点
kubeadm join 192.168.2.158:6443 --node-name master-159 --token fk3wpg.gs0mcv4twx3tz2mc \
--discovery-token-ca-cert-hash sha256:533e488affe662b66310ed8a353b1fba17c354e4c0df5285b95a01a33e986219 \
--control-plane --v=5

配置和校验

1
2
3
4
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
kubectl get nodes

configmap jwt token 错误

host160加入

证书

1
2
3
# 在 158 上执行
ssh root@192.168.2.160 "mkdir -p /etc/kubernetes/pki"
scp -r /etc/kubernetes/pki/* root@192.168.2.160:/etc/kubernetes/pki

加入

1
2
3
4
# 在160上执行:添加 master-160 节点
kubeadm join 192.168.2.158:6443 --node-name master-160 --token fk3wpg.gs0mcv4twx3tz2mc \
--discovery-token-ca-cert-hash sha256:533e488affe662b66310ed8a353b1fba17c354e4c0df5285b95a01a33e986219 \
--control-plane --v=5

配置和校验

1
2
3
4
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
kubectl get nodes

host161加入

1
2
3
# 添加普通节点
kubeadm join 192.168.2.158:6443 --node-name node-161 --token fk3wpg.gs0mcv4twx3tz2mc \
--discovery-token-ca-cert-hash sha256:533e488affe662b66310ed8a353b1fba17c354e4c0df5285b95a01a33e986219 --v=5

image-20220529185350418

加上 --control-plane(旧版本 kubeadm 为 --experimental-control-plane)即可作为控制节点加入

在控制节点 host158 上执行:

1
kubectl get nodes

image-20220529185643763

token

获取令牌:

1
kubeadm token list

image-20220519143852023

创建新令牌

1
kubeadm token create

输出类似于以下内容:

1
5didvk.d09sbcov8ph2amjw

直接创建加入脚本

1
kubeadm token create --print-join-command --ttl=240h 

如果你没有 --discovery-token-ca-cert-hash 的值,则可以通过在控制平面节点上执行以下命令链来获取它:

1
2
3
#默认证书 /etc/kubernetes/pki/ca.crt
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | \
openssl dgst -sha256 -hex | sed 's/^.* //'

输出类似于以下内容:

1
5094a16f108636a64edc65194ef8f61b446f831ebab3265dda5723a394030ee1

查看证书

1
2
3
4
# 查看证书有效期
kubeadm certs check-expiration
# 更新证书(旧版本为 kubeadm alpha certs ...,1.20+ 已移除 alpha 子命令)
kubeadm certs renew all

init 时的几个配置

1
2
3
4
5
6
7
8
/etc/kubernetes
# certificate-authority-data: cat /etc/kubernetes/pki/ca.crt | base64 -w 0
/etc/kubernetes/kubelet.conf
#kubelet
/var/lib/kubelet/kubeadm-flags.env
/var/lib/kubelet/config.yaml
# manifest
/etc/kubernetes/manifests

五、安装pods网络插件

  • Flannel: 最成熟、最简单的选择(当前选择)
  • Calico: 性能好、灵活性最强,目前的企业级主流
  • Canal: 将Flannel提供的网络层与Calico的网络策略功能集成在一起。
  • Weave: 独有的功能,是对整个网络的简单加密,会增加网络开销
  • Kube-router: kube-router采用lvs实现svc网络,采用bgp实现pod网络.
  • CNI-Genie:CNI-Genie 是一个可以让 k8s 使用多个 cni 网络插件的组件,暂时不支持隔离策略

k8s的容器虚拟化网络方案大体分为两种: 基于隧道方案和基于路由方案

  • 隧道方案:flannel 的 vxlan 模式、calico 的 ipip 模式都是隧道方案。
  • 路由方案:flannel 的 host-gw 模式、calico 的 bgp 模式都是路由方案。

calico 安装(推荐)

k8s 安装 pods 网络插件 calico(etcd+tls)

Containerd 镜像配置

calico 的 GitHub

Flannel安装

master上执行

1
2
3
4
5
cd ~
# 可选参数 --no-check-certificate
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml -O kube-flannel.yml
# 或者,直接 apply 官方 yaml(未指定网卡),可能会使 flannel 处在 CrashLoopBackOff 状态
kubectl apply -f https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml

建议 flannel 使用 Kubernetes API 作为其后备存储,这样可以避免为 flannel 部署离散的 etcd 集群。

Flannel GitHub地址

修改 kube-flannel.yml

添加网卡

image-20220529194923226

不添加网卡会报错 Failed to find any valid interface to use: failed to get default interface: protocol not available
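
指定网卡通常是在 kube-flannel.yml 的 DaemonSet 中,给 kube-flannel 容器的启动参数追加 --iface(草案,ens33 为示例网卡名,请按实际网卡修改):

# kube-flannel.yml 中容器启动参数(节选)
args:
  - --ip-masq
  - --kube-subnet-mgr
  - --iface=ens33   # 新增:指定宿主机实际使用的网卡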

错误:Failed to find interface

1
2
Could not find valid interface matching ens33: failed to find IPv4 address for interface ens33
Failed to find interface to use that matches the interfaces and/or regexes provided

进入容器

1
2
kubectl -n kube-system exec -it kube-flannel-ds-f92wg sh
kubectl -n kube-system exec -it kube-flannel-ds-f92wg bash

修改网段

kubeadm init 时自定义的--pod-network-cidr=10.244.0.0/16 如果有变动,则需要修改 kube-flannel.yml
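
需要修改的是 kube-flannel.yml 中 ConfigMap 的 net-conf.json(节选,草案),其中 Network 必须与 --pod-network-cidr / podSubnet 一致:

net-conf.json: |
  {
    "Network": "10.244.0.0/16",
    "Backend": {
      "Type": "vxlan"
    }
  }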

加载

1
kubectl apply -f kube-flannel.yml 

image-20220519114713469

查看容器配置

1
kubectl -n kube-system get ds kube-flannel-ds -o yaml

验证

1
2
3
kubectl get pods --all-namespaces
#
kubectl get pods -n kube-system

image-20220519114825112

1
kubectl logs -n kube-system kube-flannel-ds-88xsf -f

image-20220519143532444

kube-proxy ipvs 模式源码分析

使用ipvs负载均衡

获取节点

1
kubectl get nodes

image-20220519114512672

etcd 集群指南

安装etcd集群

校验运行状态

1
2
3
4
5
6
7
8
9
10
ETCDCTL_API=3 /opt/etcd/etcdctl \
--endpoints 192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379 \
--cacert /etc/certs/etcd-root-ca.pem \
--cert /etc/certs/etcd-158.pem \
--key /etc/certs/etcd-158-key.pem \
endpoint health

ENDPOINTS=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379
ETCD_AUTH='--cacert /etc/certs/etcd-root-ca.pem --cert /etc/certs/etcd-158.pem --key /etc/certs/etcd-158-key.pem '
etcdctl --write-out=table --endpoints=${ENDPOINTS} ${ETCD_AUTH} --user=root:1 endpoint status

image-20220529234008606

需要改动的 Kubernetes API 服务参数

1
2
3
4
5
6
# 服务地址(可以使用负载均衡)
--etcd-servers=192.168.2.158:2379,192.168.2.159:2379,192.168.2.160:2379
# 安全通信
--etcd-certfile=/etc/certs/etcd-158.pem
--etcd-keyfile=/etc/certs/etcd-158-key.pem
--etcd-cafile=/etc/certs/etcd-root-ca.pem

配置kubelet

由于 etcd 是首先创建的,因此你必须通过创建具有更高优先级的新文件来覆盖 kubeadm 提供的 kubelet 单元文件。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 原文件 
sudo cat /etc/systemd/system/multi-user.target.wants/kubelet.service
sudo mkdir /etc/systemd/system/kubelet.service.d/
cat << EOF | sudo tee /etc/systemd/system/kubelet.service.d/20-etcd-service-manager.conf
[Service]
ExecStart=
# 将下面的 "systemd" 替换为你的容器运行时所使用的 cgroup 驱动。
# kubelet 的默认值为 "cgroupfs"。
# 如果需要的话,将 "--container-runtime-endpoint " 的值替换为一个不同的容器运行时。
ExecStart=/usr/bin/kubelet --address=127.0.0.1 --pod-manifest-path=/etc/kubernetes/manifests --cgroup-driver=systemd
Restart=always
EOF

sudo systemctl daemon-reload
sudo systemctl restart kubelet
sudo systemctl status kubelet
journalctl -xeu kubelet
cat /etc/kubernetes/manifests/etcd.yaml
cat /etc/kubernetes/kubelet.conf
cat /var/lib/kubelet/config.yaml
cat /etc/kubernetes/bootstrap-kubelet.conf

https://kubernetes.io/zh/docs/setup/best-practices/certificates/

安装dashboard

1
2
3
cd ~
wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.5.0/aio/deploy/recommended.yaml -O recommended.yaml --no-check-certificate
kubectl apply -f recommended.yaml

image-20220519115844935

1
2
3
kubectl get pods --all-namespaces
kubectl describe pod -n kubernetes-dashboard kubernetes-dashboard-6cdd697d84-9nv4w
kubectl logs -f -n kubernetes-dashboard kubernetes-dashboard-6cdd697d84-9nv4w

启用 Dashboard 访问

1
kubectl proxy

http://192.168.2.150:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/
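
kubectl proxy 默认只监听 127.0.0.1,若要像上面这样通过宿主机 IP 访问,可参考下面的写法(草案,放开了来源校验,只建议在内网临时使用):

kubectl proxy --address=0.0.0.0 --accept-hosts='^.*$' --port=8001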

创建管理员

1
vim ~/dashboard-admin.yaml 

写入

1
2
3
4
5
6
7
8
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: dashboard-admin
  namespace: kubernetes-dashboard

为用户分配权限

1
vim ~/dashboard-admin-bind-cluster-role.yaml 

写入

1
2
3
4
5
6
7
8
9
10
11
12
13
14
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dashboard-admin-bind-cluster-role
  labels:
    k8s-app: kubernetes-dashboard
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: dashboard-admin
    namespace: kubernetes-dashboard

分配权限

1
2
kubectl create -f ~/dashboard-admin.yaml
kubectl create -f ~/dashboard-admin-bind-cluster-role.yaml

image-20220519120532859

查看并复制Token

1
kubectl -n kubernetes-dashboard describe secret $(kubectl -n kubernetes-dashboard get secret | grep dashboard-admin | awk '{print $1}')
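
从 1.24 起 ServiceAccount 不再自动创建长期的 Secret token,如果上面的命令查不到 secret,可以用下面的方式(草案)直接签发一个临时 token:

kubectl -n kubernetes-dashboard create token dashboard-admin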

访问,用刚刚的token登录

https://192.168.2.150:30000

部署应用

在Kubernetes集群中部署一个Nginx

kubectl create deployment nginx --image=nginx

kubectl expose deployment nginx --port=80 --type=NodePort

kubectl get pod,svc

访问地址:http://NodeIP:Port

在Kubernetes集群中部署一个Tomcat

kubectl create deployment tomcat --image=tomcat

kubectl expose deployment tomcat --port=8080 --type=NodePort

访问地址:http://NodeIP:Port

K8s部署微服务(springboot程序)

1、项目打包(jar、war)--> 可以采用一些工具 git、maven、jenkins

2、制作Dockerfile文件,生成镜像;

3、kubectl create deployment nginx --image=你的镜像

4、你的springboot就部署好了,是以docker容器的方式运行在pod里面的;
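
下面是一个最小的 Dockerfile 与部署命令草案(基础镜像、镜像名 myrepo/springboot-demo、jar 包名 app.jar 均为假设,请按实际项目替换):

# Dockerfile
FROM openjdk:8-jre
COPY target/app.jar /app.jar
ENTRYPOINT ["java", "-jar", "/app.jar"]

构建镜像并部署:

docker build -t myrepo/springboot-demo:1.0 .
kubectl create deployment springboot-demo --image=myrepo/springboot-demo:1.0
kubectl expose deployment springboot-demo --port=8080 --type=NodePort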

卸载集群

1
2
3
4
5
6
7
8
9
10
# 删除对集群的本地引用,集群名称 k8s-cluster
kubectl config delete-cluster k8s-cluster
# 重置 `kubeadm` 安装的状态
echo "y" | kubeadm reset
# 删除节点信息
rm -rf /etc/kubernetes/
# 删除本地配置
rm -rf $HOME/.kube/config
# 删除网路
# rm -rf /var/lib/kubelet/

删除 calico

1
2
3
4
# 删除 cni
rm -rf /etc/cni/net.d/
# 删除 calico
rm -rf /var/log/calico/

删除节点

使用适当的凭证与控制平面节点通信,运行:

1
kubectl drain <node name> --delete-emptydir-data --force --ignore-daemonsets

在删除节点之前,请重置 kubeadm 安装的状态:

1
echo "y" | kubeadm reset

重置过程不会重置或清除 iptables 规则或 IPVS 表。如果你希望重置 iptables,则必须手动进行:

1
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X

如果要重置 IPVS 表,则必须运行以下命令:

1
2
ipvsadm -C
ipvsadm --clear

现在删除节点:

1
kubectl delete node <node name>

如果你想重新开始,只需运行 kubeadm init 或 kubeadm join 并加上适当的参数。

卸载 Containerd.io

1
2
sudo yum remove -y containerd.io
sudo rm -rf /var/lib/containerd

参考

Kubernetes容器运行时弃用Docker转型Containerd

Kubernetes02:容器运行时:Docker or Containerd如何选择、Containerd全面上手实践

附录

CentOS7镜像

1
2
3
4
5
6
7
8
# 备份
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
# 下载源文件
wget -O /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
# 或者
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
# 生成缓存
yum makecache

Docker

虚拟机需要挂载ISO镜像

1
2
3
4
5
6
7
# 需要挂载 ISO 镜像
sudo mkdir /mnt/cdrom
sudo mount /dev/cdrom /mnt/cdrom/
sudo yum install -y yum-utils
sudo yum-config-manager \
--add-repo \
https://download.docker.com/linux/centos/docker-ce.repo

使用阿里云

1
2
3
4
5
6
7
8
# step 1: 安装必要的一些系统工具
sudo yum install -y yum-utils device-mapper-persistent-data lvm2
# Step 2: 添加软件源信息
sudo yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Step 3
sudo sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# Step 4: 更新并安装Docker-CE
sudo yum makecache fast

Containerd 镜像

1
vim /etc/containerd/config.toml

找到 [plugins."io.containerd.grpc.v1.cri".registry.mirrors]

写入

1
2
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
endpoint = ["https://docker.mirrors.ustc.edu.cn","https://registry.cn-hangzhou.aliyuncs.com","https://registry.docker-cn.com","https://aj2rgad5.mirror.aliyuncs.com"]
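
修改后需要重启 containerd 才会生效,可以用 crictl 做一次简单的拉取验证(草案):

sudo systemctl restart containerd
crictl pull docker.io/library/nginx:latest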

image-20220601111938470

Kubelet启动失败问题

1
2
journalctl -xefu kubelet
journalctl -f -u kubelet

问题处理

pod/kube-proxy CrashLoopBackOff

查看 ipvs安装步骤。

1
2
#确定检测本地流量的方式,默认为 LocalModeClusterCIDR
detectLocalMode: "" # 这里不需要填默认值

错误处理

could not find a JWS signature in the cluster-info ConfigMap

1
2
3
4
5
6
7
8
9
10
11
# kube config 命令查看 cluster-info 
kubectl get configmap cluster-info --namespace=kube-public -o yaml
cat /etc/kubernetes/kubelet.conf
# 可能是CA不对。
# 重新生成 hash256
# 默认证书 /etc/kubernetes/pki/ca.crt
openssl x509 -pubkey -in /etc/kubernetes/pki/etcd/etcd-ca.crt | openssl rsa -pubin -outform der 2>/dev/null | \
openssl dgst -sha256 -hex | sed 's/^.* //'
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | \
openssl dgst -sha256 -hex | sed 's/^.* //'
# 对比之后发现,真的CA不对

join 时的 JWS 问题

1
The cluster-info ConfigMap does not yet contain a JWS signature for token ID "j2lxkq", will try again

这里 kubeadm token list 可以看到 token 都很正常。

cluster info中的 JWS 需要在kube-controller-manager运行后创建。

1
kubectl get pods -A

image-20220602104516917

1
2
3
4
# 查看
kubectl describe -n kube-system kube-controller-manager-master-158
kubectl logs -n kube-system kube-controller-manager-master-158
kubectl logs -n kube-system kube-controller-manager-master-158 --v=10

image-20220602104739590

节点NotReady

1
kubectl describe nodes master-160

image-20220603102357056

cni plugin not initialized

1
sudo systemctl restart containerd

failed to \"CreatePodSandbox\" for \"coredns

1
no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/\""

是因为calico 没有启动成功

操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# 查看 Node状态
kubectl get nodes
# 节点IP可以用空格隔开写多个
kubectl get node master-158
# 查看 Service 信息
kubectl get service
#查看namespace
kubectl get namespaces
# 创建
kubectl create namespace xxxxxx
# 删除
kubectl delete namespaces xxxxxx
# 查看所有名称空间内资源
kubectl get pods --all-namespaces
# 或者
kubectl get pods -A
#进入pod里面
kubectl exec -it podName -n uat bash
# 同时查看多种资源信息
kubectl get pod,svc -n kube-system
# 创建pod
kubectl create -f xxxx.yaml --namespace=xxxx
# 查看具体pod的yaml配置信息
kubectl get pod -n namespace_name xxxxpod -o=yaml
# 查看Pod的详细信息,包括记录的事件:
kubectl describe pod -n namespace_name xxxxpod
# 查看节点信息
kubectl get pod -n kube-system -l k8s-app=flannel -o wide
# 删除 Pod(按名称,删除后控制器会自动重建):
kubectl delete pod -n kube-system Pod名称
kubectl delete pod/Pod名称 -n kube-system
# 查看 API 对象细节
# 使用 kubectl describe 命令,查看一个 API 对象的细节:
kubectl describe node master-158
#kubectl delete node master-158
# 查询 kubeadm 配置文件
kubectl describe cm -n kube-system kubeadm-config
kubectl get cm -n kube-system kubeadm-config -o yaml
kubectl describe cm -n kube-system kubelet-config
kubectl describe cm -n kube-system kube-proxy
kubectl get cm -n kube-system coredns -o yaml
kubectl get cm -n kube-system kube-proxy -o yaml
kubectl describe cm -n kube-system
# 编辑,编辑后删除响应的pod,k8s 会自动重建
kubectl edit cm -n kube-system kubeadm-config
kubectl edit cm -n kube-system kubelet-config
kubectl edit cm -n kube-system kube-proxy
kubectl describe ns default
#查看k8s监控Dashboard的token
kubectl -n kube-system get serviceaccount -l k8s-app=kubernetes-dashboard -o yaml
kubectl -n kube-system describe secrets secrets.name
# 更新 ConfigMap 内容到本地文件 /var/lib/kubelet/config.conf
kubeadm upgrade node phase kubelet-config --v=10
systemctl restart kubelet
kubeadm upgrade node phase control-plane --v=5
kubeadm upgrade node phase preflight --v=5


# 查看集群信息
kubectl cluster-info
# Kubernetes master is running at https://127.0.0.1:6443
# 查看各组件信息
# 使用安全连接:
kubectl -s https://192.168.2.151 get componentstatuses
# 未使用安全连接
kubectl -s http://192.168.2.151 get componentstatuses
# 查看资源类型所对应的Apiversion
kubectl explain pod
# 查看帮助
kubectl explain deployment
kubectl explain deployment.spec
kubectl explain deployment.spec.replicas
# 进入容器
kubectl -n kube-system exec -it kube-flannel-ds-f92wg sh
kubectl -n kube-system exec -it kube-flannel-ds-f92wg bash

Docker or Containerd

kubelet 通过 Container Runtime Interface (CRI) 与容器运行时交互,以管理镜像和容器。

通用的容器运行时:

image-20220529003748105

调用链区别有哪些?

  • Docker 作为 K8S 容器运行时,调用关系如下:
    kubelet --> docker shim (在 kubelet 进程中) --> dockerd --> containerd
  • Containerd 作为 K8S 容器运行时,调用关系如下:
    kubelet --> cri plugin(在 containerd 进程中) --> containerd

CNI 网络

| 对比项 | Docker | Containerd |
| --- | --- | --- |
| 谁负责调用 CNI | Kubelet 内部的 docker-shim | Containerd 内置的 cri-plugin(containerd 1.1 以后) |
| 如何配置 CNI | Kubelet 参数 --cni-bin-dir 和 --cni-conf-dir | Containerd 配置文件(toml):[plugins.cri.cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" |

容器日志及相关参数

| 对比项 | Docker | Containerd |
| --- | --- | --- |
| 存储路径 | 如果 Docker 作为 K8S 容器运行时,容器日志的落盘由 docker 完成,保存在类似 /var/lib/docker/containers/$CONTAINERID 的目录下。Kubelet 会在 /var/log/pods 和 /var/log/containers 下面建立软链接,指向该目录下的容器日志文件。 | 如果 Containerd 作为 K8S 容器运行时,容器日志的落盘由 Kubelet 完成,保存至 /var/log/pods/$CONTAINER_NAME 目录下,同时在 /var/log/containers 目录下创建软链接,指向日志文件。 |
| 配置参数 | 在 docker 配置文件中指定:"log-driver": "json-file", "log-opts": {"max-size": "100m","max-file": "5"} | 方法一:在 kubelet 参数中指定 --container-log-max-files=5 和 --container-log-max-size="100Mi";方法二:在 KubeletConfiguration 中指定 "containerLogMaxSize": "100Mi", "containerLogMaxFiles": 5 |
| 把容器日志保存到数据盘 | 把数据盘挂载到 "data-root"(缺省是 /var/lib/docker)即可。 | 创建一个软链接 /var/log/pods 指向数据盘挂载点下的某个目录。在 TKE 中选择“将容器和镜像存储在数据盘”,会自动创建软链接 /var/log/pods。 |

容器运行时的选择:

  • kubernetes 1.24 起移除了 dockershim,不再内置支持 docker 作为运行时

  • Containerd 不支持 docker API 和 docker CLI,但是可以通过 crictl(cri-tools)命令实现类似的功能。

  • 当您遇到以下情况时,请选择 docker 作为运行时组件:

    • 如需使用 docker in docker
    • 如需在 TKE 节点使用 docker build/push/save/load 等命令。
    • 如需调用 docker API
    • 如需使用 docker compose 或 docker swarm

Docker 与 Containerd 常用命令

| 镜像相关功能 | Docker | Containerd |
| --- | --- | --- |
| 显示本地镜像列表 | docker images | crictl images |
| 下载镜像 | docker pull | crictl pull |
| 上传镜像 | docker push | - |
| 删除本地镜像 | docker rmi | crictl rmi |
| 查看镜像详情 | docker inspect IMAGE-ID | crictl inspecti IMAGE-ID |

| 容器相关功能 | Docker | Containerd |
| --- | --- | --- |
| 显示容器列表 | docker ps | crictl ps |
| 创建容器 | docker create | crictl create |
| 启动容器 | docker start | crictl start |
| 停止容器 | docker stop | crictl stop |
| 删除容器 | docker rm | crictl rm |
| 查看容器详情 | docker inspect | crictl inspect |
| attach | docker attach | crictl attach |
| exec | docker exec | crictl exec |
| logs | docker logs | crictl logs |
| stats | docker stats | crictl stats |

| POD 相关功能 | Docker | Containerd |
| --- | --- | --- |
| 显示 POD 列表 | - | crictl pods |
| 查看 POD 详情 | - | crictl inspectp |
| 运行 POD | - | crictl runp |
| 停止 POD | - | crictl stopp |

本文地址: https://github.com/maxzhao-it/blog/post/cdb1e23a/

前言

安装好docker

dashboard 没有安装成功

我这里有:

  • 192.168.222.150 master
  • 192.168.222.151 node
  • 192.168.222.152 node

Node节点

Kuboard安装方式

这里只做记录:点击参考官方文档

这里需要:

  • 192.168.222.150 k8s master
  • 192.168.222.151 k8s node
  • 192.168.222.152 k8s node
  • 192.168.222.251 Docker 安装 Kuboard-Spray(k8s 不能安装在这台设备上)

注意:如果是虚拟机,必须是固定IP

  • kubelet 节点通信
  • kubectl 集群管理工具

安装Kuboard-Spray

关闭防火墙

1
2
3
4
sudo systemctl stop firewalld
sudo systemctl disable firewalld
# 关闭selinux
setenforce 0

加载镜像

1
2
3
4
5
6
7
8
9
10
docker run -d \
--privileged \
--restart=unless-stopped \
--name=kuboard-spray \
-p 80:80/tcp \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/kuboard-spray-data:/data \
swr.cn-east-2.myhuaweicloud.com/kuboard/kuboard-spray:latest-amd64
# 如果抓不到这个镜像,可以尝试一下这个备用地址:
# eipwork/kuboard-spray:latest-amd64

http://192.168.222.251,输入用户名 admin,默认密码 Kuboard123,即可登录 Kuboard-Spray 界面

加载资源

系统设置 --> 资源包管理 界面,找到最新版本(需要的版本点击导入),点击标题中的加载xxx操作。

离线导入请参考官方文档

规划集群

集群管理 界面,点击 添加集群安装计划 按钮,填写集群名称和资源包:

image-20220522203628018

注意事项

  • 最少的节点数量是 1 个;

  • ETCD 节点、控制节点的总数量必须为奇数;

  • 全局设置 标签页,可以设置节点的通用连接参数,例如所有的节点都使用相同的 ssh 端口、用户名、密码,则共同的参数只在此处设置即可;

  • 在节点标签页,如果该节点的角色包含 etcd 则必须填写 ETCD 成员名称 这个字段;

  • 如果您 KuboardSpray 所在节点不能直接访问到 Kubernetes 集群的节点,您可以设置跳板机参数,使 KuboardSpray 可以通过 ssh 访问集群节点。

  • 集群安装过程中,除了已经导入的资源包以外,还需要使用 yum 或 apt 指令安装一些系统软件,例如 curl, rsync, ipvsadm, ipset, ethtool 等,此时要用到操作系统的 apt 软件源或者 yum 软件源。全局设置标签页中,可以引导您完成 apt / yum 软件源的设置,您可以:

    • 使用节点操作系统已经事先配置的 apt / yum 源,或者
    • 在安装过程中自动配置节点的操作系统使用指定的软件源
  • 如果您使用 docker 作为集群的容器引擎,还需要在全局设置标签页指定安装 docker 用的 apt / yum 源。

    如果您使用 containerd 作为容器引擎,则无需配置 docker 的 apt / yum 源,containerd 的安装包已经包含在 KuboardSpray 离线资源包中。

配置镜像源

只有一个 aliyun 的 CentOS 源的路径是新的,其它在添加时都能找到。

强烈建议配在操作系统里。镜像源使用操作系统默认参考配置

注意:虚拟机需要挂载ISO镜像 mount /dev/cdrom /mnt/cdrom

否则可能会报错

image-20220522212745114

image-20220522205725154

全局配置

建议默认配置,镜像源使用操作系统默认参考配置

节点配置

image-20220522210928019

开始安装

点击保存,然后点击安装/设置集群

image-20220522214914299

访问集群

界面给出了三种方式可以访问 kubernetes 集群:

  • 在集群主节点上执行 kubectl 命令
  • 获取集群的 .kubeconfig 文件
  • 将集群导入到 kuboard管理界面

image-20220522215445220

也可以安装 portainer.io

安装k8s管理工具

这里使用:portainer

安装在 192.168.222.150

数据持久化

查看 StorageClass

1
kubectl get sc

如果是 No resources found 则需要创建一个。

创建 StorageClass

https://blog.csdn.net/huwh_/article/details/96016423

https://kubernetes.io/docs/concepts/storage/storage-classes/

https://docs.portainer.io/start/install/server/kubernetes/baremetal

设置 StorageClass为默认值

1
kubectl patch storageclass maxzhao-cluster-storage-class -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

deploy use yaml manifests

1
2
3
4
# 社区版
kubectl apply -n portainer -f https://downloads.portainer.io/ce2-13/portainer.yaml
#
kubectl patch deployments -n portainer portainer -p '{"spec": {"template": {"spec": {"nodeSelector": {"kubernetes.io/master-150": "'$(kubectl get pods -n portainer -o jsonpath='{ ..nodeName }')'"}}}}}' || (echo Failed to identify current node of portainer pod; exit 1)

也可以使用 portainer 的 helm 安装方式

1
2
3
4
helm repo add portainer https://portainer.github.io/k8s/
helm repo update
# 社区版
helm install --create-namespace -n portainer portainer portainer/portainer --set tls.force=true

https://localhost:30779/ or http://localhost:30777/

重启虚拟机

请参考Kuboard中的介绍

Worker节点不能正常工作

可能是IP变化引起的,需要固定IP后重装集群。

许多Pod一直Crash或不能正常访问

1
kubectl get pods --all-namespaces

重启后会发现许多 Pod 不在 Running 状态,此时,请使用如下命令删除这些状态不正常的 Pod。通常,您的 Pod 如果是使用 Deployment、StatefulSet 等控制器创建的,kubernetes 将创建新的 Pod 作为替代,重新启动的 Pod 通常能够正常工作。

1
kubectl delete pod <pod-name> -n <pod-namespece>

系统配置

关闭防火墙

1
2
3
4
5
6
7
8
9
10
sudo systemctl stop firewalld
sudo systemctl disable firewalld
# 关闭selinux
setenforce 0
# 永久
sed -i 's/enforcing/disabled/' /etc/selinux/config
# 关闭swap(k8s禁止虚拟内存以提高性能)
swapoff -a
#永久
sed -ri 's/.*swap.*/#&/' /etc/fstab

Docker镜像

1
sudo vim /etc/docker/daemon.json

改为:

1
2
3
4
5
{
"registry-mirrors": ["https://aj2rgad5.mirror.aliyuncs.com","https://docker.mirrors.ustc.edu.cn"],
"dns": ["8.8.8.8", "8.8.4.4"],
"exec-opts": ["native.cgroupdriver=systemd"]
}

远程访问:"hosts": ["unix:///var/run/docker.sock", "tcp://127.0.0.1:2375"]

配置 k8s 镜像

1
sudo vim /etc/yum.repos.d/kubernetes.repo

写入

1
2
3
4
5
6
7
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg

刷新

1
2
# 由于官网未开放同步方式, 可能会有索引gpg检查失败的情况
# sudo yum clean all && sudo yum makecache

华为云镜像

1
2
3
4
5
6
7
8
[kubernetes]
name=Kubernetes
baseurl=https://repo.huaweicloud.com/kubernetes/yum/repos/kubernetes-el7-$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=0
gpgkey=https://repo.huaweicloud.com/kubernetes/yum/doc/yum-key.gpg
       https://repo.huaweicloud.com/kubernetes/yum/doc/rpm-package-key.gpg

卸载集群

kubectl config delete-cluster 删除对集群的本地引用。

删除节点

使用适当的凭证与控制平面节点通信,运行:

1
kubectl drain <node name> --delete-emptydir-data --force --ignore-daemonsets

在删除节点之前,请重置 kubeadm 安装的状态:

1
kubeadm reset

重置过程不会重置或清除 iptables 规则或 IPVS 表。如果你希望重置 iptables,则必须手动进行:

1
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X

如果要重置 IPVS 表,则必须运行以下命令:

1
ipvsadm -C

现在删除节点:

1
kubectl delete node <node name>

如果你想重新开始,只需运行 kubeadm init 或 kubeadm join 并加上适当的参数。

附录

CentOS7镜像

1
2
3
4
5
6
7
8
# 备份
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
# 下载源文件
wget -O /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
# 或者
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
# 生成缓存
yum makecache

Docker

1
2
3
4
5
6
7
# 需要挂载 ISO 镜像
sudo mkdir /mnt/cdrom
sudo mount /dev/cdrom /mnt/cdrom/
sudo yum install -y yum-utils
sudo yum-config-manager \
--add-repo \
https://download.docker.com/linux/centos/docker-ce.repo

使用阿里云

1
2
3
4
5
6
7
8
# step 1: 安装必要的一些系统工具
sudo yum install -y yum-utils device-mapper-persistent-data lvm2
# Step 2: 添加软件源信息
sudo yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Step 3
sudo sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# Step 4: 更新并安装Docker-CE
sudo yum makecache fast

Docker镜像加速

手动修改

1
2
mkdir /etc/docker
vim /etc/docker/daemon.json

写入

1
2
3
4
5
6
7
8
9
10
{
"registry-mirrors": [
"https://aj2rgad5.mirror.aliyuncs.com",
"https://docker.mirrors.ustc.edu.cn",
"https://registry.cn-hangzhou.aliyuncs.com",
"https://registry.docker-cn.com",
"https://05f073ad3c0010ea0f4bc00b7105ec20.mirror.swr.myhuaweicloud.com"
],
"dns": ["8.8.8.8", "8.8.4.4"]
}

Spray安装后使用脚本

可以使用任意一个地址

1
2
3
4
5
6
7
8
9
10
11
12
13
# Docker中国 mirror
# export REGISTRY_MIRROR="https://registry.docker-cn.com"
# 腾讯云 docker hub mirror
# export REGISTRY_MIRROR="https://mirror.ccs.tencentyun.com"
# 华为云镜像
# export REGISTRY_MIRROR="https://05f073ad3c0010ea0f4bc00b7105ec20.mirror.swr.myhuaweicloud.com"
# DaoCloud 镜像
# export REGISTRY_MIRROR="http://f1361db2.m.daocloud.io"
# 阿里云 docker hub mirror
export REGISTRY_MIRROR=https://registry.cn-hangzhou.aliyuncs.com
curl -sSL https://kuboard.cn/install-script/set_mirror.sh | sh -s ${REGISTRY_MIRROR}

systemctl restart kubelet # 假设您安装了 kubernetes

查看修改结果

1
docker info

image-20220522214702999

本文地址: https://github.com/maxzhao-it/blog/post/9c2c35af/

前言

StorageClass 为管理员提供了一种描述他们所提供的存储“类”的方法。不同的类可能映射到不同的服务质量级别、备份策略,或集群管理员定义的任意策略。Kubernetes 本身并不限定类所代表的含义。这个概念在其他存储系统中有时被称为 profiles。

每个 StorageClass 都包含 provisioner、parameters 和 reclaimPolicy 字段,当需要动态配置属于该类的 PersistentVolume 时使用这些字段。

StorageClass 对象的名称很重要,它是用户请求特定类的方式。管理员在第一次创建StorageClass对象时设置类的名称和其他参数,对象一旦创建就不能更新

管理员只能为不请求绑定任何特定类的 PVC 指定默认 StorageClass:有关详细信息,请参阅 PersistentVolumeClaim 部分。

1
2
3
4
5
6
7
8
9
10
11
12
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: standard
provisioner: kubernetes.io/aws-ebs
parameters:
  type: gp2
reclaimPolicy: Retain
allowVolumeExpansion: true
mountOptions:
  - debug
volumeBindingMode: Immediate

来自storage-classes

属性

Provisioner分配器

每个StorageClass都有一个provisioner,决定使用什么卷插件来配置PV。这个字段必须被指定。

分配器分为:内部分配器、外部分配器

Reclaim Policy回收策略

StorageClass 动态创建的 PersistentVolume 将具有在类的 reclaimPolicy 字段中指定的回收策略,该字段可以是 DeleteRetain。如果在创建 StorageClass 对象时未指定 reclaimPolicy,则默认为 Delete

手动创建并通过 StorageClass 管理的 PersistentVolume 将具有在创建时分配的任何回收策略

Allow Volume Expansion允许卷扩展

PersistentVolume 可以配置为可扩展。此功能设置为 true 时,允许用户通过编辑相应的 PVC 对象来调整卷的大小。

Mount Options挂载

StorageClass 动态创建的 PersistentVolume 将具有在类的 mountOptions 字段中指定的挂载选项。

如果卷插件不支持挂载选项但指定了挂载选项,则配置将失败。挂载选项未在类或 PV 上验证。如果挂载选项无效,则 PV 挂载失败

Volume Binding Mode

volumeBindingMode 字段控制何时应该发生卷绑定和动态配置。未设置时,默认使用“立即”模式。

即时模式表示一旦创建 PersistentVolumeClaim,就会发生卷绑定和动态供应。对于拓扑受限且无法从集群中的所有节点全局访问的存储后端,将在不知道 Pod 调度要求的情况下绑定或配置 PersistentVolume。这可能会导致不可调度的 Pod

Allowed Topologies

当集群操作员指定 WaitForFirstConsumer 卷绑定模式时,在大多数情况下不再需要将配置限制为特定拓扑。但是,如果仍然需要,可以指定 allowedTopologies

此示例演示如何将已配置卷的拓扑限制到特定区域,并且应该用作受支持插件的区域和区域参数的替代。

Parameters

存储类具有描述属于该存储类的卷的参数。根据 provisioner 的不同,可以接受不同的参数。例如,参数类型的值 io1 和参数 iopsPerGB 特定于 EBS。当省略参数时,使用一些默认值。

一个 StorageClass 最多可以定义 512 个参数。参数对象的总长度(包括其键和值)不能超过 256 KiB

NFS

创建存储类

example-nfs-storage-class.yaml

1
2
3
4
5
6
7
8
9
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: example-nfs-storage-class
provisioner: nfs-provisioner
parameters:
  server: 192.168.2.240
  path: /share
  readOnly: "false"

  • server:NFS 服务器的主机名或 IP 地址。
  • path:NFS 服务器导出的路径。
  • readOnly:指示存储是否以只读方式挂载的标志(默认为 false)。

Kubernetes 不包含内部 NFS 供应商。您需要使用外部供应商为 NFS 创建 StorageClass。这里有些例子:

创建pvc

nfs-pvc.yaml

1
2
3
4
5
6
7
8
9
10
11
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-pvc
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: example-nfs-storage-class
  resources:
    requests:
      storage: 1Mi

使用nfs-pvc

test-deployment.yaml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-deployment
spec:
  replicas: 2
  selector:
    matchLabels:
      name: test-deployment
  template:
    metadata:
      labels:
        name: test-deployment
    spec:
      serviceAccount: nfs-provisioner
      containers:
        - image: redis
          command:
            - sh
            - -c
            - 'while true; do date > /mnt/index.html; hostname >> /mnt/index.html; sleep $(($RANDOM % 5 + 5)); done'
          imagePullPolicy: IfNotPresent
          name: redis
          volumeMounts:
            - name: nfs
              mountPath: /mnt
      volumes:
        - name: nfs
          persistentVolumeClaim:
            claimName: nfs-pvc
1
kubectl apply -f test-deployment.yaml

本文地址: https://github.com/maxzhao-it/blog/post/cfe111da/

一般情况下 nginx 是代理七层的 http 协议,其实 nginx 也可以代理第四层协议

修改配置

1
2
3
4
5
6
7
8
9
10
http {}
stream {
    upstream ssh-proxy {
        server 192.168.2.240:22;
    }
    server {
        listen 50022;
        proxy_pass ssh-proxy;
    }
}

连接命令

1
2
ssh root@192.168.7.173 -p 50022
ssh 192.168.7.173 -l root -p 50022

转发请求头

1
2
3
4
5
6
7
location /prod-api/ {
    proxy_set_header Host $http_host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header REMOTE-HOST $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_pass xxx;
}

本文地址: https://github.com/maxzhao-it/blog/post/605d48da/

安装工具

1
sudo yum install -y ntpdate

同步时间

1
2
3
sudo ntpdate -u ntp.api.bz
# 或者
sudo ntpdate time.windows.com

常用服务

ntp常用服务器:

  • 中国国家授时中心:210.72.145.44
  • NTP服务器(上海) :ntp.api.bz
  • 美国: time.nist.gov
  • 复旦: ntp.fudan.edu.cn
  • 微软公司授时主机(美国) :time.windows.com
  • 北京邮电大学 : s1a.time.edu.cn
  • 清华大学 : s1b.time.edu.cn
  • 北京大学 : s1c.time.edu.cn
  • 台警大授时中心(台湾):asia.pool.ntp.org

查看时区

1
date -R

修改时区

1
2
3
4
5
tzselect
# 5 9 1 1
sudo rm /etc/localtime
sudo ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
sudo ntpdate time1.aliyun.com

本文地址: https://github.com/maxzhao-it/blog/post/297cc7d3/