ClusterD运行后,会启动gRPC服务端侦听训练容器内gRPC客户端的消息,完成断点续训特性。ClusterD默认情况下会使用非安全的gRPC通信方式,用户可采用TLS/SSL加密方式通信,防止通信过程被攻击。
以下为ClusterD配置安全gRPC通信方式的参考样例。
# Pull the nginx image; `latest` is the tag Docker uses when none is given.
docker pull nginx:latest
# Fragments to merge into the ClusterD manifest. The parent keys
# (`containers`, `volumes`, `ports`) are shown only to indicate where each
# list lives; merge the entries into the existing keys of the manifest.

# Append to the `containers` list of the Deployment.
containers:
  - name: nginx
    image: nginx:latest
    imagePullPolicy: Never
    # Keep the container idle; nginx itself is started manually afterwards.
    command: ["/bin/bash", "-c", "--"]
    args: ["sleep infinity"]
    volumeMounts:
      - name: nginx-cert
        mountPath: /etc/nginx/conf.d/cert
      - name: nginx-conf
        mountPath: /etc/nginx/nginx.conf

# Append to the `volumes` list of the Deployment.
volumes:
  - name: nginx-cert
    hostPath:
      # Directory holding the x509 certificate and the private key.
      path: /path/to/cert/and/private/key
  - name: nginx-conf
    hostPath:
      # nginx startup configuration file.
      path: /path/to/nginx/config

# Replace the `ports` list of the Service with the following.
ports:
  - protocol: TCP
    port: 8888
    targetPort: 8888
# Apply the ClusterD manifest; replace {version} with the version in the actual file name.
kubectl apply --filename clusterd-v{version}.yaml
# List pods in every namespace; the wide output adds columns such as pod IP and node.
kubectl get pods --all-namespaces --output wide
# nginx configuration that terminates TLS on port 8888 and forwards gRPC
# traffic to ClusterD on port 8899.
worker_processes 1;
worker_cpu_affinity 0001;
worker_rlimit_nofile 4096;

events {
    worker_connections 4096;
}

http {
    port_in_redirect off;
    server_tokens off;
    autoindex off;
    access_log /var/log/nginx/access.log;
    error_log /var/log/nginx/error.log info;

    # The zone key is the literal string "global" (no variable), so the
    # request-rate and connection limits are shared by all clients.
    limit_req_zone global zone=req_zone:100m rate=20r/s;
    limit_conn_zone global zone=north_conn_zone:100m;

    server {
        # Pod IP address of ClusterD; the port must match the port in the
        # ClusterD configuration file.
        listen <clusterD pod ip>:8888 ssl;
        http2 on;

        add_header Referrer-Policy "no-referrer";
        add_header X-XSS-Protection "1; mode=block";
        add_header X-Frame-Options DENY;
        add_header X-Content-Type-Options nosniff;
        add_header Strict-Transport-Security " max-age=31536000; includeSubDomains ";
        add_header Content-Security-Policy "default-src 'self'";
        add_header Cache-control "no-cache, no-store, must-revalidate";
        add_header Pragma no-cache;
        add_header Expires 0;

        ssl_session_tickets off;
        # Server certificate path (file permission 400).
        ssl_certificate /path/to/cert;
        # Server private key path; the private key must not be stored in
        # plaintext (file permission 400).
        ssl_certificate_key /path/to/private/key;

        send_timeout 60;
        limit_req zone=req_zone burst=20 nodelay;
        limit_conn north_conn_zone 20;
        keepalive_timeout 60;

        # NOTE(review): the proxy_* timeouts belong to ngx_http_proxy_module
        # and do not govern grpc_pass; the gRPC equivalents are
        # grpc_read_timeout / grpc_connect_timeout / grpc_send_timeout.
        # Confirm which ones were intended.
        proxy_read_timeout 900;
        proxy_connect_timeout 60;
        proxy_send_timeout 60;

        client_header_timeout 60;
        client_body_timeout 10;
        client_header_buffer_size 2k;
        large_client_header_buffers 4 8k;
        client_body_buffer_size 16K;
        client_max_body_size 20m;

        ssl_protocols TLSv1.2 TLSv1.3;
        ssl_ciphers "ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS !RC4";
        ssl_session_timeout 10s;
        ssl_session_cache shared:SSL:10m;

        location / {
            # Pod IP address of ClusterD.
            grpc_pass grpc://<clusterD pod ip>:8899;
        }
    }
}
# Open a shell in the nginx container of the ClusterD pod.
# "xxxxx" stands for the random pod ID that Kubernetes generates when the ClusterD pod starts.
# The `--` separator is required: the `kubectl exec POD COMMAND` form without it is deprecated.
kubectl exec -it -n mindx-dl clusterd-xxxxx -c nginx -- bash
# After running the command below, a prompt asks for the private-key passphrase.
nginx -c /etc/nginx/nginx.conf