🐳 容器化与 DevOps
Docker 容器化、Kubernetes 编排、GitHub Actions CI/CD、Prometheus 监控——现代工程交付的完整工具链。
1. Docker 容器化
生产级 Dockerfile
# ─── Stage 1: build ───────────────────────────────────────────
FROM golang:1.22-alpine AS builder

# BuildKit sets these to the platform the image is being built for
# (e.g. linux/arm64 on Apple Silicon or with `docker buildx --platform`).
# They are empty under the legacy builder, hence the defaults used below.
ARG TARGETOS
ARG TARGETARCH

# git is required by `git describe`; the CA bundle and tzdata installed here
# are copied into the runtime image in stage 2.
RUN apk add --no-cache git ca-certificates tzdata

WORKDIR /app

# Copy only the module files first so the dependency layer stays cached
# until go.mod / go.sum change.
COPY go.mod go.sum ./
RUN go mod download

# Copy the sources and compile a static (CGO disabled), stripped binary.
# - GOOS/GOARCH follow the target platform instead of a hard-coded amd64, so
#   the binary matches the image it is shipped in; they fall back to
#   linux/amd64 when the platform args are not provided.
# - The version stamp falls back to "dev" when .git is not part of the build
#   context (e.g. excluded via .dockerignore) instead of silently being empty.
COPY . .
RUN CGO_ENABLED=0 GOOS="${TARGETOS:-linux}" GOARCH="${TARGETARCH:-amd64}" \
    go build \
      -ldflags="-w -s -X main.version=$(git describe --tags --always 2>/dev/null || echo dev)" \
      -trimpath \
      -o server ./cmd/server

# ─── Stage 2: runtime (minimal image) ─────────────────────────
FROM scratch

# scratch is empty: bring over the CA certificates (outbound TLS), the
# timezone database and the compiled binary from the build stage.
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=builder /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=builder /app/server /server

# Run as an unprivileged numeric UID:GID; scratch has no /etc/passwd, so a
# user name could not be resolved here.
USER 65534:65534

EXPOSE 8080
ENTRYPOINT ["/server"]
多阶段构建的优势
最终镜像只包含二进制文件(以及 CA 证书和时区数据),无需 Go 编译器。与数百 MB 的 golang 构建镜像(Debian 版接近 1GB)相比,scratch 运行镜像通常小于 20MB,同时消除了构建工具带来的安全攻击面。
Docker Compose 本地开发
# Local development stack.
# The top-level `version` key is intentionally omitted: it is obsolete in the
# Compose Specification and Docker Compose v2 only prints a warning for it.
services:
  # Application service
  app:
    build:
      context: .
      target: builder  # dev mode runs on the build-stage image (has the Go toolchain)
    ports:
      - "8080:8080"
    environment:
      DATABASE_URL: "postgres://dev:dev@postgres:5432/myapp?sslmode=disable"
      REDIS_URL: "redis:6379"
      JWT_SECRET: "dev-secret-change-in-production"
    volumes:
      - .:/app  # bind-mount the sources so the reloader sees edits
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_started
    # Hot reload via air.
    # NOTE(review): the `builder` stage shown in this document does not install
    # `air`; it must be added to the image (or this command changed) — confirm.
    command: air

  # PostgreSQL
  postgres:
    image: postgres:16-alpine
    environment:
      POSTGRES_DB: myapp
      POSTGRES_USER: dev
      POSTGRES_PASSWORD: dev
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    # `app` waits on this check through `condition: service_healthy`.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U dev -d myapp"]
      interval: 5s
      timeout: 5s
      retries: 5

  # Redis with append-only-file persistence enabled
  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    ports:
      - "6379:6379"

  # Prometheus monitoring
  prometheus:
    image: prom/prometheus:latest
    volumes:
      # Read-only: the container only needs to read its configuration.
      - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    ports:
      - "9090:9090"

  # Grafana dashboards
  grafana:
    image: grafana/grafana:latest
    environment:
      GF_SECURITY_ADMIN_PASSWORD: "admin"
    volumes:
      - grafana_data:/var/lib/grafana
    ports:
      - "3000:3000"
    depends_on:
      - prometheus

# Named volumes; the empty mappings select the default volume driver.
volumes:
  postgres_data: {}
  redis_data: {}
  grafana_data: {}
2. Kubernetes 部署
Deployment + Service
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp
  namespace: production
  labels:
    app: myapp
spec:
  # NOTE(review): an HPA targeting this Deployment is defined later in this
  # document; re-applying this manifest resets the replica count to 3.
  replicas: 3
  selector:
    matchLabels:
      app: myapp
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Start one extra pod and never drop below the desired count, so a
      # rollout does not reduce serving capacity.
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: myapp
    spec:
      # Time allowed between SIGTERM and SIGKILL for a graceful shutdown.
      terminationGracePeriodSeconds: 30
      containers:
        - name: myapp
          image: registry.example.com/myapp:v1.2.3
          ports:
            - containerPort: 8080
          # Credentials come from the `myapp-secrets` Secret, not from the manifest.
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: myapp-secrets
                  key: database-url
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: myapp-secrets
                  key: jwt-secret
          resources:
            requests: {cpu: 100m, memory: 128Mi}
            limits: {cpu: 500m, memory: 512Mi}
          # Liveness probe: decides whether the container must be restarted.
          livenessProbe:
            initialDelaySeconds: 10
            periodSeconds: 15
            httpGet:
              port: 8080
              path: /health
          # Readiness probe: decides whether the pod receives traffic.
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 5
            httpGet:
              port: 8080
              path: /ready
---
apiVersion: v1
kind: Service
metadata:
  name: myapp-svc
  namespace: production
spec:
  # Cluster-internal virtual IP only.
  type: ClusterIP
  # Routes to every pod labelled app=myapp.
  selector:
    app: myapp
  ports:
    # Service port 80 forwards to container port 8080.
    - protocol: TCP
      port: 80
      targetPort: 8080
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: myapp-ingress
  # An Ingress can only reference Services in its own namespace, so it must
  # live next to `myapp-svc` in `production`.
  namespace: production
  annotations:
    # cert-manager issues the certificate stored in the `myapp-tls` Secret.
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # ingress-nginx rate limiting: requests per second accepted per client IP.
    # (`rate-limit` is not an annotation ingress-nginx recognises.)
    nginx.ingress.kubernetes.io/limit-rps: "100"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - api.example.com
      secretName: myapp-tls
  rules:
    - host: api.example.com
      http:
        paths:
          # Send every path under / to the application Service.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: myapp-svc
                port:
                  number: 80
HPA 水平自动伸缩
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
  namespace: production
spec:
  # Workload whose replica count this autoscaler controls.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 20
  # Utilization is measured against the containers' resource requests.
  metrics:
    # Scale out when average CPU utilization exceeds 70%.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Scale out when average memory utilization exceeds 80%.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleUp:
      # 1-minute stabilization window; add at most 2 pods per 60 seconds.
      stabilizationWindowSeconds: 60
      policies:
        - type: Pods
          value: 2
          periodSeconds: 60
    scaleDown:
      # 5-minute stabilization window to avoid flapping.
      stabilizationWindowSeconds: 300
3. CI/CD:GitHub Actions
代码推送
git push
→
单元测试
go test
→
代码扫描
golangci-lint
→
构建镜像
docker build
→
部署 staging
kubectl apply
→
发布生产
manual approve
# .github/workflows/ci-cd.yml
# Pipeline: test + lint -> build and push image -> deploy staging -> deploy
# production (gated by the `production` environment's required reviewers).
name: CI/CD Pipeline

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

env:
  REGISTRY: ghcr.io
  # NOTE: image references must be lowercase. docker/metadata-action lowercases
  # the name when tagging; the deploy jobs below use this value verbatim, so
  # they assume the owner/repo name is already lowercase.
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # ─── Tests and code quality ─────────────────────────────────
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:16
        env:
          POSTGRES_PASSWORD: test
          POSTGRES_DB: testdb
        # The job runs directly on the runner (not in a container), so the
        # service port must be published for `localhost` to reach it.
        ports:
          - "5432:5432"
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-retries 5
    steps:
      - uses: actions/checkout@v4

      - name: 设置 Go 环境
        uses: actions/setup-go@v5
        with:
          go-version: '1.22'
          cache: true  # cache Go modules between runs

      - name: 安装依赖
        run: go mod download

      - name: 运行 linter
        uses: golangci/golangci-lint-action@v6
        with:
          version: latest

      - name: 运行测试
        run: |
          go test -v -race -coverprofile=coverage.out ./...
          go tool cover -func=coverage.out
        env:
          DATABASE_URL: postgres://postgres:test@localhost:5432/testdb?sslmode=disable

      - name: 上传覆盖率报告
        uses: codecov/codecov-action@v4
        with:
          files: coverage.out
          # codecov-action v4 expects an upload token in most setups; this is
          # empty (previous behavior) if the secret is not defined.
          token: ${{ secrets.CODECOV_TOKEN }}

  # ─── Build and push the image ───────────────────────────────
  build:
    needs: test
    runs-on: ubuntu-latest
    if: github.event_name != 'pull_request'
    # GITHUB_TOKEN needs write access to packages to push to ghcr.io.
    permissions:
      contents: read
      packages: write
    outputs:
      # A single tag. `outputs.tags` is a newline-separated list of full image
      # references and cannot be appended after "repo:".
      image-tag: ${{ steps.meta.outputs.version }}
      # Immutable content digest of the pushed image; used by the deploy jobs.
      image-digest: ${{ steps.build.outputs.digest }}
    steps:
      - uses: actions/checkout@v4

      - name: 登录 Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: 提取镜像元数据
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # Two tags per build: the short commit SHA and the branch name.
          tags: |
            type=sha,prefix=,suffix=,format=short
            type=ref,event=branch

      - name: 构建并推送
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # Reuse layers through the GitHub Actions cache backend.
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # ─── Deploy to staging ──────────────────────────────────────
  # NOTE(review): both deploy jobs call kubectl without any visible credential
  # setup; presumably the runner/environment provides a kubeconfig — confirm.
  deploy-staging:
    needs: build
    runs-on: ubuntu-latest
    environment: staging
    steps:
      - uses: actions/checkout@v4

      - name: 部署到 Kubernetes
        # Pin the exact image that was just built by digest, then wait for the
        # rollout to complete (or fail the job after the timeout).
        run: |
          kubectl set image deployment/myapp \
            myapp=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ needs.build.outputs.image-digest }} \
            -n staging
          kubectl rollout status deployment/myapp -n staging --timeout=3m

  # ─── Deploy to production (manual approval required) ────────
  deploy-production:
    needs: [build, deploy-staging]
    runs-on: ubuntu-latest
    environment: production  # configured with required reviewers
    if: github.ref == 'refs/heads/main'
    steps:
      # `kubectl set image` triggers the Deployment's rolling update; this is
      # not a blue-green switch, so the step is named accordingly.
      - name: 滚动更新生产环境
        run: |
          kubectl set image deployment/myapp \
            myapp=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ needs.build.outputs.image-digest }} \
            -n production
          kubectl rollout status deployment/myapp -n production --timeout=5m
4. Prometheus + Grafana 监控
package metrics
import (
"net/http"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/gin-gonic/gin"
)
// httpRequestsTotal counts handled HTTP requests, partitioned by request
// method, route path and response status code.
var httpRequestsTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name: "http_requests_total",
		Help: "HTTP 请求总数",
	},
	[]string{"method", "path", "status"},
)

// httpRequestDuration records request handling time in seconds, partitioned
// by request method and route path. Buckets span 5ms to 10s.
var httpRequestDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name: "http_request_duration_seconds",
		Help: "HTTP 请求处理时长",
		Buckets: []float64{
			.005, .01, .025, .05, .1, .25, .5,
			1, 2.5, 5, 10,
		},
	},
	[]string{"method", "path"},
)

// activeConnections is incremented when the middleware starts handling a
// request and decremented when it returns.
var activeConnections = promauto.NewGauge(
	prometheus.GaugeOpts{
		Name: "active_connections",
		Help: "当前活跃连接数",
	},
)

// userRegistrations is a business metric: total number of user registrations.
var userRegistrations = promauto.NewCounter(
	prometheus.CounterOpts{
		Name: "user_registrations_total",
		Help: "用户注册总数",
	},
)
// PrometheusMiddleware returns a Gin handler that instruments every request:
// it tracks the number of in-flight requests, counts completed requests by
// method/path/status and observes their duration in seconds.
func PrometheusMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		// Use the registered route pattern as the label; FullPath is empty
		// when no route matched, in which case a fixed placeholder is used.
		route := c.FullPath()
		if route == "" {
			route = "unknown"
		}

		began := time.Now()

		activeConnections.Inc()
		defer activeConnections.Dec()

		// Run the remaining handlers in the chain.
		c.Next()

		elapsed := time.Since(began)
		method := c.Request.Method
		code := strconv.Itoa(c.Writer.Status())

		httpRequestsTotal.WithLabelValues(method, route, code).Inc()
		httpRequestDuration.WithLabelValues(method, route).Observe(elapsed.Seconds())
	}
}
// RegisterMetrics mounts the Prometheus scrape endpoint at GET /metrics on
// the given Gin engine.
func RegisterMetrics(r *gin.Engine) {
	// promhttp serves the default registry as a standard http.Handler;
	// gin.WrapH adapts it to a Gin handler.
	var scrape http.Handler = promhttp.Handler()
	r.GET("/metrics", gin.WrapH(scrape))
}
// RecordUserRegistration is called from business code to count one user
// registration.
func RecordUserRegistration() {
	userRegistrations.Add(1)
}
# config/prometheus.yml
global:
  # Scrape every target every 15 seconds unless a job overrides it.
  scrape_interval: 15s

scrape_configs:
  # The application, scraped at app:8080/metrics.
  - job_name: myapp
    metrics_path: /metrics
    static_configs:
      - targets:
          - app:8080
关键监控指标 (黄金信号)
① 延迟:P50/P95/P99 请求延迟 ② 流量:每秒请求数 (RPS) ③ 错误率:5xx 错误率 ④ 饱和度:CPU/内存使用率。这四项是判断服务健康的核心指标。