---
# Deployment: runs a single replica of the rag-server container in the `ai`
# namespace. The container listens on port 8788 and is configured entirely
# through environment variables.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rag-server
  namespace: ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: rag-server
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above and the Service selector.
        app: rag-server
    spec:
      containers:
        - name: rag-server
          # NOTE(review): `latest` is a floating tag; consider pinning a
          # specific version or digest so rollouts are reproducible.
          image: docker.io/ion606/rag-server:latest
          ports:
            - containerPort: 8788
          env:
            # Quoted so the value stays a string (env values must be strings).
            - name: PORT
              value: "8788"
            # Presumably the `ollama` Service in the `ai` namespace, addressed
            # by its cluster DNS name -- confirm that Service exists.
            - name: OLLAMA_BASE
              value: "http://ollama.ai.svc.cluster.local:11434"
            - name: OLLAMA_CHAT_MODEL
              value: "llama3.1"
            - name: OLLAMA_EMBED_MODEL
              value: "nomic-embed-text"
          # Readiness is checked via the OpenAPI document endpoint.
          readinessProbe:
            httpGet:
              path: "/openapi.json"
              port: 8788
          # Liveness is checked via the root path, starting 10s after the
          # container starts.
          livenessProbe:
            httpGet:
              path: "/"
              port: 8788
            initialDelaySeconds: 10
          resources:
            requests:
              cpu: "200m"
              memory: "256Mi"
            limits:
              cpu: "1"
              memory: "1Gi"
---
# Service: exposes the rag-server pods on port 8788 inside the cluster and on
# node port 31788 on every node.
apiVersion: v1
kind: Service
metadata:
  name: rag-server
  namespace: ai
spec:
  selector:
    app: rag-server
  ports:
    - name: http
      port: 8788
      targetPort: 8788
      nodePort: 31788
  type: NodePort