---
# Deployment: rag-server — single-replica RAG API server in the "ai" namespace.
# Talks to an in-cluster Ollama instance for chat + embedding models.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rag-server
  namespace: ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: rag-server
  template:
    metadata:
      labels:
        app: rag-server
    spec:
      containers:
        - name: rag-server
          image: docker.io/ion606/rag-server:latest
          ports:
            - containerPort: 8788
          env:
            # Port the server listens on; quoted because env values must be strings.
            - name: PORT
              value: "8788"
            # In-cluster Ollama service endpoint (same namespace).
            - name: OLLAMA_BASE
              value: "http://ollama.ai.svc.cluster.local:11434"
            - name: OLLAMA_CHAT_MODEL
              value: "llama3.1"
            - name: OLLAMA_EMBED_MODEL
              value: "nomic-embed-text"
          # Ready once the API schema endpoint responds.
          readinessProbe:
            httpGet:
              path: "/openapi.json"
              port: 8788
          # Liveness on the root path; delayed to allow startup.
          livenessProbe:
            httpGet:
              path: "/"
              port: 8788
            initialDelaySeconds: 10
          resources:
            requests:
              cpu: "200m"
              memory: "256Mi"
            limits:
              cpu: "1"
              memory: "1Gi"
---
# Service: exposes rag-server inside the cluster on 8788 and externally
# on NodePort 31788 (valid range 30000-32767).
apiVersion: v1
kind: Service
metadata:
  name: rag-server
  namespace: ai
spec:
  type: NodePort
  selector:
    app: rag-server
  ports:
    - name: http
      port: 8788
      targetPort: 8788
      nodePort: 31788