diff --git a/README.md b/README.md
index 8e54b4c..7e1775b 100644
--- a/README.md
+++ b/README.md
@@ -1,185 +1,220 @@
-# ML Repo — Architecture and External RAG Server Design (for Ollama/Open WebUI)
+# ML Stack — Local AI Orchestration Toolkit

-My openWebUI/searxng configs, plugins, RAG server, as well as a custom program that runs the AI's code in isolated Docker containers
+This repository packages a complete self-hosted assistant stack around Open WebUI plus several companion services: a scheduler that can trigger chats and workflows, a Docker-backed code runner, a Roku remote tool server, Nextcloud file access, SearxNG metasearch, and a headless browser UI for deep-research sessions. Everything is wired together through `docker-compose.yml` so the stack can be brought up on a single host.

-*Last updated: 2025-09-10*
+_Last updated: 2025-10-03_

---

-## Summary :3
+## A (Few) Notes

-This repository wires together a local AI stack built around **Open WebUI**, **Ollama**, **SearxNG**, and two custom utilities: a **code runner** (executes model-generated code inside sandboxed containers) and a **headless research browser UI**. The current compose setup already gives you working RAG (retrieval-augmented generation) **inside Open WebUI** without needing a separate RAG service.
+1. Ports are currently exposed on most services for development purposes (e.g. 12253 for the scheduler); remove these in production or consider adding a proxy
+
+2. **ALL DATA IS STORED IN VOLUMES!!!** This means if you do `docker compose down -v` your data **WILL** disappear. Consider mounting a persistent host directory to avoid this
+
+3. Before starting the cluster, check which components you actually need (e.g. the Nextcloud Tool Server). They are set to restart on failure and will throw when env vars/credentials are missing, which loops endlessly
+
+4. If you do not use cloudflared for tunneling, adjust the CORS policies accordingly, and consider adding a reverse proxy either on your local machine or inside the compose stack
+
+5. The code runner and scheduler both mount the host Docker socket. Ensure the host user/group IDs match the compose configuration (the `DOCKER_GID` build arg defaults to 977; pass `--build-arg DOCKER_GID=$(stat -c '%g' /var/run/docker.sock)` to match your host) so containers can operate without root. This will be replaced when I eventually migrate this to a Kubernetes cluster
+
+6. 
When adjusting `NEXTCLOUD_ACCESS_DIRS`, remember to restart `ollama-nextcloud` so the regex list is reloaded --- -## Repo map and how each piece fits +## Stack At A Glance + +| Compose service | Directory / build context | External ports | Primary role | +|-----------------|---------------------------|----------------|--------------| +| `open-webui` | (image: `ghcr.io/open-webui/open-webui:main`) | `4000 -> 8080` | Chat UI, agent orchestration, embedded knowledge base & RAG powered by Postgres | +| `postgres` | – | – | Persistence for Open WebUI (users, KB, events) | +| `searxng` | `searxng.yml` | `4001 -> 8080` (debug only) | Private SearxNG instance used for live web search tools | +| `coderunner` | `coderunner/` | – (internal `8787`) | Bun service that executes pure source code inside sandboxed Docker containers | +| `openwebui_tools` | `tools/` | – (internal `1331`) | Python Roku remote API exposed as an OpenAPI tool server | +| `browser` | `browser/` | `7788 -> 7788` | Playwright Chromium UI for autonomous browsing / research | +| `schedules-api` | `scheduler/` | `12253 -> 12253` | Cron-style job scheduler that can open chats, call templates, and upload files | +| `ollama-nextcloud` | `nextcloud/` | `13284 -> 1111` | Nextcloud WebDAV proxy with caching and access controls | + +Volumes declared in compose: `open-webui`, `pgdata`, `searxng_data`, `webui_data`, `schedule_data`, and `nextcloud_data` + + +> [!CAUTION] +> PLEASE I BEG OF YOU REMEMBER TO BACK THESE UP/USE A LOCAL DIRECTORY. +> IF YOU DO NOT AND REMOVE OR PRUNE THE VOLUMES YOU WILL LOSE *ALL* DATA + +--- + +## Service Details + +### Open WebUI (`open-webui`) +- Runs the latest `ghcr.io/open-webui/open-webui:main` image with Postgres backing for durable data (`open-webui` and `pgdata` volumes) + +- `.env` enables the login form, optional API keys (not currently used), and forwards identifying headers so downstream tools know which user initiated a request + +- Depends on the tool containers (`openwebui_tools`, `coderunner`, `schedules-api`, `ollama-nextcloud`) via internal networking; discover their OpenAPI docs from inside the UI to register tools + +### Postgres (`postgres`) +> [!IMPORTANT] +> If you plan on exposing ports on this service, please move the inline credentials to the `.env` file + +- Standard `postgres:latest` image. Credentials are set inline in compose for local development + +- Health-checked with `pg_isready`; the data volume `pgdata` stores Open WebUI metadata + +### SearxNG (`searxng`) +- Private SearxNG deployment for agent web search tasks with HTML/JSON outputs enabled + +- Mounts `searxng.yml` and persists internal data to `searxng_data`. External port 4001 is exposed only for local debugging and should be removed in production + +### Code Runner (`coderunner`) +- Bun-based HTTP server that accepts pure source code plus optional extra files, then runs the workload in a throwaway Docker container pinned to an allow-listed base image per language + +- Enforces strict limits (`--network=none`, read-only root FS, tmpfs workdir, CPU/memory caps, dropped capabilities). Supported Languages: + - `python` + - `node` + - `bun` + - `bash` + - `ruby` + - `go` + - `rust` + - `java` + - `c` + - `cpp` +- Exposes `GET /openapi.json` and `POST /execute` inside the internal network (`http://coderunner:8787`). Requires the host Docker socket to spawn child sandboxes; the compose file mounts it read-only with matching group ID. 
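+
+As a quick smoke test of the contract (a sketch: the request/response field names follow `coderunner/openapi.json`, and it assumes `curl` is available in whatever container you call from):
+
+```sh
+# call the code runner from another container on the internal compose network
+curl -s http://coderunner:8787/execute \
+  -H 'content-type: application/json' \
+  -d '{"language": "python", "code": "print(\"hi\")"}'
+# expected shape: {"stdout":"hi\n","stderr":"","exitCode":0,"timedOut":false}
+```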
+ +### Roku Tool Server (`openwebui_tools`) +- Lightweight Python HTTP server that proxies Roku remote commands + +- Reads `ROKU_IP` from `.env`; returns helpful errors when the IP is missing or the device is offline + +- Serves `GET /roku/openapi.json` for automatic tool registration and handles `GET /roku/{command}` requests. Supported command list matches the enum in `spec/roku.openapi.json` (navigation, inputs, power, volume, remote finder) + +### Browser Research UI (`browser`) +- Builds the upstream `browser-use/web-ui` project, installs Chromium plus dependencies, and launches the UI on port 7788 + +- Runs as an unprivileged user (uid 1000) with dedicated tmpfs directories and a `webui_data` volume for persisted history/state + +- Configure resolution, telemetry, and default LLM via `browser/.env` or container environment variables + +- The browser-use docs can be found at https://docs.browser-use.com/ + +### Scheduler API (`schedules-api`) +- Bun/Node cron worker that lets you schedule Open WebUI chats or template-driven jobs using authenticated user tokens + +- Persists schedule definitions to `schedule_data` (JSON payload) and can store uploaded supporting files under the same volume + +- Reads workflow templates from the bundled `scheduler/templates.json`. To inject custom templates, mount a host file or populate the root-level `templates.json/` directory and update the compose volume mapping + +- Key endpoints (documented in `scheduler/openapi.json`): + - `GET /openapi.json`: tool contract. + - `POST /api/schedules`: create or replace a schedule (cron or one-shot ISO timestamp). Validates feature flags, attachments, and template references + - `GET /api/schedules`: list schedules scoped to the calling user (identified via Open WebUI bearer token) + - `DELETE /api/schedules/{name}`: remove a schedule the user owns +- Includes a static UI in `scheduler/public/` for manual interaction. Uses `node-cron` to avoid overlapping executions; failed jobs clean themselves up + +### Nextcloud Files Tool (`ollama-nextcloud`) +- Express + WebDAV proxy that exposes a simple JSON API for browsing, downloading, and uploading files stored in Nextcloud + +- Environment variables (configured in `.env`): + - `NEXTCLOUD_APP_ID` / `NEXTCLOUD_APP_PASS` / `NEXTCLOUD_WEBDAV_ADDR`: service credentials + - `NEXTCLOUD_ACCESS_DIRS`: JSON array of regex strings that whitelist readable paths (e.g. `["^/Notes", "^/School"]`). When unset, the tool has full access + +- Cached downloads are stored under `/tmp` using an embedded SQLite index (`cache.ts`). The server keeps ETags in sync and reuses cached bytes when possible unless `bypasscache` is requested + +- Major endpoints (see `nextcloud/openapi.json`): + - `GET /openapi.json`: discovery document for tool registration. + - `POST /file`: fetch a file. Automatically caches and returns metadata + content-type. + - `POST /dir`: list directory contents (shallow or recursive). + - `PUT /file`: upload via multipart form-data (optional recursive dir creation, never overwrites existing files). + +### Cloudflared Tunnel Config +- `cloudflared-tunnel-config.yml` maps friendly hostnames to the local services (Ollama, Open WebUI, tool servers). Use it as a blueprint when exposing the stack through Cloudflare Tunnels. 
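+
+The general shape of such a config (a hedged sketch, not the file's literal contents; the tunnel ID is a placeholder, and the hostnames mirror the examples used elsewhere in this repo):
+
+```yaml
+# cloudflared config.yml sketch: route public hostnames to local services
+tunnel: <tunnel-id>
+credentials-file: /etc/cloudflared/<tunnel-id>.json
+ingress:
+  - hostname: owebui.domain.com
+    service: http://localhost:4000    # Open WebUI, as published by compose
+  - hostname: mlep.domain.com
+    service: http://localhost:11434   # Ollama (runs outside this compose)
+  - service: http_status:404          # cloudflared requires a catch-all rule
+```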
+
+---
+
+## Configuration (`.env`)
+
+```env
+ROKU_IP=
+
+WEBUI_URL=
+
+# use built-in login form (username/password)
+ENABLE_LOGIN_FORM="true"
+
+# forward identity on outbound model requests (if you're going to use OpenAI/an external LLM)
+ENABLE_FORWARD_USER_INFO_HEADERS="true"
+
+# allow user API keys so the scheduler can call OWUI's API
+ENABLE_API_KEY_AUTH="true"
+
+NEXTCLOUD_APP_ID=
+NEXTCLOUD_APP_PASS=
+NEXTCLOUD_WEBDAV_ADDR=
+NEXTCLOUD_ACCESS_DIRS=
+```
+
+---
+
+## Running the Stack
+
+1. Install Docker and Docker Compose
+
+2. Populate `.env` with the correct Roku and Nextcloud settings plus any Open WebUI options
+
+3. Build images (pull base layers and bake GID overrides where needed):
+   ```sh
+   docker compose build --pull
+   ```
+
+4. Launch everything:
+   ```sh
+   docker compose up -d
+   ```
+
+5. Open WebUI is available on http://localhost:4000 (use the credentials from the UI setup). The supporting services are reachable on the ports listed above or through the internal Docker network
+
+To inspect logs for a specific service:
+
+```sh
+docker compose logs -f coderunner
+```
+
+Bring the stack down (volumes persist):
+
-```sh
-.
-├─ docker-compose.yml
-├─ searxng.yml                    # searxng settings; defaults, json+html enabled; not a public instance
-├─ cloudflared-tunnel-config.yml  # cloudflare tunnel routing to ollama, openwebui, and tools
-├─ README.md
-├─ LICENSE                        # apache-2.0
-│
-├─ rag-server/
-│  ├─ Dockerfile                  # Runs the file that does the RAG stuff
-│  └─ index.tsx                   # Does the RAG stuff
-│
-├─ browser/
-│  └─ Dockerfile                  # builds browser-use/web-ui (playwright chromium) on :7788
-|
-└─ coderunner/
-   ├─ Dockerfile                  # bun-based service that exposes an OpenAPI tool for sandboxed code exec
-   ├─ index.ts                    # the server; integrates with Open WebUI as a tool via /openapi.json
-   └─ package.json                # @types/node only (dev) to feed the OCD
-```
-
-### Open WebUI (in `docker-compose.yml`)
-
-* purpose: chat UI + orchestration layer; **includes a built-in knowledge base + RAG** with chunking, embedding, search, and prompt templating.
-* notable: backed by Postgres in this compose. exposes `4000:8080`.
-* storage: a docker volume `open-webui:` holds app data; Postgres uses `pgdata:`.
-
-### Postgres (in `docker-compose.yml`)
-
-* purpose: persistence for Open WebUI features (users, knowledge, etc.). health-checked with `pg_isready`.
-
-### SearxNG (in `docker-compose.yml` + `searxng.yml`)
-
-* purpose: metasearch engine used by Open WebUI tools/agents for live web lookups.
-* config highlights: `use_default_settings: true`, `public_instance: false`, `limiter: false`; formats: `html` and `json`.
-
-### Coderunner service (`coderunner/`)
-
-* **what it is:** a small HTTP server (Bun runtime) that executes pure source code in short-lived, sandboxed containers.
-* **why it exists:** lets Open WebUI tools run code safely with tight resource limits (no network, read-only fs, cgroup limits, `--cap-drop=ALL`, `no-new-privileges`).
-* **integration contract:** exposes an **OpenAPI schema at `/openapi.json`** and a single POST `/execute` endpoint. Open WebUI can import this as a **tool server**.
-* **security posture:** pulls allow-listed base images (gcc, python, node, bun, etc.), mounts only a tmpfs workdir, times out jobs ≈25s, and runs with non-root uid/gid. The container has access to the host’s docker socket *only* to run the sandbox containers.
-
-### Browser-use web-ui (`browser/`)
-
-* purpose: “autonomous” research browser UI (chromium via playwright), reachable on `:7788`.
-* built from upstream `browser-use/web-ui` repo, with python deps and browsers installed in the image. - -### Cloudflared tunnel (`cloudflared-tunnel-config.yml`) - -* maps hostnames (like `mlep.domain.com` for Ollama, `owebui.domain.com` for Open WebUI, and a `tools` host) to the internal services. Useful for private, authenticated access without public inbound ports. - ---- - -## Why I currently **don’t** use an external RAG server - -Open WebUI ships with pretty good **knowledge / RAG** support: add files/URLs, it chunks + embeds, indexes, retrieves, and automatically **prefixes retrieved context** to the model prompt using a RAG template. For lightweight to mid-sized corpora and single-user/small-team usage, that’s often all you need. - -**Stay with built-in RAG if most of these are true:** - -* total corpus is ≤ \~100k chunks and grows slowly. -* single user or small team (no multi-tenant isolation needed). -* no special retrieval logic (hybrid lexical+semantic, rerankers, metadata filters) beyond what Open WebUI provides. -* tolerance for “UI-managed” knowledge; you don’t need programmatic ingestion pipelines or job queues. - -## When an external RAG server makes sense - -Adopt a decoupled RAG service when you need one or more of: - -* **bigger data / throughput**: millions of chunks, higher QPS, horizontal scaling. -* **advanced retrieval**: custom chunkers, hybrid search (bm25 + vector), **reranking**, time-decay, per-tenant filters, embeddings A/B, or multi-modal (image/audio) retrieval. -* **programmatic ingestion**: CI-driven pipelines from git/docs/confluence/S3; delta updates; background jobs. -* **governance / isolation**: strict multi-tenant separation, PII retention controls, audit trails. -* **interoperability**: a clean HTTP API and OpenAPI so other apps (beyond Open WebUI) can reuse your index. - ---- - -## External RAG Server — Design and Reference Implementation - -This is a small, dependency-light service designed to run with **Bun** and integrate with both **Ollama** and **Open WebUI**. - -### Goals - -* minimal moving parts; runs fine on a single host. -* uses Ollama for **embeddings** and **chat**. -* supports **collections**, **upserts**, **queries**, and an opinionated `/chat` that does retrieve-then-generate. -* ships an **OpenAPI** so Open WebUI can import it as a tool server. -* default in-memory store (persisted to JSON) for simplicity; optional adapters for vector DBs later. - -### API surface - -* `GET /openapi.json` – schema for tool integration. -* `POST /collections` – create a logical collection `{ name }`. -* `GET /collections` – list collections. -* `POST /upsert` – `{ collection, items:[{ id?, text, metadata? }] }`; chunks+embeds text and stores vectors. -* `POST /query` – `{ collection, query, topK?=5, where? }` --> nearest chunks with scores. -* `POST /chat` – `{ collection, query, topK?=5, model?, embedModel? }` --> runs RAG and calls Ollama chat, returns the answer + citations. - -### Storage Strategy - -* **default:** in-memory + JSON file on disk (`./data/rag.json`). good for dev/small usage. -* **plug-in adapters:** swap in Qdrant, SQLite-Vec, pgvector, Weaviate, etc., without changing the HTTP API. 
-
---
-
-### Add to `docker-compose.yml`
-
-```yaml
-  rag:
-    build:
-      context: ./rag-server
-      dockerfile: Dockerfile
-    environment:
-      OLLAMA_BASE: "http://mlep.domain.com:11434"
-      OLLAMA_CHAT_MODEL: "llama3.1"
-      OLLAMA_EMBED_MODEL: "nomic-embed-text"
-    volumes:
-      - rag_data:/app/data
-    networks:
-      - internal
-    restart: unless-stopped
-
-volumes:
-  rag_data:
-```
+```sh
+docker compose down
+```

-> if you already expose services via cloudflared, add another hostname mapping to the `rag` container (`- hostname: rag.domain.com -> service: http://rag:8788`).
+---
+
+## Registering Tool Servers in Open WebUI
+
+Inside Open WebUI (Settings --> Tools --> Add tool server), point to the internal URLs:
+- Code runner: `http://coderunner:8787/openapi.json`
+- Scheduler: `http://schedules-api:12253/openapi.json`
+- Nextcloud files: `http://ollama-nextcloud:1111/openapi.json`
+- Roku remote: `http://openwebui_tools:1331/roku/openapi.json`
+
+These should stay fully internal to the Docker network. If you expose them, consider adding a reverse proxy and authentication

---

-## Wiring the RAG server into Open WebUI and Ollama
+## Data, Volumes, and Shared Paths

-### 1. Pull models
+- `open-webui` volume: Open WebUI application state (uploads, knowledge base, configs)
+- `pgdata` volume: Postgres cluster data directory
+- `searxng_data` volume: SearxNG runtime files
+- `webui_data` volume: browser-use web UI session data
+- `schedule_data` volume: scheduler persisted schedules and stored file attachments
+- `nextcloud_data` volume: temp storage for cached Nextcloud content

-* `ollama pull nomic-embed-text` (embeddings)
-* `ollama pull llama3.1` (chat)
-
-### 2. Expose the OpenAPI to Open WebUI as a **tool server**
-
-* in Open WebUI --> **settings --> tools** --> **add tool server**
-* paste the url for the cloudflared hostname
-* you’ll now see tool functions like `listCollections`, `createCollection`, `upsert`, `query`, `chat` available to the assistant
-
-### 3. Usage pattern inside a chat
-
-* to build a knowledge base, call the `createCollection` and `upsert` tools with your documents
-* to answer, call `chat` which performs retrieve-then-generate against your chosen collection
-
----
-
-## FAQ — Built-in vs. External RAG
-
-**Q: will Open WebUI’s built-in RAG conflict with this server?**
-no — you can use either, or both. Open WebUI’s knowledge base is great for ad-hoc use. this service is for programmatic/control-plane needs or when you outgrow the UI’s storage/retrieval.
-
-**Q: how do enforce tenant isolation?**
-use one collection per tenant and never mix. for stronger guarantees, run separate RAG instances or choose Qdrant with per-collection access control.
-
-**Q: how can use my chunker/reranker?**
-yes. place them ahead of `/upsert` and `/query` respectively, or add endpoints like `/rerank` and `/embed` to experiment.
-
-**Q: can this call OpenAI-compatible endpoints instead of native Ollama?**
-Ollama exposes an experimental OpenAI-compatible API. you can add a thin client if you already point tools at `/v1/chat/completions`.
+> [!IMPORTANT]
+> Back up the volumes you care about before upgrading images

---

## License

-This write-up and reference code are provided under the same **Apache-2.0** terms as the repository.
+The repository and reference code are released under Apache-2.0 (see `LICENSE`).
+ diff --git a/coderunner/Dockerfile b/coderunner/Dockerfile index bb8332f..3ceb872 100644 --- a/coderunner/Dockerfile +++ b/coderunner/Dockerfile @@ -9,19 +9,25 @@ RUN apk add --no-cache docker-cli tini curl; # ----- map container 'docker' group to host docker.sock GID ----- # pass the host's docker.sock GID at build time: --build-arg DOCKER_GID=$(stat -c '%g' /var/run/docker.sock) ARG DOCKER_GID=977 + # create (or reuse) a group with that GID, then add the existing 'bun' user to it RUN addgroup -g "${DOCKER_GID}" -S docker || true \ && addgroup bun docker; +RUN chown -R bun:bun /app + # switch to the nonroot bun user (already default in the base image, but explicit is nice) USER bun -# your app -COPY index.ts ./index.ts +# files +COPY package.json . +COPY index.ts . +COPY openapi.json . + +RUN bun i -# expose your tool server -EXPOSE 8787 ENV PORT=8787 + # default docker host path; adjust if you mount elsewhere ENV DOCKER_HOST=unix:///var/run/docker.sock diff --git a/coderunner/index.ts b/coderunner/index.ts index 4450430..2ad34d3 100644 --- a/coderunner/index.ts +++ b/coderunner/index.ts @@ -45,83 +45,7 @@ type fileType = { const DOCKER_BIN = process.env.DOCKER_BIN || "docker"; // basic openapi for open webui -const OPENAPI = { - openapi: "3.1.0", - info: { - title: "Container Code Runner", - version: "1.0.0", - description: - "run source code inside a sandboxed container. important: provide pure source code only; do not wrap code in shell commands or pipelines." - }, - paths: { - "/execute": { - post: { - operationId: "execute", - summary: "Run code in a sandboxed container", - // the model sees this text - description: - "use the language directly, not bash + the language. e.g., `#include...` (good) vs `echo '#include...' && gcc` (bad). pass only pure source text in `code`.", - requestBody: { - required: true, - content: { - "application/json": { - schema: { - type: "object", - properties: { - language: { - type: "string", - enum: Object.keys(LANGS), - description: - "the programming language to run. do not use 'bash' to wrap or invoke compilers/interpreters; select the actual language (e.g., 'c', 'cpp', 'python')." - }, - code: { - type: "string", - description: - "pure source code only. do not include shell commands, redirections, pipes, or `echo`/`printf` wrappers. examples: good: `print('hi')`; bad: `echo \"print('hi')\" | python`." - }, - args: { type: "array", items: { type: "string" } }, - files: { - type: "array", - items: { - type: "object", - properties: { - path: { type: "string" }, - content: { type: "string" } - }, - required: ["path", "content"], - description: - "optional supporting files. contents must be pure file text, not shell commands." 
-              }
-            }
-          },
-          required: ["language", "code"]
-        }
-      }
-    }
-  },
-  responses: {
-    "200": {
-      description: "Execution result",
-      content: {
-        "application/json": {
-          schema: {
-            type: "object",
-            properties: {
-              stdout: { type: "string" },
-              stderr: { type: "string" },
-              exitCode: { type: "integer" },
-              timedOut: { type: "boolean" }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-}
-}
-};
-
+const OPENAPI = JSON.parse((await import("node:fs")).readFileSync("openapi.json", "utf-8"));

function sendJson(res, status, obj) {
  const body = JSON.stringify(obj);
diff --git a/coderunner/openapi.json b/coderunner/openapi.json
new file mode 100644
index 0000000..e4086ea
--- /dev/null
+++ b/coderunner/openapi.json
@@ -0,0 +1,93 @@
+{
+  "openapi": "3.1.0",
+  "info": {
+    "title": "Container Code Runner",
+    "version": "1.0.0",
+    "description": "run source code inside a sandboxed container. important: provide pure source code only; do not wrap code in shell commands or pipelines."
+  },
+  "paths": {
+    "/execute": {
+      "post": {
+        "operationId": "execute",
+        "summary": "Run code in a sandboxed container",
+        "description": "use the language directly, not bash + the language. e.g., `#include...` (good) vs `echo '#include...' && gcc` (bad). pass only pure source text in `code`.",
+        "requestBody": {
+          "required": true,
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "language": {
+                    "type": "string",
+                    "enum": ["python", "node", "bun", "bash", "ruby", "go", "rust", "java", "c", "cpp"],
+                    "description": "the programming language to run. do not use 'bash' to wrap or invoke compilers/interpreters; select the actual language (e.g., 'c', 'cpp', 'python')."
+                  },
+                  "code": {
+                    "type": "string",
+                    "description": "pure source code only. do not include shell commands, redirections, pipes, or `echo`/`printf` wrappers. examples:\n\tgood: `print('hi')`;\n\tbad: `echo \"print('hi')\" | python`."
+                  },
+                  "args": {
+                    "type": "array",
+                    "items": {
+                      "type": "string"
+                    }
+                  },
+                  "files": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "path": {
+                          "type": "string"
+                        },
+                        "content": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "path",
+                        "content"
+                      ],
+                      "description": "optional supporting files. contents must be pure file text, not shell commands."
+ } + } + }, + "required": [ + "language", + "code" + ] + } + } + } + }, + "responses": { + "200": { + "description": "Execution result", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "stdout": { + "type": "string" + }, + "stderr": { + "type": "string" + }, + "exitCode": { + "type": "integer" + }, + "timedOut": { + "type": "boolean" + } + } + } + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/coderunner/package-lock.json b/coderunner/package-lock.json deleted file mode 100644 index ff32074..0000000 --- a/coderunner/package-lock.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "name": "coderunner", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "devDependencies": { - "@types/node": "^24.3.1" - } - }, - "node_modules/@types/node": { - "version": "24.3.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.1.tgz", - "integrity": "sha512-3vXmQDXy+woz+gnrTvuvNrPzekOi+Ds0ReMxw0LzBiK3a+1k0kQn9f2NWk+lgD4rJehFUmYy2gMhJ2ZI+7YP9g==", - "dev": true, - "license": "MIT", - "dependencies": { - "undici-types": "~7.10.0" - } - }, - "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", - "dev": true, - "license": "MIT" - } - } -} diff --git a/coderunner/package.json b/coderunner/package.json index 1220716..0f4f377 100644 --- a/coderunner/package.json +++ b/coderunner/package.json @@ -1,5 +1,16 @@ { + "name": "coderunner", + "module": "index.ts", + "type": "module", + "private": true, "devDependencies": { - "@types/node": "^24.3.1" + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5" + }, + "dependencies": { + "@types/node": "^24.6.2", + "http": "^0.0.1-security" } } diff --git a/nextcloud/package.json b/nextcloud/package.json index c9688b2..fe6792c 100644 --- a/nextcloud/package.json +++ b/nextcloud/package.json @@ -12,7 +12,6 @@ "@types/express": "^5.0.3", "@types/multer": "^2.0.0", "cors": "^2.8.5", - "dotenv": "^17.2.3", "express": "^5.1.0", "multer": "^2.0.2", "webdav": "^5.8.0" diff --git a/rag-server/Dockerfile b/rag-server/Dockerfile deleted file mode 100644 index 4aaf98c..0000000 --- a/rag-server/Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -# syntax=docker/dockerfile:1 -FROM oven/bun:1.2.2-alpine - -WORKDIR /app - -COPY index.ts ./index.ts - -ENV PORT=8788 - -EXPOSE 8788 - -CMD ["bun","run","index.ts"] \ No newline at end of file diff --git a/rag-server/index.ts b/rag-server/index.ts deleted file mode 100644 index 64ce2be..0000000 --- a/rag-server/index.ts +++ /dev/null @@ -1,289 +0,0 @@ -import { serve } from "bun"; -import fs from "node:fs"; -import path from "node:path"; - -// types -interface Chunk { - id: string; - text: string; - metadata?: Record; - vector: number[]; -} - -interface Collection { - name: string; - chunks: Chunk[]; -} - -interface OllamaChatMessage { - role: "system" | "user" | "assistant"; - content: string; -} - -interface OllamaChatRequest { - model?: string; - messages: OllamaChatMessage[]; - stream?: boolean; -} - -interface OllamaChatResponse { - message?: OllamaChatMessage; - [k: string]: unknown; -} - -interface UpsertInputItem { - text: string; - metadata?: Record; -} - -interface OpenAPIObject { - openapi: string; - info: { title: string; version: string }; - paths: Record; -} - -// env -const PORT: number = Number(process.env.PORT || 8788), - HOST: string 
= process.env.HOST || "0.0.0.0", - OLLAMA_BASE: string = process.env.OLLAMA_BASE || "http://localhost:11434", - OLLAMA_CHAT_MODEL: string = process.env.OLLAMA_CHAT_MODEL || "llama3.1", - OLLAMA_EMBED_MODEL: string = process.env.OLLAMA_EMBED_MODEL || "nomic-embed-text", - DATA_DIR: string = process.env.DATA_DIR || path.resolve("./data"), - SNAPSHOT: string = path.join(DATA_DIR, "rag.json"); - -// in-memory db -const db: Map = new Map(); - -// util: smol json persistence -function ensureDirs(): void { - if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true }); -} - -// you can probably guess -function loadSnapshot(): void { - try { - ensureDirs(); - if (fs.existsSync(SNAPSHOT)) { - const raw = fs.readFileSync(SNAPSHOT, "utf8"); - const obj = JSON.parse(raw || "{}") as Record; - for (const [name, value] of Object.entries(obj)) db.set(name, value); - } - } catch (e) { - console.warn("failed to load snapshot:", e); - } -} - -// you can probably guess 2 -function saveSnapshot(): void { - try { - ensureDirs(); - const obj = Object.fromEntries(db.entries()); - fs.writeFileSync(SNAPSHOT, JSON.stringify(obj, null, 2)); - } catch (e) { - console.warn("failed to save snapshot:", e); - } -} - -loadSnapshot(); - -// basic text splitter (recursive by punctuation, then by length) -function chunkText(text: string, maxLen = 800): string[] { - const parts = text - .split(/\n{2,}/g) - .flatMap(p => p.split(/(?<=[.!?])\s+/g)) - .flatMap(s => s.length > maxLen ? s.match(new RegExp(`.{1,${maxLen}}`, "g")) || [] : [s]) - .map(s => s.trim()) - .filter(Boolean); - return parts; -} - -// cosine similarity -function dot(a: number[], b: number[]): number { let s = 0; for (let i = 0; i < a.length; i++) s += (a[i] || 0) * (b[i] || 0); return s; } -function norm(a: number[]): number { return Math.sqrt(dot(a, a)); } -function cosineSim(a: number[], b: number[]): number { const d = dot(a, b), n = norm(a) * norm(b) || 1; return d / n; } - -// call ollama embeddings -async function embedAll(texts: string[]): Promise { - const primary = await fetch(`${OLLAMA_BASE}/api/embed`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ model: OLLAMA_EMBED_MODEL, input: texts }) - }); - - if (primary.ok) { - const j: { embeddings: number[][] } = await primary.json(); - return j.embeddings; - } - - const results: number[][] = []; - for (const t of texts) { - const r = await fetch(`${OLLAMA_BASE}/api/embeddings`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ model: OLLAMA_EMBED_MODEL, prompt: t }) - }); - - if (!r.ok) throw new Error(`embed failed: ${r.status}`); - - const j: { embedding: number[] } = await r.json(); - results.push(j.embedding); - } - return results; -} - -// call ollama chat/generate with retrieved context -async function ollamaChat(req: OllamaChatRequest): Promise { - const res = await fetch(`${OLLAMA_BASE}/api/chat`, { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ model: req.model || OLLAMA_CHAT_MODEL, messages: req.messages, stream: req.stream }) - }); - - if (!res.ok) throw new Error(`ollama chat failed: ${res.status}`); - const j: OllamaChatResponse = await res.json(); - - return j; -} - -// openapi for open webui tool integration -const OPENAPI: OpenAPIObject = { - openapi: "3.1.0", - info: { title: "RAG Server (Ollama)", version: "1.0.0" }, - paths: { - "/collections": { - get: { operationId: "listCollections" }, - post: { operationId: "createCollection" } 
- }, - "/upsert": { post: { operationId: "upsert" } }, - "/query": { post: { operationId: "query" } }, - "/chat": { post: { operationId: "chat" } } - } -}; - -// tiny router -async function json(req: Request): Promise { try { return await req.json() as T; } catch { return {} as T; } } -function sendJson(_res: unknown, status: number, obj: unknown): Response { - return new Response(JSON.stringify(obj), { status, headers: { "content-type": "application/json; charset=utf-8" } }); -} - -async function handleCollections(req: Request): Promise { - if (req.method === "GET") { - return sendJson(null, 200, { collections: Array.from(db.keys()) }); - } - - if (req.method === "POST") { - const body = await json<{ name?: string }>(req), - name = String(body?.name || "").trim(); - - if (!name) return sendJson(null, 400, { error: "name required" }); - if (!db.has(name)) db.set(name, { name, chunks: [] }); - - saveSnapshot(); - return sendJson(null, 200, { ok: true }); - } - - return new Response("not found", { status: 404 }); -} - -async function handleUpsert(req: Request): Promise { - const body = await json<{ collection?: string; items?: UpsertInputItem[] }>(req), - collection = String(body?.collection || "").trim(), - items: UpsertInputItem[] = Array.isArray(body?.items) ? body.items : []; - - if (!collection) return sendJson(null, 400, { error: "collection required" }); - if (!db.has(collection)) db.set(collection, { name: collection, chunks: [] }); - - const col = db.get(collection)!, - chunksToIndex: { text: string; metadata?: Record; _id: string }[] = []; - - for (const it of items) { - const parts = chunkText(String(it.text || "")); - for (const p of parts) chunksToIndex.push({ text: p, metadata: it.metadata || {}, _id: crypto.randomUUID() }); - } - - const vecs = await embedAll(chunksToIndex.map(x => x.text)); - for (let i = 0; i < chunksToIndex.length; i++) { - const item = chunksToIndex[i], - doc: Chunk = { id: item._id, text: item.text, metadata: item.metadata, vector: vecs[i] }; - - col.chunks.push(doc); - } - - saveSnapshot(); - return sendJson(null, 200, { ok: true, indexed: chunksToIndex.length }); -} - -async function handleQuery(req: Request): Promise { - const body = await json<{ collection?: string; query?: string; topK?: number }>(req), - collection = String(body?.collection || "").trim(), - query = String(body?.query || "").trim(), - topK = Number(body?.topK || 5); - - if (!collection || !query) return sendJson(null, 400, { error: "collection and query required" }); - - const col = db.get(collection); - if (!col) return sendJson(null, 404, { error: "collection not found" }); - - const [qvec] = await embedAll([query]), - scored = col.chunks.map((c) => ({ c, score: cosineSim(qvec, c.vector) })) - .sort((a, b) => b.score - a.score) - .slice(0, topK) - .map(x => ({ id: x.c.id, text: x.c.text, metadata: x.c.metadata, score: x.score })); - return sendJson(null, 200, { matches: scored }); -} - -async function handleChat(req: Request): Promise { - const body = await json<{ collection?: string; query?: string; topK?: number; model?: string }>(req), - collection = String(body?.collection || "").trim(), - query = String(body?.query || "").trim(), - topK = Number(body?.topK || 5), - model = body?.model || OLLAMA_CHAT_MODEL; - - if (!collection || !query) return sendJson(null, 400, { error: "collection and query required" }); - - const col = db.get(collection); - if (!col) return sendJson(null, 404, { error: "collection not found" }); - - const [qvec] = await embedAll([query]), - matches = 
col.chunks.map((c) => ({ c, score: cosineSim(qvec, c.vector) })) - .sort((a, b) => b.score - a.score) - .slice(0, topK); - - const context = matches.map((m, i) => `[[doc ${i + 1} score=${m.score.toFixed(3)}]]\n${m.c.text}`).join("\n\n"), - system: string = `you are a helpful assistant. use ONLY the provided context to answer. if the answer isn't in the context, say you don't know. cite as [doc N].`, - user: string = `question: ${query}\n\ncontext:\n${context}`; - - const out = await ollamaChat({ model, messages: [{ role: "system", content: system }, { role: "user", content: user }], stream: false }); - return sendJson(null, 200, { - answer: out?.message?.content || "", - citations: matches.map((m, i) => ({ id: m.c.id, score: m.score, text: m.c.text })) - }); -} - -const pickFunc = (pathname: string) => { - switch (pathname) { - case "/collections": - return handleCollections; - case "/upsert": - return handleUpsert; - case "/query": - return handleQuery; - case "/chat": - return handleChat; - default: - return undefined; - } -} - -const server = serve({ - port: PORT, - hostname: HOST, - fetch: async (req: Request): Promise => { - const u = new URL(req.url); - if (req.method === "GET" && u.pathname === "/") return new Response("ok"); - if (req.method === "GET" && u.pathname === "/openapi.json") return sendJson(null, 200, OPENAPI); - return pickFunc(u.pathname)?.call(req) || new Response("not found", { status: 404 }); - } -}); - -console.log(`[rag] listening on http://${HOST}:${PORT}`); \ No newline at end of file