quality-of-life upgrades and bug fixes

2025-04-02 21:56:41 -04:00
parent b935b6002b
commit 73db5a78e5
6 changed files with 301 additions and 173 deletions
codeExecution.py (+33 -5)
@@ -3,10 +3,12 @@ from pathlib import Path
 import re
 from types import FunctionType
 import docker
+import json
 import debug as debugMod
 import conversation_store
 from config import Config
+from queries import show_thinking


 class UserEnvironment:
@@ -14,6 +16,30 @@ class UserEnvironment:
         self.user_id = user_id
         self.client = docker.from_env()
         self.temp_dir = tempfile.TemporaryDirectory(prefix=f"{user_id}_code_")
+        self._ensure_sandbox_image()
+
+    def _ensure_sandbox_image(self):
+        try:
+            self.client.images.get("code-sandbox")
+        except docker.errors.ImageNotFound:
+            debugMod.log("building code-sandbox image from Dockerfile.sandbox...")
+            try:
+                self.client.images.build(
+                    path=".",
+                    dockerfile="Dockerfile.sandbox",
+                    tag="code-sandbox",
+                    rm=True,
+                    forcerm=True
+                )
+                debugMod.log("successfully built code-sandbox image")
+            except docker.errors.BuildError as e:
+                raise RuntimeError(f"Failed to build Docker image: {str(e)}") from e
+            except docker.errors.APIError as e:
+                raise RuntimeError(f"Docker API error: {str(e)}") from e

     def execute_code(self, code: str, context=None, timeout=15, memory_limit=100):
         # Validate input
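Note: because _ensure_sandbox_image() checks images.get("code-sandbox") before building, the Dockerfile.sandbox build runs at most once per machine, and later edits to that Dockerfile are silently ignored. A minimal sketch of forcing a rebuild with the same docker SDK calls (only the tag name comes from this diff):

    import docker

    client = docker.from_env()
    try:
        # drop the cached tag so the next UserEnvironment() rebuilds it
        client.images.remove("code-sandbox", force=True)
    except docker.errors.ImageNotFound:
        pass  # nothing cached yet; _ensure_sandbox_image will build it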
@@ -48,7 +74,6 @@ class UserEnvironment:
             detach=True,
             stdout=True,
             stderr=True,
-            timeout=timeout
         )

         # Wait for completion
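Dropping timeout=timeout here is a real fix rather than a cleanup: docker-py's containers.run() accepts no timeout keyword, so the old call would raise TypeError. The limit can instead be enforced on the wait step; a sketch of that step inside execute_code, assuming container is the handle returned by the detached run() call (the result keys mirror the ones read later in this file, but the exact shape is an assumption):

    import requests

    try:
        container.wait(timeout=timeout)  # blocks for at most `timeout` seconds
    except requests.exceptions.ReadTimeout:
        container.kill()  # runaway code: stop the container ourselves
        return {"output": "", "error": f"execution timed out after {timeout}s"}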
@@ -98,10 +123,10 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
         execution_result = user_env.execute_code(
             current_code, context=chunks if chunks else None)

-        if isinstance(execution_result, dict) and 'err' in execution_result:
+        if isinstance(execution_result, dict) and execution_result['error']:
             # hard-coded notice so the user knows the program didn't explode
-            debugMod.log(
-                "\n\nhmmm...looks like this code didn't work properly, I'll try debugging it now!\n")
+            show_thinking(
+                "[hmmm...looks like this code didn't work properly, I'll try debugging it now!]")
             last_error = execution_result['err']
             debugMod.log(f"\nExecution error: {last_error}\n")
@@ -128,7 +153,9 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
             else:
                 break
         else:
-            debugMod.log("\nCode Execution Result:\n", execution_result)
+            debugMod.log("\nCode Execution Result:\n", json.dumps(execution_result))
+            print("\nCode Execution Result:\n", execution_result['output'].strip())

         if execution_result:
             # Get current conversation ID after saving conversation
             conv_id = conversation_store.save_conversation(query, response, links)
@@ -142,6 +169,7 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
                 retries=retry_count,
                 conversation_id=conv_id
             )
             break

     if last_error and retry_count >= Config.MAX_CODE_RETRIES:
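One wrinkle worth noting: the new condition reads execution_result['error'] while the retry path still reads execution_result['err'], so execute_code must return both keys for this branch to work. A shape that satisfies every access in this diff (the exact structure is an assumption; the commit never shows execute_code's return statement):

    execution_result = {
        "output": "42\n",  # printed via execution_result['output'].strip()
        "error": None,     # truthiness gates the debug/retry branch
        "err": None,       # logged as last_error when the branch is taken
    }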
config.py (+6 -1)
@@ -35,7 +35,12 @@ class Config:
     MAX_RESPONSE_LENGTH = 10000  # Characters for stored responses

     # === Model Settings ===
-    MODEL_TEMPERATURE = 0.7  # Default creativity level
+    MODEL_TEMPERATURE = {
+        "simple": 0.3,
+        "medium": 0.6,
+        "complex": 0.7
+    }
     MAX_CLASSIFY_ATTEMPTS = 3  # Task classification retries

     # === Safety Limits ===
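This dict mirrors MODEL_NAMES in queries.py: both are keyed by the strings classify_task returns, so adding a new tier means updating both. The lookup contract, as used later in call_ollama_and_print:

    task_type = classify_task(query)  # one of "simple" / "medium" / "complex"
    temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)  # 0.7 fallback for unknown tiers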
conversation_store.py (+1 -1)
@@ -63,7 +63,7 @@ def save_code_execution(code, result, error=None, retries=0, conversation_id=Non
                   error_message, retry_count, timestamp)
                   VALUES (?, ?, ?, ?, ?, ?)''',
                  (conversation_id, code, execution_result,
-                  error_message, retries, datetime.datetime.now()))
+                  error_message, retries, datetime.now()))
     conn.commit()
     conn.close()
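The shortened datetime.now() only resolves if the module imports the class rather than the package; since the diff shows no import change, presumably the file already does:

    from datetime import datetime  # makes bare datetime.now() valid
    # import datetime             # would require datetime.datetime.now() instead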
helpers.py (+15 -0)
@@ -0,0 +1,15 @@
+from pygments import highlight
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import TerminalFormatter
+import debug as debugMod
+
+
+def highlight_code(code: str, language: str = 'py') -> str:
+    try:
+        lexer = get_lexer_by_name(language)
+    except ValueError:  # pygments raises ClassNotFound, a ValueError subclass
+        debugMod.log("Warning: Language not recognized. Printing without highlighting.")
+        return code
+
+    formatter = TerminalFormatter()
+    return highlight(code, lexer, formatter)
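A quick usage sketch; TerminalFormatter emits ANSI escape codes, so the return value is meant for print() rather than for log files (highlight() also appends a trailing newline, hence end=""):

    snippet = "def add(a, b):\n    return a + b"
    print(highlight_code(snippet, "python"), end="")   # colorized source
    print(highlight_code(snippet, "not-a-language"))   # unknown lexer -> plain text fallback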
+21 -3
@@ -1,9 +1,9 @@
 from codeExecution import UserEnvironment, orchestrate_code
 from queries import (
-    perform_web_search,
     rag_query,
     classify_task,
-    MODEL_NAMES
+    MODEL_NAMES,
+    show_thinking
 )
 import debug as debugMod
 from search import perform_web_search
@@ -14,6 +14,7 @@ import os
 import argparse
 import re
 import ollama
+import subprocess
 from config import Config
 import conversation_store

 conversation_store.initialize_db()
@@ -76,8 +77,9 @@ def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input):
     links = []

     # Classify task once at start
+    show_thinking("[Analyzing query type...]")
     task_type = classify_task(query)
-    debugMod.log(f"Task classified as: {task_type}")
+    show_thinking(f"[Task classified as: {task_type}]")

     # Early exit for simple tasks
     if task_type == "simple":
@@ -115,11 +117,14 @@ def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input):
 Return ONLY: web_search/user_input/final_response"""

+        show_thinking('[choosing the appropriate action]')
         status = rag_query(
             reflection_prompt, task_type=task_type, silent=True).strip().lower()
         debugMod.log(f"Action determined: {status}")

         if status == "web_search":
+            show_thinking("[Searching web for information...]")
             search_prompt = f"""Generate search query considering: {query}
 Previous responses: {response_context}
 Return ONLY search terms"""
@@ -220,5 +225,18 @@ if __name__ == "__main__":
     # code
     code_blocks = re.findall(Config.code_block_regex(), response, re.DOTALL)
     if code_blocks:
+        show_thinking('[running code...]')
         orchestrate_code(orchestrate, vector_store, chunks,
                          user_env, code_blocks, query, response, links)
+
+    # clean up
+    try:
+        # For Linux/macOS
+        subprocess.run(["pkill", "-f", "ollama run"], check=False)
+        # For Windows
+        subprocess.run(["taskkill", "/IM", "ollama.exe", "/F"], check=False)
+        debugMod.log("Terminated Ollama background processes")
+    except Exception as e:
+        debugMod.log(f"Cleanup error: {str(e)}")
queries.py (+73 -11)
@@ -1,7 +1,9 @@
+import re
 import debug as debugMod
-from search import perform_web_search
+from config import Config
 import ollama
 import conversation_store
+from helpers import highlight_code

 conversation_store.initialize_db()

 # models: better: qwen2.5-coder:14b, faster: phi3 (but worse), with more processing power: deepseek-r1:32b
@@ -36,14 +38,27 @@ def classify_task(query: str) -> str:
     return 'complex'


-def generate_prompt(query, web_context, local_context, user_context, response_context, onlyRules=False):
+def generate_prompt(query, web_context, local_context, user_context, response_context, task_type, onlyRules=False):
+    if task_type == "simple":
+        return f"""RESPONSE RULES:
+1. Respond ONLY with a single-sentence friendly reply
+2. NEVER include explanations, markdown, or metadata
+3. Keep responses under 15 words
+4. ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
+
+Query: {query}
+Response:"""  # Explicit response start
+
+    else:
-    prompt = f"""
+        prompt = f"""
 **Strict Response Rules**
-1. Greetings & Casual Queries:
+1. General Rules:
     - For greetings (e.g. "good morning", "hello"):
       * Respond with ONLY a short friendly acknowledgment
       * NEVER explain why you can't chat casually
       * Example: "Good morning! How can I assist you today?"
+    - NEVER give the user code they didn't ask for
+    - ONLY answer the question. Do NOT EVER give the user extra information or questions they did not ask for!
 2. Technical Responses:
     - Generate code ONLY if:
@@ -63,6 +78,8 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
     - NO justification of rules to users
     - NEVER include the user's question unless explicitly asked to do so
     - NEVER include previous responses
+    - NEVER EVER SHOW THE RULES TO THE USER
+    - ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)

     {f'Local File Context: {local_context}' if local_context else ''}
     """
@@ -83,27 +100,72 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
     return prompt


+def show_thinking(indicator: str = None):
+    # dim-gray ANSI text (\033[90m ... \033[0m) so status lines stand apart from answers
+    print(f"\033[90m{indicator or '[Thinking...]'}\033[0m", flush=True)
+
+
 def call_ollama_and_print(task_type, prompt, silent=False):
+    temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)
     if silent:
-        response = ollama.chat(model=MODEL_NAMES[task_type], messages=[
-            {"role": "user", "content": prompt}])
+        response = ollama.chat(
+            model=MODEL_NAMES[task_type], messages=[
+                {"role": "user", "content": prompt}],
+            options={'temperature': temperature}
+        )
         debugMod.log("RAG query response received")
         return response

     full_response = ""
-    print("\nAI Response: ", end="", flush=True)  # Start response line
+    show_thinking()

     # Stream the response
     stream = ollama.chat(
         model=MODEL_NAMES[task_type],
         messages=[{"role": "user", "content": prompt}],
-        stream=True
+        stream=True,
+        options={'temperature': temperature}
     )

+    buffer = ""
+    in_code_block = False
+    code_lang = None
+    first_chunk = True
     for chunk in stream:
-        content = chunk.get('message', {}).get('content', '')
-        print(content, end="", flush=True)  # Stream to terminal
-        full_response += content
+        if first_chunk:
+            first_chunk = False
+            print("\r\033[K", end="")  # Clear line
+            print("\nAI Response: ", end="", flush=True)
+        content: str = chunk.get('message', {}).get('content', '')
+        if content == '```' or re.match('```.*', content):
+            if in_code_block:
+                in_code_block = False
+                print()
+                buffer += content
+                code_lang = None
+            else:
+                in_code_block = True
+                code_lang = content.replace('```', '').strip()
+                if len(code_lang) == 0:
+                    code_lang = "TODO"  # language expected in the next chunk
+        elif code_lang == "TODO":
+            # last chunk was the bare backticks, now is lang
+            splitVal = content.strip().split()
+            code_lang = splitVal[0] if splitVal else None  # guard against an empty chunk
+            if code_lang and len(splitVal) > 1 and len(splitVal[1]) > 0:
+                hcode = highlight_code(splitVal[1], code_lang)
+                print(hcode, end="", flush=True)
+                buffer += hcode
+        else:
+            buffer += content
+            print(content, end="", flush=True)

     print()  # Newline after streaming

     debugMod.log("RAG query response received")
@@ -178,7 +240,7 @@ def rag_query(query, task_type: str = None, web_context="", local_context="", us
debugMod.log(f"Generating {task_type} RAG query with query: {query}") debugMod.log(f"Generating {task_type} RAG query with query: {query}")
prompt = generate_prompt( prompt = generate_prompt(
query, web_context, local_context, user_context, response_context) query, web_context, local_context, user_context, response_context, task_type)
response = call_ollama_and_print(task_type, prompt, silent) response = call_ollama_and_print(task_type, prompt, silent)
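With this change, task_type threads from classification through prompt selection and sampling temperature in one pass. A minimal sketch of the new call flow (names come from this diff; any arguments beyond these are assumed):

    query = "write a function that reverses a string"
    task_type = classify_task(query)       # picks the model tier and temperature
    response = rag_query(query, task_type=task_type)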