quality of life upgrades and bug fixes

This commit is contained in:
2025-04-02 21:56:41 -04:00
parent b935b6002b
commit 73db5a78e5
6 changed files with 301 additions and 173 deletions
+33 -5
View File
@@ -3,10 +3,12 @@ from pathlib import Path
import re import re
from types import FunctionType from types import FunctionType
import docker import docker
import json
import debug as debugMod import debug as debugMod
import conversation_store import conversation_store
from config import Config from config import Config
from queries import show_thinking
class UserEnvironment: class UserEnvironment:
@@ -14,6 +16,30 @@ class UserEnvironment:
self.user_id = user_id self.user_id = user_id
self.client = docker.from_env() self.client = docker.from_env()
self.temp_dir = tempfile.TemporaryDirectory(prefix=f"{user_id}_code_") self.temp_dir = tempfile.TemporaryDirectory(prefix=f"{user_id}_code_")
self._ensure_sandbox_image()
def _ensure_sandbox_image(self):
try:
self.client.images.get("code-sandbox")
except docker.errors.ImageNotFound:
debugMod.log("building code-sandbox image from Dockerfile.sandbox...")
try:
self.client.images.build(
path=".",
dockerfile="Dockerfile.sandbox",
tag="code-sandbox",
rm=True,
forcerm=True
)
debugMod.log("successfully built code-sandbox image")
except docker.errors.BuildError as e:
raise RuntimeError(f"Failed to build Docker image: {str(e)}") from e
except docker.errors.APIError as e:
raise RuntimeError(f"Docker API error: {str(e)}") from e
def execute_code(self, code: str, context=None, timeout=15, memory_limit=100): def execute_code(self, code: str, context=None, timeout=15, memory_limit=100):
# Validate input # Validate input
@@ -48,7 +74,6 @@ class UserEnvironment:
detach=True, detach=True,
stdout=True, stdout=True,
stderr=True, stderr=True,
timeout=timeout
) )
# Wait for completion # Wait for completion
@@ -98,10 +123,10 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
execution_result = user_env.execute_code( execution_result = user_env.execute_code(
current_code, context=chunks if chunks else None) current_code, context=chunks if chunks else None)
if isinstance(execution_result, dict) and 'err' in execution_result: if isinstance(execution_result, dict) and execution_result['error']:
# hard code to let user know the program didn't explode # hard code to let user know the program didn't explode
debugMod.log( show_thinking(
"\n\nhmmm...looks like this code didn't work properly, I'll try debugging it now!\n") "[hmmm...looks like this code didn't work properly, I'll try debugging it now!]")
last_error = execution_result['err'] last_error = execution_result['err']
debugMod.log(f"\nExecution error: {last_error}\n") debugMod.log(f"\nExecution error: {last_error}\n")
@@ -128,7 +153,9 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
else: else:
break break
else: else:
debugMod.log("\nCode Execution Result:\n", execution_result) debugMod.log("\nCode Execution Result:\n", json.dumps(execution_result))
print("\nCode Execution Result:\n", execution_result['output'].strip())
if execution_result: if execution_result:
# Get current conversation ID after saving conversation # Get current conversation ID after saving conversation
conv_id = conversation_store.save_conversation(query, response, links) conv_id = conversation_store.save_conversation(query, response, links)
@@ -142,6 +169,7 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
retries=retry_count, retries=retry_count,
conversation_id=conv_id conversation_id=conv_id
) )
break break
if last_error and retry_count >= Config.MAX_CODE_RETRIES: if last_error and retry_count >= Config.MAX_CODE_RETRIES:
+6 -1
View File
@@ -35,7 +35,12 @@ class Config:
MAX_RESPONSE_LENGTH = 10000 # Characters for stored responses MAX_RESPONSE_LENGTH = 10000 # Characters for stored responses
# === Model Settings === # === Model Settings ===
MODEL_TEMPERATURE = 0.7 # Default creativity level MODEL_TEMPERATURE = {
"simple": 0.3,
"medium": 0.6,
"complex": 0.7
}
MAX_CLASSIFY_ATTEMPTS = 3 # Task classification retries MAX_CLASSIFY_ATTEMPTS = 3 # Task classification retries
# === Safety Limits === # === Safety Limits ===
+1 -1
View File
@@ -63,7 +63,7 @@ def save_code_execution(code, result, error=None, retries=0, conversation_id=Non
error_message, retry_count, timestamp) error_message, retry_count, timestamp)
VALUES (?, ?, ?, ?, ?, ?)''', VALUES (?, ?, ?, ?, ?, ?)''',
(conversation_id, code, execution_result, (conversation_id, code, execution_result,
error_message, retries, datetime.datetime.now())) error_message, retries, datetime.now()))
conn.commit() conn.commit()
conn.close() conn.close()
+15
View File
@@ -0,0 +1,15 @@
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import TerminalFormatter
import debug as debugMod
def highlight_code(code: str, language: str = 'py') -> None:
try:
lexer = get_lexer_by_name(language)
except ValueError:
debugMod.log("Warning: Language not recognized. Printing without highlighting.")
return code
formatter = TerminalFormatter()
return highlight(code, lexer, formatter)
+118 -100
View File
@@ -1,9 +1,9 @@
from codeExecution import UserEnvironment, orchestrate_code from codeExecution import UserEnvironment, orchestrate_code
from queries import ( from queries import (
perform_web_search, rag_query,
rag_query,
classify_task, classify_task,
MODEL_NAMES MODEL_NAMES,
show_thinking
) )
import debug as debugMod import debug as debugMod
from search import perform_web_search from search import perform_web_search
@@ -14,6 +14,7 @@ import os
import argparse import argparse
import re import re
import ollama import ollama
import subprocess
from config import Config from config import Config
import conversation_store import conversation_store
conversation_store.initialize_db() conversation_store.initialize_db()
@@ -58,124 +59,128 @@ def create_vector_store(chunks):
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_store = Chroma.from_texts( vector_store = Chroma.from_texts(
chunks, chunks,
embeddings, embeddings,
persist_directory=Config.chroma_path() persist_directory=Config.chroma_path()
) )
debugMod.log("Vector store created") debugMod.log("Vector store created")
return vector_store return vector_store
def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input): def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input):
debugMod.log(f"Orchestrating query: {query}") debugMod.log(f"Orchestrating query: {query}")
aggregated_web_context = "" aggregated_web_context = ""
local_context = "" local_context = ""
user_context = "" user_context = ""
response_context = "" response_context = ""
links = [] links = []
# Classify task once at start # Classify task once at start
task_type = classify_task(query) show_thinking("[Analyzing query type...]")
debugMod.log(f"Task classified as: {task_type}") task_type = classify_task(query)
show_thinking(f"[Task classified as: {task_type}]")
# Early exit for simple tasks # Early exit for simple tasks
if task_type == "simple": if task_type == "simple":
debugMod.log("Direct response for simple task") debugMod.log("Direct response for simple task")
return [rag_query(query, task_type=task_type), []] return [rag_query(query, task_type=task_type), []]
# Initialize context for medium/complex tasks # Initialize context for medium/complex tasks
if vector_store: if vector_store:
docs = vector_store.similarity_search(query, k=3) docs = vector_store.similarity_search(query, k=3)
local_context = "\n".join( local_context = "\n".join(
[d.page_content for d in docs]) if docs else "" [d.page_content for d in docs]) if docs else ""
debugMod.log(f"Local context: {local_context}") debugMod.log(f"Local context: {local_context}")
iteration = 0 iteration = 0
status = "continue" status = "continue"
while iteration < Config.MAX_ORCHESTRATION_ITERATIONS and status != "final": while iteration < Config.MAX_ORCHESTRATION_ITERATIONS and status != "final":
debugMod.log(f"--- Iteration {iteration} [Status: {status}] ---") debugMod.log(f"--- Iteration {iteration} [Status: {status}] ---")
response = "" response = ""
if status == "continue": if status == "continue":
# Include previous responses in reflection # Include previous responses in reflection
reflection_prompt = f"""Determine the next action needed to answer: {query} reflection_prompt = f"""Determine the next action needed to answer: {query}
Available actions: Available actions:
1. web_search - Needs web information 1. web_search - Needs web information
2. user_input - Requires clarification 2. user_input - Requires clarification
3. final_response - Ready to answer 3. final_response - Ready to answer
Context: Context:
- Web: {aggregated_web_context} - Web: {aggregated_web_context}
- Local: {local_context} - Local: {local_context}
- User: {user_context} - User: {user_context}
- Previous Responses: {response_context} - Previous Responses: {response_context}
Return ONLY: web_search/user_input/final_response""" Return ONLY: web_search/user_input/final_response"""
status = rag_query( show_thinking('[choosing the appropriate action]')
reflection_prompt, task_type=task_type, silent=True).strip().lower() status = rag_query(
debugMod.log(f"Action determined: {status}") reflection_prompt, task_type=task_type, silent=True).strip().lower()
debugMod.log(f"Action determined: {status}")
if status == "web_search": if status == "web_search":
search_prompt = f"""Generate search query considering: {query} show_thinking("[Searching web for information...]")
Previous responses: {response_context}
Return ONLY search terms"""
search_terms = rag_query( search_prompt = f"""Generate search query considering: {query}
search_prompt, task_type=task_type, silent=True).strip('"') Previous responses: {response_context}
debugMod.log(f"Searching web for: {search_terms}") Return ONLY search terms"""
web_results, new_links = perform_web_search(search_terms) search_terms = rag_query(
links.extend(new_links) search_prompt, task_type=task_type, silent=True).strip('"')
debugMod.log(f"Searching web for: {search_terms}")
if web_results: web_results, new_links = perform_web_search(search_terms)
aggregated_web_context += f"\nWeb: {web_results}" links.extend(new_links)
debugMod.log(f"Updated web context")
elif status == "user_input": if web_results:
comm_outp("\n[System] Additional info needed:") aggregated_web_context += f"\nWeb: {web_results}"
user_input = comm_inp("Please clarify: ") debugMod.log(f"Updated web context")
user_context += f"\nUser input: {user_input}"
debugMod.log(f"Received user input")
status = "continue"
elif status == "final_response": elif status == "user_input":
break comm_outp("\n[System] Additional info needed:")
user_input = comm_inp("Please clarify: ")
user_context += f"\nUser input: {user_input}"
debugMod.log(f"Received user input")
status = "continue"
else: elif status == "final_response":
debugMod.log(f"Unknown status: {status}") break
status = "final_response"
# Generate and store response else:
if status != "final_response": debugMod.log(f"Unknown status: {status}")
response = rag_query( status = "final_response"
query,
task_type=task_type,
web_context=aggregated_web_context,
local_context=local_context,
user_context=user_context,
response_context=response_context # Pass previous responses
)
response_context += f"\nIteration {iteration} response: {response}"
debugMod.log(f"Iteration {iteration} response stored")
iteration += 1 # Generate and store response
if status != "final_response":
response = rag_query(
query,
task_type=task_type,
web_context=aggregated_web_context,
local_context=local_context,
user_context=user_context,
response_context=response_context # Pass previous responses
)
response_context += f"\nIteration {iteration} response: {response}"
debugMod.log(f"Iteration {iteration} response stored")
# Generate final response with full context iteration += 1
final_response = rag_query(
f"Final answer considering: {query}",
task_type=task_type,
web_context=aggregated_web_context,
local_context=local_context,
user_context=user_context,
response_context=response_context
)
debugMod.log("Orchestration completed") # Generate final response with full context
return [final_response, links] final_response = rag_query(
f"Final answer considering: {query}",
task_type=task_type,
web_context=aggregated_web_context,
local_context=local_context,
user_context=user_context,
response_context=response_context
)
debugMod.log("Orchestration completed")
return [final_response, links]
if __name__ == "__main__": if __name__ == "__main__":
@@ -183,9 +188,9 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--file', type=str, default="", parser.add_argument('--file', type=str, default="",
help='Path to data file for analysis') help='Path to data file for analysis')
parser.add_argument('--cli', type=str, default="false", parser.add_argument('--cli', type=str, default="false",
help="whether to use the CLI for input or run the API") help="whether to use the CLI for input or run the API")
args = parser.parse_args() args = parser.parse_args()
vector_store = None vector_store = None
@@ -220,5 +225,18 @@ if __name__ == "__main__":
# code # code
code_blocks = re.findall(Config.code_block_regex(), response, re.DOTALL) code_blocks = re.findall(Config.code_block_regex(), response, re.DOTALL)
if code_blocks: if code_blocks:
show_thinking('[running code...]')
orchestrate_code(orchestrate, vector_store, chunks, orchestrate_code(orchestrate, vector_store, chunks,
user_env, code_blocks, query, response, links) user_env, code_blocks, query, response, links)
# clean up
try:
# For Linux/macOS
subprocess.run(["pkill", "-f", "ollama run"], check=False)
# For Windows
subprocess.run(["taskkill", "/IM", "ollama.exe", "/F"], check=False)
debugMod.log("Terminated Ollama background processes")
except Exception as e:
debugMod.log(f"Cleanup error: {str(e)}")
+128 -66
View File
@@ -1,27 +1,29 @@
import re
import debug as debugMod import debug as debugMod
from search import perform_web_search from config import Config
import ollama import ollama
import conversation_store import conversation_store
from helpers import highlight_code
conversation_store.initialize_db() conversation_store.initialize_db()
# models: better: qwen2.5-coder:14b, faster: phi3 (but worse), with more processing power: deepseek-r1:32b # models: better: qwen2.5-coder:14b, faster: phi3 (but worse), with more processing power: deepseek-r1:32b
MODEL_NAMES = { MODEL_NAMES = {
"classification": "dolphin3:8b", # Best for structured tasks "classification": "dolphin3:8b", # Best for structured tasks
"simple": "phi3:latest", # phi3:mini "simple": "phi3:latest", # phi3:mini
"medium": "llama3:8b-instruct-q8_0", "medium": "llama3:8b-instruct-q8_0",
"complex": "deepseek-coder:33b-instruct-q4_K_M" "complex": "deepseek-coder:33b-instruct-q4_K_M"
} }
def classify_task(query: str) -> str: def classify_task(query: str) -> str:
# Use a tiny model to classify the task # Use a tiny model to classify the task
prompt = f"""Classify this query into one of these categories: prompt = f"""Classify this query into one of these categories:
- "simple": greetings, yes/no, basic facts - "simple": greetings, yes/no, basic facts
- "medium": summarization, simple coding - "medium": summarization, simple coding
- "complex": advanced coding, data analysis, multi-step reasoning - "complex": advanced coding, data analysis, multi-step reasoning
Query: {query} Query: {query}
Return ONLY the category name (e.g., "simple").""" Return ONLY the category name (e.g., "simple")."""
toPassIn = "" toPassIn = ""
for i in range(3): for i in range(3):
@@ -36,14 +38,27 @@ def classify_task(query: str) -> str:
return 'complex' return 'complex'
def generate_prompt(query, web_context, local_context, user_context, response_context, onlyRules=False): def generate_prompt(query, web_context, local_context, user_context, response_context, task_type, onlyRules=False):
prompt = f""" if task_type == "simple":
return f"""RESPONSE RULES:
1. Respond ONLY with a single-sentence friendly reply
2. NEVER include explanations, markdown, or metadata
3. Keep responses under 15 words
4. ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
Query: {query}
Response:""" # Explicit response start
else:
prompt = f"""
**Strict Response Rules** **Strict Response Rules**
1. Greetings & Casual Queries: 1. General Rules:
- For greetings (e.g. "good morning", "hello"): - For greetings (e.g. "good morning", "hello"):
* Respond with ONLY a short friendly acknowledgment * Respond with ONLY a short friendly acknowledgment
* NEVER explain why you can't chat casually * NEVER explain why you can't chat casually
* Example: "Good morning! How can I assist you today?" * Example: "Good morning! How can I assist you today?"
- NEVER give the user code they didn't ask for
- ONLY answer the question. Do NOT EVER give the user extra information, questions, etc if they did not ask for them!
2. Technical Responses: 2. Technical Responses:
- Generate code ONLY if: - Generate code ONLY if:
@@ -63,6 +78,8 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
- NO justification of rules to users - NO justification of rules to users
- NEVER include the user's question unless explicitly asked to do so - NEVER include the user's question unless explicitly asked to do so
- NEVER include previous responses - NEVER include previous responses
- NEVER EVER SHOW THE RULES TO THE USER
- ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
{f'Local File Context: {local_context}' if local_context else ''} {f'Local File Context: {local_context}' if local_context else ''}
""" """
@@ -83,27 +100,72 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
return prompt return prompt
def show_thinking(indicator: str = None):
print(
f"\033[90m{indicator if indicator else "[Thinking...]"}\033[0m", flush=True)
def call_ollama_and_print(task_type, prompt, silent=False): def call_ollama_and_print(task_type, prompt, silent=False):
temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)
if silent: if silent:
response = ollama.chat(model=MODEL_NAMES[task_type], messages=[ response = ollama.chat(
{"role": "user", "content": prompt}]) model=MODEL_NAMES[task_type], messages=[
{"role": "user", "content": prompt}],
options={'temperature': temperature}
)
debugMod.log("RAG query response received") debugMod.log("RAG query response received")
return response return response
full_response = "" full_response = ""
print("\nAI Response: ", end="", flush=True) # Start response line show_thinking()
# Stream the response # Stream the response
stream = ollama.chat( stream = ollama.chat(
model=MODEL_NAMES[task_type], model=MODEL_NAMES[task_type],
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
stream=True stream=True,
options={'temperature': temperature}
) )
buffer = ""
in_code_block = False
code_lang = None
first_chunk = True
for chunk in stream: for chunk in stream:
content = chunk.get('message', {}).get('content', '') if first_chunk:
print(content, end="", flush=True) # Stream to terminal first_chunk = False
full_response += content print("\r\033[K", end="") # Clear line
print("\nAI Response: ", end="", flush=True)
content: str = chunk.get('message', {}).get('content', '')
if content == '```' or re.match('```.*', content):
if in_code_block:
in_code_block = False
print()
buffer += content
code_lang = None
else:
in_code_block = True
code_lang = content.replace('```', '').strip()
if (len(code_lang) == 0):
code_lang = "TODO"
elif code_lang == "TODO":
# last chunk was the backticks, now is lang
splitVal = content.strip().split()
code_lang = splitVal[0]
if (len(splitVal) > 1 and len(splitVal[1]) > 0):
hcode = highlight_code(splitVal[1], code_lang)
print(hcode, end="", flush=True)
buffer += hcode
else:
buffer += content
print(content, end="", flush=True)
print() # Newline after streaming print() # Newline after streaming
debugMod.log("RAG query response received") debugMod.log("RAG query response received")
@@ -111,65 +173,65 @@ def call_ollama_and_print(task_type, prompt, silent=False):
def multi_choice_query(query, options: list[str], task_type: str, web_context="", local_context="", user_context="", silent=False): def multi_choice_query(query, options: list[str], task_type: str, web_context="", local_context="", user_context="", silent=False):
attempts = 0 attempts = 0
max_attempts = 3 max_attempts = 3
inds = list(range(len(options))) inds = list(range(len(options)))
valid_range = f"0-{len(inds) - 1}" valid_range = f"0-{len(inds) - 1}"
last_error = "" last_error = ""
debugMod.log( debugMod.log(
f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}") f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}")
while attempts < max_attempts: while attempts < max_attempts:
prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected. prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected.
Available Options: Available Options:
{"\n".join([f"{i}: {option}" for i, option in enumerate(options)])} {"\n".join([f"{i}: {option}" for i, option in enumerate(options)])}
Question: {query} Question: {query}
Context Sources: Context Sources:
{f'[WEB] {web_context}' if web_context else ''} {f'[WEB] {web_context}' if web_context else ''}
{f'[LOCAL] {local_context}' if local_context else ''} {f'[LOCAL] {local_context}' if local_context else ''}
{f'[USER] {user_context}' if user_context else ''} {f'[USER] {user_context}' if user_context else ''}
{generate_prompt(query, web_context, local_context, user_context, onlyRules=True)} {generate_prompt(query, web_context, local_context, user_context, onlyRules=True)}
- You MUST return a SINGLE INTEGER between {valid_range} - You MUST return a SINGLE INTEGER between {valid_range}
- DO NOT include explanations or punctuation""" - DO NOT include explanations or punctuation"""
if last_error: if last_error:
prompt += f"\n\nPrevious invalid response: {last_error}" prompt += f"\n\nPrevious invalid response: {last_error}"
try: try:
content = call_ollama_and_print(task_type, prompt, silent) content = call_ollama_and_print(task_type, prompt, silent)
debugMod.log(f"Multi-choice response: {content}", wrapped=True) debugMod.log(f"Multi-choice response: {content}", wrapped=True)
# Strict validation # Strict validation
if not content.isdigit(): if not content.isdigit():
raise ValueError(f"Non-numeric response: {content}") raise ValueError(f"Non-numeric response: {content}")
ind = int(content) ind = int(content)
if 0 <= ind < len(options): if 0 <= ind < len(options):
debugMod.log(f"Valid choice selected: {ind} ({options[ind]})") debugMod.log(f"Valid choice selected: {ind} ({options[ind]})")
return options[ind] return options[ind]
raise IndexError(f"Index {ind} out of range {valid_range}") raise IndexError(f"Index {ind} out of range {valid_range}")
except (ValueError, IndexError) as e: except (ValueError, IndexError) as e:
last_error = str(e) last_error = str(e)
debugMod.log(f"Validation failed: {last_error}") debugMod.log(f"Validation failed: {last_error}")
attempts += 1 attempts += 1
continue continue
except Exception as e: except Exception as e:
debugMod.log(f"Unexpected error: {str(e)}") debugMod.log(f"Unexpected error: {str(e)}")
attempts += 1 attempts += 1
continue continue
# Fallback to safest option after all attempts # Fallback to safest option after all attempts
debugMod.log(f"All attempts failed. Defaulting to first option") debugMod.log(f"All attempts failed. Defaulting to first option")
return options[0] return options[0]
def rag_query(query, task_type: str = None, web_context="", local_context="", user_context="", response_context="", silent=False): def rag_query(query, task_type: str = None, web_context="", local_context="", user_context="", response_context="", silent=False):
@@ -178,7 +240,7 @@ def rag_query(query, task_type: str = None, web_context="", local_context="", us
debugMod.log(f"Generating {task_type} RAG query with query: {query}") debugMod.log(f"Generating {task_type} RAG query with query: {query}")
prompt = generate_prompt( prompt = generate_prompt(
query, web_context, local_context, user_context, response_context) query, web_context, local_context, user_context, response_context, task_type)
response = call_ollama_and_print(task_type, prompt, silent) response = call_ollama_and_print(task_type, prompt, silent)