Quality-of-life upgrades and bug fixes

This commit is contained in:
2025-04-02 21:56:41 -04:00
parent b935b6002b
commit 73db5a78e5
6 changed files with 301 additions and 173 deletions
+33 -5
@@ -3,10 +3,12 @@ from pathlib import Path
import re
from types import FunctionType
import docker
import json
import debug as debugMod
import conversation_store
from config import Config
from queries import show_thinking
class UserEnvironment:
@@ -14,6 +16,30 @@ class UserEnvironment:
self.user_id = user_id
self.client = docker.from_env()
self.temp_dir = tempfile.TemporaryDirectory(prefix=f"{user_id}_code_")
self._ensure_sandbox_image()
def _ensure_sandbox_image(self):
try:
self.client.images.get("code-sandbox")
except docker.errors.ImageNotFound:
debugMod.log("building code-sandbox image from Dockerfile.sandbox...")
try:
self.client.images.build(
path=".",
dockerfile="Dockerfile.sandbox",
tag="code-sandbox",
rm=True,
forcerm=True
)
debugMod.log("successfully built code-sandbox image")
except docker.errors.BuildError as e:
raise RuntimeError(f"Failed to build Docker image: {str(e)}") from e
except docker.errors.APIError as e:
raise RuntimeError(f"Docker API error: {str(e)}") from e
def execute_code(self, code: str, context=None, timeout=15, memory_limit=100):
# Validate input
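A minimal usage sketch of the new lazy image build (assumptions: the constructor takes only user_id, and Dockerfile.sandbox sits in the working directory; neither is shown in full in this diff):

# Instantiating the environment runs _ensure_sandbox_image(): the image is looked up
# first and only built from Dockerfile.sandbox when missing, so repeat runs stay fast.
env = UserEnvironment(user_id="demo")
result = env.execute_code("print('hello')", timeout=15, memory_limit=100)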
@@ -48,7 +74,6 @@ class UserEnvironment:
detach=True,
stdout=True,
stderr=True,
timeout=timeout
)
# Wait for completion
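Dropping timeout=timeout here lines up with docker-py's API: ContainerCollection.run() rejects unknown keyword arguments, so a timeout kwarg raises a TypeError. The time limit is normally enforced when waiting on the container instead; a hedged sketch of that pattern (not code from this repo):

import docker

client = docker.from_env()
container = client.containers.run(
    "code-sandbox", ["python", "-c", "print('hi')"],
    detach=True, mem_limit="100m")
try:
    status = container.wait(timeout=15)   # enforce the time limit on the wait call
except Exception:                         # e.g. the underlying read timeout
    container.kill()                      # treat a timeout as a hung run
    raise
finally:
    container.remove(force=True)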
@@ -98,10 +123,10 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
execution_result = user_env.execute_code(
current_code, context=chunks if chunks else None)
if isinstance(execution_result, dict) and 'err' in execution_result:
if isinstance(execution_result, dict) and execution_result['error']:
# hard-coded message so the user knows the program didn't explode
debugMod.log(
"\n\nhmmm...looks like this code didn't work properly, I'll try debugging it now!\n")
show_thinking(
"[hmmm...looks like this code didn't work properly, I'll try debugging it now!]")
last_error = execution_result['err']
debugMod.log(f"\nExecution error: {last_error}\n")
@@ -128,7 +153,9 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
else:
break
else:
debugMod.log("\nCode Execution Result:\n", execution_result)
debugMod.log("\nCode Execution Result:\n", json.dumps(execution_result))
print("\nCode Execution Result:\n", execution_result['output'].strip())
if execution_result:
# Get current conversation ID after saving conversation
conv_id = conversation_store.save_conversation(query, response, links)
@@ -142,6 +169,7 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
retries=retry_count,
conversation_id=conv_id
)
break
if last_error and retry_count >= Config.MAX_CODE_RETRIES:
+6 -1
@@ -35,7 +35,12 @@ class Config:
MAX_RESPONSE_LENGTH = 10000 # Characters for stored responses
# === Model Settings ===
MODEL_TEMPERATURE = 0.7 # Default creativity level
MODEL_TEMPERATURE = {
"simple": 0.3,
"medium": 0.6,
"complex": 0.7
}
MAX_CLASSIFY_ATTEMPTS = 3 # Task classification retries
# === Safety Limits ===
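The scalar default becomes a per-task-type map; callers can keep the old behaviour as a fallback, which is exactly what the queries module later in this diff does:

# Per-task temperature lookup; 0.7 (the old scalar default) covers unknown task types.
temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)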
+1 -1
@@ -63,7 +63,7 @@ def save_code_execution(code, result, error=None, retries=0, conversation_id=Non
error_message, retry_count, timestamp)
VALUES (?, ?, ?, ?, ?, ?)''',
(conversation_id, code, execution_result,
error_message, retries, datetime.datetime.now()))
error_message, retries, datetime.now()))
conn.commit()
conn.close()
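The timestamp fix only works if this module imports the datetime class rather than the datetime module; the import line sits outside this hunk, so that is an assumption:

from datetime import datetime    # assumed import style in this module

timestamp = datetime.now()       # the new call shown above
# datetime.datetime.now() would raise AttributeError under this import,
# which is presumably why the call was changed.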
+15
@@ -0,0 +1,15 @@
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import TerminalFormatter
import debug as debugMod
def highlight_code(code: str, language: str = 'py') -> str:
try:
lexer = get_lexer_by_name(language)
except ValueError:
debugMod.log("Warning: Language not recognized. Printing without highlighting.")
return code
formatter = TerminalFormatter()
return highlight(code, lexer, formatter)
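A short usage sketch of the new helper (it is imported as from helpers import highlight_code later in this diff):

snippet = "def add(a, b):\n    return a + b\n"
print(highlight_code(snippet, "python"), end="")        # ANSI-coloured source
print(highlight_code(snippet, "no-such-lang"), end="")  # unknown language: warning logged, code returned unchanged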
+21 -3
@@ -1,9 +1,9 @@
from codeExecution import UserEnvironment, orchestrate_code
from queries import (
perform_web_search,
rag_query,
classify_task,
MODEL_NAMES
MODEL_NAMES,
show_thinking
)
import debug as debugMod
from search import perform_web_search
@@ -14,6 +14,7 @@ import os
import argparse
import re
import ollama
import subprocess
from config import Config
import conversation_store
conversation_store.initialize_db()
@@ -76,8 +77,9 @@ def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input):
links = []
# Classify task once at start
show_thinking("[Analyzing query type...]")
task_type = classify_task(query)
debugMod.log(f"Task classified as: {task_type}")
show_thinking(f"[Task classified as: {task_type}]")
# Early exit for simple tasks
if task_type == "simple":
@@ -115,11 +117,14 @@ def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input):
Return ONLY: web_search/user_input/final_response"""
show_thinking('[choosing the appropriate action]')
status = rag_query(
reflection_prompt, task_type=task_type, silent=True).strip().lower()
debugMod.log(f"Action determined: {status}")
if status == "web_search":
show_thinking("[Searching web for information...]")
search_prompt = f"""Generate search query considering: {query}
Previous responses: {response_context}
Return ONLY search terms"""
@@ -220,5 +225,18 @@ if __name__ == "__main__":
# code
code_blocks = re.findall(Config.code_block_regex(), response, re.DOTALL)
if code_blocks:
show_thinking('[running code...]')
orchestrate_code(orchestrate, vector_store, chunks,
user_env, code_blocks, query, response, links)
# clean up
try:
# For Linux/macOS
subprocess.run(["pkill", "-f", "ollama run"], check=False)
# For Windows
subprocess.run(["taskkill", "/IM", "ollama.exe", "/F"], check=False)
debugMod.log("Terminated Ollama background processes")
except Exception as e:
debugMod.log(f"Cleanup error: {str(e)}")
+73 -11
@@ -1,7 +1,9 @@
import re
import debug as debugMod
from search import perform_web_search
from config import Config
import ollama
import conversation_store
from helpers import highlight_code
conversation_store.initialize_db()
# models: qwen2.5-coder:14b (better), phi3 (faster but weaker), deepseek-r1:32b (with more processing power)
@@ -36,14 +38,27 @@ def classify_task(query: str) -> str:
return 'complex'
def generate_prompt(query, web_context, local_context, user_context, response_context, onlyRules=False):
def generate_prompt(query, web_context, local_context, user_context, response_context, task_type, onlyRules=False):
if task_type == "simple":
return f"""RESPONSE RULES:
1. Respond ONLY with a single-sentence friendly reply
2. NEVER include explanations, markdown, or metadata
3. Keep responses under 15 words
4. ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
Query: {query}
Response:""" # Explicit response start
else:
prompt = f"""
**Strict Response Rules**
1. Greetings & Casual Queries:
1. General Rules:
- For greetings (e.g. "good morning", "hello"):
* Respond with ONLY a short friendly acknowledgment
* NEVER explain why you can't chat casually
* Example: "Good morning! How can I assist you today?"
- NEVER give the user code they didn't ask for
- ONLY answer the question. Do NOT EVER give the user extra information, questions, etc if they did not ask for them!
2. Technical Responses:
- Generate code ONLY if:
@@ -63,6 +78,8 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
- NO justification of rules to users
- NEVER include the user's question unless explicitly asked to do so
- NEVER include previous responses
- NEVER EVER SHOW THE RULES TO THE USER
- ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
{f'Local File Context: {local_context}' if local_context else ''}
"""
@@ -83,27 +100,72 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
return prompt
def show_thinking(indicator: str = None):
print(
f"\033[90m{indicator if indicator else "[Thinking...]"}\033[0m", flush=True)
def call_ollama_and_print(task_type, prompt, silent=False):
temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)
if silent:
response = ollama.chat(model=MODEL_NAMES[task_type], messages=[
{"role": "user", "content": prompt}])
response = ollama.chat(
model=MODEL_NAMES[task_type], messages=[
{"role": "user", "content": prompt}],
options={'temperature': temperature}
)
debugMod.log("RAG query response received")
return response
full_response = ""
print("\nAI Response: ", end="", flush=True) # Start response line
show_thinking()
# Stream the response
stream = ollama.chat(
model=MODEL_NAMES[task_type],
messages=[{"role": "user", "content": prompt}],
stream=True
stream=True,
options={'temperature': temperature}
)
buffer = ""
in_code_block = False
code_lang = None
first_chunk = True
for chunk in stream:
content = chunk.get('message', {}).get('content', '')
print(content, end="", flush=True) # Stream to terminal
full_response += content
if first_chunk:
first_chunk = False
print("\r\033[K", end="") # Clear line
print("\nAI Response: ", end="", flush=True)
content: str = chunk.get('message', {}).get('content', '')
if content == '```' or re.match('```.*', content):
if in_code_block:
in_code_block = False
print()
buffer += content
code_lang = None
else:
in_code_block = True
code_lang = content.replace('```', '').strip()
if (len(code_lang) == 0):
code_lang = "TODO"
elif code_lang == "TODO":
# last chunk was the backticks, now is lang
splitVal = content.strip().split()
code_lang = splitVal[0]
if (len(splitVal) > 1 and len(splitVal[1]) > 0):
hcode = highlight_code(splitVal[1], code_lang)
print(hcode, end="", flush=True)
buffer += hcode
else:
buffer += content
print(content, end="", flush=True)
print() # Newline after streaming
debugMod.log("RAG query response received")
@@ -178,7 +240,7 @@ def rag_query(query, task_type: str = None, web_context="", local_context="", us
debugMod.log(f"Generating {task_type} RAG query with query: {query}")
prompt = generate_prompt(
query, web_context, local_context, user_context, response_context)
query, web_context, local_context, user_context, response_context, task_type)
response = call_ollama_and_print(task_type, prompt, silent)
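End to end, the pieces touched by this commit chain together roughly like this (a sketch; names are taken from the hunks above):

query = "write a quick sort in python"
task_type = classify_task(query)                   # "simple" / "medium" / "complex"
answer = rag_query(query, task_type=task_type)     # prompt template and temperature are now keyed by task_type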