quality of life upgrades and bug fixes

2026-07-13 22:48:07 +00:00 · 2025-04-02 21:56:41 -04:00
parent b935b6002b
commit 73db5a78e5
6 changed files with 301 additions and 173 deletions
@@ -3,10 +3,12 @@ from pathlib import Path
 import re
 from types import FunctionType
 import docker
 import json
 import debug as debugMod
 import conversation_store
 from config import Config
 from queries import show_thinking
 class UserEnvironment:
@@ -14,6 +16,30 @@ class UserEnvironment:
 		self.user_id = user_id
 		self.client = docker.from_env()
 		self.temp_dir = tempfile.TemporaryDirectory(prefix=f"{user_id}_code_")
 		self._ensure_sandbox_image()
 	def _ensure_sandbox_image(self):
 		"""Make sure the ``code-sandbox`` Docker image exists, building it if missing.
 		The image is looked up through ``self.client``; when it is not found it is
 		built from ``Dockerfile.sandbox`` with ``"."`` as the build path.
 		Raises:
 			RuntimeError: if the lookup or the build fails with a Docker error
 				(the original Docker exception is chained as the cause).
 		"""
 		try:
 			self.client.images.get("code-sandbox")
 			return
 		except docker.errors.ImageNotFound:
 			# Expected on first run: fall through and build the image below.
 			pass
 		except docker.errors.APIError as e:
 			raise RuntimeError(f"Docker API error: {str(e)}") from e
 		# The build runs outside the handler above so that API failures raised
 		# while building are wrapped the same way as lookup failures.
 		debugMod.log("building code-sandbox image from Dockerfile.sandbox...")
 		try:
 			self.client.images.build(
 				path=".",
 				dockerfile="Dockerfile.sandbox",
 				tag="code-sandbox",
 				rm=True,
 				forcerm=True
 			)
 		except docker.errors.BuildError as e:
 			raise RuntimeError(f"Failed to build Docker image: {str(e)}") from e
 		except docker.errors.APIError as e:
 			raise RuntimeError(f"Docker API error: {str(e)}") from e
 		debugMod.log("successfully built code-sandbox image")
 	def execute_code(self, code: str, context=None, timeout=15, memory_limit=100):
 		# Validate input
@@ -48,7 +74,6 @@ class UserEnvironment:
 				detach=True,
 				stdout=True,
 				stderr=True,
 				timeout=timeout
 			)
 			# Wait for completion
@@ -98,10 +123,10 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
 			execution_result = user_env.execute_code(
 				current_code, context=chunks if chunks else None)
-			if isinstance(execution_result, dict) and 'err' in execution_result:
+			if isinstance(execution_result, dict) and execution_result['error']:
 				# hard code to let user know the program didn't explode
-				debugMod.log(
+				show_thinking(
-					"\n\nhmmm...looks like this code didn't work properly, I'll try debugging it now!\n")
+					"[hmmm...looks like this code didn't work properly, I'll try debugging it now!]")
 				last_error = execution_result['err']
 				debugMod.log(f"\nExecution error: {last_error}\n")
@@ -128,7 +153,9 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
 				else:
 					break
 			else:
-				debugMod.log("\nCode Execution Result:\n", execution_result)
+				debugMod.log("\nCode Execution Result:\n", json.dumps(execution_result))
 				print("\nCode Execution Result:\n", execution_result['output'].strip())
 				if execution_result:
 					# Get current conversation ID after saving conversation
 					conv_id = conversation_store.save_conversation(query, response, links)
@@ -142,6 +169,7 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env:
 						retries=retry_count,
 						conversation_id=conv_id
 					)
 				break
 		if last_error and retry_count >= Config.MAX_CODE_RETRIES:
@@ -35,7 +35,12 @@ class Config:
    MAX_RESPONSE_LENGTH = 10000       # Characters for stored responses
    # === Model Settings ===
-    MODEL_TEMPERATURE = 0.7          # Default creativity level
+    MODEL_TEMPERATURE = {
        "simple": 0.3,
        "medium": 0.6,
        "complex": 0.7
    }
    MAX_CLASSIFY_ATTEMPTS = 3        # Task classification retries
    # === Safety Limits ===
@@ -63,7 +63,7 @@ def save_code_execution(code, result, error=None, retries=0, conversation_id=Non
 				  error_message, retry_count, timestamp)
 				 VALUES (?, ?, ?, ?, ?, ?)''',
           (conversation_id, code, execution_result,
-            error_message, retries, datetime.datetime.now()))
+            error_message, retries, datetime.now()))
 	conn.commit()
 	conn.close()
@@ -0,0 +1,15 @@
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
 from pygments.formatters import TerminalFormatter
 import debug as debugMod
 def highlight_code(code: str, language: str = 'py') -> str:
 	"""Return ``code`` highlighted for terminal output with Pygments.
 	Args:
 		code: Source text to highlight.
 		language: Lexer alias passed to ``get_lexer_by_name`` (default ``'py'``).
 	Returns:
 		The highlighted text produced by ``TerminalFormatter``, or ``code``
 		unchanged when no lexer is found for ``language``.
 	"""
 	try:
 		lexer = get_lexer_by_name(language)
 	except ValueError:
 		# Unknown lexer alias: degrade to plain text instead of failing.
 		debugMod.log("Warning: Language not recognized. Printing without highlighting.")
 		return code
 	formatter = TerminalFormatter()
 	return highlight(code, lexer, formatter)
@@ -1,9 +1,9 @@
 from codeExecution import UserEnvironment, orchestrate_code
 from queries import (
-    perform_web_search,
+	rag_query,
    rag_query,
   	classify_task,
-    MODEL_NAMES
+	MODEL_NAMES,
 	show_thinking
 )
 import debug as debugMod
 from search import perform_web_search
@@ -14,6 +14,7 @@ import os
 import argparse
 import re
 import ollama
 import subprocess
 from config import Config
 import conversation_store
 conversation_store.initialize_db()
@@ -58,124 +59,128 @@ def create_vector_store(chunks):
 	embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 	vector_store = Chroma.from_texts(
-            chunks,
+			chunks,
-            embeddings,
+			embeddings,
-            persist_directory=Config.chroma_path()
+			persist_directory=Config.chroma_path()
-        )
+		)
 	debugMod.log("Vector store created")
 	return vector_store
 def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input):
-    debugMod.log(f"Orchestrating query: {query}")
+	debugMod.log(f"Orchestrating query: {query}")
-    aggregated_web_context = ""
+	aggregated_web_context = ""
-    local_context = ""
+	local_context = ""
-    user_context = ""
+	user_context = ""
-    response_context = ""
+	response_context = ""
-    links = []
+	links = []
-    # Classify task once at start
+	# Classify task once at start
-    task_type = classify_task(query)
+	show_thinking("[Analyzing query type...]")
-    debugMod.log(f"Task classified as: {task_type}")
+	task_type = classify_task(query)
 	show_thinking(f"[Task classified as: {task_type}]")
-    # Early exit for simple tasks
+	# Early exit for simple tasks
-    if task_type == "simple":
+	if task_type == "simple":
-        debugMod.log("Direct response for simple task")
+		debugMod.log("Direct response for simple task")
-        return [rag_query(query, task_type=task_type), []]
+		return [rag_query(query, task_type=task_type), []]
-    # Initialize context for medium/complex tasks
+	# Initialize context for medium/complex tasks
-    if vector_store:
+	if vector_store:
-        docs = vector_store.similarity_search(query, k=3)
+		docs = vector_store.similarity_search(query, k=3)
-        local_context = "\n".join(
+		local_context = "\n".join(
-            [d.page_content for d in docs]) if docs else ""
+			[d.page_content for d in docs]) if docs else ""
-        debugMod.log(f"Local context: {local_context}")
+		debugMod.log(f"Local context: {local_context}")
-    iteration = 0
+	iteration = 0
-    status = "continue"
+	status = "continue"
-    while iteration < Config.MAX_ORCHESTRATION_ITERATIONS and status != "final":
+	while iteration < Config.MAX_ORCHESTRATION_ITERATIONS and status != "final":
-        debugMod.log(f"--- Iteration {iteration} [Status: {status}] ---")
+		debugMod.log(f"--- Iteration {iteration} [Status: {status}] ---")
-        response = ""
+		response = ""
-        if status == "continue":
+		if status == "continue":
-            # Include previous responses in reflection
+			# Include previous responses in reflection
-            reflection_prompt = f"""Determine the next action needed to answer: {query}
+			reflection_prompt = f"""Determine the next action needed to answer: {query}
-            
+			
-            Available actions:
+			Available actions:
-            1. web_search - Needs web information
+			1. web_search - Needs web information
-            2. user_input - Requires clarification
+			2. user_input - Requires clarification
-            3. final_response - Ready to answer
+			3. final_response - Ready to answer
-            
+			
-            Context:
+			Context:
-            - Web: {aggregated_web_context}
+			- Web: {aggregated_web_context}
-            - Local: {local_context}
+			- Local: {local_context}
-            - User: {user_context}
+			- User: {user_context}
-            - Previous Responses: {response_context}
+			- Previous Responses: {response_context}
-            
+			
-            Return ONLY: web_search/user_input/final_response"""
+			Return ONLY: web_search/user_input/final_response"""
-            status = rag_query(
+			show_thinking('[choosing the appropriate action]')
-            	reflection_prompt, task_type=task_type, silent=True).strip().lower()
+			status = rag_query(
-            debugMod.log(f"Action determined: {status}")
+				reflection_prompt, task_type=task_type, silent=True).strip().lower()
 			debugMod.log(f"Action determined: {status}")
-        if status == "web_search":
+		if status == "web_search":
-            search_prompt = f"""Generate search query considering: {query}
+			show_thinking("[Searching web for information...]")
            Previous responses: {response_context}
            Return ONLY search terms"""
-            search_terms = rag_query(
+			search_prompt = f"""Generate search query considering: {query}
-            	search_prompt, task_type=task_type, silent=True).strip('"')
+			Previous responses: {response_context}
-            debugMod.log(f"Searching web for: {search_terms}")
+			Return ONLY search terms"""
-            web_results, new_links = perform_web_search(search_terms)
+			search_terms = rag_query(
-            links.extend(new_links)
+				search_prompt, task_type=task_type, silent=True).strip('"')
 			debugMod.log(f"Searching web for: {search_terms}")
-            if web_results:
+			web_results, new_links = perform_web_search(search_terms)
-                aggregated_web_context += f"\nWeb: {web_results}"
+			links.extend(new_links)
                debugMod.log(f"Updated web context")
-        elif status == "user_input":
+			if web_results:
-            comm_outp("\n[System] Additional info needed:")
+				aggregated_web_context += f"\nWeb: {web_results}"
-            user_input = comm_inp("Please clarify: ")
+				debugMod.log(f"Updated web context")
            user_context += f"\nUser input: {user_input}"
            debugMod.log(f"Received user input")
            status = "continue"
-        elif status == "final_response":
+		elif status == "user_input":
-            break
+			comm_outp("\n[System] Additional info needed:")
 			user_input = comm_inp("Please clarify: ")
 			user_context += f"\nUser input: {user_input}"
 			debugMod.log(f"Received user input")
 			status = "continue"
-        else:
+		elif status == "final_response":
-            debugMod.log(f"Unknown status: {status}")
+			break
            status = "final_response"
-        # Generate and store response
+		else:
-        if status != "final_response":
+			debugMod.log(f"Unknown status: {status}")
-            response = rag_query(
+			status = "final_response"
                query,
                task_type=task_type,
                web_context=aggregated_web_context,
                local_context=local_context,
                user_context=user_context,
                response_context=response_context  # Pass previous responses
            )
            response_context += f"\nIteration {iteration} response: {response}"
            debugMod.log(f"Iteration {iteration} response stored")
-        iteration += 1
+		# Generate and store response
 		if status != "final_response":
 			response = rag_query(
 				query,
 				task_type=task_type,
 				web_context=aggregated_web_context,
 				local_context=local_context,
 				user_context=user_context,
 				response_context=response_context  # Pass previous responses
 			)
 			response_context += f"\nIteration {iteration} response: {response}"
 			debugMod.log(f"Iteration {iteration} response stored")
-    # Generate final response with full context
+		iteration += 1
    final_response = rag_query(
        f"Final answer considering: {query}",
        task_type=task_type,
        web_context=aggregated_web_context,
        local_context=local_context,
        user_context=user_context,
        response_context=response_context
    )
-    debugMod.log("Orchestration completed")
+	# Generate final response with full context
-    return [final_response, links]
+	final_response = rag_query(
 		f"Final answer considering: {query}",
 		task_type=task_type,
 		web_context=aggregated_web_context,
 		local_context=local_context,
 		user_context=user_context,
 		response_context=response_context
 	)
 	debugMod.log("Orchestration completed")
 	return [final_response, links]
 if __name__ == "__main__":
@@ -183,9 +188,9 @@ if __name__ == "__main__":
 	parser = argparse.ArgumentParser()
 	parser.add_argument('--file', type=str, default="",
-                     help='Path to data file for analysis')
+					 help='Path to data file for analysis')
 	parser.add_argument('--cli', type=str, default="false",
-	                    help="whether to use the CLI for input or run the API")
+						help="whether to use the CLI for input or run the API")
 	args = parser.parse_args()
 	vector_store = None
@@ -220,5 +225,18 @@ if __name__ == "__main__":
 		# code
 		code_blocks = re.findall(Config.code_block_regex(), response, re.DOTALL)
 		if code_blocks:
 			show_thinking('[running code...]')
 			orchestrate_code(orchestrate, vector_store, chunks,
-			                 user_env, code_blocks, query, response, links)
+							 user_env, code_blocks, query, response, links)
   # clean up
 	try:
 		# For Linux/macOS
 		subprocess.run(["pkill", "-f", "ollama run"], check=False)
  		# For Windows
 		subprocess.run(["taskkill", "/IM", "ollama.exe", "/F"], check=False)
 		debugMod.log("Terminated Ollama background processes")
 	except Exception as e:
 		debugMod.log(f"Cleanup error: {str(e)}")
@@ -1,27 +1,29 @@
 import re
 import debug as debugMod
-from search import perform_web_search
+from config import Config
 import ollama
 import conversation_store
 from helpers import highlight_code
 conversation_store.initialize_db()
 # models: better: qwen2.5-coder:14b, faster: phi3 (but worse), with more processing power: deepseek-r1:32b
 MODEL_NAMES = {
-    "classification": "dolphin3:8b",  # Best for structured tasks
+	"classification": "dolphin3:8b",  # Best for structured tasks
-    "simple": "phi3:latest",  # phi3:mini
+	"simple": "phi3:latest",  # phi3:mini
-    "medium": "llama3:8b-instruct-q8_0",
+	"medium": "llama3:8b-instruct-q8_0",
-    "complex": "deepseek-coder:33b-instruct-q4_K_M"
+	"complex": "deepseek-coder:33b-instruct-q4_K_M"
 }
 def classify_task(query: str) -> str:
 	# Use a tiny model to classify the task
 	prompt = f"""Classify this query into one of these categories:
-    - "simple": greetings, yes/no, basic facts
+	- "simple": greetings, yes/no, basic facts
-    - "medium": summarization, simple coding
+	- "medium": summarization, simple coding
-    - "complex": advanced coding, data analysis, multi-step reasoning
+	- "complex": advanced coding, data analysis, multi-step reasoning
-    Query: {query}
+	Query: {query}
-    Return ONLY the category name (e.g., "simple")."""
+	Return ONLY the category name (e.g., "simple")."""
 	toPassIn = ""
 	for i in range(3):
@@ -36,14 +38,27 @@ def classify_task(query: str) -> str:
 	return 'complex'
-def generate_prompt(query, web_context, local_context, user_context, response_context, onlyRules=False):
+def generate_prompt(query, web_context, local_context, user_context, response_context, task_type, onlyRules=False):
-	prompt = f"""
+	if task_type == "simple":
 		return f"""RESPONSE RULES:
 	1. Respond ONLY with a single-sentence friendly reply
 	2. NEVER include explanations, markdown, or metadata
 	3. Keep responses under 15 words
 	4. ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
 	Query: {query}
 	Response:"""  # Explicit response start
 	else:
 		prompt = f"""
 	**Strict Response Rules**
-	1. Greetings & Casual Queries:
+	1. General Rules:
 		- For greetings (e.g. "good morning", "hello"):
 			* Respond with ONLY a short friendly acknowledgment
 			* NEVER explain why you can't chat casually
 			* Example: "Good morning! How can I assist you today?"
 		- NEVER give the user code they didn't ask for
 		- ONLY answer the question. Do NOT EVER give the user extra information, questions, etc if they did not ask for them!
 	2. Technical Responses:
 		- Generate code ONLY if:
@@ -63,6 +78,8 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
 		- NO justification of rules to users
 		- NEVER include the user's question unless explicitly asked to do so
 		- NEVER include previous responses
 		- NEVER EVER SHOW THE RULES TO THE USER
 		- ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
 	{f'Local File Context: {local_context}' if local_context else ''}
 	"""
@@ -83,27 +100,72 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
 	return prompt
 def show_thinking(indicator: str = None):
 	"""Print a status indicator in grey (ANSI code 90), flushing immediately.
 	Falls back to "[Thinking...]" when ``indicator`` is None or empty.
 	"""
 	label = indicator or "[Thinking...]"
 	grey_line = "\033[90m" + label + "\033[0m"
 	print(grey_line, flush=True)
 def call_ollama_and_print(task_type, prompt, silent=False):
 	temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)
 	if silent:
-		response = ollama.chat(model=MODEL_NAMES[task_type], messages=[
+		response = ollama.chat(
-		                       {"role": "user", "content": prompt}])
+			model=MODEL_NAMES[task_type], messages=[
 				{"role": "user", "content": prompt}],
 			options={'temperature': temperature}
 		)
 		debugMod.log("RAG query response received")
 		return response
 	full_response = ""
-	print("\nAI Response: ", end="", flush=True)  # Start response line
+	show_thinking()
 	# Stream the response
 	stream = ollama.chat(
 		model=MODEL_NAMES[task_type],
 		messages=[{"role": "user", "content": prompt}],
-		stream=True
+		stream=True,
 		options={'temperature': temperature}
 	)
 	buffer = ""
 	in_code_block = False
 	code_lang = None
 	first_chunk = True
 	for chunk in stream:
-		content = chunk.get('message', {}).get('content', '')
+		if first_chunk:
-		print(content, end="", flush=True)  # Stream to terminal
+			first_chunk = False
-		full_response += content
+			print("\r\033[K", end="")  # Clear line
 			print("\nAI Response: ", end="", flush=True)
 		content: str = chunk.get('message', {}).get('content', '')
 		if content == '```' or re.match('```.*', content):
 			if in_code_block:
 				in_code_block = False
 				print()
 				buffer += content
 				code_lang = None
 			else:
 				in_code_block = True
 				code_lang = content.replace('```', '').strip()
 				if (len(code_lang) == 0):
 					code_lang = "TODO"
 		elif code_lang == "TODO":
 			# last chunk was the backticks, now is lang
 			splitVal = content.strip().split()
 			code_lang = splitVal[0]
 			if (len(splitVal) > 1 and len(splitVal[1]) > 0):
 				hcode = highlight_code(splitVal[1], code_lang)
 				print(hcode, end="", flush=True)
 				buffer += hcode
 		else:
 			buffer += content
 			print(content, end="", flush=True)
 	print()  # Newline after streaming
 	debugMod.log("RAG query response received")
@@ -111,65 +173,65 @@ def call_ollama_and_print(task_type, prompt, silent=False):
 def multi_choice_query(query, options: list[str], task_type: str, web_context="", local_context="", user_context="", silent=False):
-    attempts = 0
+	attempts = 0
-    max_attempts = 3
+	max_attempts = 3
-    inds = list(range(len(options)))
+	inds = list(range(len(options)))
-    valid_range = f"0-{len(inds) - 1}"
+	valid_range = f"0-{len(inds) - 1}"
-    last_error = ""
+	last_error = ""
-    debugMod.log(
+	debugMod.log(
-    	f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}")
+		f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}")
-    while attempts < max_attempts:
+	while attempts < max_attempts:
-        prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected.
+		prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected.
-        
+		
-        Available Options:
+		Available Options:
-        {"\n".join([f"{i}: {option}" for i, option in enumerate(options)])}
+		{"\n".join([f"{i}: {option}" for i, option in enumerate(options)])}
-        Question: {query}
+		Question: {query}
-        Context Sources:
+		Context Sources:
-        {f'[WEB] {web_context}' if web_context else ''}
+		{f'[WEB] {web_context}' if web_context else ''}
-        {f'[LOCAL] {local_context}' if local_context else ''}
+		{f'[LOCAL] {local_context}' if local_context else ''}
-        {f'[USER] {user_context}' if user_context else ''}
+		{f'[USER] {user_context}' if user_context else ''}
-        {generate_prompt(query, web_context, local_context, user_context, onlyRules=True)}
+		{generate_prompt(query, web_context, local_context, user_context, onlyRules=True)}
-        - You MUST return a SINGLE INTEGER between {valid_range}
+		- You MUST return a SINGLE INTEGER between {valid_range}
-        - DO NOT include explanations or punctuation"""
+		- DO NOT include explanations or punctuation"""
-        if last_error:
+		if last_error:
-            prompt += f"\n\nPrevious invalid response: {last_error}"
+			prompt += f"\n\nPrevious invalid response: {last_error}"
-        try:
+		try:
-            content = call_ollama_and_print(task_type, prompt, silent)
+			content = call_ollama_and_print(task_type, prompt, silent)
-            debugMod.log(f"Multi-choice response: {content}", wrapped=True)
+			debugMod.log(f"Multi-choice response: {content}", wrapped=True)
-            # Strict validation
+			# Strict validation
-            if not content.isdigit():
+			if not content.isdigit():
-                raise ValueError(f"Non-numeric response: {content}")
+				raise ValueError(f"Non-numeric response: {content}")
-            ind = int(content)
+			ind = int(content)
-            if 0 <= ind < len(options):
+			if 0 <= ind < len(options):
-                debugMod.log(f"Valid choice selected: {ind} ({options[ind]})")
+				debugMod.log(f"Valid choice selected: {ind} ({options[ind]})")
-                return options[ind]
+				return options[ind]
-            raise IndexError(f"Index {ind} out of range {valid_range}")
+			raise IndexError(f"Index {ind} out of range {valid_range}")
-        except (ValueError, IndexError) as e:
+		except (ValueError, IndexError) as e:
-            last_error = str(e)
+			last_error = str(e)
-            debugMod.log(f"Validation failed: {last_error}")
+			debugMod.log(f"Validation failed: {last_error}")
-            attempts += 1
+			attempts += 1
-            continue
+			continue
-        except Exception as e:
+		except Exception as e:
-            debugMod.log(f"Unexpected error: {str(e)}")
+			debugMod.log(f"Unexpected error: {str(e)}")
-            attempts += 1
+			attempts += 1
-            continue
+			continue
-    # Fallback to safest option after all attempts
+	# Fallback to safest option after all attempts
-    debugMod.log(f"All attempts failed. Defaulting to first option")
+	debugMod.log(f"All attempts failed. Defaulting to first option")
-    return options[0]
+	return options[0]
 def rag_query(query, task_type: str = None, web_context="", local_context="", user_context="", response_context="", silent=False):
@@ -178,7 +240,7 @@ def rag_query(query, task_type: str = None, web_context="", local_context="", us
 	debugMod.log(f"Generating {task_type} RAG query with query: {query}")
 	prompt = generate_prompt(
-		query, web_context, local_context, user_context, response_context)
+		query, web_context, local_context, user_context, response_context, task_type)
 	response = call_ollama_and_print(task_type, prompt, silent)