From 73db5a78e519d31fcc937e2ee690c67325ec2dac Mon Sep 17 00:00:00 2001 From: ION606 Date: Wed, 2 Apr 2025 21:56:41 -0400 Subject: [PATCH] quality of life upgrades and bug fixes --- codeExecution.py | 38 +++++++- config.py | 7 +- conversation_store.py | 2 +- helpers.py | 15 +++ main.py | 218 +++++++++++++++++++++++------------------- queries.py | 194 ++++++++++++++++++++++++------------- 6 files changed, 301 insertions(+), 173 deletions(-) create mode 100644 helpers.py diff --git a/codeExecution.py b/codeExecution.py index d64d65e..8b63eac 100644 --- a/codeExecution.py +++ b/codeExecution.py @@ -3,10 +3,12 @@ from pathlib import Path import re from types import FunctionType import docker +import json import debug as debugMod import conversation_store from config import Config +from queries import show_thinking class UserEnvironment: @@ -14,6 +16,30 @@ class UserEnvironment: self.user_id = user_id self.client = docker.from_env() self.temp_dir = tempfile.TemporaryDirectory(prefix=f"{user_id}_code_") + self._ensure_sandbox_image() + + def _ensure_sandbox_image(self): + try: + self.client.images.get("code-sandbox") + except docker.errors.ImageNotFound: + debugMod.log("building code-sandbox image from Dockerfile.sandbox...") + + try: + self.client.images.build( + path=".", + dockerfile="Dockerfile.sandbox", + tag="code-sandbox", + rm=True, + forcerm=True + ) + + debugMod.log("successfully built code-sandbox image") + + except docker.errors.BuildError as e: + raise RuntimeError(f"Failed to build Docker image: {str(e)}") from e + + except docker.errors.APIError as e: + raise RuntimeError(f"Docker API error: {str(e)}") from e def execute_code(self, code: str, context=None, timeout=15, memory_limit=100): # Validate input @@ -48,7 +74,6 @@ class UserEnvironment: detach=True, stdout=True, stderr=True, - timeout=timeout ) # Wait for completion @@ -98,10 +123,10 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env: execution_result = user_env.execute_code( current_code, context=chunks if chunks else None) - if isinstance(execution_result, dict) and 'err' in execution_result: + if isinstance(execution_result, dict) and execution_result['error']: # hard code to let user know the program didn't explode - debugMod.log( - "\n\nhmmm...looks like this code didn't work properly, I'll try debugging it now!\n") + show_thinking( + "[hmmm...looks like this code didn't work properly, I'll try debugging it now!]") last_error = execution_result['err'] debugMod.log(f"\nExecution error: {last_error}\n") @@ -128,7 +153,9 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env: else: break else: - debugMod.log("\nCode Execution Result:\n", execution_result) + debugMod.log("\nCode Execution Result:\n", json.dumps(execution_result)) + print("\nCode Execution Result:\n", execution_result['output'].strip()) + if execution_result: # Get current conversation ID after saving conversation conv_id = conversation_store.save_conversation(query, response, links) @@ -142,6 +169,7 @@ def orchestrate_code(orchestrate: FunctionType, vector_store, chunks, user_env: retries=retry_count, conversation_id=conv_id ) + break if last_error and retry_count >= Config.MAX_CODE_RETRIES: diff --git a/config.py b/config.py index e986521..5c20f99 100644 --- a/config.py +++ b/config.py @@ -35,7 +35,12 @@ class Config: MAX_RESPONSE_LENGTH = 10000 # Characters for stored responses # === Model Settings === - MODEL_TEMPERATURE = 0.7 # Default creativity level + MODEL_TEMPERATURE = { + "simple": 0.3, + "medium": 0.6, + "complex": 0.7 + } + MAX_CLASSIFY_ATTEMPTS = 3 # Task classification retries # === Safety Limits === diff --git a/conversation_store.py b/conversation_store.py index 0c4655c..ecfde82 100644 --- a/conversation_store.py +++ b/conversation_store.py @@ -63,7 +63,7 @@ def save_code_execution(code, result, error=None, retries=0, conversation_id=Non error_message, retry_count, timestamp) VALUES (?, ?, ?, ?, ?, ?)''', (conversation_id, code, execution_result, - error_message, retries, datetime.datetime.now())) + error_message, retries, datetime.now())) conn.commit() conn.close() diff --git a/helpers.py b/helpers.py new file mode 100644 index 0000000..cb86c72 --- /dev/null +++ b/helpers.py @@ -0,0 +1,15 @@ +from pygments import highlight +from pygments.lexers import get_lexer_by_name +from pygments.formatters import TerminalFormatter +import debug as debugMod + + +def highlight_code(code: str, language: str = 'py') -> None: + try: + lexer = get_lexer_by_name(language) + except ValueError: + debugMod.log("Warning: Language not recognized. Printing without highlighting.") + return code + + formatter = TerminalFormatter() + return highlight(code, lexer, formatter) diff --git a/main.py b/main.py index 2bbc0c1..c6f6509 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,9 @@ from codeExecution import UserEnvironment, orchestrate_code from queries import ( - perform_web_search, - rag_query, + rag_query, classify_task, - MODEL_NAMES + MODEL_NAMES, + show_thinking ) import debug as debugMod from search import perform_web_search @@ -14,6 +14,7 @@ import os import argparse import re import ollama +import subprocess from config import Config import conversation_store conversation_store.initialize_db() @@ -58,124 +59,128 @@ def create_vector_store(chunks): embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") vector_store = Chroma.from_texts( - chunks, - embeddings, - persist_directory=Config.chroma_path() - ) + chunks, + embeddings, + persist_directory=Config.chroma_path() + ) debugMod.log("Vector store created") return vector_store def orchestrate(query, vector_store=None, comm_outp=print, comm_inp=input): - debugMod.log(f"Orchestrating query: {query}") - aggregated_web_context = "" - local_context = "" - user_context = "" - response_context = "" - links = [] + debugMod.log(f"Orchestrating query: {query}") + aggregated_web_context = "" + local_context = "" + user_context = "" + response_context = "" + links = [] - # Classify task once at start - task_type = classify_task(query) - debugMod.log(f"Task classified as: {task_type}") + # Classify task once at start + show_thinking("[Analyzing query type...]") + task_type = classify_task(query) + show_thinking(f"[Task classified as: {task_type}]") - # Early exit for simple tasks - if task_type == "simple": - debugMod.log("Direct response for simple task") - return [rag_query(query, task_type=task_type), []] + # Early exit for simple tasks + if task_type == "simple": + debugMod.log("Direct response for simple task") + return [rag_query(query, task_type=task_type), []] - # Initialize context for medium/complex tasks - if vector_store: - docs = vector_store.similarity_search(query, k=3) - local_context = "\n".join( - [d.page_content for d in docs]) if docs else "" - debugMod.log(f"Local context: {local_context}") + # Initialize context for medium/complex tasks + if vector_store: + docs = vector_store.similarity_search(query, k=3) + local_context = "\n".join( + [d.page_content for d in docs]) if docs else "" + debugMod.log(f"Local context: {local_context}") - iteration = 0 - status = "continue" + iteration = 0 + status = "continue" - while iteration < Config.MAX_ORCHESTRATION_ITERATIONS and status != "final": - debugMod.log(f"--- Iteration {iteration} [Status: {status}] ---") - response = "" + while iteration < Config.MAX_ORCHESTRATION_ITERATIONS and status != "final": + debugMod.log(f"--- Iteration {iteration} [Status: {status}] ---") + response = "" - if status == "continue": - # Include previous responses in reflection - reflection_prompt = f"""Determine the next action needed to answer: {query} - - Available actions: - 1. web_search - Needs web information - 2. user_input - Requires clarification - 3. final_response - Ready to answer - - Context: - - Web: {aggregated_web_context} - - Local: {local_context} - - User: {user_context} - - Previous Responses: {response_context} - - Return ONLY: web_search/user_input/final_response""" + if status == "continue": + # Include previous responses in reflection + reflection_prompt = f"""Determine the next action needed to answer: {query} + + Available actions: + 1. web_search - Needs web information + 2. user_input - Requires clarification + 3. final_response - Ready to answer + + Context: + - Web: {aggregated_web_context} + - Local: {local_context} + - User: {user_context} + - Previous Responses: {response_context} + + Return ONLY: web_search/user_input/final_response""" - status = rag_query( - reflection_prompt, task_type=task_type, silent=True).strip().lower() - debugMod.log(f"Action determined: {status}") + show_thinking('[choosing the appropriate action]') + status = rag_query( + reflection_prompt, task_type=task_type, silent=True).strip().lower() + debugMod.log(f"Action determined: {status}") - if status == "web_search": - search_prompt = f"""Generate search query considering: {query} - Previous responses: {response_context} - Return ONLY search terms""" + if status == "web_search": + show_thinking("[Searching web for information...]") - search_terms = rag_query( - search_prompt, task_type=task_type, silent=True).strip('"') - debugMod.log(f"Searching web for: {search_terms}") + search_prompt = f"""Generate search query considering: {query} + Previous responses: {response_context} + Return ONLY search terms""" - web_results, new_links = perform_web_search(search_terms) - links.extend(new_links) + search_terms = rag_query( + search_prompt, task_type=task_type, silent=True).strip('"') + debugMod.log(f"Searching web for: {search_terms}") - if web_results: - aggregated_web_context += f"\nWeb: {web_results}" - debugMod.log(f"Updated web context") + web_results, new_links = perform_web_search(search_terms) + links.extend(new_links) - elif status == "user_input": - comm_outp("\n[System] Additional info needed:") - user_input = comm_inp("Please clarify: ") - user_context += f"\nUser input: {user_input}" - debugMod.log(f"Received user input") - status = "continue" + if web_results: + aggregated_web_context += f"\nWeb: {web_results}" + debugMod.log(f"Updated web context") - elif status == "final_response": - break + elif status == "user_input": + comm_outp("\n[System] Additional info needed:") + user_input = comm_inp("Please clarify: ") + user_context += f"\nUser input: {user_input}" + debugMod.log(f"Received user input") + status = "continue" - else: - debugMod.log(f"Unknown status: {status}") - status = "final_response" + elif status == "final_response": + break - # Generate and store response - if status != "final_response": - response = rag_query( - query, - task_type=task_type, - web_context=aggregated_web_context, - local_context=local_context, - user_context=user_context, - response_context=response_context # Pass previous responses - ) - response_context += f"\nIteration {iteration} response: {response}" - debugMod.log(f"Iteration {iteration} response stored") + else: + debugMod.log(f"Unknown status: {status}") + status = "final_response" - iteration += 1 + # Generate and store response + if status != "final_response": + response = rag_query( + query, + task_type=task_type, + web_context=aggregated_web_context, + local_context=local_context, + user_context=user_context, + response_context=response_context # Pass previous responses + ) + response_context += f"\nIteration {iteration} response: {response}" + debugMod.log(f"Iteration {iteration} response stored") - # Generate final response with full context - final_response = rag_query( - f"Final answer considering: {query}", - task_type=task_type, - web_context=aggregated_web_context, - local_context=local_context, - user_context=user_context, - response_context=response_context - ) + iteration += 1 - debugMod.log("Orchestration completed") - return [final_response, links] + # Generate final response with full context + final_response = rag_query( + f"Final answer considering: {query}", + task_type=task_type, + web_context=aggregated_web_context, + local_context=local_context, + user_context=user_context, + response_context=response_context + ) + + debugMod.log("Orchestration completed") + return [final_response, links] if __name__ == "__main__": @@ -183,9 +188,9 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--file', type=str, default="", - help='Path to data file for analysis') + help='Path to data file for analysis') parser.add_argument('--cli', type=str, default="false", - help="whether to use the CLI for input or run the API") + help="whether to use the CLI for input or run the API") args = parser.parse_args() vector_store = None @@ -220,5 +225,18 @@ if __name__ == "__main__": # code code_blocks = re.findall(Config.code_block_regex(), response, re.DOTALL) if code_blocks: + show_thinking('[running code...]') orchestrate_code(orchestrate, vector_store, chunks, - user_env, code_blocks, query, response, links) + user_env, code_blocks, query, response, links) + + # clean up + try: + # For Linux/macOS + subprocess.run(["pkill", "-f", "ollama run"], check=False) + + # For Windows + subprocess.run(["taskkill", "/IM", "ollama.exe", "/F"], check=False) + + debugMod.log("Terminated Ollama background processes") + except Exception as e: + debugMod.log(f"Cleanup error: {str(e)}") diff --git a/queries.py b/queries.py index 52294fd..b79c747 100644 --- a/queries.py +++ b/queries.py @@ -1,27 +1,29 @@ +import re import debug as debugMod -from search import perform_web_search +from config import Config import ollama import conversation_store +from helpers import highlight_code conversation_store.initialize_db() # models: better: qwen2.5-coder:14b, faster: phi3 (but worse), with more processing power: deepseek-r1:32b MODEL_NAMES = { - "classification": "dolphin3:8b", # Best for structured tasks - "simple": "phi3:latest", # phi3:mini - "medium": "llama3:8b-instruct-q8_0", - "complex": "deepseek-coder:33b-instruct-q4_K_M" + "classification": "dolphin3:8b", # Best for structured tasks + "simple": "phi3:latest", # phi3:mini + "medium": "llama3:8b-instruct-q8_0", + "complex": "deepseek-coder:33b-instruct-q4_K_M" } def classify_task(query: str) -> str: # Use a tiny model to classify the task prompt = f"""Classify this query into one of these categories: - - "simple": greetings, yes/no, basic facts - - "medium": summarization, simple coding - - "complex": advanced coding, data analysis, multi-step reasoning + - "simple": greetings, yes/no, basic facts + - "medium": summarization, simple coding + - "complex": advanced coding, data analysis, multi-step reasoning - Query: {query} - Return ONLY the category name (e.g., "simple").""" + Query: {query} + Return ONLY the category name (e.g., "simple").""" toPassIn = "" for i in range(3): @@ -36,14 +38,27 @@ def classify_task(query: str) -> str: return 'complex' -def generate_prompt(query, web_context, local_context, user_context, response_context, onlyRules=False): - prompt = f""" +def generate_prompt(query, web_context, local_context, user_context, response_context, task_type, onlyRules=False): + if task_type == "simple": + return f"""RESPONSE RULES: + 1. Respond ONLY with a single-sentence friendly reply + 2. NEVER include explanations, markdown, or metadata + 3. Keep responses under 15 words + 4. ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```) + + Query: {query} + Response:""" # Explicit response start + + else: + prompt = f""" **Strict Response Rules** - 1. Greetings & Casual Queries: + 1. General Rules: - For greetings (e.g. "good morning", "hello"): * Respond with ONLY a short friendly acknowledgment * NEVER explain why you can't chat casually * Example: "Good morning! How can I assist you today?" + - NEVER give the user code they didn't ask for + - ONLY answer the question. Do NOT EVER give the user extra information, questions, etc if they did not ask for them! 2. Technical Responses: - Generate code ONLY if: @@ -63,6 +78,8 @@ def generate_prompt(query, web_context, local_context, user_context, response_co - NO justification of rules to users - NEVER include the user's question unless explicitly asked to do so - NEVER include previous responses + - NEVER EVER SHOW THE RULES TO THE USER + - ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```) {f'Local File Context: {local_context}' if local_context else ''} """ @@ -83,27 +100,72 @@ def generate_prompt(query, web_context, local_context, user_context, response_co return prompt +def show_thinking(indicator: str = None): + print( + f"\033[90m{indicator if indicator else "[Thinking...]"}\033[0m", flush=True) + + def call_ollama_and_print(task_type, prompt, silent=False): + temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7) + if silent: - response = ollama.chat(model=MODEL_NAMES[task_type], messages=[ - {"role": "user", "content": prompt}]) + response = ollama.chat( + model=MODEL_NAMES[task_type], messages=[ + {"role": "user", "content": prompt}], + options={'temperature': temperature} + ) + debugMod.log("RAG query response received") return response full_response = "" - print("\nAI Response: ", end="", flush=True) # Start response line + show_thinking() # Stream the response stream = ollama.chat( model=MODEL_NAMES[task_type], messages=[{"role": "user", "content": prompt}], - stream=True + stream=True, + options={'temperature': temperature} ) + buffer = "" + in_code_block = False + code_lang = None + first_chunk = True + for chunk in stream: - content = chunk.get('message', {}).get('content', '') - print(content, end="", flush=True) # Stream to terminal - full_response += content + if first_chunk: + first_chunk = False + print("\r\033[K", end="") # Clear line + print("\nAI Response: ", end="", flush=True) + content: str = chunk.get('message', {}).get('content', '') + + if content == '```' or re.match('```.*', content): + if in_code_block: + in_code_block = False + print() + buffer += content + code_lang = None + else: + in_code_block = True + code_lang = content.replace('```', '').strip() + if (len(code_lang) == 0): + code_lang = "TODO" + + elif code_lang == "TODO": + # last chunk was the backticks, now is lang + splitVal = content.strip().split() + code_lang = splitVal[0] + + if (len(splitVal) > 1 and len(splitVal[1]) > 0): + hcode = highlight_code(splitVal[1], code_lang) + print(hcode, end="", flush=True) + buffer += hcode + + else: + buffer += content + print(content, end="", flush=True) print() # Newline after streaming debugMod.log("RAG query response received") @@ -111,65 +173,65 @@ def call_ollama_and_print(task_type, prompt, silent=False): def multi_choice_query(query, options: list[str], task_type: str, web_context="", local_context="", user_context="", silent=False): - attempts = 0 - max_attempts = 3 - inds = list(range(len(options))) - valid_range = f"0-{len(inds) - 1}" - last_error = "" + attempts = 0 + max_attempts = 3 + inds = list(range(len(options))) + valid_range = f"0-{len(inds) - 1}" + last_error = "" - debugMod.log( - f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}") + debugMod.log( + f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}") - while attempts < max_attempts: - prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected. - - Available Options: - {"\n".join([f"{i}: {option}" for i, option in enumerate(options)])} + while attempts < max_attempts: + prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected. + + Available Options: + {"\n".join([f"{i}: {option}" for i, option in enumerate(options)])} - Question: {query} + Question: {query} - Context Sources: - {f'[WEB] {web_context}' if web_context else ''} - {f'[LOCAL] {local_context}' if local_context else ''} - {f'[USER] {user_context}' if user_context else ''} + Context Sources: + {f'[WEB] {web_context}' if web_context else ''} + {f'[LOCAL] {local_context}' if local_context else ''} + {f'[USER] {user_context}' if user_context else ''} - {generate_prompt(query, web_context, local_context, user_context, onlyRules=True)} - - You MUST return a SINGLE INTEGER between {valid_range} - - DO NOT include explanations or punctuation""" + {generate_prompt(query, web_context, local_context, user_context, onlyRules=True)} + - You MUST return a SINGLE INTEGER between {valid_range} + - DO NOT include explanations or punctuation""" - if last_error: - prompt += f"\n\nPrevious invalid response: {last_error}" + if last_error: + prompt += f"\n\nPrevious invalid response: {last_error}" - try: - content = call_ollama_and_print(task_type, prompt, silent) - debugMod.log(f"Multi-choice response: {content}", wrapped=True) + try: + content = call_ollama_and_print(task_type, prompt, silent) + debugMod.log(f"Multi-choice response: {content}", wrapped=True) - # Strict validation - if not content.isdigit(): - raise ValueError(f"Non-numeric response: {content}") + # Strict validation + if not content.isdigit(): + raise ValueError(f"Non-numeric response: {content}") - ind = int(content) + ind = int(content) - if 0 <= ind < len(options): - debugMod.log(f"Valid choice selected: {ind} ({options[ind]})") - return options[ind] + if 0 <= ind < len(options): + debugMod.log(f"Valid choice selected: {ind} ({options[ind]})") + return options[ind] - raise IndexError(f"Index {ind} out of range {valid_range}") + raise IndexError(f"Index {ind} out of range {valid_range}") - except (ValueError, IndexError) as e: - last_error = str(e) - debugMod.log(f"Validation failed: {last_error}") - attempts += 1 - continue + except (ValueError, IndexError) as e: + last_error = str(e) + debugMod.log(f"Validation failed: {last_error}") + attempts += 1 + continue - except Exception as e: - debugMod.log(f"Unexpected error: {str(e)}") - attempts += 1 - continue + except Exception as e: + debugMod.log(f"Unexpected error: {str(e)}") + attempts += 1 + continue - # Fallback to safest option after all attempts - debugMod.log(f"All attempts failed. Defaulting to first option") - return options[0] + # Fallback to safest option after all attempts + debugMod.log(f"All attempts failed. Defaulting to first option") + return options[0] def rag_query(query, task_type: str = None, web_context="", local_context="", user_context="", response_context="", silent=False): @@ -178,7 +240,7 @@ def rag_query(query, task_type: str = None, web_context="", local_context="", us debugMod.log(f"Generating {task_type} RAG query with query: {query}") prompt = generate_prompt( - query, web_context, local_context, user_context, response_context) + query, web_context, local_context, user_context, response_context, task_type) response = call_ollama_and_print(task_type, prompt, silent)