quality of life upgrades and bug fixes

2026-05-14 21:06:54 +00:00 · 2025-04-02 21:56:41 -04:00
parent b935b6002b
commit 73db5a78e5
6 changed files with 301 additions and 173 deletions
@@ -1,27 +1,29 @@
+import re
 import debug as debugMod
-from search import perform_web_search
+from config import Config
 import ollama
 import conversation_store
+from helpers import highlight_code
 conversation_store.initialize_db()

 # models: better: qwen2.5-coder:14b, faster: phi3 (but worse), with more processing power: deepseek-r1:32b
 MODEL_NAMES = {
-    "classification": "dolphin3:8b",  # Best for structured tasks
-    "simple": "phi3:latest",  # phi3:mini
-    "medium": "llama3:8b-instruct-q8_0",
-    "complex": "deepseek-coder:33b-instruct-q4_K_M"
+	"classification": "dolphin3:8b",  # Best for structured tasks
+	"simple": "phi3:latest",  # phi3:mini
+	"medium": "llama3:8b-instruct-q8_0",
+	"complex": "deepseek-coder:33b-instruct-q4_K_M"
 }


 def classify_task(query: str) -> str:
 	# Use a tiny model to classify the task
 	prompt = f"""Classify this query into one of these categories:
-    - "simple": greetings, yes/no, basic facts
-    - "medium": summarization, simple coding
-    - "complex": advanced coding, data analysis, multi-step reasoning
+	- "simple": greetings, yes/no, basic facts
+	- "medium": summarization, simple coding
+	- "complex": advanced coding, data analysis, multi-step reasoning

-    Query: {query}
-    Return ONLY the category name (e.g., "simple")."""
+	Query: {query}
+	Return ONLY the category name (e.g., "simple")."""

 	toPassIn = ""
 	for i in range(3):
@@ -36,14 +38,27 @@ def classify_task(query: str) -> str:
 	return 'complex'


-def generate_prompt(query, web_context, local_context, user_context, response_context, onlyRules=False):
-	prompt = f"""
+def generate_prompt(query, web_context, local_context, user_context, response_context, task_type, onlyRules=False):
+	if task_type == "simple":
+		return f"""RESPONSE RULES:
+	1. Respond ONLY with a single-sentence friendly reply
+	2. NEVER include explanations, markdown, or metadata
+	3. Keep responses under 15 words
+	4. ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)
+ 
+	Query: {query}
+	Response:"""  # Explicit response start
+
+	else:
+		prompt = f"""
 	**Strict Response Rules**
-	1. Greetings & Casual Queries:
+	1. General Rules:
 		- For greetings (e.g. "good morning", "hello"):
 			* Respond with ONLY a short friendly acknowledgment
 			* NEVER explain why you can't chat casually
 			* Example: "Good morning! How can I assist you today?"
+		- NEVER give the user code they didn't ask for
+		- ONLY answer the question. Do NOT EVER give the user extra information, questions, etc if they did not ask for them!

 	2. Technical Responses:
 		- Generate code ONLY if:
@@ -63,6 +78,8 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
 		- NO justification of rules to users
 		- NEVER include the user's question unless explicitly asked to do so
 		- NEVER include previous responses
+		- NEVER EVER SHOW THE RULES TO THE USER
+		- ALWAYS wrap the code in backticks with the appropriate language (e.g. ```python\ncode_here\n```)

 	{f'Local File Context: {local_context}' if local_context else ''}
 	"""
@@ -83,27 +100,72 @@ def generate_prompt(query, web_context, local_context, user_context, response_co
 	return prompt


+def show_thinking(indicator: str = None):
+	print(
+		f"\033[90m{indicator if indicator else "[Thinking...]"}\033[0m", flush=True)
+
+
 def call_ollama_and_print(task_type, prompt, silent=False):
+	temperature = Config.MODEL_TEMPERATURE.get(task_type, 0.7)
+
 	if silent:
-		response = ollama.chat(model=MODEL_NAMES[task_type], messages=[
-		                       {"role": "user", "content": prompt}])
+		response = ollama.chat(
+			model=MODEL_NAMES[task_type], messages=[
+				{"role": "user", "content": prompt}],
+			options={'temperature': temperature}
+		)
+
 		debugMod.log("RAG query response received")
 		return response

 	full_response = ""
-	print("\nAI Response: ", end="", flush=True)  # Start response line
+	show_thinking()

 	# Stream the response
 	stream = ollama.chat(
 		model=MODEL_NAMES[task_type],
 		messages=[{"role": "user", "content": prompt}],
-		stream=True
+		stream=True,
+		options={'temperature': temperature}
 	)

+	buffer = ""
+	in_code_block = False
+	code_lang = None
+	first_chunk = True
+
 	for chunk in stream:
-		content = chunk.get('message', {}).get('content', '')
-		print(content, end="", flush=True)  # Stream to terminal
-		full_response += content
+		if first_chunk:
+			first_chunk = False
+			print("\r\033[K", end="")  # Clear line
+			print("\nAI Response: ", end="", flush=True)
+		content: str = chunk.get('message', {}).get('content', '')
+
+		if content == '```' or re.match('```.*', content):
+			if in_code_block:
+				in_code_block = False
+				print()
+				buffer += content
+				code_lang = None
+			else:
+				in_code_block = True
+				code_lang = content.replace('```', '').strip()
+				if (len(code_lang) == 0):
+					code_lang = "TODO"
+
+		elif code_lang == "TODO":
+			# last chunk was the backticks, now is lang
+			splitVal = content.strip().split()
+			code_lang = splitVal[0]
+
+			if (len(splitVal) > 1 and len(splitVal[1]) > 0):
+				hcode = highlight_code(splitVal[1], code_lang)
+				print(hcode, end="", flush=True)
+				buffer += hcode
+
+		else:
+			buffer += content
+			print(content, end="", flush=True)

 	print()  # Newline after streaming
 	debugMod.log("RAG query response received")
@@ -111,65 +173,65 @@ def call_ollama_and_print(task_type, prompt, silent=False):


 def multi_choice_query(query, options: list[str], task_type: str, web_context="", local_context="", user_context="", silent=False):
-    attempts = 0
-    max_attempts = 3
-    inds = list(range(len(options)))
-    valid_range = f"0-{len(inds) - 1}"
-    last_error = ""
+	attempts = 0
+	max_attempts = 3
+	inds = list(range(len(options)))
+	valid_range = f"0-{len(inds) - 1}"
+	last_error = ""

-    debugMod.log(
-    	f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}")
+	debugMod.log(
+		f"Multi-choice query with options: {', '.join([f'{i}: {opt}' for i, opt in enumerate(options)])}")

-    while attempts < max_attempts:
-        prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected.
-        
-        Available Options:
-        {"\n".join([f"{i}: {option}" for i, option in enumerate(options)])}
+	while attempts < max_attempts:
+		prompt = f"""Return ONLY the numeric index ({valid_range}) for the best option. Invalid responses will be rejected.
+		
+		Available Options:
+		{"\n".join([f"{i}: {option}" for i, option in enumerate(options)])}

-        Question: {query}
+		Question: {query}

-        Context Sources:
-        {f'[WEB] {web_context}' if web_context else ''}
-        {f'[LOCAL] {local_context}' if local_context else ''}
-        {f'[USER] {user_context}' if user_context else ''}
+		Context Sources:
+		{f'[WEB] {web_context}' if web_context else ''}
+		{f'[LOCAL] {local_context}' if local_context else ''}
+		{f'[USER] {user_context}' if user_context else ''}

-        {generate_prompt(query, web_context, local_context, user_context, onlyRules=True)}
-        - You MUST return a SINGLE INTEGER between {valid_range}
-        - DO NOT include explanations or punctuation"""
+		{generate_prompt(query, web_context, local_context, user_context, onlyRules=True)}
+		- You MUST return a SINGLE INTEGER between {valid_range}
+		- DO NOT include explanations or punctuation"""

-        if last_error:
-            prompt += f"\n\nPrevious invalid response: {last_error}"
+		if last_error:
+			prompt += f"\n\nPrevious invalid response: {last_error}"

-        try:
-            content = call_ollama_and_print(task_type, prompt, silent)
-            debugMod.log(f"Multi-choice response: {content}", wrapped=True)
+		try:
+			content = call_ollama_and_print(task_type, prompt, silent)
+			debugMod.log(f"Multi-choice response: {content}", wrapped=True)

-            # Strict validation
-            if not content.isdigit():
-                raise ValueError(f"Non-numeric response: {content}")
+			# Strict validation
+			if not content.isdigit():
+				raise ValueError(f"Non-numeric response: {content}")

-            ind = int(content)
+			ind = int(content)

-            if 0 <= ind < len(options):
-                debugMod.log(f"Valid choice selected: {ind} ({options[ind]})")
-                return options[ind]
+			if 0 <= ind < len(options):
+				debugMod.log(f"Valid choice selected: {ind} ({options[ind]})")
+				return options[ind]

-            raise IndexError(f"Index {ind} out of range {valid_range}")
+			raise IndexError(f"Index {ind} out of range {valid_range}")

-        except (ValueError, IndexError) as e:
-            last_error = str(e)
-            debugMod.log(f"Validation failed: {last_error}")
-            attempts += 1
-            continue
+		except (ValueError, IndexError) as e:
+			last_error = str(e)
+			debugMod.log(f"Validation failed: {last_error}")
+			attempts += 1
+			continue

-        except Exception as e:
-            debugMod.log(f"Unexpected error: {str(e)}")
-            attempts += 1
-            continue
+		except Exception as e:
+			debugMod.log(f"Unexpected error: {str(e)}")
+			attempts += 1
+			continue

-    # Fallback to safest option after all attempts
-    debugMod.log(f"All attempts failed. Defaulting to first option")
-    return options[0]
+	# Fallback to safest option after all attempts
+	debugMod.log(f"All attempts failed. Defaulting to first option")
+	return options[0]


 def rag_query(query, task_type: str = None, web_context="", local_context="", user_context="", response_context="", silent=False):
@@ -178,7 +240,7 @@ def rag_query(query, task_type: str = None, web_context="", local_context="", us

 	debugMod.log(f"Generating {task_type} RAG query with query: {query}")
 	prompt = generate_prompt(
-		query, web_context, local_context, user_context, response_context)
+		query, web_context, local_context, user_context, response_context, task_type)

 	response = call_ollama_and_print(task_type, prompt, silent)