diff --git a/app/config.py b/app/config.py
index be5efea..b46612b 100644
--- a/app/config.py
+++ b/app/config.py
@@ -24,7 +24,7 @@ class Settings(BaseSettings):
     # RAG settings
     chunk_size: int = 500  # tokens per chunk
     chunk_overlap: int = 50  # token overlap between chunks
-    retrieval_top_k: int = 3  # number of chunks to retrieve
+    retrieval_top_k: int = 2  # number of chunks to retrieve
 
     class Config:
         env_file = ".env"
diff --git a/app/llm.py b/app/llm.py
index 6c67d81..62842e8 100644
--- a/app/llm.py
+++ b/app/llm.py
@@ -24,9 +24,9 @@ async def stream_ollama(messages: list[dict], system: str = "") -> AsyncGenerato
         "stream": True,
         "keep_alive": "24h",
         "options": {
-            "num_ctx": 1024,
-            "num_predict": 256,
-            "num_thread": 16,
+            "num_ctx": 512,
+            "num_predict": 128,
+            "num_thread": 12,
             "temperature": 0.7,
         },
     }
diff --git a/app/rag.py b/app/rag.py
index 50d712d..df7651f 100644
--- a/app/rag.py
+++ b/app/rag.py
@@ -65,19 +65,9 @@ def build_context_prompt(chunks: list[dict]) -> str:
         return "\n[No relevant documents found in the database.]\n"
 
     context_parts = []
-    for i, chunk in enumerate(chunks, 1):
-        source_label = chunk["source_type"].title()
-        metadata = chunk["metadata"]
-
-        header = f"--- Source {i} ({source_label})"
-        if "title" in metadata:
-            header += f" | {metadata['title']}"
-        if "substance" in metadata:
-            header += f" | Substance: {metadata['substance']}"
-        header += " ---"
-
-        content = chunk["content"][:300]
-        context_parts.append(f"{header}\n{content}")
+    for chunk in chunks:
+        content = chunk["content"][:150]
+        context_parts.append(content)
 
     return "\n\n".join(context_parts)
 
@@ -92,12 +82,12 @@ async def chat_stream(
 
     # Build the context-augmented system prompt
     context_text = build_context_prompt(chunks)
-    full_system = f"{SYSTEM_PROMPT}\n\nContext:\n{context_text}"
+    full_system = f"{SYSTEM_PROMPT}\n{context_text}"
 
-    # Build message history (keep minimal for speed)
+    # Keep only the last two history messages (presumably one user/assistant exchange) for speed
     messages = []
     if conversation_history:
-        messages = conversation_history[-4:]
+        messages = conversation_history[-2:]
 
     messages.append({"role": "user", "content": user_message})