diff --git a/app/config.py b/app/config.py index be5efea..b46612b 100644 --- a/app/config.py +++ b/app/config.py @@ -24,7 +24,7 @@ class Settings(BaseSettings): # RAG settings chunk_size: int = 500 # tokens per chunk chunk_overlap: int = 50 # token overlap between chunks - retrieval_top_k: int = 3 # number of chunks to retrieve + retrieval_top_k: int = 2 # number of chunks to retrieve class Config: env_file = ".env" diff --git a/app/llm.py b/app/llm.py index 6c67d81..62842e8 100644 --- a/app/llm.py +++ b/app/llm.py @@ -24,9 +24,9 @@ async def stream_ollama(messages: list[dict], system: str = "") -> AsyncGenerato "stream": True, "keep_alive": "24h", "options": { - "num_ctx": 1024, - "num_predict": 256, - "num_thread": 16, + "num_ctx": 512, + "num_predict": 128, + "num_thread": 12, "temperature": 0.7, }, } diff --git a/app/rag.py b/app/rag.py index 50d712d..df7651f 100644 --- a/app/rag.py +++ b/app/rag.py @@ -65,19 +65,9 @@ def build_context_prompt(chunks: list[dict]) -> str: return "\n[No relevant documents found in the database.]\n" context_parts = [] - for i, chunk in enumerate(chunks, 1): - source_label = chunk["source_type"].title() - metadata = chunk["metadata"] - - header = f"--- Source {i} ({source_label})" - if "title" in metadata: - header += f" | {metadata['title']}" - if "substance" in metadata: - header += f" | Substance: {metadata['substance']}" - header += " ---" - - content = chunk["content"][:300] - context_parts.append(f"{header}\n{content}") + for chunk in chunks: + content = chunk["content"][:150] + context_parts.append(content) return "\n\n".join(context_parts) @@ -92,12 +82,12 @@ async def chat_stream( # Build the context-augmented system prompt context_text = build_context_prompt(chunks) - full_system = f"{SYSTEM_PROMPT}\n\nContext:\n{context_text}" + full_system = f"{SYSTEM_PROMPT}\n{context_text}" - # Build message history (keep minimal for speed) + # Keep only last exchange for speed messages = [] if conversation_history: - messages = conversation_history[-4:] + messages = conversation_history[-2:] messages.append({"role": "user", "content": user_message})