
added rag sys

galo, 3 months ago
parent
commit
f61cedf0ca
6 changed files with 102 additions and 26 deletions
  1. Dockerfile (+6 -8)
  2. README.md (+6 -2)
  3. app/api/chat.py (+22 -7)
  4. app/main.py (+1 -1)
  5. app/services/qa.py (+63 -7)
  6. requirements.txt (+4 -1)

+ 6 - 8
Dockerfile

@@ -1,24 +1,22 @@
-# Use official Python slim image
+# Using Python slim for a smaller install
 FROM python:3.10-slim
 
-# Set working directory
 WORKDIR /app
 
-# Install system dependencies for Ollama
+# Installing Ollama system dependencies
 RUN apt-get update && apt-get install -y curl libgomp1 && rm -rf /var/lib/apt/lists/*
 
-# Install Ollama
+# Installing Ollama
 RUN curl -fsSL https://ollama.com/install.sh | sh
 
-# Copy requirements and install Python dependencies
+# Installing Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the application code
 COPY . .
 
-# Expose port 8000 for FastAPI
+# Exposing port 8000
 EXPOSE 8000
 
-# Command to start Ollama server, pull the model if needed, and run FastAPI
+# Command to start the Ollama server and download the LLM
 CMD ["sh", "-c", "ollama serve & (ollama list | grep -q tinyllama || ollama pull tinyllama) && uvicorn app.main:app --host 0.0.0.0 --port 8000 & wait"]
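
The CMD above backgrounds ollama serve, pulls tinyllama if it is not cached yet, and then launches uvicorn, so the container can accept connections on port 8000 before the model has finished downloading. A minimal readiness sketch a caller could run after docker run; the polling script is an illustration only (not part of this commit) and relies on the /health route added in app/api/chat.py below:

```
# readiness_check.py - hypothetical helper, not part of this commit.
# Polls the /health endpoint from app/api/chat.py until the container
# has finished starting Ollama, pulling the model, and booting uvicorn.
import time
import urllib.request

def wait_for_api(url: str = "http://localhost:8000/health", timeout: int = 300) -> bool:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except OSError:
            pass  # server not reachable yet; keep polling
        time.sleep(2)
    return False

if __name__ == "__main__":
    print("API ready" if wait_for_api() else "API did not come up in time")
```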

+ 6 - 2
README.md

@@ -2,11 +2,11 @@
 
 Uses Ollama and Mistral; requires resources for the local LLM
 
-## Prerequisites
+## Prerequisites
 - Git
 - Docker
 
-## Setup and Installation
+## Setup and Installation
 
 1. 
 ```
@@ -23,4 +23,8 @@ docker build -t chat-api . && docker run -p 8000:8000 --env-file .env chat-api
 4. 
 ```
 curl -X POST http://localhost:8000/chat/?session_id=test_session -H "Content-Type: application/json" -d "{\"message\": \"Qual a capital da França?\"}"
+
+ou
+
+curl -X POST "http://localhost:8000/ask/?session_id=test_session&question=Qual a capital da França?" -F "file=@/path/to/your/file.txt"
 ```
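
The same request can be issued from Python; a minimal sketch using the requests library, assuming the container above is running on localhost:8000. The file path and session id are placeholders; the query parameters and the "file" multipart field match the /ask route added in app/api/chat.py below:

```
# ask_example.py - illustrative client for the new /ask endpoint; not part of
# this commit. Assumes the API container is reachable on localhost:8000.
import requests

params = {"session_id": "test_session", "question": "Qual a capital da França?"}

# The route accepts an optional .txt or .md upload in the "file" multipart field.
with open("notes.txt", "rb") as fh:  # placeholder document to index
    resp = requests.post(
        "http://localhost:8000/ask/",
        params=params,
        files={"file": ("notes.txt", fh, "text/plain")},
        timeout=120,
    )

resp.raise_for_status()
data = resp.json()
print(data["answer"])
print(data["sources"])
print(data["latency_ms"])
```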

+ 22 - 7
app/api/chat.py

@@ -1,5 +1,5 @@
-from fastapi import APIRouter, HTTPException
-from app.services.qa import get_answer, chat_history
+from fastapi import APIRouter, HTTPException, UploadFile, File, Query
+from app.services.qa import get_answer, ask_rag
 from app.schemas.chat import ChatRequest, ChatResponse
 import time
 
@@ -16,10 +16,25 @@ async def chat(request: ChatRequest, session_id: str = "default_session"):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Ollama call failed with error: {str(e)}")
 
+@router.post("/ask/")
+async def ask(session_id: str = Query("default_session"), question: str = Query(...), file: UploadFile = File(None)):
+    start_time = time.time()
+    
+    try:
+        file_content = None
+        file_name = None
+        if file:
+            file_content = await file.read()
+            file_name = file.filename
+            if not file_name.endswith(('.txt', '.md')):
+                raise HTTPException(status_code=400, detail="Only .txt or .md files are supported")
+        
+        result = ask_rag(session_id, question, file_content, file_name)
+        latency_ms = int((time.time() - start_time) * 1000)
+        return {"answer": result["answer"], "sources": result["sources"], "latency_ms": latency_ms}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Ollama call failed with error: {str(e)}")
+
 @router.get("/health")
 async def health():
-    return {"status": "healthy"}
-
-@router.get("/sessions")
-async def list_sessions():
-    return {"sessions": {sid: len(history) for sid, history in chat_history.items()}}
+    return {"status": "healthy"}

+ 1 - 1
app/main.py

@@ -3,7 +3,7 @@ from app.api.chat import router as chat_router
 
 app = FastAPI()
 
-# Include the chat router
+# Including routes
 app.include_router(chat_router)
 
 if __name__ == "__main__":

+ 63 - 7
app/services/qa.py

@@ -1,22 +1,47 @@
 from langchain_community.llms import Ollama
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import CharacterTextSplitter
 import os
 from dotenv import load_dotenv
+from tempfile import NamedTemporaryFile
 
 # Load environment variables
 load_dotenv()
 
-# Global dictionary to store chat history (in-memory for simplicity)
+# Global dictionary for chat history
 chat_history = {}
 
-# Initialize Ollama LLM with tinyllama
-llm = Ollama(
-    model="tinyllama",
-    temperature=0.7
-)
+# Initialize Ollama LLM and Embeddings
+llm = Ollama(model="tinyllama", temperature=0.7)
+embeddings = OllamaEmbeddings(model="tinyllama")
 
-# Define a prompt template that includes history
+# Initialize global Chroma vector store (in-memory)
+vector_store = Chroma.from_texts([""], embeddings)  # Initialize empty store
+
+# Function to index uploaded file
+def index_file(file_content: bytes, file_name: str):
+    with NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1]) as temp_file:
+        temp_file.write(file_content)
+        temp_file_path = temp_file.name
+
+    loader = TextLoader(temp_file_path)
+    documents = loader.load()
+
+    # Split documents into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = text_splitter.split_documents(documents)
+
+    # Add to vector store
+    vector_store.add_documents(chunks)
+
+    # Clean up temp file
+    os.unlink(temp_file_path)
+
+# Define prompt templates
 def get_prompt_with_history(session_id):
     history = chat_history.get(session_id, [])
     history_text = "\n".join([f"User: {msg['question']}\nAI: {msg['answer']}" for msg in history]) if history else "No previous conversation."
@@ -25,6 +50,15 @@ def get_prompt_with_history(session_id):
         template=f"Previous conversation:\n{history_text}\n\nResponda à seguinte pergunta: {{question}}"
     )
 
+def get_prompt_with_history_and_docs(session_id, docs):
+    history = chat_history.get(session_id, [])
+    history_text = "\n".join([f"User: {msg['question']}\nAI: {msg['answer']}" for msg in history]) if history else "No previous conversation."
+    docs_text = "\n".join([f"Source: {doc.page_content}" for doc in docs]) if docs else "No relevant documents found."
+    return PromptTemplate(
+        input_variables=["question"],
+        template=f"Previous conversation:\n{history_text}\n\nRelevant documents:\n{docs_text}\n\nResponda à seguinte pergunta usando as fontes relevantes e citando trechos como fontes: {{question}}"
+    )
+
 def get_answer(session_id: str, question: str) -> str:
     # Get or initialize chat history for this session
     if session_id not in chat_history:
@@ -43,6 +77,28 @@ def get_answer(session_id: str, question: str) -> str:
     
     return response
 
+# RAG function for /ask endpoint
+def ask_rag(session_id: str, question: str, file_content: bytes = None, file_name: str = None) -> dict:
+    if file_content and file_name:
+        index_file(file_content, file_name)
+    
+    if session_id not in chat_history:
+        chat_history[session_id] = []
+    
+    docs = vector_store.similarity_search(question, k=3)
+    
+    prompt = get_prompt_with_history_and_docs(session_id, docs)
+    chain = LLMChain(llm=llm, prompt=prompt)
+    
+    response = chain.run(question=question)
+    response = response[:100] if len(response) > 100 else response
+    
+    chat_history[session_id].append({"question": question, "answer": response})
+    
+    sources = [doc.page_content for doc in docs]
+    
+    return {"answer": response, "sources": sources}
+
 if __name__ == "__main__":
     session_id = "test_session"
     print(get_answer(session_id, "Qual a capital da França?"))
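
The RAG path can also be driven directly from the service layer, bypassing HTTP; a minimal sketch, assuming an Ollama server with tinyllama already pulled is running locally (the document bytes and file name are placeholders):

```
# rag_demo.py - hypothetical direct use of ask_rag; not part of this commit.
from app.services.qa import ask_rag

# Index a small document and ask a question grounded in it.
doc = b"Paris e a capital da Franca. Berlim e a capital da Alemanha."
result = ask_rag(
    session_id="demo_session",
    question="Qual a capital da França?",
    file_content=doc,
    file_name="capitals.txt",
)

print(result["answer"])        # truncated to 100 characters by ask_rag
for src in result["sources"]:  # chunks returned by the similarity search
    print("-", src)
```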

+ 4 - 1
requirements.txt

@@ -3,4 +3,7 @@ uvicorn==0.30.6
 langchain==0.2.16
 langchain_community==0.2.16
 pydantic==2.9.2
-python-dotenv==1.0.1
+python-dotenv==1.0.1
+langchain-text-splitters==0.2.4
+chromadb==0.5.5
+python-multipart==0.0.9