
added rag sys

galo, 3 months ago
parent
commit
f61cedf0ca
6 changed files with 102 additions and 26 deletions
  1. Dockerfile (+6 -8)
  2. README.md (+6 -2)
  3. app/api/chat.py (+22 -7)
  4. app/main.py (+1 -1)
  5. app/services/qa.py (+63 -7)
  6. requirements.txt (+4 -1)

+ 6 - 8
Dockerfile

@@ -1,24 +1,22 @@
-# Use official Python slim image
+# Using Python slim for a smaller install
 FROM python:3.10-slim
 
-# Set working directory
 WORKDIR /app
 
-# Install system dependencies for Ollama
+# Installing Ollama system dependencies
 RUN apt-get update && apt-get install -y curl libgomp1 && rm -rf /var/lib/apt/lists/*
 
-# Install Ollama
+# Installing Ollama
 RUN curl -fsSL https://ollama.com/install.sh | sh
 
-# Copy requirements and install Python dependencies
+# Installing Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the application code
 COPY . .
 
-# Expose port 8000 for FastAPI
+# Exposing port 8000
 EXPOSE 8000
 
-# Command to start Ollama server, pull the model if needed, and run FastAPI
+# Command to start the Ollama server and download the LLM
 CMD ["sh", "-c", "ollama serve & (ollama list | grep -q tinyllama || ollama pull tinyllama) && uvicorn app.main:app --host 0.0.0.0 --port 8000 & wait"]
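
The CMD above backgrounds ollama serve, pulls tinyllama if it is not cached yet, and then launches uvicorn, so the container can accept connections on port 8000 before the model has finished downloading. A minimal readiness sketch a caller could run after docker run; the polling script is an illustration only (not part of this commit) and relies on the /health route added in app/api/chat.py below:

```
# readiness_check.py - hypothetical helper, not part of this commit.
# Polls the /health endpoint from app/api/chat.py until the container
# has finished starting Ollama, pulling the model, and booting uvicorn.
import time
import urllib.request

def wait_for_api(url: str = "http://localhost:8000/health", timeout: int = 300) -> bool:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except OSError:
            pass  # server not reachable yet; keep polling
        time.sleep(2)
    return False

if __name__ == "__main__":
    print("API ready" if wait_for_api() else "API did not come up in time")
```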

+ 6 - 2
README.md

@@ -2,11 +2,11 @@
 
 Uses Ollama and Mistral; requires resources for the local LLM
 
-## Prerequisites
+## Prerequisites
 - Git
 - Docker
 
-## Setup and Installation
+## Setup and Installation
 
 1. 
 ```
@@ -23,4 +23,8 @@ docker build -t chat-api . && docker run -p 8000:8000 --env-file .env chat-api
 4. 
 ```
 curl -X POST http://localhost:8000/chat/?session_id=test_session -H "Content-Type: application/json" -d "{\"message\": \"Qual a capital da França?\"}"
+
+ou
+
+curl -X POST "http://localhost:8000/ask/?session_id=test_session&question=Qual a capital da França?" -F "file=@/path/to/your/file.txt"
 ```
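
The same request can be issued from Python; a minimal sketch using the requests library, assuming the container above is running on localhost:8000. The file path and session id are placeholders; the query parameters and the "file" multipart field match the /ask route added in app/api/chat.py below:

```
# ask_example.py - illustrative client for the new /ask endpoint; not part of
# this commit. Assumes the API container is reachable on localhost:8000.
import requests

params = {"session_id": "test_session", "question": "Qual a capital da França?"}

# The route accepts an optional .txt or .md upload in the "file" multipart field.
with open("notes.txt", "rb") as fh:  # placeholder document to index
    resp = requests.post(
        "http://localhost:8000/ask/",
        params=params,
        files={"file": ("notes.txt", fh, "text/plain")},
        timeout=120,
    )

resp.raise_for_status()
data = resp.json()
print(data["answer"])
print(data["sources"])
print(data["latency_ms"])
```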

+ 22 - 7
app/api/chat.py

@@ -1,5 +1,5 @@
-from fastapi import APIRouter, HTTPException
-from app.services.qa import get_answer, chat_history
+from fastapi import APIRouter, HTTPException, UploadFile, File, Query
+from app.services.qa import get_answer, ask_rag
 from app.schemas.chat import ChatRequest, ChatResponse
 import time
 
@@ -16,10 +16,25 @@ async def chat(request: ChatRequest, session_id: str = "default_session"):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Ollama call failed with error: {str(e)}")
 
+@router.post("/ask/")
+async def ask(session_id: str = Query("default_session"), question: str = Query(...), file: UploadFile = File(None)):
+    start_time = time.time()
+    
+    try:
+        file_content = None
+        file_name = None
+        if file:
+            file_content = await file.read()
+            file_name = file.filename
+            if not file_name.endswith(('.txt', '.md')):
+                raise HTTPException(status_code=400, detail="Only .txt or .md files are supported")
+        
+        result = ask_rag(session_id, question, file_content, file_name)
+        latency_ms = int((time.time() - start_time) * 1000)
+        return {"answer": result["answer"], "sources": result["sources"], "latency_ms": latency_ms}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Ollama call failed with error: {str(e)}")
+
 @router.get("/health")
 async def health():
-    return {"status": "healthy"}
-
-@router.get("/sessions")
-async def list_sessions():
-    return {"sessions": {sid: len(history) for sid, history in chat_history.items()}}
+    return {"status": "healthy"}

+ 1 - 1
app/main.py

@@ -3,7 +3,7 @@ from app.api.chat import router as chat_router
 
 app = FastAPI()
 
-# Include the chat router
+# Including routes
 app.include_router(chat_router)
 
 if __name__ == "__main__":

+ 63 - 7
app/services/qa.py

@@ -1,22 +1,47 @@
 from langchain_community.llms import Ollama
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import CharacterTextSplitter
 import os
 from dotenv import load_dotenv
+from tempfile import NamedTemporaryFile
 
 # Load environment variables
 load_dotenv()
 
-# Global dictionary to store chat history (in-memory for simplicity)
+# Global dictionary for chat history
 chat_history = {}
 
-# Initialize Ollama LLM with tinyllama
-llm = Ollama(
-    model="tinyllama",
-    temperature=0.7
-)
+# Initialize Ollama LLM and Embeddings
+llm = Ollama(model="tinyllama", temperature=0.7)
+embeddings = OllamaEmbeddings(model="tinyllama")
 
-# Define a prompt template that includes history
+# Initialize global Chroma vector store (in-memory)
+vector_store = Chroma.from_texts([""], embeddings)  # Initialize empty store
+
+# Function to index uploaded file
+def index_file(file_content: bytes, file_name: str):
+    with NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1]) as temp_file:
+        temp_file.write(file_content)
+        temp_file_path = temp_file.name
+
+    loader = TextLoader(temp_file_path)
+    documents = loader.load()
+
+    # Split documents into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = text_splitter.split_documents(documents)
+
+    # Add to vector store
+    vector_store.add_documents(chunks)
+
+    # Clean up temp file
+    os.unlink(temp_file_path)
+
+# Define prompt templates
 def get_prompt_with_history(session_id):
     history = chat_history.get(session_id, [])
     history_text = "\n".join([f"User: {msg['question']}\nAI: {msg['answer']}" for msg in history]) if history else "No previous conversation."
@@ -25,6 +50,15 @@ def get_prompt_with_history(session_id):
         template=f"Previous conversation:\n{history_text}\n\nResponda à seguinte pergunta: {{question}}"
     )
 
+def get_prompt_with_history_and_docs(session_id, docs):
+    history = chat_history.get(session_id, [])
+    history_text = "\n".join([f"User: {msg['question']}\nAI: {msg['answer']}" for msg in history]) if history else "No previous conversation."
+    docs_text = "\n".join([f"Source: {doc.page_content}" for doc in docs]) if docs else "No relevant documents found."
+    return PromptTemplate(
+        input_variables=["question"],
+        template=f"Previous conversation:\n{history_text}\n\nRelevant documents:\n{docs_text}\n\nResponda à seguinte pergunta usando as fontes relevantes e citando trechos como fontes: {{question}}"
+    )
+
 def get_answer(session_id: str, question: str) -> str:
     # Get or initialize chat history for this session
     if session_id not in chat_history:
@@ -43,6 +77,28 @@ def get_answer(session_id: str, question: str) -> str:
     
     return response
 
+# RAG function for /ask endpoint
+def ask_rag(session_id: str, question: str, file_content: bytes = None, file_name: str = None) -> dict:
+    if file_content and file_name:
+        index_file(file_content, file_name)
+    
+    if session_id not in chat_history:
+        chat_history[session_id] = []
+    
+    docs = vector_store.similarity_search(question, k=3)
+    
+    prompt = get_prompt_with_history_and_docs(session_id, docs)
+    chain = LLMChain(llm=llm, prompt=prompt)
+    
+    response = chain.run(question=question)
+    response = response[:100] if len(response) > 100 else response
+    
+    chat_history[session_id].append({"question": question, "answer": response})
+    
+    sources = [doc.page_content for doc in docs]
+    
+    return {"answer": response, "sources": sources}
+
 if __name__ == "__main__":
     session_id = "test_session"
     print(get_answer(session_id, "Qual a capital da França?"))
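
The RAG path can also be driven directly from the service layer, bypassing HTTP; a minimal sketch, assuming an Ollama server with tinyllama already pulled is running locally (the document bytes and file name are placeholders):

```
# rag_demo.py - hypothetical direct use of ask_rag; not part of this commit.
from app.services.qa import ask_rag

# Index a small document and ask a question grounded in it.
doc = b"Paris e a capital da Franca. Berlim e a capital da Alemanha."
result = ask_rag(
    session_id="demo_session",
    question="Qual a capital da França?",
    file_content=doc,
    file_name="capitals.txt",
)

print(result["answer"])        # truncated to 100 characters by ask_rag
for src in result["sources"]:  # chunks returned by the similarity search
    print("-", src)
```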

+ 4 - 1
requirements.txt

@@ -3,4 +3,7 @@ uvicorn==0.30.6
 langchain==0.2.16
 langchain_community==0.2.16
 pydantic==2.9.2
-python-dotenv==1.0.1
+python-dotenv==1.0.1
+langchain-text-splitters==0.2.4
+chromadb==0.5.5
+python-multipart==0.0.9