@@ -1,22 +1,47 @@
 from langchain_community.llms import Ollama
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import CharacterTextSplitter
 import os
 from dotenv import load_dotenv
+from tempfile import NamedTemporaryFile
 
 # Load environment variables
 load_dotenv()
 
-# Global dictionary to store chat history (in-memory for simplicity)
+# Global dictionary for chat history
 chat_history = {}
 
-# Initialize Ollama LLM with tinyllama
-llm = Ollama(
-    model="tinyllama",
-    temperature=0.7
-)
+# Initialize Ollama LLM and Embeddings
+llm = Ollama(model="tinyllama", temperature=0.7)
+embeddings = OllamaEmbeddings(model="tinyllama")
 
-# Define a prompt template that includes history
+# Initialize global Chroma vector store (in-memory)
+vector_store = Chroma.from_texts([""], embeddings)  # Initialize empty store
+
+# Function to index uploaded file
+def index_file(file_content: bytes, file_name: str):
+    with NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1]) as temp_file:
+        temp_file.write(file_content)
+        temp_file_path = temp_file.name
+
+    loader = TextLoader(temp_file_path)
+    documents = loader.load()
+
+    # Split documents into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = text_splitter.split_documents(documents)
+
+    # Add to vector store
+    vector_store.add_documents(chunks)
+
+    # Clean up temp file
+    os.unlink(temp_file_path)
+
+# Define prompt templates
 def get_prompt_with_history(session_id):
     history = chat_history.get(session_id, [])
     history_text = "\n".join([f"User: {msg['question']}\nAI: {msg['answer']}" for msg in history]) if history else "No previous conversation."
@@ -25,6 +50,15 @@ def get_prompt_with_history(session_id):
         template=f"Previous conversation:\n{history_text}\n\nResponda à seguinte pergunta: {{question}}"
     )
 
+def get_prompt_with_history_and_docs(session_id, docs):
+    history = chat_history.get(session_id, [])
+    history_text = "\n".join([f"User: {msg['question']}\nAI: {msg['answer']}" for msg in history]) if history else "No previous conversation."
+    docs_text = "\n".join([f"Source: {doc.page_content}" for doc in docs]) if docs else "No relevant documents found."
+    return PromptTemplate(
+        input_variables=["question"],
+        template=f"Previous conversation:\n{history_text}\n\nRelevant documents:\n{docs_text}\n\nResponda à seguinte pergunta usando as fontes relevantes e citando trechos como fontes: {{question}}"
+    )
+
 def get_answer(session_id: str, question: str) -> str:
     # Get or initialize chat history for this session
     if session_id not in chat_history:
@@ -43,6 +77,28 @@ def get_answer(session_id: str, question: str) -> str:
 
     return response
 
+# RAG function for /ask endpoint
+def ask_rag(session_id: str, question: str, file_content: bytes = None, file_name: str = None) -> dict:
+    if file_content and file_name:
+        index_file(file_content, file_name)
+
+    if session_id not in chat_history:
+        chat_history[session_id] = []
+
+    docs = vector_store.similarity_search(question, k=3)
+
+    prompt = get_prompt_with_history_and_docs(session_id, docs)
+    chain = LLMChain(llm=llm, prompt=prompt)
+
+    response = chain.run(question=question)
+    response = response[:100] if len(response) > 100 else response
+
+    chat_history[session_id].append({"question": question, "answer": response})
+
+    sources = [doc.page_content for doc in docs]
+
+    return {"answer": response, "sources": sources}
+
 if __name__ == "__main__":
     session_id = "test_session"
     print(get_answer(session_id, "Qual a capital da França?"))
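
For context, here is a minimal sketch of how `ask_rag` might be exposed over HTTP. The diff's comment only hints at an `/ask` endpoint; the use of FastAPI, the `main` module name, the route path, and the form field names below are assumptions for illustration, not part of this change.

```python
from typing import Optional

from fastapi import FastAPI, File, Form, UploadFile

# Assumption: the module shown in the diff above is importable as `main`.
from main import ask_rag

app = FastAPI()

@app.post("/ask")
async def ask(
    session_id: str = Form(...),
    question: str = Form(...),
    file: Optional[UploadFile] = File(None),
):
    # Forward the optional upload as raw bytes so ask_rag can index it before answering.
    file_content = await file.read() if file is not None else None
    file_name = file.filename if file is not None else None
    return ask_rag(session_id, question, file_content=file_content, file_name=file_name)
```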