| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 |
- from langchain_community.llms import Ollama
- from langchain.chains import LLMChain
- from langchain.prompts import PromptTemplate
- from langchain_community.vectorstores import Chroma
- from langchain_community.embeddings import OllamaEmbeddings
- from langchain_community.document_loaders import TextLoader
- from langchain_text_splitters import CharacterTextSplitter
- from langchain.memory import ConversationBufferWindowMemory
- import os
- from dotenv import load_dotenv
- from tempfile import NamedTemporaryFile
# Read settings from a local .env file into the process environment, if one exists.
load_dotenv()

# The chat model and the embedding model both use the "tinyllama" Ollama model.
llm = Ollama(model="tinyllama", temperature=0.7)
embeddings = OllamaEmbeddings(model="tinyllama")

# Module-wide Chroma vector store shared by indexing and retrieval. No persist
# directory is passed here. The store is seeded with a single empty text so that
# the collection exists before any file has been indexed.
vector_store = Chroma.from_texts(texts=[""], embedding=embeddings)
# Function to index uploaded file
def index_file(file_content: bytes, file_name: str) -> None:
    """Split an uploaded text file into chunks and add them to ``vector_store``.

    The raw bytes are written to a temporary file (keeping the extension of
    *file_name*) because ``TextLoader`` reads from a path. The temporary file is
    always removed, even when loading, splitting or indexing fails.

    Args:
        file_content: Raw bytes of the uploaded file.
        file_name: Original file name; only its extension is used.
    """
    suffix = os.path.splitext(file_name)[1]

    # delete=False: the file must outlive the handle so it can be reopened by
    # path after it has been closed.
    temp_file = NamedTemporaryFile(delete=False, suffix=suffix)
    temp_file_path = temp_file.name
    try:
        # Close the handle before loading so the content is flushed to disk.
        with temp_file:
            temp_file.write(file_content)

        documents = TextLoader(temp_file_path).load()

        # Split documents into overlapping chunks
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.split_documents(documents)

        # Add to vector store; nothing to index when the file produced no chunks.
        if chunks:
            vector_store.add_documents(chunks)
    finally:
        # Clean up the temp file on success and on failure alike.
        os.unlink(temp_file_path)
# Define prompt templates
def get_prompt_with_history(memory):
    """Build the prompt used for plain (non-RAG) question answering.

    The template has two placeholders, ``history`` and ``question``.
    *memory* is accepted for signature compatibility but is not read here.
    """
    template_text = (
        "Previous conversation:\n{history}\n\n"
        "Responda à seguinte pergunta: {question}"
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["history", "question"],
    )
def get_prompt_with_history_and_docs(memory, docs):
    """Build the RAG prompt with the retrieved documents embedded in the template.

    The template keeps two placeholders, ``history`` and ``question``. The text
    of *docs* is baked directly into the template string.

    Args:
        memory: Accepted for signature compatibility; not read here.
        docs: Retrieved documents (objects exposing ``page_content``). May be
            empty or ``None``, in which case a fixed fallback sentence is used.

    Returns:
        A ``PromptTemplate`` with input variables ``history`` and ``question``.
    """
    if docs:
        docs_text = "\n".join(f"Source: {doc.page_content}" for doc in docs)
    else:
        docs_text = "No relevant documents found."

    # Document text ends up inside a format-style template, so literal braces
    # (JSON, code snippets, ...) must be doubled; otherwise they would be parsed
    # as template variables.
    docs_text = docs_text.replace("{", "{{").replace("}", "}}")

    template = (
        "Previous conversation:\n{history}\n\n"
        f"Relevant documents:\n{docs_text}\n\n"
        "Responda à seguinte pergunta usando as fontes relevantes e citando trechos como fontes: {question}"
    )
    return PromptTemplate(
        input_variables=["history", "question"],
        template=template,
    )
# Conversation memories keyed by session id, so history survives between calls.
# NOTE: entries are never evicted; the registry grows with the number of
# distinct session ids seen by this process.
_SESSION_MEMORIES = {}

# Maximum number of characters of the model reply handed back to callers.
_MAX_ANSWER_CHARS = 100


def _get_session_memory(session_id: str):
    """Return the windowed memory for *session_id*, creating it on first use."""
    memory = _SESSION_MEMORIES.get(session_id)
    if memory is None:
        memory = ConversationBufferWindowMemory(
            memory_key="history", input_key="question", k=3
        )
        _SESSION_MEMORIES[session_id] = memory
    return memory


def get_answer(session_id: str, question: str) -> str:
    """Answer *question* using the conversation history of *session_id*.

    Args:
        session_id: Identifier selecting which conversation memory to use.
        question: The user's question.

    Returns:
        The model's reply, truncated to ``_MAX_ANSWER_CHARS`` characters.
    """
    # Get or initialize memory for this session
    memory = _get_session_memory(session_id)

    # Create chain with dynamic prompt including history
    prompt = get_prompt_with_history(memory)
    chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

    # Get response; slicing is a no-op when the reply is already short enough.
    response = chain.run(question=question)
    return response[:_MAX_ANSWER_CHARS]


# RAG function for /ask endpoint
def ask_rag(session_id: str, question: str, file_content: bytes = None, file_name: str = None) -> dict:
    """Answer *question* with retrieval over the shared vector store.

    When both *file_content* and *file_name* are given, the file is indexed
    first so that it can contribute to this very answer.

    Args:
        session_id: Identifier selecting which conversation memory to use.
        question: The user's question.
        file_content: Optional raw bytes of a file to index before answering.
        file_name: Optional name of that file (used for its extension).

    Returns:
        A dict with ``"answer"`` (reply truncated to ``_MAX_ANSWER_CHARS``
        characters) and ``"sources"`` (text of the retrieved documents).
    """
    # Get or initialize memory for this session
    memory = _get_session_memory(session_id)

    if file_content and file_name:
        index_file(file_content, file_name)

    # Retrieve relevant documents. The store is seeded with an empty text at
    # module load; drop blank hits so they are not presented as sources.
    retrieved = vector_store.similarity_search(question, k=3)
    docs = [doc for doc in retrieved if doc.page_content.strip()]

    # Create chain with dynamic prompt including history and docs
    prompt = get_prompt_with_history_and_docs(memory, docs)
    chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

    # Get response
    response = chain.run(question=question)

    return {
        "answer": response[:_MAX_ANSWER_CHARS],
        "sources": [doc.page_content for doc in docs],
    }
if __name__ == "__main__":
    # Manual smoke test: ask two consecutive questions under one session id.
    session_id = "test_session"
    for question in ("Qual a capital da França?", "E a capital da Espanha?"):
        print(get_answer(session_id, question))
|