"""Minimal RAG pipeline: fetch a web page, chunk it, embed into a persisted
Chroma vector store, then answer a question with a RetrievalQA chain."""
import os

from langchain import hub
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# SECURITY: never commit a real API key — supply it via the environment instead.
# setdefault keeps an externally provided OPENAI_API_KEY rather than clobbering it.
os.environ.setdefault('OPENAI_API_KEY', 'sk-xxxxx')

# Load the source document from the web.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Split into ~500-character chunks (no overlap) sized for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# Build and persist the vector store on first run; reuse it on later runs so
# the documents are not re-embedded (saves time and API cost).
if not os.path.exists('VectorStore'):
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=OpenAIEmbeddings(),
        persist_directory="VectorStore",
    )
    vectorstore.persist()
else:
    vectorstore = Chroma(persist_directory='VectorStore', embedding_function=OpenAIEmbeddings())

# Standard RAG prompt from the LangChain hub (question + retrieved context).
prompt = hub.pull("rlm/rag-prompt")

# temperature=0 for deterministic, reproducible answers.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# RetrievalQA chain: the retriever supplies context chunks, the hub prompt
# formats them together with the question for the LLM.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

question = "What are the approaches to Task Decomposition?"
result = qa_chain({"query": question})
print(result["result"])
最终是使用如下prompt让大模型去做回答:
question:用户的问题
context: 基于语义匹配的最相似段落作为上下文
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer: