The code below loads a local PDF, creates OpenAI embeddings for it, and stores them in Pinecone through LangChain. It works, but every time I run the script it re-embeds the document and upserts the vectors into Pinecone all over again. How can I skip the ingestion step on later runs and just ask the question against the existing index? (A sketch of what I think the fix might look like is at the end.)
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
# Load the PDF and split it into ~2000-character chunks
loader = PyPDFLoader("/Users/Max/Downloads/The-Chronicles-of-Xeriden.pdf")
data = loader.load()
print(f'You have {len(data)} document(s) in your data')
print(f'There are {len(data[0].page_content)} characters in your document')

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
texts = text_splitter.split_documents(data)
print(f'Now you have {len(texts)} documents')
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
# Keys come from the environment; the fallbacks here are just placeholders
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-XXX')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', 'XXX')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'XXX')
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
index_name = "index13"
# This is the line that re-embeds and upserts all the chunks on every run
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name)
query = "What is Omnis?"
docs = docsearch.similarity_search(query)
print(docs[0].page_content[:450])
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# "stuff" packs all retrieved chunks into a single prompt for the LLM
llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")
query = "What is Omnis?"
docs = docsearch.similarity_search(query)
print (chain.run(input_documents=docs, question=query))
chain.run(input_documents=docs, question=query)
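For reference, here is a sketch of what I imagine the fix looks like: run the from_texts upsert once, and on later runs attach to the already-populated index and only query it. Pinecone.from_existing_index is my guess at the right call from skimming the LangChain source, so treat this as untested:

# Untested sketch: query the existing "index13" index without re-embedding
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
pinecone.init(api_key=os.environ['PINECONE_API_KEY'], environment=os.environ['PINECONE_API_ENV'])

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Attach to the index populated earlier instead of calling from_texts again
docsearch = Pinecone.from_existing_index(index_name="index13", embedding=embeddings)

llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

query = "What is Omnis?"
docs = docsearch.similarity_search(query)
print(chain.run(input_documents=docs, question=query))

Is that the right approach, or is there a better way to separate the one-time ingestion from the question-answering step?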