Main Error:
[PineconeError: PineconeClient: Error calling upsert: PineconeError: Vector dimension 0 does not match the dimension of the index 1536]
GITHUB CODE:
mayooear/gpt4-pdf-chatbot-langchain: GPT4 & LangChain Chatbot for large PDF docs (github.com)
PINECONE INDEX:
Cloud: GCP
Region: Las Vegas (us-west4)
Environment: us-west4-gcp-free
Dimensions: 1536
Metric: cosine
My ingest-data.ts
Code:
import { RecursiveCharacterTextSplitter } from ‘langchain/text_splitter’;
import { OpenAIEmbeddings } from ‘langchain/embeddings/openai’;
import { PineconeStore } from ‘langchain/vectorstores/pinecone’;
import { pinecone } from ‘@/utils/pinecone-client’;
import { PDFLoader } from ‘langchain/document_loaders/fs/pdf’;
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from ‘@/config/pinecone’;
import { DirectoryLoader } from ‘langchain/document_loaders/fs/directory’;/* Name of directory to retrieve your files from
Make sure to add your PDF files inside the ‘docs’ folder
*/
const filePath = 'docs';

/**
 * Ingests the PDF files found under `filePath` into Pinecone.
 *
 * Steps: load every `.pdf` in the directory, split the text into overlapping
 * chunks, embed the chunks with `OpenAIEmbeddings`, and store them in the
 * index `PINECONE_INDEX_NAME` under the namespace `PINECONE_NAME_SPACE`.
 *
 * @throws Error with the message "Failed to ingest your data" when any step
 *   fails. The underlying error is logged to the console before rethrowing.
 */
export const run = async () => {
  try {
    /* Load raw docs from all the files in the directory */
    const directoryLoader = new DirectoryLoader(filePath, {
      '.pdf': (path) => new PDFLoader(path),
    });

    // const loader = new PDFLoader(filePath);
    const rawDocs = await directoryLoader.load();

    /* Split text into chunks */
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize: 1000,
      chunkOverlap: 200,
    });
    const splitDocs = await textSplitter.splitDocuments(rawDocs);

    // Drop chunks that contain only whitespace: they carry nothing to embed.
    const docs = splitDocs.filter((doc) => doc.pageContent.trim().length > 0);

    // Fail early with an actionable message instead of sending an empty batch
    // to the embedding / upsert step.
    // NOTE(review): an empty or failed embedding is a plausible source of the
    // "Vector dimension 0" upsert error — confirm against your OpenAI key,
    // quota and the PDF text extraction.
    if (docs.length === 0) {
      throw new Error(
        `No text could be extracted from the PDF files in "${filePath}"`,
      );
    }

    console.log('split docs', docs);

    console.log('creating vector store...');
    /* Create and store the embeddings in the vectorStore */
    const embeddings = new OpenAIEmbeddings();
    const index = pinecone.Index(PINECONE_INDEX_NAME); // change to your own index name

    // Embed the PDF documents
    await PineconeStore.fromDocuments(docs, embeddings, {
      pineconeIndex: index,
      namespace: PINECONE_NAME_SPACE,
      textKey: 'text',
    });
  } catch (error) {
    console.log('error', error);
    throw new Error('Failed to ingest your data');
  }
};

// Script entry point: run the ingestion once and report success.
(async () => {
  await run();
  console.log('ingestion complete');
})();