Here is the code that is performing the embedding and the upserting:
const updatePineconeVectorStore = async (client, indexName, docs, embeddings) => {
const index = client.Index(indexName);
const splitter = new RecursiveCharacterTextSplitter();
const queue = [];
try {
const splitDocs = await splitter.splitDocuments(docs);
// Loop over splitDocs and enqueue each splitDoc into a queue
for (let splitDoc of splitDocs) {
queue.push(splitDoc);
}
const batchSize = 6;
while (queue.length > 0) {
//Extract a batch of document from the queue
const batch = queue.slice(0, batchSize);
//Construct vector and perform upsert operations for the current batch
const vectors = await Promise.all(batch.map(async (splitDoc, idx) => {
const splitDocEmbeddings = await embeddings.embedDocuments(
splitDoc.pageContent.replace(/\n/g, " ").split(",")
);
return {
id: `${splitDoc.metadata.source}_${idx}`,
values: splitDocEmbeddings,
metadata: {
...splitDoc.metadata,
},
};
}));
await index.upsert(vectors);
console.log(`Processed batch of ${batch.length} documents. Remaining queue size: ${queue.length}`);
}
} catch (err) {
console.error("Error occurred during document processing:", err.message);
}
};
Here is how one of the documents objects in the vectors
array looks like: