I want to upsert vectors into Pinecone with an ID prefix for my documents in Haystack. How can I achieve that? I only need this so I can delete my documents from Pinecone: deleting with a metadata filter isn't possible on a serverless index. Why is that?
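From what I can tell, PineconeDocumentStore uses the Haystack `Document.id` as the Pinecone vector ID, so my idea is to rewrite the IDs before the writer runs. Here is a minimal sketch of what I mean (the `IdPrefixer` component and its `prefix` parameter are my own, not a Haystack built-in):

```python
from typing import List

from haystack import Document, component


@component
class IdPrefixer:
    """Hypothetical helper: prepends a fixed prefix to every document ID,
    so vectors are upserted into Pinecone with IDs like 'ulf_user_1#<id>'."""

    def __init__(self, prefix: str):
        self.prefix = prefix

    @component.output_types(documents=List[Document])
    def run(self, documents: List[Document]):
        for doc in documents:
            doc.id = f"{self.prefix}#{doc.id}"
        return {"documents": documents}
```

I would wire this between the splitter and the embedder. Is that the right approach, or is there a built-in way to set an ID prefix?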
Anyway, here is my current pipeline:

```python
from dotenv import load_dotenv

from haystack import Pipeline
from haystack.components.builders import DynamicChatPromptBuilder
from haystack.components.converters import TikaDocumentConverter
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.joiners import DocumentJoiner
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiChatGenerator
from haystack_integrations.components.retrievers.pinecone import PineconeEmbeddingRetriever
from haystack_integrations.document_stores.pinecone import PineconeDocumentStore

load_dotenv()
# Embeds whole Documents (not just query text), matching the indexing side of the pipeline.
document_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-mpnet-base-v2")
document_embedder.warm_up()
document_joiner = DocumentJoiner()
# Make sure you have the PINECONE_API_KEY environment variable set
document_store = PineconeDocumentStore(
    index="ulf",
    namespace="ulf_user_1",
    dimension=768,  # all-mpnet-base-v2 outputs 768-dimensional embeddings
    metric="cosine",
    spec={"serverless": {"region": "us-east-1", "cloud": "aws"}},
)
# document_store = InMemoryDocumentStore()
retriever = PineconeEmbeddingRetriever(document_store=document_store)
converter = TikaDocumentConverter()
cleaner = DocumentCleaner()
splitter = DocumentSplitter(split_by="passage", split_length=1, split_overlap=0)

sources = ["data/Banking Laws In India.pdf"]

pipeline = Pipeline()
pipeline.add_component("converter", converter)
pipeline.add_component("cleaner", cleaner)
pipeline.add_component("splitter", splitter)
pipeline.add_component("document_embedder", document_embedder)
pipeline.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP))
# pipeline.add_component("document_joiner",document_joiner)
# pipeline.add_component("retriever", retriever)
# pipeline.add_component("prompt_builder", prompt_builder)
# pipeline.add_component("generator", llm)
pipeline.connect("converter", "cleaner")
pipeline.connect("cleaner", "splitter")
pipeline.connect("splitter", "text_embedder")
pipeline.connect("text_embedder",'writer')
result = pipeline.run({"converter": {"sources": sources}}, include_outputs_from={"splitter"})
print("result", result)"