Hi, I followed the "chat with your own data" tutorial with LangChain, OpenAI, and Pinecone and it worked fine, until I started to use serverless Pinecone. I tried everything: debugging, a hardcoded upsert (that worked), but I can't get it to work with the OpenAI embeddings, and Cohere gives the same error.
This is my code. Is there someone who can point me in the right direction?
decrypted_api_key = get_decoded_pinecone_key_for_current_company()
if not decrypted_api_key:
    flash('No Pinecone API key found.', 'error')
    return redirect(url_for('dataopslag2.upload_pdf'))
# Initialize Pinecone
pc = PineconeClient(api_key=decrypted_api_key)
safe_user_id = re.sub(r"[^a-z0-9-]", "", str(user_id).lower())
index_name = f"chat-data-{safe_user_id}"
existing_indexes = pc.list_indexes().get('indexes', [])
existing_index_names = [index['name'] for index in existing_indexes]
if index_name not in existing_index_names:
    flash(f'Index {index_name} does not exist.', 'error')
    return redirect(url_for('dataopslag2.dataopslag2_start'))
index = pc.Index(index_name)
test_vector = [0.01 * i for i in range(1536)]  # A simple linear sequence as a test vector
test_metadata = {"description": "Test2 Vector"}
aangepastenamespace = "fgbdfgb"
# Try to upsert the test vector into the Pinecone index
try:
    index.upsert(
        vectors=[{"id": "test_vector", "values": test_vector, "metadata": test_metadata}],
        namespace=aangepastenamespace
    )
    print("Test vector type:", type(test_vector), "Length:", len(test_vector))
    print("First 5 values of test_vector:", test_vector[:5])
    print("Upsert of test vector succeeded.")
except Exception as e:
print(f"Fout bij upsert van testvector: {e}")
return redirect(url_for('dataopslag2.dataopslag2_start')) # Of een andere foutafhandelingsroute
# Laad de chunks van het document
chunks_file_path = session.get('chunks_file_path')
document_prefix = session.get('document_prefix')
if not chunks_file_path or not os.path.exists(chunks_file_path):
    flash('No document chunks found.', 'error')
    return redirect(url_for('***********'))
# Load the chunks from the pickle file
with open(chunks_file_path, 'rb') as file:
    chunks = pickle.load(file)
# Load the OpenAI API key and initialize embedding_instance
openai_api_key = get_decoded_openai_k*********()
embedding_instance = OpenAIEmbeddings(openai_api_key=openai_api_key)
namespace = session.get('namespace_name', 'default_namespace')
namespace = sanitize_namespace_name(namespace)
# Try to upsert the embeddings
try:
    for i, chunk in enumerate(chunks):
        chunk_id = f"{document_prefix}chunk{i}"
        chunk_vector = embedding_instance.embed_documents([chunk])[0]
        # Check the vector for special values
        if not validate_vector(chunk_vector):
            print(f"Chunk {chunk_id} contains NaN or infinite values")
        print("Chunk vector type:", type(chunk_vector), "Length:", len(chunk_vector))
        print("First 5 values of chunk_vector:", chunk_vector[:5])
        if isinstance(chunk_vector, list) and len(chunk_vector) == 1536:
            try:
                index.upsert(ids=[chunk_id], vectors=[chunk_vector], namespace=namespace)
                print(f"Upsert of {chunk_id} succeeded.")
            except Exception as e:
                print(f"Error upserting {chunk_id}: {e}")
        else:
            print(f"Chunk {chunk_id} has an incorrect dimension or structure: {len(chunk_vector)}")
            # You can choose to redirect here or continue with the next chunk
except Exception as e:
    flash(f'Error storing documents in Pinecone: {str(e)}', 'error')
    return redirect(url_for('*******'))
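
One thing I noticed while comparing the two calls: the hardcoded test upsert that works passes vectors as a list of dicts with "id", "values" and "metadata" keys, while the failing embedding upsert passes ids= and a bare list of floats. Should the loop be written like this instead? This is just a sketch (not tested yet), reusing the same index, embedding_instance, namespace and document_prefix as above, and assuming the chunks are plain text:

for i, chunk in enumerate(chunks):
    chunk_id = f"{document_prefix}chunk{i}"
    chunk_vector = embedding_instance.embed_documents([chunk])[0]
    # Same dict format as the working test upsert above
    index.upsert(
        vectors=[{
            "id": chunk_id,
            "values": chunk_vector,
            # metadata is optional; assuming chunk is a string here
            "metadata": {"text": str(chunk)},
        }],
        namespace=namespace
    )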