Hi, I followed the "chat with your own data" tutorial with LangChain, OpenAI, and Pinecone and it worked fine, until I started to use serverless Pinecone. I tried everything: debugging, a hardcoded upsert (that worked), but I can't get it to work with the OpenAI embeddings, and Cohere gives the same error.
This is my code. Is there someone who can point me in the right direction?
decrypted_api_key = get_decoded_pinecone_key_for_current_company()
if not decrypted_api_key:
    flash('No Pinecone API key found.', 'error')
    return redirect(url_for('dataopslag2.upload_pdf'))
# Initialize Pinecone
pc = PineconeClient(api_key=decrypted_api_key)
safe_user_id = re.sub(r"[^a-z0-9-]", "", str(user_id).lower())
index_name = f"chat-data-{safe_user_id}"
existing_indexes = pc.list_indexes().get('indexes', [])
existing_index_names = [index['name'] for index in existing_indexes]
if index_name not in existing_index_names:
    flash(f'Index {index_name} does not exist.', 'error')
    return redirect(url_for('dataopslag2.dataopslag2_start'))
index = pc.Index(index_name)
test_vector = [0.01 * i for i in range(1536)]  # A simple linear sequence as a test vector
test_metadata = {"description": "Test2 Vector"}
aangepastenamespace = "fgbdfgb"
# Try to upsert the test vector into the Pinecone index
try:
    index.upsert(
        vectors=[{"id": "test_vector", "values": test_vector, "metadata": test_metadata}],
        namespace=aangepastenamespace
    )
    print("Test vector type:", type(test_vector), "Length:", len(test_vector))
    print("First 5 values of test_vector:", test_vector[:5])
    print("Upsert of test vector succeeded.")
except Exception as e:
print(f"Fout bij upsert van testvector: {e}")
return redirect(url_for('dataopslag2.dataopslag2_start')) # Of een andere foutafhandelingsroute
# Laad de chunks van het document
chunks_file_path = session.get('chunks_file_path')
document_prefix = session.get('document_prefix')
if not chunks_file_path or not os.path.exists(chunks_file_path):
    flash('No document chunks found.', 'error')
    return redirect(url_for('***********'))
# Load the chunks from the pickle file
with open(chunks_file_path, 'rb') as file:
    chunks = pickle.load(file)
# Load the OpenAI API key and initialize embedding_instance
openai_api_key = get_decoded_openai_k*********()
embedding_instance = OpenAIEmbeddings(openai_api_key=openai_api_key)
namespace = session.get('namespace_name', 'default_namespace')
namespace = sanitize_namespace_name(namespace)
# Try to upsert the embeddings
try:
    for i, chunk in enumerate(chunks):
        chunk_id = f"{document_prefix}chunk{i}"
        chunk_vector = embedding_instance.embed_documents([chunk])[0]
        # Check the vector for special values
        if not validate_vector(chunk_vector):
            print(f"Chunk {chunk_id} contains NaN or infinite values")
        print("Chunk vector type:", type(chunk_vector), "Length:", len(chunk_vector))
        print("First 5 values of chunk_vector:", chunk_vector[:5])
        if isinstance(chunk_vector, list) and len(chunk_vector) == 1536:
            try:
                index.upsert(ids=[chunk_id], vectors=[chunk_vector], namespace=namespace)
                print(f"Upsert of {chunk_id} succeeded.")
            except Exception as e:
                print(f"Error upserting {chunk_id}: {e}")
        else:
            print(f"Chunk {chunk_id} has an incorrect dimension or structure: {len(chunk_vector)}")
            # You can choose to redirect here or continue with the next chunk
except Exception as e:
    flash(f'Error storing documents in Pinecone: {str(e)}', 'error')
    return redirect(url_for('*******'))
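
One thing I noticed while comparing the two calls: the hardcoded test upsert that works passes vectors as a list of dicts with "id", "values" and "metadata" keys, while the failing embedding upsert passes ids= and a bare list of floats. Should the loop be written like this instead? This is just a sketch (not tested yet), reusing the same index, embedding_instance, namespace and document_prefix as above, and assuming the chunks are plain text:

for i, chunk in enumerate(chunks):
    chunk_id = f"{document_prefix}chunk{i}"
    chunk_vector = embedding_instance.embed_documents([chunk])[0]
    # Same dict format as the working test upsert above
    index.upsert(
        vectors=[{
            "id": chunk_id,
            "values": chunk_vector,
            # metadata is optional; assuming chunk is a string here
            "metadata": {"text": str(chunk)},
        }],
        namespace=namespace
    )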