Invalid vector value passed: cannot interpret type <class 'list'>

arman.hn007 · July 6, 2023, 6:02am

from langchain.llms import OpenAI
from sentence_transformers import SentenceTransformer
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
import os
import glob
import pdfplumber
import pinecone
import numpy as np

load_dotenv()

# Initialize the language model
llm = OpenAI(temperature=0)
model = SentenceTransformer('all-MiniLM-L6-v2')
# Initialize embeddings model
embeddings_model = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Initialize Pinecone
pinecone.init(      
	api_key=os.getenv("PINECONE_API_KEY"),      
	environment='my_environment**`strong text`**'      
)      
index = pinecone.Index('my_index')

# Directory to retrieve your files from
directory_path = 'docs'

# Get list of PDF files in the directory
pdf_files = glob.glob(f"{directory_path}/*.pdf")

def split_into_chunks(text, chunk_size=1000, overlap_size=200):
    """
    Splits the text into overlapping chunks of a given size.
    """
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start += chunk_size - overlap_size
    return chunks

for pdf_file in pdf_files:
    # Load the PDF
    with pdfplumber.open(pdf_file) as pdf:
        raw_text = '\n'.join(page.extract_text() for page in pdf.pages)

    # Split the text into chunks
    chunks = split_into_chunks(raw_text)

    # Create embeddings for each chunk and store them in Pinecone
    for i, chunk in enumerate(chunks):
        embedding = model.encode([chunk])[0]
        id = f"{os.path.basename(pdf_file)}_chunk_{i}"
        index.upsert(ids=[id], vectors=[embedding.tolist()])  # Convert the numpy array to a list

print("PDFs processed and stored in Pinecone.")

I am a complete newbie, trying to train a chatbot using Pinecone, to be honest I do not know what I am doing, I am taking help of chatgpt4, but I am at a loop at the moment.
This is the error i am getting " raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}“)
ValueError: Invalid vector value passed: cannot interpret type <class ‘list’>”

LarryStewart2022 · July 6, 2023, 4:50pm

index.upsert(ids=[id], vectors=[embedding])

arman.hn007 · July 6, 2023, 5:03pm

Thank You for your feedback