I’m trying to upsert 100 vectors at a time with 1536 dimensions. However, if I check the total number of vectors after the job is finished, it always says 100 vectors in total even though it just upserted a lot more vectors than that.
I basically use the following code to do this.
df = pd.read_csv("embeddings.csv")
def chunker(seq, size):
'Yields a series of slices of the original iterable, up to the limit of what size is.'
for pos in range(0, len(seq), size):
yield seq.iloc[pos:pos + size]
def convert_data(chunk):
data = []
for key, value in enumerate(chunk.to_dict('records')):
data.append((str(key["id"]), ast.literal_eval(value["embedding"])))
return data
if 'bible' not in pinecone.list_indexes():
pinecone.create_index('key', 1536)
index = pinecone.Index('key')
for chunk in chunker(df,100):
index.upsert(vectors=convert_data(chunk))