Hey @Cory_Pinecone
I am getting a new error now - don’t know if I made it better or worse 
ApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({‘content-length’: ‘34482’, ‘content-type’: ‘text/plain’, ‘date’: ‘Sat, 24 Dec 2022 16:45:12 GMT’, ‘server’: ‘envoy’, ‘connection’: ‘close’})
HTTP response body: vectors[0].values: invalid value "[0.03192412108182907, 0.003208060981705785, 0.0036474389489740133, -0.04201175644993782, -0.030696269124746323, -0.021559614688158035, -0.013855453580617905, 0.02006693370640278, -0.02341342903673649, -0.022594861686229706, 0.03437982127070427, 0.010045504197478294, -0.016937118023633957, -0.019007612019777298, -0.008185671642422676, 0.02751830220222473, 0.05181048810482025, -0.022739315405488014, 0.009136654436588287, 0.0078004635870456696, 0.003451825585216284, -0.010502939112484455, 0.001489671878516674, -0.0048211198300123215, -0.01325356587767601, 0.005883451551198959, 0.012447035871446133, -0.04003756493330002, 0.009624183177947998, -0.020741047337651253, -0.00345784449018538, -0.0005966211319901049, -0.011809035204350948, -0.0037136466708034277, -0.01110482681542635, 0.003948383033275604, -0.020512331277132034, -0.012338696047663689, 0.0028243577107787132, -0.0053236959502100945, 0.016828779131174088, 0.0010826453799381852, -0.0008960601990111172, -0.0012301078531891108, -0.012398885563015938, 0.02334120310842991, -0.004559298977255821, -0.009834843687713146, -0.03743740916252136, 0.010924260132014751, 0.016985269263386726, 0.030070306733250618, -0.01552870124578476, 0.006668915040791035, -0.02317267470061779, -0.03895416855812073, -0.01875481940805912, 0.004941497463732958, -0.011700695380568504, 0.00015696100308559835, -0.011291411705315113, 0.005952668841928244, -0.005985772702842951, 0.0033585329074412584, -0…
from pinecone.core.client.model.vector import Vector
import pinecone
import pandas as pd
import ast
import sys
def chunker(seq, size):
'Yields a series of slices of the original iterable, up to the limit of what size is.'
for pos in range(0, len(seq), size):
yield seq.iloc[pos:pos + size]
def convert_data(chunk):
'Converts a pandas dataframe to be a simple list of tuples, formatted how the `upsert()` method in the Pinecone Python client expects.'
data = []
for i in chunk.to_dict('records'):
#if 'metadata' in i:
#data.append((str(i['id']),i['embedding'],i['metadata']))
#else:
if len(str(i['embedding'])) < 100:
print("########### wrong embedding format found ######")
continue
embeddings_str = i['embedding']
embeddings_str = embeddings_str.replace("\n", " ")
data.append((str(i['id']),embeddings_str))
df_debug_convert = pd.DataFrame(data)
df_debug_convert.to_csv("/content/drive/MyDrive/semantic_search_mat/debug_convert.csv")
print(data)
return data
pinecone.init(
api_key="",
environment="us-west1-gcp"
)
# check if 'bgb' index already exists (only create index if not)
if 'bgb' not in pinecone.list_indexes():
pinecone.create_index('bgb', 2048)
# connect to index
index = pinecone.Index(index_name="bgb")
df = pd.read_csv("/content/drive/MyDrive/semantic_search_mat/bgb_csv_with_embeddings.csv")
for chunk in chunker(df,1):
#print(chunk)
df_debug_chunks = pd.DataFrame(chunk)
df_debug_chunks.to_csv("/content/drive/MyDrive/semantic_search_mat/debug_chunks.csv")
index.upsert(vectors=convert_data(chunk))