def get_ids_from_query(index, input_vector, namespace):
    """Return the set of vector ids matched by a single similarity query.

    Args:
        index: Object exposing a Pinecone-style ``query(...)`` method whose
            result has a ``matches`` iterable of items with an ``id``
            attribute.
        input_vector: Query vector forwarded as ``vector=``.
        namespace: Namespace forwarded as ``namespace=``.

    Returns:
        A ``set`` of the ids found in ``results.matches``; empty when the
        query returns no matches.
    """
    print("searching pinecone…")
    # Only ids are needed, so vector values are not requested.
    # NOTE(review): 10000 is presumably the service's maximum top_k --
    # confirm against the current Pinecone limits.
    results = index.query(
        vector=input_vector, top_k=10000, include_values=False, namespace=namespace
    )
    return {match.id for match in results.matches}
def get_all_ids_from_namespace(index, num_dimensions, namespace, max_stalled_queries=5):
    """Collect the ids of the vectors in ``namespace`` by repeated querying.

    The namespace's ``vector_count`` is read from
    ``index.describe_index_stats()``. Random probe vectors are then queried
    through ``get_ids_from_query`` and the returned ids accumulated until
    that many distinct ids have been seen.

    Args:
        index: Object exposing ``describe_index_stats()`` and ``query(...)``.
        num_dimensions: Length of each probe vector.
        namespace: Namespace to enumerate.
        max_stalled_queries: Number of consecutive queries that may add no
            new id before the search gives up and returns what it has.

    Returns:
        A ``set`` of collected ids, or ``None`` when ``namespace`` is not
        listed in the index stats. The set may hold fewer ids than the
        reported count if the stall limit was reached.
    """
    import random  # function-scope import: the file's import block is not visible here

    stats = index.describe_index_stats()
    if namespace not in stats.namespaces:
        print(f"Namespace {namespace} not found in index.")
        return None
    num_vectors = stats.namespaces[namespace].vector_count
    all_ids = set()
    stalled_queries = 0
    while len(all_ids) < num_vectors:
        print("Length of ids list is shorter than the number of total vectors…")
        # A fresh random probe each round: a constant vector returns the same
        # matches every time, so the loop could never finish once the
        # namespace holds more vectors than one query returns.
        input_vector = [random.random() for _ in range(num_dimensions)]
        print("creating random vector…")
        ids = get_ids_from_query(index, input_vector, namespace)
        print(f"getting ids from a vector query of {namespace}…")
        count_before = len(all_ids)
        all_ids.update(ids)
        print("updating ids set…")
        print(f"Collected {len(all_ids)} ids out of {num_vectors}.")
        if len(all_ids) > count_before:
            stalled_queries = 0
            continue
        # No progress this round. Bail out after too many such rounds so an
        # unreachable target count cannot spin forever.
        stalled_queries += 1
        if stalled_queries >= max_stalled_queries:
            print(
                f"No new ids after {stalled_queries} consecutive queries; "
                f"stopping at {len(all_ids)} of {num_vectors}."
            )
            break
    return all_ids
# Namespaces whose vectors get the metadata update; "" is the second entry
# (presumably the index's default namespace -- confirm).
namespaces = ["namespace-1", ""]
for namespace in namespaces:
    all_ids = get_all_ids_from_namespace(
        pinecone_index, num_dimensions=1536, namespace=namespace
    )
    # get_all_ids_from_namespace returns None when the namespace is not in
    # the index stats; iterating over None would raise TypeError.
    if all_ids is None:
        continue
    # Update metadata for each vector
    for vector_id in all_ids:
        print(vector_id)
        pinecone_index.update(
            id=vector_id, namespace=namespace, set_metadata={"primary": False}
        )
There currently isn’t an “update_all” API, but maybe that wouldn’t be such a bad API to have! Submit a feature request and the Pinecone team will take a look.
There are workarounds like the one posted above, but having a dedicated API would be far better, in my opinion.