Hi all,
I need some help. Thanks in advance.
I am getting the ApiValueError with the line
index.upsert(vectors=list(zip(ids, embeddings, batch)))
Each of ids, embeddings, and batch is of type list; hence this is not really the same problem as the one given at "Unable to prepare type ndarray for serialization".
The error goes away if I comment out the above line, as follows:
for i in range(0, len(items), batch_size):
batch = items[i : min(len(items), i + batch_size)]
ids = [b["id"] for b in batch]
embeddings = [b["embeddings"] for b in batch]
batch = [{"title": b["title"], "url": b["url"], "text": b["text"]} for b in batch]
#index.upsert(vectors=list(zip(ids, embeddings, batch)))
The complete error is
ApiValueError Traceback (most recent call last)
Cell In[26], line 13
10 embeddings = [b["embeddings"] for b in batch]
11 batch = [{"title": b["title"], "url": b["url"], "text": b["text"]} for b in batch]
---> 13 index.upsert(vectors=list(zip(ids, embeddings, batch)))
File /usr/local/lib/python3.9/site-packages/pinecone/core/utils/error_handling.py:17, in validate_and_convert_errors.<locals>.inner_func(*args, **kwargs)
15 Config.validate() # raises exceptions in case of invalid config
16 try:
---> 17 return func(*args, **kwargs)
18 except MaxRetryError as e:
19 if isinstance(e.reason, ProtocolError):
File /usr/local/lib/python3.9/site-packages/pinecone/index.py:150, in Index.upsert(self, vectors, namespace, batch_size, show_progress, **kwargs)
145 raise ValueError('async_req is not supported when batch_size is provided.'
146 'To upsert in parallel, please follow: '
147 'Insert data')
149 if batch_size is None:
→ 150 return self._upsert_batch(vectors, namespace, _check_type, **kwargs)
152 if not isinstance(batch_size, int) or batch_size <= 0:
153 raise ValueError(‘batch_size must be a positive integer’)
File /usr/local/lib/python3.9/site-packages/pinecone/index.py:237, in Index._upsert_batch(self, vectors, namespace, _check_type, **kwargs)
234 return _dict_to_vector(item)
235 raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")
→ 237 return self._vector_api.upsert(
238 UpsertRequest(
239 vectors=list(map(_vector_transform, vectors)),
240 **args_dict,
241 _check_type=_check_type,
242 **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS}
243 ),
244 **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS}
245 )
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:776, in Endpoint.call(self, *args, **kwargs)
765 def call(self, *args, **kwargs):
766 """ This method is invoked when endpoints are called
767 Example:
768
(…)
774
775 """
→ 776 return self.callable(self, *args, **kwargs)
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api/vector_operations_api.py:956, in VectorOperationsApi.__init__.<locals>.__upsert(self, upsert_request, **kwargs)
953 kwargs['_host_index'] = kwargs.get('_host_index')
954 kwargs['upsert_request'] = \
955 upsert_request
→ 956 return self.call_with_http_info(**kwargs)
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:838, in Endpoint.call_with_http_info(self, **kwargs)
834 header_list = self.api_client.select_header_content_type(
835 content_type_headers_list)
836 params['header']['Content-Type'] = header_list
→ 838 return self.api_client.call_api(
839 self.settings['endpoint_path'], self.settings['http_method'],
840 params['path'],
841 params['query'],
842 params['header'],
843 body=params['body'],
844 post_params=params['form'],
845 files=params['file'],
846 response_type=self.settings['response_type'],
847 auth_settings=self.settings['auth'],
848 async_req=kwargs['async_req'],
849 _check_type=kwargs['_check_return_type'],
850 _return_http_data_only=kwargs['_return_http_data_only'],
851 _preload_content=kwargs['_preload_content'],
852 _request_timeout=kwargs['_request_timeout'],
853 _host=_host,
854 collection_formats=params['collection_format'])
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:413, in ApiClient.call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
359 """Makes the HTTP request (synchronous) and returns deserialized data.
360
361 To make an async_req request, set the async_req parameter.
(…)
410 then the method will return the response directly.
411 """
412 if not async_req:
→ 413 return self.__call_api(resource_path, method,
414 path_params, query_params, header_params,
415 body, post_params, files,
416 response_type, auth_settings,
417 _return_http_data_only, collection_formats,
418 _preload_content, _request_timeout, _host,
419 _check_type)
421 return self.pool.apply_async(self.__call_api, (resource_path,
422 method, path_params,
423 query_params,
(…)
431 _request_timeout,
432 _host, _check_type))
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:185, in ApiClient.__call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
183 # body
184 if body:
→ 185 body = self.sanitize_for_serialization(body)
187 # auth setting
188 self.update_params_for_auth(header_params, query_params,
189 auth_settings, resource_path, method, body)
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:277, in ApiClient.sanitize_for_serialization(cls, obj)
264 """Prepares data for transmission before it is sent with the rest client
265 If obj is None, return None.
266 If obj is str, int, long, float, bool, return directly.
(…)
274 :return: The serialized form of data.
275 """
276 if isinstance(obj, (ModelNormal, ModelComposed)):
→ 277 return {
278 key: cls.sanitize_for_serialization(val) for key, val in model_to_dict(obj, serialize=True).items()
279 }
280 elif isinstance(obj, io.IOBase):
281 return cls.get_file_data_and_close_file(obj)
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:278, in <dictcomp>(.0)
264 """Prepares data for transmission before it is sent with the rest client
265 If obj is None, return None.
266 If obj is str, int, long, float, bool, return directly.
(…)
274 :return: The serialized form of data.
275 """
276 if isinstance(obj, (ModelNormal, ModelComposed)):
277 return {
→ 278 key: cls.sanitize_for_serialization(val) for key, val in model_to_dict(obj, serialize=True).items()
279 }
280 elif isinstance(obj, io.IOBase):
281 return cls.get_file_data_and_close_file(obj)
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:289, in ApiClient.sanitize_for_serialization(cls, obj)
287 return cls.sanitize_for_serialization(obj.value)
288 elif isinstance(obj, (list, tuple)):
→ 289 return [cls.sanitize_for_serialization(item) for item in obj]
290 if isinstance(obj, dict):
291 return {key: cls.sanitize_for_serialization(val) for key, val in obj.items()}
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:289, in <listcomp>(.0)
287 return cls.sanitize_for_serialization(obj.value)
288 elif isinstance(obj, (list, tuple)):
→ 289 return [cls.sanitize_for_serialization(item) for item in obj]
290 if isinstance(obj, dict):
291 return {key: cls.sanitize_for_serialization(val) for key, val in obj.items()}
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:291, in ApiClient.sanitize_for_serialization(cls, obj)
289 return [cls.sanitize_for_serialization(item) for item in obj]
290 if isinstance(obj, dict):
→ 291 return {key: cls.sanitize_for_serialization(val) for key, val in obj.items()}
292 raise ApiValueError('Unable to prepare type {} for serialization'.format(obj.__class__.__name__))
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:291, in <dictcomp>(.0)
289 return [cls.sanitize_for_serialization(item) for item in obj]
290 if isinstance(obj, dict):
→ 291 return {key: cls.sanitize_for_serialization(val) for key, val in obj.items()}
292 raise ApiValueError('Unable to prepare type {} for serialization'.format(obj.__class__.__name__))
File /usr/local/lib/python3.9/site-packages/pinecone/core/client/api_client.py:292, in ApiClient.sanitize_for_serialization(cls, obj)
290 if isinstance(obj, dict):
291 return {key: cls.sanitize_for_serialization(val) for key, val in obj.items()}
→ 292 raise ApiValueError('Unable to prepare type {} for serialization'.format(obj.__class__.__name__))
ApiValueError: Unable to prepare type ndarray for serialization