Creation of Index error

For the last 2 hours I have been trying to create an index using the following code:

# Check if the knowledge base index exists
try:
    # Attempt to retrieve information about the index
    pinecone.describe_index("demand-writer")
except pinecone.ApiException as e:
    # Create the index only when the error text reports it missing; any other ApiException is silently dropped here
    if "Index not found" in str(e):
        pinecone.create_index(
            "demand-writer",
            dimension=len(embeddings[0]),  # Assuming all embeddings have the same dimension
            metric='cosine',
        )

However, it seems to be throwing an error.

This is another variation of the code that also generates the same error:

# Create the "demand-writer" index unless Pinecone already lists it
if "demand-writer" not in pinecone.list_indexes():
    pinecone.create_index(name="demand-writer", dimension=1536, metric="cosine")

I used this code last week and there were no errors. Could someone provide some guidance?

Can you share more of your code and any other info you have on the error message? For example, can you show how you’re initializing the pinecone client, and can you see what header/value it doesn’t like?

@silas

This is the code:

!pip install -qU \
    openai==0.27.7 \
    --upgrade pinecone-client \
    pinecone-datasets=='0.5.0rc11' \
    pandas  # Make sure to install the pandas library

import pinecone
import openai
import pandas as pd
from google.colab import drive
drive.mount('/content/gdrive')

# Configure the OpenAI SDK for an Azure OpenAI resource.
# All four values are placeholders and must be replaced with real settings.
openai.api_type = "azure"
openai.api_base = "OpenAI_Base"
openai.api_version = "Version"
openai.api_key = "API_KEY_OPENAI"

# Pinecone credentials and the name of the index this script works with
# (placeholder values — replace with the real API key and environment).
api_key_pinecone = "API_KEY_Pinecone"
env_pinecone = "Environment"
pinecone_index_name = 'pinecode-knowledge-index'

# Actually hand the credentials to the Pinecone client. Storing them in variables
# is not enough: the module-level helpers used later (list_indexes, create_index,
# Index) take their API key and environment from this call.
# NOTE(review): a missing init() leaves the API-key header unset, which is
# consistent with "TypeError: expected string or bytes-like object" raised while
# sending request headers — confirm against the installed pinecone-client version.
pinecone.init(api_key=api_key_pinecone, environment=env_pinecone)

# Sample letter openings that will be embedded
texts = [
    "Pursuant to the legal representation of Ms. Samantha Turner, ...",
    "As you know our office represents the interests of Mr. John Doe ...",
    "We act on behalf of Mr. John Doe regarding a recent incident ...",
    "Representing Mr. John Doe in connection with a personal injury ...",
    "In light of a medical malpractice incident involving Mr. John Doe ...",
    "Our representation of Mr. John Doe pertains to a product liability ...",
    "Our firm is representing Mr. John Doe, who was involved in a recent ...",
]

# One id per text, numbered from 1 in list order: "text_id_1", "text_id_2", ...
text_ids = [f"text_id_{position}" for position, _ in enumerate(texts, start=1)]

# Function to create metadata based on text_ids
def create_metadata(text_ids):
    """Return one ``{"text_id": <id>}`` dict per entry of *text_ids*, in input order."""
    records = []
    for identifier in text_ids:
        records.append({"text_id": identifier})
    return records

# Embed all texts in a single request. "DemandWriter-Embedding" is the engine name
# sent to the API — presumably a text-embedding-ada-002 deployment; confirm in Azure.
embeddings_response = openai.Embedding.create(
    engine="DemandWriter-Embedding",
    input=texts,
    encoding_format="float",
)

# Pull the vector out of every item in the response's 'data' list
embeddings = [item['embedding'] for item in embeddings_response['data']]

# Build the per-text metadata dicts from the ids
metadata = create_metadata(text_ids)

# Assemble ids, embedding vectors and metadata into one DataFrame (one row per text)
data = {'text_id': text_ids, 'embedding': embeddings, 'metadata': metadata}
df = pd.DataFrame(data)

# File name of the CSV export
df_filename = "embeddings_dataframe.csv"

# Full destination inside the mounted Google Drive. The f-prefix is required so
# that {df_filename} is substituted instead of being written literally.
path = f"/content/gdrive/MyDrive/Colab Notebooks/data/{df_filename}"

# Save the DataFrame to that location; df_save_path must be the path variable
# itself, not the literal string "path" (which would write ./path instead).
df_save_path = path
df.to_csv(df_save_path, index=False)

# Create the knowledge base index only when Pinecone does not already list it
if pinecone_index_name not in pinecone.list_indexes():
    pinecone.create_index(name=pinecone_index_name, dimension=1536, metric="cosine")

# Connect to the knowledge base index
index_knowledge_base = pinecone.Index(index_name=pinecone_index_name)

# Upsert data from the DataFrame to Pinecone. upsert() takes ONE `vectors`
# sequence whose items each bundle an id, its embedding values and its metadata;
# it does not accept separate parallel `ids=` / `metadata=` lists.
vectors_to_upsert = list(
    zip(df['text_id'].tolist(), df['embedding'].tolist(), df['metadata'].tolist())
)
index_knowledge_base.upsert(vectors=vectors_to_upsert)

print("Embeddings uploaded successfully to Pinecone.")

As for the error:

TypeError                                 Traceback (most recent call last)
<ipython-input-23-e164dcfcdcfa> in <cell line: 2>()
      2 try:
      3     # Attempt to retrieve information about the index
----> 4     pinecone.describe_index("demand-writer")
      5 except pinecone.ApiException as e:
      6     # If the index does not exist, create it

18 frames
/usr/local/lib/python3.10/dist-packages/pinecone/manage.py in describe_index(name)
    194     """
    195     api_instance = _get_api_instance()
--> 196     response = api_instance.describe_index(name)
    197     db = response['database']
    198     ready = response['status']['ready']

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in __call__(self, *args, **kwargs)
    774 
    775         """
--> 776         return self.callable(self, *args, **kwargs)
    777 
    778     def call_with_http_info(self, **kwargs):

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api/index_operations_api.py in __describe_index(self, index_name, **kwargs)
    885             kwargs['index_name'] = \
    886                 index_name
--> 887             return self.call_with_http_info(**kwargs)
    888 
    889         self.describe_index = _Endpoint(

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in call_with_http_info(self, **kwargs)
    836             params['header']['Content-Type'] = header_list
    837 
--> 838         return self.api_client.call_api(
    839             self.settings['endpoint_path'], self.settings['http_method'],
    840             params['path'],

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
    411         """
    412         if not async_req:
--> 413             return self.__call_api(resource_path, method,
    414                                    path_params, query_params, header_params,
    415                                    body, post_params, files,

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in __call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
    198         try:
    199             # perform request and return response
--> 200             response_data = self.request(
    201                 method, url, query_params=query_params, headers=header_params,
    202                 post_params=post_params, body=body,

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in request(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)
    437         """Makes the HTTP request using RESTClient."""
    438         if method == "GET":
--> 439             return self.rest_client.GET(url,
    440                                         query_params=query_params,
    441                                         _preload_content=_preload_content,

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py in GET(self, url, headers, query_params, _preload_content, _request_timeout)
    234     def GET(self, url, headers=None, query_params=None, _preload_content=True,
    235             _request_timeout=None):
--> 236         return self.request("GET", url,
    237                             headers=headers,
    238                             _preload_content=_preload_content,

/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py in request(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)
    200             # For `GET`, `HEAD`
    201             else:
--> 202                 r = self.pool_manager.request(method, url,
    203                                               fields=query_params,
    204                                               preload_content=_preload_content,

/usr/local/lib/python3.10/dist-packages/urllib3/request.py in request(self, method, url, fields, headers, **urlopen_kw)
     75 
     76         if method in self._encode_url_methods:
---> 77             return self.request_encode_url(
     78                 method, url, fields=fields, headers=headers, **urlopen_kw
     79             )

/usr/local/lib/python3.10/dist-packages/urllib3/request.py in request_encode_url(self, method, url, fields, headers, **urlopen_kw)
     97             url += "?" + urlencode(fields)
     98 
---> 99         return self.urlopen(method, url, **extra_kw)
    100 
    101     def request_encode_body(

/usr/local/lib/python3.10/dist-packages/urllib3/poolmanager.py in urlopen(self, method, url, redirect, **kw)
    374             response = conn.urlopen(method, url, **kw)
    375         else:
--> 376             response = conn.urlopen(method, u.request_uri, **kw)
    377 
    378         redirect_location = redirect and response.get_redirect_location()

/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    713 
    714             # Make the request on the httplib connection object.
--> 715             httplib_response = self._make_request(
    716                 conn,
    717                 method,

/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    414                 conn.request_chunked(method, url, **httplib_request_kw)
    415             else:
--> 416                 conn.request(method, url, **httplib_request_kw)
    417 
    418         # We are swallowing BrokenPipeError (errno.EPIPE) since the server is

/usr/local/lib/python3.10/dist-packages/urllib3/connection.py in request(self, method, url, body, headers)
    242         if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
    243             headers["User-Agent"] = _get_default_user_agent()
--> 244         super(HTTPConnection, self).request(method, url, body=body, headers=headers)
    245 
    246     def request_chunked(self, method, url, body=None, headers=None):

/usr/lib/python3.10/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1281                 encode_chunked=False):
   1282         """Send a complete request to the server."""
-> 1283         self._send_request(method, url, body, headers, encode_chunked)
   1284 
   1285     def _send_request(self, method, url, body, headers, encode_chunked):

/usr/lib/python3.10/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1322 
   1323         for hdr, value in headers.items():
-> 1324             self.putheader(hdr, value)
   1325         if isinstance(body, str):
   1326             # RFC 2616 Section 3.7.1 says that text default has a

/usr/local/lib/python3.10/dist-packages/urllib3/connection.py in putheader(self, header, *values)
    222         """ """
    223         if not any(isinstance(v, str) and v == SKIP_HEADER for v in values):
--> 224             _HTTPConnection.putheader(self, header, *values)
    225         elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
    226             raise ValueError(

/usr/lib/python3.10/http/client.py in putheader(self, header, *values)
   1258                 values[i] = str(one_value).encode('ascii')
   1259 
-> 1260             if _is_illegal_header_value(values[i]):
   1261                 raise ValueError('Invalid header value %r' % (values[i],))
   1262 

TypeError: expected string or bytes-like object

That error doesn’t match the code you posted as it’s erroring on describe_index but I don’t see describe_index anywhere in the code. (I know you said earlier you get the same error for a different call, but would still be helpful if they match)

I think it would be helpful to see what header it seems to be complaining about. Can you widen your except clause to catch everything, and then log what you’re getting? Or you could debug and put a breakpoint on this line of code:

   1261                 raise ValueError('Invalid header value %r' % (values[i],))
1 Like

@silas

ERROR:root:Exception occurred while describing index.
Traceback (most recent call last):
  File "<ipython-input-34-a2efc43afaa3>", line 9, in <cell line: 7>
    pinecone.describe_index("demand-writer")
  File "/usr/local/lib/python3.10/dist-packages/pinecone/manage.py", line 196, in describe_index
    response = api_instance.describe_index(name)
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py", line 776, in __call__
    return self.callable(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api/index_operations_api.py", line 887, in __describe_index
    return self.call_with_http_info(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py", line 838, in call_with_http_info
    return self.api_client.call_api(
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py", line 413, in call_api
    return self.__call_api(resource_path, method,
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py", line 200, in __call_api
    response_data = self.request(
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py", line 439, in request
    return self.rest_client.GET(url,
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py", line 236, in GET
    return self.request("GET", url,
  File "/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py", line 202, in request
    r = self.pool_manager.request(method, url,
  File "/usr/local/lib/python3.10/dist-packages/urllib3/request.py", line 77, in request
    return self.request_encode_url(
  File "/usr/local/lib/python3.10/dist-packages/urllib3/request.py", line 99, in request_encode_url
    return self.urlopen(method, url, **extra_kw)
  File "/usr/local/lib/python3.10/dist-packages/urllib3/poolmanager.py", line 376, in urlopen
    response = conn.urlopen(method, u.request_uri, **kw)
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 715, in urlopen
    httplib_response = self._make_request(
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 416, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connection.py", line 244, in request
    super(HTTPConnection, self).request(method, url, body=body, headers=headers)
  File "/usr/lib/python3.10/http/client.py", line 1283, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/usr/lib/python3.10/http/client.py", line 1324, in _send_request
    self.putheader(hdr, value)
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connection.py", line 224, in putheader
    _HTTPConnection.putheader(self, header, *values)
  File "/usr/lib/python3.10/http/client.py", line 1260, in putheader
    if _is_illegal_header_value(values[i]):
TypeError: expected string or bytes-like object

This topic was automatically closed 24 hours after the last reply. New replies are no longer allowed.