@silas
This is the code:
!pip install -qU \
openai==0.27.7 \
--upgrade pinecone-client \
pinecone-datasets=='0.5.0rc11' \
pandas # Make sure to install the pandas library
import pinecone
import openai
import pandas as pd
from google.colab import drive
# Mount Google Drive so later cells can write under /content/gdrive.
drive.mount('/content/gdrive')

# Initialize OpenAI (Azure-hosted endpoint).
# NOTE: every value below is a placeholder; substitute real settings at
# runtime and avoid committing real secrets to the notebook.
openai.api_type = "azure"
openai.api_base = "OpenAI_Base"
openai.api_version = "Version"
openai.api_key = "API_KEY_OPENAI"

# Pinecone client settings (placeholders as well).
api_key_pinecone = "API_KEY_Pinecone"
env_pinecone = "Environment"
pinecone_index_name = 'pinecode-knowledge-index'

# The client must be initialised before any other pinecone.* call
# (list_indexes, create_index, describe_index, Index, ...). The original
# script defined the key and environment but never passed them to the client.
# NOTE(review): an unconfigured client is a likely cause of the
# "TypeError: expected string or bytes-like object" raised from putheader
# when a request header value is not a string - confirm after this change.
pinecone.init(api_key=api_key_pinecone, environment=env_pinecone)
# Sample text data: one demand-letter opening per entry.
texts = [
    "Pursuant to the legal representation of Ms. Samantha Turner, ...",
    "As you know our office represents the interests of Mr. John Doe ...",
    "We act on behalf of Mr. John Doe regarding a recent incident ...",
    "Representing Mr. John Doe in connection with a personal injury ...",
    "In light of a medical malpractice incident involving Mr. John Doe ...",
    "Our representation of Mr. John Doe pertains to a product liability ...",
    "Our firm is representing Mr. John Doe, who was involved in a recent ...",
]

# Assign a unique text_id to each text. The ids are derived from `texts`
# (1-based: "text_id_1", "text_id_2", ...) so the two lists cannot get out of
# step when a text is added or removed.
text_ids = [f"text_id_{position}" for position in range(1, len(texts) + 1)]
def create_metadata(text_ids: list[str]) -> list[dict[str, str]]:
    """Build one metadata dict per text id.

    Args:
        text_ids: Identifiers of the texts, in the order they were embedded.

    Returns:
        A list of the same length and order as ``text_ids`` in which each
        element is ``{"text_id": <id>}``. An empty input yields an empty list.
    """
    return [{"text_id": text_id} for text_id in text_ids]
# Generate embeddings for all texts in a single request. `engine` is the
# Azure deployment name (per the original comment, a text-embedding-ada-002
# deployment - confirm against your Azure resource).
embeddings_response = openai.Embedding.create(
    input=texts,
    engine="DemandWriter-Embedding",
    encoding_format="float",
)

# Extract the raw vectors from the response.
embeddings = [result['embedding'] for result in embeddings_response['data']]

# Create metadata based on text_ids.
metadata = create_metadata(text_ids)

# Combine ids, vectors and metadata into one DataFrame (one row per text).
data = {'text_id': text_ids, 'embedding': embeddings, 'metadata': metadata}
df = pd.DataFrame(data)

# Build the destination path on the mounted Drive.
# Two bugs fixed here: the original `path` string had no f-prefix, so
# "{df_filename}" was never interpolated, and `df_save_path = f"path"` was the
# literal text "path", which wrote the CSV to a file called "path" in the
# working directory instead of to Drive.
df_filename = "embeddings_dataframe.csv"
save_dir = "/content/gdrive/MyDrive/Colab Notebooks/data"
path = f"{save_dir}/{df_filename}"
df_save_path = path

# Make sure the target folder exists; to_csv does not create directories.
import os
os.makedirs(save_dir, exist_ok=True)

# Save DataFrame to a CSV file.
df.to_csv(df_save_path, index=False)
# NOTE: the calls below need an initialised Pinecone client
# (pinecone.init(api_key=..., environment=...)); make sure that has run first.

# Create the knowledge base index if it does not exist yet.
if pinecone_index_name not in pinecone.list_indexes():
    pinecone.create_index(
        pinecone_index_name,
        dimension=1536,  # must equal the length of each embedding vector
        metric='cosine',
    )

# Connect to the knowledge base index.
index_knowledge_base = pinecone.Index(index_name=pinecone_index_name)

# Upsert data from the DataFrame to Pinecone.
# Index.upsert has no `ids=` / `metadata=` keywords: it takes one `vectors`
# sequence whose items are (id, values, metadata) records, so the three
# columns are zipped row by row.
records = list(
    zip(
        df['text_id'].tolist(),
        df['embedding'].tolist(),
        df['metadata'].tolist(),
    )
)
index_knowledge_base.upsert(vectors=records)
print("Embeddings uploaded successfully to Pinecone.")
And this is the error I get:
TypeError Traceback (most recent call last)
<ipython-input-23-e164dcfcdcfa> in <cell line: 2>()
2 try:
3 # Attempt to retrieve information about the index
----> 4 pinecone.describe_index("demand-writer")
5 except pinecone.ApiException as e:
6 # If the index does not exist, create it
18 frames
/usr/local/lib/python3.10/dist-packages/pinecone/manage.py in describe_index(name)
194 """
195 api_instance = _get_api_instance()
--> 196 response = api_instance.describe_index(name)
197 db = response['database']
198 ready = response['status']['ready']
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in __call__(self, *args, **kwargs)
774
775 """
--> 776 return self.callable(self, *args, **kwargs)
777
778 def call_with_http_info(self, **kwargs):
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api/index_operations_api.py in __describe_index(self, index_name, **kwargs)
885 kwargs['index_name'] = \
886 index_name
--> 887 return self.call_with_http_info(**kwargs)
888
889 self.describe_index = _Endpoint(
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in call_with_http_info(self, **kwargs)
836 params['header']['Content-Type'] = header_list
837
--> 838 return self.api_client.call_api(
839 self.settings['endpoint_path'], self.settings['http_method'],
840 params['path'],
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
411 """
412 if not async_req:
--> 413 return self.__call_api(resource_path, method,
414 path_params, query_params, header_params,
415 body, post_params, files,
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in __call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
198 try:
199 # perform request and return response
--> 200 response_data = self.request(
201 method, url, query_params=query_params, headers=header_params,
202 post_params=post_params, body=body,
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py in request(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)
437 """Makes the HTTP request using RESTClient."""
438 if method == "GET":
--> 439 return self.rest_client.GET(url,
440 query_params=query_params,
441 _preload_content=_preload_content,
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py in GET(self, url, headers, query_params, _preload_content, _request_timeout)
234 def GET(self, url, headers=None, query_params=None, _preload_content=True,
235 _request_timeout=None):
--> 236 return self.request("GET", url,
237 headers=headers,
238 _preload_content=_preload_content,
/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py in request(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)
200 # For `GET`, `HEAD`
201 else:
--> 202 r = self.pool_manager.request(method, url,
203 fields=query_params,
204 preload_content=_preload_content,
/usr/local/lib/python3.10/dist-packages/urllib3/request.py in request(self, method, url, fields, headers, **urlopen_kw)
75
76 if method in self._encode_url_methods:
---> 77 return self.request_encode_url(
78 method, url, fields=fields, headers=headers, **urlopen_kw
79 )
/usr/local/lib/python3.10/dist-packages/urllib3/request.py in request_encode_url(self, method, url, fields, headers, **urlopen_kw)
97 url += "?" + urlencode(fields)
98
---> 99 return self.urlopen(method, url, **extra_kw)
100
101 def request_encode_body(
/usr/local/lib/python3.10/dist-packages/urllib3/poolmanager.py in urlopen(self, method, url, redirect, **kw)
374 response = conn.urlopen(method, url, **kw)
375 else:
--> 376 response = conn.urlopen(method, u.request_uri, **kw)
377
378 redirect_location = redirect and response.get_redirect_location()
/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
713
714 # Make the request on the httplib connection object.
--> 715 httplib_response = self._make_request(
716 conn,
717 method,
/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
414 conn.request_chunked(method, url, **httplib_request_kw)
415 else:
--> 416 conn.request(method, url, **httplib_request_kw)
417
418 # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
/usr/local/lib/python3.10/dist-packages/urllib3/connection.py in request(self, method, url, body, headers)
242 if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
243 headers["User-Agent"] = _get_default_user_agent()
--> 244 super(HTTPConnection, self).request(method, url, body=body, headers=headers)
245
246 def request_chunked(self, method, url, body=None, headers=None):
/usr/lib/python3.10/http/client.py in request(self, method, url, body, headers, encode_chunked)
1281 encode_chunked=False):
1282 """Send a complete request to the server."""
-> 1283 self._send_request(method, url, body, headers, encode_chunked)
1284
1285 def _send_request(self, method, url, body, headers, encode_chunked):
/usr/lib/python3.10/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1322
1323 for hdr, value in headers.items():
-> 1324 self.putheader(hdr, value)
1325 if isinstance(body, str):
1326 # RFC 2616 Section 3.7.1 says that text default has a
/usr/local/lib/python3.10/dist-packages/urllib3/connection.py in putheader(self, header, *values)
222 """ """
223 if not any(isinstance(v, str) and v == SKIP_HEADER for v in values):
--> 224 _HTTPConnection.putheader(self, header, *values)
225 elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
226 raise ValueError(
/usr/lib/python3.10/http/client.py in putheader(self, header, *values)
1258 values[i] = str(one_value).encode('ascii')
1259
-> 1260 if _is_illegal_header_value(values[i]):
1261 raise ValueError('Invalid header value %r' % (values[i],))
1262
TypeError: expected string or bytes-like object