Pinecone API error: (400) Reason: Bad Request HTTP response headers

Hi, I am getting an error when using Pinecone and Streamlit. The error message is as follows: Pinecone API error: (400) Reason: Bad Request HTTP response headers: HTTPHeaderDict({‘Date’: ‘Wed, 12 Jun 2024 22:03:59 GMT’, ‘Content-Type’: ‘text/plain’, ‘Content-Length’: ‘90’, ‘Connection’: ‘keep-alive’, ‘server’: ‘envoy’}) HTTP response body: queries[364]: invalid value 0.0084299538284540176 for type type.googleapis.com/QueryVector. Can you help me, please?

Hi @btejasri1996, and welcome to the Pinecone community forums!

Thank you for your question.

Debugging what might be going wrong without seeing your code is difficult.

Could you please paste all your relevant code, being very careful to NOT paste any secrets such as your Pinecone API key?

We’ll be able to provide better suggestions once we can see how you’re interacting with the Pinecone SDK.

Best,
Zack

Hi Zack,

Thank you so much for your response.

Here is my Code:

import os
import re
import time
import hashlib
import openai
import pinecone
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import streamlit as st

Initialize OpenAI API key

openai.api_key = os.getenv(‘OPENAI_API_KEY’, ‘xxxxxxx’)
model_name = “text-embedding-ada-002”

Initialize Pinecone

use_serverless = True
from pinecone import Pinecone, ServerlessSpec, PodSpec
api_key = os.environ.get(‘PINECONE_API_KEY’) or ‘xxxxxxxxx’ # Add your actual API key
pc = Pinecone(api_key=api_key)

if use_serverless:
spec = ServerlessSpec(cloud=‘aws’, region=‘us-west-2’)
else:
spec = PodSpec(environment=environment)

Define the index name

index_name = “resp”

Check if index already exists

if index_name not in pc.list_indexes().names():
# Create index if it does not exist
pc.create_index(
index_name,
dimension=1536, # dimensionality of text-embedding-ada-002
metric=‘cosine’,
spec=spec
)
# Wait for index to be initialized
time.sleep(1)

Connect to index

index = pc.Index(index_name)

Preprocess text function

def preprocess_text(text):
return re.sub(r’\s+', ’ ', text)

Process a PDF file to extract and split text

def process_pdf(file_path):
loader = PyPDFLoader(file_path)
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(data)
texts = [str(doc) for doc in documents]
return texts

Create embeddings function

def create_embeddings(texts):
embeddings_list =
for text in texts:
res = openai.Embedding.create(input=[text], engine=model_name)
embeddings_list.append(res[‘data’][0][‘embedding’])
return embeddings_list

Upsert embeddings to Pinecone function

def upsert_embeddings_to_pinecone(index, embeddings, texts, batch_size=100):
ids = [hashlib.md5(text.encode()).hexdigest() for text in texts]
for i in range(0, len(embeddings), batch_size):
batch_ids = ids[i:i + batch_size]
batch_embeddings = embeddings[i:i + batch_size]
index.upsert(vectors=[(id, embedding) for id, embedding in zip(batch_ids, batch_embeddings)])
time.sleep(1)

Retrieve relevant documents from Pinecone

def retrieve_documents(query, index, top_k=5):
query_embedding = openai.Embedding.create(input=[query], engine=model_name)[‘data’][0][‘embedding’]
if isinstance(query_embedding, list) and all(isinstance(i, float) for i in query_embedding):
results = index.query(queries=[query_embedding], top_k=top_k)
return [match[‘metadata’][‘text’] for match in results[‘matches’]]
else:
raise ValueError(“Invalid embedding format.”)

Process the PDF and create embeddings

Upsert embeddings to Pinecone function with error handling

def upsert_embeddings_to_pinecone(file_names):
for file_name in file_names:
try:
file_path = f"/path/to/your/pdf/{file_name}" # Replace with your actual file path
texts = process_pdf(file_path)
embeddings = create_embeddings(texts)
ids = [hashlib.md5(text.encode()).hexdigest() for text in texts]
index.upsert(vectors=[(id, embedding) for id, embedding in zip(ids, embeddings)])
st.success(f"Successfully upserted embeddings for {file_name}“)
except openai.OpenAIError as e:
st.error(f"OpenAI error occurred while processing {‘Respiratory Health’}: {str(e)}”)
except pinecone.PineconeApiException as e:
if ‘Missing low surrogate’ in str(e):
st.warning(f’Skipping file {“Respiratory Health”} due to encoding issue’)
else:
st.error(f"Pinecone API error occurred while processing {‘Respiratory Health’}: {str(e)}“)
except Exception as e:
st.error(f"An unexpected error occurred while processing {‘Respiratory Health’}: {str(e)}”)
raise e

Streamlit Interface

st.title(‘Ask a question’)

if ‘messages’ not in st.session_state:
st.session_state.messages =

for message in st.session_state.messages:
st.chat_message(message[‘role’]).markdown(message[‘content’])

prompt = st.chat_input(‘Pass your prompt here’)

if prompt:
st.chat_message(‘user’).markdown(prompt)
st.session_state.messages.append({‘role’: ‘user’, ‘content’: prompt})

try:
    # Retrieve relevant documents from Pinecone
    retrieved_docs = retrieve_documents(prompt, index)
    
    # Combine the retrieved documents into a single context
    context = "\n".join(retrieved_docs)
    
    # Generate response with context
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
            {"role": "system", "content": "Context: " + context}
        ]
    ).choices[0].message['content'].strip()
    
    st.chat_message('assistant').markdown(response)
    st.session_state.messages.append({'role': 'assistant', 'content': response})
except ValueError as e:
    st.error(f"Error: {str(e)}")
except pinecone.PineconeApiException as e:
    st.error(f"Pinecone API error: {str(e)}")

Thank you.

Regards,
Tejasri