I can't find the information in the knowledge base

Hi, I’m developing a RAG system with Python, Django, and Vapi as the voice client, but when I ask a question it tells me it can’t find any results. My similarity scores range from 0.2687 to 1.0004. One important detail: the transcriptions I get are sometimes not very good. I’m including the code for my search function in case you can help me.

Thank you

def search_context(query, top_k=5):
    import re

    def limpiar_texto_ligero(texto: str) -> str:
        # Light cleanup, used only for keyword extraction/fallback; NOT for the main embedding
        t = texto.lower()
        t = re.sub(r'\s+', ' ', t).strip()
        return t

    try:
        # 1) Embed the ORIGINAL query (best semantic signal)
        print(f"[DEBUG search_context] Original query (not normalized): '{query}'")
        emb = openai_client.embeddings.create(
            input=[query],  # <- not normalized
            model="text-embedding-3-small"
        ).data[0].embedding

        resultados_finales = []
        vistos = set()

        # Search A: original query
        print("[DEBUG search_context] Search A: original query")
        rA = index.query(vector=emb, top_k=top_k*2, include_metadata=True, include_values=False)
        for m in (rA.matches or []):
            if m.id not in vistos:
                resultados_finales.append(m)
                vistos.add(m.id)
        print(f"[DEBUG search_context] Search A found: {len(rA.matches or [])}")

        # Search B: keywords (light cleanup)
        if len(resultados_finales) < 2:
            print("[DEBUG search_context] Search B: keywords")
            qlite = limpiar_texto_ligero(query)
            keywords = [w for w in qlite.split() if len(w) > 3][:3]
            if keywords:
                emb_kw = openai_client.embeddings.create(
                    input=[' '.join(keywords)],
                    model="text-embedding-3-small"
                ).data[0].embedding
                rB = index.query(vector=emb_kw, top_k=top_k*2, include_metadata=True, include_values=False)
                for m in (rB.matches or []):
                    if m.id not in vistos:
                        resultados_finales.append(m)
                        vistos.add(m.id)
                print(f"[DEBUG search_context] Search B added: {len(rB.matches or [])}")

        # Search C: generic fallback
        if len(resultados_finales) < 1:
            print("[DEBUG search_context] Search C: generic terms")
            genericos = "plan beneficio cobertura deducible renovacion seguro"  # generic insurance terms, kept in Spanish to match the indexed documents
            emb_gen = openai_client.embeddings.create(
                input=[genericos],
                model="text-embedding-3-small"
            ).data[0].embedding
            rC = index.query(vector=emb_gen, top_k=top_k, include_metadata=True, include_values=False)
            for m in (rC.matches or []):
                if m.id not in vistos:
                    resultados_finales.append(m)
                    vistos.add(m.id)
            print(f"[DEBUG search_context] Search C added: {len(rC.matches or [])}")

        print(f"[DEBUG search_context] Total de matches únicos (sin ordenar): {len(resultados_finales)}")

        if not resultados_finales:
            print(f"[DEBUG search_context] No se encontraron matches en ninguna búsqueda")
            return []

        # 🔑 Ordenar por score DESC antes de recortar a top_k
        resultados_finales.sort(key=lambda m: getattr(m, "score", 0.0), reverse=True)
        top = resultados_finales[:max(top_k, 5)]  # ser un poco más generosos

        # Build the final list
        resultados_ordenados = []
        for m in top:
            if m.metadata:
                text = (m.metadata.get('text')
                        or m.metadata.get('texto')
                        or m.metadata.get('content'))
                if text and len(text.strip()) > 10:
                    resultados_ordenados.append({
                        "texto": text,
                        "score": float(getattr(m, "score", 0.0)),
                        "source": m.metadata.get("source", "")
                    })

        print("[DEBUG search_context] Top matches (score, preview):")
        for r in resultados_ordenados:
            preview = (r.get("texto", "")[:120]).replace("\n", " ").replace("\r", " ")
            print(f"  - {r.get('score', 0):.3f} :: {preview}")

        return resultados_ordenados

    except Exception as e:
        print(f"[ERROR search_context] Error en búsqueda: {str(e)}")
        import traceback; traceback.print_exc()
        return []


def chatgpt_with_context(user_query, system_prompt=None):
    resultados = search_context(user_query)
    if not es_respuesta_relevante(resultados, umbral_score=0.2):
        print("[WARNING] Contexto vacío o irrelevante devuelto por Pinecone. Verifique la indexación y la consulta.")
        return ("Te ayudo con gusto. Para darte el dato exacto según tu plan, "
                "necesito un momento para revisar tu póliza. ¿Prefieres confirmación por SMS "
                "o seguimos en la línea?")

    # Build the context from the relevant texts only
    contexto = "\n---\n".join([r['texto'] for r in resultados if r.get('score', 0) >= 0.25])
    prompt = f"Contexto relevante:\n{contexto}\n\nPregunta del usuario: {user_query}\nRespuesta:"
    if system_prompt is None:
        # Spanish system prompt for the Spanish-speaking voice agent
        system_prompt = ("Eres una asistente telefónica de seguros. Responde SOLO con lo que esté en CONTEXTO. "
                         "Si falta un dato, pide lo mínimo. Sé breve, cálida y profesional.")
    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": prompt}]
        )
        response = completion.choices[0].message.content
        print(f"[DEBUG chatgpt_with_context] Respuesta generada: {response[:100]}...")
        return f"{response}\n\n¿Necesita algo más?"
    except Exception as e:
        print(f"[ERROR chatgpt_with_context] Error al generar respuesta con OpenAI: {str(e)}")
        import traceback
        traceback.print_exc()
        return "Lo siento, hubo un problema técnico al procesar su solicitud. ¿Necesita algo más?"

hi @ai.agent

TL;DR: your code has several issues that could cause the “no results” problem. Your scores (0.2687 to 1.0004) show that the search itself is returning matches; it’s your downstream filtering logic that is likely discarding them.
---
That said, here are some ideas to help you drill down:

1. Restrictive Relevance Threshold
Your es_respuesta_relevante() function with umbral_score=0.2 might be filtering out valid results. Scores around 0.2687 aren’t necessarily “irrelevant”; those matches can still contain useful information, especially when transcription quality is poor.
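
Since es_respuesta_relevante() isn’t shown in your post, here is a minimal sketch of a more forgiving version, assuming (from your call site) that it takes the result list and a threshold:

def es_respuesta_relevante(resultados, umbral_score=0.15):
    # Hypothetical reconstruction - adapt to your real implementation.
    # Accept the context if ANY match clears a deliberately low bar,
    # instead of requiring every match to look strong.
    return any(r.get("score", 0.0) >= umbral_score for r in resultados)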

2. Multiple Embedding Calls
You’re making up to three separate embedding calls per query, which adds latency and cost; that matters for a latency-sensitive voice agent. Consider consolidating your search strategy, e.g. by batching the embedding requests as sketched below.
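
Since the OpenAI embeddings endpoint accepts a list of inputs and returns one embedding per input, in order, the original query and the keyword fallback can share a single round trip. A sketch using the names from your function:

# One batched call instead of two sequential ones
inputs = [query, ' '.join(keywords)]  # original query + keyword fallback
resp = openai_client.embeddings.create(input=inputs, model="text-embedding-3-small")
emb, emb_kw = resp.data[0].embedding, resp.data[1].embedding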

3. Score Filtering Too High
Your final filter at score >= 0.25 in chatgpt_with_context() can eliminate results that would still be useful in a voice application, where transcription quality varies.

Some quick fixes you can try:

  1. Lower your score thresholds from 0.25 to 0.15 or even 0.1
  2. Increase top_k in your initial query to get more candidates
  3. Remove the es_respuesta_relevante() check temporarily to see if you get results
  4. Add more debug output to see what scores you’re actually getting (see the logging sketch after this list)
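
For fix 4, a quick sketch of logging the raw scores right after the first index.query() call, before any threshold touches them (names match your search_context()):

rA = index.query(vector=emb, top_k=top_k*2, include_metadata=True, include_values=False)
for m in (rA.matches or []):
    # Raw, unfiltered score per candidate - compare these against your
    # 0.2 / 0.25 thresholds to see exactly what is being thrown away
    print(f"[DEBUG raw] id={m.id} score={m.score:.4f}")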

Your approach of handling poor transcription with fallback searches is sound, but the thresholds might be too strict for a voice application, where perfect transcription isn’t guaranteed.

I’d also recommend working on search relevance itself. For better results, consider implementing reranking [doc]. Reranking is used as part of a two-stage retrieval process to improve the quality of results: you first query the index for a given number of candidate results, and then send the query and those results to a reranking model, which reorders them by relevance.
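
As a rough sketch of that two-stage flow (this assumes a recent Pinecone Python client that exposes the hosted reranker as pc.inference.rerank, and it reuses the list your search_context() returns):

# Over-fetch candidates, then let the reranker pick the best few
candidates = search_context(user_query, top_k=10)
reranked = pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query=user_query,
    documents=[{"id": str(i), "text": r["texto"]} for i, r in enumerate(candidates)],
    top_n=5,
)
# item.index points back into the documents we sent, so we can keep the
# original dicts, now in the reranker's order
contexto = "\n---\n".join(candidates[item.index]["texto"] for item in reranked.data)

Note that reranker scores are on a different scale than cosine similarity, so revisit any hard thresholds if you adopt this.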

  • Optimize Your Filtering Strategy

def search_context(query, top_k=5):
    try:
        # Single embedding call for the original query
        emb = openai_client.embeddings.create(
            input=[query],
            model="text-embedding-3-small"
        ).data[0].embedding

        # Increase top_k to get more candidates
        results = index.query(
            vector=emb, 
            top_k=top_k*3,  # Get more candidates
            include_metadata=True, 
            include_values=False
        )
        
        # Lower threshold for voice applications
        filtered_results = []
        for match in results.matches:
            if match.score >= 0.15:  # Lower threshold
                if match.metadata:
                    text = (match.metadata.get('text') or 
                           match.metadata.get('texto') or 
                           match.metadata.get('content'))
                    if text and len(text.strip()) > 10:
                        filtered_results.append({
                            "texto": text,
                            "score": float(match.score),
                            "source": match.metadata.get("source", "")
                        })
        
        return filtered_results[:top_k]
        
    except Exception as e:
        print(f"Error in search: {str(e)}")
        return []
  • Consider Hybrid Search

For better handling of transcription errors, you might also benefit from hybrid search. Semantic search and lexical search are both powerful information retrieval techniques, but each has notable limitations. Hybrid search runs the query against both a dense and a sparse index, merges the results from the two, and uses a reranking model to assign a unified relevance score.
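
A minimal sketch of the single-index variant, assuming a sparse-dense (dotproduct-metric) Pinecone index whose records were upserted with sparse values, plus the pinecone-text helper package for BM25 encodings:

from pinecone_text.sparse import BM25Encoder

bm25 = BM25Encoder.default()            # or an encoder fit on your own corpus
sparse_q = bm25.encode_queries(query)   # -> {"indices": [...], "values": [...]}

results = index.query(
    vector=emb,              # dense embedding of the query, as in your code
    sparse_vector=sparse_q,  # lexical signal, more robust to transcription noise
    top_k=top_k * 3,
    include_metadata=True,
)

The lexical side tends to catch exact policy terms (“deducible”, plan names) even when the transcription mangles the surrounding sentence.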

  • Metadata Filtering

If your documents have categories or types, use metadata filtering. Every record in an index can include metadata key-value pairs to store related information. When you search the index, you can include a metadata filter to limit the search to records matching a filter expression.
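
A sketch, with a hypothetical "tipo" metadata key (substitute whatever keys you actually stored when upserting your chunks):

results = index.query(
    vector=emb,
    top_k=top_k * 2,
    include_metadata=True,
    filter={"tipo": {"$eq": "cobertura"}},  # hypothetical key/value pair
)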