edgedb-python icon indicating copy to clipboard operation
edgedb-python copied to clipboard

Update Gel AI API in Python

Open anbuzin opened this issue 7 months ago • 1 comments

This is the current situation with gel.ai in Python.

db = gel.create_async_client()

# WORKS: configuration (crashes if you insert it twice though)
config_query = f"""
    configure current branch
    insert ext::ai::OpenAIProviderConfig {{
        secret := '{api_key}',
    }}
"""
await db.query(config_query)

# WORKS: vector search
vector_search_query = ai.search(default.Chunk, std.str("dummy query"))
vector_search = await db.query(vector_search_query)
# DOESN'T WORK: ORM provider config
config = ai.OpenAIProviderConfig(
    name="openai",
    display_name="OpenAI", 
    api_url="https://api.openai.com/v1",
    api_style=ai.ProviderAPIStyle.OpenAI,
    secret=api_key,
)
await db.save(config)
# DOESN'T WORK: anything related to RAG
rag = await gel.ai.create_async_rag_client(db, model="gpt-4o-mini")
chunk_rag = rag.with_context(query="Chunk")
result = await chunk_rag.query_rag("dummy rag query")

# throws 400 bad request and 500 internal server error
# no error messages provided, it's plain response.raise_for_status()

Proposed API update to the RAG.

import gel
from models import default
from models.ext import ai

# 1. make RAG a function
result = await ai.generate(
    client=client,
    prompt="dummy prompt",
    context_query=ai.search(default.Chunk, "search string"),
    model="anthropic:claude-4-haiku",
)

# 2. take any query for search
result = await ai.generate(
    client=client,
    prompt="Who are our top customers?",
    context_query=default.Customer.select("*").order_by(revenue="desc").limit(10),
    model="openai:gpt-4o-mini"
)

result = await ai.generate(
    client=client,
    system_prompt="You are a customer service agent",
    prompt="Recent customer complaints?",
    context_query=default.SupportTicket.filter(
        lambda t: std.fts.search(t.description, "complaint issue problem")
    ).order_by(created_at="desc"),
    model="openai:gpt-4o-mini"
)

# 3. add configuration sugar
ai.configure(
    client=client,
    default_model="openai:gpt-4o-mini",
    providers={
        "openai": {"api_key": os.getenv("OPENAI_API_KEY")},
        "anthropic": {"api_key": os.getenv("ANTHROPIC_API_KEY")}
    }
)

# 4. implement fallbacks (or at least elaborate errors so users can catch them)
ai.configure(
    client=client,
    model_fallback=[
        "openai:gpt-4o-mini",
        "anthropic:claude-3-sonnet",
        "local:llama-7b"
    ],
    fallback_on=["rate_limit", "model_unavailable", "context_too_large"]
)

# 5. if you have to use the RAG with the same query a bunch of times for some reason
import functools

generate = functools.partial(
    ai.generate,
    client=client,
    system_prompt=system_prompt,
    model="openai:gpt-4o-mini",
    context_query=context_query,
)

# extra stuff:
# 1. context postprocessing (reranking, filtering etc)
# 2. hybrid search

anbuzin avatar Jun 19 '25 05:06 anbuzin

That is all assuming we need the RAG API in the first place. Honestly, I can think of maybe one use case:

from pydantic_ai import Agent, RunContext

agent = Agent("openai:gpt-4o")

@agent.tool
async def extract_medical_findings(
    ctx: RunContext[Context], 
    patient_id: int,
    query: str
) -> str:
    """Extract symptoms and diagnoses from patient medical records"""
    
    findings = await ai.generate(
        client=ctx.deps,
        prompt=f"Extract symptoms and diagnoses: {query}",
        context_query=default.MedicalRecord.filter(
            lambda r: r.patient_id == patient_id
        ),
        model="specialized:medical-llm",
        system_prompt="Extract only verified symptoms and diagnoses from clinical notes..."
    )
    
    return findings


result = await agent.run(
    "What symptoms has patient 123 reported for chest pain?",
    deps=db_client
)

anbuzin avatar Jun 19 '25 06:06 anbuzin