Skip to content

Custom LLM Provider

Integrate any LLM through the CustomLLMClient interface — internal corporate models, locally-hosted models, or custom API endpoints.

Interface

from abc import ABC, abstractmethod

from eval_lib import CustomLLMClient

class CustomLLMClient(ABC):
    async def chat_complete(
        self,
        messages: list[dict[str, str]],
        temperature: float = 0.0
    ) -> tuple[str, float | None]:
        """
        Send a chat completion request.

        Args:
            messages: List of {"role": "...", "content": "..."} dicts
            temperature: Sampling temperature

        Returns:
            Tuple of (response_text, cost_in_usd or None)
        """
        ...

    async def get_embeddings(
        self,
        texts: list[str],
        model: str = ""
    ) -> tuple[list[list[float]], float | None]:
        """
        Optional: Generate embeddings for texts.

        Args:
            texts: List of texts to embed
            model: Model name (ignored for custom implementations)

        Returns:
            Tuple of (list_of_embedding_vectors, cost_in_usd or None)
        """
        raise NotImplementedError

    def get_model_name(self) -> str:
        """Return model name for logging and reporting."""
        ...

Implementation Example

Basic HTTP Client

import aiohttp
from eval_lib import CustomLLMClient

class MyCorpLLM(CustomLLMClient):
    """Minimal aiohttp client for an internal corporate LLM endpoint."""

    def __init__(self, api_url: str, api_key: str):
        self.api_url = api_url
        self.api_key = api_key

    async def chat_complete(self, messages, temperature=0.0):
        """POST the chat request and return (response_text, cost).

        Raises ``aiohttp.ClientResponseError`` on a non-2xx status instead
        of failing later with a confusing error on an unexpected payload.
        """
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.api_url}/chat",
                json={"messages": messages, "temperature": temperature},
                headers={"Authorization": f"Bearer {self.api_key}"}
            ) as resp:
                # Surface HTTP errors explicitly before touching the body.
                resp.raise_for_status()
                data = await resp.json()
                return data["text"], None  # No cost tracking

    def get_model_name(self):
        return "corp-llm-v2"

With Embeddings

class MyLLMWithEmbeddings(CustomLLMClient):
    """Skeleton showing a provider that also implements the optional
    ``get_embeddings`` hook.  ``response_text``, ``cost_usd`` and
    ``embeddings_list`` are placeholders — this class does not run as-is;
    replace the commented bodies with real provider calls.
    """

    async def chat_complete(self, messages, temperature=0.0):
        # Your chat implementation
        # (placeholder: bind response_text / cost_usd from your provider)
        return response_text, cost_usd

    async def get_embeddings(self, texts, model=""):
        # Your embedding implementation
        # (placeholder: bind embeddings_list / cost_usd from your provider)
        return embeddings_list, cost_usd

    def get_model_name(self):
        return "my-custom-model"

Wrapping an Existing SDK

from eval_lib import CustomLLMClient

class TogetherAIClient(CustomLLMClient):
    """Adapter exposing the Together SDK through the CustomLLMClient interface."""

    def __init__(self, api_key: str, model: str = "meta-llama/Meta-Llama-3-70B"):
        # Lazy import keeps the SDK an optional dependency of the example.
        import together
        self.client = together.Together(api_key=api_key)
        self.model = model

    async def chat_complete(self, messages, temperature=0.0):
        """Run the synchronous SDK call without blocking the event loop.

        ``together``'s ``chat.completions.create`` is a blocking call;
        awaiting it inline would stall every other coroutine in the
        evaluation run, so it is dispatched to a worker thread.
        """
        import asyncio

        response = await asyncio.to_thread(
            self.client.chat.completions.create,
            model=self.model,
            messages=messages,
            temperature=temperature,
        )
        text = response.choices[0].message.content
        cost = None  # Calculate from response.usage if needed
        return text, cost

    def get_model_name(self):
        return self.model

Usage with Metrics

Pass your custom client instance directly as the model parameter:

from eval_lib import AnswerRelevancyMetric, FaithfulnessMetric, evaluate
import asyncio

# A CustomLLMClient instance is passed directly as the metric's `model`
# argument, in place of a provider model-name string.
my_llm = MyCorpLLM(api_url="https://llm.internal.corp", api_key="...")

metrics = [
    AnswerRelevancyMetric(model=my_llm, threshold=0.7),
    FaithfulnessMetric(model=my_llm, threshold=0.7),
]

# NOTE(review): `test_cases` is not defined in this snippet — presumably a
# list of eval_lib test cases built earlier; confirm against the library docs.
results = asyncio.run(evaluate(test_cases, metrics))

Notes

  • chat_complete is required; get_embeddings is optional
  • Return None for cost if your provider doesn't track costs
  • The messages format follows the OpenAI chat format: [{"role": "user", "content": "..."}]
  • All standard metrics work with custom providers