Custom LLM Provider¶
Integrate any LLM through the CustomLLMClient interface — internal corporate models, locally-hosted models, or custom API endpoints.
Interface¶
from abc import ABC, abstractmethod

from eval_lib import CustomLLMClient
class CustomLLMClient(ABC):
async def chat_complete(
self,
messages: list[dict[str, str]],
temperature: float = 0.0
) -> tuple[str, float | None]:
"""
Send a chat completion request.
Args:
messages: List of {"role": "...", "content": "..."} dicts
temperature: Sampling temperature
Returns:
Tuple of (response_text, cost_in_usd or None)
"""
...
async def get_embeddings(
self,
texts: list[str],
model: str = ""
) -> tuple[list[list[float]], float | None]:
"""
Optional: Generate embeddings for texts.
Args:
texts: List of texts to embed
model: Model name (ignored for custom implementations)
Returns:
Tuple of (list_of_embedding_vectors, cost_in_usd or None)
"""
raise NotImplementedError
def get_model_name(self) -> str:
"""Return model name for logging and reporting."""
...
Implementation Example¶
Basic HTTP Client¶
import aiohttp
from eval_lib import CustomLLMClient
class MyCorpLLM(CustomLLMClient):
    """Minimal HTTP client for an internal corporate LLM endpoint."""

    def __init__(self, api_url: str, api_key: str):
        # Strip a trailing slash so "{api_url}/chat" never produces "//chat".
        self.api_url = api_url.rstrip("/")
        self.api_key = api_key

    async def chat_complete(self, messages, temperature=0.0):
        """POST the messages to the /chat endpoint.

        Returns:
            Tuple of (response_text, None) — this provider does no cost tracking.

        Raises:
            aiohttp.ClientResponseError: on a non-2xx HTTP status.
        """
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.api_url}/chat",
                json={"messages": messages, "temperature": temperature},
                headers={"Authorization": f"Bearer {self.api_key}"},
            ) as resp:
                # Fail fast on HTTP errors instead of trying to parse an
                # error page as JSON (which yields a confusing decode error).
                resp.raise_for_status()
                data = await resp.json()
                return data["text"], None  # No cost tracking

    def get_model_name(self):
        return "corp-llm-v2"
With Embeddings¶
class MyLLMWithEmbeddings(CustomLLMClient):
    """Skeleton showing a provider that implements the optional embeddings hook.

    ``response_text``, ``cost_usd`` and ``embeddings_list`` are placeholders —
    replace the commented bodies with real provider calls.
    """

    async def chat_complete(self, messages, temperature=0.0):
        """Return (response_text, cost_in_usd or None) for the chat request."""
        # Your chat implementation
        return response_text, cost_usd

    async def get_embeddings(self, texts, model=""):
        """Return (list_of_embedding_vectors, cost_in_usd or None) for texts."""
        # Your embedding implementation
        return embeddings_list, cost_usd

    def get_model_name(self):
        """Model identifier used in logs and reports."""
        return "my-custom-model"
Wrapping an Existing SDK¶
from eval_lib import CustomLLMClient
class TogetherAIClient(CustomLLMClient):
    """Adapter exposing the synchronous Together SDK through the async interface."""

    def __init__(self, api_key: str, model: str = "meta-llama/Meta-Llama-3-70B"):
        # Local import keeps `together` an optional dependency.
        import together
        self.client = together.Together(api_key=api_key)
        self.model = model

    async def chat_complete(self, messages, temperature=0.0):
        """Call the Together chat-completions API.

        The Together SDK is synchronous; running it via ``asyncio.to_thread``
        keeps the blocking HTTP call from stalling the event loop while other
        evaluations are in flight.

        Returns:
            Tuple of (response_text, cost_in_usd or None).
        """
        import asyncio
        response = await asyncio.to_thread(
            self.client.chat.completions.create,
            model=self.model,
            messages=messages,
            temperature=temperature,
        )
        text = response.choices[0].message.content
        cost = None  # Calculate from response.usage if needed
        return text, cost

    def get_model_name(self):
        """Return the configured Together model identifier."""
        return self.model
Usage with Metrics¶
Pass your custom client instance directly as the model parameter:
from eval_lib import AnswerRelevancyMetric, FaithfulnessMetric, evaluate
import asyncio
# Instantiate the custom client with provider-specific connection details.
my_llm = MyCorpLLM(api_url="https://llm.internal.corp", api_key="...")
# Any metric accepts a CustomLLMClient instance via its `model` parameter.
metrics = [
    AnswerRelevancyMetric(model=my_llm, threshold=0.7),
    FaithfulnessMetric(model=my_llm, threshold=0.7),
]
# `test_cases` must be defined elsewhere — presumably a list of eval_lib
# test-case objects; confirm against the evaluate() documentation.
results = asyncio.run(evaluate(test_cases, metrics))
Notes¶
- `chat_complete` is required; `get_embeddings` is optional
- Return `None` for cost if your provider doesn't track costs
- The `messages` format follows the OpenAI chat format: `[{"role": "user", "content": "..."}]`
- All standard metrics work with custom providers