Skip to content

Embedders

Embedder

Bases: Protocol

Dense embedding interface. Implementations must be deterministic.

embed() is for documents/passages (indexing time). embed_query() is for query strings (search time) — some models apply a different instruction prefix for queries vs passages, which improves retrieval recall on asymmetric tasks.

dimension property

dimension: int

Output embedding dimension.

embed

embed(texts: list[str]) -> list[list[float]]

Return one embedding per text, in the same order.

Source code in src/verifiable_rag/embedders/__init__.py
def embed(self, texts: list[str]) -> list[list[float]]:
    """Embed documents/passages (the indexing-time entry point).

    Args:
        texts: The passages to embed.

    Returns:
        One embedding per entry of ``texts``, in the same order.
    """
    ...

embed_query

embed_query(query: str) -> list[float]

Embed a single query string (may use a different prompt prefix).

Source code in src/verifiable_rag/embedders/__init__.py
def embed_query(self, query: str) -> list[float]:
    """Embed one query string (the search-time entry point).

    Implementations may apply a different prompt prefix to queries than
    they do to passages.

    Args:
        query: The query text to embed.

    Returns:
        The embedding of ``query``.
    """
    ...

SentenceTransformerEmbedder

SentenceTransformerEmbedder

SentenceTransformerEmbedder(model_name: str = 'BAAI/bge-small-en-v1.5', query_instruction: str | None = None, normalize: bool = True, batch_size: int = 64, device: str | None = None)

Dense embedder backed by sentence-transformers.

Parameters

model_name: HuggingFace model ID. Defaults to BGE-small-en-v1.5 (384-dim, fast). query_instruction: Prefix prepended to query strings before encoding. None (default) uses the built-in table for known BGE models and no prefix for any other model; set to "" to disable the prefix entirely, even for a known model. normalize: L2-normalise output vectors. Keep True for cosine similarity (which is how LanceDB and most ANN indexes are configured). batch_size: Number of texts per encode call. Tune down on low-VRAM GPUs. device: "cpu", "cuda", "mps", or None for auto-detect.

Source code in src/verifiable_rag/embedders/sentence_transformer.py
def __init__(
    self,
    model_name: str = "BAAI/bge-small-en-v1.5",
    query_instruction: str | None = None,
    normalize: bool = True,
    batch_size: int = 64,
    device: str | None = None,
) -> None:
    """Record the embedder configuration; no model is loaded here.

    Args:
        model_name: Model identifier. Also the lookup key into
            ``_QUERY_INSTRUCTIONS`` when ``query_instruction`` is ``None``.
        query_instruction: Prefix for query strings. ``None`` selects the
            ``_QUERY_INSTRUCTIONS`` entry for ``model_name`` (the empty
            string when there is no entry); any string, including ``""``,
            is used verbatim.
        normalize: Stored as ``_normalize``.
        batch_size: Number of texts per encode call; must be at least 1.
        device: Stored as ``_device``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Reject a non-positive batch size at construction time, mirroring the
    # lower-bound check the other embedders' constructors perform.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    self._model_name = model_name
    # Only an explicit None triggers the table lookup, so callers can pass
    # "" to force "no prefix" even for a model that has a table entry.
    self._query_instruction: str = (
        query_instruction
        if query_instruction is not None
        else _QUERY_INSTRUCTIONS.get(model_name, "")
    )
    self._normalize = normalize
    self._batch_size = batch_size
    self._device = device
    self._model: Any = None  # sentence_transformers.SentenceTransformer — lazy

embed

embed(texts: list[str]) -> list[list[float]]

Embed a batch of document/passage strings.

Source code in src/verifiable_rag/embedders/sentence_transformer.py
def embed(self, texts: list[str]) -> list[list[float]]:
    """Encode document/passage strings as given (no query prefix is added).

    Args:
        texts: The passages to encode.

    Returns:
        The result of ``_encode`` for ``texts``, or ``[]`` when ``texts``
        is empty.
    """
    # An empty batch never reaches the encoder.
    return self._encode(texts) if texts else []

embed_query

embed_query(query: str) -> list[float]

Embed a single query string, prepending the instruction prefix.

Source code in src/verifiable_rag/embedders/sentence_transformer.py
def embed_query(self, query: str) -> list[float]:
    """Encode one query string, with the instruction prefix in front if set.

    Args:
        query: The query text.

    Returns:
        The first (and only requested) row returned by ``_encode``.
    """
    text = query
    if self._query_instruction:
        # A non-empty instruction is simply concatenated in front.
        text = self._query_instruction + query
    encoded = self._encode([text])
    return encoded[0]

CohereEmbedder

CohereEmbedder

CohereEmbedder(model: str = 'embed-english-v3.0', api_key: str | None = None, batch_size: int = 96)

Production-grade embedder backed by Cohere's API.

Parameters

model: Cohere embed model ID. Defaults to "embed-english-v3.0". api_key: Cohere API key. Falls back to the COHERE_API_KEY env var. batch_size: Max texts per API call. Must be between 1 and 96 (Cohere's limit); values outside that range raise ValueError.

Source code in src/verifiable_rag/embedders/cohere_embedder.py
def __init__(
    self,
    model: str = "embed-english-v3.0",
    api_key: str | None = None,
    batch_size: int = 96,
) -> None:
    """Validate the arguments and record the embedder configuration.

    Args:
        model: Model ID; must be a key of ``_COHERE_DIMENSIONS``.
        api_key: API key. When empty or ``None``, the ``COHERE_API_KEY``
            environment variable is read instead (it may be unset).
        batch_size: Texts per API call, from 1 to ``_COHERE_BATCH_LIMIT``.

    Raises:
        ValueError: If ``model`` is unknown or ``batch_size`` is out of
            range.
    """
    if model not in _COHERE_DIMENSIONS:
        message = (
            f"Unknown Cohere embed model {model!r}. "
            f"Known: {sorted(_COHERE_DIMENSIONS)}"
        )
        raise ValueError(message)

    out_of_range = batch_size < 1 or batch_size > _COHERE_BATCH_LIMIT
    if out_of_range:
        message = f"batch_size must be 1-{_COHERE_BATCH_LIMIT}, got {batch_size}"
        raise ValueError(message)

    # An explicit, non-empty key wins over the environment.
    self._api_key = api_key if api_key else os.environ.get("COHERE_API_KEY")
    self._model = model
    self._batch_size = batch_size
    # No client object is created during construction.
    self._client: Any = None

VoyageEmbedder

VoyageEmbedder

VoyageEmbedder(model: str = 'voyage-3', api_key: str | None = None, batch_size: int = 128)

Production-grade embedder backed by Voyage AI's API.

Parameters

model: Voyage model ID. Defaults to "voyage-3" (balanced quality/cost). api_key: Voyage API key. Falls back to the VOYAGE_API_KEY env var. batch_size: Max texts per API call. Must be between 1 and 128 (Voyage's limit); values outside that range raise ValueError.

Source code in src/verifiable_rag/embedders/voyage.py
def __init__(
    self,
    model: str = "voyage-3",
    api_key: str | None = None,
    batch_size: int = 128,
) -> None:
    """Validate the arguments and record the embedder configuration.

    Args:
        model: Model ID; must be a key of ``_VOYAGE_DIMENSIONS``.
        api_key: API key. When empty or ``None``, the ``VOYAGE_API_KEY``
            environment variable is read instead (it may be unset).
        batch_size: Texts per API call, from 1 to ``_VOYAGE_BATCH_LIMIT``.

    Raises:
        ValueError: If ``model`` is unknown or ``batch_size`` is out of
            range.
    """
    if model not in _VOYAGE_DIMENSIONS:
        message = (
            f"Unknown Voyage model {model!r}. "
            f"Known models: {sorted(_VOYAGE_DIMENSIONS)}"
        )
        raise ValueError(message)

    out_of_range = batch_size < 1 or batch_size > _VOYAGE_BATCH_LIMIT
    if out_of_range:
        message = f"batch_size must be 1–{_VOYAGE_BATCH_LIMIT}, got {batch_size}"
        raise ValueError(message)

    # An explicit, non-empty key wins over the environment.
    self._api_key = api_key if api_key else os.environ.get("VOYAGE_API_KEY")
    self._model = model
    self._batch_size = batch_size
    self._client: Any = None  # voyageai.Client — lazy

embed

embed(texts: list[str]) -> list[list[float]]

Embed a batch of document/passage strings.

Source code in src/verifiable_rag/embedders/voyage.py
def embed(self, texts: list[str]) -> list[list[float]]:
    """Embed document/passage strings using the ``"document"`` input type.

    Args:
        texts: The passages to embed.

    Returns:
        The result of ``_call`` for ``texts``, or ``[]`` when ``texts`` is
        empty (in which case ``_call`` is not invoked).
    """
    if texts:
        return self._call(texts, input_type="document")
    return []

embed_query

embed_query(query: str) -> list[float]

Embed a single query string (uses Voyage's 'query' input_type).

Source code in src/verifiable_rag/embedders/voyage.py
def embed_query(self, query: str) -> list[float]:
    """Embed one query string using the ``"query"`` input type.

    Args:
        query: The query text.

    Returns:
        The first row returned by ``_call`` for the one-element batch.
    """
    vectors = self._call([query], input_type="query")
    return vectors[0]