Skip to content

Generators

PromptedCitedGenerator

PromptedCitedGenerator

PromptedCitedGenerator(model: str = 'claude-haiku-4-5-20251001', temperature: float = 0.0, max_tokens: int = 1024, system_prompt: str | None = None, api_base: str | None = None, num_retries: int = 3)

Baseline cited generator using prompt instructions + LiteLLM.

Parameters

model: LiteLLM model identifier, e.g. "claude-haiku-4-5-20251001" or "openai/gpt-4o-mini". See LiteLLM docs for the full list. temperature: Sampling temperature. Default 0 for deterministic citations. max_tokens: Hard cap on generated tokens. Defaults to 1024. system_prompt: Override the default system prompt. api_base: Override LiteLLM's default API base (e.g. for local models). num_retries: LiteLLM-level retry count for transient errors. Defaults to 3.

Source code in src/verifiable_rag/generators/prompted.py
def __init__(
    self,
    model: str = "claude-haiku-4-5-20251001",
    temperature: float = 0.0,
    max_tokens: int = 1024,
    system_prompt: str | None = None,
    api_base: str | None = None,
    num_retries: int = 3,
) -> None:
    """Validate the numeric settings and store the generator configuration.

    Args:
        model: LiteLLM model identifier; stored as given.
        temperature: Sampling temperature; must lie in [0, 2].
        max_tokens: Cap on generated tokens; must be at least 1.
        system_prompt: Replacement system prompt. Any falsy value
            (``None`` or an empty string) selects ``_DEFAULT_SYSTEM_PROMPT``.
        api_base: Optional API base override; stored as given.
        num_retries: Retry count; must be >= 0. Stored as given.

    Raises:
        ValueError: If ``temperature``, ``max_tokens`` or ``num_retries``
            is out of range. The checks run in that order and only the
            first violation is reported.
    """
    # The chain is lazy: later conditions are not evaluated once one fails.
    problem = None
    if not (0.0 <= temperature <= 2.0):
        problem = f"temperature must be in [0, 2], got {temperature}"
    elif max_tokens < 1:
        problem = f"max_tokens must be positive, got {max_tokens}"
    elif num_retries < 0:
        problem = f"num_retries must be >= 0, got {num_retries}"
    if problem is not None:
        raise ValueError(problem)

    self._model = model
    self._temperature = temperature
    self._max_tokens = max_tokens
    # Truthiness test (not ``is None``): an empty prompt also uses the default.
    self._system_prompt = (
        system_prompt if system_prompt else _DEFAULT_SYSTEM_PROMPT
    )
    self._api_base = api_base
    self._num_retries = num_retries

generate

generate(query: str, chunks: list[RetrievedChunk], documents: dict[str, Document]) -> list[CitedSentence]

Call the LLM and parse its cited output.

Source code in src/verifiable_rag/generators/prompted.py
def generate(
    self,
    query: str,
    chunks: list[RetrievedChunk],
    documents: dict[str, Document],
) -> list[CitedSentence]:
    """Build a cited-answer prompt, call the LLM, and parse the reply.

    Args:
        query: The user question; surrounding whitespace is stripped
            before it is placed in the prompt.
        chunks: Retrieved chunks passed to ``_format_sources``.
        documents: Document lookup passed to ``_format_sources``.

    Returns:
        The result of ``_parse_output`` on the raw LLM reply. An empty
        list is returned instead when ``chunks`` is empty, when
        ``_format_sources`` yields no source lines, or when
        ``_looks_like_refusal`` flags the reply.
    """
    # Nothing retrieved: skip the LLM call entirely.
    if not chunks:
        return []

    formatted, allowed_ids = self._format_sources(chunks, documents)
    if not formatted:
        # No retrievable sentence text, so there is nothing to ground on.
        return []

    render = _USER_PROMPT_TEMPLATE.format
    prompt = render(query=query.strip(), sources="\n".join(formatted))

    reply = self._call_llm(prompt)
    return (
        []
        if self._looks_like_refusal(reply)
        else self._parse_output(reply, allowed_ids)
    )

ConstrainedCitedGenerator

ConstrainedCitedGenerator

ConstrainedCitedGenerator(model: str = 'anthropic/claude-haiku-4-5', temperature: float = 0.0, max_tokens: int = 2048, system_prompt: str | None = None, api_base: str | None = None, num_retries: int = 3, max_citations_per_sentence: int = _DEFAULT_MAX_CITATIONS)

Constrained-decoding cited generator using LiteLLM structured outputs.

Parameters

model: LiteLLM model identifier. Must support response_format json_schema (Anthropic Sonnet/Haiku, OpenAI GPT-4o family, etc.). temperature: Sampling temperature. Default 0 for deterministic citations. max_tokens: Hard cap on generated tokens. Defaults to 2048 (structured JSON is more verbose than free prose). system_prompt: Override the default system prompt. api_base: Override LiteLLM's default API base (e.g. for local models). num_retries: LiteLLM-level retry count for transient errors. max_citations_per_sentence: Schema-enforced cap on the citations array per output sentence. Default 3.

Source code in src/verifiable_rag/generators/constrained.py
def __init__(
    self,
    model: str = "anthropic/claude-haiku-4-5",
    temperature: float = 0.0,
    max_tokens: int = 2048,
    system_prompt: str | None = None,
    api_base: str | None = None,
    num_retries: int = 3,
    max_citations_per_sentence: int = _DEFAULT_MAX_CITATIONS,
) -> None:
    """Validate the settings and store the constrained-generator config.

    Args:
        model: LiteLLM model identifier; stored as given.
        temperature: Sampling temperature; must lie in [0, 2].
        max_tokens: Cap on generated tokens; must be at least 1.
        system_prompt: Replacement system prompt. Any falsy value
            (``None`` or an empty string) selects ``_DEFAULT_SYSTEM_PROMPT``.
        api_base: Optional API base override; stored as given.
        num_retries: Retry count; must be >= 0.
        max_citations_per_sentence: Cap on citations per output
            sentence; must be >= 1. Stored as ``self._max_citations``.

    Raises:
        ValueError: On the first argument found out of range, checked in
            the order temperature, max_tokens, num_retries,
            max_citations_per_sentence.
    """
    # Guards run one at a time so only the first violation is evaluated.
    temperature_in_range = 0.0 <= temperature <= 2.0
    if not temperature_in_range:
        raise ValueError(f"temperature must be in [0, 2], got {temperature}")

    if max_tokens < 1:
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")

    if num_retries < 0:
        raise ValueError(f"num_retries must be >= 0, got {num_retries}")

    if max_citations_per_sentence < 1:
        message = (
            f"max_citations_per_sentence must be >= 1, got {max_citations_per_sentence}"
        )
        raise ValueError(message)

    self._model = model
    self._temperature = temperature
    self._max_tokens = max_tokens
    # Truthiness test (not ``is None``): an empty prompt also uses the default.
    self._system_prompt = (
        system_prompt if system_prompt else _DEFAULT_SYSTEM_PROMPT
    )
    self._api_base = api_base
    self._num_retries = num_retries
    self._max_citations = max_citations_per_sentence

SAFECitedGenerator

SAFECitedGenerator

SAFECitedGenerator(model: str = 'anthropic/claude-haiku-4-5', temperature: float = 0.0, max_tokens: int = 4096, system_prompt: str | None = None, api_base: str | None = None, num_retries: int = 3, max_citations_per_claim: int = _DEFAULT_MAX_CITATIONS_PER_CLAIM, max_claims_per_sentence: int = _DEFAULT_MAX_CLAIMS_PER_SENTENCE)

SAFE-style atomic-claim cited generator using LiteLLM structured outputs.

Functionally equivalent to ConstrainedCitedGenerator with one architectural addition: each sentence is decomposed into atomic claims, and citations attach to atomic claims rather than whole sentences. This enables fine-grained verification on benchmarks with atomic-claim-level gold (RAGTruth, FaithBench).

For benchmarks with sentence-level gold (LitQA2, ALCE), the parser flattens atomic_claims back into our existing CitedSentence shape so existing eval metrics work unchanged.

Parameters

model: LiteLLM model identifier. Must support response_format json_schema (Anthropic Sonnet/Haiku, OpenAI GPT-4o family, etc.). temperature: Sampling temperature. Default 0 for deterministic citations. max_tokens: Hard cap on generated tokens. Defaults to 4096 — atomic decomposition is more verbose than flat constrained output. system_prompt: Override the default system prompt. api_base: Override LiteLLM's default API base (e.g. for local models). num_retries: LiteLLM-level retry count for transient errors. max_citations_per_claim: Schema-enforced cap on citations per atomic claim. Default 3. max_claims_per_sentence: Schema-enforced cap on atomic claims per output sentence. Default 5.

Source code in src/verifiable_rag/generators/safe.py
def __init__(
    self,
    model: str = "anthropic/claude-haiku-4-5",
    temperature: float = 0.0,
    max_tokens: int = 4096,
    system_prompt: str | None = None,
    api_base: str | None = None,
    num_retries: int = 3,
    max_citations_per_claim: int = _DEFAULT_MAX_CITATIONS_PER_CLAIM,
    max_claims_per_sentence: int = _DEFAULT_MAX_CLAIMS_PER_SENTENCE,
) -> None:
    """Validate the settings and store the atomic-claim generator config.

    Args:
        model: LiteLLM model identifier; stored as given.
        temperature: Sampling temperature; must lie in [0, 2].
        max_tokens: Cap on generated tokens; must be at least 1.
        system_prompt: Replacement system prompt. Any falsy value
            (``None`` or an empty string) selects ``_DEFAULT_SYSTEM_PROMPT``.
        api_base: Optional API base override; stored as given.
        num_retries: Retry count; must be >= 0.
        max_citations_per_claim: Cap on citations per atomic claim;
            must be >= 1. Stored as ``self._max_cites``.
        max_claims_per_sentence: Cap on atomic claims per sentence;
            must be >= 1. Stored as ``self._max_claims``.

    Raises:
        ValueError: On the first argument found out of range, checked in
            signature order starting from ``temperature``.
    """
    # The chain is lazy: later conditions are not evaluated once one fails.
    problem = None
    if not (0.0 <= temperature <= 2.0):
        problem = f"temperature must be in [0, 2], got {temperature}"
    elif max_tokens < 1:
        problem = f"max_tokens must be positive, got {max_tokens}"
    elif num_retries < 0:
        problem = f"num_retries must be >= 0, got {num_retries}"
    elif max_citations_per_claim < 1:
        problem = (
            f"max_citations_per_claim must be >= 1, got {max_citations_per_claim}"
        )
    elif max_claims_per_sentence < 1:
        problem = (
            f"max_claims_per_sentence must be >= 1, got {max_claims_per_sentence}"
        )
    if problem is not None:
        raise ValueError(problem)

    self._model = model
    self._temperature = temperature
    self._max_tokens = max_tokens
    # Truthiness test (not ``is None``): an empty prompt also uses the default.
    self._system_prompt = (
        system_prompt if system_prompt else _DEFAULT_SYSTEM_PROMPT
    )
    self._api_base = api_base
    self._num_retries = num_retries
    self._max_cites = max_citations_per_claim
    self._max_claims = max_claims_per_sentence