"""LLM-based metric base class (agent_inspect.metrics.scorer.llm_based_metric)."""

from abc import abstractmethod

from typing import Any, Dict, Optional

from agent_inspect.metrics.scorer.metric import Metric
from agent_inspect.clients.llm_client import LLMClient
from agent_inspect.models.metrics.agent_trace import AgentDialogueTrace
from agent_inspect.models.metrics.agent_data_sample import EvaluationSample


class LLMBasedMetric(Metric):
    """Abstract base class for metrics that use an LLM as a judge.

    This is a base abstract class that should be extended for actual
    implementations; concrete subclasses must implement :meth:`evaluate`.

    :param llm_client: the client which allows connection to the
        LLM-as-a-judge model for evaluation.
    :param config: configuration for metric initialization. Default to ``None``.
    """

    def __init__(self, llm_client: LLMClient, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)
        # Judge-model client kept for use by concrete subclass implementations.
        self.llm_client = llm_client

    @abstractmethod
    def evaluate(
        self,
        agent_trace: AgentDialogueTrace,
        evaluation_data_sample: EvaluationSample,
    ):
        """Score one evaluation sample against the agent's trajectory.

        This is an abstract method and should be implemented in a concrete
        class.

        :param agent_trace: a
            :obj:`~agent_inspect.models.agent_trace.AgentDialogueTrace` object
            constructed with the agent trajectory information for a given data
            sample.
        :param evaluation_data_sample: a
            :obj:`~agent_inspect.models.agent_data_sample.EvaluationSample`
            object representing a data sample in the evaluation data set.
        :return: a :obj:`~agent_inspect.models.metric_score.NumericalScore`
            object or a :obj:`~typing.List`
            [:obj:`~agent_inspect.models.metric_score.NumericalScore`] object.
        """
        ...

    @staticmethod
    def get_turn_groupings_from_traces(agent_trace, turns_to_run):
        """Build cumulative turn prefixes from a trace.

        Grouping ``i`` (0-based) contains ``agent_trace.turns[0 .. i]``
        inclusive, so callers can evaluate each turn with its full preceding
        context.

        :param agent_trace: trace object exposing a ``turns`` sequence.
        :param turns_to_run: number of prefix groupings to produce; assumed
            to be <= ``len(agent_trace.turns)`` — TODO confirm with callers.
        :return: list of ``turns_to_run`` cumulative slices of ``turns``.
        """
        # Comprehension replaces the original append loop (same output order).
        return [agent_trace.turns[: i + 1] for i in range(turns_to_run)]