"""LLM-based metric base class (agent_inspect.metrics.scorer.llm_based_metric)."""

from abc import abstractmethod

from typing import Any, Dict, Optional

from agent_inspect.metrics.scorer.metric import Metric
from agent_inspect.clients.llm_client import LLMClient
from agent_inspect.models.metrics.agent_trace import AgentDialogueTrace
from agent_inspect.models.metrics.agent_data_sample import EvaluationSample


class LLMBasedMetric(Metric):
    """Abstract base class for metrics that use an LLM as a judge.

    This is a base abstract class that should be extended for actual
    implementations; concrete subclasses must implement :meth:`evaluate`.

    :param llm_client: the client which allows connection to the
        LLM-as-a-judge model for evaluation.
    :param config: configuration for metric initialization. Default to ``None``.
    """

    def __init__(self, llm_client: LLMClient, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)
        # Judge-model client kept for use by concrete subclass implementations.
        self.llm_client = llm_client

    @abstractmethod
    def evaluate(
        self,
        agent_trace: AgentDialogueTrace,
        evaluation_data_sample: EvaluationSample,
    ):
        """Score one evaluation sample against the agent's trajectory.

        This is an abstract method and should be implemented in a concrete
        class.

        :param agent_trace: a
            :obj:`~agent_inspect.models.agent_trace.AgentDialogueTrace` object
            constructed with the agent trajectory information for a given data
            sample.
        :param evaluation_data_sample: a
            :obj:`~agent_inspect.models.agent_data_sample.EvaluationSample`
            object representing a data sample in the evaluation data set.
        :return: a :obj:`~agent_inspect.models.metric_score.NumericalScore`
            object or a :obj:`~typing.List`
            [:obj:`~agent_inspect.models.metric_score.NumericalScore`] object.
        """
        ...

    @staticmethod
    def get_turn_groupings_from_traces(agent_trace, turns_to_run):
        """Build cumulative turn prefixes from a trace.

        Grouping ``i`` (0-based) contains ``agent_trace.turns[0 .. i]``
        inclusive, so callers can evaluate each turn with its full preceding
        context.

        :param agent_trace: trace object exposing a ``turns`` sequence.
        :param turns_to_run: number of prefix groupings to produce; assumed
            to be <= ``len(agent_trace.turns)`` — TODO confirm with callers.
        :return: list of ``turns_to_run`` cumulative slices of ``turns``.
        """
        # Comprehension replaces the original append loop (same output order).
        return [agent_trace.turns[: i + 1] for i in range(turns_to_run)]