diff --git a/athina/runner/run.py b/athina/runner/run.py
index e170b0d..26da111 100644
--- a/athina/runner/run.py
+++ b/athina/runner/run.py
@@ -1,4 +1,4 @@
-from typing import List, TypedDict, Optional
+from typing import List, TypedDict, Optional, Union
 from athina.datasets.dataset import Dataset
 from athina.helpers.athina_logging_helper import AthinaLoggingHelper
 from athina.evals.llm.llm_evaluator import LlmEvaluator
@@ -190,16 +190,18 @@ def run_suite(
         max_parallel_evals: int = 5,
         dataset_id: Optional[str] = None,
         number_of_rows: Optional[int] = None,
-    ) -> List[LlmBatchEvalResult]:
+        return_format: str = "dataframe",
+    ) -> Union[List[LlmBatchEvalResult], pd.DataFrame]:
         """
         Run a suite of LLM evaluations against a dataset.
 
         Args:
             evals: A list of LlmEvaluator objects.
             data: A list of data points.
+            return_format: The format of the returned object. Can be "dataframe" or "list".
 
         Returns:
-            A list of LlmBatchEvalResult objects.
+            A list of LlmBatchEvalResult objects or a Pandas DataFrame.
         """
         eval_suite_name = "llm_eval_suite" + "_" + ",".join(eval.name for eval in evals)
         AthinaApiService.log_usage(eval_name=eval_suite_name, run_type="suite")
@@ -231,4 +233,9 @@ def run_suite(
         if dataset:
             print(f"You can view your dataset at: {Dataset.dataset_link(dataset_id)}")
 
-        return EvalRunner.to_df(batch_results)
+        if return_format == "dataframe":
+            return EvalRunner.to_df(batch_results)
+        elif return_format == "list":
+            return batch_results
+        else:
+            raise ValueError("Invalid return_format")
diff --git a/pyproject.toml b/pyproject.toml
index 4f4a04b..f45a30f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "athina"
-version = "1.6.32"
+version = "1.6.33"
 description = "Python SDK to configure and run evaluations for your LLM-based application"
 authors = ["Shiv Sakhuja ", "Akshat Gupta ", "Vivek Aditya ", "Akhil Bisht "]
 readme = "README.md"