def _extract_generated_texts(generation_result) -> List[str]:
    """Validate a generation function's return value and pull out its text list.

    Accepted shapes are a list of strings, or a tuple whose first item is a
    list of strings and which has at least three items (the second and third
    are later stored as the tokens and log-probs columns).

    :param generation_result: The raw value returned by the decorated function.
    :return: The list of generated strings (the same list object, not a copy).
    :raises ValueError: If the value does not match one of the accepted shapes.
    """
    error_message = "Input func must return string list as generated answer at the first return value."

    if isinstance(generation_result, tuple):
        # Items 1 and 2 are read unconditionally for tuple results, so reject
        # short tuples here instead of failing after every metric has been run.
        if len(generation_result) < 3:
            raise ValueError(
                "Input func must return (generated_texts, generated_tokens, "
                "generated_log_probs) when it returns a tuple."
            )
        generated_texts = generation_result[0]
    elif isinstance(generation_result, list):
        generated_texts = generation_result
    else:
        raise ValueError(error_message)

    if not isinstance(generated_texts, list):
        raise ValueError(error_message)
    # Only the first element is inspected; an empty list is accepted as a
    # valid (empty) batch rather than crashing on the index lookup.
    if generated_texts and not isinstance(generated_texts[0], str):
        raise ValueError(error_message)
    return generated_texts


def evaluate_generation(
    metric_inputs: List[MetricInput], metrics: Union[List[str], List[Dict]]
):
    """Build a decorator that scores the texts produced by a generation function.

    The decorated function is called with its original arguments. Its result
    must be either a list of strings, or a tuple whose first three items are
    the list of strings, the generated tokens and the generated log probs.
    Each generated text is written to the ``generated_texts`` attribute of the
    ``metric_inputs`` element at the same position (the inputs are mutated in
    place), and every requested metric found in ``GENERATION_METRIC_FUNC_DICT``
    is then called with ``metric_inputs`` plus its own keyword parameters.

    :param metric_inputs: Objects that receive the generated texts and are
        passed to each metric function.
    :param metrics: Metric specification handed to ``cast_metrics``, which is
        expected to return parallel sequences of metric names and keyword
        parameter dicts.
    :return: A decorator. The wrapped function returns a ``pd.DataFrame`` with
        a ``generated_texts`` column, ``generated_tokens`` and
        ``generated_log_probs`` columns when the function returned a tuple,
        and one column per computed metric.
    :raises ValueError: (when the wrapped function is called) if the function's
        return value does not have one of the accepted shapes.
    """

    def decorator_evaluate_generation(func: Callable):
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> pd.DataFrame:
            generation_result = func(*args, **kwargs)
            generated_str = _extract_generated_texts(generation_result)

            # zip stops at the shorter sequence, so surplus inputs or texts
            # are left untouched.
            for metric_input, generated_text in zip(metric_inputs, generated_str):
                metric_input.generated_texts = generated_text

            metric_scores = {}
            metric_names, metric_params = cast_metrics(metrics)
            for metric_name, metric_param in zip(metric_names, metric_params):
                if metric_name not in GENERATION_METRIC_FUNC_DICT:
                    # Unknown metrics are skipped with a warning, not an error.
                    warnings.warn(
                        f"metric {metric_name} is not in supported metrics: "
                        f"{GENERATION_METRIC_FUNC_DICT.keys()}. "
                        f"{metric_name} will be ignored."
                    )
                    continue
                metric_scores[metric_name] = GENERATION_METRIC_FUNC_DICT[metric_name](
                    metric_inputs=metric_inputs,
                    **metric_param,
                )

            metric_result_df = pd.DataFrame(metric_scores)
            execution_result_df = pd.DataFrame({"generated_texts": generated_str})
            if isinstance(generation_result, tuple):
                execution_result_df["generated_tokens"] = generation_result[1]
                execution_result_df["generated_log_probs"] = generation_result[2]

            # Generation outputs first, metric scores after, aligned by row index.
            return pd.concat([execution_result_df, metric_result_df], axis=1)

        return wrapper

    return decorator_evaluate_generation