Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for autorag.evaluation.metric.retrieval_contents
"""
This file contains the retrieval contents metrics,
which are calculated from the text contents of the retrieved items.
"""
import itertools
from collections import Counter
import numpy as np
from autorag.evaluation.metric.util import autorag_metric
from autorag.schema.metricinput import MetricInput
from autorag.utils.util import normalize_string
[docs]
def single_token_f1(ground_truth: str, prediction: str):
    """Score the token overlap between one prediction and one ground truth.

    Both strings are passed through ``normalize_string`` and split on
    whitespace. The overlap is the multiset intersection of the two token
    lists, so a repeated token only counts as many times as it appears on
    both sides.

    :param ground_truth: The reference text.
    :param prediction: The text being scored against the reference.
    :return: A ``(precision, recall, f1)`` tuple. When the two texts share
        no token (including when either side has no tokens at all), the
        integer triple ``(0, 0, 0)`` is returned.
    """
    pred_tokens = normalize_string(prediction).split()
    truth_tokens = normalize_string(ground_truth).split()

    # Counter & Counter keeps the minimum count of every shared token.
    shared = Counter(pred_tokens) & Counter(truth_tokens)
    overlap = sum(shared.values())

    # Zero overlap also covers empty token lists, so the divisions below
    # can never see a zero denominator.
    if not overlap:
        return 0, 0, 0

    precision = overlap / len(pred_tokens)
    recall = overlap / len(truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return precision, recall, f1
[docs]
@autorag_metric(fields_to_check=["retrieved_contents", "retrieval_gt_contents"])
def retrieval_token_f1(metric_input: MetricInput):
    """Average, over retrieved contents, of the best token-level F1.

    Every retrieved content is compared against every ground-truth content
    (``retrieval_gt_contents`` is flattened by one level first). For each
    retrieved content the highest F1 over all ground truths is kept, and
    the mean of those best scores is returned.

    :param metric_input: Provides ``retrieved_contents`` and
        ``retrieval_gt_contents``.
    :return: The mean of the per-retrieved-content maximum F1 scores.
    """
    retrieved = metric_input.retrieved_contents
    flat_gt = list(itertools.chain.from_iterable(metric_input.retrieval_gt_contents))

    # Index 2 of single_token_f1's (precision, recall, f1) result is F1.
    # Retrieved-major ordering lets the flat list be reshaped to one row
    # per retrieved content.
    pair_scores = [
        single_token_f1(truth, passage)[2]
        for passage in retrieved
        for truth in flat_gt
    ]

    score_matrix = np.array(pair_scores).reshape(len(retrieved), -1)
    best_per_retrieved = score_matrix.max(axis=1)
    return best_per_retrieved.mean()
[docs]
@autorag_metric(fields_to_check=["retrieved_contents", "retrieval_gt_contents"])
def retrieval_token_precision(metric_input: MetricInput):
    """Average, over retrieved contents, of the best token-level precision.

    Every retrieved content is compared against every ground-truth content
    (``retrieval_gt_contents`` is flattened by one level first). For each
    retrieved content the highest precision over all ground truths is kept,
    and the mean of those best scores is returned.

    :param metric_input: Provides ``retrieved_contents`` and
        ``retrieval_gt_contents``.
    :return: The mean of the per-retrieved-content maximum precision scores.
    """
    retrieved = metric_input.retrieved_contents
    flat_gt = list(itertools.chain.from_iterable(metric_input.retrieval_gt_contents))

    # Index 0 of single_token_f1's (precision, recall, f1) result is
    # precision. Retrieved-major ordering lets the flat list be reshaped to
    # one row per retrieved content.
    pair_scores = [
        single_token_f1(truth, passage)[0]
        for passage in retrieved
        for truth in flat_gt
    ]

    score_matrix = np.array(pair_scores).reshape(len(retrieved), -1)
    best_per_retrieved = score_matrix.max(axis=1)
    return best_per_retrieved.mean()
[docs]
@autorag_metric(fields_to_check=["retrieved_contents", "retrieval_gt_contents"])
def retrieval_token_recall(metric_input: MetricInput):
    """Average, over retrieved contents, of the best token-level recall.

    Every retrieved content is compared against every ground-truth content
    (``retrieval_gt_contents`` is flattened by one level first). For each
    retrieved content the highest recall over all ground truths is kept,
    and the mean of those best scores is returned.

    :param metric_input: Provides ``retrieved_contents`` and
        ``retrieval_gt_contents``.
    :return: The mean of the per-retrieved-content maximum recall scores.
    """
    retrieved = metric_input.retrieved_contents
    flat_gt = list(itertools.chain.from_iterable(metric_input.retrieval_gt_contents))

    # Index 1 of single_token_f1's (precision, recall, f1) result is
    # recall. Retrieved-major ordering lets the flat list be reshaped to
    # one row per retrieved content.
    pair_scores = [
        single_token_f1(truth, passage)[1]
        for passage in retrieved
        for truth in flat_gt
    ]

    score_matrix = np.array(pair_scores).reshape(len(retrieved), -1)
    best_per_retrieved = score_matrix.max(axis=1)
    return best_per_retrieved.mean()