Source code for autorag.nodes.passagereranker.time_reranker

from datetime import datetime
from typing import List, Tuple

from autorag.nodes.passagereranker.base import passage_reranker_node



[docs]
@passage_reranker_node
def time_reranker(
	contents_list: List[List[str]],
	scores_list: List[List[float]],
	ids_list: List[List[str]],
	top_k: int,
	time_list: List[List[datetime]],
) -> Tuple[List[List[str]], List[List[str]], List[List[float]]]:
	"""
	Rerank the passages based on merely the datetime of the passage.
	Within each query, passages are ordered from the most recent datetime to the oldest,
	and only the first ``top_k`` passages are kept.
	Passages sharing the same datetime keep their relative order from the initial ranking.

	It uses 'last_modified_datetime' key in the corpus metadata,
	so the metadata should be in the format of {'last_modified_datetime': datetime.datetime} at the corpus data file.

	A query without passages yields empty lists, and an empty input yields three empty lists.

	:param contents_list: The list of lists of contents
	:param scores_list: The list of lists of scores from the initial ranking
	:param ids_list: The list of lists of ids
	:param top_k: The number of passages to be retrieved after reranking
	:param time_list: The metadata list of lists of datetime.datetime
	    It automatically extracts the 'last_modified_datetime' key from the metadata in the corpus data.
	:return: The reranked contents, ids, and scores
	"""

	def sort_row(contents, scores, ids, times):
		# sorted() is stable even with reverse=True, so passages with equal
		# datetimes stay in their initial ranking order.
		ranked = sorted(
			zip(contents, scores, ids, times),
			key=lambda passage: passage[3],
			reverse=True,
		)[:top_k]
		# Build each column with a comprehension rather than zip(*ranked):
		# unpacking an empty zip raises ValueError when a row has no passages.
		return (
			[passage[0] for passage in ranked],
			[passage[1] for passage in ranked],
			[passage[2] for passage in ranked],
		)

	reranked_contents: List[List[str]] = []
	reranked_scores: List[List[float]] = []
	reranked_ids: List[List[str]] = []
	# Accumulate row by row so that an empty batch simply returns empty lists
	# instead of failing on tuple unpacking.
	for contents, scores, ids, times in zip(
		contents_list, scores_list, ids_list, time_list
	):
		row_contents, row_scores, row_ids = sort_row(contents, scores, ids, times)
		reranked_contents.append(row_contents)
		reranked_scores.append(row_scores)
		reranked_ids.append(row_ids)

	# NOTE: the return order is (contents, ids, scores), not the parameter order.
	return reranked_contents, reranked_ids, reranked_scores