@result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"])
def pure(self, previous_result: pd.DataFrame, *args, **kwargs):
    """
    Run every target module on ``previous_result`` and fuse their outputs.

    Each module in ``self.target_modules`` is called with its matching
    parameter dict from ``self.target_module_params``. The per-module
    ``retrieved_ids`` and ``retrieve_scores`` columns are gathered and handed
    to ``self._pure``, and the contents for the fused ids are looked up in
    ``self.corpus_df``.

    :param previous_result: Result dataframe forwarded to every target module.
    :param kwargs: Extra keyword arguments; only those selected by
        ``pop_params(self._pure, kwargs)`` are passed on to ``self._pure``.
    :return: A ``(contents, ids, scores)`` tuple.
        NOTE(review): the decorator presumably turns this into a dataframe
        with the three listed column names -- confirm against
        ``result_to_dataframe``.
    :raises ValueError: If ``ids`` or ``scores`` is among the parameters
        selected for ``self._pure``.
    """
    # One result dataframe per (module, params) pair, in module order.
    module_results: List[pd.DataFrame] = [
        module.pure(**module_params, previous_result=previous_result)
        for module, module_params in zip(
            self.target_modules, self.target_module_params
        )
    ]

    # Per-module columns converted to plain nested lists.
    ids = tuple(
        result["retrieved_ids"].apply(list).tolist() for result in module_results
    )
    scores = tuple(
        result["retrieve_scores"].apply(list).tolist() for result in module_results
    )

    fusion_params = pop_params(self._pure, kwargs)
    # ids and scores are produced by the target modules above, so the caller
    # is not allowed to supply them here.
    if "ids" in fusion_params or "scores" in fusion_params:
        raise ValueError(
            "With specifying ids or scores, you must use HybridRRF.run_evaluator instead."
        )

    ids, scores = self._pure(ids=ids, scores=scores, **fusion_params)
    contents = fetch_contents(self.corpus_df, ids)
    return contents, ids, scores
def cast_queries(queries: Union[str, List[str]]) -> List[str]:
    """
    Normalize ``queries`` to a list of strings.

    :param queries: Either a single query string or a list of query strings.
    :return: ``[queries]`` when a single string is given; the very same list
        object (not a copy) when a list is given.
    :raises ValueError: If ``queries`` is neither a ``str`` nor a ``list``.
    """
    # Reject unsupported types up front.
    if not isinstance(queries, (str, list)):
        raise ValueError(f"queries must be str or list, but got {type(queries)}")
    # A lone string becomes a one-element list; a list passes through as-is.
    return [queries] if isinstance(queries, str) else queries
def evenly_distribute_passages(
    ids: List[List[str]], scores: List[List[float]], top_k: int
) -> Tuple[List[str], List[float]]:
    """
    Pick about ``top_k`` passages in total, split evenly across the queries.

    Every query contributes its first ``top_k // len(ids)`` passages, and the
    first ``top_k % len(ids)`` queries contribute one more each. The picks
    are concatenated in query order.

    If a query holds fewer passages than its share, the shortfall is not
    filled from other queries, so the result can be shorter than ``top_k``.

    :param ids: One list of passage ids per query.
    :param scores: One list of scores per query, parallel to ``ids``.
    :param top_k: Total number of passages to select.
    :return: A ``(new_ids, new_scores)`` tuple of flat lists. Both are empty
        when ``ids`` is empty.
    :raises AssertionError: If ``ids`` and ``scores`` have different lengths.
    """
    assert len(ids) == len(scores), "ids and scores must have same length."
    query_cnt = len(ids)
    # With no queries there is nothing to distribute; returning early also
    # avoids dividing by zero below.
    if query_cnt == 0:
        return [], []

    avg_len, remainder = divmod(top_k, query_cnt)
    new_ids: List[str] = []
    new_scores: List[float] = []
    for i, (query_ids, query_scores) in enumerate(zip(ids, scores)):
        # The first `remainder` queries absorb the leftover slots, one each.
        take = avg_len + 1 if i < remainder else avg_len
        new_ids.extend(query_ids[:take])
        new_scores.extend(query_scores[:take])
    return new_ids, new_scores