Source code for autorag.nodes.promptmaker.window_replacement

import logging
import os
from typing import List, Dict

import pandas as pd

from autorag.nodes.promptmaker.base import BasePromptMaker
from autorag.utils import result_to_dataframe, fetch_contents

logger = logging.getLogger("AutoRAG")


[docs] class WindowReplacement(BasePromptMaker): def __init__(self, project_dir: str, *args, **kwargs): super().__init__(project_dir, *args, **kwargs) # load corpus data_dir = os.path.join(project_dir, "data") self.corpus_data = pd.read_parquet( os.path.join(data_dir, "corpus.parquet"), engine="pyarrow" )
[docs] @result_to_dataframe(["prompts"]) def pure(self, previous_result: pd.DataFrame, *args, **kwargs): query, retrieved_contents, prompt = self.cast_to_run( previous_result, *args, **kwargs ) retrieved_ids = previous_result["retrieved_ids"].tolist() # get metadata from corpus retrieved_metadata = fetch_contents( self.corpus_data, retrieved_ids, column_name="metadata" ) return self._pure(prompt, query, retrieved_contents, retrieved_metadata)
def _pure( self, prompt: str, queries: List[str], retrieved_contents: List[List[str]], retrieved_metadata: List[List[Dict]], ) -> List[str]: """ Replace retrieved_contents with a window to create a Prompt (only available for corpus chunked with Sentence window method) You must type a prompt or prompt list at a config YAML file like this: .. Code:: yaml nodes: - node_type: prompt_maker modules: - module_type: window_replacement prompt: [Answer this question: {query} \n\n {retrieved_contents}, Read the passages carefully and answer this question: {query} \n\n Passages: {retrieved_contents}] :param prompt: A prompt string. :param queries: List of query strings. :param retrieved_contents: List of retrieved contents. :param retrieved_metadata: List of retrieved metadata. :return: Prompts that are made by window_replacement. """ def window_replacement_row( _prompt: str, _query: str, _retrieved_contents, _retrieved_metadata: List[Dict], ) -> str: window_list = [] for content, metadata in zip(_retrieved_contents, _retrieved_metadata): if "window" in metadata: window_list.append(metadata["window"]) else: window_list.append(content) logger.info( "Only available for corpus chunked with Sentence window method." "window_replacement will not proceed." ) contents_str = "\n\n".join(window_list) return _prompt.format(query=_query, retrieved_contents=contents_str) return list( map( lambda x: window_replacement_row(prompt, x[0], x[1], x[2]), zip(queries, retrieved_contents, retrieved_metadata), ) )