autorag.vectordb package

Submodules

autorag.vectordb.base module

class autorag.vectordb.base.BaseVectorStore(embedding_model: str, similarity_metric: str = 'cosine', embedding_batch: int = 100)[source]

Bases: object

abstract async add(ids: List[str], texts: List[str])[source]
abstract async delete(ids: List[str])[source]
abstract async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

abstract async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

abstract async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]
support_similarity_metrics = ['l2', 'ip', 'cosine']
truncated_inputs(inputs: List[str]) → List[str][source]

autorag.vectordb.chroma module

class autorag.vectordb.chroma.Chroma(embedding_model: str, collection_name: str, embedding_batch: int = 100, client_type: str = 'persistent', similarity_metric: str = 'cosine', path: str | None = None, host: str = 'localhost', port: int = 8000, ssl: bool = False, headers: Dict[str, str] | None = None, api_key: str | None = None, tenant: str = 'default_tenant', database: str = 'default_database')[source]

Bases: BaseVectorStore

async add(ids: List[str], texts: List[str])[source]
async delete(ids: List[str])[source]
async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]

autorag.vectordb.couchbase module

class autorag.vectordb.couchbase.Couchbase(embedding_model: str, bucket_name: str, scope_name: str, collection_name: str, index_name: str, embedding_batch: int = 100, connection_string: str = '', username: str = '', password: str = '', ingest_batch: int = 100, text_key: str | None = 'text', embedding_key: str | None = 'embedding', scoped_index: bool = True)[source]

Bases: BaseVectorStore

async add(ids: List[str], texts: List[str])[source]
async delete(ids: List[str])[source]
async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]

autorag.vectordb.milvus module

class autorag.vectordb.milvus.Milvus(embedding_model: str, collection_name: str, embedding_batch: int = 100, similarity_metric: str = 'cosine', uri: str = 'http://localhost:19530', db_name: str = '', token: str = '', user: str = '', password: str = '', timeout: float | None = None)[source]

Bases: BaseVectorStore

async add(ids: List[str], texts: List[str])[source]
async delete(ids: List[str])[source]
delete_collection()[source]
async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]

autorag.vectordb.pinecone module

class autorag.vectordb.pinecone.Pinecone(embedding_model: str, index_name: str, embedding_batch: int = 100, dimension: int = 1536, similarity_metric: str = 'cosine', cloud: str | None = 'aws', region: str | None = 'us-east-1', api_key: str | None = None, deletion_protection: str | None = 'disabled', namespace: str | None = 'default', ingest_batch: int = 200)[source]

Bases: BaseVectorStore

async add(ids: List[str], texts: List[str])[source]
async delete(ids: List[str])[source]
delete_index()[source]
async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]

autorag.vectordb.qdrant module

class autorag.vectordb.qdrant.Qdrant(embedding_model: str, collection_name: str, embedding_batch: int = 100, similarity_metric: str = 'cosine', client_type: str = 'docker', url: str = 'http://localhost:6333', host: str = '', api_key: str = '', dimension: int = 1536, ingest_batch: int = 64, parallel: int = 1, max_retries: int = 3)[source]

Bases: BaseVectorStore

async add(ids: List[str], texts: List[str])[source]
async delete(ids: List[str])[source]
delete_collection()[source]
async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]

autorag.vectordb.weaviate module

class autorag.vectordb.weaviate.Weaviate(embedding_model: str, collection_name: str, embedding_batch: int = 100, similarity_metric: str = 'cosine', client_type: str = 'docker', host: str = 'localhost', port: int = 8080, grpc_port: int = 50051, url: str | None = None, api_key: str | None = None, text_key: str = 'content')[source]

Bases: BaseVectorStore

async add(ids: List[str], texts: List[str])[source]
async delete(ids: List[str])[source]
delete_collection()[source]
async fetch(ids: List[str]) → List[List[float]][source]

Fetch the embeddings of the ids.

async is_exist(ids: List[str]) → List[bool][source]

Check if the ids exist in the Vector DB.

async query(queries: List[str], top_k: int, **kwargs) → Tuple[List[List[str]], List[List[float]]][source]
autorag.vectordb.weaviate.distance_to_score(distance: float, similarity_metric) → float[source]

Module contents

autorag.vectordb.get_support_vectordb(vectordb_name: str)[source]
autorag.vectordb.load_all_vectordb_from_yaml(yaml_path: str, project_dir: str) → List[BaseVectorStore][source]
autorag.vectordb.load_vectordb(vectordb_name: str, **kwargs)[source]
autorag.vectordb.load_vectordb_from_yaml(yaml_path: str, vectordb_name: str, project_dir: str)[source]