Class Introduction

Function

This class wraps the text-to-text, image-to-image, and text-to-image chains of RAG SDK and gives them access to MxRAGCache. When the cache is not hit, model inference is performed and the result is written to the cache.

Prototype

from mx_rag.cache import CacheChainChat

CacheChainChat(cache,chain,convert_data_to_cache,convert_data_to_user)

Parameters

Parameter

Data Type

Required/Optional

Description

cache

MxRAGCache

Required

RAG SDK cache.

chain

Chain

Required

RAG SDK chain, which is used to access foundation models.

convert_data_to_cache

Callable[[Any], Dict]

Optional

Conversion callback function to convert user data into the character string format.

By default, no conversion is performed.

convert_data_to_user

Callable[[Dict], Any]

Optional

This callback function is used together with convert_data_to_cache. When a user question hits the cache, the cached data is converted from the format stored in the cache back to the user format.

By default, no conversion is performed.

Example

import time
from paddle.base import libpaddle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from mx_rag.chain import SingleText2TextChain
from mx_rag.document.loader import DocxLoader
from mx_rag.embedding.local import TextEmbedding
from mx_rag.knowledge import KnowledgeDB
from mx_rag.knowledge.knowledge import KnowledgeStore
from mx_rag.llm import Text2TextLLM
from mx_rag.storage.document_store import SQLiteDocstore
from mx_rag.knowledge.handler import upload_files
from mx_rag.document import LoaderMng
from mx_rag.storage.vectorstore import MindFAISS
from mx_rag.utils import ClientParam
from mx_rag.cache import CacheChainChat, MxRAGCache, SimilarityCacheConfig

# Vector dimension, shared by the vector index and embedding settings below.
dim = 1024
# NPU ID
dev = 0

# Settings for the similarity cache: where cached vectors are indexed, how
# cached entries are stored, which embedding model encodes questions, and
# which reranker scores the similarity between questions.
similarity_config = SimilarityCacheConfig(
    vector_config={
        "vector_type": "npu_faiss_db",
        "x_dim": dim,
        "devs": [dev],
    },
    cache_config="sqlite",
    emb_config={
        "embedding_type": "local_text_embedding",
        "x_dim": dim,
        "model_path": "/path to emb",  # Embedding model path
        "dev_id": dev,
    },
    similarity_config={
        "similarity_type": "local_reranker",
        "model_path": "/path to reranker",  # Reranker path
        "dev_id": dev,
    },
    retrieval_top_k=1,
    cache_size=1000,
    clean_size=20,
    similarity_threshold=0.86,
    data_save_folder="/save path",  # Flushing path
    disable_report=True,
)

# Initialize the cache. It is created exactly once: a second MxRAGCache built
# from the same name and configuration would only duplicate the setup work.
cache = MxRAGCache("similarity_cache", similarity_config)
# Step 1: Register document handling tools before building a knowledge base offline.
loader_mng = LoaderMng()
# Register the document loader (provided by RAG SDK or LangChain) for .docx files.
loader_mng.register_loader(DocxLoader, [".docx"])
# Register the LangChain document splitter together with its chunking parameters.
splitter_file_types = [".xlsx", ".docx", ".pdf"]
splitter_params = {"chunk_size": 200, "chunk_overlap": 50, "keep_separator": False}
loader_mng.register_splitter(RecursiveCharacterTextSplitter, splitter_file_types, splitter_params)

# Embedding model; its embed_documents method is passed to upload_files below.
emb = TextEmbedding(model_path="/path to emb", dev_id=dev)

# Relational database for document chunks.
chunk_store = SQLiteDocstore(db_path="./sql.db")
# Relational database for knowledge management.
knowledge_store = KnowledgeStore(db_path="./sql.db")
# Vector store used for vector retrieval.
vector_store = MindFAISS(
    x_dim=dim,
    devs=[dev],
    load_local_index="./faiss.index",
)

# Add a knowledge base and its administrator.
knowledge_store.add_knowledge(knowledge_name="test", user_id="Default", role="admin")
# Knowledge base management object tying the three stores together.
knowledge_db = KnowledgeDB(
    knowledge_store=knowledge_store,
    chunk_store=chunk_store,
    vector_store=vector_store,
    knowledge_name="test",
    user_id="Default",
    white_paths=["/home"],
)
# Build the offline knowledge base by uploading the domain-specific knowledge files.
upload_files(
    knowledge_db,
    ["/path to files"],
    loader_mng=loader_mng,
    embed_func=emb.embed_documents,
    force=True,
)
# Step 2: Answer questions online. Initialize the retriever first.
retriever = vector_store.as_retriever(document_store=chunk_store,
                                      embed_func=emb.embed_documents, k=3, score_threshold=0.3)

# Configure the text-to-text LLM chain. Change the IP address and port number based on the actual situation.
llm = Text2TextLLM(base_url="https://<ip>:<port>",
                   model_name="Llama3-8B-Chinese-Chat",
                   client_param=ClientParam(ca_file="/path/to/ca.crt"))
text2text_chain = SingleText2TextChain(llm=llm, retriever=retriever)
# Wrap the chain so that queries go through the cache.
cache_chain = CacheChainChat(chain=text2text_chain, cache=cache)

# First query: expected to miss the cache, so the answer comes from model inference.
# time.perf_counter() is used because it is the clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump.
start_time = time.perf_counter()
res = cache_chain.query("Describe the requirements of the composition test of the 2024 National College Entrance Examination.")
end_time = time.perf_counter()
print(f"no cache query time cost:{(end_time - start_time) * 1000}ms")
print(f"no cache answer {res}")

# Second query: a rephrasing of the first question, expected to be served from the cache.
start_time = time.perf_counter()
res = cache_chain.query("What are the requirements of the composition test of the 2024 National College Entrance Examination?")
end_time = time.perf_counter()
print(f"cache query time cost:{(end_time - start_time) * 1000}ms")
print(f"cache answer {res}")