query

Function

Implements RAG SDK dialogues.

Prototype

def query(text, llm_config, *args, **kwargs) 

Parameters

Parameter

Data Type

Required/Optional

Description

text

String

Required

Original question. The length of the string must be in the range (0, 1000 × 1000].

llm_config

LLMParameterConfig

Optional

Parameters for calling the LLM. In this method, the default value of temperature is changed to 0.5 and the default value of top_p is changed to 0.95. For details about the other parameters, see LLMParameterConfig.

args

List

Optional

Kept only to match the signature of the parent class method. This parameter is not used here.

kwargs

Dict

Optional

Kept only to match the signature of the parent class method. This parameter is not used here.

Return Value

Data Type

Description

Union[Dict, Iterator[Dict]]

Result returned by an LLM. The dictionary content is as follows:

  • With knowledge source: {"query": query, "result": data, "source_documents": [{'metadata': xxx, 'page_content': xxx}]}
  • Without knowledge source: {"query": query, "result": data}

Example

  • Basic dialogue
from paddle.base import libpaddle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from mx_rag.chain import SingleText2TextChain
from mx_rag.document import LoaderMng
from mx_rag.document.loader import DocxLoader, PdfLoader, PowerPointLoader
from mx_rag.embedding.local import TextEmbedding
from mx_rag.embedding.service import TEIEmbedding
from mx_rag.knowledge import KnowledgeDB, KnowledgeStore
from mx_rag.knowledge.handler import upload_files
from mx_rag.knowledge.knowledge import KnowledgeStore
from mx_rag.llm import Text2TextLLM, Img2TextLLM, LLMParameterConfig
from mx_rag.retrievers import Retriever
from mx_rag.storage.document_store import SQLiteDocstore
from mx_rag.storage.vectorstore import MindFAISS
from mx_rag.utils import ClientParam
from mx_rag.llm.llm_parameter import LLMParameterConfig

# Basic dialogue example: register loaders/splitters, build the knowledge base,
# upload documents, then ask a question through SingleText2TextChain.
loader_mng = LoaderMng()
# Load the document loader provided by mxrag or LangChain.
loader_mng.register_loader(loader_class=PdfLoader, file_types=[".pdf"])
loader_mng.register_loader(loader_class=DocxLoader, file_types=[".docx"])
loader_mng.register_loader(loader_class=PowerPointLoader, file_types=[".pptx"])
# Load the document splitter provided by LangChain.
# The same splitter settings are registered for every listed file type.
loader_mng.register_splitter(splitter_class=RecursiveCharacterTextSplitter,
                             file_types=[".pdf", ".docx", ".txt", ".md", ".xlsx", ".pptx"],
                             splitter_params={"chunk_size": 750,
                                              "chunk_overlap": 150,
                                              "keep_separator": False })

# Device ID shared by the embedding model (dev_id) and the vector store (devs).
dev = 0
# Load the embedding model.
emb = TextEmbedding("/path/to/acge_text_embedding/", dev_id=dev)
# Initialize the vector database.
# NOTE(review): x_dim presumably has to match the embedding model's output dimension - confirm.
vector_store = MindFAISS(x_dim=1024,  devs=[dev],
                                 load_local_index="/path/to/faiss.index",
                                 auto_save=True)
# Initialize the relational database for document chunks.
chunk_store = SQLiteDocstore(db_path="/path/to/sql.db")
# Initialize the relational database for knowledge management.
# In this example it uses the same db_path as the chunk store above.
knowledge_store = KnowledgeStore(db_path="/path/to/sql.db")
# Add a knowledge base.
# The first two arguments match knowledge_name and user_id passed to KnowledgeDB below;
# presumably the third is the user's role - confirm against the KnowledgeStore reference.
knowledge_store.add_knowledge("test", "Default", "admin")
# Initialize knowledge base management.
knowledge_db = KnowledgeDB(knowledge_store=knowledge_store,
                           chunk_store=chunk_store,
                           vector_store=vector_store,
                           knowledge_name="test",
                           white_paths=["/path/"],
                           user_id="Default"
                           )
# Upload a document to the knowledge base.
# NOTE(review): the meaning of the trailing True is not shown here; see the upload_files reference.
upload_files(knowledge_db, ["/path/to/file1", "/path/to/file2"], loader_mng, emb.embed_documents, True)
# Client settings for the HTTPS LLM service; ca_file is the path to the CA certificate.
client_param = ClientParam(ca_file="/path/to/ca.crt")
# LLM used to generate the answer.
llm = Text2TextLLM(model_name="Meta-Llama-3-8B-Instruct", 
                   base_url="https://x.x.x.x:port/v1/chat/completions", 
                   client_param=client_param)
# Retriever over the vector store and chunk store populated above.
r = Retriever(vector_store=vector_store, document_store=chunk_store, embed_func=emb.embed_documents, k=1, score_threshold=0.6)
# Chain that combines the retriever and the LLM.
rag = SingleText2TextChain(retriever=r, llm=llm)
# Ask the question; the second argument is the llm_config described in the Parameters section.
response = rag.query("What modules does the mxVision architecture contain?", LLMParameterConfig(max_tokens = 1024, temperature = 1.0, top_p = 0.1))
# See the Return Value section for the structure of the response.
print(response)
  • Text-image dialogue
from paddle.base import libpaddle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from mx_rag.chain import SingleText2TextChain
from mx_rag.document import LoaderMng
from mx_rag.document.loader import DocxLoader, PdfLoader, PowerPointLoader
from mx_rag.embedding.local import TextEmbedding
from mx_rag.embedding.service import TEIEmbedding
from mx_rag.knowledge import KnowledgeDB, KnowledgeStore
from mx_rag.knowledge.handler import upload_files
from mx_rag.knowledge.knowledge import KnowledgeStore
from mx_rag.llm import Text2TextLLM, Img2TextLLM, LLMParameterConfig
from mx_rag.retrievers import Retriever
from mx_rag.storage.document_store import SQLiteDocstore
from mx_rag.storage.vectorstore import MindFAISS
from mx_rag.utils import ClientParam
from mx_rag.llm.llm_parameter import LLMParameterConfig
from typing import List
from langchain_core.documents import Document

# Text-image dialogue example, part 1: build a knowledge base whose loaders use a VLM.
# Load the VLM used to parse images in documents.
vlm = Img2TextLLM(base_url="https://x.x.x.x:port/openai/v1/chat/completions",
                   model_name="Qwen2.5-VL-7B-Instruct",
                   llm_config=LLMParameterConfig(max_tokens=512),
                   client_param=ClientParam(ca_file="/path/to/ca.crt")
                   )
loader_mng = LoaderMng()
# Load the document loader provided by mxrag or LangChain.
# Each loader receives the VLM through loader_params.
loader_mng.register_loader(loader_class=PdfLoader, file_types=[".pdf"], loader_params={"vlm": vlm})
loader_mng.register_loader(loader_class=DocxLoader, file_types=[".docx"], loader_params={"vlm": vlm})
loader_mng.register_loader(loader_class=PowerPointLoader, file_types=[".pptx"], loader_params={"vlm": vlm})
# Load the document splitter provided by LangChain.
# The same splitter settings are registered for every listed file type.
loader_mng.register_splitter(splitter_class=RecursiveCharacterTextSplitter,
                             file_types=[".pdf", ".docx", ".txt", ".md", ".xlsx", ".pptx"],
                             splitter_params={"chunk_size": 750,
                                              "chunk_overlap": 150,
                                              "keep_separator": False })

# Device ID shared by the embedding model (dev_id) and the vector store (devs).
dev = 0
# Load the embedding model.
emb = TextEmbedding("/path/to/acge_text_embedding/", dev_id=dev)
# Client settings carrying the CA certificate path. This name is assigned again,
# with the same value, right before the dialogue LLM is created.
client_param = ClientParam(ca_file="/path/to/ca.crt")
# Initialize the vector database.
# NOTE(review): x_dim presumably has to match the embedding model's output dimension - confirm.
vector_store = MindFAISS(x_dim=1024,  devs=[dev],
                                 load_local_index="/path/to/faiss.index",
                                 auto_save=True)
# Initialize the relational database for document chunks.
chunk_store = SQLiteDocstore(db_path="/path/to/sql.db")
# Initialize the relational database for knowledge management.
# In this example it uses the same db_path as the chunk store above.
knowledge_store = KnowledgeStore(db_path="/path/to/sql.db")
# Add a knowledge base.
# The first two arguments match knowledge_name and user_id passed to KnowledgeDB below;
# presumably the third is the user's role - confirm against the KnowledgeStore reference.
knowledge_store.add_knowledge("test", "Default", "admin")
# Initialize knowledge base management.
knowledge_db = KnowledgeDB(knowledge_store=knowledge_store,
                           chunk_store=chunk_store,
                           vector_store=vector_store,
                           knowledge_name="test",
                           white_paths=["/path/"],
                           user_id="Default"
                           )
# Upload a document to the knowledge base.
# NOTE(review): the meaning of the trailing True is not shown here; see the upload_files reference.
upload_files(knowledge_db, ["/path/to/file1", "/path/to/file2"], loader_mng, emb.embed_documents, True)
# Define the callback function to integrate the question, retrieved document, and image description to generate user content in model dialogues.
def user_content_builder(query: str, docs: List[Document], *args, **kwargs):
    """Build the user message from the question and the retrieved documents.

    Documents are grouped by ``metadata["type"]``: ``"text"`` entries are
    listed as numbered text quotes and ``"image"`` entries as numbered image
    descriptions. Documents with any other (or missing) type are ignored.

    Args:
        query: Original question, for example "Summarize the key points based
            on the following materials."
        docs: Document objects returned by the retriever. Each one is read
            through ``page_content`` (document content) and ``metadata``.
        *args: Accepted but not used.
        **kwargs: Accepted but not used.

    Returns:
        str: The complete prompt after concatenation, used as the model input.
    """
    text_docs = [doc for doc in docs if doc.metadata.get("type", "") == "text"]
    img_docs = [doc for doc in docs if doc.metadata.get("type", "") == "image"]
    user_message = []
    if text_docs:
        # 1. Add text quotes, numbered from 1.
        user_message.append("Text Quotes are:")
        user_message.extend(
            f"\n[{i}] {doc.page_content}" for i, doc in enumerate(text_docs, start=1)
        )
    if img_docs:
        # 2. Add image quotes (VLM or OCR text). The "imageN" labels are the
        #    indexes the system prompt asks the model to cite.
        user_message.append("\nImage Quotes are:")
        user_message.extend(
            f"\nimage{i} is described as: {doc.page_content}"
            for i, doc in enumerate(img_docs, start=1)
        )
    # 3. Separate the quotes from the question, then add the user question.
    user_message.append("\n\n")
    user_message.append(f"The user question is: {query}")
    return ''.join(user_message)

# System prompt for the dialogue LLM. It tells the model how to select relevant
# text/image quotes, build an interleaved Markdown answer, and cite images as
# `![{conclusion}](imageN)`. It is used as the system message content below.
TEXT_INFER_PROMPT = '''
You are a helpful question-answering assistant. Your task is to generate an interleaved text and image response based on provided questions and quotes. Here's how to refine your process:

1. **Evidence Selection**:
   - From both text and image quotes, pinpoint those really relevant for answering the question. Focus on significance and direct relevance.
   - Each image quote is the description of the image.

2. **Answer Construction**:
   - Use Markdown to embed text and images in your response, avoid using obvious headings or divisions; ensure the response flows naturally and cohesively.
   - Conclude with a direct and concise answer to the question in a simple and clear sentence.

3. **Quote Citation**:
- Cite images using the format `![{conclusion}](image index)`; for the first image, use `![{conclusion}](image1)`;The {conclusion} should be a concise one-sentence summary of the image's content.
   - Ensure the cite of the image must strict follow `![{conclusion}](image index)`, do not simply stating "See image1", "image1 shows" ,"[image1]" or "image1".
   - Each image or text can only be quoted once.

- Do not cite irrelevant quotes.
- Compose a detailed and articulate interleaved answer to the question.
- Ensure that your answer is logical, informative, and directly ties back to the evidence provided by the quotes.
- If Quote contain text and image, answer must contain both text and image response.
- If Quote only contain text, answer must contain text response, do not contain image.
- Answer in chinese.
'''

# Text-image dialogue example, part 2: wire the retriever, the LLM, the system
# prompt, and the custom user-content builder into the chain, then ask a question.
# Client settings for the HTTPS LLM service; ca_file is the path to the CA certificate.
client_param = ClientParam(ca_file="/path/to/ca.crt")
# LLM for dialogues
llm = Text2TextLLM(model_name="Meta-Llama-3-8B-Instruct", 
                   base_url="https://x.x.x.x:port/v1/chat/completions", 
                   client_param=client_param)
# System message list carrying TEXT_INFER_PROMPT as the system role content.
sys_messages=[{"role": "system", "content": TEXT_INFER_PROMPT}]
# Retriever over the vector store and chunk store populated earlier in this example.
r = Retriever(vector_store=vector_store, document_store=chunk_store, embed_func=emb.embed_documents, k=1, score_threshold=0.6)
# The chain receives the system messages and the user_content_builder callback defined above.
rag = SingleText2TextChain(retriever=r, llm=llm, sys_messages=sys_messages, user_content_builder=user_content_builder)
# Ask the question; the second argument is the llm_config described in the Parameters section.
response = rag.query("What modules does the mxVision architecture contain?", LLMParameterConfig(max_tokens = 1024, temperature = 1.0, top_p = 0.1))
# source_documents in the answer may contain images. You can obtain the Base64 code of images from the metadata dictionary.
print(response)