query

Function

Implements RAG SDK dialogues.

Prototype

def query(text: str, llm_config, *args, **kwargs)

Parameters

Parameter

Data Type

Required/Optional

Description

text

String

Required

Original question. The value cannot be empty, and its length cannot exceed 1000 × 1000 (1,000,000) characters.

llm_config

LLMParameterConfig

Optional

Parameters for calling the LLM. In this method, the default value of temperature is changed to 0.5 and that of top_p is changed to 0.95. For details about the other parameters, see LLMParameterConfig.

args

List

Optional

Retained only to match the signature of the parent class method. This parameter is not used here.

kwargs

Dict

Optional

Retained only to match the signature of the parent class method. This parameter is not used here.

Return Value

Data Type

Description

Union[Dict, Iterator[Dict]]

Returns a dictionary or iterator. If stream is set to True, an iterator is returned. Otherwise, a dictionary is returned. The dictionary content is as follows:

  • When knowledge sources are retrieved: {"prompt": prompt, "result": data, "source_documents": [{'metadata': xxx, 'page_content': xxx}]}
  • When no knowledge sources are retrieved: {"prompt": prompt, "result": data}

Example

from mx_rag.chain import ParallelText2TextChain
from mx_rag.llm import Text2TextLLM
from mx_rag.embedding.local import TextEmbedding
from mx_rag.storage.vectorstore import MindFAISS
from mx_rag.storage.document_store import SQLiteDocstore
from mx_rag.retrievers import Retriever
from mx_rag.utils import ClientParam
# Device ID shared by the embedding model and the vector index.
dev = 0

# Embedding model loaded from a local path.
emb = TextEmbedding("/path/to/acge_text_embedding/", dev_id=dev)

# LLM client; the CA certificate path is supplied through ClientParam.
client_param = ClientParam(ca_file="/path/to/ca.crt")
llm = Text2TextLLM(
    model_name="Meta-Llama-3-8B-Instruct",
    base_url="https://x.x.x.x:port/v1/chat/completions",
    client_param=client_param,
)

# Vector index and chunk store that back the retriever.
vector_store = MindFAISS(
    x_dim=1024,
    devs=[dev],
    load_local_index="/path/to/faiss.index",
    auto_save=True,
)
chunk_store = SQLiteDocstore(db_path="/path/to/sql.db")

retriever = Retriever(
    vector_store=vector_store,
    document_store=chunk_store,
    embed_func=emb.embed_documents,
    k=1,
    score_threshold=0.6,
)

# Build the chain and run a single query; llm_config is omitted here.
parallel_chain = ParallelText2TextChain(llm=llm, retriever=retriever)
question = "123456"
answer = parallel_chain.query(text=question)
print(answer)