delete_files
功能描述
删除知识库中指定的文档。如果知识库中不存在某个文档，则跳过该文档；如果文档列表为空，则会报错。仅知识库管理员有权限执行此操作。
函数原型
from mx_rag.knowledge import delete_files
def delete_files(knowledge, doc_names)
参数说明
| 参数名 | 数据类型 | 可选/必选 | 说明 |
|---|---|---|---|
| knowledge | KnowledgeDB | 必选 | 知识库对象，数据类型参见KnowledgeDB类。 |
| doc_names | List[str] | 必选 | 文档名列表，列表长度不能超过knowledge.max_file_count。 |
调用示例
# Example: build a knowledge base, upload documents into it, then remove one
# of them again with delete_files.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from mx_rag.document import LoaderMng
from mx_rag.document.loader import DocxLoader, ExcelLoader, PdfLoader
from mx_rag.embedding.local import TextEmbedding
from mx_rag.knowledge import (
    FilesLoadInfo,
    KnowledgeDB,
    KnowledgeStore,
    delete_files,
    upload_dir,
    upload_files,
)
from mx_rag.storage.document_store import SQLiteDocstore
from mx_rag.storage.vectorstore import MindFAISS

# Register one loader per supported file extension.
loader_mng = LoaderMng()
loader_mng.register_loader(DocxLoader, [".docx"])
loader_mng.register_loader(PdfLoader, [".pdf"])
loader_mng.register_loader(ExcelLoader, [".xlsx"])
# Optional: also register an image loader (ImageLoader must be imported first).
# loader_mng.register_loader(ImageLoader, [".png"])
loader_mng.register_splitter(
    RecursiveCharacterTextSplitter, [".docx", ".pdf", ".xlsx"]
)

# NPU card used for vector retrieval.
dev = 0

# Load the embedding model.
emb = TextEmbedding("/path/to/model", dev_id=dev)

# Initialize the vector database.
vector_store = MindFAISS(
    x_dim=1024,
    devs=[dev],
    load_local_index="/path/to/index",
    auto_save=True,
)

# Initialize the relational database that stores document chunks.
chunk_store = SQLiteDocstore(db_path="./sql.db")

# Initialize the relational database used for knowledge management.
knowledge_store = KnowledgeStore(db_path="./sql.db")

# Add the knowledge base together with its administrator.
knowledge_store.add_knowledge(
    knowledge_name="test", user_id='Default', role='admin'
)

# Initialize knowledge management.
knowledge_db = KnowledgeDB(
    knowledge_store=knowledge_store,
    chunk_store=chunk_store,
    vector_store=vector_store,
    knowledge_name="test",
    user_id='Default',
    white_paths=["/home/"],
)

# Upload domain knowledge documents by calling upload_files.
upload_files(
    knowledge=knowledge_db,
    files=["/path/data/test.docx"],
    loader_mng=loader_mng,
    embed_func=emb.embed_documents,
    force=True,
)

# Upload a whole directory of domain knowledge documents by calling upload_dir.
params = FilesLoadInfo(
    knowledge=knowledge_db,
    dir_path="/path/data/files",
    loader_mng=loader_mng,
    embed_func=emb.embed_documents,
    force=True,
    load_image=False,
)
upload_dir(params=params)

# Delete the previously uploaded document by name by calling delete_files.
delete_files(knowledge_db, ["test.docx"])
父主题: 文档管理