昇腾社区首页
中文
注册

delete_files

功能描述

将知识库中指定的文档删除,如果知识库中不存在该文档则会跳过,文档列表为空会报错,仅知识库管理员有权限操作。

函数原型

from mx_rag.knowledge import delete_files
def delete_files(knowledge, doc_names)

参数说明

参数名

数据类型

可选/必选

说明

knowledge

KnowledgeDB

必选

知识库对象,数据类型参见KnowledgeDB类

doc_names

List[str]

必选

文档名列表,列表长度不能超过knowledge.max_file_count。

调用示例

from mx_rag.embedding.local import TextEmbedding
from mx_rag.knowledge import KnowledgeStore, KnowledgeDB, upload_files, delete_files, FilesLoadInfo
from mx_rag.document import LoaderMng
from mx_rag.storage.document_store import SQLiteDocstore
from mx_rag.storage.vectorstore import MindFAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from mx_rag.knowledge import upload_dir
from mx_rag.document.loader import DocxLoader, PdfLoader, ExcelLoader

loader_mng = LoaderMng()
loader_mng.register_loader(DocxLoader, [".docx"])
loader_mng.register_loader(PdfLoader, [".pdf"])
loader_mng.register_loader(ExcelLoader, [".xlsx"])
# loader_mng.register_loader(ImageLoader, [".png"])
loader_mng.register_splitter(RecursiveCharacterTextSplitter,
                             [".docx", ".pdf", ".xlsx"])
# 设置向量检索使用的NPU卡
dev = 0
# 加载embedding模型
emb = TextEmbedding("/path/to/model", dev_id=dev)
# 初始化向量数据库
vector_store = MindFAISS(x_dim=1024, devs=[dev], 
                         load_local_index="/path/to/index", auto_save=True)
# 初始化文档chunk 关系数据库
chunk_store = SQLiteDocstore(db_path="./sql.db")
# 初始化知识管理关系数据库
knowledge_store = KnowledgeStore(db_path="./sql.db")
#添加知识库及管理员
knowledge_store.add_knowledge(knowledge_name="test", user_id='Default', role='admin')
# 初始化知识管理
knowledge_db = KnowledgeDB(knowledge_store=knowledge_store, chunk_store=chunk_store, vector_store=vector_store,
                           knowledge_name="test", user_id='Default', white_paths=["/home/"])
# 上传领域知识文档
# 调用upload_files
upload_files(knowledge=knowledge_db, files=["/path/data/test.docx"], loader_mng=loader_mng,
             embed_func=emb.embed_documents, force=True)
# 上传领域知识文档目录
# 调用upload_dir
params = FilesLoadInfo(knowledge=knowledge_db, dir_path="/path/data/files", loader_mng=loader_mng,
                       embed_func=emb.embed_documents, force=True, load_image=False)
upload_dir(params=params)
# 调用delete_files
delete_files(knowledge_db, ["test.docx"])