Model Evaluation
The InformationRetrievalEvaluator class of Sentence Transformers is used to evaluate an embedding model on the synthesized evaluation dataset. When the evaluation completes successfully, the following metrics are returned:
{'cosine_accuracy@1', 'cosine_accuracy@3', 'cosine_accuracy@5', 'cosine_accuracy@10', 'cosine_precision@1', 'cosine_precision@3', 'cosine_precision@5', 'cosine_precision@10', 'cosine_recall@1', 'cosine_recall@3', 'cosine_recall@5', 'cosine_recall@10', 'cosine_ndcg@10', 'cosine_mrr@10', 'cosine_map@100', 'dot_accuracy@1', 'dot_accuracy@3', 'dot_accuracy@5', 'dot_accuracy@10', 'dot_precision@1', 'dot_precision@3', 'dot_precision@5', 'dot_precision@10', 'dot_recall@1', 'dot_recall@3', 'dot_recall@5', 'dot_recall@10', 'dot_ndcg@10', 'dot_mrr@10', 'dot_map@100'}
Example
import torch
import torch_npu
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from datasets import load_dataset
# Bind to NPU 0 only when an NPU is actually present; otherwise fall back to
# the CPU instead of failing before the model is even loaded.
use_npu = torch.npu.is_available()
if use_npu:
    torch.npu.set_device(torch.device("npu:0"))
model = SentenceTransformer("model_path", device="npu" if use_npu else "cpu")

# Each record of the JSONL file provides a "query" field and a "corpus" field.
eval_data = load_dataset("json", data_files="evaluate_data.jsonl", split="train")
# Attach a sequential "id" column; a concrete list is passed because
# add_column is documented to take a list or NumPy array.
eval_data = eval_data.add_column("id", list(range(len(eval_data))))

# The evaluator's documented signature uses string IDs, so convert once here.
ids = [str(row_id) for row_id in eval_data["id"]]
corpus = dict(zip(ids, eval_data["corpus"]))
queries = dict(zip(ids, eval_data["query"]))

# Row i's query is considered relevant to row i's corpus passage only, so each
# query maps to a one-element set holding its own ID.
relevant_docs = {q_id: {q_id} for q_id in queries}

evaluator = InformationRetrievalEvaluator(
    queries=queries,
    corpus=corpus,
    relevant_docs=relevant_docs,
    name="model_name",
)
result = evaluator(model)
print(result)
Parent topic: Model Evaluation and Fine-Tuning