| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
import json
import os
import sys

import pandas as pd
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas import evaluate
from ragas.metrics import (
    NoiseSensitivity,
    answer_correctness,
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)
# --- 1. Model endpoints and clients ---
# Each setting can be overridden through an environment variable of the same
# name; the defaults are the values that used to be hard-coded here.
VLLM_LLM_BASE = os.environ.get("VLLM_LLM_BASE", "http://xia0miduo.gicp.net:8102/v1")
VLLM_LLM_KEY = os.environ.get("VLLM_LLM_KEY", "vllm-dummy-key")
VLLM_EMBEDDING_BASE = os.environ.get("VLLM_EMBEDDING_BASE", "http://10.168.100.17:8787/v1/")
# NOTE(review): the embedding model name has always been the empty string in
# this script. Presumably the embedding server ignores it; confirm, or set
# VLLM_EMBEDDING_MODEL to the served model name.
VLLM_EMBEDDING_MODEL = os.environ.get("VLLM_EMBEDDING_MODEL", "")

# Chat model used as the judge LLM for the metrics.
vllm_generator = ChatOpenAI(
    model="Qwen3-Coder-30B-loft",
    base_url=VLLM_LLM_BASE,
    api_key=VLLM_LLM_KEY,
    temperature=0,
    max_tokens=512,
)

# Embedding client; it reuses the LLM key for the embedding endpoint.
vllm_embeddings = OpenAIEmbeddings(
    model=VLLM_EMBEDDING_MODEL,
    base_url=VLLM_EMBEDDING_BASE,
    api_key=VLLM_LLM_KEY,
    # By default the client tokenises inputs with an OpenAI tokenizer and
    # sends token ids. A non-OpenAI backend would interpret those ids with a
    # different vocabulary, so send the raw text instead.
    check_embedding_ctx_length=False,
)
# --- 2. Load and prepare the dataset ---
def load_data_from_json(json_path):
    """Load evaluation records from a JSON file into a ``datasets.Dataset``.

    Expected JSON structure::

        [
            {
                "question": "...",
                "answer": "...",
                "contexts": ["...", "..."],
                "ground_truth": "..."
            },
            ...
        ]

    ``question``, ``answer`` and ``contexts`` are required keys. A missing
    ``ground_truth`` key only produces a warning.

    Args:
        json_path: Path of the UTF-8 encoded JSON file.

    Returns:
        The loaded dataset, or ``None`` when the file cannot be read, parsed
        or validated. The reason is printed in that case.
    """
    required_cols = ("question", "answer", "contexts")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data_list = json.load(f)

        # Validate the shape up front so a malformed file yields a readable
        # message instead of an opaque pandas / pyarrow error.
        if not isinstance(data_list, list) or not data_list:
            raise ValueError("JSON 文件的顶层必须是非空的记录列表")
        for idx, record in enumerate(data_list):
            if not isinstance(record, dict):
                raise ValueError(f"第 {idx} 条记录不是 JSON 对象")
            contexts = record.get("contexts")
            if contexts is not None and not (
                isinstance(contexts, list)
                and all(isinstance(item, str) for item in contexts)
            ):
                raise ValueError(f"第 {idx} 条记录的 'contexts' 必须是字符串列表")

        # Same rule as checking the DataFrame columns: a key counts as present
        # when at least one record carries it.
        present_keys = set().union(*(record.keys() for record in data_list))
        missing = [col for col in required_cols if col not in present_keys]
        if missing:
            raise ValueError(
                "JSON 文件必须包含 'question', 'answer', 和 'contexts' 键"
                f" (缺少: {', '.join(missing)})"
            )
        if "ground_truth" not in present_keys:
            print("警告: 未找到 'ground_truth' 列。依赖 'ground_truth' 的指标(如 'context_recall')将无法计算。")

        df = pd.DataFrame(data_list)
        return Dataset.from_pandas(df)
    except Exception as e:
        # Deliberately broad: callers rely on a None return for every kind of
        # load failure (I/O, JSON syntax, validation, conversion).
        print(f"加载数据时出错: {e}")
        return None
# --- 3. Define the metrics ---
# Default set of metrics passed to `evaluate`. `run_evaluation` copies this
# list before filtering, so the module-level list itself is never mutated.
metrics_to_run = [
    faithfulness,
    answer_correctness,
    answer_relevancy,
    context_precision,
    context_recall,  # dropped by run_evaluation when 'ground_truth' is absent
    # The only entry instantiated here, with the judge LLM passed explicitly.
    # NOTE(review): presumably the other entries are ready-made instances
    # exported by ragas.metrics -- confirm against the installed version.
    NoiseSensitivity(llm=vllm_generator),
]
# --- 4. Run the evaluation ---
def _requires_ground_truth(metric):
    """Return True when *metric* needs the 'ground_truth' column."""
    reference_metrics = (answer_correctness, context_precision, context_recall)
    # Identity comparison avoids relying on how metric objects define `==`.
    return isinstance(metric, NoiseSensitivity) or any(
        metric is candidate for candidate in reference_metrics
    )


def _json_default(value):
    """Convert array-like cells (anything with ``tolist``) for ``json.dumps``."""
    if hasattr(value, "tolist"):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def run_evaluation(dataset_path):
    """Load the dataset at *dataset_path*, run the metrics and print the scores.

    Metrics that need a reference answer are skipped when the dataset has no
    'ground_truth' column. The per-sample results are printed as a JSON array.

    Args:
        dataset_path: Path of the JSON dataset file.

    Returns:
        None. The function returns early, after printing the reason, when
        loading fails or no metric is left to run.
    """
    print(f"正在从 {dataset_path} 加载数据...")
    dataset = load_data_from_json(dataset_path)
    if dataset is None:
        print("数据加载失败,退出评估。")
        return
    print(f"成功加载 {len(dataset)} 条评估数据。")

    final_metrics = list(metrics_to_run)
    if 'ground_truth' not in dataset.column_names:
        # Dropping only context_recall is not enough: every metric that
        # compares against the reference answer would fail inside evaluate().
        skipped = [m for m in final_metrics if _requires_ground_truth(m)]
        final_metrics = [m for m in final_metrics if not _requires_ground_truth(m)]
        skipped_names = ", ".join(repr(getattr(m, "name", m)) for m in skipped)
        print(f"由于缺少 'ground_truth',将跳过以下指标: {skipped_names}")
    if not final_metrics:
        print("没有可执行的指标。退出。")
        return

    print("开始 RAGAS 评估... (这可能需要一些时间)")

    # Core step: hand the dataset, metrics and both model clients to RAGAS.
    result = evaluate(
        dataset=dataset,
        metrics=final_metrics,
        llm=vllm_generator,
        embeddings=vllm_embeddings
    )
    print("评估完成!")

    result_df = result.to_pandas()
    # List-valued columns such as 'contexts' may come back as arrays, which
    # json.dumps cannot encode without the default hook.
    print(
        json.dumps(
            result_df.to_dict(orient="records"),
            indent=4,
            ensure_ascii=False,
            default=_json_default,
        )
    )
# --- Script entry point ---
if __name__ == "__main__":
    # An optional first command-line argument overrides the default dataset
    # path; with no argument the script behaves as before.
    input_json_file = sys.argv[1] if len(sys.argv) > 1 else "dataset.json"

    if not os.path.exists(input_json_file):
        print(f"错误: 找不到 {input_json_file}")
        print("请创建一个示例 JSON 文件,结构如下:")
        print("""
[
    {
        "question": "什么是 RAGAS?",
        "answer": "RAGAS 是一个评估 RAG 管道的框架。",
        "contexts": ["RAGAS (Retrieval-Augmented Generation Assessment) 是一个用于评估 RAG 管道的框架。"],
        "ground_truth": "RAGAS 是一个专为评估 RAG 管道设计的框架,它关注检索和生成的质量。"
    }
]
""")
        # Signal the failure to the calling shell instead of exiting with 0.
        sys.exit(1)
    else:
        run_evaluation(input_json_file)
|