metrics_20250908_144859.json 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. {
  2. "timestamp": "2025-09-08T14:48:59.419044",
  3. "config": {
  4. "rag_api": {
  5. "url": "http://localhost:6000/rag/chat",
  6. "timeout": 60,
  7. "max_retries": 3,
  8. "retry_delay": 1
  9. },
  10. "rag_request": {
  11. "appId": "2924812721300312064",
  12. "desc": "高井信息员工手册,出差自律制度等",
  13. "isDeepThink": "N",
  14. "knowledgeIds": [
  15. "a2963496869283893248",
  16. "a2963501316240183296"
  17. ],
  18. "knowledgeInfo": "{\"param_desc\":\"strict\",\"show_recall_result\":true,\"recall_method\":\"embedding\",\"rerank_status\":true,\"rerank_model_name\":\"rerank\",\"slice_config_type\":\"customized\",\"rerank_index_type_list\":[{\"index_type_id\":0,\"knowledge_id\":[\"a2924802310568022016\"]}],\"recall_index_type_list\":[{\"index_type_id\":0,\"knowledge_id\":[\"a2924802310568022016\"]}]}",
  19. "maxToken": 8192,
  20. "model": "DeepSeek-R1-Distill-Qwen-14B",
  21. "name": "高井信息公司管理制度",
  22. "params": {},
  23. "prompt": "你是一位知识检索助手,你必须并且只能从我发送的众多知识片段中寻找能够解决用户输入问题的最优答案,并且在执行任务的过程中严格执行规定的要求。\n\n知识片段如下:\n{{知识}}\n\n规定要求:\n- 找到答案就仅使用知识片段中的原文回答用户的提问;\n- 找不到答案就用自身知识并且告诉用户该信息不是来自文档;\n- 所引用的文本片段中所包含的示意图占位符必须进行返回,占位符格式参考:【示意图序号_编号】\n - 严禁输出任何知识片段中不存在的示意图占位符;\n - 输出的内容必须删除其中包含的任何图注、序号等信息。例如:\"进入登录页面(图1.1)\"需要从文字中删除图序,回复效果为:\"进入登录页面\";\"如图所示1.1\",回复效果为:\"如图所示\";\n- 格式规范\n - 文档中会出现包含表格的情况,表格是以图片标识符的形式呈现,表格中缺失数据时候返回空单元格;\n - 如果需要用到表格中的数据,以markdown格式输出表格中的数据;\n - 避免使用代码块语法回复信息;\n - 回复的开头语不要输出诸如:\"我想\",\"我认为\",\"think\"等相关语义的文本。\n\n严格执行规定要求,不要复述问题,直接开始回答。\n\n用户输入问题:\n{{用户}}",
  24. "status": "3",
  25. "temperature": "0.01",
  26. "topP": "0.5",
  27. "typeId": 40,
  28. "embeddingId": "multilingual-e5-large-instruct"
  29. },
  30. "evaluation": {
  31. "val_data_path": "val_data.json",
  32. "batch_size": 3,
  33. "output_dir": "evaluation_results",
  34. "save_csv": true
  35. }
  36. },
  37. "metrics": {
  38. "total_questions": 15,
  39. "answered_questions": 0,
  40. "answer_rate": 0.0,
  41. "context_retrieval_rate": 0.0,
  42. "error_rate": 0.0,
  43. "exact_match_rate": 0.0,
  44. "partial_match_rate": 0.0,
  45. "semantic_match_rate": 0.0,
  46. "avg_response_time": 9.368593708674114,
  47. "total_evaluation_time": 60.65332102775574,
  48. "questions_per_second": 0.2473071506362497
  49. },
  50. "stats": {
  51. "total_questions": 15,
  52. "successful_calls": 0,
  53. "failed_calls": 0,
  54. "empty_answers": 15,
  55. "response_times": [
  56. 6.133613586425781,
  57. 6.838666200637817,
  58. 17.820696592330933,
  59. 18.263955116271973,
  60. 18.381253004074097,
  61. 18.541946411132812,
  62. 5.703543186187744,
  63. 5.749252557754517,
  64. 5.817278861999512,
  65. 4.993794918060303,
  66. 5.812741756439209,
  67. 5.845863342285156,
  68. 6.829167604446411,
  69. 6.828743934631348,
  70. 6.968388557434082
  71. ],
  72. "start_time": 1757314078.7646284,
  73. "end_time": 1757314139.4179494
  74. }
  75. }