<!-- rag_eval.html -->
  1. <!DOCTYPE html>
  2. <html lang="zh-CN">
  3. <head>
  4. <meta charset="UTF-8">
  5. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6. <title>RAG 评估系统</title>
  7. <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
  8. <style>
  9. * {
  10. margin: 0;
  11. padding: 0;
  12. box-sizing: border-box;
  13. }
  14. body {
  15. font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
  16. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  17. min-height: 100vh;
  18. padding: 20px;
  19. }
  20. .container {
  21. max-width: 1200px;
  22. margin: 0 auto;
  23. }
  24. .header {
  25. text-align: center;
  26. color: white;
  27. margin-bottom: 30px;
  28. }
  29. .header h1 {
  30. font-size: 2.5rem;
  31. font-weight: 600;
  32. margin-bottom: 8px;
  33. text-shadow: 0 2px 4px rgba(0,0,0,0.1);
  34. }
  35. .header p {
  36. font-size: 1.1rem;
  37. opacity: 0.9;
  38. }
  39. .card {
  40. background: white;
  41. border-radius: 16px;
  42. box-shadow: 0 10px 40px rgba(0,0,0,0.15);
  43. padding: 30px;
  44. margin-bottom: 24px;
  45. }
  46. .card-title {
  47. font-size: 1.25rem;
  48. font-weight: 600;
  49. color: #1a1a2e;
  50. margin-bottom: 20px;
  51. padding-bottom: 12px;
  52. border-bottom: 2px solid #f0f0f5;
  53. }
  54. .form-grid {
  55. display: grid;
  56. grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
  57. gap: 20px;
  58. }
  59. .form-group {
  60. display: flex;
  61. flex-direction: column;
  62. }
  63. .form-group.full-width {
  64. grid-column: 1 / -1;
  65. }
  66. .form-group label {
  67. font-size: 0.875rem;
  68. font-weight: 500;
  69. color: #4a4a6a;
  70. margin-bottom: 6px;
  71. }
  72. .form-group label .required {
  73. color: #e74c3c;
  74. margin-left: 2px;
  75. }
  76. .form-group input,
  77. .form-group select {
  78. padding: 12px 16px;
  79. border: 1.5px solid #e0e0e8;
  80. border-radius: 10px;
  81. font-size: 0.95rem;
  82. transition: all 0.2s ease;
  83. background: #fafafa;
  84. }
  85. .form-group input:focus,
  86. .form-group select:focus {
  87. outline: none;
  88. border-color: #667eea;
  89. background: white;
  90. box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.15);
  91. }
  92. .form-group input::placeholder {
  93. color: #9999aa;
  94. }
  95. .btn {
  96. padding: 14px 32px;
  97. border: none;
  98. border-radius: 10px;
  99. font-size: 1rem;
  100. font-weight: 600;
  101. cursor: pointer;
  102. transition: all 0.2s ease;
  103. display: inline-flex;
  104. align-items: center;
  105. justify-content: center;
  106. gap: 8px;
  107. }
  108. .btn-primary {
  109. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  110. color: white;
  111. }
  112. .btn-primary:hover {
  113. transform: translateY(-2px);
  114. box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4);
  115. }
  116. .btn-primary:disabled {
  117. opacity: 0.6;
  118. cursor: not-allowed;
  119. transform: none;
  120. }
  121. .btn-secondary {
  122. background: #f0f0f5;
  123. color: #4a4a6a;
  124. }
  125. .btn-secondary:hover {
  126. background: #e5e5ed;
  127. }
  128. .button-group {
  129. display: flex;
  130. gap: 12px;
  131. margin-top: 20px;
  132. }
  133. .results-section {
  134. display: none;
  135. }
  136. .results-section.show {
  137. display: block;
  138. }
  139. .metrics-grid {
  140. display: grid;
  141. grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
  142. gap: 16px;
  143. margin-bottom: 30px;
  144. }
  145. .metric-card {
  146. background: linear-gradient(135deg, #f8f9ff 0%, #f0f4ff 100%);
  147. border-radius: 12px;
  148. padding: 20px;
  149. text-align: center;
  150. border: 1px solid #e8ecf8;
  151. }
  152. .metric-card .value {
  153. font-size: 2rem;
  154. font-weight: 700;
  155. color: #667eea;
  156. margin-bottom: 4px;
  157. }
  158. .metric-card .label {
  159. font-size: 0.85rem;
  160. color: #6a6a8a;
  161. text-transform: capitalize;
  162. }
  163. .chart-container {
  164. position: relative;
  165. height: 350px;
  166. margin-bottom: 20px;
  167. }
  168. .loading-overlay {
  169. display: none;
  170. position: fixed;
  171. top: 0;
  172. left: 0;
  173. right: 0;
  174. bottom: 0;
  175. background: rgba(0,0,0,0.5);
  176. z-index: 1000;
  177. justify-content: center;
  178. align-items: center;
  179. }
  180. .loading-overlay.show {
  181. display: flex;
  182. }
  183. .loading-box {
  184. background: white;
  185. border-radius: 16px;
  186. padding: 40px 60px;
  187. text-align: center;
  188. box-shadow: 0 20px 60px rgba(0,0,0,0.3);
  189. }
  190. .spinner {
  191. width: 50px;
  192. height: 50px;
  193. border: 4px solid #f0f0f5;
  194. border-top-color: #667eea;
  195. border-radius: 50%;
  196. animation: spin 1s linear infinite;
  197. margin: 0 auto 20px;
  198. }
  199. @keyframes spin {
  200. to { transform: rotate(360deg); }
  201. }
  202. .loading-text {
  203. font-size: 1.1rem;
  204. color: #4a4a6a;
  205. }
  206. .info-table {
  207. width: 100%;
  208. border-collapse: collapse;
  209. margin-top: 20px;
  210. }
  211. .info-table th,
  212. .info-table td {
  213. padding: 12px 16px;
  214. text-align: left;
  215. border-bottom: 1px solid #f0f0f5;
  216. }
  217. .info-table th {
  218. background: #f8f9fc;
  219. font-weight: 600;
  220. color: #4a4a6a;
  221. font-size: 0.85rem;
  222. }
  223. .info-table td {
  224. color: #2a2a4a;
  225. font-size: 0.9rem;
  226. }
  227. .info-table tr:hover {
  228. background: #fafbfd;
  229. }
  230. .badge {
  231. display: inline-block;
  232. padding: 4px 10px;
  233. border-radius: 20px;
  234. font-size: 0.75rem;
  235. font-weight: 500;
  236. }
  237. .badge-success {
  238. background: #d4edda;
  239. color: #155724;
  240. }
  241. .badge-info {
  242. background: #e7f1ff;
  243. color: #0056b3;
  244. }
  245. .error-message {
  246. background: #fff5f5;
  247. border: 1px solid #fed7d7;
  248. border-radius: 10px;
  249. padding: 16px 20px;
  250. color: #c53030;
  251. margin-top: 20px;
  252. display: none;
  253. }
  254. .error-message.show {
  255. display: block;
  256. }
  257. .charts-grid {
  258. display: grid;
  259. grid-template-columns: 1fr 1fr;
  260. gap: 24px;
  261. }
  262. @media (max-width: 768px) {
  263. .charts-grid {
  264. grid-template-columns: 1fr;
  265. }
  266. .header h1 {
  267. font-size: 1.8rem;
  268. }
  269. }
  270. </style>
  271. </head>
  272. <body>
  273. <div class="container">
  274. <div class="header">
  275. <h1>RAG 评估系统</h1>
  276. <p>基于 RAGAS 框架的 RAG 系统质量评估工具</p>
  277. </div>
  278. <div class="card">
  279. <div class="card-title">评估配置</div>
  <!-- Evaluation settings. Each label is bound to its control through `for`
       so that clicking the label focuses the field and assistive technology
       can announce the field name. The element ids are read by the script. -->
  <div class="form-grid">
    <div class="form-group full-width">
      <label for="fileUrl">文件URL <span class="required">*</span></label>
      <input type="text" id="fileUrl" placeholder="请输入JSON文件的网络地址 (包含question和ground_truth)">
    </div>
    <div class="form-group full-width">
      <label for="knowledgeIds">知识库ID <span class="required">*</span></label>
      <input type="text" id="knowledgeIds" placeholder="多个ID用英文逗号分隔,如: id1, id2, id3">
    </div>
    <div class="form-group">
      <label for="embeddingId">嵌入模型</label>
      <select id="embeddingId">
        <option value="e5">E5 (默认)</option>
        <option value="multilingual-e5-large-instruct">Multilingual E5 Large</option>
        <option value="bge">BGE</option>
      </select>
    </div>
    <div class="form-group">
      <label for="model">LLM 模型</label>
      <select id="model">
        <option value="Qwen3-Coder-30B-loft">Qwen3-Coder-30B-loft (默认)</option>
        <option value="deepseek-chat">DeepSeek Chat</option>
      </select>
    </div>
    <div class="form-group">
      <label for="temperature">Temperature</label>
      <input type="number" id="temperature" value="0.6" min="0" max="2" step="0.1">
    </div>
    <div class="form-group">
      <label for="topP">Top P</label>
      <input type="number" id="topP" value="0.7" min="0" max="1" step="0.1">
    </div>
    <div class="form-group">
      <label for="maxTokens">Max Tokens</label>
      <input type="number" id="maxTokens" value="4096" min="256" max="32768" step="256">
    </div>
    <div class="form-group">
      <label for="sliceCount">检索切片数量</label>
      <input type="number" id="sliceCount" value="5" min="1" max="20">
    </div>
  </div>
  321. <div class="button-group">
  322. <button class="btn btn-primary" id="submitBtn" onclick="runEvaluation()">
  323. <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  324. <polygon points="5 3 19 12 5 21 5 3"></polygon>
  325. </svg>
  326. 开始评估
  327. </button>
  328. <button class="btn btn-secondary" onclick="resetForm()">
  329. <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  330. <path d="M3 12a9 9 0 1 0 9-9 9.75 9.75 0 0 0-6.74 2.74L3 8"></path>
  331. <path d="M3 3v5h5"></path>
  332. </svg>
  333. 重置
  334. </button>
  335. </div>
  336. <div class="error-message" id="errorMessage"></div>
  337. </div>
  338. <div class="results-section" id="resultsSection">
  339. <div class="card">
  340. <div class="card-title">评估结果概览</div>
  341. <div class="metrics-grid" id="metricsGrid"></div>
  342. <div class="charts-grid">
  343. <div>
  344. <h4 style="margin-bottom: 16px; color: #4a4a6a;">指标雷达图</h4>
  345. <div class="chart-container">
  346. <canvas id="radarChart"></canvas>
  347. </div>
  348. </div>
  349. <div>
  350. <h4 style="margin-bottom: 16px; color: #4a4a6a;">指标柱状图</h4>
  351. <div class="chart-container">
  352. <canvas id="barChart"></canvas>
  353. </div>
  354. </div>
  355. </div>
  356. </div>
  357. <div class="card">
  358. <div class="card-title">评估信息</div>
  359. <table class="info-table">
  360. <tbody id="infoTable"></tbody>
  361. </table>
  362. </div>
  363. </div>
  364. </div>
  365. <div class="loading-overlay" id="loadingOverlay">
  366. <div class="loading-box">
  367. <div class="spinner"></div>
  368. <div class="loading-text">正在评估中,请稍候...</div>
  369. <div style="font-size: 0.85rem; color: #999; margin-top: 8px;">评估可能需要几分钟时间</div>
  370. </div>
  371. </div>
  372. <script>
  // Chart instances currently drawn on the #radarChart and #barChart canvases.
  // They live at script scope so that renderCharts() can destroy the previous
  // instances before it creates new ones; null means nothing has been drawn yet.
  let radarChart = null;
  let barChart = null;
  /**
   * Put a message into the error banner and make the banner visible.
   *
   * The text is assigned through `textContent`, so it is shown literally.
   *
   * @param {string} message - Text to display in the #errorMessage element.
   */
  function showError(message) {
    const banner = document.getElementById('errorMessage');
    const { classList } = banner;
    banner.textContent = message;
    classList.add('show');
  }
  /**
   * Hide the error banner by removing its `show` class.
   * The banner text itself is left untouched.
   */
  function hideError() {
    const banner = document.getElementById('errorMessage');
    banner.classList.remove('show');
  }
  /**
   * Switch the busy state of the page on or off.
   *
   * While busy, the #loadingOverlay element carries the `show` class and the
   * #submitBtn button is disabled; otherwise both are reverted.
   *
   * @param {boolean} show - Truthy to enter the busy state, falsy to leave it.
   */
  function showLoading(show) {
    const busy = Boolean(show);
    const overlay = document.getElementById('loadingOverlay');
    const submitButton = document.getElementById('submitBtn');
    overlay.classList.toggle('show', busy);
    submitButton.disabled = busy;
  }
  /**
   * Restore every form control to its initial value, hide the results
   * section and clear any visible error banner.
   */
  function resetForm() {
    // Element id -> value written back on reset.
    const initialValues = {
      fileUrl: '',
      knowledgeIds: '',
      embeddingId: 'e5',
      model: 'Qwen3-Coder-30B-loft',
      temperature: '0.6',
      topP: '0.7',
      maxTokens: '4096',
      sliceCount: '5',
    };
    Object.keys(initialValues).forEach((id) => {
      document.getElementById(id).value = initialValues[id];
    });
    document.getElementById('resultsSection').classList.remove('show');
    hideError();
  }
  /**
   * Read the evaluation form, POST it as JSON to `/rag/evaluate`, and either
   * render the returned data or show an error banner.
   *
   * The file URL and at least one knowledge-base ID are required; when either
   * is missing an error is shown and no request is sent. The loading overlay
   * is shown for the duration of the request and always removed afterwards.
   *
   * A response counts as successful only when its JSON body has `code === 200`;
   * in that case `data` is passed to renderResults() and the results section
   * is revealed.
   *
   * @returns {Promise<void>} Resolves once the request has been handled;
   *   failures are reported through the error banner rather than rejected.
   */
  async function runEvaluation() {
    hideError();

    const fileUrl = document.getElementById('fileUrl').value.trim();
    const knowledgeIds = document.getElementById('knowledgeIds').value
      .split(',')
      .map((part) => part.trim())
      .filter((part) => part);

    if (!fileUrl) {
      showError('请输入文件URL');
      return;
    }
    // Validate the parsed list, not the raw text, so that input consisting
    // only of separators (e.g. "," or " , ") is rejected as well.
    if (knowledgeIds.length === 0) {
      showError('请输入知识库ID');
      return;
    }

    const payload = {
      file_url: fileUrl,
      knowledge_ids: knowledgeIds,
      embedding_id: document.getElementById('embeddingId').value,
      model: document.getElementById('model').value,
      temperature: Number.parseFloat(document.getElementById('temperature').value),
      top_p: Number.parseFloat(document.getElementById('topP').value),
      max_tokens: Number.parseInt(document.getElementById('maxTokens').value, 10),
      slice_count: Number.parseInt(document.getElementById('sliceCount').value, 10),
    };

    showLoading(true);
    try {
      const response = await fetch('/rag/evaluate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });

      let result;
      try {
        result = await response.json();
      } catch (parseError) {
        // The body was not JSON (for example an HTML error page); the HTTP
        // status is more useful to the user than the JSON parser's message.
        showError('请求失败: HTTP ' + response.status);
        return;
      }

      if (!result || result.code !== 200) {
        showError((result && result.message) || '评估失败');
        return;
      }

      // A successful envelope without `data` is rendered as an empty result
      // instead of failing with a TypeError.
      renderResults(result.data || {});
      document.getElementById('resultsSection').classList.add('show');
    } catch (err) {
      showError('请求失败: ' + err.message);
    } finally {
      showLoading(false);
    }
  }
  /**
   * Render one evaluation result: the metric cards, the two charts (through
   * renderCharts) and the information table.
   *
   * All values taken from `data` are inserted as text nodes, never as markup,
   * so metric names or knowledge-base IDs containing HTML are shown literally.
   *
   * @param {Object} data - Result object; the fields read are:
   *   `metrics` (metric key -> score, multiplied by 100 for display),
   *   `knowledge_ids` (array shown as badges), `count` and `metric_names`
   *   (only its length is used). Missing fields fall back to empty values.
   */
  function renderResults(data) {
    const result = data || {};
    const metrics = result.metrics || {};
    const knowledgeIds = Array.isArray(result.knowledge_ids) ? result.knowledge_ids : [];

    // Display names for known metric keys; unknown keys are shown verbatim.
    const metricLabels = {
      'faithfulness': '忠实度',
      'answer_correctness': '答案正确性',
      'answer_relevancy': '答案相关性',
      'context_precision': '上下文精确度',
      'context_recall': '上下文召回率',
      'noise_sensitivity': '噪声敏感度',
      'noise_sensitivity(mode=relevant)': '噪声敏感度',
      'noise_sensitivity(mode=irrelevant)': '噪声敏感度(无关)'
    };

    // Create an element with an optional class name and optional text content.
    const createElement = (tag, className, text) => {
      const node = document.createElement(tag);
      if (className) {
        node.className = className;
      }
      if (text !== undefined) {
        node.textContent = text;
      }
      return node;
    };

    // Format a score as a percentage with one decimal; anything that is not a
    // finite number (null, undefined, NaN, non-numeric text) becomes "N/A".
    const formatScore = (value) => {
      const score = value === null || value === undefined ? NaN : Number(value);
      return Number.isFinite(score) ? `${(score * 100).toFixed(1)}%` : 'N/A';
    };

    // Metric cards
    const grid = document.getElementById('metricsGrid');
    grid.textContent = ''; // drop the cards of a previous run
    for (const [key, value] of Object.entries(metrics)) {
      const card = createElement('div', 'metric-card');
      card.appendChild(createElement('div', 'value', formatScore(value)));
      card.appendChild(createElement('div', 'label', metricLabels[key] || key));
      grid.appendChild(card);
    }

    // Charts
    renderCharts(metrics, metricLabels);

    // Information table
    const infoTable = document.getElementById('infoTable');
    infoTable.textContent = ''; // drop the rows of a previous run

    // Append a <tr> made of a heading cell and the given data cell.
    const addRow = (heading, cell, headingWidth) => {
      const row = document.createElement('tr');
      const headingCell = createElement('th', '', heading);
      if (headingWidth) {
        headingCell.style.width = headingWidth;
      }
      row.appendChild(headingCell);
      row.appendChild(cell);
      infoTable.appendChild(row);
    };

    const idCell = document.createElement('td');
    knowledgeIds.forEach((id, index) => {
      if (index > 0) {
        idCell.appendChild(document.createTextNode(' '));
      }
      idCell.appendChild(createElement('span', 'badge badge-info', id));
    });
    addRow('知识库ID', idCell, '180px');

    addRow('评估数据量', createElement('td', '', `${result.count || 0} 条`));

    const metricCount = result.metric_names ? result.metric_names.length : 0;
    addRow('评估指标数', createElement('td', '', `${metricCount} 个`));

    const statusCell = document.createElement('td');
    statusCell.appendChild(createElement('span', 'badge badge-success', '完成'));
    addRow('评估状态', statusCell);
  }
  /**
   * Draw the radar chart (#radarChart) and the bar chart (#barChart) for one
   * set of metric scores, replacing any charts drawn by an earlier call.
   *
   * Both charts use a fixed 0–1 value axis whose ticks are labelled as
   * percentages. Scores that are not finite numbers are plotted as 0.
   *
   * @param {Object} metrics - Metric key -> score.
   * @param {Object} metricLabels - Metric key -> display label; keys without
   *   an entry are labelled with the key itself.
   */
  function renderCharts(metrics, metricLabels) {
    // Without the Chart global (e.g. its script failed to load) the charts are
    // skipped so the rest of the results can still be shown.
    if (typeof Chart === 'undefined') {
      console.warn('Chart.js is not available; skipping chart rendering.');
      return;
    }

    const palette = [
      'rgba(102, 126, 234, 0.8)',
      'rgba(118, 75, 162, 0.8)',
      'rgba(52, 152, 219, 0.8)',
      'rgba(46, 204, 113, 0.8)',
      'rgba(241, 196, 15, 0.8)',
      'rgba(231, 76, 60, 0.8)',
      'rgba(155, 89, 182, 0.8)',
      'rgba(26, 188, 156, 0.8)'
    ];

    // null, undefined, NaN and non-numeric values are plotted as 0.
    const toScore = (value) => {
      const score = value === null || value === undefined ? NaN : Number(value);
      return Number.isFinite(score) ? score : 0;
    };

    // Rounded to two decimals before printing so that float noise such as
    // 0.55 * 100 === 55.00000000000001 never reaches the axis labels.
    const formatPercentTick = (v) => `${Number((v * 100).toFixed(2))}%`;

    const entries = Object.entries(metrics || {});
    const labels = entries.map(([key]) => (metricLabels && metricLabels[key]) || key);
    const values = entries.map(([, value]) => toScore(value));
    // One colour per bar, cycling through the palette when there are more
    // metrics than colours.
    const barColors = values.map((_, index) => palette[index % palette.length]);

    // Destroy the previous instances so the canvases can be reused, and clear
    // the references in case creating the new charts fails.
    if (radarChart) radarChart.destroy();
    if (barChart) barChart.destroy();
    radarChart = null;
    barChart = null;

    // Radar chart
    const radarCtx = document.getElementById('radarChart').getContext('2d');
    radarChart = new Chart(radarCtx, {
      type: 'radar',
      data: {
        labels: labels,
        datasets: [{
          label: '评估得分',
          data: values,
          backgroundColor: 'rgba(102, 126, 234, 0.2)',
          borderColor: 'rgba(102, 126, 234, 1)',
          borderWidth: 2,
          pointBackgroundColor: 'rgba(102, 126, 234, 1)',
          pointRadius: 4
        }]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        scales: {
          r: {
            beginAtZero: true,
            max: 1,
            ticks: {
              stepSize: 0.2,
              callback: formatPercentTick
            }
          }
        },
        plugins: {
          legend: { display: false }
        }
      }
    });

    // Bar chart
    const barCtx = document.getElementById('barChart').getContext('2d');
    barChart = new Chart(barCtx, {
      type: 'bar',
      data: {
        labels: labels,
        datasets: [{
          label: '评估得分',
          data: values,
          backgroundColor: barColors,
          borderRadius: 6
        }]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        scales: {
          y: {
            beginAtZero: true,
            max: 1,
            ticks: {
              callback: formatPercentTick
            }
          }
        },
        plugins: {
          legend: { display: false }
        }
      }
    });
  }
  582. </script>
  583. </body>
  584. </html>