"""
Generate a sample dataset.

Used for testing the review system.
"""
import os
import json
from pathlib import Path
def _render_chart_html(chart_id, source, chart_type, index):
    """Return the HTML page for one sample chart.

    NOTE(review): the page always loads ECharts and passes ``chart_type``
    straight through as the series type, whatever ``source`` is. Some of the
    configured types (e.g. "doughnut") may not be valid ECharts series types,
    so those pages may not render a chart -- confirm whether that matters
    for the review system before changing the template.
    """
    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>{chart_id}</title>
<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
<style>
body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }}
#chart {{ width: 100%; height: 400px; }}
.title {{ text-align: center; color: #333; margin-bottom: 20px; }}
</style>
</head>
<body>
<h2 class="title">示例图表 - {source} - {chart_type} #{index}</h2>
<div id="chart"></div>
<script>
var chart = echarts.init(document.getElementById('chart'));
var option = {{
title: {{ text: 'Sample {chart_type.capitalize()} Chart' }},
tooltip: {{}},
xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }},
yAxis: {{}},
series: [{{
type: '{chart_type}',
data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100]
}}]
}};
chart.setOption(option);
</script>
</body>
</html>"""


def _write_json(path, payload):
    """Write *payload* to *path* as indented UTF-8 JSON (non-ASCII kept as-is)."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def create_sample_dataset(
    base_path: "str | Path" = "./dataset",
    charts_per_type: int = 3,
    questions_per_chart: int = 2,
) -> None:
    """Create a sample dataset on disk for testing the review system.

    For every (source, chart type) pair this writes, under ``base_path``:

    * ``web/<source>/<type>/<chart_id>.html`` -- a small chart page,
    * ``label/<source>/<type>/<chart_id>.json`` -- the chart's label record,
    * ``question_answer/<source>/<type>/<model>/<chart_id>_q<k>.json`` --
      one question/answer record per model and question.

    Existing files with the same names are overwritten. A short summary is
    printed when generation finishes.

    Args:
        base_path: Root directory of the dataset. Defaults to ``./dataset``
            relative to the current working directory.
        charts_per_type: Number of charts generated for each chart type.
        questions_per_chart: Number of QA records generated for each chart
            and model.

    Raises:
        ValueError: If ``charts_per_type`` or ``questions_per_chart`` is
            negative.
    """
    if charts_per_type < 0 or questions_per_chart < 0:
        raise ValueError(
            "charts_per_type and questions_per_chart must be non-negative"
        )

    base_path = Path(base_path)

    # Sample data configuration: chart types available per source library.
    chart_types = {
        "Apache_Echarts": ["bar", "line", "pie"],
        "Plotly": ["scatter", "bar", "heatmap"],
        "ChartJS": ["line", "doughnut", "radar"],
    }
    models = ["gpt-4", "claude-3", "gemini-pro"]

    # Count what is actually written so the summary cannot drift from the loops.
    total_charts = 0
    total_qa = 0

    for source, types in chart_types.items():
        for chart_type in types:
            # Create the directory layout for this (source, chart type) pair.
            web_dir = base_path / "web" / source / chart_type
            label_dir = base_path / "label" / source / chart_type
            web_dir.mkdir(parents=True, exist_ok=True)
            label_dir.mkdir(parents=True, exist_ok=True)

            # One QA directory per model; remembered so it is built only once.
            qa_dirs = []
            for model in models:
                qa_dir = base_path / "question_answer" / source / chart_type / model
                qa_dir.mkdir(parents=True, exist_ok=True)
                qa_dirs.append(qa_dir)

            for i in range(1, charts_per_type + 1):
                number = f"{i:04d}"
                chart_id = f"chart_{number}_{chart_type}"

                # HTML page for the chart.
                html_content = _render_chart_html(chart_id, source, chart_type, i)
                with open(web_dir / f"{chart_id}.html", "w", encoding="utf-8") as f:
                    f.write(html_content)

                # Label record describing the chart.
                label_data = {
                    "Number": number,
                    "Type": chart_type,
                    "Source": source,
                    "Weblink": f"https://example.com/{source}/{chart_type}/{i}",
                    "Topic": f"Sample {chart_type} chart #{i}",
                    "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.",
                    "Other": "",
                }
                _write_json(label_dir / f"{chart_id}.json", label_data)
                total_charts += 1

                # QA records; the answer varies with the model index j and the
                # question number q so that each file has distinct content.
                for j, qa_dir in enumerate(qa_dirs):
                    for q in range(1, questions_per_chart + 1):
                        qa_data = {
                            "id": f"{chart_id}_q{q}",
                            "chart": chart_id,
                            "question": f"在图表 {chart_id} 中,第 {q} 个数据点的值是多少?",
                            "answer": f"约为 {50 + q * 10 + j * 5}",
                        }
                        _write_json(qa_dir / f"{chart_id}_q{q}.json", qa_data)
                        total_qa += 1

    print("✅ 示例数据集创建完成!")
    print(f"📁 数据集位置: {base_path.absolute()}")
    # Print statistics.
    print(f"📊 共创建 {total_charts} 个图表")
    print(f"❓ 共创建 {total_qa} 个问答对")
# Script entry point: build the sample dataset only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_sample_dataset()
|