File size: 4,511 Bytes
9fce90e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""
Script that generates a sample dataset.
Used for testing the review system.
"""
import os
import json
from pathlib import Path

def _render_chart_html(chart_id, source, chart_type, index):
    """Return the HTML page text for one sample chart.

    Args:
        chart_id: Identifier used as the page title.
        source: Name of the chart library the sample is filed under.
        chart_type: Chart type, inserted verbatim as the series ``type``.
        index: 1-based ordinal of the chart, shown in the page heading.
    """
    # NOTE(review): the template always loads the ECharts bundle and passes
    # chart_type straight through as the series type, regardless of source.
    # Presumably types that only exist in Plotly/ChartJS (e.g. 'doughnut')
    # will not render as intended -- confirm whether that matters here.
    return f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{chart_id}</title>
    <script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
    <style>
        body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }}
        #chart {{ width: 100%; height: 400px; }}
        .title {{ text-align: center; color: #333; margin-bottom: 20px; }}
    </style>
</head>
<body>
    <h2 class="title">示例图表 - {source} - {chart_type} #{index}</h2>
    <div id="chart"></div>
    <script>
        var chart = echarts.init(document.getElementById('chart'));
        var option = {{
            title: {{ text: 'Sample {chart_type.capitalize()} Chart' }},
            tooltip: {{}},
            xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }},
            yAxis: {{}},
            series: [{{
                type: '{chart_type}',
                data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100]
            }}]
        }};
        chart.setOption(option);
    </script>
</body>
</html>"""


def _write_json(path, payload):
    """Write *payload* to *path* as indented UTF-8 JSON without ASCII escaping."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def create_sample_dataset(base_path="./dataset"):
    """Create a sample dataset on disk.

    For every (source, chart type) pair this writes, under *base_path*:

    * ``web/<source>/<type>/<chart_id>.html`` -- a sample chart page,
    * ``label/<source>/<type>/<chart_id>.json`` -- the chart's label record,
    * ``question_answer/<source>/<type>/<model>/<chart_id>_q<n>.json`` --
      one question/answer record per model and question.

    Existing directories are reused and existing files are overwritten.
    A short summary is printed when done.

    Args:
        base_path: Root directory of the dataset (string or path-like).
            Defaults to ``"./dataset"``, the location the script always used.

    Returns:
        None.
    """
    root = Path(base_path)

    # Sample data configuration.
    sources = ["Apache_Echarts", "Plotly", "ChartJS"]
    chart_types = {
        "Apache_Echarts": ["bar", "line", "pie"],
        "Plotly": ["scatter", "bar", "heatmap"],
        "ChartJS": ["line", "doughnut", "radar"],
    }
    models = ["gpt-4", "claude-3", "gemini-pro"]
    charts_per_type = 3
    questions_per_chart = 2

    # Counted as files are written so the summary cannot drift from the loops.
    total_charts = 0
    total_qa = 0

    for source in sources:
        for chart_type in chart_types[source]:
            web_dir = root / "web" / source / chart_type
            label_dir = root / "label" / source / chart_type
            web_dir.mkdir(parents=True, exist_ok=True)
            label_dir.mkdir(parents=True, exist_ok=True)

            # Build each per-model QA directory once and reuse it below.
            qa_dirs = []
            for model in models:
                qa_dir = root / "question_answer" / source / chart_type / model
                qa_dir.mkdir(parents=True, exist_ok=True)
                qa_dirs.append(qa_dir)

            for i in range(1, charts_per_type + 1):
                number = f"{i:04d}"
                chart_id = f"chart_{number}_{chart_type}"

                # HTML page for the chart.
                html_content = _render_chart_html(chart_id, source, chart_type, i)
                (web_dir / f"{chart_id}.html").write_text(
                    html_content, encoding="utf-8"
                )

                # Label record for the chart.
                label_data = {
                    "Number": number,
                    "Type": chart_type,
                    "Source": source,
                    "Weblink": f"https://example.com/{source}/{chart_type}/{i}",
                    "Topic": f"Sample {chart_type} chart #{i}",
                    "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.",
                    "Other": "",
                }
                _write_json(label_dir / f"{chart_id}.json", label_data)
                total_charts += 1

                # QA records: the answer varies with both the question number
                # and the model's position in the list.
                for j, qa_dir in enumerate(qa_dirs):
                    for q in range(1, questions_per_chart + 1):
                        qa_data = {
                            "id": f"{chart_id}_q{q}",
                            "chart": chart_id,
                            "question": f"在图表 {chart_id} 中,第 {q} 个数据点的值是多少?",
                            "answer": f"约为 {50 + q * 10 + j * 5}",
                        }
                        _write_json(qa_dir / f"{chart_id}_q{q}.json", qa_data)
                        total_qa += 1

    print("✅ 示例数据集创建完成!")
    print(f"📁 数据集位置: {root.absolute()}")

    # Summary statistics.
    print(f"📊 共创建 {total_charts} 个图表")
    print(f"❓ 共创建 {total_qa} 个问答对")


if __name__ == "__main__":
    # Run the generator only when this file is executed as a script,
    # not when it is imported as a module.
    create_sample_dataset()