| | """ |
| | 生成示例数据脚本 |
| | 用于测试审核系统 |
| | """ |
| | import os |
| | import json |
| | from pathlib import Path |
| |
|
| | def create_sample_dataset(): |
| | """创建示例数据集""" |
| | |
| | base_path = Path("./dataset") |
| | |
| | |
| | sources = ["Apache_Echarts", "Plotly", "ChartJS"] |
| | chart_types = { |
| | "Apache_Echarts": ["bar", "line", "pie"], |
| | "Plotly": ["scatter", "bar", "heatmap"], |
| | "ChartJS": ["line", "doughnut", "radar"] |
| | } |
| | models = ["gpt-4", "claude-3", "gemini-pro"] |
| | |
| | for source in sources: |
| | for chart_type in chart_types[source]: |
| | |
| | web_dir = base_path / "web" / source / chart_type |
| | label_dir = base_path / "label" / source / chart_type |
| | web_dir.mkdir(parents=True, exist_ok=True) |
| | label_dir.mkdir(parents=True, exist_ok=True) |
| | |
| | for model in models: |
| | qa_dir = base_path / "question_answer" / source / chart_type / model |
| | qa_dir.mkdir(parents=True, exist_ok=True) |
| | |
| | |
| | for i in range(1, 4): |
| | chart_id = f"chart_{str(i).zfill(4)}_{chart_type}" |
| | |
| | |
| | html_content = f"""<!DOCTYPE html> |
| | <html> |
| | <head> |
| | <meta charset="UTF-8"> |
| | <title>{chart_id}</title> |
| | <script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script> |
| | <style> |
| | body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }} |
| | #chart {{ width: 100%; height: 400px; }} |
| | .title {{ text-align: center; color: #333; margin-bottom: 20px; }} |
| | </style> |
| | </head> |
| | <body> |
| | <h2 class="title">示例图表 - {source} - {chart_type} #{i}</h2> |
| | <div id="chart"></div> |
| | <script> |
| | var chart = echarts.init(document.getElementById('chart')); |
| | var option = {{ |
| | title: {{ text: 'Sample {chart_type.capitalize()} Chart' }}, |
| | tooltip: {{}}, |
| | xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }}, |
| | yAxis: {{}}, |
| | series: [{{ |
| | type: '{chart_type}', |
| | data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100] |
| | }}] |
| | }}; |
| | chart.setOption(option); |
| | </script> |
| | </body> |
| | </html>""" |
| | |
| | with open(web_dir / f"{chart_id}.html", "w", encoding="utf-8") as f: |
| | f.write(html_content) |
| | |
| | |
| | label_data = { |
| | "Number": str(i).zfill(4), |
| | "Type": chart_type, |
| | "Source": source, |
| | "Weblink": f"https://example.com/{source}/{chart_type}/{i}", |
| | "Topic": f"Sample {chart_type} chart #{i}", |
| | "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.", |
| | "Other": "" |
| | } |
| | |
| | with open(label_dir / f"{chart_id}.json", "w", encoding="utf-8") as f: |
| | json.dump(label_data, f, ensure_ascii=False, indent=2) |
| | |
| | |
| | for j, model in enumerate(models): |
| | qa_dir = base_path / "question_answer" / source / chart_type / model |
| | |
| | for q in range(1, 3): |
| | qa_data = { |
| | "id": f"{chart_id}_q{q}", |
| | "chart": chart_id, |
| | "question": f"在图表 {chart_id} 中,第 {q} 个数据点的值是多少?", |
| | "answer": f"约为 {int(50 + q * 10 + j * 5)}" |
| | } |
| | |
| | with open(qa_dir / f"{chart_id}_q{q}.json", "w", encoding="utf-8") as f: |
| | json.dump(qa_data, f, ensure_ascii=False, indent=2) |
| | |
| | print("✅ 示例数据集创建完成!") |
| | print(f"📁 数据集位置: {base_path.absolute()}") |
| | |
| | |
| | total_charts = sum(len(chart_types[s]) * 3 for s in sources) |
| | total_qa = total_charts * len(models) * 2 |
| | print(f"📊 共创建 {total_charts} 个图表") |
| | print(f"❓ 共创建 {total_qa} 个问答对") |
| |
|
| |
|
| | if __name__ == "__main__": |
| | create_sample_dataset() |
| |
|