Spaces:

Pekku
/

Chart

Sleeping

File size: 4,511 Bytes

9fce90e

"""
Script that generates a sample dataset.
Used for testing the review system.
"""
import os
import json
from pathlib import Path

def _render_sample_chart_html(chart_id, source, chart_type, index):
    """Return the HTML page text for a single sample chart.

    Args:
        chart_id: Identifier used as the page ``<title>``.
        source: Name of the charting library the sample is attributed to.
        chart_type: Chart type; inserted verbatim as the series ``type``.
        index: 1-based number of the chart, shown in the page heading.
    """
    # NOTE(review): the template always loads ECharts and calls
    # ``echarts.init`` regardless of ``source``, and passes ``chart_type``
    # straight through as the series type -- confirm that every configured
    # type (e.g. 'doughnut', 'heatmap') actually renders in ECharts.
    return f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{chart_id}</title>
    <script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
    <style>
        body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }}
        #chart {{ width: 100%; height: 400px; }}
        .title {{ text-align: center; color: #333; margin-bottom: 20px; }}
    </style>
</head>
<body>
    <h2 class="title">示例图表 - {source} - {chart_type} #{index}</h2>
    <div id="chart"></div>
    <script>
        var chart = echarts.init(document.getElementById('chart'));
        var option = {{
            title: {{ text: 'Sample {chart_type.capitalize()} Chart' }},
            tooltip: {{}},
            xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }},
            yAxis: {{}},
            series: [{{
                type: '{chart_type}',
                data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100]
            }}]
        }};
        chart.setOption(option);
    </script>
</body>
</html>"""


def _write_json(path, payload):
    """Write ``payload`` to ``path`` as UTF-8 JSON (non-ASCII kept, indent 2)."""
    path.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )


def create_sample_dataset(base_path="./dataset", *, charts_per_type=3,
                          questions_per_chart=2):
    """Create a sample dataset on disk for testing the review system.

    For every (source, chart type) pair the following files are written
    below ``base_path``:

    * ``web/<source>/<type>/<chart_id>.html`` -- a sample chart page
    * ``label/<source>/<type>/<chart_id>.json`` -- the chart's label record
    * ``question_answer/<source>/<type>/<model>/<chart_id>_q<N>.json`` --
      one question/answer record per model and question

    A summary with the number of charts and QA pairs actually written is
    printed at the end.

    Args:
        base_path: Root directory of the dataset. Defaults to ``./dataset``,
            the location used when the function is called without arguments.
        charts_per_type: Number of sample charts per chart type.
        questions_per_chart: Number of QA records per chart and model.

    Raises:
        ValueError: If ``charts_per_type`` or ``questions_per_chart`` is
            negative.
    """
    if charts_per_type < 0 or questions_per_chart < 0:
        raise ValueError(
            "charts_per_type and questions_per_chart must be non-negative"
        )

    base_path = Path(base_path)

    # Sample data configuration: chart types available for each source.
    chart_types = {
        "Apache_Echarts": ["bar", "line", "pie"],
        "Plotly": ["scatter", "bar", "heatmap"],
        "ChartJS": ["line", "doughnut", "radar"],
    }
    models = ["gpt-4", "claude-3", "gemini-pro"]

    # Counted while writing so the printed statistics always match the
    # files on disk instead of repeating the loop bounds in a formula.
    total_charts = 0
    total_qa = 0

    for source, source_chart_types in chart_types.items():
        for chart_type in source_chart_types:
            # Create the output directories.
            web_dir = base_path / "web" / source / chart_type
            label_dir = base_path / "label" / source / chart_type
            web_dir.mkdir(parents=True, exist_ok=True)
            label_dir.mkdir(parents=True, exist_ok=True)

            # One QA directory per model, in the same order as ``models``.
            qa_dirs = [
                base_path / "question_answer" / source / chart_type / model
                for model in models
            ]
            for qa_dir in qa_dirs:
                qa_dir.mkdir(parents=True, exist_ok=True)

            # Create the sample charts for this chart type.
            for i in range(1, charts_per_type + 1):
                number = f"{i:04d}"
                chart_id = f"chart_{number}_{chart_type}"

                # HTML page.
                html_content = _render_sample_chart_html(
                    chart_id, source, chart_type, i
                )
                (web_dir / f"{chart_id}.html").write_text(
                    html_content, encoding="utf-8"
                )

                # Label file.
                label_data = {
                    "Number": number,
                    "Type": chart_type,
                    "Source": source,
                    "Weblink": f"https://example.com/{source}/{chart_type}/{i}",
                    "Topic": f"Sample {chart_type} chart #{i}",
                    "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.",
                    "Other": "",
                }
                _write_json(label_dir / f"{chart_id}.json", label_data)
                total_charts += 1

                # QA files for every model; the model index ``j`` shifts the
                # answer so each model gets a different value.
                for j, qa_dir in enumerate(qa_dirs):
                    for q in range(1, questions_per_chart + 1):
                        qa_data = {
                            "id": f"{chart_id}_q{q}",
                            "chart": chart_id,
                            "question": f"在图表 {chart_id} 中，第 {q} 个数据点的值是多少？",
                            "answer": f"约为 {50 + q * 10 + j * 5}",
                        }
                        _write_json(qa_dir / f"{chart_id}_q{q}.json", qa_data)
                        total_qa += 1

    print("✅ 示例数据集创建完成！")
    print(f"📁 数据集位置: {base_path.absolute()}")

    # Print statistics.
    print(f"📊 共创建 {total_charts} 个图表")
    print(f"❓ 共创建 {total_qa} 个问答对")


# Generate the sample dataset only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_sample_dataset()