import gradio as gr
from datetime import datetime
from groq import Groq
import json
import os
import traceback
import re

# --- 1. API KEY ---
# Read the Groq API key from the `fristapi` environment variable and abort the
# import right away when it is missing or empty, so nothing below runs without
# credentials.
# NOTE(review): `fristapi` looks like a typo of "firstapi"; it is left as-is
# because existing deployments may already export the variable under this name.
api_key_coder = os.getenv('fristapi')
if api_key_coder is None or api_key_coder == "":
    raise ValueError("Groq API key not found. Set fristapi environment variable.")

# --- 2. LLM CLIENT ---
class GroqLLM:
    """Thin wrapper around the Groq chat-completions client.

    Each call sends a single user message and returns the text of the first
    choice. Failures are never raised to the caller: they are reported as a
    string that starts with ``"LLM Error: "``.
    """

    def __init__(self, api_key, model="llama-3.3-70b-versatile", temperature=0.7):
        """Create the Groq client and remember the sampling settings.

        Args:
            api_key: Key handed to ``Groq(api_key=...)``.
            model: Model identifier sent with every request.
            temperature: Sampling temperature sent with every request.
        """
        self.client = Groq(api_key=api_key)
        self.model = model
        self.temperature = temperature

    def invoke(self, prompt):
        """Send *prompt* as one user message and return the reply text.

        Args:
            prompt: Text placed in the ``content`` of the user message.

        Returns:
            The ``message.content`` of the first choice, or
            ``"LLM Error: <details>"`` when anything in the call fails.
        """
        try:
            create_completion = self.client.chat.completions.create
            user_message = {"role": "user", "content": prompt}
            completion = create_completion(
                model=self.model,
                messages=[user_message],
                temperature=self.temperature,
                max_tokens=4000,
            )
            first_choice = completion.choices[0]
            return first_choice.message.content
        except Exception as err:
            # Callers expect a string either way, so the failure is encoded
            # in the return value instead of being propagated.
            return "LLM Error: " + str(err)

# Module-wide LLM client, built once at import time with the default model and
# temperature; it is handed to the data generator created further down.
llm = GroqLLM(api_key=api_key_coder)

class SyntheticDataGenerator:
    """Build synthetic-test-data prompts and send them to an LLM.

    The wrapped ``llm`` only needs an ``invoke(prompt)`` method that returns
    the model's reply as text.
    """

    # Wording used in the prompt for each supported ``data_type``.
    _FORMAT_LABELS = {
        "json": "JSON (array of objects)",
        "csv": "CSV (comma-separated values with headers)",
        "sql": "SQL INSERT statements",
        "xml": "XML document",
        "python_dict": "Python list of dictionaries",
        "yaml": "YAML format",
        "excel_style": "Table format (markdown)",
    }

    def __init__(self, llm):
        self.llm = llm

    def generate_data(self, schema_description, data_type="json", language="Python", record_count=5):
        """Ask the LLM for synthetic records that follow a schema.

        Args:
            schema_description: Schema as text, or as a dict/list that is
                serialised to indented JSON before being embedded.
            data_type: Key of the desired output format; unknown keys are
                described to the model simply as "JSON".
            language: Programming language mentioned in the prompt.
            record_count: Number of records requested from the model.

        Returns:
            Whatever ``self.llm.invoke`` returns for the assembled prompt.
        """
        # Structured schemas are embedded as JSON rather than as a Python repr.
        if isinstance(schema_description, (dict, list)):
            schema_text = json.dumps(schema_description, indent=2)
        else:
            schema_text = schema_description

        output_format = self._FORMAT_LABELS.get(data_type, "JSON")
        # The fence tag is what result parsers look for after the backticks,
        # so it intentionally mirrors ``data_type`` (except for tables).
        fence_tag = data_type if data_type != 'excel_style' else 'markdown'

        # The data section is shown to the model as a *closed* fenced block so
        # the reply has an unambiguous start and end for the generated data.
        prompt = f"""
You are a TEST DATA ENGINEER specializing in synthetic data generation.
Generate realistic, diverse, and comprehensive test data.
=====================
CONFIGURATION
=====================
Data Schema: {schema_text}
Output Format: {output_format}
Number of Records: {record_count}
Programming Context: {language}
=====================
REQUIREMENTS
=====================
1. Generate {record_count} unique, realistic records
2. Include edge cases and boundary values
3. Ensure data variety (different types, values)
4. Make data look production-like
5. Include at least one record with null/empty values (if applicable)
6. Include at least one record with extreme values
=====================
OUTPUT STRUCTURE
=====================
Generate the response with:
## 📊 Data Generation Summary
- **Schema**: [brief description]
- **Format**: {output_format}
- **Records**: {record_count}
- **Language Context**: {language}
## 📝 Generated Test Data
```{fence_tag}
[generated data here]
```
## 🔍 Data Quality Notes
[Special cases included]
[Edge values]
[Validation notes]

## 💡 Usage Example
[How to use this data in {language} tests]
"""
        return self.llm.invoke(prompt)

    def generate_bulk_data(self, schema_description, formats=None, record_counts=None):
        """Generate data for several output formats in one call.

        Args:
            schema_description: Schema forwarded unchanged to ``generate_data``.
            formats: Iterable of format keys; defaults to json, csv and sql.
            record_counts: Record count per format, matched by position. A
                single int or a one-element list applies to every format. When
                there are fewer counts than formats, the last count is reused.
                ``None`` or an empty list selects the defaults ``[3, 5, 10]``.

        Returns:
            Dict mapping each format key to the ``generate_data`` result.
        """
        if formats is None:
            formats = ["json", "csv", "sql"]
        if isinstance(record_counts, int):
            record_counts = [record_counts]
        if not record_counts:
            record_counts = [3, 5, 10]

        last_index = len(record_counts) - 1
        results = {}
        for position, format_type in enumerate(formats):
            # Positional lookup (not ``formats.index``) keeps repeated format
            # names aligned with their own count; clamping avoids IndexError
            # when fewer counts than formats were supplied.
            count = record_counts[min(position, last_index)]
            results[format_type] = self.generate_data(
                schema_description=schema_description,
                data_type=format_type,
                record_count=count,
            )
        return results


# Backward-compatible alias: this function used to live at module level and
# took the generator as its first positional argument, so
# ``generate_bulk_data(generator, schema, ...)`` keeps working.
generate_bulk_data = SyntheticDataGenerator.generate_bulk_data


# Shared generator instance backed by the module-level `llm`; the Gradio
# callback `generate_synthetic_data` below calls its `generate_data` method.
synthetic_generator = SyntheticDataGenerator(llm)


# ============================================
# Template Functions
# ============================================

# Schema text for every built-in template, keyed by template name.
_SCHEMA_TEMPLATES = {
    "custom": """{
  "user_id": "uuid",
  "full_name": "name",
  "email": "email",
  "age": {"type": "int", "min": 18, "max": 99},
  "is_active": "boolean",
  "country": "country"
}""",
    "user_profile": """{
  "user_id": "integer",
  "username": "string (3-20 chars)",
  "email": "email format",
  "age": "integer (18-80)",
  "country": "ISO country code",
  "is_premium": "boolean",
  "registration_date": "date (last 2 years)",
  "last_login": "datetime"
}""",
    "ecommerce_order": """{
  "order_id": "string (format: ORD-XXXX)",
  "customer_name": "string",
  "email": "email",
  "items": [
    {
      "product_id": "string",
      "quantity": "integer (1-10)",
      "price": "decimal (10.99-999.99)"
    }
  ],
  "total_amount": "decimal",
  "status": "enum(pending,shipped,delivered,cancelled)",
  "order_date": "datetime"
}""",
    "api_request_log": """{
  "request_id": "uuid",
  "endpoint": "string",
  "method": "enum(GET,POST,PUT,DELETE)",
  "status_code": "integer (200,400,401,404,500)",
  "response_time_ms": "integer (50-5000)",
  "timestamp": "datetime",
  "user_agent": "string",
  "ip_address": "ipv4"
}""",
    "contact_info": """{
  "contact_id": "integer",
  "first_name": "string",
  "last_name": "string",
  "email": "email",
  "phone": "phone number",
  "address": {
    "street": "string",
    "city": "string",
    "state": "string",
    "zip_code": "string",
    "country": "string"
  },
  "company": "string",
  "notes": "string (optional)"
}""",
    "financial_transaction": """{
  "transaction_id": "uuid",
  "account_number": "string (format: ****1234)",
  "transaction_type": "enum(deposit,withdrawal,transfer,payment)",
  "amount": "decimal (0.01-10000.00)",
  "currency": "enum(USD,EUR,GBP,JPY)",
  "timestamp": "datetime",
  "status": "enum(pending,completed,failed,reversed)",
  "description": "string"
}""",
    "healthcare_patient": """{
  "patient_id": "string (format: PAT-XXXX)",
  "first_name": "string",
  "last_name": "string",
  "date_of_birth": "date (1950-2020)",
  "gender": "enum(M,F,Other)",
  "blood_type": "enum(A+,A-,B+,B-,AB+,AB-,O+,O-)",
  "allergies": "array of strings",
  "medications": "array of strings",
  "last_visit": "datetime",
  "insurance_provider": "string"
}""",
}


def load_schema_template(template_name):
    """Look up the schema text of a built-in template.

    Args:
        template_name: Name of the template to fetch.

    Returns:
        The schema as a JSON-formatted string; names that are not known
        yield the "custom" template.
    """
    if template_name in _SCHEMA_TEMPLATES:
        return _SCHEMA_TEMPLATES[template_name]
    return _SCHEMA_TEMPLATES["custom"]

def get_output_format_description(output_type):
    """Describe an output format in one human-readable line.

    Args:
        output_type: Format key such as "json" or "csv".

    Returns:
        The description registered for the key, or a generic
        "standard format" line when the key is not recognised.
    """
    known_formats = dict(
        json="📄 JSON format - Array of objects, ideal for API testing and data interchange",
        csv="📊 CSV format - Comma-separated values, perfect for data import and spreadsheet analysis",
        sql="🗄️ SQL format - INSERT statements with CREATE TABLE, ready for database seeding",
        xml="📋 XML format - Structured document format for enterprise systems",
        python_dict="🐍 Python format - List of dictionaries, ready to use in Python tests",
        yaml="⚙️ YAML format - Human-readable configuration format",
        excel_style="📑 Table format - Markdown table, easy to read and document",
    )
    if output_type in known_formats:
        return known_formats[output_type]
    # Anything else gets the generic description.
    return "📄 Standard format for test data"
# ============================================
# Gradio UI Functions
# ============================================

def _extract_fenced_data(llm_reply, data_format):
    """Return the body of the fenced data block in *llm_reply*, or ``None``.

    The tag the prompt asked for is tried first, then ``python`` for
    ``python_dict`` (models often normalise that tag), then any fenced block.
    A reply that was cut off before the closing fence is captured to its end.
    """
    expected_tag = "markdown" if data_format == "excel_style" else data_format
    tag_patterns = [re.escape(expected_tag)]
    if data_format == "python_dict":
        tag_patterns.append("python")
    tag_patterns.append(r"[\w+-]*")

    for tag_pattern in tag_patterns:
        match = re.search(
            rf"```{tag_pattern}[ \t]*\r?\n(.*?)(?:\r?\n[ \t]*```|\Z)",
            llm_reply,
            re.DOTALL,
        )
        if match and match.group(1).strip():
            return match.group(1)
    return None


def _build_usage_example(language, data_format, generated_data):
    """Return a starter snippet showing how to consume *generated_data*."""
    head = generated_data[:500]
    # Literal payloads are pasted into the snippet, so mark visibly when cut.
    literal = head + '...' if len(generated_data) > 500 else generated_data

    usage_examples = {
        "json": f'''# Generated test data for {language}
# Created with Synthetic Data Generator
import json

test_data = {literal}

for record in test_data:
    # Your test logic here
    print(f"Processing: {{record}}")

# Example validation
assert 'user_id' in record
assert 'email' in record
''',
        "python_dict": f'''# Generated test data for {language}
# Test data as Python dictionary/list
test_data = {literal}

for record in test_data:
    print(f"Testing with: {{record.get('name', 'N/A')}}")
''',
        "csv": f'''# Generated test data for {language}
import csv
from io import StringIO

csv_data = """{head}"""

reader = csv.DictReader(StringIO(csv_data))
for row in reader:
    print(f"Processing row: {{row}}")
''',
        "sql": f'''# Generated test data for {language}
import sqlite3

sql_statements = """{head}"""

conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
for statement in sql_statements.split(';'):
    if statement.strip():
        cursor.execute(statement)
conn.commit()
print("Database seeded successfully!")
''',
        "xml": f'''# Generated test data for {language}
import xml.etree.ElementTree as ET

xml_data = """{head}"""
root = ET.fromstring(xml_data)
for record in root:
    print(f"Record: {{record.tag}}")
''',
        "yaml": f'''# Generated test data for {language}
import yaml

yaml_data = """{head}"""
test_data = yaml.safe_load(yaml_data)
for record in test_data:
    print(f"Testing with: {{record}}")
''',
    }

    fallback = f'''# Generated test data for {language}
# Generated Data:
generated_data = """{head}"""
# Use this data in your tests as needed'''
    return usage_examples.get(data_format, fallback)


def _format_generation_stats(count, language, data_format, template_name):
    """Return the statistics text shown after a successful generation."""
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return (
        "### 📊 Generation Statistics\n"
        "Metric\tValue\n"
        f"Records Generated\t{count}\n"
        f"Language\t{language}\n"
        f"Format\t{data_format.upper()}\n"
        f"Template\t{template_name}\n"
        "Status\t✅ Success\n"
        f"Generated\t{generated_at}\n"
        "Data Quality Notes:\n"
        "✅ Realistic test data created\n"
        "✅ Includes edge cases and variations\n"
        "✅ Ready for immediate use in tests\n"
        "✅ Follows schema specifications\n"
        "Special Cases Included:\n"
        "Boundary values\n"
        "Edge cases\n"
        "Null/empty values (where applicable)\n"
        "Extreme values\n"
    )


def generate_synthetic_data(language, schema_json, record_count, data_format, template_name, generator=None):
    """Generate test data for the UI and package it for display.

    Args:
        language: Target language mentioned in the prompt and code template.
        schema_json: Schema text; blank input short-circuits with a hint.
        record_count: Requested record count; coerced to int and clamped to
            1..100, falling back to 5 when it cannot be converted.
        data_format: Output format key (e.g. "json", "csv", "excel_style").
        template_name: Name of the selected template, reported in the stats.
        generator: Optional object exposing ``generate_data(...)``; the
            module-level ``synthetic_generator`` is used when omitted.

    Returns:
        A 4-tuple ``(data, code_template, stats_markdown, schema_json)``.
        Errors are reported inside the tuple rather than raised.
    """
    try:
        if not schema_json or not schema_json.strip():
            return (
                "### ⚠️ No Data Generated\n\nPlease provide a schema definition.",
                "# No data generated\nPlease provide a schema definition.",
                "### 📊 Generation Statistics\nNo data generated yet",
                schema_json
            )

        try:
            count = max(1, min(100, int(record_count)))
        except (TypeError, ValueError, OverflowError):
            # Unparseable counts (None, text, NaN, inf) fall back to 5.
            count = 5

        data_source = synthetic_generator if generator is None else generator
        result = data_source.generate_data(
            schema_description=schema_json,
            data_type=data_format,
            language=language,
            record_count=count
        )

        if not isinstance(result, str) or not result.strip():
            failure = "The model returned an empty response."
            return (
                f"❌ Error: {failure}",
                f"# Error generating data\n\n{failure}",
                f"### ❌ Generation Failed\n\nError: {failure}",
                schema_json
            )

        # The LLM wrapper reports failures as text with this prefix; surface
        # them as failures instead of presenting them as generated data.
        if result.startswith("LLM Error:"):
            return (
                f"❌ {result}",
                f"# Error generating data\n\n{result}",
                f"### ❌ Generation Failed\n\n{result}",
                schema_json
            )

        generated_data = _extract_fenced_data(result, data_format)
        if generated_data is None:
            # No fenced block found: show the raw reply, flagging truncation
            # only when the reply really was cut.
            if len(result) > 2000:
                raw_preview = result[:2000] + "\n\n... (truncated)"
            else:
                raw_preview = result
            return (
                raw_preview,
                f"# Generated Output\n\n{result}",
                "### ⚠️ Generation completed but format may need review\n\nThe data may not be in the expected format. Please check the output above.",
                schema_json
            )

        return (
            generated_data,
            _build_usage_example(language, data_format, generated_data),
            _format_generation_stats(count, language, data_format, template_name),
            schema_json
        )

    except Exception as e:
        # UI boundary: report the failure in the outputs instead of raising.
        error_msg = f"❌ Error: {str(e)}\n\n{traceback.format_exc()}\n"
        return (
            error_msg,
            f"# Error generating data\n\n{str(e)}",
            f"### ❌ Generation Failed\n\nError: {str(e)}",
            schema_json
        )


def preview_schema(schema_json):
    """Preview and validate schema"""
    try:
        if not schema_json or not schema_json.strip():
            return "⚠️ No schema provided"

        # Try to parse as JSON for validation
        try:
            parsed = json.loads(schema_json)
            return f"""### ✅ Valid JSON Schema
Structure Preview:

{json.dumps(parsed, indent=2, ensure_ascii=False)[:500]}
Fields Detected: {len(parsed) if isinstance(parsed, dict) else 'Multiple records'}

This schema will generate realistic test data based on the field definitions.
"""
        except json.JSONDecodeError:
            return f"""### 📝 Schema Description (Text Format)
{schema_json[:300]}
Note: The schema is in text format. The AI will interpret and generate appropriate test data.
"""

    except Exception as e:
        return f"❌ Error previewing schema: {str(e)}"
# ============================================
# Gradio UI (Standalone with Home Tab)
# ============================================

# Build the whole UI inside a single Blocks context bound to `demo`; the
# launcher at the bottom of the file calls `demo.launch(...)`.
with gr.Blocks(title="Synthetic Test Data Generator") as demo:
    # Page heading and subtitle shown above the tabs.
    gr.Markdown("# 🎲 Synthetic Test Data Generator")
    gr.Markdown("### Create Realistic Test Data for Your Applications with AI")
    
    # ============================================
    # Tab 0: Home
    # ============================================
    with gr.Tab("🏠 Home"):
        # Welcome text (the body is still a placeholder).
        gr.Markdown("""
🎯 Welcome to Synthetic Test Data Generator
... (المحتوى)
""")

        # Stats and Use Cases
        # Two side-by-side columns of static text.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📊 Quick Stats")
                gr.Markdown("""
Formats Supported: 7
Max Records: 100 per generation
Templates: 7 pre-built
Languages: 4
""")
            with gr.Column():
                gr.Markdown("### 🎯 Use Cases")
                gr.Markdown("""
API Testing
Database Seeding
Load Testing
UI Testing
Integration Tests
Demo Data Creation
""")

        # Quick Action Button
        gr.Markdown("---")
        gr.Markdown("### 🔧 Quick Action")
        quick_generate_btn = gr.Button("🚀 Go to Generator Tab", variant="primary")

        # No Python callback is registered (fn=None); only the JavaScript
        # snippet is supplied, which clicks the element matched by the selector.
        # NOTE(review): the selector assumes the Generator tab button is
        # rendered with data-testid="tab-Generator" - confirm against the
        # installed Gradio version.
        quick_generate_btn.click(
            fn=None,
            inputs=[],
            outputs=[],
            js='() => { document.querySelector(\'[data-testid="tab-Generator"]\').click(); }'
        )
    
    # ============================================
    # Tab 1: Generator (Main) - must stay inside the `demo` block
    # ============================================
    with gr.Tab("Generator") as generator_tab:

        with gr.Row():
            # Left column: every input the generation callback receives.
            with gr.Column(scale=1):
                gr.Markdown("### ⚙️ Configuration")

                # Language Selection
                data_language = gr.Dropdown(
                    choices=["python", "javascript", "java", "csharp"],
                    value="python",
                    label="🎯 Target Language"
                )

                # Format Selection
                data_format = gr.Dropdown(
                    choices=["json", "csv", "sql", "xml", "python_dict", "yaml", "excel_style"],
                    value="json",
                    label="📄 Output Format"
                )

                # Format description
                # Starts with the JSON description to match the dropdown default.
                format_desc = gr.Markdown("Format Info: " + get_output_format_description("json"))

                # Record count
                data_count = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=10,
                    step=1,
                    label="📊 Number of Records"
                )

                gr.Markdown("### 📝 Schema Definition")

                # Template selection
                data_template = gr.Dropdown(
                    choices=[
                        "custom", "user_profile", "ecommerce_order", "api_request_log",
                        "contact_info", "financial_transaction", "healthcare_patient"
                    ],
                    value="custom",
                    label="📁 Schema Template"
                )

                # Schema input
                # The default text is the same schema as the "custom" template.
                schema_input = gr.Textbox(
                    value='''{
  "user_id": "uuid",
  "full_name": "name",
  "email": "email",
  "age": {"type": "int", "min": 18, "max": 99},
  "is_active": "boolean",
  "country": "country"
}''',
                    lines=15,
                    label="Schema Definition"
                )

                # Schema preview
                preview_btn = gr.Button("🔍 Preview Schema", size="sm")
                schema_preview = gr.Markdown("")

                # Actions
                generate_btn = gr.Button("✨ Generate Test Data", variant="primary", size="lg")

            # Right column: outputs filled by the generation callback.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 Generation Statistics")
                data_stats = gr.Markdown("No data generated yet")

                gr.Markdown("### 📝 Generated Data")
                data_output = gr.Code(label="Test Data", language="json", lines=15)

                gr.Markdown("### 💻 Ready-to-use Code Template")
                code_output = gr.Code(label="Code Template", language="python", lines=12)

        # Helper functions
        def update_code_language(format_type):
            """Return a Gradio update setting the code viewer's language.

            The selected output format is mapped to a language name; formats
            missing from the map use "text".
            """
            # NOTE(review): confirm that "csv", "xml" and the "text" fallback
            # are language names gr.Code accepts in the installed version.
            lang_map = {
                "json": "json",
                "csv": "csv",
                "sql": "sql",
                "xml": "xml",
                "python_dict": "python",
                "yaml": "yaml",
                "excel_style": "markdown"
            }
            return gr.update(language=lang_map.get(format_type, "text"))

        # Event Handlers
        # Picking a template overwrites the schema textbox with its text.
        data_template.change(
            fn=load_schema_template,
            inputs=[data_template],
            outputs=[schema_input]
        )

        preview_btn.click(
            fn=preview_schema,
            inputs=[schema_input],
            outputs=[schema_preview]
        )

        # Input order matches generate_synthetic_data's parameters; the
        # function's fourth return value is written back into schema_input.
        generate_btn.click(
            fn=generate_synthetic_data,
            inputs=[data_language, schema_input, data_count, data_format, data_template],
            outputs=[data_output, code_output, data_stats, schema_input]
        )

        # Two handlers on the same change event: one refreshes the format
        # description, the other switches the data viewer's language.
        data_format.change(
            fn=get_output_format_description,
            inputs=[data_format],
            outputs=[format_desc]
        )

        data_format.change(
            fn=update_code_language,
            inputs=[data_format],
            outputs=[data_output]
        )
    
    # ============================================
    # Tab 2: Templates Library
    # ============================================
    with gr.Tab("📚 Templates Library"):
        gr.Markdown("# 📚 Schema Templates Library")
        gr.Markdown("Browse and learn from these example schemas")

        # Lists the six pre-built templates; "custom" is not offered here.
        template_selector = gr.Dropdown(
            choices=[
                "user_profile", "ecommerce_order", "api_request_log",
                "contact_info", "financial_transaction", "healthcare_patient"
            ],
            value="user_profile",
            label="Select Template to View"
        )

        template_display = gr.Code(label="Template Schema", language="json", lines=20)
        template_description = gr.Markdown("")

        def show_template_details(template_name):
            """Return ``(schema_text, description)`` for a template name.

            Names missing from the table fall back to the "user_profile"
            entry.
            """
            # Every entry loads its schema with the same `template_name`
            # argument, so the schema text always follows the requested name.
            templates_info = {
                "user_profile": {
                    "desc": "User Profile Schema\n\nPerfect for testing user management systems, authentication, and profile features.\n\nUse Cases:\n- User registration tests\n- Profile management\n- User search/filtering\n- Data export features",
                    "schema": load_schema_template(template_name)
                },
                "ecommerce_order": {
                    "desc": "E-commerce Order Schema\n\nIdeal for testing shopping carts, checkout flows, and order management systems.\n\nUse Cases:\n- Order creation tests\n- Cart calculations\n- Inventory management\n- Order history features",
                    "schema": load_schema_template(template_name)
                },
                "api_request_log": {
                    "desc": "API Request Log Schema\n\nGreat for testing logging systems, analytics, and monitoring tools.\n\nUse Cases:\n- API monitoring tests\n- Log analysis\n- Performance metrics\n- Error tracking",
                    "schema": load_schema_template(template_name)
                },
                "contact_info": {
                    "desc": "Contact Information Schema\n\nUseful for CRM systems, address books, and contact management features.\n\nUse Cases:\n- Contact import/export\n- Address validation\n- Phone number formatting\n- Data enrichment tests",
                    "schema": load_schema_template(template_name)
                },
                "financial_transaction": {
                    "desc": "Financial Transaction Schema\n\nEssential for banking, payment systems, and financial applications.\n\nUse Cases:\n- Payment processing tests\n- Transaction history\n- Fraud detection\n- Statement generation",
                    "schema": load_schema_template(template_name)
                },
                "healthcare_patient": {
                    "desc": "Healthcare Patient Schema\n\nDesigned for healthcare systems, patient management, and medical records.\n\nUse Cases:\n- Patient registration\n- Medical records\n- Appointment scheduling\n- Insurance verification",
                    "schema": load_schema_template(template_name)
                }
            }

            info = templates_info.get(template_name, templates_info["user_profile"])
            return info["schema"], info["desc"]

        template_selector.change(
            fn=show_template_details,
            inputs=[template_selector],
            outputs=[template_display, template_description]
        )

        # Load initial template
        # Pre-fill both widgets with the dropdown's default selection by
        # assigning their `value` attributes after construction.
        initial_schema, initial_desc = show_template_details("user_profile")
        template_display.value = initial_schema
        template_description.value = initial_desc

        gr.Markdown("---")
        gr.Markdown("### 💡 Tips for Creating Custom Schemas")
        gr.Markdown("""
- Use descriptive field names - The AI understands context (e.g., 'email' vs 'e')  
- Specify data types - Include hints like 'string', 'integer', 'decimal', 'date'  
- Add constraints - Use ranges like 'age: 18-80' or patterns like 'email format'  
- Include enums - For fields with limited options (e.g., 'status: pending,active,closed')  
- Structure nested data - Use JSON objects or arrays for complex relationships
""")
    
    # ============================================
    # Tab 3: About & Help
    # ============================================
    with gr.Tab("ℹ️ About & Help"):
        # Static help text; the remainder of the body is still a placeholder.
        gr.Markdown("""
ℹ️ **About Synthetic Test Data Generator**

**What is this tool?**  
This tool uses AI (Groq's LLM) to generate realistic, production-like test data based on your schema definitions. It helps QA engineers and developers create comprehensive test datasets quickly and efficiently.

... (باقي المحتوى)
""")

# ============================================
# Launch Application
# ============================================
if __name__ == "__main__":
    # Startup banner: text lines separated by 60-character rules.
    rule = "=" * 60
    banner_lines = (
        rule,
        "🎲 Synthetic Test Data Generator",
        rule,
        "✅ All components loaded successfully!",
        rule,
        "🚀 Launching application...",
        rule,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Serve on all interfaces, port 7860, and request a public share link.
    # NOTE(review): `theme` is passed to launch() here; some Gradio releases
    # only accept it on gr.Blocks(...) - confirm against the installed version.
    launch_options = dict(
        share=True,
        debug=False,
        server_name="0.0.0.0",
        server_port=7860,
        theme=gr.themes.Soft(),
    )
    demo.launch(**launch_options)