Spaces:
Sleeping
Sleeping
| """ | |
| Data export utilities for the AI Trading Experiment. | |
| Provides functions to export experiment data for statistical analysis. | |
| """ | |
| import os | |
| import sqlite3 | |
| from datetime import datetime | |
| from typing import Dict, List, Any, Optional | |
| import pandas as pd | |
| DATABASE_PATH = "db/experiment.db" | |
| EXPORT_DIR = "exports" | |
| def ensure_export_dir(): | |
| """Ensure the export directory exists.""" | |
| os.makedirs(EXPORT_DIR, exist_ok=True) | |
| def get_connection(): | |
| """Get a database connection.""" | |
| return sqlite3.connect(DATABASE_PATH) | |
| def export_sessions_csv() -> str: | |
| """Export all sessions to CSV.""" | |
| ensure_export_dir() | |
| conn = get_connection() | |
| df = pd.read_sql_query("SELECT * FROM sessions ORDER BY session_start", conn) | |
| conn.close() | |
| filename = f"{EXPORT_DIR}/sessions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" | |
| df.to_csv(filename, index=False) | |
| print(f"Exported {len(df)} sessions to {filename}") | |
| return filename | |
| def export_decisions_csv() -> str: | |
| """Export all decisions to CSV.""" | |
| ensure_export_dir() | |
| conn = get_connection() | |
| df = pd.read_sql_query("SELECT * FROM decisions ORDER BY timestamp", conn) | |
| conn.close() | |
| filename = f"{EXPORT_DIR}/decisions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" | |
| df.to_csv(filename, index=False) | |
| print(f"Exported {len(df)} decisions to {filename}") | |
| return filename | |
| def export_interactions_csv() -> str: | |
| """Export all chat interactions to CSV.""" | |
| ensure_export_dir() | |
| conn = get_connection() | |
| df = pd.read_sql_query("SELECT * FROM chat_interactions ORDER BY timestamp", conn) | |
| conn.close() | |
| filename = f"{EXPORT_DIR}/interactions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" | |
| df.to_csv(filename, index=False) | |
| print(f"Exported {len(df)} interactions to {filename}") | |
| return filename | |
| def export_trust_metrics_csv() -> str: | |
| """Export trust metrics to CSV.""" | |
| ensure_export_dir() | |
| conn = get_connection() | |
| df = pd.read_sql_query("SELECT * FROM trust_metrics ORDER BY timestamp", conn) | |
| conn.close() | |
| filename = f"{EXPORT_DIR}/trust_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" | |
| df.to_csv(filename, index=False) | |
| print(f"Exported {len(df)} trust metrics to {filename}") | |
| return filename | |
| def export_all() -> Dict[str, str]: | |
| """Export all data tables to CSV files.""" | |
| return { | |
| "sessions": export_sessions_csv(), | |
| "decisions": export_decisions_csv(), | |
| "interactions": export_interactions_csv(), | |
| "trust_metrics": export_trust_metrics_csv() | |
| } | |
| def generate_summary_report() -> pd.DataFrame: | |
| """Generate a summary report aggregated by participant.""" | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| s.participant_id, | |
| s.condition_name, | |
| s.completed, | |
| s.initial_portfolio, | |
| s.current_portfolio as final_portfolio, | |
| (s.current_portfolio - s.initial_portfolio) as total_return, | |
| ((s.current_portfolio - s.initial_portfolio) / s.initial_portfolio * 100) as return_pct, | |
| s.scenarios_completed, | |
| s.ai_advice_followed, | |
| s.ai_advice_total, | |
| CASE WHEN s.ai_advice_total > 0 | |
| THEN (s.ai_advice_followed * 1.0 / s.ai_advice_total * 100) | |
| ELSE 0 END as ai_follow_rate, | |
| s.total_chat_queries, | |
| s.proactive_advice_accepted, | |
| s.proactive_advice_dismissed, | |
| CASE WHEN (s.proactive_advice_accepted + s.proactive_advice_dismissed) > 0 | |
| THEN (s.proactive_advice_accepted * 1.0 / (s.proactive_advice_accepted + s.proactive_advice_dismissed) * 100) | |
| ELSE 0 END as proactive_engage_rate | |
| FROM sessions s | |
| ORDER BY s.session_start | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| return df | |
| def generate_condition_comparison() -> pd.DataFrame: | |
| """Generate comparison statistics across experimental conditions.""" | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| s.condition_name, | |
| COUNT(*) as n_participants, | |
| SUM(CASE WHEN s.completed = 1 THEN 1 ELSE 0 END) as n_completed, | |
| AVG((s.current_portfolio - s.initial_portfolio) / s.initial_portfolio * 100) as avg_return_pct, | |
| AVG(CASE WHEN s.ai_advice_total > 0 | |
| THEN (s.ai_advice_followed * 1.0 / s.ai_advice_total * 100) | |
| ELSE 0 END) as avg_ai_follow_rate, | |
| AVG(s.total_chat_queries) as avg_chat_queries, | |
| AVG(CASE WHEN (s.proactive_advice_accepted + s.proactive_advice_dismissed) > 0 | |
| THEN (s.proactive_advice_accepted * 1.0 / (s.proactive_advice_accepted + s.proactive_advice_dismissed) * 100) | |
| ELSE 0 END) as avg_proactive_engage_rate | |
| FROM sessions s | |
| WHERE s.completed = 1 | |
| GROUP BY s.condition_name | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| return df | |
| def generate_ai_accuracy_analysis() -> pd.DataFrame: | |
| """Analyze how participants respond to correct vs incorrect AI advice.""" | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| d.ai_was_correct, | |
| COUNT(*) as n_decisions, | |
| SUM(d.followed_ai) as n_followed, | |
| (SUM(d.followed_ai) * 1.0 / COUNT(*) * 100) as follow_rate, | |
| AVG(d.decision_confidence) as avg_confidence, | |
| AVG(d.time_to_decision_ms) as avg_decision_time_ms | |
| FROM decisions d | |
| GROUP BY d.ai_was_correct | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| df['ai_was_correct'] = df['ai_was_correct'].map({0: 'Incorrect', 1: 'Correct'}) | |
| return df | |
| def generate_trust_evolution() -> pd.DataFrame: | |
| """Analyze how trust evolves over the course of the experiment.""" | |
| conn = get_connection() | |
| # Get decision sequence and follow rate | |
| query = """ | |
| SELECT | |
| d.participant_id, | |
| d.scenario_id, | |
| ROW_NUMBER() OVER (PARTITION BY d.participant_id ORDER BY d.timestamp) as decision_number, | |
| d.followed_ai, | |
| d.ai_was_correct, | |
| d.decision_confidence, | |
| d.outcome_percentage | |
| FROM decisions d | |
| ORDER BY d.participant_id, d.timestamp | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| return df | |
| def generate_chat_usage_analysis() -> pd.DataFrame: | |
| """Analyze chat usage patterns.""" | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| ci.participant_id, | |
| ci.scenario_id, | |
| ci.interaction_type, | |
| COUNT(*) as n_interactions, | |
| AVG(ci.response_time_ms) as avg_response_time_ms, | |
| SUM(CASE WHEN ci.user_engaged = 1 THEN 1 ELSE 0 END) as n_engaged, | |
| SUM(CASE WHEN ci.dismissed = 1 THEN 1 ELSE 0 END) as n_dismissed | |
| FROM chat_interactions ci | |
| GROUP BY ci.participant_id, ci.scenario_id, ci.interaction_type | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| return df | |
| def print_quick_stats(): | |
| """Print quick statistics to console.""" | |
| conn = get_connection() | |
| # Session stats | |
| sessions = pd.read_sql_query("SELECT * FROM sessions", conn) | |
| decisions = pd.read_sql_query("SELECT * FROM decisions", conn) | |
| interactions = pd.read_sql_query("SELECT * FROM chat_interactions", conn) | |
| conn.close() | |
| print("\n" + "="*60) | |
| print("EXPERIMENT STATISTICS") | |
| print("="*60) | |
| print(f"\n📊 SESSIONS") | |
| print(f" Total sessions: {len(sessions)}") | |
| print(f" Completed sessions: {sessions['completed'].sum()}") | |
| print(f" Completion rate: {sessions['completed'].mean()*100:.1f}%") | |
| if len(sessions) > 0: | |
| avg_portfolio = sessions['current_portfolio'].mean() | |
| avg_return = ((sessions['current_portfolio'] - sessions['initial_portfolio']) / sessions['initial_portfolio']).mean() * 100 | |
| print(f" Average final portfolio: {avg_portfolio:,.2f}") | |
| print(f" Average return: {avg_return:.1f}%") | |
| print(f"\n🤖 AI INTERACTIONS") | |
| print(f" Total decisions: {len(decisions)}") | |
| if len(decisions) > 0: | |
| follow_rate = decisions['followed_ai'].mean() * 100 | |
| avg_confidence = decisions['decision_confidence'].mean() | |
| avg_time = decisions['time_to_decision_ms'].mean() / 1000 | |
| print(f" AI follow rate: {follow_rate:.1f}%") | |
| print(f" Average confidence: {avg_confidence:.1f}") | |
| print(f" Average decision time: {avg_time:.1f}s") | |
| # Follow rate by AI accuracy | |
| correct_ai = decisions[decisions['ai_was_correct'] == 1] | |
| incorrect_ai = decisions[decisions['ai_was_correct'] == 0] | |
| if len(correct_ai) > 0: | |
| print(f" Follow rate when AI correct: {correct_ai['followed_ai'].mean()*100:.1f}%") | |
| if len(incorrect_ai) > 0: | |
| print(f" Follow rate when AI incorrect: {incorrect_ai['followed_ai'].mean()*100:.1f}%") | |
| print(f"\n💬 CHAT USAGE") | |
| print(f" Total interactions: {len(interactions)}") | |
| if len(interactions) > 0: | |
| reactive = interactions[interactions['interaction_type'] == 'reactive_query'] | |
| proactive = interactions[interactions['interaction_type'] == 'proactive'] | |
| print(f" Reactive queries: {len(reactive)}") | |
| print(f" Proactive messages: {len(proactive)}") | |
| print("\n" + "="*60) | |
| def export_full_report() -> str: | |
| """Generate a comprehensive analysis report.""" | |
| ensure_export_dir() | |
| timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') | |
| filename = f"{EXPORT_DIR}/full_report_{timestamp}.xlsx" | |
| with pd.ExcelWriter(filename, engine='openpyxl') as writer: | |
| # Summary | |
| summary = generate_summary_report() | |
| summary.to_excel(writer, sheet_name='Participant Summary', index=False) | |
| # Condition comparison | |
| conditions = generate_condition_comparison() | |
| conditions.to_excel(writer, sheet_name='Condition Comparison', index=False) | |
| # AI accuracy analysis | |
| accuracy = generate_ai_accuracy_analysis() | |
| accuracy.to_excel(writer, sheet_name='AI Accuracy Analysis', index=False) | |
| # Trust evolution | |
| trust = generate_trust_evolution() | |
| trust.to_excel(writer, sheet_name='Trust Evolution', index=False) | |
| # Chat usage | |
| chat = generate_chat_usage_analysis() | |
| chat.to_excel(writer, sheet_name='Chat Usage', index=False) | |
| # Raw data | |
| conn = get_connection() | |
| sessions = pd.read_sql_query("SELECT * FROM sessions", conn) | |
| sessions.to_excel(writer, sheet_name='Raw Sessions', index=False) | |
| decisions = pd.read_sql_query("SELECT * FROM decisions", conn) | |
| decisions.to_excel(writer, sheet_name='Raw Decisions', index=False) | |
| interactions = pd.read_sql_query("SELECT * FROM chat_interactions", conn) | |
| interactions.to_excel(writer, sheet_name='Raw Interactions', index=False) | |
| trust_metrics = pd.read_sql_query("SELECT * FROM trust_metrics", conn) | |
| trust_metrics.to_excel(writer, sheet_name='Raw Trust Metrics', index=False) | |
| conn.close() | |
| print(f"Full report exported to {filename}") | |
| return filename | |
| if __name__ == "__main__": | |
| import sys | |
| if len(sys.argv) > 1: | |
| command = sys.argv[1] | |
| if command == "stats": | |
| print_quick_stats() | |
| elif command == "export": | |
| export_all() | |
| elif command == "report": | |
| export_full_report() | |
| else: | |
| print(f"Unknown command: {command}") | |
| print("Usage: python export_data.py [stats|export|report]") | |
| else: | |
| print("AI Trading Experiment - Data Export Utility") | |
| print("=" * 50) | |
| print("\nCommands:") | |
| print(" python export_data.py stats - Show quick statistics") | |
| print(" python export_data.py export - Export all data to CSV") | |
| print(" python export_data.py report - Generate full Excel report") | |
| print("\nRunning quick stats by default...\n") | |
| print_quick_stats() | |