"""
AIFinder Configuration
Easy configuration for providers and datasets.
"""

import os

# --- Paths ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, "models")

# ============================================================================
# EASY PROVIDER CONFIGURATION
# Add new providers here! Each entry: (huggingface_dataset, provider_name, model_name, kwargs)
# ============================================================================
PROVIDER_DATASETS = [
    # Anthropic
    ("TeichAI/claude-4.5-opus-high-reasoning-250x", "Anthropic", "Claude 4.5 Opus", {}),
    (
        "TeichAI/claude-sonnet-4.5-high-reasoning-250x",
        "Anthropic",
        "Claude Sonnet 4.5",
        {},
    ),
    (
        "Roman1111111/claude-opus-4.6-10000x",
        "Anthropic",
        "Claude Opus 4.6",
        {"max_samples": 1500},
    ),
    # OpenAI
    ("TeichAI/gpt-5.2-high-reasoning-250x", "OpenAI", "GPT-5.2", {}),
    ("TeichAI/gpt-5.1-high-reasoning-1000x", "OpenAI", "GPT-5.1", {}),
    ("TeichAI/gpt-5.1-codex-max-1000x", "OpenAI", "GPT-5.1 Codex Max", {}),
    ("TeichAI/gpt-5-codex-250x", "OpenAI", "GPT-5 Codex", {}),
    ("TeichAI/gpt-5-codex-1000x", "OpenAI", "GPT-5 Codex", {}),
    # Google
    ("TeichAI/gemini-3-pro-preview-high-reasoning-1000x", "Google", "Gemini 3 Pro", {}),
    ("TeichAI/gemini-3-pro-preview-high-reasoning-250x", "Google", "Gemini 3 Pro", {}),
    (
        "TeichAI/gemini-2.5-flash-11000x",
        "Google",
        "Gemini 2.5 Flash",
        {"max_samples": 1500},
    ),
    ("TeichAI/Gemini-3-Flash-Preview-VIBE", "Google", "Gemini 3 Flash", {}),
    ("TeichAI/gemini-3-flash-preview-1000x", "Google", "Gemini 3 Flash", {}),
    ("TeichAI/gemini-3-flash-preview-complex-1000x", "Google", "Gemini 3 Flash", {}),
    # xAI
    ("TeichAI/brainstorm-v3.1-grok-4-fast-200x", "xAI", "Grok 4 Fast", {}),
    (
        "TeichAI/sherlock-thinking-alpha-11000x",
        "xAI",
        "Grok 4.1 Fast",
        {"max_samples": 1500},
    ),
    ("TeichAI/sherlock-dash-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
    ("TeichAI/sherlock-think-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
    ("TeichAI/grok-code-fast-1-1000x", "xAI", "Grok Code Fast 1", {}),
    # MoonshotAI
    ("TeichAI/kimi-k2-thinking-250x", "MoonshotAI", "Kimi K2", {}),
    ("TeichAI/kimi-k2-thinking-1000x", "MoonshotAI", "Kimi K2", {}),
    # Mistral
    ("TeichAI/mistral-small-creative-500x", "Mistral", "Mistral Small", {}),
    # MiniMax
    ("TeichAI/MiniMax-M2.1-Code-SFT", "MiniMax", "MiniMax M2.1", {"max_samples": 1500}),
    ("TeichAI/convo-v1", "MiniMax", "MiniMax M2.1", {}),
    # StepFun
    (
        "TeichAI/Step-3.5-Flash-2600x",
        "StepFun",
        "Step 3.5 Flash",
        {"max_samples": 1500},
    ),
    # Zhipu
    ("TeichAI/Pony-Alpha-15k", "Zhipu", "GLM-5", {"max_samples": 1500}),
    # DeepSeek
    ("TeichAI/deepseek-v3.2-speciale-1000x", "DeepSeek", "DeepSeek V3.2 Speciale", {}),
    (
        "TeichAI/deepseek-v3.2-speciale-openr1-math-3k",
        "DeepSeek",
        "DeepSeek V3.2 Speciale",
        {"max_samples": 1500},
    ),
    # DeepSeek (a-m-team) - different format
    (
        "a-m-team/AM-DeepSeek-R1-Distilled-1.4M",
        "DeepSeek",
        "DeepSeek R1",
        {"name": "am_0.9M", "max_samples": 1000},
    ),
]

# Auto-generate DATASET_REGISTRY and PROVIDERS from PROVIDER_DATASETS
DEEPSEEK_AM_DATASETS = [
    (ds_id, prov, model, kwargs)
    for ds_id, prov, model, kwargs in PROVIDER_DATASETS
    if "a-m-team" in ds_id
]
DATASET_REGISTRY = [
    (ds_id, prov, model, kwargs)
    for ds_id, prov, model, kwargs in PROVIDER_DATASETS
    if "a-m-team" not in ds_id
]
PROVIDERS = sorted(set(prov for _, prov, _, _ in PROVIDER_DATASETS))

# ============================================================================
# FEATURE PARAMETERS
# ============================================================================
TFIDF_WORD_PARAMS = {
    "analyzer": "word",
    "ngram_range": (1, 2),
    "max_features": 20,
    "sublinear_tf": True,
    "min_df": 3,
    "max_df": 0.7,
}

TFIDF_CHAR_PARAMS = {
    "analyzer": "char_wb",
    "ngram_range": (2, 4),
    "max_features": 20,
    "sublinear_tf": True,
    "min_df": 3,
    "max_df": 0.7,
    "smooth_idf": True,
}

# ============================================================================
# TRAINING PARAMETERS
# ============================================================================
MAX_SAMPLES_PER_PROVIDER = 1000
TEST_SIZE = 0.15
VAL_SIZE = 0.10
RANDOM_STATE = 42

# Neural Network (unused currently, but kept for reference)
HIDDEN_DIM = 256
EMBED_DIM = 128
DROPOUT = 0.7
BATCH_SIZE = 128
EPOCHS = 80
EARLY_STOP_PATIENCE = 25
LEARNING_RATE = 3e-5
WEIGHT_DECAY = 8e-2
LABEL_SMOOTHING = 0.3