| """Testing tab UI components.""" |
|
|
import json
import os
import sys
|
|
| import gradio as gr |
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| from utils.constants import MODELS, TEST_EXAMPLES |
| from utils.model_interface import extract_model_id, get_model_info |
| from utils.helpers import check_token_availability |
|
|
|
|
| def parse_json_response(response: str) -> dict: |
| """Parse JSON response, handling code blocks.""" |
| response = response.strip() |
| try: |
| if "```json" in response: |
| response = response.split("```json")[1].split("```")[0] |
| elif "```" in response: |
| response = response.split("```")[1].split("```")[0] |
| return json.loads(response) |
| except json.JSONDecodeError: |
| return {"label": -1, "categories": []} |
|
|
|
|
def format_model_info(model_choice: str | None, reasoning_effort: str | None) -> str:
| """Format model information markdown.""" |
| if not model_choice: |
| return "*Select a model in Configuration tab*" |
| |
| model_id = extract_model_id(model_choice) |
| if not model_id: |
| return "*Select a model in Configuration tab*" |
| |
| model_info = get_model_info(model_id) |
| |
| if not model_info: |
| return f"*Model: {model_id}*" |
| |
| model_name = model_info.get("name", model_id) |
| is_thinking = model_info.get("is_thinking", False) |
| supports_reasoning_level = model_info.get("supports_reasoning_level", False) |
| |
| |
    reasoning_effort_val = reasoning_effort or "Low"
| |
| info_lines = [ |
| f"**Model:** {model_name}", |
| f"- **Thinking Model:** {'Yes' if is_thinking else 'No'}", |
| f"- **Supports Reasoning Level:** {'Yes' if supports_reasoning_level else 'No'}", |
| ] |
| |
| if supports_reasoning_level: |
| info_lines.append(f"- **Reasoning Effort:** {reasoning_effort_val}") |
| |
| return "\n".join(info_lines) |
|
|
def format_reasoning_info(model_choice: str | None, reasoning_text: str | None) -> tuple[str, bool]:
| """Format reasoning info markdown and visibility.""" |
| if not model_choice: |
| return "", False |
| |
| model_id = extract_model_id(model_choice) |
| model_info = get_model_info(model_id) |
| |
| if not model_info: |
| return "", False |
| |
| is_thinking = model_info.get("is_thinking", False) |
| |
| |
    if not is_thinking:
        return "*This model does not provide reasoning traces.*", True

    # Thinking models show their trace in the dedicated reasoning box, so no
    # extra info text is needed here whether or not reasoning_text is present.
    return "", False
|
|
|
|
| def format_save_mode_help(has_personal: bool, has_org: bool) -> str: |
| """ |
| Format help text explaining save mode options. |
| |
| Args: |
| has_personal: Whether personal token is available |
| has_org: Whether org token is available |
| |
| Returns: |
| Help text string |
| """ |
| lines = [] |
| |
| if not has_personal and not has_org: |
| lines.append("*⚠️ No tokens available. Please log in or set tokens to save results.*") |
| else: |
| if has_org: |
| lines.append("*✅ ROOST Dataset: Available (org token set)*") |
| else: |
| lines.append("*❌ ROOST Dataset: Requires org token (HACKATHON_INFERENCE_TOKEN)*") |
| |
| if has_personal: |
| lines.append("*✅ Private Dataset: Available (personal token set)*") |
| else: |
| lines.append("*❌ Private Dataset: Requires personal token (OAuth login or .env)*") |
| |
| return "\n".join(lines) |
|
|
|
|
| def format_test_result(result: dict) -> tuple[str, dict, str, str, str]: |
| """ |
| Format test result for display. |
| |
| Returns: |
| Tuple of (label_text, parsed_json, categories_text, reasoning_text, raw_response) |
| """ |
| raw_content = result.get("content", "") |
| parsed = parse_json_response(raw_content) |
| label = parsed.get("label", -1) |
| categories = parsed.get("categories", []) |
|
|
| label_text = ( |
| "## ❌ Policy Violation Detected" if label == 1 |
| else "## ✅ No Policy Violation" if label == 0 |
| else "## ⚠️ Unable to determine label" |
| ) |
|
|
    if categories:
| cat_text = "### Categories:\n\n" |
| for cat in categories: |
| category_name = cat.get('category', 'Unknown') |
| reasoning_text = cat.get('reasoning', 'No reasoning provided') |
| policy_source = cat.get('policy_source', '') |
| |
| cat_text += f"- **Category:** {category_name}\n" |
| cat_text += f" - **Explanation:** {reasoning_text}\n" |
| if policy_source: |
| cat_text += f" - **Policy Source:** {policy_source}\n" |
| cat_text += "\n\n" |
| else: |
| cat_text = "*No categories found in response*\n\n" |
| cat_text += "This output expects a valid JSON response, as specified for example in the default prompt.\n\n" |
| cat_text += "The raw response can be seen in the Model Response section below." |
|
|
| reasoning = result.get("reasoning", "") |
| |
| |
| raw_response_text = f"```\n{raw_content}\n```" |
| |
| return label_text, parsed, cat_text, reasoning or "", raw_response_text |
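
# format_test_result assumes the inference layer returns a dict with a "content"
# key (raw model output) and an optional "reasoning" key (thinking trace), e.g.:
#   result = {"content": '```json\n{"label": 0, "categories": []}\n```', "reasoning": ""}
#   format_test_result(result)[0]  -> "## ✅ No Policy Violation"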
|
|
|
|
| def build_testing_tab() -> dict: |
| """Build the testing tab UI and set up simple handlers.""" |
| with gr.Tab("🧪 Testing"): |
| with gr.Row(): |
| with gr.Column(scale=1): |
| gr.Markdown("### Input") |
| with gr.Group(): |
| test_input = gr.Textbox(label="Test Content", placeholder="Enter content to test...", lines=5) |
| example_dropdown = gr.Dropdown(label="Load Example", choices=list(TEST_EXAMPLES.keys()), value=None) |
| load_example_btn = gr.Button("Load Example", variant="secondary") |
| run_test_btn = gr.Button("Run Test", variant="primary") |
| save_mode = gr.Radio( |
| label="Save to Dataset", |
| choices=["Don't Save", "Save to ROOST Dataset", "Save to Private Dataset"], |
| value="Don't Save" |
| ) |
| |
| has_personal, has_org = check_token_availability(None) |
| save_mode_help = gr.Markdown( |
| value=format_save_mode_help(has_personal, has_org), |
| visible=True |
| ) |
| |
                # Show info for the default model until a choice is made in the Configuration tab.
                initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
                model_info_display = gr.Markdown(value=format_model_info(initial_model, "Low"))
|
|
| with gr.Column(scale=2): |
| gr.Markdown("### Results") |
| label_display = gr.Markdown(value="*Run a test to see results*") |
| with gr.Accordion("Categories & Reasoning", open=True): |
| categories_display = gr.Markdown(value="*No categories yet*") |
| with gr.Accordion("Model Response", open=False): |
| model_response_display = gr.Markdown(value="*No response yet*") |
| with gr.Accordion("Reasoning Trace", open=False): |
| reasoning_info = gr.Markdown(value="", visible=False) |
| reasoning_display = gr.Code(label="", language=None, value="", visible=False) |
|
|
| |
| load_example_btn.click( |
| lambda name: TEST_EXAMPLES.get(name, ""), |
| inputs=example_dropdown, |
| outputs=test_input, |
| ) |
|
|
| return { |
| "test_input": test_input, |
| "example_dropdown": example_dropdown, |
| "load_example_btn": load_example_btn, |
| "run_test_btn": run_test_btn, |
| "save_mode": save_mode, |
| "save_mode_help": save_mode_help, |
| "model_info_display": model_info_display, |
| "label_display": label_display, |
| "categories_display": categories_display, |
| "model_response_display": model_response_display, |
| "reasoning_info": reasoning_info, |
| "reasoning_display": reasoning_display, |
| } |
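

if __name__ == "__main__":
    # Minimal standalone preview of this tab (illustrative sketch only).
    # The full app composes this tab alongside the Configuration tab and wires
    # run_test_btn / save_mode to the inference and dataset-saving handlers there.
    with gr.Blocks() as demo:
        build_testing_tab()
    demo.launch()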
|
|