| | """ |
| | Question generation utilities for MCQ and open-text formats. |
| | """ |
| |
|
| | import random |
| | from typing import Dict, List, Optional, Tuple |
| |
|
| | from .logger import setup_logger |
| |
|
| | logger = setup_logger(__name__) |
| |
|
| |
|
class QuestionGenerator:
    """Generates questions in MCQ and open-text formats.

    MCQ builders return a dict with the keys ``question``, ``options``
    (label -> value), ``correct_answer`` (the label of the correct option)
    and ``correct_value``. Open-text builders return a dict with the keys
    ``question`` and ``correct_answer``.

    All randomness comes from the module-level ``random`` generator, so
    seeding ``random`` makes the output reproducible.
    """

    def __init__(
        self,
        num_options: int = 4,
        option_labels: Optional[List[str]] = None,
        distractor_strategy: str = "balanced"
    ):
        """
        Initialize question generator.

        Args:
            num_options: Number of MCQ options (must be at least 1)
            option_labels: Labels for options (e.g., ['A', 'B', 'C', 'D']).
                When omitted (or empty), the first ``num_options`` uppercase
                letters are used.
            distractor_strategy: Strategy for generating distractor options
                - "present_only": prefer sounds present in audio (absent
                  sounds are used only when there are not enough present ones)
                - "mixed": mix of present and absent sounds
                - "balanced": balanced distribution (also used for any
                  unrecognized strategy name)

        Raises:
            ValueError: If ``num_options`` is less than 1, if the labels do
                not match ``num_options`` in length, if labels are duplicated,
                or if default labels are requested for more than 26 options.
        """
        if num_options < 1:
            raise ValueError(f"num_options must be at least 1 (got {num_options})")

        if option_labels:
            # Copy so later mutation of the caller's list cannot affect us.
            labels = list(option_labels)
        else:
            # Derive defaults from num_options instead of hard-coding A-D,
            # so e.g. num_options=5 works without explicit labels.
            if num_options > 26:
                raise ValueError(
                    "option_labels must be provided when num_options exceeds 26"
                )
            labels = [chr(ord("A") + i) for i in range(num_options)]

        if len(labels) != num_options:
            raise ValueError(f"Number of option labels must match num_options ({num_options})")

        # Duplicate labels would silently collapse entries in the option map.
        if len(set(labels)) != len(labels):
            raise ValueError("Option labels must be unique")

        self.num_options = num_options
        self.option_labels = labels
        self.distractor_strategy = distractor_strategy

    def generate_count_mcq(
        self,
        question_template: str,
        correct_count: int,
        all_categories: Optional[List[str]] = None
    ) -> Dict:
        """
        Generate an MCQ for counting task.

        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds
            all_categories: Unused; accepted for interface compatibility

        Returns:
            Dictionary with question, options, and correct answer
        """
        options = self._generate_count_options(correct_count)
        return self._build_mcq(question_template, options, correct_count)

    def generate_count_open_text(
        self,
        question_template: str,
        correct_count: int
    ) -> Dict:
        """
        Generate an open-text question for counting task.

        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds

        Returns:
            Dictionary with question and correct answer (count as a string)
        """
        return {
            "question": question_template,
            "correct_answer": str(correct_count)
        }

    def generate_category_mcq(
        self,
        question_template: str,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ where answer is a sound category.

        Fewer than ``num_options`` options are returned when there are not
        enough distinct categories to choose distractors from.

        Args:
            question_template: Question text template
            correct_category: Correct category
            present_categories: Categories present in the audio
            all_categories: All available categories

        Returns:
            Dictionary with question, options, and correct answer
        """
        distractors = self._generate_category_distractors(
            correct_category,
            present_categories,
            all_categories,
            self.num_options - 1
        )
        options = [correct_category] + distractors
        return self._build_mcq(question_template, options, correct_category)

    def generate_category_open_text(
        self,
        question_template: str,
        correct_category: str
    ) -> Dict:
        """
        Generate an open-text question where answer is a sound category.

        Args:
            question_template: Question text template
            correct_category: Correct category

        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": correct_category
        }

    def generate_sequence_open_text(
        self,
        question_template: str,
        sequence: List[str]
    ) -> Dict:
        """
        Generate an open-text question for sequence/ordering.

        Args:
            question_template: Question text template
            sequence: List of categories in order

        Returns:
            Dictionary with question and correct answer (categories joined
            with ", ")
        """
        return {
            "question": question_template,
            "correct_answer": ", ".join(sequence)
        }

    def _build_mcq(self, question_template: str, options: list, correct_value) -> Dict:
        """
        Shuffle options in place, assign labels, and assemble the MCQ dict.

        Args:
            question_template: Question text template
            options: Option values; must contain ``correct_value``
            correct_value: The value of the correct option

        Returns:
            Dictionary with question, options, and correct answer
        """
        random.shuffle(options)
        correct_label = self.option_labels[options.index(correct_value)]
        # zip stops at the shorter input, so a short option list simply
        # leaves the trailing labels unused.
        option_map = dict(zip(self.option_labels, options))
        return {
            "question": question_template,
            "options": option_map,
            "correct_answer": correct_label,
            "correct_value": correct_value
        }

    def _generate_count_options(self, correct_count: int) -> List[int]:
        """
        Generate count options including the correct count.

        Args:
            correct_count: Correct count value

        Returns:
            List of count options, correct count first, followed by distinct
            positive distractor counts
        """
        num_distractors = self.num_options - 1

        # Candidates are 1..upper-1. The last term widens the pool only when
        # num_options is large, so that enough distinct distractors exist;
        # for the usual small num_options the bound is unchanged.
        upper = max(correct_count + 3, 12, self.num_options + 2)
        candidates = [v for v in range(1, upper) if v != correct_count]

        distractors = random.sample(candidates, min(num_distractors, len(candidates)))
        return [correct_count] + distractors

    def _generate_category_distractors(
        self,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str],
        num_distractors: int
    ) -> List[str]:
        """
        Generate distractor categories based on strategy.

        The result never contains ``correct_category`` and never contains
        duplicates. It may be shorter than ``num_distractors`` when there are
        not enough distinct categories available.

        Args:
            correct_category: Correct category
            present_categories: Categories present in audio
            all_categories: All available categories
            num_distractors: Number of distractors to generate

        Returns:
            List of distractor categories
        """
        if num_distractors <= 0:
            return []

        present_set = set(present_categories)
        # dict.fromkeys de-duplicates while keeping first-seen order, so that
        # repeated input entries cannot yield repeated options.
        present_non_answer = [
            c for c in dict.fromkeys(present_categories) if c != correct_category
        ]
        # Exclude the correct category explicitly: it may be missing from
        # present_categories, and must never be offered as a distractor.
        absent_categories = [
            c for c in dict.fromkeys(all_categories)
            if c not in present_set and c != correct_category
        ]

        distractors: List[str] = []

        if self.distractor_strategy == "present_only":
            if len(present_non_answer) >= num_distractors:
                distractors = random.sample(present_non_answer, num_distractors)
            else:
                # Not enough present sounds: take them all, top up with absent.
                distractors = present_non_answer.copy()
                remaining = num_distractors - len(distractors)
                distractors.extend(
                    random.sample(absent_categories, min(remaining, len(absent_categories)))
                )

        elif self.distractor_strategy == "mixed":
            # Random split between present and absent distractors.
            num_present = random.randint(0, min(len(present_non_answer), num_distractors))
            num_absent = num_distractors - num_present

            if num_present > 0:
                distractors.extend(random.sample(present_non_answer, num_present))
            if num_absent > 0:
                distractors.extend(
                    random.sample(absent_categories, min(num_absent, len(absent_categories)))
                )

        else:
            # "balanced" (and any unrecognized strategy): at most two present
            # distractors, the rest absent.
            num_present_distractor = min(
                random.choice([0, 1, 2]), len(present_non_answer), num_distractors
            )
            num_absent_distractor = num_distractors - num_present_distractor

            if num_present_distractor > 0:
                distractors.extend(random.sample(present_non_answer, num_present_distractor))
            if num_absent_distractor > 0:
                distractors.extend(
                    random.sample(
                        absent_categories,
                        min(num_absent_distractor, len(absent_categories))
                    )
                )

        # Top up from any category not yet used if the strategy fell short.
        if len(distractors) < num_distractors:
            chosen = set(distractors)
            pool = [
                c for c in dict.fromkeys(list(all_categories) + present_non_answer)
                if c != correct_category and c not in chosen
            ]
            while pool and len(distractors) < num_distractors:
                pick = random.choice(pool)
                pool.remove(pick)
                distractors.append(pick)

        return distractors[:num_distractors]
| |
|