Commit: "Update amazonmix.py" (+16 lines, −13 lines).
The diff below shows the changes to amazonmix.py.
|
@@ -1,19 +1,19 @@
|
|
| 1 |
import re
|
| 2 |
from typing import Dict, Tuple, Optional
|
| 3 |
|
| 4 |
-
def extract_solution(processed_str
|
| 5 |
# Split response to isolate assistant output
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
|
| 14 |
# Extract final answer using XML-style tags
|
| 15 |
answer_pattern = r'<answer>(.*?)</answer>'
|
| 16 |
-
matches = list(re.finditer(answer_pattern, processed_str
|
| 17 |
|
| 18 |
if not matches:
|
| 19 |
print("[Error] No valid answer tags found")
|
|
@@ -42,6 +42,7 @@ def validate_response_structure(processed_str: str) -> bool:
|
|
| 42 |
|
| 43 |
# Check required tags
|
| 44 |
tags = {
|
|
|
|
| 45 |
'think_end': ('</think>', 1),
|
| 46 |
'answer_start': ('<answer>', 1),
|
| 47 |
'answer_end': ('</answer>', 1)
|
|
@@ -59,13 +60,13 @@ def validate_response_structure(processed_str: str) -> bool:
|
|
| 59 |
validation_passed = False
|
| 60 |
|
| 61 |
# Verify tag order
|
| 62 |
-
if (positions['
|
| 63 |
-
positions['think_end'] > positions['
|
| 64 |
positions['answer_start'] > positions['answer_end']):
|
| 65 |
print(" [Error] Incorrect tag order: Expected <think>...</think><answer>...</answer>")
|
| 66 |
validation_passed = False
|
| 67 |
else:
|
| 68 |
-
print("
|
| 69 |
|
| 70 |
return validation_passed
|
| 71 |
|
|
@@ -76,6 +77,8 @@ def parse_ground_truth_text_format(ground_truth):
|
|
| 76 |
format1_match = re.search(r"([a-zA-Z]):", ground_truth)
|
| 77 |
if format1_match:
|
| 78 |
return format1_match.group(1).strip()
|
|
|
|
|
|
|
| 79 |
else:
|
| 80 |
return ground_truth
|
| 81 |
|
|
@@ -98,7 +101,7 @@ def compute_score(solution_str: str, ground_truth: str, method='strict', format_
|
|
| 98 |
ground_truth = parse_ground_truth_text_format(ground_truth)
|
| 99 |
|
| 100 |
# Extract model answer
|
| 101 |
-
answer_text, processed_str=
|
| 102 |
print(f"\n[Model Response]\n{processed_str}")
|
| 103 |
print(f"\n[Processed Model Response]\n{answer_text}")
|
| 104 |
|
|
|
|
| 1 |
import re
|
| 2 |
from typing import Dict, Tuple, Optional
|
| 3 |
|
| 4 |
+
def extract_solution(processed_str):
|
| 5 |
# Split response to isolate assistant output
|
| 6 |
+
if "Assistant:" in solution_str:
|
| 7 |
+
processed_str = solution_str.split("Assistant:", 1)[1]
|
| 8 |
+
elif "<|im_start|>assistant" in solution_str:
|
| 9 |
+
processed_str = solution_str.split("<|im_start|>assistant", 1)[1]
|
| 10 |
+
else:
|
| 11 |
+
print("[Error] Failed to locate model response header")
|
| 12 |
+
return None, solution_str
|
| 13 |
|
| 14 |
# Extract final answer using XML-style tags
|
| 15 |
answer_pattern = r'<answer>(.*?)</answer>'
|
| 16 |
+
matches = list(re.finditer(answer_pattern, processed_str, re.DOTALL))
|
| 17 |
|
| 18 |
if not matches:
|
| 19 |
print("[Error] No valid answer tags found")
|
|
|
|
| 42 |
|
| 43 |
# Check required tags
|
| 44 |
tags = {
|
| 45 |
+
'think_start': ('<think>', 1),
|
| 46 |
'think_end': ('</think>', 1),
|
| 47 |
'answer_start': ('<answer>', 1),
|
| 48 |
'answer_end': ('</answer>', 1)
|
|
|
|
| 60 |
validation_passed = False
|
| 61 |
|
| 62 |
# Verify tag order
|
| 63 |
+
if (positions['think_start'] > positions['think_end'] or
|
| 64 |
+
positions['think_end'] > positions['answer_start'] or
|
| 65 |
positions['answer_start'] > positions['answer_end']):
|
| 66 |
print(" [Error] Incorrect tag order: Expected <think>...</think><answer>...</answer>")
|
| 67 |
validation_passed = False
|
| 68 |
else:
|
| 69 |
+
print(" Tag sequence validation passed")
|
| 70 |
|
| 71 |
return validation_passed
|
| 72 |
|
|
|
|
| 77 |
format1_match = re.search(r"([a-zA-Z]):", ground_truth)
|
| 78 |
if format1_match:
|
| 79 |
return format1_match.group(1).strip()
|
| 80 |
+
else:
|
| 81 |
+
return ground_truth
|
| 82 |
else:
|
| 83 |
return ground_truth
|
| 84 |
|
|
|
|
| 101 |
ground_truth = parse_ground_truth_text_format(ground_truth)
|
| 102 |
|
| 103 |
# Extract model answer
|
| 104 |
+
answer_text, processed_str=extract_solution(processed_str=solution_str)
|
| 105 |
print(f"\n[Model Response]\n{processed_str}")
|
| 106 |
print(f"\n[Processed Model Response]\n{answer_text}")
|
| 107 |
|