| | |
| | |
| |
|
| | from collections import defaultdict |
| |
|
| | from factool.code.helper.io_utils import Tools |
| |
|
# Substrings that mark the end of a generated solution.
# PostProcessor.solution_extract drops everything from the first occurrence
# of any of these markers onward.
STOP_TOKEN = [
    '\nclass',
    '\ndef',
    '\n#',
    '\nif',
    '\nprint',
]
| |
|
class PostProcessor:
    """Static helpers that pair generated samples with their source tasks."""

    @staticmethod
    def map_task_id_for_solution(predict_path, source_path):
        """Build one record per generated sample, enriched with task metadata.

        Tasks are loaded from ``source_path`` with ``Tools.load_tasks`` and
        indexed by their ``'prompt'`` value. Predictions are loaded from
        ``predict_path`` with ``Tools.load_jsonl``; each prediction is matched
        to its task through ``prediction['prompt']``.

        Args:
            predict_path: Location passed to ``Tools.load_jsonl``. Each loaded
                entry must provide ``'prompt'`` and an iterable ``'samples'``.
            source_path: Location passed to ``Tools.load_tasks``. Each loaded
                task must provide ``'prompt'``, ``'task_id'``, ``'test'`` and
                ``'entry_point'``.

        Returns:
            A ``(records, task_count)`` tuple. ``records`` is a list of dicts
            with the keys ``'task_id'``, ``'prompt'``, ``'test'``,
            ``'entry_point'`` and ``'completion'`` (the sample after
            ``solution_extract``); ``task_count`` is ``len()`` of the loaded
            task collection.

        Raises:
            KeyError: If a prediction's prompt does not match any loaded task.
        """
        raw_problems = Tools.load_tasks(source_path)
        # Index tasks by prompt text; a later task with the same prompt
        # replaces an earlier one.
        tasks_by_prompt = {
            problem['prompt']: problem for problem in raw_problems.values()
        }

        records = []
        for prediction in Tools.load_jsonl(predict_path):
            prompt = prediction['prompt']
            task = tasks_by_prompt[prompt]
            for sample in prediction['samples']:
                completion = PostProcessor.solution_extract(sample)
                records.append({
                    'task_id': task['task_id'],
                    'prompt': prompt,
                    'test': task['test'],
                    'entry_point': task['entry_point'],
                    'completion': completion,
                })

        return records, len(raw_problems)

    @staticmethod
    def solution_extract(content):
        """Return ``content`` cut off before the first stop marker it contains.

        Every entry of ``STOP_TOKEN`` is applied in turn to the already
        shortened text, so the result ends right before the earliest
        occurrence of any marker. Text without a marker is returned unchanged.
        """
        for marker in STOP_TOKEN:
            if marker not in content:
                continue
            # Keep only the text that precedes the first occurrence.
            content = content.partition(marker)[0]
        return content