# Spaces: Running
"""Smoke-test script for ``PreferenceLabEnvironment``.

Runs one ``reset``/``step`` round for each of the three task types used
below ('pairwise', 'likert', 'consistency'), always with ``seed=42``.
For every task it prints the first 60 characters of a text field on the
observation returned by ``reset`` and then the ``reward`` and ``done``
attributes of the observation returned by ``step``.  Finally it dumps
the environment state.
"""
from server.environment import PreferenceLabEnvironment
from models import PairwiseAction, LikertAction, ConsistencyAction

# A single environment instance is reused; each task begins with reset().
env = PreferenceLabEnvironment()

# Task 1 – Pairwise
obs = env.reset(seed=42, task_type='pairwise')
print('TASK 1 prompt:', obs.prompt[:60])  # preview only: first 60 chars
obs = env.step(PairwiseAction(choice='A'))
print('Reward:', obs.reward, '| Done:', obs.done)

# Task 2 – Likert
obs = env.reset(seed=42, task_type='likert')
# This task previews ``obs.response`` rather than ``obs.prompt``.
print('\nTASK 2 response:', obs.response[:60])
obs = env.step(
    LikertAction(
        helpfulness=5,
        honesty=5,
        harmlessness=5,
        instruction_following=5,
    )
)
print('Reward:', obs.reward, '| Done:', obs.done)

# Task 3 – Consistency
obs = env.reset(seed=42, task_type='consistency')
print('\nTASK 3 prompt:', obs.prompt[:60])
obs = env.step(ConsistencyAction(ranking=['C', 'A', 'B', 'D']))
print('Reward:', obs.reward, '| Done:', obs.done)

# State (now a property, not a method call)
state = env.state
print('\nState:', state.model_dump())