"""Smoke-test script for PreferenceLabEnvironment.

For each of the three task types ('pairwise', 'likert', 'consistency') the
script resets the environment, prints a short preview of the returned
observation, submits one hard-coded action, and prints the resulting reward
and done flag. Finally it prints a dump of the environment state.
"""

from server.environment import PreferenceLabEnvironment
from models import PairwiseAction, LikertAction, ConsistencyAction

# A single environment instance is reused; each task begins with its own reset().
env = PreferenceLabEnvironment()

# Task 1 – Pairwise
# The same seed (42) is passed to every reset — presumably so runs are
# reproducible; confirm against the environment implementation.
obs = env.reset(seed=42, task_type='pairwise')
print('TASK 1 prompt:', obs.prompt[:60])  # only the first 60 characters are shown
obs = env.step(PairwiseAction(choice='A'))
print('Reward:', obs.reward, '| Done:', obs.done)

# Task 2 – Likert
obs = env.reset(seed=42, task_type='likert')
print('\nTASK 2 response:', obs.response[:60])
obs = env.step(
    LikertAction(
        helpfulness=5,
        honesty=5,
        harmlessness=5,
        instruction_following=5,
    )
)
print('Reward:', obs.reward, '| Done:', obs.done)

# Task 3 – Consistency
obs = env.reset(seed=42, task_type='consistency')
print('\nTASK 3 prompt:', obs.prompt[:60])
obs = env.step(ConsistencyAction(ranking=['C', 'A', 'B', 'D']))
print('Reward:', obs.reward, '| Done:', obs.done)

# State (now a property, not a method call)
state = env.state
# NOTE(review): model_dump() suggests state is a Pydantic v2 model — confirm.
print('\nState:', state.model_dump())