Priyansh Saxena committed on
Commit
be50021
·
1 Parent(s): 1435892

fix: roundoff issue

Browse files

Signed-off-by: Priyansh Saxena <priyena.programming@gmail.com>

Files changed (2) hide show
  1. inference.py +3 -2
  2. src/pytorch_debug_env/reward.py +1 -1
inference.py CHANGED
@@ -16,6 +16,7 @@ MAX_STEPS = int(os.environ.get("MAX_STEPS", "5"))
16
  SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
17
  MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
18
  SEED = os.environ.get("SEED")
 
19
 
20
 
21
  def _parse_seed(value: str | None) -> int | None:
@@ -114,7 +115,7 @@ async def _run_task(task: str, client: OpenAI) -> None:
114
  try:
115
  action_text = get_model_message(client, observation, history)
116
  except Exception as exc:
117
- reward = 0.0
118
  done = True
119
  error = f"model_error: {exc}"
120
  rewards.append(reward)
@@ -136,7 +137,7 @@ async def _run_task(task: str, client: OpenAI) -> None:
136
  error = result.get("error")
137
  observation = result.get("observation", observation)
138
  except Exception as exc:
139
- reward = 0.0
140
  done = True
141
  error = f"step_error: {exc}"
142
 
 
16
  SUCCESS_SCORE_THRESHOLD = float(os.environ.get("SUCCESS_SCORE_THRESHOLD", "0.7"))
17
  MAX_TOTAL_REWARD = float(os.environ.get("MAX_TOTAL_REWARD", "1.0"))
18
  SEED = os.environ.get("SEED")
19
+ MIN_LOG_REWARD = 0.01
20
 
21
 
22
  def _parse_seed(value: str | None) -> int | None:
 
115
  try:
116
  action_text = get_model_message(client, observation, history)
117
  except Exception as exc:
118
+ reward = MIN_LOG_REWARD
119
  done = True
120
  error = f"model_error: {exc}"
121
  rewards.append(reward)
 
137
  error = result.get("error")
138
  observation = result.get("observation", observation)
139
  except Exception as exc:
140
+ reward = MIN_LOG_REWARD
141
  done = True
142
  error = f"step_error: {exc}"
143
 
src/pytorch_debug_env/reward.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
 
4
  from .bug_library import BUG_CATEGORIES
5
 
6
- EPSILON = 1e-3
7
 
8
 
9
  def clamp_score(value: float) -> float:
 
3
 
4
  from .bug_library import BUG_CATEGORIES
5
 
6
+ EPSILON = 1e-2
7
 
8
 
9
  def clamp_score(value: float) -> float: