from dotenv import load_dotenv

from evoagentx.agents.agent_manager import AgentManager
from evoagentx.benchmark import MATH
from evoagentx.core.callbacks import suppress_logger_info
from evoagentx.core.logging import logger
from evoagentx.evaluators import Evaluator
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers import TextGradOptimizer
from evoagentx.prompts import StringTemplate
from evoagentx.workflow import SequentialWorkFlowGraph

load_dotenv()
|
|
class MathSplits(MATH):
    """MATH benchmark with deterministic train/dev/test splits carved out of the test set."""

    def _load_data(self):
        """Load the full MATH data, then deterministically split the test set.

        Fix: use a dedicated ``RandomState`` instead of ``np.random.seed(42)`` +
        module-level ``np.random.permutation`` — seeding the module mutates the
        *global* NumPy RNG as a side effect for the whole process.
        ``RandomState(42).permutation`` yields the exact same indices as the
        legacy global-seed call, so the splits are unchanged.
        """
        super()._load_data()

        import numpy as np
        rng = np.random.RandomState(42)
        permutation = rng.permutation(len(self._test_data))
        full_test_data = self._test_data

        # Disjoint splits by construction: 10 train / 40 dev / 100 test.
        self._train_data = [full_test_data[idx] for idx in permutation[:10]]
        self._dev_data = [full_test_data[idx] for idx in permutation[10:50]]
        self._test_data = [full_test_data[idx] for idx in permutation[50:150]]
|
|
|
|
def collate_func(example: dict) -> dict:
    """Map a raw benchmark example to the workflow's input dict.

    Only the "problem" field is forwarded to the agent; all other
    keys of the example are dropped.
    """
    problem_text = example["problem"]
    return {"problem": problem_text}
|
|
|
|
# Declarative spec for a one-step sequential workflow: a single
# "answer_generate" task that maps a math problem string to an answer string.
math_graph_data = {
    "goal": r"Answer the math question. The answer should be in box format, e.g., \boxed{123}",
    "tasks": [
        {
            "name": "answer_generate",
            "description": "Answer generation for Math.",
            "inputs": [
                {"name": "problem", "type": "str", "required": True, "description": "The problem to solve."}
            ],
            "outputs": [
                {"name": "answer", "type": "str", "required": True, "description": "The generated answer."}
            ],
            # NOTE(review): doubled braces presumably escape StringTemplate's
            # placeholder syntax so the literal \boxed{123} survives — confirm.
            "prompt_template": StringTemplate(instruction="Answer the math question. The answer should be in box format, e.g., \\boxed{{123}}\n"),
            # Use the raw LLM output string as the answer (no structured parsing).
            "parse_mode": "str"
        }
    ]
}
|
|
|
|
def main():
    """Run TextGrad prompt optimization on a single-step MATH workflow.

    Steps: build the executor/optimizer LLMs, register workflow agents,
    measure baseline test accuracy, run TextGrad optimization, restore the
    best graph found, and measure test accuracy again.
    """
    # Cheap model executes the workflow; a stronger model produces the
    # textual gradients that drive prompt updates.
    executor_llm = OpenAILLM(config=OpenAILLMConfig(model="gpt-4o-mini"))
    optimizer_llm = OpenAILLM(config=OpenAILLMConfig(model="gpt-4o"))

    benchmark = MathSplits()
    workflow_graph = SequentialWorkFlowGraph.from_dict(math_graph_data)
    agent_manager = AgentManager()
    agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)

    evaluator = Evaluator(
        llm=executor_llm,
        agent_manager=agent_manager,
        collate_func=collate_func,
        num_workers=20,
        verbose=True,
    )

    textgrad_optimizer = TextGradOptimizer(
        graph=workflow_graph,
        optimize_mode="all",
        executor_llm=executor_llm,
        optimizer_llm=optimizer_llm,
        batch_size=3,
        max_steps=20,
        evaluator=evaluator,
        eval_every_n_steps=1,
        eval_rounds=1,
        save_interval=None,
        save_path="./",
        rollback=True,
        constraints=[],
    )

    # Baseline score before any prompt updates.
    logger.info("Evaluating workflow on test set...")
    with suppress_logger_info():
        results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode="test")
    logger.info(f"Evaluation metrics (before optimization): {results}")

    logger.info("Optimizing workflow...")
    textgrad_optimizer.optimize(benchmark, seed=8)
    # Roll the graph back to the best-scoring checkpoint seen during optimization.
    textgrad_optimizer.restore_best_graph()

    # Post-optimization score on the same held-out test split.
    logger.info("Evaluating workflow on test set...")
    with suppress_logger_info():
        results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode="test")
    logger.info(f"Evaluation metrics (after optimization): {results}")
|
|
|
|
if __name__ == "__main__":
    # Run the optimization pipeline only when executed as a script, not on import.
    main()
|
|