| | """ |
| | Multi-Agent EvoPrompt Workflow Example |
| | |
| | This script demonstrates multi-prompt evolution using ensemble voting strategies. |
| | It optimizes multiple prompts simultaneously to improve task performance through |
| | collaborative evolutionary optimization. |
| | """ |
| |
|
| | import asyncio |
| | import os |
| | import re |
| | from collections import Counter |
| |
|
| | from dotenv import load_dotenv |
| | from evoagentx.core.logging import logger |
| |
|
| | from evoagentx.optimizers.evoprompt_optimizer import DEOptimizer, GAOptimizer |
| | from evoagentx.benchmark.bigbenchhard import BIGBenchHard |
| | from evoagentx.models import OpenAILLM, OpenAILLMConfig |
| | from evoagentx.optimizers.engine.registry import ParamRegistry |
| |
|
| |
|
class SarcasmClassifierProgram:
    """
    Multi-prompt ensemble classifier using a majority-voting strategy.

    Three independent prompt "voters" each answer the same input; the most
    common extracted answer wins. The three prompts can evolve independently,
    which is what the evolutionary optimizers exploit for ensemble diversity.
    """

    def __init__(self, model: "OpenAILLM"):
        """
        Initialize the multi-prompt ensemble classifier.

        Args:
            model: The language model used for inference. Must expose a
                ``generate(prompt=...)`` method returning an object with a
                ``.content`` string attribute.
        """
        self.model = model

        # Three independent voter prompts; each is tracked separately in the
        # ParamRegistry so the optimizers can evolve them individually.
        self.prompt_direct = "As a straightforward responder, follow the task instruction exactly and provide the final answer."
        self.prompt_expert = "As an expert assistant, interpret the task instruction carefully and provide the final answer."
        self.prompt_cot = "As a thoughtful assistant, think step-by-step, then follow the task instruction and provide the final answer."
        self.task_instruction = "Respond with your final answer wrapped like this: FINAL_ANSWER(ANSWER)"

    def __call__(self, input: str) -> tuple[str, dict]:
        """
        Execute an ensemble prediction using majority voting.

        Args:
            input: The input text to process.

        Returns:
            Tuple of ``(final_answer, metadata)``. ``metadata["votes"]`` holds
            the individual extracted answers; ``final_answer`` is ``"N/A"``
            when no voter produced a parseable answer.
        """
        prompts = [self.prompt_direct, self.prompt_expert, self.prompt_cot]
        # BUG FIX: the task instruction asks the model to emit
        # FINAL_ANSWER(ANSWER), but the previous pattern searched for
        # "the answer is ...", so compliant responses were never parsed and
        # every vote was dropped. Match the instructed wrapper instead.
        # Compiled once, outside the loop.
        pattern = re.compile(r"FINAL_ANSWER\((.*?)\)", re.IGNORECASE)

        answers = []
        for prompt in prompts:
            full_prompt = f"{prompt}\n\n{self.task_instruction}\n\nText:\n{input}"
            response = self.model.generate(prompt=full_prompt)
            prediction = response.content.strip()

            match = pattern.search(prediction)
            if match:
                # Strip whitespace so equivalent votes are counted together.
                answers.append(match.group(1).strip())

        if not answers:
            # No voter produced a parseable answer.
            return "N/A", {"votes": []}

        vote_counts = Counter(answers)
        most_common_answer = vote_counts.most_common(1)[0][0]

        return most_common_answer, {"votes": answers}

    def save(self, path: str):
        """Save program state (placeholder for future implementation)."""
        pass

    def load(self, path: str):
        """Load program state (placeholder for future implementation)."""
        pass
| |
|
async def main():
    """
    Run multi-agent EvoPrompt optimization (DE then GA) over several BBH tasks.

    Raises:
        ValueError: If OPENAI_API_KEY is not set in the environment.
    """
    load_dotenv()
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")

    # Experiment hyperparameters.
    POPULATION_SIZE = 4
    ITERATIONS = 10
    CONCURRENCY_LIMIT = 100
    COMBINATION_SAMPLE_SIZE = 3
    DEV_SAMPLE_NUM = 15

    # LLM driving the evolutionary operators: some sampling diversity
    # (temperature 0.5, top_p 0.95) helps mutation/crossover variety.
    evo_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=openai_api_key,
        stream=False,
        top_p=0.95,
        temperature=0.5,
    )

    # Deterministic LLM (temperature 0) for task inference/evaluation.
    eval_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=openai_api_key,
        stream=False,
        temperature=0,
    )
    llm = OpenAILLM(config=eval_llm_config)

    tasks = [
        "snarks",
        "sports_understanding",
        "logical_deduction_three_objects",
        "dyck_languages",
        "multistep_arithmetic_two",
    ]

    for task_name in tasks:
        logger.info(f"=== Task: {task_name} ===")

        benchmark = BIGBenchHard(task_name, dev_sample_num=DEV_SAMPLE_NUM, seed=10)

        for optimizer_cls, label in ((DEOptimizer, "DE"), (GAOptimizer, "GA")):
            # BUG FIX: build a fresh program and registry per optimizer.
            # Previously one program instance was shared, so GA started from
            # the prompts DE had already optimized (its in-place mutations)
            # instead of the seed prompts, biasing the DE-vs-GA comparison.
            program = SarcasmClassifierProgram(model=llm)
            registry = ParamRegistry()
            registry.track(program, "prompt_direct", name="direct_prompt_node")
            registry.track(program, "prompt_expert", name="expert_prompt_node")
            registry.track(program, "prompt_cot", name="cot_prompt_node")

            optimizer = optimizer_cls(
                registry=registry,
                program=program,
                population_size=POPULATION_SIZE,
                iterations=ITERATIONS,
                llm_config=evo_llm_config,
                concurrency_limit=CONCURRENCY_LIMIT,
                combination_sample_size=COMBINATION_SAMPLE_SIZE,
                enable_logging=True,
            )
            logger.info(f"Starting {label} optimization...")
            await optimizer.optimize(benchmark=benchmark)
            logger.info(f"{label} optimization completed. Starting evaluation...")
            metrics = await optimizer.evaluate(benchmark=benchmark, eval_mode="test")
            logger.info(f"{label} results for {task_name}: {metrics['accuracy']}")
| |
|
| |
|
if __name__ == "__main__":
    # Script entry point: drive the async workflow to completion.
    asyncio.run(main())
| |
|