iLOVE2D
/

selfevolveagent

Model card Files Files and versions

selfevolveagent / examples /aflow /humaneval /optimized /processed_experience.json

iLOVE2D's picture

Upload 2846 files

5374a2d verified 2 months ago

history blame contribute delete

4.21 kB

	{
	"5": {
	"score": 0.8181818181818181,
	"success": {
	"11": {
	"modification": "modify: added a new step to create a list of multiple solutions from CustomCodeGenerate before testing and applied ScEnsemble for solution selection.",
	"score": 0.8242424242424242
	},
	"10": {
	"modification": "Add ScEnsemble operator for better solution selection.",
	"score": 0.8303030303030303
	}
	},
	"failure": {
	"8": {
	"modification": "(Added ScEnsemble operator to improve solution selection based on ensemble method)",
	"score": 0.7878787878787878
	}
	}
	},
	"1": {
	"score": 0.8363636363636363,
	"success": {},
	"failure": {
	"9": {
	"modification": "(Added self-ensemble approach after generating multiple solutions to improve reliability)",
	"score": 0.6606060606060605
	},
	"18": {
	"modification": "modify: Added usage of the ScEnsemble operator after generating multiple solutions to improve the final selection.",
	"score": 0.0
	},
	"16": {
	"modification": "modified - incorporated the ScEnsemble operator to enhance the selection process by integrating multiple solutions' insights and improving the overall reliability of the result",
	"score": 0.8121212121212121
	},
	"6": {
	"modification": "modify - added ensemble operator for improved selection of solutions",
	"score": 0.8
	}
	}
	},
	"0": {
	"score": 0.8,
	"success": {
	"19": {
	"modification": "(add: Test operator to validate solutions, modify: include ScEnsemble for optimal selection)",
	"score": 0.8242424242424242
	},
	"3": {
	"modification": "(add Test operator to validate the generated solution before returning it)",
	"score": 0.806060606060606
	},
	"4": {
	"modification": "(added a Test operator to validate the solution before returning it and iterate if necessary)",
	"score": 0.8121212121212121
	},
	"5": {
	"modification": "add: self.test = operator.Test(self.llm)",
	"score": 0.8181818181818181
	},
	"1": {
	"modification": "(added a new operator Test to validate the solution before providing it)",
	"score": 0.8363636363636363
	},
	"2": {
	"modification": "(add a step to gather multiple solutions, then test the best solution)",
	"score": 0.8121212121212121
	}
	},
	"failure": {}
	},
	"15": {
	"score": 0.8363636363636363,
	"success": {},
	"failure": {
	"17": {
	"modification": "(such as:add /delete /modify/ ...)",
	"score": 0.08484848484848484
	}
	}
	},
	"11": {
	"score": 0.8242424242424242,
	"success": {
	"15": {
	"modification": "(Add error handling to improve robustness)",
	"score": 0.8363636363636363
	}
	},
	"failure": {}
	},
	"2": {
	"score": 0.8121212121212121,
	"success": {},
	"failure": {
	"7": {
	"modification": "modify: Added an operator to validate the solution before testing",
	"score": 0.0
	}
	}
	},
	"10": {
	"score": 0.8303030303030303,
	"success": {},
	"failure": {
	"14": {
	"modification": "Introduce insight gathering for common test failure patterns before attempting modifications to solutions.",
	"score": 0.8121212121212121
	},
	"13": {
	"modification": "(such as:add /delete /modify/ ...)",
	"score": 0.0
	}
	}
	}
	}