Spaces:

Protolaw
/

synplanner_dev

Sleeping

synplanner_dev / synplan /interfaces /cli.py

Gilmullin Almaz

Refactor code structure for improved readability and maintainability

72a3513 9 months ago

14.4 kB

	"""Module containing command-line scripts for training and planning steps."""

	import os
	import warnings
	from pathlib import Path

	import click
	import yaml

	from synplan.chem.data.filtering import ReactionFilterConfig, filter_reactions_from_file
	from synplan.chem.data.standardizing import (
	ReactionStandardizationConfig,
	standardize_reactions_from_file,
	)
	from synplan.chem.reaction_rules.extraction import extract_rules_from_reactions
	from synplan.chem.reaction_routes.clustering import run_cluster_cli
	from synplan.chem.utils import standardize_building_blocks
	from synplan.mcts.search import run_search
	from synplan.ml.training.supervised import create_policy_dataset, run_policy_training
	from synplan.ml.training.reinforcement import run_updating
	from synplan.utils.config import (
	PolicyNetworkConfig,
	RuleExtractionConfig,
	TreeConfig,
	TuningConfig,
	ValueNetworkConfig,
	)
	from synplan.utils.loading import download_all_data
	from synplan.utils.visualisation import (
	routes_clustering_report,
	routes_subclustering_report,
	)

	# Install a blanket "ignore" filter at import time: no message, category or
	# module restriction is given, so every warning is suppressed for the whole
	# process (presumably to keep the CLI output readable).
	warnings.filterwarnings("ignore")


	# Root click command group. The commands defined further down register
	# themselves on it through ``@synplan.command(...)``. The docstring below is
	# what click prints as the group's ``--help`` description.
	@click.group(name="synplan")
	def synplan():
	"""SynPlanner command line interface."""


	@synplan.command(name="download_all_data")
	@click.option(
	    "--save_to",
	    "save_to",
	    default=".",
	    show_default=True,
	    help="Path to the folder where downloaded data will be stored.",
	)
	def download_all_data_cli(save_to: str = ".") -> None:
	    """Downloads all data for training, planning and benchmarking SynPlanner."""
	    # Click always passes ``save_to`` as an explicit keyword argument, so an
	    # option without ``default=`` would deliver ``None`` here and the "."
	    # default in the signature would never be consulted. The fallback is
	    # therefore declared on the option itself.
	    download_all_data(save_to=save_to)


	@synplan.command(name="building_blocks_standardizing")
	@click.option(
	    "--input",
	    "input_file",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with building blocks to be standardized.",
	)
	@click.option(
	    "--output",
	    "output_file",
	    type=click.Path(),
	    required=True,
	    help="Path to the file where standardized building blocks will be stored.",
	)
	def building_blocks_standardizing_cli(input_file: str, output_file: str) -> None:
	    """Standardizes building blocks."""
	    # The docstring above is shown by click as the command help.
	    # Both paths are forwarded unchanged; ``--input`` must already exist
	    # (enforced by ``click.Path(exists=True)``), ``--output`` need not.
	    io_paths = {"input_file": input_file, "output_file": output_file}
	    standardize_building_blocks(**io_paths)


	@synplan.command(name="reaction_standardizing")
	@click.option(
	    "--config",
	    "config_path",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the configuration file for reactions standardizing.",
	)
	@click.option(
	    "--input",
	    "input_file",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with reactions to be standardized.",
	)
	@click.option(
	    "--output",
	    "output_file",
	    required=True,
	    type=click.Path(),
	    help="Path to the file where standardized reactions will be stored.",
	)
	@click.option(
	    "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
	)
	def reaction_standardizing_cli(
	    config_path: str, input_file: str, output_file: str, num_cpus: int
	) -> None:
	    """Standardizes reactions and removes duplicates."""
	    # ``--output`` is now required, matching the other data-processing
	    # commands in this module. Previously it had neither ``required`` nor
	    # ``default``, so omitting it passed ``None`` as the output path.
	    stand_config = ReactionStandardizationConfig.from_yaml(config_path)
	    standardize_reactions_from_file(
	        config=stand_config,
	        input_reaction_data_path=input_file,
	        standardized_reaction_data_path=output_file,
	        num_cpus=num_cpus,
	        # Fixed batch size used by all data-processing commands in this module.
	        batch_size=100,
	    )


	@synplan.command(name="reaction_filtering")
	@click.option(
	    "--config",
	    "config_path",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the configuration file for reactions filtering.",
	)
	@click.option(
	    "--input",
	    "input_file",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with reactions to be filtered.",
	)
	@click.option(
	    "--output",
	    "output_file",
	    required=True,
	    type=click.Path(),
	    help="Path to the file where successfully filtered reactions will be stored.",
	)
	@click.option(
	    "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
	)
	def reaction_filtering_cli(
	    config_path: str, input_file: str, output_file: str, num_cpus: int
	) -> None:
	    """Filters erroneous reactions."""
	    # ``--output`` names a file, so the old default of ``Path("./")`` (a
	    # directory) could never be a valid target. The option is now required,
	    # like the other data-processing commands in this module.
	    #
	    # ``from_yaml`` is called on the class, as for every other config in this
	    # module, instead of on a throwaway default-constructed instance.
	    # NOTE(review): assumes ``ReactionFilterConfig.from_yaml`` is a
	    # class/static method like its siblings -- confirm.
	    reaction_check_config = ReactionFilterConfig.from_yaml(config_path)
	    filter_reactions_from_file(
	        config=reaction_check_config,
	        input_reaction_data_path=input_file,
	        filtered_reaction_data_path=output_file,
	        num_cpus=num_cpus,
	        # Fixed batch size used by all data-processing commands in this module.
	        batch_size=100,
	    )


	@synplan.command(name="rule_extracting")
	@click.option(
	    "--config",
	    "config_path",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the configuration file for reaction rules extracting.",
	)
	@click.option(
	    "--input",
	    "input_file",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with reactions for reaction rules extraction.",
	)
	@click.option(
	    "--output",
	    "output_file",
	    type=click.Path(),
	    required=True,
	    help="Path to the file where extracted reaction rules will be stored.",
	)
	@click.option(
	    "--num_cpus",
	    type=int,
	    default=4,
	    help="The number of CPUs to use for processing.",
	)
	def rule_extracting_cli(
	    config_path: str, input_file: str, output_file: str, num_cpus: int
	):
	    """Reaction rules extraction."""
	    # The docstring above is shown by click as the command help.
	    # Load the extraction settings from YAML, then hand everything to the
	    # extraction routine in one keyword bundle.
	    extraction_kwargs = {
	        "config": RuleExtractionConfig.from_yaml(config_path),
	        "reaction_data_path": input_file,
	        "reaction_rules_path": output_file,
	        "num_cpus": num_cpus,
	        "batch_size": 100,
	    }
	    extract_rules_from_reactions(**extraction_kwargs)


	@synplan.command(name="ranking_policy_training")
	@click.option(
	    "--config",
	    "config_path",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the configuration file for ranking policy training.",
	)
	@click.option(
	    "--reaction_data",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with reactions for ranking policy training.",
	)
	@click.option(
	    "--reaction_rules",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with extracted reaction rules.",
	)
	@click.option(
	    "--results_dir",
	    type=click.Path(),
	    default=Path("."),
	    help="Path to the directory where the trained policy network will be stored.",
	)
	@click.option(
	    "--num_cpus",
	    type=int,
	    default=4,
	    help="The number of CPUs to use for training set preparation.",
	)
	def ranking_policy_training_cli(
	    config_path: str,
	    reaction_data: str,
	    reaction_rules: str,
	    results_dir: str,
	    num_cpus: int,
	) -> None:
	    """Ranking policy network training."""
	    # The docstring above is shown by click as the command help.
	    # Load the network settings and force the policy flavour to "ranking",
	    # whatever the YAML file says.
	    network_config = PolicyNetworkConfig.from_yaml(config_path)
	    network_config.policy_type = "ranking"

	    # The prepared dataset is written next to the training results.
	    dataset_path = os.path.join(results_dir, "policy_dataset.dt")
	    dataset_kwargs = {
	        "reaction_rules_path": reaction_rules,
	        "molecules_or_reactions_path": reaction_data,
	        "output_path": dataset_path,
	        "dataset_type": "ranking",
	        "batch_size": network_config.batch_size,
	        "num_cpus": num_cpus,
	    }
	    training_data = create_policy_dataset(**dataset_kwargs)

	    run_policy_training(
	        training_data, config=network_config, results_path=results_dir
	    )


	@synplan.command(name="filtering_policy_training")
	@click.option(
	    "--config",
	    "config_path",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the configuration file for filtering policy training.",
	)
	@click.option(
	    "--molecule_data",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with molecules for filtering policy training.",
	)
	@click.option(
	    "--reaction_rules",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with extracted reaction rules.",
	)
	@click.option(
	    "--results_dir",
	    type=click.Path(),
	    default=Path("."),
	    help="Path to the directory where the trained policy network will be stored.",
	)
	@click.option(
	    "--num_cpus",
	    type=int,
	    default=8,
	    help="The number of CPUs to use for training set preparation.",
	)
	def filtering_policy_training_cli(
	    config_path: str,
	    molecule_data: str,
	    reaction_rules: str,
	    results_dir: str,
	    num_cpus: int,
	):
	    """Filtering policy network training."""
	    # The docstring above is shown by click as the command help.
	    # Load the network settings and force the policy flavour to "filtering",
	    # whatever the YAML file says.
	    network_config = PolicyNetworkConfig.from_yaml(config_path)
	    network_config.policy_type = "filtering"

	    # The prepared dataset is written next to the training results.
	    dataset_path = os.path.join(results_dir, "policy_dataset.ckpt")
	    dataset_kwargs = {
	        "reaction_rules_path": reaction_rules,
	        "molecules_or_reactions_path": molecule_data,
	        "output_path": dataset_path,
	        "dataset_type": "filtering",
	        "batch_size": network_config.batch_size,
	        "num_cpus": num_cpus,
	    }
	    training_data = create_policy_dataset(**dataset_kwargs)

	    run_policy_training(
	        training_data, config=network_config, results_path=results_dir
	    )


	@synplan.command(name="value_network_tuning")
	@click.option(
	    "--config",
	    "config_path",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the configuration file for value network training.",
	)
	@click.option(
	    "--targets",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with target molecules for planning simulations.",
	)
	@click.option(
	    "--reaction_rules",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with extracted reaction rules. Needed for planning simulations.",
	)
	@click.option(
	    "--building_blocks",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with building blocks. Needed for planning simulations.",
	)
	@click.option(
	    "--policy_network",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with trained policy network. Needed for planning simulations.",
	)
	@click.option(
	    "--value_network",
	    default=None,
	    type=click.Path(exists=True),
	    help="Path to the file with trained value network. Needed in case of additional value network fine-tuning",
	)
	@click.option(
	    "--results_dir",
	    default=".",
	    type=click.Path(exists=False),
	    help="Path to the directory where the trained value network will be stored.",
	)
	def value_network_tuning_cli(
	    config_path: str,
	    targets: str,
	    reaction_rules: str,
	    building_blocks: str,
	    policy_network: str,
	    value_network: str,
	    results_dir: str,
	) -> None:
	    """Value network tuning."""
	    # The YAML file must provide the sections "node_expansion",
	    # "value_network", "tree" and "tuning"; a missing one raises KeyError.
	    with open(config_path, "r", encoding="utf-8") as file:
	        config = yaml.safe_load(file)

	    # The policy weights always come from the command line.
	    policy_config = PolicyNetworkConfig.from_dict(config["node_expansion"])
	    policy_config.weights_path = policy_network

	    value_config = ValueNetworkConfig.from_dict(config["value_network"])
	    if value_network is None:
	        # ``--value_network`` omitted: point at the default location inside
	        # the results directory.
	        value_config.weights_path = os.path.join(
	            results_dir, "weights", "value_network.ckpt"
	        )
	    else:
	        # Honour an explicitly supplied value network. Previously the option
	        # was accepted but never used, so fine-tuning an existing network
	        # from the command line had no effect.
	        value_config.weights_path = value_network

	    tree_config = TreeConfig.from_dict(config["tree"])
	    tuning_config = TuningConfig.from_dict(config["tuning"])

	    run_updating(
	        targets_path=targets,
	        tree_config=tree_config,
	        policy_config=policy_config,
	        value_config=value_config,
	        reinforce_config=tuning_config,
	        reaction_rules_path=reaction_rules,
	        building_blocks_path=building_blocks,
	        results_root=results_dir,
	    )


	@synplan.command(name="planning")
	@click.option(
	    "--config",
	    "config_path",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the configuration file for retrosynthetic planning.",
	)
	@click.option(
	    "--targets",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with target molecules for retrosynthetic planning.",
	)
	@click.option(
	    "--reaction_rules",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with extracted reaction rules.",
	)
	@click.option(
	    "--building_blocks",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with building blocks.",
	)
	@click.option(
	    "--policy_network",
	    required=True,
	    type=click.Path(exists=True),
	    help="Path to the file with trained policy network.",
	)
	@click.option(
	    "--value_network",
	    default=None,
	    type=click.Path(exists=True),
	    help="Path to the file with trained value network.",
	)
	@click.option(
	    "--results_dir",
	    default=".",
	    type=click.Path(exists=False),
	    help="Path to the directory where retrosynthetic planning results will be stored.",
	)
	def planning_cli(
	    config_path: str,
	    targets: str,
	    reaction_rules: str,
	    building_blocks: str,
	    policy_network: str,
	    value_network: str,
	    results_dir: str,
	) -> None:
	    """Retrosynthetic planning."""
	    # The YAML file must provide the sections "tree", "node_evaluation" and
	    # "node_expansion"; a missing one raises KeyError.
	    with open(config_path, "r", encoding="utf-8") as file:
	        config = yaml.safe_load(file)

	    # Merge the two sections into one flat mapping. The former
	    # ``{a, b}`` spelling was a *set* literal of dicts and raised
	    # ``TypeError: unhashable type: 'dict'``. On a key clash the
	    # "node_evaluation" value wins because it is unpacked last.
	    search_config = {**config["tree"], **config["node_evaluation"]}

	    # Same fix for the policy settings; the weights path given on the command
	    # line overrides any "weights_path" present in the YAML section.
	    policy_config = PolicyNetworkConfig.from_dict(
	        {**config["node_expansion"], "weights_path": policy_network}
	    )

	    run_search(
	        targets_path=targets,
	        search_config=search_config,
	        policy_config=policy_config,
	        reaction_rules_path=reaction_rules,
	        building_blocks_path=building_blocks,
	        # ``None`` when ``--value_network`` was not given.
	        value_network_path=value_network,
	        results_root=results_dir,
	    )


	@synplan.command(name="clustering")
	@click.option(
	    "--targets",
	    type=click.Path(exists=True),
	    required=True,
	    help="Path to the file with target molecules for retrosynthetic planning.",
	)
	@click.option(
	    "--routes_file",
	    type=click.Path(exists=False),
	    default=".",
	    help="Path to the file where the planning results are stored.",
	)
	@click.option(
	    "--cluster_results_dir",
	    type=click.Path(exists=False),
	    default=".",
	    help="Path to the file where clustering results will be stored.",
	)
	@click.option(
	    "--perform_subcluster",
	    type=click.Path(exists=False),
	    default=None,
	    help="Perform subclustering.",
	)
	@click.option(
	    "--subcluster_results_dir",
	    type=click.Path(exists=False),
	    default=".",
	    help="Path to the file where subclustering results will be stored.",
	)
	def cluster_route_from_file_cli(
	    targets: str,
	    routes_file: str,
	    cluster_results_dir: str,
	    perform_subcluster: bool,
	    subcluster_results_dir: str,
	):
	    """Clustering the routes from planning"""
	    # The docstring above is shown by click as the command help.
	    # NOTE(review): ``targets`` is a required option but is not forwarded to
	    # ``run_cluster_cli`` -- confirm whether it is still needed.
	    # NOTE(review): ``--perform_subcluster`` is declared with a path type, so
	    # it arrives as a string (or ``None``), not the annotated ``bool``; any
	    # non-empty value enables subclustering -- confirm the intended type.
	    if perform_subcluster:
	        subcluster_dir = subcluster_results_dir
	    else:
	        # Without subclustering no output directory is passed on.
	        subcluster_dir = None

	    run_cluster_cli(
	        routes_file=routes_file,
	        cluster_results_dir=cluster_results_dir,
	        perform_subcluster=perform_subcluster,
	        subcluster_results_dir=subcluster_dir,
	    )


	# Script entry point: invoke the ``synplan`` click group when this module is
	# executed directly rather than imported.
	if __name__ == "__main__":
	synplan()