"""Module containing command-line scripts for training and planning steps."""
|
|
| import os |
| import warnings |
| from pathlib import Path |
|
|
| import click |
| import yaml |
|
|
| from synplan.chem.data.filtering import ReactionFilterConfig, filter_reactions_from_file |
| from synplan.chem.data.standardizing import ( |
| ReactionStandardizationConfig, |
| standardize_reactions_from_file, |
| ) |
| from synplan.chem.reaction_rules.extraction import extract_rules_from_reactions |
| from synplan.chem.reaction_routes.clustering import run_cluster_cli |
| from synplan.chem.utils import standardize_building_blocks |
| from synplan.mcts.search import run_search |
| from synplan.ml.training.supervised import create_policy_dataset, run_policy_training |
| from synplan.ml.training.reinforcement import run_updating |
| from synplan.utils.config import ( |
| PolicyNetworkConfig, |
| RuleExtractionConfig, |
| TreeConfig, |
| TuningConfig, |
| ValueNetworkConfig, |
| ) |
| from synplan.utils.loading import download_all_data |
| from synplan.utils.visualisation import ( |
| routes_clustering_report, |
| routes_subclustering_report, |
| ) |
|
|
| warnings.filterwarnings("ignore") |
|
|
|
|
# Root command group: the sub-commands defined in this module attach to it.
@click.group(name="synplan")
def synplan():
    """SynPlanner command line interface."""
|
|
|
|
@synplan.command(name="download_all_data")
@click.option(
    "--save_to",
    "save_to",
    # Click always passes the option as a keyword argument, so the Python-level
    # default on the function is never consulted; without a default here an
    # omitted --save_to would arrive as None instead of ".".
    default=".",
    help="Path to the folder where downloaded data will be stored.",
)
def download_all_data_cli(save_to: str = ".") -> None:
    """Downloads all data for training, planning and benchmarking SynPlanner."""
    # save_to: destination folder, forwarded unchanged to download_all_data;
    # it is the current directory when --save_to is not given.
    download_all_data(save_to=save_to)
|
|
|
|
@synplan.command(name="building_blocks_standardizing")
@click.option(
    "--input",
    "input_file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with building blocks to be standardized.",
)
@click.option(
    "--output",
    "output_file",
    type=click.Path(),
    required=True,
    help="Path to the file where standardized building blocks will be stored.",
)
def building_blocks_standardizing_cli(input_file: str, output_file: str) -> None:
    """Standardizes building blocks."""
    # Thin CLI shim: both paths are forwarded unchanged to the library helper.
    standardize_building_blocks(
        input_file=input_file,
        output_file=output_file,
    )
|
|
|
|
@synplan.command(name="reaction_standardizing")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for reactions standardizing.",
)
@click.option(
    "--input",
    "input_file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with reactions to be standardized.",
)
@click.option(
    "--output",
    "output_file",
    type=click.Path(),
    help="Path to the file where standardized reactions will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=4,
    help="The number of CPUs to use for processing.",
)
def reaction_standardizing_cli(
    config_path: str, input_file: str, output_file: str, num_cpus: int
) -> None:
    """Standardizes reactions and remove duplicates."""
    # NOTE(review): --output is neither required nor given a default, so
    # output_file is None when the option is omitted — confirm that
    # standardize_reactions_from_file copes with that.
    #
    # The YAML file is parsed into a config object right at the call site;
    # reactions are processed with a fixed batch size of 100.
    standardize_reactions_from_file(
        config=ReactionStandardizationConfig.from_yaml(config_path),
        input_reaction_data_path=input_file,
        standardized_reaction_data_path=output_file,
        num_cpus=num_cpus,
        batch_size=100,
    )
|
|
|
|
@synplan.command(name="reaction_filtering")
@click.option(
    "--config",
    "config_path",
    required=True,
    type=click.Path(exists=True),
    help="Path to the configuration file for reactions filtering.",
)
@click.option(
    "--input",
    "input_file",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with reactions to be filtered.",
)
@click.option(
    "--output",
    "output_file",
    # NOTE(review): the default is a directory although the help text
    # describes a file — confirm what filter_reactions_from_file expects.
    default=Path("./"),
    type=click.Path(),
    help="Path to the file where successfully filtered reactions will be stored.",
)
@click.option(
    "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
)
def reaction_filtering_cli(
    config_path: str, input_file: str, output_file: str, num_cpus: int
) -> None:
    """Filters erroneous reactions."""
    # Load the config through the class, exactly like the other commands in
    # this module do, instead of building a throwaway default instance first
    # just to reach the loader.
    reaction_check_config = ReactionFilterConfig.from_yaml(config_path)

    # Reactions are processed with a fixed batch size of 100.
    filter_reactions_from_file(
        config=reaction_check_config,
        input_reaction_data_path=input_file,
        filtered_reaction_data_path=output_file,
        num_cpus=num_cpus,
        batch_size=100,
    )
|
|
|
|
@synplan.command(name="rule_extracting")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for reaction rules extracting.",
)
@click.option(
    "--input",
    "input_file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with reactions for reaction rules extraction.",
)
@click.option(
    "--output",
    "output_file",
    type=click.Path(),
    required=True,
    help="Path to the file where extracted reaction rules will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=4,
    help="The number of CPUs to use for processing.",
)
def rule_extracting_cli(
    config_path: str, input_file: str, output_file: str, num_cpus: int
):
    """Reaction rules extraction."""
    # Parse the YAML config at the call site and hand everything to the
    # extraction routine; reactions are processed with a batch size of 100.
    extract_rules_from_reactions(
        config=RuleExtractionConfig.from_yaml(config_path),
        reaction_data_path=input_file,
        reaction_rules_path=output_file,
        num_cpus=num_cpus,
        batch_size=100,
    )
|
|
|
|
@synplan.command(name="ranking_policy_training")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for ranking policy training.",
)
@click.option(
    "--reaction_data",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with reactions for ranking policy training.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules.",
)
@click.option(
    "--results_dir",
    type=click.Path(),
    default=Path("."),
    help="Path to the directory where the trained policy network will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=4,
    help="The number of CPUs to use for training set preparation.",
)
def ranking_policy_training_cli(
    config_path: str,
    reaction_data: str,
    reaction_rules: str,
    results_dir: str,
    num_cpus: int,
) -> None:
    """Ranking policy network training."""
    # Location handed to create_policy_dataset as its output path.
    dataset_path = os.path.join(results_dir, "policy_dataset.dt")

    # The policy type is forced to "ranking" regardless of the YAML content.
    ranking_config = PolicyNetworkConfig.from_yaml(config_path)
    ranking_config.policy_type = "ranking"

    # Step 1: build the dataset from the reactions and the extracted rules.
    ranking_data = create_policy_dataset(
        reaction_rules_path=reaction_rules,
        molecules_or_reactions_path=reaction_data,
        output_path=dataset_path,
        dataset_type="ranking",
        batch_size=ranking_config.batch_size,
        num_cpus=num_cpus,
    )

    # Step 2: train the network on it, with results_dir as the results path.
    run_policy_training(
        ranking_data,
        config=ranking_config,
        results_path=results_dir,
    )
|
|
|
|
@synplan.command(name="filtering_policy_training")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for filtering policy training.",
)
@click.option(
    "--molecule_data",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with molecules for filtering policy training.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules.",
)
@click.option(
    "--results_dir",
    type=click.Path(),
    default=Path("."),
    help="Path to the directory where the trained policy network will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=8,
    help="The number of CPUs to use for training set preparation.",
)
def filtering_policy_training_cli(
    config_path: str,
    molecule_data: str,
    reaction_rules: str,
    results_dir: str,
    num_cpus: int,
):
    """Filtering policy network training."""
    # Location handed to create_policy_dataset as its output path.
    dataset_path = os.path.join(results_dir, "policy_dataset.ckpt")

    # The policy type is forced to "filtering" regardless of the YAML content.
    filtering_config = PolicyNetworkConfig.from_yaml(config_path)
    filtering_config.policy_type = "filtering"

    # Step 1: build the dataset from the molecules and the extracted rules.
    filtering_data = create_policy_dataset(
        reaction_rules_path=reaction_rules,
        molecules_or_reactions_path=molecule_data,
        output_path=dataset_path,
        dataset_type="filtering",
        batch_size=filtering_config.batch_size,
        num_cpus=num_cpus,
    )

    # Step 2: train the network on it, with results_dir as the results path.
    run_policy_training(
        filtering_data,
        config=filtering_config,
        results_path=results_dir,
    )
|
|
|
|
@synplan.command(name="value_network_tuning")
@click.option(
    "--config",
    "config_path",
    required=True,
    type=click.Path(exists=True),
    help="Path to the configuration file for value network training.",
)
@click.option(
    "--targets",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with target molecules for planning simulations.",
)
@click.option(
    "--reaction_rules",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with extracted reaction rules. Needed for planning simulations.",
)
@click.option(
    "--building_blocks",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with building blocks. Needed for planning simulations.",
)
@click.option(
    "--policy_network",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with trained policy network. Needed for planning simulations.",
)
@click.option(
    "--value_network",
    default=None,
    type=click.Path(exists=True),
    help="Path to the file with trained value network. Needed in case of additional value network fine-tuning",
)
@click.option(
    "--results_dir",
    default=".",
    type=click.Path(exists=False),
    help="Path to the directory where the trained value network will be stored.",
)
def value_network_tuning_cli(
    config_path: str,
    targets: str,
    reaction_rules: str,
    building_blocks: str,
    policy_network: str,
    value_network: str,
    results_dir: str,
):
    """Value network tuning."""
    # The YAML file must provide the sections "node_expansion",
    # "value_network", "tree" and "tuning"; each one is read below.
    with open(config_path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)

    # The policy weights always come from the command line.
    policy_config = PolicyNetworkConfig.from_dict(config["node_expansion"])
    policy_config.weights_path = policy_network

    value_config = ValueNetworkConfig.from_dict(config["value_network"])
    if value_network is None:
        # No existing value network given: point at the file under results_dir.
        value_config.weights_path = os.path.join(
            results_dir, "weights", "value_network.ckpt"
        )
    else:
        # Previously a supplied --value_network path was silently dropped;
        # use it, mirroring how --policy_network is applied above.
        value_config.weights_path = value_network

    tree_config = TreeConfig.from_dict(config["tree"])
    tuning_config = TuningConfig.from_dict(config["tuning"])

    run_updating(
        targets_path=targets,
        tree_config=tree_config,
        policy_config=policy_config,
        value_config=value_config,
        reinforce_config=tuning_config,
        reaction_rules_path=reaction_rules,
        building_blocks_path=building_blocks,
        results_root=results_dir,
    )
|
|
|
|
@synplan.command(name="planning")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for retrosynthetic planning.",
)
@click.option(
    "--targets",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with target molecules for retrosynthetic planning.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules.",
)
@click.option(
    "--building_blocks",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with building blocks.",
)
@click.option(
    "--policy_network",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with trained policy network.",
)
@click.option(
    "--value_network",
    type=click.Path(exists=True),
    default=None,
    help="Path to the file with trained value network.",
)
@click.option(
    "--results_dir",
    type=click.Path(exists=False),
    default=".",
    help="Path to the file where retrosynthetic planning results will be stored.",
)
def planning_cli(
    config_path: str,
    targets: str,
    reaction_rules: str,
    building_blocks: str,
    policy_network: str,
    value_network: str,
    results_dir: str,
):
    """Retrosynthetic planning."""
    # The YAML file must provide the sections "tree", "node_evaluation" and
    # "node_expansion"; each one is read below.
    with open(config_path, "r", encoding="utf-8") as stream:
        settings = yaml.safe_load(stream)

    # Tree and node-evaluation options are merged into one flat mapping;
    # on a key clash the "node_evaluation" value wins.
    search_config = {**settings["tree"], **settings["node_evaluation"]}

    # The policy weights path from the command line overrides any
    # "weights_path" present in the "node_expansion" section.
    expansion_settings = {
        **settings["node_expansion"],
        "weights_path": policy_network,
    }
    policy_config = PolicyNetworkConfig.from_dict(expansion_settings)

    run_search(
        targets_path=targets,
        search_config=search_config,
        policy_config=policy_config,
        reaction_rules_path=reaction_rules,
        building_blocks_path=building_blocks,
        value_network_path=value_network,
        results_root=results_dir,
    )
|
|
|
|
@synplan.command(name="clustering")
@click.option(
    "--targets",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with target molecules for retrosynthetic planning.",
)
@click.option(
    "--routes_file",
    default=".",
    type=click.Path(exists=False),
    help="Path to the file where the planning results are stored.",
)
@click.option(
    "--cluster_results_dir",
    default=".",
    type=click.Path(exists=False),
    help="Path to the file where clustering results will be stored.",
)
@click.option(
    "--perform_subcluster",
    # Parsed as a real boolean. It used to be declared as a path, so any
    # supplied value — including "False" or "0" — was a non-empty string and
    # therefore switched subclustering on.
    default=False,
    type=click.BOOL,
    help="Perform subclustering.",
)
@click.option(
    "--subcluster_results_dir",
    default=".",
    type=click.Path(exists=False),
    help="Path to the file where subclustering results will be stored.",
)
def cluster_route_from_file_cli(
    targets: str,
    routes_file: str,
    cluster_results_dir: str,
    perform_subcluster: bool,
    subcluster_results_dir: str,
):
    """Clustering the routes from planning"""
    # NOTE(review): `targets` is a required option but is not used here; it is
    # kept so existing invocations that pass --targets keep working.
    run_cluster_cli(
        routes_file=routes_file,
        cluster_results_dir=cluster_results_dir,
        perform_subcluster=perform_subcluster,
        # The subcluster directory is only forwarded when subclustering is on.
        subcluster_results_dir=subcluster_results_dir if perform_subcluster else None,
    )
|
|
|
|
# Invoke the root command group when this module is run as a script.
if __name__ == "__main__":
    synplan()
|
|