| import argparse
|
| import yaml
|
| from datasets import load_dataset
|
|
|
|
|
def load_config(config_path):
    """Load the dataset configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed YAML document exactly as produced by
        ``yaml.safe_load`` (``None`` when the file is empty).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the
    # platform-dependent default encoding of ``open``; otherwise
    # non-ASCII values can be mis-decoded on some systems.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
|
|
|
|
|
def download_huggingface_dataset(config):
    """Load one Hugging Face dataset described by a configuration entry.

    The dataset is loaded with ``load_dataset`` using ``local_dir`` as the
    cache directory, and a short summary is printed to stdout.

    Args:
        config: Mapping with at least the keys ``'dataset_name'`` (the
            dataset identifier, e.g. ``"user/name"`` or ``"name"``) and
            ``'local_dir'`` (passed to ``load_dataset`` as ``cache_dir``).

    Raises:
        KeyError: If ``'dataset_name'`` or ``'local_dir'`` is missing.
    """
    dataset_name = config['dataset_name']
    local_dir = config['local_dir']

    # Split on the LAST slash only. A plain ``split('/')`` unpacked into two
    # names raises ValueError for identifiers without a namespace (e.g.
    # "squad") or with more than one slash; ``rpartition`` yields an empty
    # user name in the first case and keeps the full prefix in the second.
    user_name, _, model_hub_name = dataset_name.rpartition('/')

    ds = load_dataset(dataset_name, cache_dir=local_dir)

    print(f"User Name: {user_name}")
    print(f"Model Hub Name: {model_hub_name}")
    print(f"Dataset saved to: {local_dir}")
    print(f"Dataset info: {ds}")
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: read the YAML configuration and process
    # every dataset entry it lists.
    parser = argparse.ArgumentParser(description="Download dataset from Hugging Face")
    parser.add_argument(
        '--config_path',
        type=str,
        default='configs/datasets_info.yaml',
        help='Path to the dataset configuration YAML file',
    )
    args = parser.parse_args()

    # An empty YAML file parses to None; treat that as "no entries" rather
    # than failing with a TypeError when iterating.
    configs = load_config(args.config_path) or []

    for config in configs:
        # ``get`` so that an entry without a 'platform' key is reported as
        # unsupported instead of aborting the remaining entries.
        platform = config.get('platform')
        if platform == 'HuggingFace':
            download_huggingface_dataset(config)
        else:
            print(f"Unsupported platform: {platform}")