| import os |
|
|
| import yaml |
|
|
| import fiftyone as fo |
| import fiftyone.utils.random as four |
| import fiftyone.utils.huggingface as fouh |
|
|
| |
|
|
def shuffle_data(dataset, seed=51):
    """
    Return a randomly shuffled view of the dataset.

    Args:
        dataset: a FiftyOne dataset (or view) exposing ``shuffle()``.
        seed (int, optional): random seed for reproducibility. Defaults to 51,
            preserving the original hard-coded behavior.

    Returns:
        The shuffled view produced by ``dataset.shuffle()``.
    """
    return dataset.shuffle(seed=seed)
|
|
def take_random_sample(dataset, size=10, seed=51):
    """
    Return a random sample of the dataset.

    Args:
        dataset: a FiftyOne dataset (or view) exposing ``take()``.
        size (int, optional): number of samples to take. Defaults to 10,
            preserving the original hard-coded behavior.
        seed (int, optional): random seed for reproducibility. Defaults to 51.

    Returns:
        The sampled view produced by ``dataset.take()``.
    """
    return dataset.take(size=size, seed=seed)
|
|
| |
# Training configuration consumed by train_model():
# - train_split/val_split are passed to four.random_split() to tag samples.
# - train_params are forwarded as keyword arguments to YOLO .train().
training_config = {
    # Fractions used by four.random_split(); should sum to 1.0.
    "train_split": 0.9,
    "val_split": 0.1,

    # Hyperparameters unpacked into model.train(**train_params).
    "train_params": {
        "epochs": 1,   # number of training epochs
        "batch": 16,   # batch size
        "imgsz": 640,  # training image size in pixels
        "lr0": 0.01,   # initial learning rate
        "lrf": 0.01    # final learning rate factor (fraction of lr0)
    }
}
|
|
|
|
| |
def prepare_dataset():
    """
    Prepare the dataset for model training.

    Loads the approved dataset, shuffles it, takes a random sample, then
    clones the result into a persistent dataset named "curated_dataset".

    NOTE: There are lines you must not modify in this function. They are
    marked with "DO NOT MODIFY".

    Returns:
        fiftyone.core.dataset.Dataset: The curated dataset.

    Note:
        The dataset-loading line MUST NOT be removed from your submission:
        this ensures that only the approved dataset is used for the
        competition.
    """
    # DO NOT MODIFY: only the approved dataset may be used.
    dataset = fouh.load_from_hub("/tmp/data/train")

    # Deterministic shuffle + subsample (both seeded for reproducibility).
    dataset = shuffle_data(dataset)
    dataset = take_random_sample(dataset)

    # Clone so downstream split-tagging/exports do not mutate the source.
    curated_dataset = dataset.clone(name="curated_dataset")

    # Persist so the dataset survives beyond this session.
    curated_dataset.persistent = True

    # BUG FIX: the original function never returned, so train_model()
    # received None from prepare_dataset(). Return the curated dataset.
    return curated_dataset
|
|
| |
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    splits=["train", "val"]  # NOTE(review): mutable default; never mutated here, but a tuple would be safer
):
    """
    Export samples to YOLO format, optionally handling multiple data splits.

    NOTE: DO NOT MODIFY THIS FUNCTION.

    Args:
        samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
        classes (list): A list of class names for the YOLO format.
        label_field (str, optional): The field in the samples that contains the labels.
            Defaults to "ground_truth".
        export_dir (str): The directory where the exported data will be saved.
            Defaults to the current directory.
        splits (str, list, optional): The split(s) to export. Can be a single split name (str)
            or a list of split names. If None, all samples are exported as "val" split.
            Defaults to ["train", "val"].

    Returns:
        None
    """
    # Normalize `splits` to a list of split names.
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        # When exporting only "val", use all samples; otherwise select the
        # samples tagged with this split name (tags set by random_split).
        split_view = samples if split == "val" and splits == ["val"] else samples.match_tags(split)

        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split
        )
|
|
| |
def train_model(training_config=training_config):
    """
    Train the YOLO model on the given dataset using the provided configuration.

    NOTE: DO NOT MODIFY THIS FUNCTION AT ALL OR YOUR SCRIPT WILL FAIL.

    Args:
        training_config (dict): must contain "train_split", "val_split",
            and a "train_params" dict forwarded to ``YOLO.train()``.
            Defaults to the module-level ``training_config``.
    """
    # Load/curate the dataset (see prepare_dataset above).
    training_dataset = prepare_dataset()

    print("Splitting the dataset...")

    # Tags each sample "train" or "val" per the configured fractions.
    four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})

    print("Dataset split completed.")

    print("Exporting dataset to YOLO format...")

    # Exports both splits into the current directory (YOLOv5 layout).
    export_to_yolo_format(
        samples=training_dataset,
        classes=training_dataset.default_classes,
    )

    print("Dataset export completed.")

    print("Initializing the YOLO model...")

    # NOTE(review): `YOLO` is not imported anywhere in this file's visible
    # imports — `from ultralytics import YOLO` appears to be missing at the
    # top of the file; confirm and add it, or this call raises NameError.
    model = YOLO(
        model="/tmp/data/yolo11m.pt",
    )

    print("Model initialized.")

    print("Starting model training...")

    # "dataset.yaml" is produced by the YOLOv5-format export above.
    results = model.train(
        data="dataset.yaml",
        **training_config['train_params']
    )

    print("Model training completed.")

    # results.save_dir is presumably a pathlib.Path — TODO confirm against
    # the ultralytics version in use.
    best_model_path = str(results.save_dir / "weights/best.pt")

    print(f"Best model saved to: {best_model_path}")
|
|
| |
# Script entry point: run the full training pipeline.
if __name__ == "__main__":
    train_model()
|
|