import csv
from pathlib import Path

import datasets


| _CITATION = "" |
| _DESCRIPTION = "Local video dataset with nested 'video' field (id, path, bytes)." |
|
|
class LocalVideoConfig(datasets.BuilderConfig):
    """Builder configuration for the local video dataset.

    The config version defaults to ``1.0.0``; every keyword argument is
    forwarded to ``datasets.BuilderConfig``.
    """

    def __init__(self, **kwargs):
        """Create the config.

        Args:
            **kwargs: Keyword arguments forwarded to
                ``datasets.BuilderConfig`` (e.g. ``name``, ``description``).
                An explicit ``version`` overrides the ``1.0.0`` default.
        """
        # setdefault rather than a hard-coded keyword: passing ``version=...``
        # alongside a fixed ``version=`` argument would raise a
        # "multiple values for keyword argument" TypeError.
        kwargs.setdefault("version", datasets.Version("1.0.0"))
        super().__init__(**kwargs)


class LocalVideoDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder that reads local video files listed in a CSV manifest.

    The manifest is a file named ``test_manifest.csv`` with at least the
    columns ``id`` and ``path``. Each row becomes one example of the ``test``
    split, carrying a nested ``video`` field with the id, the path exactly as
    written in the manifest, and the raw bytes of the file.
    """

    BUILDER_CONFIGS = [LocalVideoConfig(name="default", description=_DESCRIPTION)]

    def _info(self):
        """Return the dataset metadata, including the nested feature schema."""
        features = datasets.Features(
            {
                "video": {
                    "id": datasets.Value("string"),
                    "path": datasets.Value("string"),
                    "bytes": datasets.Value("binary"),
                }
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            citation=_CITATION,
            homepage="",
        )

    def _split_generators(self, dl_manager):
        """Locate the manifest and declare the single ``test`` split.

        Args:
            dl_manager: Download manager supplied by ``datasets``; unused
                because all data is local.

        Returns:
            A one-element list with the ``test`` split generator.

        Raises:
            FileNotFoundError: If no manifest CSV can be found.
        """
        manifest = Path(__file__).parent.resolve() / "test_manifest.csv"
        if not manifest.exists():
            # NOTE(review): the library may import this script from a cached
            # copy, in which case __file__ is not next to the manifest. Fall
            # back to the builder's base_path (presumably the original script
            # directory -- confirm against the installed datasets version).
            base_path = getattr(self, "base_path", None)
            fallback = Path(base_path) / "test_manifest.csv" if base_path else None
            if fallback is None or not fallback.exists():
                raise FileNotFoundError(f"Missing manifest CSV: {manifest}")
            manifest = fallback
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"manifest_path": str(manifest)},
            )
        ]

    def _generate_examples(self, manifest_path):
        """Yield one ``(key, example)`` pair per manifest row.

        Args:
            manifest_path: Path to the CSV manifest with ``id`` and ``path``
                columns.

        Yields:
            Tuples of the row's ``id`` and a dict with a nested ``video``
            entry (``id``, ``path``, ``bytes``).

        Raises:
            KeyError: If a row lacks the ``id`` or ``path`` column.
            OSError: If a listed video file cannot be read.
        """
        manifest_dir = Path(manifest_path).resolve().parent
        # utf-8-sig: decode deterministically regardless of the platform
        # locale, and drop a leading BOM that would otherwise turn the first
        # header into "\ufeffid".
        with open(manifest_path, newline="", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                vid_id = row["id"]
                path = row["path"]
                video_file = Path(path)
                # Paths are first tried as written (relative ones against the
                # current working directory, as before); only when that fails
                # are relative paths resolved against the manifest's folder.
                if not video_file.is_absolute() and not video_file.exists():
                    video_file = manifest_dir / video_file
                data = video_file.read_bytes()
                yield vid_id, {
                    "video": {"id": vid_id, "path": path, "bytes": data},
                }