| |
| """ |
| HunyuanVideo-Foley: Multimodal Diffusion with Representation Alignment |
| for High-Fidelity Foley Audio Generation |
| |
| Setup script for building and installing the HunyuanVideo-Foley package. |
| """ |
|
|
| import os |
| import re |
| from typing import List |
| from setuptools import setup, find_packages |
|
|
def read_file(filename: str) -> str:
    """Return the UTF-8 text of a file located relative to this script.

    Args:
        filename: Path of the file to read. It is joined onto the directory
            that contains this setup script, so an absolute path is used
            unchanged.

    Returns:
        The complete content of the file as a single string.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    script_dir = os.path.dirname(__file__)
    target_path = os.path.join(os.path.abspath(script_dir), filename)
    with open(target_path, mode='r', encoding='utf-8') as handle:
        text = handle.read()
    return text
|
|
def get_version() -> str:
    """Return the package version declared in ``constants.py``.

    Searches ``hunyuanvideo_foley/constants.py`` for an assignment of the
    form ``__version__ = "<version>"`` (single or double quotes).

    Returns:
        The quoted version string if the assignment is found, otherwise the
        default ``"1.0.0"`` (also used when the file does not exist).
    """
    default_version = "1.0.0"
    version_pattern = r"__version__\s*=\s*['\"]([^'\"]*)['\"]"

    try:
        source = read_file(os.path.join('hunyuanvideo_foley', 'constants.py'))
    except FileNotFoundError:
        # No constants module available: fall back to the default.
        return default_version

    found = re.search(version_pattern, source)
    return found.group(1) if found else default_version
|
|
def parse_requirements(filename: str) -> List[str]:
    """Parse a pip requirements file into a list usable as ``install_requires``.

    Processing rules, applied line by line:

    * Blank lines and comments are ignored. As in pip, a ``#`` starts a
      comment only at the beginning of a line or after whitespace, so URL
      fragments such as ``#egg=name`` are left intact.
    * pip option lines (anything starting with ``-``, e.g. ``-r other.txt``,
      ``-e .``, ``--index-url ...``) are skipped because they are not
      requirement specifiers.
    * Bare VCS lines starting with ``git+`` are not valid specifiers either:
      one that mentions ``transformers`` is replaced by the released
      constraint ``transformers>=4.49.0``; every other one is dropped.
    * Any other line is kept as is.

    Args:
        filename: Requirements file path, resolved by ``read_file``.

    Returns:
        The requirement specifiers in file order, or an empty list when the
        file does not exist.
    """
    try:
        content = read_file(filename)
    except FileNotFoundError:
        return []

    requirements = []
    for raw_line in content.splitlines():
        # Remove full-line and trailing comments before inspecting the line.
        line = re.sub(r'(^|\s+)#.*$', '', raw_line).strip()
        if not line:
            continue

        if line.startswith('-'):
            # pip command-line option, not a dependency.
            continue

        if line.startswith('git+'):
            if 'transformers' in line:
                requirements.append('transformers>=4.49.0')
            # All other VCS URLs (audiotools included) are left out.
            continue

        requirements.append(line)

    return requirements
|
|
def get_long_description() -> str:
    """Return the text of README.md with HTML-style tags stripped.

    Every ``<...>`` span is removed from the README content before it is
    returned.

    Returns:
        The cleaned README text, or a one-line project summary when
        README.md does not exist.
    """
    fallback = "Multimodal Diffusion with Representation Alignment for High-Fidelity Foley Audio Generation"

    try:
        markdown = read_file("README.md")
    except FileNotFoundError:
        return fallback

    tag_pattern = re.compile(r'<[^>]+>')
    return tag_pattern.sub('', markdown)
|
|
| |
# Runtime dependencies, parsed from requirements.txt.
install_requires: List[str] = parse_requirements(filename="requirements.txt")
|
|
| |
# Development tooling, expressed as (package, minimum version) pairs and
# rendered into "package>=version" specifiers.
dev_requirements = [
    f"{package}>={minimum}"
    for package, minimum in (
        ("black", "23.0.0"),
        ("isort", "5.12.0"),
        ("flake8", "6.0.0"),
        ("mypy", "1.3.0"),
        ("pre-commit", "3.0.0"),
        ("pytest", "7.0.0"),
        ("pytest-cov", "4.0.0"),
    )
]
|
|
| |
# Optional dependency groups, installable as `pip install <package>[<group>]`.
extras_require = dict(
    dev=dev_requirements,
    test=["pytest>=7.0.0", "pytest-cov>=4.0.0"],
    gradio=["gradio==3.50.2"],
    # Declared with no additional packages.
    comfyui=[],
    # Development tooling plus the gradio extra.
    all=[*dev_requirements, "gradio==3.50.2"],
)
|
|
# Values shared by several metadata fields.
_REPOSITORY_URL = "https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley"
_SUMMARY = "Multimodal Diffusion with Representation Alignment for High-Fidelity Foley Audio Generation"

# Links listed alongside the package metadata.
_PROJECT_URLS = {
    "Homepage": _REPOSITORY_URL,
    "Repository": _REPOSITORY_URL,
    "Documentation": "https://szczesnys.github.io/hunyuanvideo-foley",
    "Paper": "https://arxiv.org/abs/2508.16930",
    "Demo": "https://huggingface.co/spaces/tencent/HunyuanVideo-Foley",
    "Models": "https://huggingface.co/tencent/HunyuanVideo-Foley",
}

# Non-Python files bundled with the package (YAML configuration).
_PACKAGE_DATA = {
    "hunyuanvideo_foley": [
        "configs/*.yaml",
        "configs/*.yml",
        "*.yaml",
        "*.yml",
    ],
}

# Trove classifiers; the Python-version entries are generated from one tuple.
_CLASSIFIERS = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    *(
        f"Programming Language :: Python :: {release}"
        for release in ("3", "3.8", "3.9", "3.10", "3.11")
    ),
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Multimedia :: Sound/Audio :: Analysis",
    "Topic :: Multimedia :: Video",
]

# Search keywords for the package index.
_KEYWORDS = [
    "artificial intelligence",
    "machine learning",
    "deep learning",
    "multimodal",
    "diffusion models",
    "audio generation",
    "foley audio",
    "video-to-audio",
    "text-to-audio",
    "pytorch",
    "huggingface",
    "tencent",
    "hunyuan",
]

setup(
    # Identity
    name="hunyuanvideo-foley",
    version=get_version(),
    # Authorship and description
    author="Tencent Hunyuan Team",
    author_email="hunyuan@tencent.com",
    description=_SUMMARY,
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    # Links
    url=_REPOSITORY_URL,
    project_urls=_PROJECT_URLS,
    # Package discovery and bundled data
    packages=find_packages(include=["hunyuanvideo_foley", "hunyuanvideo_foley.*"]),
    include_package_data=True,
    package_data=_PACKAGE_DATA,
    # Dependencies
    python_requires=">=3.8",
    install_requires=install_requires,
    extras_require=extras_require,
    # Command-line entry point
    entry_points={
        "console_scripts": [
            "hunyuanvideo-foley=hunyuanvideo_foley.cli:main",
        ],
    },
    # Index metadata
    classifiers=_CLASSIFIERS,
    keywords=_KEYWORDS,
    license="Apache-2.0",
    zip_safe=False,
    platforms=["any"],
)