#!/bin/bash
#
# Batch demo pipeline: extracts features for the videos under a directory and
# then runs model inference on them.
#
# Usage:
#   <script> <video_path> <csv_path> [save_path] [use-half]
#
# Arguments:
#   $1  video_path  Directory searched (recursively) for .mp4 files; also passed
#                   to extract_latents.py as --root.
#   $2  csv_path    Passed to extract_latents.py as --tsv_path.
#   $3  save_path   Optional. Passed to eval_batch.py as --save-dir.
#                   Defaults to "results/features" when omitted or empty.
#   $4  use-half    Optional. The literal string "use-half" adds --use_half to
#                   the feature-extraction command. Because arguments are
#                   positional, save_path must be supplied to use this flag.
#
# Exit codes:
#   1  wrong number of arguments, or no .mp4 file found
#   2  the duration of the first video could not be determined
#   3  feature extraction failed
#   4  inference failed

# Check number of arguments: two required plus up to two optional.
if [ "$#" -lt 2 ] || [ "$#" -gt 4 ]; then
  echo "Usage: $0 <video_path> <csv_path> [save_path] [use-half]"
  exit 1
fi

VIDEO_PATH="$1"
CSV_PATH="$2"
SAVE_PATH="${3:-results/features}"
USE_HALF_FLAG="$4"

dataset_config="ThinkSound/configs/multimodal_dataset_demo.json"
model_config="ThinkSound/configs/model_configs/thinksound.json"

# Directory where extracted features are written and from which inference
# reads them back.
FEATURES_DIR="results/features"

# Create necessary directories
mkdir -p results "$FEATURES_DIR"

# The duration of the first .mp4 found is used for the whole batch.
FIRST_VIDEO=$(find "$VIDEO_PATH" -type f \( -iname "*.mp4" \) | head -n 1)
if [ -z "$FIRST_VIDEO" ]; then
  echo "❌ No .mp4 video file found in $VIDEO_PATH"
  exit 1
fi

DURATION=$(ffprobe -v error -show_entries format=duration \
  -of default=noprint_wrappers=1:nokey=1 "$FIRST_VIDEO")
# Drop the fractional part: only whole seconds are passed downstream.
DURATION_SEC=${DURATION%.*}

# ffprobe may fail or report a non-numeric duration (e.g. "N/A"); stop here
# rather than handing an invalid value to the Python scripts.
if ! [[ "$DURATION_SEC" =~ ^[0-9]+$ ]]; then
  echo "❌ Could not determine video duration for $FIRST_VIDEO"
  exit 2
fi

# Run feature extraction
echo "⏳ Extracting features..."
# Built as an array so that paths containing spaces stay single arguments.
EXTRACT_CMD=(
  "python" "extract_latents.py"
  "--root" "$VIDEO_PATH"
  "--tsv_path" "$CSV_PATH"
  "--save-dir" "$FEATURES_DIR"
  "--duration_sec" "$DURATION_SEC"
)
if [ "$USE_HALF_FLAG" = "use-half" ]; then
  EXTRACT_CMD+=("--use_half")
fi

if ! "${EXTRACT_CMD[@]}" 2>&1; then
  echo "❌ Feature extraction failed"
  exit 3
fi

# Run inference
echo "⏳ Running model inference..."
if ! python eval_batch.py --dataset-config "$dataset_config" \
  --model-config "$model_config" \
  --duration-sec "$DURATION_SEC" \
  --results-dir "$FEATURES_DIR" \
  --save-dir "$SAVE_PATH" 2>&1; then
  echo "❌ Inference failed"
  exit 4
fi

# Get generated audio file
CURRENT_DATE=$(date +"%m%d")
AUDIO_PATH="${SAVE_PATH}/${CURRENT_DATE}_batch_size1"
echo "Audio files path: $AUDIO_PATH"