| #!/usr/bin/env node |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| const fs = require('fs'); |
| const path = require('path'); |
|
|
| |
// ---------------------------------------------------------------------------
// VAD configuration
// ---------------------------------------------------------------------------

/** Number of 16-bit samples handed to the VAD per processing call. */
const HOP_SIZE = 256;

/** Threshold value forwarded to `_ten_vad_create` when the instance is built. */
const VOICE_THRESHOLD = 0.5;

// ---------------------------------------------------------------------------
// WebAssembly artefact locations.
// These are relative paths, so the fs calls that consume them resolve them
// against the current working directory.
// ---------------------------------------------------------------------------

const WASM_DIR = './../lib/Web';
const [WASM_JS_FILE, WASM_BINARY_FILE] = ['ten_vad.js', 'ten_vad.wasm']
    .map((fileName) => path.join(WASM_DIR, fileName));

// ---------------------------------------------------------------------------
// Mutable module-level state shared by the helpers below
// ---------------------------------------------------------------------------

/** Instantiated WASM module object; null until loadModule() succeeds. */
let vadModule = null;

/** Handle value read back from the slot filled by `_ten_vad_create`. */
let vadHandle = null;

/** WASM heap address of the 4-byte slot that stores the handle. */
let vadHandlePtr = null;
|
|
| |
| |
| |
|
|
/**
 * Read the current wall-clock time.
 *
 * @returns {number} Milliseconds elapsed since the Unix epoch.
 */
function getTimestamp() {
    const nowMs = new Date().getTime();
    return nowMs;
}
|
|
/**
 * Install fallback `getValue` and `UTF8ToString` helpers on `vadModule`
 * when the loaded module does not already provide them.
 *
 * Helpers that are already present on the module are left untouched.
 */
function addHelperFunctions() {
    if (!vadModule.getValue) {
        /**
         * Read a scalar from the WASM heap.
         * @param {number} ptr - Byte address inside the heap.
         * @param {string} type - Either 'i32' or 'float'.
         * @returns {number} The value stored at `ptr`.
         * @throws {Error} For any other `type`.
         */
        vadModule.getValue = function(ptr, type) {
            switch (type) {
                // Both views are 4 bytes wide, hence the shift by two.
                case 'i32': return vadModule.HEAP32[ptr >> 2];
                case 'float': return vadModule.HEAPF32[ptr >> 2];
                default: throw new Error(`Unsupported type: ${type}`);
            }
        };
    }

    if (!vadModule.UTF8ToString) {
        /**
         * Decode the NUL-terminated UTF-8 string that starts at `ptr`.
         * @param {number} ptr - Byte address of the first character (0 yields '').
         * @returns {string} The decoded text.
         */
        vadModule.UTF8ToString = function(ptr) {
            if (!ptr) return '';

            const heap = vadModule.HEAPU8;
            let end = ptr;
            // Stop at the terminator, or at the end of the heap if none is found.
            while (end < heap.length && heap[end] !== 0) {
                end++;
            }

            // Decode the whole byte run as UTF-8; mapping each byte to a char
            // code would corrupt every multi-byte sequence.
            return Buffer.from(heap.subarray(ptr, end)).toString('utf8');
        };
    }
}
|
|
| |
| |
| |
|
|
/**
 * Synthesize a 16 kHz, 16-bit test signal made of four consecutive segments:
 *   - t < 2 s      : 440 Hz + 880 Hz tones
 *   - 2 s .. 3 s   : uniform random noise with a span of 3000 centred on zero
 *   - 3 s .. 4 s   : 220 Hz + 660 Hz tones
 *   - t >= 4 s     : random values in [0, 50)
 *
 * @param {number} [durationMs=5000] - Length of the signal in milliseconds.
 * @returns {Int16Array} The generated samples.
 */
function generateTestAudio(durationMs = 5000) {
    const sampleRate = 16000;
    const totalSamples = Math.floor(durationMs * sampleRate / 1000);
    const audioData = new Int16Array(totalSamples);

    console.log(`Generating ${totalSamples} samples for ${durationMs}ms audio...`);

    // One sine partial evaluated at time `seconds`.
    const tone = (freqHz, amplitude, seconds) =>
        Math.sin(2 * Math.PI * freqHz * seconds) * amplitude;

    // Raw (unclamped, fractional) sample value for a given time.
    const synthesize = (seconds) => {
        if (seconds < 2.0) {
            return tone(440, 8000, seconds) + tone(880, 4000, seconds);
        }
        if (seconds < 3.0) {
            return (Math.random() - 0.5) * 3000;
        }
        if (seconds < 4.0) {
            return tone(220, 6000, seconds) + tone(660, 3000, seconds);
        }
        return Math.random() * 50;
    };

    // Round down and keep the value inside the signed 16-bit range.
    const toInt16 = (value) => Math.max(-32768, Math.min(32767, Math.floor(value)));

    for (let n = 0; n < totalSamples; n += 1) {
        audioData[n] = toInt16(synthesize(n / sampleRate));
    }

    return audioData;
}
|
|
| |
| |
| |
|
|
/**
 * Ask the loaded module for its version string.
 *
 * @returns {string} The text decoded from the pointer returned by
 *   `_ten_vad_get_version`, or "unknown" when no module is loaded or the
 *   lookup throws.
 */
function getVADVersion() {
    const fallback = "unknown";

    if (vadModule) {
        try {
            return vadModule.UTF8ToString(vadModule._ten_vad_get_version());
        } catch (error) {
            // Best effort: any failure falls through to the fallback below.
        }
    }

    return fallback;
}
|
|
/**
 * Create a VAD instance and publish it through the module-level
 * `vadHandle` / `vadHandlePtr` variables.
 *
 * A 4-byte slot is allocated on the WASM heap and passed to
 * `_ten_vad_create` together with HOP_SIZE and VOICE_THRESHOLD; a return
 * code of 0 is treated as success and the handle is then read back from the
 * slot. The module-level variables are only updated on success, so a failed
 * attempt never leaves them pointing at freed memory.
 *
 * @returns {boolean} true when the instance was created, false otherwise
 *   (the reason is logged; this function does not throw).
 */
function createVADInstance() {
    let slotPtr = 0;
    let created = false;

    try {
        slotPtr = vadModule._malloc(4);
        if (!slotPtr) {
            console.error('VAD creation failed: could not allocate handle storage');
            return false;
        }

        const result = vadModule._ten_vad_create(slotPtr, HOP_SIZE, VOICE_THRESHOLD);

        if (result === 0) {
            vadHandle = vadModule.getValue(slotPtr, 'i32');
            vadHandlePtr = slotPtr;
            created = true;
        } else {
            console.error(`VAD creation failed with code: ${result}`);
        }
    } catch (error) {
        console.error(`Error creating VAD instance: ${error.message}`);
    }

    // Single release point for every failure path (error code or exception),
    // so the slot is freed exactly once and never leaked.
    if (!created && slotPtr) {
        try {
            vadModule._free(slotPtr);
        } catch (error) {
            console.error(`Error releasing VAD handle storage: ${error.message}`);
        }
    }

    return created;
}
|
|
/**
 * Tear down the current VAD instance, if any.
 *
 * Passes the handle slot address to `_ten_vad_destroy`, frees the slot and
 * clears the module-level handle variables. Does nothing when no slot is
 * recorded or no module is loaded.
 */
function destroyVADInstance() {
    const nothingToRelease = !vadHandlePtr || !vadModule;
    if (nothingToRelease) {
        return;
    }

    vadModule._ten_vad_destroy(vadHandlePtr);
    vadModule._free(vadHandlePtr);

    vadHandle = null;
    vadHandlePtr = null;
}
|
|
/**
 * Run the VAD over `frameNum` consecutive frames of HOP_SIZE samples.
 *
 * A VAD instance is created for the run and always destroyed again, even
 * when a frame throws. The three WASM scratch buffers (audio frame,
 * probability, flag) are allocated once and reused for every frame.
 *
 * @param {Int16Array} inputBuf - Input samples; frame i is read from
 *   `inputBuf.slice(i * HOP_SIZE, (i + 1) * HOP_SIZE)`.
 * @param {number} frameNum - Number of frames to process.
 * @param {Float32Array} outProbs - Receives one probability per frame
 *   (0.0 for a frame whose processing call returned a non-zero code).
 * @param {Int32Array} outFlags - Receives one flag per frame
 *   (0 for a frame whose processing call returned a non-zero code).
 * @returns {Promise<number>} Elapsed time of the frame loop as measured by
 *   getTimestamp(), or -1 when the VAD instance could not be created.
 * @throws {Error} When scratch buffers cannot be allocated, or whatever the
 *   underlying module throws while processing; cleanup still runs.
 */
async function processAudio(inputBuf, frameNum, outProbs, outFlags) {
    console.log(`VAD version: ${getVADVersion()}`);

    if (!createVADInstance()) {
        return -1;
    }

    let audioPtr = 0;
    let probPtr = 0;
    let flagPtr = 0;

    try {
        // Loop-invariant scratch space: 2 bytes per sample plus two 4-byte cells.
        audioPtr = vadModule._malloc(HOP_SIZE * 2);
        probPtr = vadModule._malloc(4);
        flagPtr = vadModule._malloc(4);
        if (!audioPtr || !probPtr || !flagPtr) {
            throw new Error('Failed to allocate WASM scratch buffers');
        }

        const startTime = getTimestamp();

        for (let i = 0; i < frameNum; i++) {
            const frameStart = i * HOP_SIZE;
            const frameData = inputBuf.slice(frameStart, frameStart + HOP_SIZE);

            // HEAP16 is indexed in 16-bit units, hence the division by two.
            // The view is looked up on every iteration rather than cached.
            const heapIndex = audioPtr / 2;
            vadModule.HEAP16.set(frameData, heapIndex);
            if (frameData.length < HOP_SIZE) {
                // The buffer is reused, so clear what a short frame did not
                // overwrite instead of feeding the previous frame's tail.
                vadModule.HEAP16.fill(0, heapIndex + frameData.length, heapIndex + HOP_SIZE);
            }

            const result = vadModule._ten_vad_process(
                vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr
            );

            if (result === 0) {
                const probability = vadModule.getValue(probPtr, 'float');
                const flag = vadModule.getValue(flagPtr, 'i32');

                outProbs[i] = probability;
                outFlags[i] = flag;

                console.log(`[${i}] ${probability.toFixed(6)}, ${flag}`);
            } else {
                console.error(`Frame ${i} processing failed with code: ${result}`);
                outProbs[i] = 0.0;
                outFlags[i] = 0;
            }
        }

        return getTimestamp() - startTime;
    } finally {
        try {
            for (const ptr of [audioPtr, probPtr, flagPtr]) {
                if (ptr) {
                    vadModule._free(ptr);
                }
            }
        } finally {
            // Must run on every exit path, otherwise the instance leaks.
            destroyVADInstance();
        }
    }
}
|
|
| |
| |
| |
|
|
/**
 * Print a short summary of a processing run to stdout.
 *
 * @param {number} processingTime - Time spent processing, in milliseconds.
 * @param {number} totalAudioTime - Duration of the processed audio, in milliseconds.
 * @param {Int32Array} outFlags - Per-frame flags; entries equal to 1 are counted as voice.
 * @param {number} frameNum - Number of frames, used as the percentage denominator.
 */
function printResults(processingTime, totalAudioTime, outFlags, frameNum) {
    let voiceFrames = 0;
    for (const flag of outFlags) {
        if (flag === 1) {
            voiceFrames += 1;
        }
    }

    // Real-time factor: processing time relative to audio duration.
    const realTimeFactor = processingTime / totalAudioTime;
    const voicePercentage = (voiceFrames / frameNum * 100).toFixed(1);

    const report = [
        `\n=== Processing Results ===`,
        `Time: ${processingTime}ms, Audio: ${totalAudioTime.toFixed(2)}ms, RTF: ${realTimeFactor.toFixed(6)}`,
        `Voice frames: ${voiceFrames}/${frameNum} (${voicePercentage}%)`,
    ];
    report.forEach((line) => console.log(line));
}
|
|
/**
 * Write the per-frame results to a text file, one `[index] probability, flag`
 * line per frame. A failed write is logged and not rethrown.
 *
 * @param {Float32Array} outProbs - Per-frame probabilities (printed with 6 decimals).
 * @param {Int32Array} outFlags - Per-frame flags.
 * @param {number} frameNum - Number of frames to write.
 * @param {string} [filename='out.txt'] - Destination path.
 */
function saveResults(outProbs, outFlags, frameNum, filename = 'out.txt') {
    const lines = [];
    for (let idx = 0; idx < frameNum; idx += 1) {
        lines.push(`[${idx}] ${outProbs[idx].toFixed(6)}, ${outFlags[idx]}\n`);
    }
    const payload = lines.join('');

    try {
        fs.writeFileSync(filename, payload);
    } catch (error) {
        console.error(`Failed to save results: ${error.message}`);
        return;
    }

    console.log(`Results saved to ${filename}`);
}
|
|
| |
| |
| |
|
|
/**
 * Run the VAD over five seconds of synthetic audio and print a summary.
 *
 * @returns {Promise<number>} 0 when the audio was processed, 1 when
 *   processAudio reported failure (negative return value).
 */
async function testWithArray() {
    console.log("=== Array Test ===\n");

    const inputBuf = generateTestAudio(5000);
    const byteNum = inputBuf.byteLength;
    const sampleNum = byteNum / 2;              // 16-bit samples
    const totalAudioTime = sampleNum / 16.0;    // 16 samples per millisecond at 16 kHz
    const frameNum = Math.floor(sampleNum / HOP_SIZE);

    console.log(`Audio info: ${byteNum} bytes, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);

    const outProbs = new Float32Array(frameNum);
    const outFlags = new Int32Array(frameNum);

    const processingTime = await processAudio(inputBuf, frameNum, outProbs, outFlags);

    // processAudio signals failure with a negative value. A duration of 0 ms
    // is a legitimate result (timer resolution) and must still be reported.
    if (processingTime < 0) {
        return 1;
    }

    printResults(processingTime, totalAudioTime, outFlags, frameNum);
    return 0;
}
|
|
| |
/**
 * Parse the RIFF/WAVE chunk structure of a 16-bit PCM WAV file.
 *
 * Chunks are walked from byte 12 until the `data` chunk is found. The
 * `fmt ` chunk must appear before it. The reported data size is clamped to
 * the bytes actually present in `buffer`, so truncated files or files with
 * a placeholder size do not produce sample counts larger than the payload.
 *
 * @param {Buffer} buffer - Entire file contents.
 * @returns {{sampleRate: number, channels: number, bitsPerSample: number,
 *            dataOffset: number, dataSize: number, totalSamples: number,
 *            samplesPerChannel: number}}
 *   `dataOffset` and `dataSize` are in bytes; `totalSamples` counts samples
 *   across all channels; `samplesPerChannel` counts whole sample frames.
 * @throws {Error} When the file is too small, is not RIFF/WAVE, is not
 *   16-bit PCM, has a malformed or missing `fmt ` chunk, or has no `data` chunk.
 */
function parseWAVHeader(buffer) {
    if (buffer.length < 44) {
        throw new Error('Invalid WAV file: too small');
    }

    const riffHeader = buffer.toString('ascii', 0, 4);
    if (riffHeader !== 'RIFF') {
        throw new Error('Invalid WAV file: missing RIFF header');
    }

    const waveHeader = buffer.toString('ascii', 8, 12);
    if (waveHeader !== 'WAVE') {
        throw new Error('Invalid WAV file: not WAVE format');
    }

    const CHUNK_HEADER_BYTES = 8;   // 4-byte id + 4-byte little-endian size
    const FMT_MIN_BYTES = 16;       // fields read below span 16 bytes

    let offset = 12;
    let dataOffset = -1;
    let dataSize = 0;
    let sampleRate = 0;
    let channels = 0;
    let bitsPerSample = 0;
    let formatSeen = false;

    // `<=` so that a chunk header sitting at the very end of the file
    // (e.g. an empty data chunk) is still inspected.
    while (offset + CHUNK_HEADER_BYTES <= buffer.length) {
        const chunkId = buffer.toString('ascii', offset, offset + 4);
        const chunkSize = buffer.readUInt32LE(offset + 4);
        const bodyStart = offset + CHUNK_HEADER_BYTES;

        if (chunkId === 'fmt ') {
            if (chunkSize < FMT_MIN_BYTES || bodyStart + FMT_MIN_BYTES > buffer.length) {
                throw new Error('Invalid WAV file: truncated fmt chunk');
            }

            const audioFormat = buffer.readUInt16LE(bodyStart);
            channels = buffer.readUInt16LE(bodyStart + 2);
            sampleRate = buffer.readUInt32LE(bodyStart + 4);
            bitsPerSample = buffer.readUInt16LE(bodyStart + 14);

            if (audioFormat !== 1) {
                throw new Error('Unsupported WAV format: only PCM is supported');
            }

            if (bitsPerSample !== 16) {
                throw new Error('Unsupported bit depth: only 16-bit is supported');
            }

            if (channels === 0) {
                throw new Error('Invalid WAV file: channel count is zero');
            }

            formatSeen = true;
        } else if (chunkId === 'data') {
            dataOffset = bodyStart;
            // Never trust the declared size beyond what the buffer holds.
            dataSize = Math.min(chunkSize, buffer.length - dataOffset);
            break;
        }

        // Chunks are word-aligned: an odd-sized body is followed by a pad byte.
        offset = bodyStart + chunkSize + (chunkSize % 2);
    }

    if (dataOffset === -1) {
        throw new Error('Invalid WAV file: no data chunk found');
    }

    if (!formatSeen) {
        // Without format information the sample counts below would divide by zero.
        throw new Error('Invalid WAV file: no fmt chunk found before data chunk');
    }

    const bytesPerSample = bitsPerSample / 8;
    const totalSamples = Math.floor(dataSize / bytesPerSample);

    return {
        sampleRate,
        channels,
        bitsPerSample,
        dataOffset,
        dataSize,
        totalSamples,
        samplesPerChannel: Math.floor(totalSamples / channels)
    };
}
|
|
/**
 * Run the VAD over a 16-bit PCM WAV file and save the per-frame results.
 *
 * Only the bytes of the WAV data chunk are decoded. For multi-channel files
 * the first channel is extracted and processed.
 *
 * @param {string} inputFile - Path of the WAV file to read.
 * @param {string} outputFile - Path handed to saveResults for the result text.
 * @returns {Promise<number>} 0 on success; 1 when the file is missing,
 *   cannot be parsed, or processAudio reports failure.
 */
async function testWithWAV(inputFile, outputFile) {
    console.log("=== WAV File Test ===\n");

    if (!fs.existsSync(inputFile)) {
        console.error(`Input file not found: ${inputFile}`);
        return 1;
    }

    try {
        const buffer = fs.readFileSync(inputFile);

        const wavInfo = parseWAVHeader(buffer);
        console.log(`WAV Format: ${wavInfo.channels} channel(s), ${wavInfo.sampleRate}Hz, ${wavInfo.bitsPerSample}-bit`);
        console.log(`Total samples: ${wavInfo.totalSamples}, samples per channel: ${wavInfo.samplesPerChannel}`);

        if (wavInfo.sampleRate !== 16000) {
            console.warn(`Warning: Sample rate is ${wavInfo.sampleRate}Hz, expected 16000Hz`);
        }

        if (wavInfo.channels !== 1) {
            console.warn(`Warning: ${wavInfo.channels} channels detected, only first channel will be used`);
        }

        if (!(wavInfo.channels >= 1)) {
            throw new Error(`invalid channel count: ${wavInfo.channels}`);
        }

        // Decode exactly the data chunk, whole samples only. Reading through
        // the Buffer API avoids picking up whatever follows the payload in
        // the backing ArrayBuffer and does not depend on byte alignment.
        const audioBuffer = buffer.slice(wavInfo.dataOffset, wavInfo.dataOffset + wavInfo.dataSize);
        const decodedSamples = Math.floor(audioBuffer.length / 2);
        const inputBuf = new Int16Array(decodedSamples);
        for (let i = 0; i < decodedSamples; i++) {
            inputBuf[i] = audioBuffer.readInt16LE(i * 2);
        }

        // Sample frames per channel, derived from what was actually decoded.
        const sampleNum = Math.floor(decodedSamples / wavInfo.channels);

        const totalAudioTime = sampleNum / wavInfo.sampleRate * 1000;
        const frameNum = Math.floor(sampleNum / HOP_SIZE);

        console.log(`Audio info: ${audioBuffer.length} bytes, ${sampleNum} samples, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);

        let processedInput = inputBuf;
        if (wavInfo.channels > 1) {
            console.log(`Extracting mono from ${wavInfo.channels} channels...`);
            // Samples are interleaved; keep every channels-th one (channel 0).
            processedInput = new Int16Array(sampleNum);
            for (let i = 0; i < processedInput.length; i++) {
                processedInput[i] = inputBuf[i * wavInfo.channels];
            }
        }

        const outProbs = new Float32Array(frameNum);
        const outFlags = new Int32Array(frameNum);

        const processingTime = await processAudio(processedInput, frameNum, outProbs, outFlags);

        // Negative means failure; 0 ms is a valid (very fast) run.
        if (processingTime < 0) {
            return 1;
        }

        printResults(processingTime, totalAudioTime, outFlags, frameNum);
        saveResults(outProbs, outFlags, frameNum, outputFile);

        return 0;
    } catch (error) {
        console.error(`Error processing WAV file: ${error.message}`);
        return 1;
    }
}
|
|
/**
 * Time repeated processing of one synthetic 440 Hz frame for 100, 1000 and
 * 10000 iterations and print total time, time per frame and real-time
 * factor for each batch.
 *
 * The scratch buffers are allocated once for all batches, and both the
 * buffers and the VAD instance are released even if processing throws.
 * Returns without output beyond the banner when no VAD instance can be created.
 *
 * @returns {Promise<void>}
 */
async function runBenchmark() {
    console.log("=== Performance Benchmark ===\n");

    if (!createVADInstance()) return;

    let audioPtr = 0;
    let probPtr = 0;
    let flagPtr = 0;

    try {
        const testData = new Int16Array(HOP_SIZE);
        for (let i = 0; i < HOP_SIZE; i++) {
            testData[i] = Math.sin(2 * Math.PI * 440 * i / 16000) * 8000;
        }

        audioPtr = vadModule._malloc(HOP_SIZE * 2);
        probPtr = vadModule._malloc(4);
        flagPtr = vadModule._malloc(4);
        if (!audioPtr || !probPtr || !flagPtr) {
            throw new Error('Failed to allocate WASM scratch buffers');
        }

        // HEAP16 is indexed in 16-bit units, hence the division by two.
        vadModule.HEAP16.set(testData, audioPtr / 2);

        // Duration of one frame in milliseconds at 16 kHz.
        const frameAudioTime = (HOP_SIZE / 16000) * 1000;
        const testCases = [100, 1000, 10000];

        for (const numFrames of testCases) {
            const startTime = getTimestamp();

            for (let i = 0; i < numFrames; i++) {
                vadModule._ten_vad_process(vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr);
            }

            const endTime = getTimestamp();
            const totalTime = endTime - startTime;
            const avgTime = totalTime / numFrames;

            const totalAudioTime = numFrames * frameAudioTime;
            const rtf = totalTime / totalAudioTime;

            console.log(`${numFrames} frames: ${totalTime}ms total, ${avgTime.toFixed(3)}ms/frame, RTF: ${rtf.toFixed(3)}`);
        }
    } finally {
        try {
            for (const ptr of [audioPtr, probPtr, flagPtr]) {
                if (ptr) {
                    vadModule._free(ptr);
                }
            }
        } finally {
            // Must run on every exit path, otherwise the instance leaks.
            destroyVADInstance();
        }
    }
}
|
|
| |
| |
| |
|
|
/**
 * Load the TEN VAD WebAssembly module into the module-level `vadModule`.
 *
 * The ES-module glue file is rewritten into a CommonJS file
 * (`import.meta.url` becomes the absolute script path, the default export
 * becomes `module.exports`), written to a temporary file in the current
 * working directory, loaded with `require`, and instantiated with the
 * `.wasm` bytes read from disk. The temporary file is removed whether or
 * not loading succeeds.
 *
 * @returns {Promise<boolean>} true when the module is ready, false on any
 *   failure (the reason is logged; this function does not throw).
 */
async function loadModule() {
    const tempPath = './ten_vad_temp.js';
    let tempFileCreated = false;

    try {
        console.log("Loading WebAssembly module...");

        if (!fs.existsSync(WASM_JS_FILE)) {
            throw new Error(`ten_vad.js not found at ${WASM_JS_FILE}`);
        }

        if (!fs.existsSync(WASM_BINARY_FILE)) {
            throw new Error(`ten_vad.wasm not found at ${WASM_BINARY_FILE}`);
        }

        const wasmJsContent = fs.readFileSync(WASM_JS_FILE, 'utf8');

        // JSON.stringify yields a correctly escaped JS string literal even
        // when the path contains backslashes or quotes. The function form of
        // the replacement keeps `$` sequences in the path from being
        // interpreted as replacement patterns.
        const scriptPathLiteral = JSON.stringify(path.resolve(WASM_JS_FILE));
        const modifiedContent = wasmJsContent
            .replace(/import\.meta\.url/g, () => scriptPathLiteral)
            .replace(/export default createVADModule;/, 'module.exports = createVADModule;');

        // Flag first, so that a partially written file is cleaned up too.
        tempFileCreated = true;
        fs.writeFileSync(tempPath, modifiedContent);

        const wasmBinary = fs.readFileSync(WASM_BINARY_FILE);

        const createVADModule = require(path.resolve(tempPath));
        vadModule = await createVADModule({
            wasmBinary: wasmBinary,
            locateFile: (filePath) => filePath.endsWith('.wasm') ? WASM_BINARY_FILE : filePath,
            noInitialRun: false,
            noExitRuntime: true
        });

        addHelperFunctions();

        console.log(`Module loaded successfully. Version: ${getVADVersion()}\n`);
        return true;

    } catch (error) {
        console.error(`Failed to load module: ${error.message}`);
        return false;
    } finally {
        if (tempFileCreated) {
            try {
                fs.unlinkSync(tempPath);
            } catch (cleanupError) {
                // A file that was never created is fine; anything else is only
                // worth a warning and must not turn a successful load into a failure.
                if (cleanupError.code !== 'ENOENT') {
                    console.warn(`Could not remove ${tempPath}: ${cleanupError.message}`);
                }
            }
        }
    }
}
|
|
| |
| |
| |
|
|
/**
 * Entry point: load the module, run either the WAV-file test (when at least
 * two command-line arguments are given: input file, output file) or the
 * synthetic-array test, then run the benchmark.
 *
 * The function reports failure through its return value rather than
 * terminating the process, so it is safe to call from code that imports
 * this file.
 *
 * @returns {Promise<number>} Process exit code: 0 when the selected test
 *   succeeded, non-zero when the module could not be loaded, the test
 *   reported failure, or an exception was thrown.
 */
async function main() {
    const args = process.argv.slice(2);

    if (!await loadModule()) {
        return 1;
    }

    try {
        let status;

        if (args.length >= 2) {
            const [inputFile, outputFile] = args;
            console.log(`Input: ${inputFile}, Output: ${outputFile}\n`);
            status = await testWithWAV(inputFile, outputFile);
        } else {
            if (args.length === 1) {
                // One argument is ambiguous; say what is about to happen.
                console.warn('Expected <input.wav> <output.txt>; running the array test instead.\n');
            }
            status = await testWithArray();
        }

        // The benchmark is independent of the test outcome and always runs.
        await runBenchmark();

        // Propagate the test's status instead of unconditionally reporting success.
        return status || 0;
    } catch (error) {
        console.error(`Test failed: ${error.message}`);
        return 1;
    }
}
|
|
| |
| |
| |
|
|
| if (require.main === module) { |
| main().then(exitCode => { |
| process.exit(exitCode); |
| }).catch(error => { |
| console.error(`Fatal error: ${error.message}`); |
| process.exit(1); |
| }); |
| } |
|
|
| module.exports = { main, testWithArray, testWithWAV, runBenchmark }; |