// Backend endpoints are served from the same origin as this page.
const API_BASE_URL = window.location.origin;
// Microphone capture helper. AudioRecorder is defined elsewhere —
// NOTE(review): presumably a MediaRecorder wrapper; confirm in its module.
const recorder = new AudioRecorder();

// --- Shared TTS playback state ---
let audioContext = null;            // lazily created in connectWebSocket's onopen
let audioBuffers = [];              // decoded audio chunks awaiting concatenation
let pendingAudioPaths = new Set();  // chunk filenames requested but not yet decoded
let currentAudioPath = null;        // object URL of the last combined WAV (download/cleanup)
let ws = null;                      // TTS WebSocket connection
|
|
| |
// --- DOM handles (element IDs defined in the accompanying HTML) ---
const startRecordingBtn = document.getElementById('startRecording');
const stopRecordingBtn = document.getElementById('stopRecording');
const transcriptionArea = document.getElementById('transcription');  // ASR output textarea
const asrStatus = document.getElementById('asrStatus');              // ASR status line
const ttsInput = document.getElementById('ttsInput');                // text to synthesize
const generateSpeechBtn = document.getElementById('generateSpeech');
const ttsStatus = document.getElementById('ttsStatus');              // TTS status line
const audioPlayer = document.getElementById('audioPlayer');          // media element for playback
const downloadAudioBtn = document.getElementById('downloadAudio');
const audioFileInput = document.getElementById('audioFileInput');    // file picker (files[] read below)
const uploadAudioBtn = document.getElementById('uploadAudio');
|
|
| |
// ASR via file upload: decode the chosen file, re-encode it as 16 kHz mono
// WAV, and POST it to the /asr endpoint; the transcription fills the textarea.
uploadAudioBtn.addEventListener('click', async () => {
  const file = audioFileInput.files[0];
  if (!file) {
    asrStatus.textContent = 'Please select an audio file';
    asrStatus.className = 'status error';
    return;
  }

  // Dedicated context for decoding — the original declaration shadowed the
  // module-level `audioContext` used by the TTS path.
  let decodeContext = null;
  try {
    asrStatus.textContent = 'Processing audio file...';
    asrStatus.className = 'status';

    decodeContext = new (window.AudioContext || window.webkitAudioContext)();
    const arrayBuffer = await file.arrayBuffer();
    const audioBuffer = await decodeContext.decodeAudioData(arrayBuffer);

    // Normalize to the 16 kHz mono WAV format the backend expects.
    const wavBuffer = await audioBufferToWav(audioBuffer);
    const wavBlob = new Blob([wavBuffer], { type: 'audio/wav' });

    const formData = new FormData();
    formData.append('file', wavBlob, 'recording.wav');

    const response = await fetch(`${API_BASE_URL}/asr`, {
      method: 'POST',
      body: formData
    });

    if (!response.ok) throw new Error('ASR request failed');

    const data = await response.json();
    transcriptionArea.value = data.text;
    asrStatus.textContent = 'Transcription complete!';
    asrStatus.className = 'status success';
  } catch (error) {
    asrStatus.textContent = 'Error: ' + error.message;
    asrStatus.className = 'status error';
  } finally {
    // Bug fix: the context was previously closed only on success, leaking it
    // (and its OS audio resources) whenever decoding or the request failed.
    if (decodeContext) {
      decodeContext.close();
    }
  }
});
|
|
| |
// ASR: begin microphone capture and flip the record/stop button states.
startRecordingBtn.addEventListener('click', async () => {
  asrStatus.textContent = 'Starting recording...';
  asrStatus.className = 'status';

  try {
    await recorder.start();
  } catch (error) {
    asrStatus.textContent = 'Error starting recording: ' + error.message;
    asrStatus.className = 'status error';
    return;
  }

  startRecordingBtn.disabled = true;
  stopRecordingBtn.disabled = false;
  asrStatus.textContent = 'Recording...';
});
|
|
// ASR: stop microphone capture, then send the recorded audio for transcription.
stopRecordingBtn.addEventListener('click', async () => {
  try {
    const audioBlob = await recorder.stop();
    startRecordingBtn.disabled = false;
    stopRecordingBtn.disabled = true;
    asrStatus.textContent = 'Processing audio...';

    const formData = new FormData();
    // Consistency fix: name the part explicitly so the backend sees a .wav
    // filename, matching the file-upload path above. Without a filename the
    // browser defaults the multipart filename to "blob".
    formData.append('file', audioBlob, 'recording.wav');

    const response = await fetch(`${API_BASE_URL}/asr`, {
      method: 'POST',
      body: formData
    });

    if (!response.ok) throw new Error('ASR request failed');

    const data = await response.json();
    transcriptionArea.value = data.text;
    asrStatus.textContent = 'Transcription complete!';
    asrStatus.className = 'status success';
  } catch (error) {
    asrStatus.textContent = 'Error: ' + error.message;
    asrStatus.className = 'status error';
    // Restore button state so the user can retry after a failure.
    startRecordingBtn.disabled = false;
    stopRecordingBtn.disabled = true;
  }
});
|
|
| |
// Establish the TTS WebSocket and wire up all of its handlers.
// Protocol (as implemented by the handlers below): the server replies with
// JSON messages whose `status` is "partial" (one audio chunk is ready in the
// server cache at /cache/<name>), "complete" (all chunks sent; stitch the
// decoded buffers into one WAV), or "error".
function connectWebSocket() {
  // Mirror the page scheme: wss over https, ws over http.
  const wsProtocol = window.location.protocol === 'https:' ? 'wss' : 'ws';
  ws = new WebSocket(`${wsProtocol}://${window.location.host}/tts-ws`);

  ws.onopen = () => {
    console.log('WebSocket connected');
    generateSpeechBtn.disabled = false;
    ttsStatus.textContent = 'Connected to TTS service';
    ttsStatus.className = 'status success';

    // Lazily create the shared AudioContext (webkit prefix for older Safari)
    // and resume it if the browser auto-suspended it.
    // NOTE(review): resume() returns a promise that is not awaited here.
    if (!audioContext) {
      audioContext = new (window.AudioContext || window.webkitAudioContext)();
    } else if (audioContext.state === 'suspended') {
      audioContext.resume();
    }
  };

  ws.onmessage = async (event) => {
    // NOTE(review): JSON.parse is unguarded — a malformed frame would reject
    // this async handler silently.
    const response = JSON.parse(event.data);

    if (response.status === 'partial') {
      ttsStatus.textContent = 'Generating audio...';
      ttsStatus.className = 'status';

      try {
        // Only the basename of the reported path is used; the chunk is
        // fetched from the /cache/ endpoint.
        const audioPath = response.audioPath.split('/').pop();
        pendingAudioPaths.add(audioPath);

        if (audioContext.state === 'suspended') {
          await audioContext.resume();
        }

        // The cached file may not be ready immediately, so retry the fetch.
        const audioResponse = await fetchWithRetry(`${API_BASE_URL}/cache/${audioPath}`);
        const arrayBuffer = await audioResponse.arrayBuffer();

        if (arrayBuffer.byteLength === 0) {
          throw new Error('Empty audio data received');
        }

        // NOTE(review): handlers for successive "partial" messages can run
        // concurrently; buffers are appended in decode-completion order,
        // which may not match chunk order — confirm the server sends and
        // flushes chunks serially.
        const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
        audioBuffers.push(audioBuffer);
        pendingAudioPaths.delete(audioPath);
      } catch (error) {
        console.error('Error loading audio:', error);
        ttsStatus.textContent = 'Error loading audio: ' + error.message;
        ttsStatus.className = 'status error';
        pendingAudioPaths.clear();
      }
    } else if (response.status === 'complete') {
      // Give any still-decoding chunks a moment to finish.
      // NOTE(review): a fixed 500 ms wait is a heuristic, not a guarantee.
      if (pendingAudioPaths.size > 0) {
        ttsStatus.textContent = 'Finalizing audio...';
        await new Promise(resolve => setTimeout(resolve, 500));
      }

      try {
        // Concatenate every decoded chunk into one mono 16 kHz buffer.
        const targetSampleRate = 16000;
        const totalLength = audioBuffers.reduce((acc, buffer) => {
          // Account for the length change introduced by resampling.
          const ratio = targetSampleRate / buffer.sampleRate;
          return acc + Math.ceil(buffer.length * ratio);
        }, 0);

        const combinedBuffer = audioContext.createBuffer(
          1,
          totalLength,
          targetSampleRate
        );

        let offset = 0;
        for (const buffer of audioBuffers) {
          // No mix-down: only channel 0 of each chunk is copied.
          let channelData = buffer.getChannelData(0);
          if (buffer.sampleRate !== targetSampleRate) {
            channelData = await resampleAudio(channelData, buffer.sampleRate, targetSampleRate);
          }
          combinedBuffer.copyToChannel(channelData, 0, offset);
          offset += channelData.length;
        }

        // Encode to WAV and hand it to the audio element via an object URL.
        const wavBlob = new Blob([await audioBufferToWav(combinedBuffer)], { type: 'audio/wav' });
        const audioUrl = URL.createObjectURL(wavBlob);

        audioPlayer.src = audioUrl;
        audioPlayer.load();
        downloadAudioBtn.disabled = false;

        // Remember the URL so the download button and cleanup can reuse it.
        // NOTE(review): the previous object URL (if any) is not revoked here,
        // so repeated generations leak one blob URL each until close/unload.
        currentAudioPath = audioUrl;

        ttsStatus.textContent = 'Audio generated successfully!';
        ttsStatus.className = 'status success';
      } catch (error) {
        console.error('Error combining audio:', error);
        ttsStatus.textContent = 'Error combining audio: ' + error.message;
        ttsStatus.className = 'status error';
      } finally {
        // Reset per-request chunk state regardless of outcome.
        audioBuffers = [];
        pendingAudioPaths.clear();
      }
    } else if (response.status === 'error') {
      // Server-reported failure: surface the message and drop chunk state.
      ttsStatus.textContent = 'Error: ' + response.message;
      ttsStatus.className = 'status error';
      audioBuffers = [];
      pendingAudioPaths.clear();
    }
  };

  ws.onclose = () => {
    console.log('WebSocket disconnected');
    generateSpeechBtn.disabled = true;
    ttsStatus.textContent = 'Disconnected. Trying to reconnect...';
    ttsStatus.className = 'status error';

    // Drop in-flight chunk state and release the last object URL.
    audioBuffers = [];
    pendingAudioPaths.clear();
    if (currentAudioPath) {
      URL.revokeObjectURL(currentAudioPath);
      currentAudioPath = null;
    }

    // Reconnect after a fixed back-off.
    setTimeout(connectWebSocket, 5000);
  };

  ws.onerror = (error) => {
    // An error event is normally followed by onclose, which schedules the
    // actual reconnect; here we only clean up and update the status line.
    console.error('WebSocket error:', error);
    ttsStatus.textContent = 'Connection error. Retrying...';
    ttsStatus.className = 'status error';

    audioBuffers = [];
    pendingAudioPaths.clear();
    if (currentAudioPath) {
      URL.revokeObjectURL(currentAudioPath);
      currentAudioPath = null;
    }
  };
}
|
|
| |
/**
 * Encode the first channel of an AudioBuffer as a 16 kHz, 16-bit mono WAV file.
 * Resamples when the source rate differs from 16 kHz; other channels are ignored.
 *
 * @param {AudioBuffer} buffer - Decoded audio to encode.
 * @returns {Promise<ArrayBuffer>} Complete RIFF/WAVE file bytes (44-byte header + PCM data).
 */
async function audioBufferToWav(buffer) {
  const TARGET_RATE = 16000;

  let samples = buffer.getChannelData(0);
  if (buffer.sampleRate !== TARGET_RATE) {
    samples = await resampleAudio(samples, buffer.sampleRate, TARGET_RATE);
  }

  const channelCount = 1;   // mono output
  const bytesPerSample = 2; // 16-bit PCM
  const dataBytes = samples.length * bytesPerSample;
  const wav = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(wav);

  // RIFF container header.
  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataBytes, true); // file size minus the 8-byte RIFF preamble
  writeString(view, 8, 'WAVE');

  // "fmt " sub-chunk describing uncompressed PCM.
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true);                                          // fmt chunk length
  view.setUint16(20, 1, true);                                           // audio format: PCM
  view.setUint16(22, channelCount, true);
  view.setUint32(24, TARGET_RATE, true);
  view.setUint32(28, TARGET_RATE * channelCount * bytesPerSample, true); // byte rate
  view.setUint16(32, channelCount * bytesPerSample, true);               // block align
  view.setUint16(34, 16, true);                                          // bits per sample

  // "data" sub-chunk followed by the converted samples.
  writeString(view, 36, 'data');
  view.setUint32(40, dataBytes, true);
  floatTo16BitPCM(view, 44, samples);

  return wav;
}
|
|
/**
 * Resample mono audio with linear interpolation between neighboring samples.
 *
 * @param {Float32Array|number[]} audioData - Source samples.
 * @param {number} originalSampleRate - Sample rate of `audioData` in Hz.
 * @param {number} targetSampleRate - Desired output sample rate in Hz.
 * @returns {Float32Array} Resampled audio of length round(len * target/original).
 */
function resampleAudio(audioData, originalSampleRate, targetSampleRate) {
  const ratio = targetSampleRate / originalSampleRate;
  const outputLength = Math.round(audioData.length * ratio);
  const output = new Float32Array(outputLength);

  for (let outIdx = 0; outIdx < outputLength; outIdx += 1) {
    const srcPosition = outIdx / ratio;
    const srcIndex = Math.floor(srcPosition);
    const t = srcPosition - srcIndex;

    // Interpolate toward the next sample when one exists; otherwise hold the
    // final sample value.
    output[outIdx] =
      srcIndex + 1 < audioData.length
        ? audioData[srcIndex] * (1 - t) + audioData[srcIndex + 1] * t
        : audioData[srcIndex];
  }

  return output;
}
|
|
/**
 * Write an ASCII tag into a DataView, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination buffer view.
 * @param {number} offset - Byte offset at which to start writing.
 * @param {string} string - Tag text (e.g. 'RIFF', 'WAVE').
 */
function writeString(view, offset, string) {
  string.split('').forEach((ch, i) => {
    view.setUint8(offset + i, ch.charCodeAt(0));
  });
}
|
|
/**
 * Convert float samples in [-1, 1] to little-endian signed 16-bit PCM.
 * Out-of-range values are clamped; negatives scale by 0x8000 and
 * non-negatives by 0x7FFF so the full range of each sign is used.
 *
 * @param {DataView} view - Destination buffer view.
 * @param {number} offset - Byte offset of the first output sample.
 * @param {Float32Array|number[]} input - Source float samples.
 */
function floatTo16BitPCM(view, offset, input) {
  let pos = offset;
  for (const sample of input) {
    const clamped = Math.min(1, Math.max(-1, sample));
    view.setInt16(pos, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
    pos += 2;
  }
}
|
|
// TTS: send the entered text to the server over the WebSocket.
generateSpeechBtn.addEventListener('click', () => {
  const text = ttsInput.value.trim();

  if (text === '') {
    ttsStatus.textContent = 'Please enter some text';
    ttsStatus.className = 'status error';
    return;
  }

  const socketOpen = ws && ws.readyState === WebSocket.OPEN;
  if (!socketOpen) {
    // Socket is missing or not open — kick off a reconnect instead.
    ttsStatus.textContent = 'Connection lost. Reconnecting...';
    ttsStatus.className = 'status error';
    connectWebSocket();
    return;
  }

  ws.send(JSON.stringify({ text }));
  ttsStatus.textContent = 'Generating audio...';
  ttsStatus.className = 'status';
});
|
|
// Download the most recently combined WAV via a temporary anchor element.
downloadAudioBtn.addEventListener('click', () => {
  if (!currentAudioPath) {
    return; // nothing generated yet
  }

  const anchor = document.createElement('a');
  anchor.href = currentAudioPath;
  anchor.download = `combined_audio_${Date.now()}.wav`;
  document.body.appendChild(anchor);
  anchor.click();
  document.body.removeChild(anchor);
});
|
|
| |
// Release audio and network resources when the page is being unloaded.
window.addEventListener('beforeunload', () => {
  audioContext?.close();
  ws?.close();

  if (currentAudioPath) {
    URL.revokeObjectURL(currentAudioPath);
  }

  audioBuffers = [];
  pendingAudioPaths.clear();
});
|
|
| |
// Open the TTS WebSocket on page load (it reconnects itself on close).
connectWebSocket();
|
|
/**
 * GET `url`, retrying on network failure or non-2xx status.
 *
 * @param {string} url - Resource to fetch.
 * @param {number} [maxRetries=3] - Total number of attempts; must be >= 1.
 * @param {number} [retryDelay=1000] - Milliseconds to wait between attempts.
 * @returns {Promise<Response>} The first OK response.
 * @throws {Error} If maxRetries < 1, or the last attempt's error once all
 *   attempts are exhausted.
 */
async function fetchWithRetry(url, maxRetries = 3, retryDelay = 1000) {
  // Bug fix: with maxRetries <= 0 the loop body never ran and the function
  // silently resolved to undefined; fail loudly instead.
  if (maxRetries < 1) {
    throw new Error(`fetchWithRetry: maxRetries must be >= 1, got ${maxRetries}`);
  }

  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return response;
    } catch (error) {
      // Out of attempts: surface the last error to the caller.
      if (attempt === maxRetries - 1) throw error;
      await new Promise(resolve => setTimeout(resolve, retryDelay));
    }
  }
}