| | import * as ort from 'onnxruntime-web'; |
// Optional demo texts injected by the host page before this module runs;
// falls back to an empty map when none are provided.
const presetTexts = window.presetTexts || {};

// Inline SVG markup for the custom audio player transport buttons.
// NOTE(review): the players that consume these live later in the file —
// confirm all three icons are actually referenced.
const PLAY_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 5v14l11-7-11-7z"></path></svg>`;
const PAUSE_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 6h3v12H8V6zm5 0h3v12h-3V6z"></path></svg>`;
const STOP_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M7 7h10v10H7V7z"></path></svg>`;
| |
|
| | |
// Ambient "lightning" effect: randomly toggles a CSS class on <body> in
// short bursts, re-scheduling itself forever with random delays.
(function initLightningParallax() {
  // No-op outside a DOM environment (SSR, tests).
  if (typeof document === 'undefined') {
    return;
  }

  // Flash `className` on and off `remaining` times (1 or 2), then call
  // onComplete so the caller can schedule the next burst.
  const runBlink = (className, onComplete) => {
    let remaining = 1 + Math.round(Math.random()); // 1 or 2 flashes per burst
    const blink = () => {
      if (remaining-- <= 0) {
        if (typeof onComplete === 'function') {
          onComplete();
        }
        return;
      }
      const wait = 20 + Math.random() * 80; // flash on/off duration in ms
      document.body.classList.add(className);
      setTimeout(() => {
        document.body.classList.remove(className);
        setTimeout(blink, wait); // pause before the next flash in this burst
      }, wait);
    };
    blink();
  };

  // Fire a burst at a random moment within each ~10 second window.
  const schedule = () => {
    setTimeout(() => runBlink('lightning-flicker', schedule), Math.random() * 10000);
  };
  schedule();
})();
| |
|
// Escape the five HTML-special characters so untrusted text can be safely
// interpolated into innerHTML templates.
// Bug fix: every case previously returned the matched character unchanged
// (the escaping was a no-op), and the apostrophe case (`return '''`) was a
// syntax error — the entity strings had been mangled. Restored the entities.
function escapeHtml(value) {
  return value.replace(/[&<>"']/g, (match) => {
    switch (match) {
      case '&': return '&amp;';
      case '<': return '&lt;';
      case '>': return '&gt;';
      case '"': return '&quot;';
      case "'": return '&#39;';
      default: return match; // unreachable: regex only matches the five above
    }
  });
}
| |
|
// Format a stat value as HTML, attaching a unit suffix span to each numeric
// segment. Slash-separated values ("12 / 34") become multiple segments; the
// first segment optionally gets a "First" prefix label when
// options.firstLabel is true. Placeholder values ("--", "-", "error") are
// returned escaped but unstyled.
function formatStatValueWithSuffix(value, suffix, options = {}) {
  const { firstLabel = false } = options;
  if (value === undefined || value === null) {
    return '';
  }
  if (!suffix) {
    return escapeHtml(`${value}`);
  }
  const raw = `${value}`.trim();
  if (!raw || raw === '--' || raw === '-' || raw.toLowerCase() === 'error') {
    return escapeHtml(raw);
  }

  // Render one numeric segment with its suffix (and optional prefix label).
  const renderSegment = (segment, includePrefix = false) => {
    const trimmed = segment.trim();
    if (!trimmed) {
      return '';
    }
    const showPrefix = includePrefix && firstLabel;
    const pieces = [];
    if (showPrefix) {
      pieces.push(`<span class="stat-label stat-suffix stat-prefix">First</span>`);
    }
    pieces.push(`<span class="stat-value-number">${escapeHtml(trimmed)}</span>`);
    pieces.push(`<span class="stat-label stat-suffix">${escapeHtml(suffix)}</span>`);
    const segmentClass = showPrefix
      ? 'stat-value-segment has-prefix'
      : 'stat-value-segment';
    return `<span class="${segmentClass}">${pieces.join('')}</span>`;
  };

  if (!raw.includes('/')) {
    return renderSegment(raw);
  }
  return raw
    .split('/')
    .map((part, idx) => renderSegment(part, idx === 0))
    .join(' / ');
}
| |
|
| | |
| | |
| | |
// Converts batches of raw text into padded integer id matrices for the TTS
// text encoder, using a unicode-value -> token-index lookup table.
export class UnicodeProcessor {
  constructor(indexer) {
    // indexer: plain object mapping unicode values to model token indices
    // (looked up as this.indexer[unicodeValue] in call()).
    this.indexer = indexer;
  }

  // Preprocess each string, map characters to token ids, and pad all rows to
  // the batch's max length with 0. Unknown characters map to id 0 and are
  // collected for the caller.
  // Returns { textIds, textMask, unsupportedChars }.
  call(textList, lang = null) {
    const processedTexts = textList.map(t => preprocessText(t, lang));
    const textIdsLengths = processedTexts.map(t => t.length);
    const maxLen = Math.max(...textIdsLengths);

    const textIds = [];
    const unsupportedChars = new Set();

    for (let i = 0; i < processedTexts.length; i++) {
      const row = new Array(maxLen).fill(0);
      const unicodeVals = textToUnicodeValues(processedTexts[i]);
      for (let j = 0; j < unicodeVals.length; j++) {
        const indexValue = this.indexer[unicodeVals[j]];

        if (indexValue === undefined || indexValue === null || indexValue === -1) {
          // NOTE(review): processedTexts[i][j] indexes by UTF-16 code unit
          // while j counts code points (textToUnicodeValues iterates via
          // Array.from); these can disagree after an astral-plane character —
          // confirm whether inputs here can contain such characters.
          unsupportedChars.add(processedTexts[i][j]);
          row[j] = 0; // fall back to the padding id for unknown characters
        } else {
          row[j] = indexValue;
        }
      }
      textIds.push(row);
    }

    const textMask = getTextMask(textIdsLengths);
    return { textIds, textMask, unsupportedChars: Array.from(unsupportedChars) };
  }
}
| |
|
// Language codes accepted by preprocessText's explicit-language tagging.
const AVAILABLE_LANGS = ["en", "ko", "es", "pt", "fr"];
| |
|
| | |
| | |
| | |
| | |
// Heuristically detect the language of `text` among en/ko/es/pt/fr.
// Returns a language code, or null when the text is too short (< 3 chars)
// or the accumulated evidence is too weak (max score < 4).
export function detectLanguage(text) {
  if (!text || text.trim().length < 3) {
    return null;
  }

  // Score only the last 100 characters so detection follows what the user
  // is currently typing rather than stale earlier content.
  const sampleText = text.length > 100 ? text.substring(text.length - 100) : text;

  // NFC-compose so accented letters match the single-codepoint regexes below.
  const normalizedText = sampleText.normalize('NFC').toLowerCase();

  // Korean is detectable by script alone: Hangul syllables plus Jamo blocks.
  const koreanRegex = /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uD7B0-\uD7FF]/g;
  const koreanMatches = normalizedText.match(koreanRegex) || [];
  if (koreanMatches.length >= 2) {
    return 'ko';
  }

  // Latin-script languages are ranked by weighted evidence accumulation.
  const scores = { en: 0, es: 0, fr: 0, pt: 0 };

  // Characters (nearly) exclusive to one language carry the largest weights.
  if (/ñ/.test(normalizedText)) scores.es += 15;
  if (/[¿¡]/.test(normalizedText)) scores.es += 12;
  if (/ã/.test(normalizedText)) scores.pt += 15;
  if (/õ/.test(normalizedText)) scores.pt += 15;
  if (/œ/.test(normalizedText)) scores.fr += 15;
  if (/[ùû]/.test(normalizedText)) scores.fr += 10;

  // Cedilla is shared between French and Portuguese: split the credit.
  if (/ç/.test(normalizedText)) {
    scores.fr += 4;
    scores.pt += 4;
  }

  // Accents that merely lean French get smaller weights.
  if (/[èêë]/.test(normalizedText)) scores.fr += 5;
  if (/[àâ]/.test(normalizedText)) scores.fr += 3;
  if (/[îï]/.test(normalizedText)) scores.fr += 4;
  if (/ô/.test(normalizedText)) scores.fr += 3;

  // High-frequency function words that are distinctive per language.
  const exclusiveWords = {
    en: ['the', 'is', 'are', 'was', 'were', 'have', 'has', 'been', 'will', 'would', 'could', 'should', 'this', 'that', 'with', 'from', 'they', 'what', 'which', 'there', 'their', 'about', 'these', 'other', 'into', 'just', 'your', 'some', 'than', 'them', 'then', 'only', 'being', 'through', 'after', 'before'],
    es: ['el', 'los', 'las', 'es', 'está', 'están', 'porque', 'pero', 'muy', 'también', 'más', 'este', 'esta', 'estos', 'estas', 'ese', 'esa', 'yo', 'tú', 'nosotros', 'ellos', 'ellas', 'hola', 'gracias', 'buenos', 'buenas', 'ahora', 'siempre', 'nunca', 'todo', 'nada', 'algo', 'alguien'],
    fr: ['le', 'les', 'est', 'sont', 'dans', 'ce', 'cette', 'ces', 'il', 'elle', 'ils', 'elles', 'je', 'tu', 'nous', 'vous', 'avec', 'sur', 'ne', 'pas', 'plus', 'tout', 'bien', 'fait', 'être', 'avoir', 'donc', 'car', 'ni', 'jamais', 'toujours', 'rien', 'quelque', 'encore', 'aussi', 'très', 'peu', 'ici'],
    pt: ['os', 'as', 'é', 'são', 'está', 'estão', 'não', 'na', 'no', 'da', 'do', 'das', 'dos', 'ao', 'aos', 'ele', 'ela', 'eles', 'elas', 'eu', 'nós', 'você', 'vocês', 'seu', 'sua', 'seus', 'suas', 'muito', 'também', 'já', 'foi', 'só', 'mesmo', 'ter', 'até', 'isso', 'olá', 'obrigado', 'obrigada', 'bom', 'boa', 'agora', 'sempre', 'nunca', 'tudo', 'nada', 'algo', 'alguém']
  };

  // Tokenize on Latin letters (including accented ones) only.
  const words = normalizedText.match(/[a-záàâãäåçéèêëíìîïñóòôõöúùûüýÿœæ]+/g) || [];

  for (const word of words) {
    for (const [lang, wordList] of Object.entries(exclusiveWords)) {
      if (wordList.includes(word)) {
        scores[lang] += 3;
      }
    }
  }

  // Characteristic character n-grams, weighted by occurrence count.
  const ngramPatterns = {
    en: [/th/g, /ing/g, /tion/g, /ight/g, /ould/g],
    es: [/ción/g, /mente/g, /ado/g, /ido/g],
    fr: [/tion/g, /ment/g, /eau/g, /aux/g, /eux/g, /oir/g, /ais/g, /ait/g, /ont/g],
    pt: [/ção/g, /ões/g, /mente/g, /ado/g, /ido/g, /nh/g, /lh/g]
  };

  for (const [lang, patterns] of Object.entries(ngramPatterns)) {
    for (const pattern of patterns) {
      const matches = normalizedText.match(pattern) || [];
      scores[lang] += matches.length * 2;
    }
  }

  // Elisions like "l'homme" / "c'est" are a strong French signal.
  const frenchContractions = /[cdjlmnst]'[aeiouéèêàâîïôûù]/g;
  const frenchContractionMatches = normalizedText.match(frenchContractions) || [];
  scores.fr += frenchContractionMatches.length * 5;

  // Definite articles as a final tiebreaker-ish boost.
  if (/\bthe\b/.test(normalizedText)) scores.en += 5;
  if (/\b(el|los)\b/.test(normalizedText)) scores.es += 4;
  if (/\b(le|les)\b/.test(normalizedText)) scores.fr += 4;
  if (/\b(o|os)\b/.test(normalizedText)) scores.pt += 3;

  // Pick the highest-scoring language.
  let maxScore = 0;
  let detectedLang = null;

  for (const [lang, score] of Object.entries(scores)) {
    if (score > maxScore) {
      maxScore = score;
      detectedLang = lang;
    }
  }

  // Require a minimum amount of evidence before committing.
  if (maxScore >= 4) {
    return detectedLang;
  }

  return null;
}
| |
|
| | |
// Human-readable names for the supported language codes (used in UI text).
const LANGUAGE_NAMES = {
  'en': 'English',
  'ko': 'Korean',
  'es': 'Spanish',
  'pt': 'Portuguese',
  'fr': 'French'
};
| |
|
// Normalize raw user text into the form the TTS text encoder expects:
// NFKD-normalized, emoji stripped, punctuation canonicalized, whitespace
// collapsed, guaranteed to end in punctuation, and wrapped in a language
// tag (<en>…</en>, or <na>…</na> when no language is given).
// Throws when `lang` is non-null but not in AVAILABLE_LANGS.
// NOTE: replacement order below matters — do not reorder.
export function preprocessText(text, lang = null) {
  // Decompose characters (NFKD) so accents become combining marks.
  text = text.normalize('NFKD');

  // Strip emoji / pictograph / flag ranges — not speakable.
  text = text.replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]+/gu, '');

  // Canonicalize typographic punctuation to plain ASCII, and drop
  // structure characters ([, ], |, /, #, arrows) to spaces.
  const replacements = {
    "–": "-",
    "‑": "-",
    "—": "-",
    "_": " ",
    "\u201C": '"',
    "\u201D": '"',
    "\u2018": "'",
    "\u2019": "'",
    "´": "'",
    "`": "'",
    "[": " ",
    "]": " ",
    "|": " ",
    "/": " ",
    "#": " ",
    "→": " ",
    "←": " ",
  };

  for (const [k, v] of Object.entries(replacements)) {
    text = text.replaceAll(k, v);
  }

  // Remove decorative symbols entirely (no space left behind).
  text = text.replace(/[♥☆♡©\\]/g, "");

  // Expand symbols/abbreviations into speakable words.
  const exprReplacements = {
    "@": " at ",
    "e.g.,": "for example,",
    "i.e.,": "that is,",
  };

  for (const [k, v] of Object.entries(exprReplacements)) {
    text = text.replaceAll(k, v);
  }

  // Remove spaces that ended up before punctuation.
  text = text.replace(/ ,/g, ",");
  text = text.replace(/ \./g, ".");
  text = text.replace(/ !/g, "!");
  text = text.replace(/ \?/g, "?");
  text = text.replace(/ ;/g, ";");
  text = text.replace(/ :/g, ":");
  text = text.replace(/ '/g, "'");

  // Collapse runs of repeated quotes to a single character.
  while (text.includes('""')) {
    text = text.replace(/""/g, '"');
  }
  while (text.includes("''")) {
    text = text.replace(/''/g, "'");
  }
  // NOTE(review): backticks were already rewritten to apostrophes in the
  // `replacements` pass above, so this loop can never fire — dead code.
  while (text.includes("``")) {
    text = text.replace(/``/g, "`");
  }

  // Collapse all whitespace to single spaces.
  text = text.replace(/\s+/g, " ").trim();

  // Ensure the utterance ends with punctuation (helps prosody/termination).
  if (!/[.!?;:,'"')\]}…。」』】〉》›»]$/.test(text)) {
    text += ".";
  }

  // Wrap in a language tag; <na> means "not available / auto".
  if (lang !== null) {
    if (!AVAILABLE_LANGS.includes(lang)) {
      throw new Error(`Invalid language: ${lang}`);
    }
    text = `<${lang}>` + text + `</${lang}>`;
  } else {
    text = `<na>` + text + `</na>`;
  }

  return text;
}
| |
|
// Map each character of `text` to its Unicode code point.
// Bug fix: Array.from iterates by code point, but charCodeAt(0) only returns
// the high surrogate for astral-plane characters (emoji, rare CJK), silently
// discarding half the character. codePointAt(0) returns the full code point
// and is identical to charCodeAt(0) for all BMP characters.
export function textToUnicodeValues(text) {
  return Array.from(text).map(char => char.codePointAt(0));
}
| |
|
// Build a [batch][1][maxLen] binary mask from per-item lengths: position j
// of row i is 1.0 while j < lengths[i], else 0.0. When maxLen is omitted
// (or 0/null), the longest length in the batch is used.
export function lengthToMask(lengths, maxLen = null) {
  const width = maxLen || Math.max(...lengths);
  return lengths.map((len) => [
    Array.from({ length: width }, (_, j) => (j < len ? 1.0 : 0.0)),
  ]);
}
| |
|
// Validity mask for a batch of text id rows: delegates to lengthToMask,
// padding to the longest row in the batch.
export function getTextMask(textIdsLengths) {
  return lengthToMask(textIdsLengths);
}
| |
|
// Build a latent-frame validity mask from waveform sample lengths.
// One latent frame spans ae.base_chunk_size * ttl.chunk_compress_factor
// audio samples; a partial trailing chunk still occupies a full frame.
export function getLatentMask(wavLengths, cfgs) {
  const latentSize = cfgs.ae.base_chunk_size * cfgs.ttl.chunk_compress_factor;
  // Ceiling division via the classic (n + d - 1) / d trick.
  const latentLengths = wavLengths.map(
    (len) => Math.floor((len + latentSize - 1) / latentSize)
  );
  return lengthToMask(latentLengths);
}
| |
|
// Sample a masked standard-normal latent tensor of shape
// [batch][latentDim][latentLen] sized for the requested durations.
// duration[b][0][0] holds the target length in seconds for batch item b.
// Returns { noisyLatent, latentMask }.
export function sampleNoisyLatent(duration, cfgs) {
  const sampleRate = cfgs.ae.sample_rate;
  const baseChunkSize = cfgs.ae.base_chunk_size;
  const chunkCompressFactor = cfgs.ttl.chunk_compress_factor;
  const ldim = cfgs.ttl.latent_dim;

  // Convert seconds to samples; latent length covers the longest item.
  const wavLenMax = Math.max(...duration.map(d => d[0][0])) * sampleRate;
  const wavLengths = duration.map(d => Math.floor(d[0][0] * sampleRate));
  const chunkSize = baseChunkSize * chunkCompressFactor;
  const latentLen = Math.floor((wavLenMax + chunkSize - 1) / chunkSize); // ceil division
  const latentDim = ldim * chunkCompressFactor;

  // Fill with N(0,1) noise via the Box-Muller transform.
  const noisyLatent = [];
  for (let b = 0; b < duration.length; b++) {
    const batch = [];
    for (let d = 0; d < latentDim; d++) {
      const row = [];
      for (let t = 0; t < latentLen; t++) {
        // Bug fix: Math.random() is in [0, 1), so u1 could be exactly 0 and
        // Math.log(0) = -Infinity would yield Infinity/NaN samples.
        // 1 - Math.random() maps to (0, 1], which is safe and leaves the
        // distribution unchanged.
        const u1 = 1 - Math.random();
        const u2 = Math.random();
        const randNormal = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
        row.push(randNormal);
      }
      batch.push(row);
    }
    noisyLatent.push(batch);
  }

  // Zero out noise beyond each item's valid latent length.
  const latentMask = getLatentMask(wavLengths, cfgs);

  for (let b = 0; b < noisyLatent.length; b++) {
    for (let d = 0; d < noisyLatent[b].length; d++) {
      for (let t = 0; t < noisyLatent[b][d].length; t++) {
        noisyLatent[b][d][t] *= latentMask[b][0][t];
      }
    }
  }

  return { noisyLatent, latentMask };
}
| |
|
// Create one ONNX Runtime inference session for the model at `onnxPath`.
// `opts` is passed straight through to InferenceSession.create.
export async function loadOnnx(onnxPath, opts) {
  const session = await ort.InferenceSession.create(onnxPath, opts);
  return session;
}
| |
|
// Load the four TTS ONNX models in parallel from `basePath`.
// onProgress(modelName, loadedCount, total) fires as each model finishes
// (in completion order, not list order).
// Returns { dpOrt, textEncOrt, vectorEstOrt, vocoderOrt }.
export async function loadOnnxAll(basePath, opts, onProgress) {
  const models = [
    { name: 'Duration Predictor', path: `${basePath}/duration_predictor.onnx`, key: 'dpOrt' },
    { name: 'Text Encoder', path: `${basePath}/text_encoder.onnx`, key: 'textEncOrt' },
    { name: 'Vector Estimator', path: `${basePath}/vector_estimator.onnx`, key: 'vectorEstOrt' },
    { name: 'Vocoder', path: `${basePath}/vocoder.onnx`, key: 'vocoderOrt' }
  ];

  const result = {};
  let loadedCount = 0;

  // Start all loads concurrently; each reports progress when it settles.
  const loadPromises = models.map(async (model) => {
    const session = await loadOnnx(model.path, opts);
    loadedCount++;
    if (onProgress) {
      onProgress(model.name, loadedCount, models.length);
    }
    return { key: model.key, session };
  });

  // Fail fast if any model fails to load.
  const loadedModels = await Promise.all(loadPromises);

  loadedModels.forEach(({ key, session }) => {
    result[key] = session;
  });

  // Best-effort ping so the Hugging Face repo registers a download hit;
  // the response body is ignored and failures are non-fatal.
  try {
    await fetch('https://huggingface.co/Supertone/supertonic-2/resolve/main/config.json');
  } catch (error) {
    console.warn('Failed to update download count:', error);
  }
  return result;
}
| |
|
// Fetch and parse the TTS configuration file (tts.json) under `basePath`.
// Robustness fix: fetch() does not reject on HTTP errors, so without an
// `ok` check a 404 page would surface as a confusing JSON parse failure.
export async function loadCfgs(basePath) {
  const response = await fetch(`${basePath}/tts.json`);
  if (!response.ok) {
    throw new Error(`Failed to load TTS config: HTTP ${response.status}`);
  }
  return await response.json();
}
| |
|
// Fetch the unicode indexer table and wrap it in a UnicodeProcessor.
// Robustness fix: fetch() does not reject on HTTP errors, so an `ok` check
// turns a 404 into a clear error instead of a JSON parse failure.
// Returns { textProcessor }.
export async function loadProcessors(basePath) {
  const response = await fetch(`${basePath}/unicode_indexer.json`);
  if (!response.ok) {
    throw new Error(`Failed to load unicode indexer: HTTP ${response.status}`);
  }
  const unicodeIndexerData = await response.json();
  const textProcessor = new UnicodeProcessor(unicodeIndexerData);

  return { textProcessor };
}
| |
|
| | function parseWavFile(buffer) { |
| | const view = new DataView(buffer); |
| | |
| | |
| | const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3)); |
| | if (riff !== 'RIFF') { |
| | throw new Error('Not a valid WAV file'); |
| | } |
| | |
| | const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11)); |
| | if (wave !== 'WAVE') { |
| | throw new Error('Not a valid WAV file'); |
| | } |
| | |
| | let offset = 12; |
| | let fmtChunk = null; |
| | let dataChunk = null; |
| | |
| | while (offset < buffer.byteLength) { |
| | const chunkId = String.fromCharCode( |
| | view.getUint8(offset), |
| | view.getUint8(offset + 1), |
| | view.getUint8(offset + 2), |
| | view.getUint8(offset + 3) |
| | ); |
| | const chunkSize = view.getUint32(offset + 4, true); |
| | |
| | if (chunkId === 'fmt ') { |
| | fmtChunk = { |
| | audioFormat: view.getUint16(offset + 8, true), |
| | numChannels: view.getUint16(offset + 10, true), |
| | sampleRate: view.getUint32(offset + 12, true), |
| | bitsPerSample: view.getUint16(offset + 22, true) |
| | }; |
| | } else if (chunkId === 'data') { |
| | dataChunk = { |
| | offset: offset + 8, |
| | size: chunkSize |
| | }; |
| | break; |
| | } |
| | |
| | offset += 8 + chunkSize; |
| | } |
| | |
| | if (!fmtChunk || !dataChunk) { |
| | throw new Error('Invalid WAV file format'); |
| | } |
| | |
| | const bytesPerSample = fmtChunk.bitsPerSample / 8; |
| | const numSamples = Math.floor(dataChunk.size / (bytesPerSample * fmtChunk.numChannels)); |
| | const audioData = new Float32Array(numSamples); |
| | |
| | if (fmtChunk.bitsPerSample === 16) { |
| | for (let i = 0; i < numSamples; i++) { |
| | let sample = 0; |
| | for (let ch = 0; ch < fmtChunk.numChannels; ch++) { |
| | const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 2; |
| | sample += view.getInt16(sampleOffset, true); |
| | } |
| | audioData[i] = (sample / fmtChunk.numChannels) / 32768.0; |
| | } |
| | } else if (fmtChunk.bitsPerSample === 24) { |
| | |
| | for (let i = 0; i < numSamples; i++) { |
| | let sample = 0; |
| | for (let ch = 0; ch < fmtChunk.numChannels; ch++) { |
| | const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 3; |
| | |
| | const byte1 = view.getUint8(sampleOffset); |
| | const byte2 = view.getUint8(sampleOffset + 1); |
| | const byte3 = view.getUint8(sampleOffset + 2); |
| | let value = (byte3 << 16) | (byte2 << 8) | byte1; |
| | |
| | if (value & 0x800000) { |
| | value = value - 0x1000000; |
| | } |
| | sample += value; |
| | } |
| | audioData[i] = (sample / fmtChunk.numChannels) / 8388608.0; |
| | } |
| | } else if (fmtChunk.bitsPerSample === 32) { |
| | for (let i = 0; i < numSamples; i++) { |
| | let sample = 0; |
| | for (let ch = 0; ch < fmtChunk.numChannels; ch++) { |
| | const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 4; |
| | sample += view.getFloat32(sampleOffset, true); |
| | } |
| | audioData[i] = sample / fmtChunk.numChannels; |
| | } |
| | } else { |
| | throw new Error(`Unsupported bit depth: ${fmtChunk.bitsPerSample}. Supported formats: 16-bit, 24-bit, 32-bit`); |
| | } |
| | |
| | return { |
| | sampleRate: fmtChunk.sampleRate, |
| | audioData: audioData |
| | }; |
| | } |
| |
|
// Flatten an arbitrarily nested JS array into a flat Float32Array and wrap
// it as an ORT float32 tensor with the given dims.
export function arrayToTensor(array, dims) {
  const flattened = Float32Array.from(array.flat(Infinity));
  return new ort.Tensor('float32', flattened, dims);
}
| |
|
// Flatten a nested JS array of integers into an ORT int64 tensor.
// int64 tensors require BigInt64Array, so each number is converted to BigInt.
export function intArrayToTensor(array, dims) {
  const bigints = array.flat(Infinity).map((x) => BigInt(x));
  return new ort.Tensor('int64', BigInt64Array.from(bigints), dims);
}
| |
|
// Serialize mono float samples ([-1, 1]) as a 16-bit PCM WAV ArrayBuffer.
// Improvement: the four-character RIFF tags were written one setUint8 call
// per byte; factored into a writeAscii helper. Sample quantization is kept
// byte-identical to the original (clamp, then Math.floor(sample * 32767)).
export function writeWavFile(audioData, sampleRate) {
  const numChannels = 1;
  const bitsPerSample = 16;
  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
  const blockAlign = numChannels * bitsPerSample / 8;
  const dataSize = audioData.length * bitsPerSample / 8;

  // 44-byte canonical header + sample data.
  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);

  // Write an ASCII tag byte-by-byte at the given offset.
  const writeAscii = (offset, text) => {
    for (let i = 0; i < text.length; i++) {
      view.setUint8(offset + i, text.charCodeAt(i));
    }
  };

  // RIFF header.
  writeAscii(0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // file size minus the 8-byte RIFF header
  writeAscii(8, 'WAVE');

  // fmt chunk (16 bytes, PCM).
  writeAscii(12, 'fmt ');
  view.setUint32(16, 16, true);           // fmt chunk payload size
  view.setUint16(20, 1, true);            // audio format 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);

  // data chunk header.
  writeAscii(36, 'data');
  view.setUint32(40, dataSize, true);

  // Clamp each float to [-1, 1] and quantize to signed 16-bit.
  for (let i = 0; i < audioData.length; i++) {
    const sample = Math.max(-1, Math.min(1, audioData[i]));
    const intSample = Math.floor(sample * 32767);
    view.setInt16(44 + i * 2, intSample, true);
  }

  return buffer;
}
| |
|
| |
|
| |
|
| | |
// Page-level UI wiring that must wait until the DOM is parsed.
document.addEventListener('DOMContentLoaded', () => {
  // Smooth-scroll for same-page anchor links, keeping the URL hash in sync
  // without the default instant jump.
  document.querySelectorAll('a[href^="#"]').forEach(anchor => {
    anchor.addEventListener('click', function (e) {
      e.preventDefault();
      const href = this.getAttribute('href');
      const target = document.querySelector(href);
      if (target) {
        // Update the address bar without triggering navigation.
        if (history.pushState) {
          history.pushState(null, null, href);
        }
        target.scrollIntoView({
          behavior: 'smooth',
          block: 'start'
        });
      }
    });
  });

  // Reveal-on-scroll animation: fade/slide elements in once ~10% visible.
  const observerOptions = {
    threshold: 0.1,
    rootMargin: '0px 0px -100px 0px'
  };

  // NOTE(review): this observer is constructed but no .observe() call is
  // visible in this part of the file — confirm elements are registered
  // elsewhere, otherwise this is dead code.
  const observer = new IntersectionObserver((entries) => {
    entries.forEach(entry => {
      if (entry.isIntersecting) {
        entry.target.style.opacity = '1';
        entry.target.style.transform = 'translateY(0)';
      }
    });
  }, observerOptions);

});
| |
|
| | |
| | (async function() { |
| | |
| | const demoTextInput = document.getElementById('demoTextInput'); |
| | if (!demoTextInput) return; |
| | |
| | |
| | ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/'; |
| | ort.env.wasm.numThreads = 1; |
| | |
| |
|
| | |
  // Per-voice reference style embeddings (JSON), fetched lazily per voice.
  const REF_EMBEDDING_PATHS = {
    'F1': 'assets/voice_styles/F1.json',
    'F2': 'assets/voice_styles/F2.json',
    'F3': 'assets/voice_styles/F3.json',
    'F4': 'assets/voice_styles/F4.json',
    'F5': 'assets/voice_styles/F5.json',
    'M1': 'assets/voice_styles/M1.json',
    'M2': 'assets/voice_styles/M2.json',
    'M3': 'assets/voice_styles/M3.json',
    'M4': 'assets/voice_styles/M4.json',
    'M5': 'assets/voice_styles/M5.json'
  };
| |
|
| | |
  // Human-readable voice blurbs shown in the voice picker UI.
  const VOICE_DESCRIPTIONS = {
    'F1': 'Sarah - A calm female voice with a slightly low tone; steady and composed.',
    'F2': 'Lily - A bright, cheerful female voice; lively, playful, and youthful with spirited energy.',
    'F3': 'Jessica - A clear, professional announcer-style female voice; articulate and broadcast-ready.',
    'F4': 'Olivia - A crisp, confident female voice; distinct and expressive with strong delivery.',
    'F5': 'Emily - A kind, gentle female voice; soft-spoken, calm, and naturally soothing.',
    'M1': 'Alex - A lively, upbeat male voice with confident energy and a standard, clear tone.',
    'M2': 'James - A deep, robust male voice; calm, composed, and serious with a grounded presence.',
    'M3': 'Robert - A polished, authoritative male voice; confident and trustworthy with strong presentation quality.',
    'M4': 'Sam - A soft, neutral-toned male voice; gentle and approachable with a youthful, friendly quality.',
    'M5': 'Daniel - A warm, soft-spoken male voice; calm and soothing with a natural storytelling quality.'
  };
| |
|
| | |
| | let models = null; |
| | let cfgs = null; |
| | let processors = null; |
| | let currentVoice = 'M3'; |
| | |
| | |
| | function detectBrowserLanguage() { |
| | |
| | const browserLang = navigator.language || navigator.userLanguage || 'en'; |
| | |
| | |
| | const langCode = browserLang.split('-')[0].toLowerCase(); |
| | |
| | |
| | const supportedLangs = ['en', 'es', 'pt', 'fr', 'ko']; |
| | |
| | |
| | return supportedLangs.includes(langCode) ? langCode : 'en'; |
| | } |
| | |
| | let currentLanguage = detectBrowserLanguage(); |
| | let refEmbeddingCache = {}; |
| | let currentStyleTtlTensor = null; |
| | let currentStyleDpTensor = null; |
| | let modelsLoading = false; |
| | let modelsLoaded = false; |
| | let modelsLoadPromise = null; |
| |
|
| | |
| | const demoStatusBox = document.getElementById('demoStatusBox'); |
| | const demoStatusText = document.getElementById('demoStatusText'); |
| | const wasmWarningBanner = document.getElementById('wasmWarningBanner'); |
| | const demoGenerateBtn = document.getElementById('demoGenerateBtn'); |
| | const demoTotalSteps = document.getElementById('demoTotalSteps'); |
| | const demoSpeed = document.getElementById('demoSpeed'); |
| | const demoTotalStepsValue = document.getElementById('demoTotalStepsValue'); |
| | const demoSpeedValue = document.getElementById('demoSpeedValue'); |
| | const demoResults = document.getElementById('demoResults'); |
| | const demoError = document.getElementById('demoError'); |
| | const demoCharCount = document.getElementById('demoCharCount'); |
| | const demoCharCounter = document.getElementById('demoCharCounter'); |
| | const demoCharWarning = document.getElementById('demoCharWarning'); |
| |
|
| | |
  // Input length limits. Korean uses a shorter chunk budget than the other
  // languages (NOTE(review): presumably tuned for model quality — confirm).
  const MIN_CHARS = 10;
  const MAX_CHUNK_LENGTH_DEFAULT = 300;
  const MAX_CHUNK_LENGTH_KO = 120;
  // Chunk-size budget for the currently selected language.
  function getMaxChunkLength() {
    return currentLanguage === 'ko' ? MAX_CHUNK_LENGTH_KO : MAX_CHUNK_LENGTH_DEFAULT;
  }
| | |
| | |
| | let audioContext = null; |
| | let scheduledSources = []; |
| | let audioChunks = []; |
| | let totalDuration = 0; |
| | let startTime = 0; |
| | let pauseTime = 0; |
| | let isPaused = false; |
| | let isPlaying = false; |
| | let animationFrameId = null; |
| | let playPauseBtn = null; |
| | let progressBar = null; |
| | let currentTimeDisplay = null; |
| | let durationDisplay = null; |
| | let progressFill = null; |
| | let firstChunkGenerationTime = 0; |
| | let totalChunks = 0; |
| | let nextScheduledTime = 0; |
| | let currentGenerationTextLength = 0; |
| | let supertonicPlayerRecord = null; |
| | let isGenerating = false; |
| | |
| | |
| | let customAudioPlayers = []; |
| |
|
  // Viewport/device capability helpers for responsive display tweaks.
  const isMobileViewport = () => window.matchMedia('(max-width: 768px)').matches;

  const isTouchDevice = () => 'ontouchstart' in window || navigator.maxTouchPoints > 0;
  // On small screens, drop a trailing two-digit decimal (".NN") from an
  // already-formatted number string to save space.
  const trimDecimalsForMobile = (formatted) => {
    if (!formatted) return formatted;
    return isMobileViewport() ? formatted.replace(/\.\d{2}$/, '') : formatted;
  };
| |
|
| | function pauseAllPlayersExcept(currentPlayer) { |
| | customAudioPlayers.forEach(player => { |
| | if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') { |
| | player.pausePlayback(); |
| | } |
| | }); |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | function chunkText(text, maxLen = getMaxChunkLength()) { |
| | |
| | const paragraphs = text.trim().split(/\n\s*\n+/).filter(p => p.trim()); |
| | |
| | const chunks = []; |
| | |
| | for (let paragraph of paragraphs) { |
| | paragraph = paragraph.trim(); |
| | if (!paragraph) continue; |
| | |
| | |
| | |
| | const sentences = paragraph.split(/(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+/); |
| | |
| | let currentChunk = ""; |
| | |
| | for (let sentence of sentences) { |
| | if (currentChunk.length + sentence.length + 1 <= maxLen) { |
| | currentChunk += (currentChunk ? " " : "") + sentence; |
| | } else { |
| | if (currentChunk) { |
| | chunks.push(currentChunk.trim()); |
| | } |
| | currentChunk = sentence; |
| | } |
| | } |
| | |
| | if (currentChunk) { |
| | chunks.push(currentChunk.trim()); |
| | } |
| | } |
| | |
| | return chunks; |
| | } |
| |
|
  // Render a message in the demo status box.
  // type: 'info' (default) | 'success' | 'error'.
  // progress: 0-100 to drive the CSS progress bar, or null for none.
  // `message` is injected as HTML — callers must pass trusted/escaped markup.
  function showDemoStatus(message, type = 'info', progress = null) {
    demoStatusText.innerHTML = message;
    demoStatusBox.className = 'demo-status-box'; // reset any prior state classes
    demoStatusBox.style.removeProperty('--status-progress');
    demoStatusBox.style.display = '';

    if (type === 'success') {
      demoStatusBox.classList.add('success');
    } else if (type === 'error') {
      demoStatusBox.classList.add('error');
    }

    // Drive the progress bar via the --status-progress CSS variable; mark
    // 'complete' when the bar is full.
    if (progress !== null && progress >= 0 && progress <= 100) {
      const clampedProgress = Math.max(0, Math.min(progress, 100));
      demoStatusBox.style.setProperty('--status-progress', `${clampedProgress}%`);
      demoStatusBox.classList.toggle('complete', clampedProgress >= 100);
    } else if (type === 'success' || type === 'error') {
      demoStatusBox.style.removeProperty('--status-progress');
      demoStatusBox.classList.remove('complete');
    } else {
      demoStatusBox.style.removeProperty('--status-progress');
      demoStatusBox.classList.remove('complete');
    }
  }
| |
|
  // Hide the status banner entirely.
  function hideDemoStatus() {
    demoStatusBox.style.display = 'none';
  }

  // Show the inline error message under the demo input (textContent, so the
  // message is rendered as plain text, not HTML).
  function showDemoError(message) {
    demoError.textContent = message;
    demoError.classList.add('active');
  }

  // Clear the inline error message.
  function hideDemoError() {
    demoError.classList.remove('active');
  }
| | |
| | |
  // Transient toast shown when the UI auto-switches the detected language.
  const languageToast = document.getElementById('languageToast');
  const languageToastMessage = document.getElementById('languageToastMessage');
  let languageToastTimeout = null; // pending auto-dismiss timer, if any

  function showLanguageToast(fromLang, toLang) {
    if (!languageToast || !languageToastMessage) return;

    // NOTE(review): fromName is computed but never used in the message —
    // confirm whether the previous language was meant to be displayed.
    const fromName = LANGUAGE_NAMES[fromLang] || fromLang;
    const toName = LANGUAGE_NAMES[toLang] || toLang;

    languageToastMessage.innerHTML = `Language auto-detected: <strong>${toName}</strong>`;

    // Restart the dismiss timer if a toast is already showing.
    if (languageToastTimeout) {
      clearTimeout(languageToastTimeout);
    }

    languageToast.classList.add('show');

    // Auto-dismiss after 3 seconds.
    languageToastTimeout = setTimeout(() => {
      languageToast.classList.remove('show');
    }, 3000);
  }
| |
|
  // Reveal the banner warning that the WASM (CPU) backend is in use.
  function showWasmWarning() {
    if (wasmWarningBanner) {
      wasmWarningBanner.style.display = 'flex';
    }
  }
| |
|
| | |
  // Check whether `text` contains characters the loaded unicode indexer
  // cannot map. Returns { valid, unsupportedChars } where unsupportedChars
  // holds the ORIGINAL (pre-normalization) characters, so the UI can show
  // the user exactly what to remove. Best-effort: when processors are not
  // loaded yet, or anything throws, the text is treated as valid.
  function validateCharacters(text) {
    if (!processors || !processors.textProcessor) {
      return { valid: true, unsupportedChars: [] };
    }

    try {
      const uniqueChars = [...new Set(text)];

      // preprocessText can expand/normalize one original character into
      // several processed characters; build both directions of the mapping
      // so unsupported processed chars can be traced back to originals.
      const processedToOriginal = new Map();
      const charToProcessed = new Map();

      for (const char of uniqueChars) {
        // NOTE(review): preprocessText wraps its input in <na>…</na> and may
        // append punctuation, so per-character processing includes those
        // wrapper characters too — confirm this is intended here.
        const processedChar = preprocessText(char);
        charToProcessed.set(char, processedChar);

        for (const pc of processedChar) {
          if (!processedToOriginal.has(pc)) {
            processedToOriginal.set(pc, new Set());
          }
          processedToOriginal.get(pc).add(char);
        }
      }

      // Rebuild the full text from per-character processed forms.
      const fullProcessedText = Array.from(text).map(c => charToProcessed.get(c)).join('');

      // The processor reports characters missing from its indexer.
      const { unsupportedChars } = processors.textProcessor.call([fullProcessedText]);

      // Map unsupported processed characters back to the originals.
      const unsupportedOriginalChars = new Set();
      if (unsupportedChars && unsupportedChars.length > 0) {
        for (const unsupportedChar of unsupportedChars) {
          const originalChars = processedToOriginal.get(unsupportedChar);
          if (originalChars) {
            originalChars.forEach(c => unsupportedOriginalChars.add(c));
          }
        }
      }

      const unsupportedCharsArray = Array.from(unsupportedOriginalChars);
      return {
        valid: unsupportedCharsArray.length === 0,
        unsupportedChars: unsupportedCharsArray
      };
    } catch (error) {
      // Deliberate best-effort: validation failures must never block the UI.
      return { valid: true, unsupportedChars: [] };
    }
  }
| |
|
| | |
// Refresh the character-counter UI for the demo text input: update the visible
// count, scale the input's font size to the text length, run character
// validation, and enable/disable the Generate button accordingly.
function updateCharCounter() {
    const rawText = demoTextInput.textContent || demoTextInput.innerText || '';
    // Drop the single trailing newline that contenteditable elements append.
    const text = rawText.replace(/\n$/g, '');
    const length = text.length;

    demoCharCount.textContent = length;

    const textareaWidth = demoTextInput.offsetWidth;

    // Reference width the ratios below were presumably tuned against.
    // NOTE(review): currently unused in this function — confirm before removing.
    const maxWidthRef = 640;

    // Mobile doubles the ratio to stay legible at narrow widths.
    const isMobile = window.innerWidth <= 572;
    const mobileMultiplier = isMobile ? 2 : 1;

    // Step the font-size/width ratio down as the text grows.
    // NOTE(review): the ladder is not monotonic (<=200 yields 0.04 but <240
    // yields 0.053125) — looks like per-tier tuning, but worth confirming.
    let fontSizeRatio;
    if (length <= 100) {
        fontSizeRatio = 0.055 * mobileMultiplier;
    } else if (length <= 200) {
        fontSizeRatio = 0.04 * mobileMultiplier;
    } else if (length < 240) {
        fontSizeRatio = 0.053125 * mobileMultiplier;
    } else if (length < 400) {
        fontSizeRatio = 0.0425 * mobileMultiplier;
    } else if (length < 700) {
        fontSizeRatio = 0.031875 * mobileMultiplier;
    } else {
        fontSizeRatio = 0.025 * mobileMultiplier;
    }

    // Font size scales with the rendered input width, not the viewport.
    const fontSize = textareaWidth * fontSizeRatio;
    demoTextInput.style.fontSize = `${fontSize}px`;

    demoCharCounter.classList.remove('error', 'warning', 'valid');

    // Only run character validation once models/processors are loaded.
    let hasUnsupportedChars = false;
    if (models && processors && length > 0) {
        const validation = validateCharacters(text);
        if (!validation.valid && validation.unsupportedChars.length > 0) {
            hasUnsupportedChars = true;
            // Show at most five offending characters in the error message.
            const charList = validation.unsupportedChars.slice(0, 5).map(c => `"${c}"`).join(', ');
            const moreChars = validation.unsupportedChars.length > 5 ? ` and ${validation.unsupportedChars.length - 5} more` : '';
            showDemoError(`Unsupported characters detected: ${charList}${moreChars}. Please remove them before generating speech.`);
        } else {
            hideDemoError();
        }
    }

    // Counter state + Generate-button gating.
    // NOTE(review): the message hardcodes "10" while the check uses MIN_CHARS —
    // confirm the two stay in sync.
    if (length < MIN_CHARS) {
        demoCharCounter.classList.add('error');
        demoCharWarning.textContent = '(At least 10 characters)';
        demoGenerateBtn.disabled = true;
    } else if (hasUnsupportedChars) {
        demoCharCounter.classList.add('error');
        demoCharWarning.textContent = '(Unsupported characters)';
        demoGenerateBtn.disabled = true;
    } else {
        demoCharCounter.classList.add('valid');
        demoCharWarning.textContent = '';
        // Button stays disabled until models load and no generation is running.
        demoGenerateBtn.disabled = !models || isGenerating;
    }
}
| |
|
| | |
// Length gate for the demo text input.
// Returns { valid: true } or { valid: false, message } for UI display.
function validateTextInput(text) {
    const trimmedLength = text ? text.trim().length : 0;
    if (trimmedLength === 0) {
        return { valid: false, message: 'Please enter some text.' };
    }
    if (text.length < MIN_CHARS) {
        return { valid: false, message: `Text must be at least ${MIN_CHARS} characters long. (Currently ${text.length})` };
    }
    return { valid: true };
}
| |
|
| | |
// Fetch and cache the per-voice style embedding tensors.
// Returns { styleTtl, styleDp } ort.Tensors for `voice`, loading the JSON
// embedding file on first use and serving from refEmbeddingCache afterwards.
// Throws when no path is configured for the voice or the fetch fails.
// (The previous `catch (error) { throw error; }` wrapper was a no-op and
// has been removed; errors propagate unchanged.)
async function loadStyleEmbeddings(voice) {
    // Serve from cache when this voice was already loaded.
    if (refEmbeddingCache[voice]) {
        return refEmbeddingCache[voice];
    }

    const embeddingPath = REF_EMBEDDING_PATHS[voice];
    if (!embeddingPath) {
        throw new Error(`No embedding path configured for voice: ${voice}`);
    }

    const response = await fetch(embeddingPath);
    if (!response.ok) {
        throw new Error(`Failed to fetch embedding: ${response.statusText}`);
    }

    const embeddingData = await response.json();

    // Build an ort.Tensor from one serialized entry ({ data, dims, type? });
    // `data` may be nested, so flatten fully before conversion.
    const toTensor = (entry) => new ort.Tensor(
        entry.type || 'float32',
        Float32Array.from(entry.data.flat(Infinity)),
        entry.dims
    );

    const embeddings = {
        styleTtl: toTensor(embeddingData.style_ttl),
        styleDp: toTensor(embeddingData.style_dp)
    };

    refEmbeddingCache[voice] = embeddings;

    return embeddings;
}
| | |
| | |
// Activate a voice: load (or reuse cached) style embeddings, publish them to
// the current-tensor slots, and refresh dependent UI state.
// On failure the error is surfaced via showDemoError and then rethrown.
async function switchVoice(voice) {
    try {
        const { styleTtl, styleDp } = await loadStyleEmbeddings(voice);

        currentStyleTtlTensor = styleTtl;
        currentStyleDpTensor = styleDp;
        currentVoice = voice;

        // Notify the speaker-selection UI when the hook is installed.
        if (typeof window.updateActiveSpeaker === 'function') {
            window.updateActiveSpeaker(voice);
        }

        // Character validity can depend on the active voice/processors.
        updateCharCounter();
    } catch (error) {
        showDemoError(`Failed to load voice ${voice}: ${error.message}`);
        throw error;
    }
}
| |
|
| | |
// Probe for usable WebGPU support.
// Returns { supported: true, adapter, device } on success, or
// { supported: false, reason } describing why WebGPU can't be used.
// iOS and Safari are excluded up front: their WebGPU implementations lack
// features the runtime needs (the catch below maps subgroup errors similarly).
// Fix: the unused `adapterInfo` local binding was removed; the probe call
// itself is kept.
async function checkWebGPUSupport() {
    try {
        // iPadOS 13+ reports as MacIntel with touch support, so check both.
        const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) ||
            (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
        const isSafari = /^((?!chrome|crios|android|edg|firefox).)*safari/i.test(navigator.userAgent);

        if (isIOS) {
            return { supported: false, reason: 'iOS does not support the required WebGPU features' };
        }
        if (isSafari) {
            return { supported: false, reason: 'Safari does not support the required WebGPU features' };
        }

        if (!navigator.gpu) {
            return { supported: false, reason: 'WebGPU not available in this browser' };
        }

        const adapter = await navigator.gpu.requestAdapter();
        if (!adapter) {
            return { supported: false, reason: 'No WebGPU adapter found' };
        }

        // Probe adapter info; some browsers throw here, which is non-fatal.
        try {
            await adapter.requestAdapterInfo();
        } catch (infoError) {
            // Ignore: adapter info is informational only.
        }

        const device = await adapter.requestDevice();
        if (!device) {
            return { supported: false, reason: 'Failed to create WebGPU device' };
        }

        return { supported: true, adapter, device };
    } catch (error) {
        // Map known subgroup-feature failures to a clearer message.
        const errorMsg = error.message || '';
        if (errorMsg.includes('subgroupMinSize') || errorMsg.includes('subgroup')) {
            return { supported: false, reason: 'iOS/Safari does not support required WebGPU features (subgroup operations)' };
        }
        return { supported: false, reason: error.message };
    }
}
| |
|
| | |
// Run one full synthesis pass over a fixed dummy sentence to warm up all four
// ONNX sessions (duration predictor, text encoder, vector estimator, vocoder)
// so the first real generation doesn't pay first-run initialization costs.
// Failures are logged and swallowed: warmup is an optimization, not a requirement.
async function warmupModels() {
    try {
        const dummyText = 'Looking to integrate Supertonic into your product? We offer customized on-device SDK solutions tailored to your business needs. Our lightweight, high-performance TTS technology can be seamlessly integrated into mobile apps, IoT devices, automotive systems, and more. Try it now, and enjoy its speed.';
        const totalStep = 5;
        const durationFactor = 1.0;

        const textList = [dummyText];
        const bsz = 1;

        // Style embeddings for the currently selected voice.
        const styleTtlTensor = currentStyleTtlTensor;
        const styleDpTensor = currentStyleDpTensor;

        // Tokenize the text into ids + mask for the current language.
        const { textIds, textMask } = processors.textProcessor.call(textList, currentLanguage);

        const textIdsShape = [bsz, textIds[0].length];
        const textMaskShape = [bsz, 1, textMask[0][0].length];
        const textMaskTensor = arrayToTensor(textMask, textMaskShape);

        // Duration predictor session.
        const dpResult = await models.dpOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_dp: styleDpTensor,
            text_mask: textMaskTensor
        });

        const durOnnx = Array.from(dpResult.duration.data);
        for (let i = 0; i < durOnnx.length; i++) {
            durOnnx[i] *= durationFactor;
        }
        // Reshape durations to [bsz][1][1] as sampleNoisyLatent expects.
        const durReshaped = [];
        for (let b = 0; b < bsz; b++) {
            durReshaped.push([[durOnnx[b]]]);
        }

        // Text encoder session.
        const textEncResult = await models.textEncOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_ttl: styleTtlTensor,
            text_mask: textMaskTensor
        });

        const textEmbTensor = textEncResult.text_emb;

        // Start from random noise in latent space, sized by predicted duration.
        let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
        const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length];
        const latentMaskShape = [bsz, 1, latentMask[0][0].length];
        const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);

        const totalStepArray = new Array(bsz).fill(totalStep);
        const scalarShape = [bsz];
        const totalStepTensor = arrayToTensor(totalStepArray, scalarShape);

        // Iterative denoising loop: each pass refines the latent in place.
        for (let step = 0; step < totalStep; step++) {
            const currentStepArray = new Array(bsz).fill(step);

            const vectorEstResult = await models.vectorEstOrt.run({
                noisy_latent: arrayToTensor(noisyLatent, latentShape),
                text_emb: textEmbTensor,
                style_ttl: styleTtlTensor,
                text_mask: textMaskTensor,
                latent_mask: latentMaskTensor,
                total_step: totalStepTensor,
                current_step: arrayToTensor(currentStepArray, scalarShape)
            });

            const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data);

            // Copy the flat output back into the nested latent structure.
            let idx = 0;
            for (let b = 0; b < noisyLatent.length; b++) {
                for (let d = 0; d < noisyLatent[b].length; d++) {
                    for (let t = 0; t < noisyLatent[b][d].length; t++) {
                        noisyLatent[b][d][t] = denoisedLatent[idx++];
                    }
                }
            }
        }

        // Vocoder session; the result is discarded — this run only warms caches.
        const vocoderResult = await models.vocoderOrt.run({
            latent: arrayToTensor(noisyLatent, latentShape)
        });

    } catch (error) {
        console.warn('Warmup failed (non-critical):', error.message);
        // Deliberately swallowed: warmup failure must not block the demo.
    }
}
| |
|
| | |
// Load configuration, ONNX models, text processors, and reference embeddings,
// then warm everything up. Idempotent: concurrent callers share the one
// in-flight promise, and completed loads return immediately.
// Fix: the inner ONNX-loading promise was previously also named
// `modelsLoadPromise`, shadowing the module-level in-flight promise; it is
// renamed to `onnxLoadPromise` to remove the shadowing.
async function initializeModels() {
    // Another call is already loading: share its promise.
    if (modelsLoading && modelsLoadPromise) {
        return modelsLoadPromise;
    }

    // Already fully loaded: nothing to do.
    if (modelsLoaded && models) {
        return;
    }

    modelsLoading = true;

    // Disable voice/language selection while loading.
    const speakerItemsForLoading = document.querySelectorAll('.speaker-item[data-voice]');
    speakerItemsForLoading.forEach(item => item.classList.add('disabled'));

    const languageItemsForLoading = document.querySelectorAll('.speaker-item[data-language]');
    languageItemsForLoading.forEach(item => item.classList.add('disabled'));

    modelsLoadPromise = (async () => {
        try {
            showDemoStatus('<strong>Loading configuration...</strong>', 'info', 5);

            const basePath = 'assets/onnx';

            cfgs = await loadCfgs(basePath);

            showDemoStatus('<strong>Checking WebGPU support...</strong>', 'info', 8);
            const webgpuCheck = await checkWebGPUSupport();

            const useWebGPU = webgpuCheck.supported;
            const executionProvider = useWebGPU ? 'webgpu' : 'wasm';

            // Falling back to WASM is slower; let the user know.
            if (!useWebGPU) {
                showWasmWarning();
            }

            const backendName = useWebGPU ? 'WebGPU' : 'WASM';
            showDemoStatus(`<strong>${backendName} detected! Loading models...</strong>`, 'info', 10);

            const onnxLoadPromise = loadOnnxAll(basePath, {
                executionProviders: [executionProvider],
                graphOptimizationLevel: 'all'
            }, (modelName, current, total) => {
                // Model loading occupies the 10%–80% band of the progress bar.
                const progress = 10 + (current / total) * 70;
                showDemoStatus(`<strong>Loading models with ${backendName} (${current}/${total}):</strong> ${modelName}...`, 'info', progress);
            });

            // Load ONNX sessions and text processors in parallel.
            const [loadedModels, loadedProcessors] = await Promise.all([
                onnxLoadPromise,
                loadProcessors(basePath)
            ]);

            models = loadedModels;
            processors = loadedProcessors;
            showDemoStatus('<strong>Loading reference embeddings...</strong>', 'info', 85);

            // Style embeddings for the initially selected voice.
            const embeddings = await loadStyleEmbeddings(currentVoice);
            currentStyleTtlTensor = embeddings.styleTtl;
            currentStyleDpTensor = embeddings.styleDp;

            showDemoStatus('<strong>Warming up models...</strong>', 'info', 90);

            await warmupModels();

            hideDemoStatus();

            demoGenerateBtn.disabled = false;
            demoTotalSteps.disabled = false;
            demoSpeed.disabled = false;

            const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
            voiceToggleTexts.forEach(text => text.classList.remove('disabled'));

            updateCharCounter();

            modelsLoaded = true;
            modelsLoading = false;

            // Re-enable voice/language selection.
            speakerItemsForLoading.forEach(item => item.classList.remove('disabled'));
            languageItemsForLoading.forEach(item => item.classList.remove('disabled'));

        } catch (error) {
            modelsLoading = false;

            // Re-enable selection so the user can retry after a failure.
            speakerItemsForLoading.forEach(item => item.classList.remove('disabled'));
            languageItemsForLoading.forEach(item => item.classList.remove('disabled'));
            showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error');
            showDemoError(`Failed to initialize: ${error.message}. Check console for details.`);
            throw error;
        }
    })();

    return modelsLoadPromise;
}
| |
|
| |
|
| | |
// Run the full Supertonic TTS pipeline for a single piece of text:
// duration prediction -> text encoding -> iterative denoising -> vocoder,
// returning raw Float32 audio samples.
// Returns { success: true, processingTime, audioDuration, audioData, sampleRate, text }
// or { success: false, error, text } — never throws.
async function generateSupertonicSpeech(text, totalStep, durationFactor) {
    const supertonicStartTime = Date.now();

    try {
        const textList = [text];
        const bsz = 1;
        const sampleRate = cfgs.ae.sample_rate;

        // Style embeddings for the currently selected voice.
        const styleTtlTensor = currentStyleTtlTensor;
        const styleDpTensor = currentStyleDpTensor;

        // Tokenize; the processor also reports characters it cannot handle.
        const { textIds, textMask, unsupportedChars } = processors.textProcessor.call(textList, currentLanguage);

        if (unsupportedChars && unsupportedChars.length > 0) {
            const charList = unsupportedChars.map(c => `"${c}"`).join(', ');
            throw new Error(`Unsupported characters: ${charList}`);
        }

        const textIdsShape = [bsz, textIds[0].length];
        const textMaskShape = [bsz, 1, textMask[0][0].length];
        const textMaskTensor = arrayToTensor(textMask, textMaskShape);

        // Duration predictor: seconds of speech for the whole utterance.
        const dpResult = await models.dpOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_dp: styleDpTensor,
            text_mask: textMaskTensor
        });

        const durOnnx = Array.from(dpResult.duration.data);
        // durationFactor scales the predicted durations (speed control).
        for (let i = 0; i < durOnnx.length; i++) {
            durOnnx[i] *= durationFactor;
        }
        // Reshape durations to [bsz][1][1] as sampleNoisyLatent expects.
        const durReshaped = [];
        for (let b = 0; b < bsz; b++) {
            durReshaped.push([[durOnnx[b]]]);
        }

        // Text encoder: per-token embeddings conditioned on the voice style.
        const textEncResult = await models.textEncOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_ttl: styleTtlTensor,
            text_mask: textMaskTensor
        });

        const textEmbTensor = textEncResult.text_emb;

        // Start from random noise in latent space, sized by predicted duration.
        let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
        const latentDim = noisyLatent[0].length;
        const latentLen = noisyLatent[0][0].length;
        const latentShape = [bsz, latentDim, latentLen];
        const latentMaskShape = [bsz, 1, latentMask[0][0].length];
        const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);

        // Flatten the nested latent into one reusable buffer so the denoising
        // loop below doesn't re-flatten on every step.
        const latentBufferSize = bsz * latentDim * latentLen;
        const latentBuffer = new Float32Array(latentBufferSize);

        let initIdx = 0;
        for (let b = 0; b < bsz; b++) {
            for (let d = 0; d < latentDim; d++) {
                for (let t = 0; t < latentLen; t++) {
                    latentBuffer[initIdx++] = noisyLatent[b][d][t];
                }
            }
        }

        const scalarShape = [bsz];
        const totalStepTensor = arrayToTensor(new Array(bsz).fill(totalStep), scalarShape);

        // Pre-build per-step tensors so the loop only runs inference.
        const stepTensors = [];
        for (let step = 0; step < totalStep; step++) {
            stepTensors.push(arrayToTensor(new Array(bsz).fill(step), scalarShape));
        }

        // Iterative denoising: each pass refines the latent buffer in place.
        for (let step = 0; step < totalStep; step++) {
            const noisyLatentTensor = new ort.Tensor('float32', latentBuffer, latentShape);

            const vectorEstResult = await models.vectorEstOrt.run({
                noisy_latent: noisyLatentTensor,
                text_emb: textEmbTensor,
                style_ttl: styleTtlTensor,
                text_mask: textMaskTensor,
                latent_mask: latentMaskTensor,
                total_step: totalStepTensor,
                current_step: stepTensors[step]
            });

            // Feed the denoised latent back in for the next step.
            const denoisedData = vectorEstResult.denoised_latent.data;
            latentBuffer.set(denoisedData);
        }

        // Vocoder: latent -> waveform.
        const vocoderResult = await models.vocoderOrt.run({
            latent: new ort.Tensor('float32', latentBuffer, latentShape)
        });

        const wavBatch = vocoderResult.wav_tts.data;
        // Keep only the predicted duration's worth of samples (trim padding).
        const wavLen = Math.floor(sampleRate * durOnnx[0]);

        const audioData = wavBatch.slice(0, wavLen);

        const supertonicEndTime = Date.now();
        const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;
        const audioDurationSec = durOnnx[0];

        return {
            success: true,
            processingTime: supertonicProcessingTime,
            audioDuration: audioDurationSec,
            audioData: audioData,
            sampleRate: sampleRate,
            text: text
        };
    } catch (error) {
        // Errors are converted into a failure result rather than thrown.
        return {
            success: false,
            error: error.message,
            text: text
        };
    }
}
| |
|
| | |
// Format a duration in seconds as a zero-padded clock string with two
// fractional digits: "SS.cc", "MM:SS.cc", or "HH:MM:SS.cc" depending on size.
function formatTimeDetailed(seconds) {
    const pad = (n) => n.toString().padStart(2, '0');

    const hours = Math.floor(seconds / 3600);
    const mins = Math.floor((seconds % 3600) / 60);
    const secs = seconds % 60;
    const centis = Math.floor((secs % 1) * 100);
    const wholeSecs = Math.floor(secs);

    const tail = `${pad(wholeSecs)}.${pad(centis)}`;
    if (seconds < 60) {
        return tail;
    }
    if (seconds < 3600) {
        return `${pad(mins)}:${tail}`;
    }
    return `${pad(hours)}:${pad(mins)}:${tail}`;
}
| |
|
| | |
// Generate speech for long text by chunking it, synthesizing each chunk, and
// concatenating the results with 0.3 s of silence between chunks.
// onFirstChunkReady(audioData, sampleRate, durationSoFar, text, numChunks, firstChunkTime, processedChars)
// fires after chunk 0; onChunkAdded(audioData, sampleRate, durationSoFar, chunkNum, numChunks, elapsed, processedChars)
// fires for each later chunk, enabling streaming playback while the rest renders.
// Resolves to { success: true, processingTime, audioDuration, url, text, firstChunkTime }
// or { success: false, error, text } — never throws.
// Fix: the loop-local `const chunkText` shadowed the chunkText() helper used
// above; renamed to `chunkStr`.
async function generateSupertonicSpeechChunked(text, totalStep, durationFactor, onFirstChunkReady, onChunkAdded) {
    const supertonicStartTime = Date.now();
    const sampleRate = cfgs.ae.sample_rate;
    const silenceDuration = 0.3;

    try {
        const chunks = chunkText(text);

        const audioDataArrays = [];
        const durations = [];
        const silenceSamples = Math.floor(silenceDuration * sampleRate);
        let firstChunkEndTime = 0;
        let firstChunkTime = 0;

        for (let i = 0; i < chunks.length; i++) {
            const chunkStr = chunks[i];

            const result = await generateSupertonicSpeech(chunkStr, totalStep, durationFactor);

            if (!result.success) {
                throw new Error(`Failed to generate chunk ${i + 1}: ${result.error}`);
            }

            const audioData = result.audioData;

            audioDataArrays.push(audioData);
            durations.push(result.audioDuration);

            if (i === 0 && onFirstChunkReady) {
                // Time-to-first-audio, used for streaming playback stats.
                firstChunkEndTime = Date.now();
                firstChunkTime = (firstChunkEndTime - supertonicStartTime) / 1000;

                const totalDurationSoFar = result.audioDuration;
                const processedChars = chunks[0].length;

                onFirstChunkReady(audioData, sampleRate, totalDurationSoFar, text, chunks.length, firstChunkTime, processedChars);
            } else if (i > 0 && onChunkAdded) {
                // Progress including all chunks so far plus inter-chunk silences.
                const totalDurationSoFar = durations.slice(0, i + 1).reduce((sum, dur) => sum + dur, 0) + silenceDuration * i;
                const currentProcessingTime = (Date.now() - supertonicStartTime) / 1000;
                const processedChars = chunks.slice(0, i + 1).reduce((sum, chunk) => sum + chunk.length, 0);

                onChunkAdded(audioData, sampleRate, totalDurationSoFar, i + 1, chunks.length, currentProcessingTime, processedChars);
            }
        }

        const totalDuration = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * (chunks.length - 1);

        // Total sample count: all chunks plus the silence gaps between them.
        let totalSamples = 0;
        for (let i = 0; i < audioDataArrays.length; i++) {
            totalSamples += audioDataArrays[i].length;
            if (i < audioDataArrays.length - 1) {
                totalSamples += silenceSamples;
            }
        }

        const wavCat = new Float32Array(totalSamples);

        let currentIdx = 0;
        for (let i = 0; i < audioDataArrays.length; i++) {
            const audioData = audioDataArrays[i];
            wavCat.set(audioData, currentIdx);
            currentIdx += audioData.length;

            // Float32Array is zero-initialized, so skipping ahead leaves silence.
            if (i < audioDataArrays.length - 1) {
                currentIdx += silenceSamples;
            }
        }

        // Encode the concatenated waveform as a downloadable WAV blob URL.
        const wavBuffer = writeWavFile(wavCat, sampleRate);
        const blob = new Blob([wavBuffer], { type: 'audio/wav' });
        const url = URL.createObjectURL(blob);

        const supertonicEndTime = Date.now();
        const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;

        return {
            success: true,
            processingTime: supertonicProcessingTime,
            audioDuration: totalDuration,
            url: url,
            text: text,
            firstChunkTime: firstChunkTime
        };
    } catch (error) {
        return {
            success: false,
            error: error.message,
            text: text
        };
    }
}
| |
|
| | |
| | async function generateSpeech() { |
| | let text = (demoTextInput.textContent || demoTextInput.innerText || '').trim(); |
| | |
| | |
| | const validation = validateTextInput(text); |
| | if (!validation.valid) { |
| | showDemoError(validation.message); |
| | return; |
| | } |
| | |
| | if (!models || !cfgs || !processors) { |
| | showDemoError('Models are still loading. Please wait.'); |
| | return; |
| | } |
| | |
| | if (!currentStyleTtlTensor || !currentStyleDpTensor) { |
| | showDemoError('Reference embeddings are not ready. Please wait.'); |
| | return; |
| | } |
| | |
| | |
| | const charValidation = validateCharacters(text); |
| | if (!charValidation.valid && charValidation.unsupportedChars.length > 0) { |
| | const charList = charValidation.unsupportedChars.map(c => `"${c}"`).join(', '); |
| | showDemoError(`Cannot generate speech: Unsupported characters found: ${charList}`); |
| | return; |
| | } |
| |
|
| | currentGenerationTextLength = text.length; |
| | |
| | try { |
| | isGenerating = true; |
| | demoGenerateBtn.disabled = true; |
| | |
| | |
| | const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); |
| | speakerItemsForGeneration.forEach(item => item.classList.add('disabled')); |
| | |
| | |
| | const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); |
| | languageItemsForGeneration.forEach(item => item.classList.add('disabled')); |
| | |
| | hideDemoError(); |
| | hideDemoStatus(); |
| | |
| | |
| | if (audioContext) { |
| | |
| | scheduledSources.forEach(source => { |
| | try { |
| | source.stop(); |
| | } catch (e) { |
| | |
| | } |
| | }); |
| | scheduledSources = []; |
| | |
| | |
| | if (audioContext.state !== 'closed') { |
| | audioContext.close(); |
| | } |
| | audioContext = null; |
| | } |
| | |
| | |
| | if (animationFrameId) { |
| | cancelAnimationFrame(animationFrameId); |
| | animationFrameId = null; |
| | } |
| | |
| | |
| | customAudioPlayers.forEach(player => { |
| | if (player.cleanup) { |
| | player.cleanup(); |
| | } |
| | }); |
| | customAudioPlayers = []; |
| | |
| | |
| | audioChunks = []; |
| | totalDuration = 0; |
| | startTime = 0; |
| | pauseTime = 0; |
| | isPaused = false; |
| | isPlaying = false; |
| | firstChunkGenerationTime = 0; |
| | totalChunks = 0; |
| | nextScheduledTime = 0; |
| | |
| | |
| | const createInitialResultItem = (system, titleMain, titleSub, titleColor, includeStatus) => { |
| | const titleStatus = includeStatus |
| | ? `<span class="title-status status-running" id="${system}-status">⏳ Running...</span>` |
| | : ''; |
| | return ` |
| | <div class="demo-result-item ${system}-result-item generating" id="${system}-result" style="--result-progress: 0%;"> |
| | <div class="demo-result-title"> |
| | <span class="title-main" style="color: ${titleColor};">${titleMain}</span> |
| | <span class="title-sub">${titleSub}</span> |
| | ${titleStatus} |
| | </div> |
| | <div class="demo-result-info"> |
| | <!-- |
| | <div class="stat"> |
| | <div class="stat-value" id="${system}-chars">--</div> |
| | <div class="stat-label">Processed Chars</div> |
| | </div> |
| | --> |
| | <div class="stat"> |
| | <div class="stat-value" id="${system}-time">--</div> |
| | <div class="stat-label">Processing Time<span class="stat-arrow stat-arrow--down">↓</span></div> |
| | </div> |
| | <div class="stat"> |
| | <div class="stat-value" id="${system}-cps">--</div> |
| | <div class="stat-label">Chars/sec<span class="stat-arrow stat-arrow--up">↑</span></div> |
| | </div> |
| | <div class="stat"> |
| | <div class="stat-value" id="${system}-rtf">--</div> |
| | <div class="stat-label">RTF<span class="stat-arrow stat-arrow--down">↓</span></div> |
| | </div> |
| | </div> |
| | <div class="custom-audio-player"> |
| | <div class="demo-placeholder-audio">Generating speech...</div> |
| | </div> |
| | </div> |
| | `; |
| | }; |
| | const supertonicInitial = createInitialResultItem( |
| | 'supertonic', |
| | 'Supertonic', |
| | 'On-Device', |
| | 'var(--accent-yellow)', |
| | false |
| | ); |
| | demoResults.style.display = 'flex'; |
| | demoResults.innerHTML = supertonicInitial; |
| | |
| | const totalStep = parseInt(demoTotalSteps.value); |
| | const speed = parseFloat(demoSpeed.value); |
| | const durationFactor = speedToDurationFactor(speed); |
| | |
| | |
| | let latestSupertonicProcessedChars = 0; |
| | |
| | |
| | const formatTime = (seconds, { trimMobile = false } = {}) => { |
| | const mins = Math.floor(seconds / 60); |
| | const secs = seconds % 60; |
| | const secString = secs.toFixed(2).padStart(5, '0'); |
| | let formatted = `${mins}:${secString}`; |
| | if (trimMobile) { |
| | formatted = trimDecimalsForMobile(formatted); |
| | } |
| | return formatted; |
| | }; |
| | |
| | const updateProgress = () => { |
| | if (!isPlaying || !audioContext) return; |
| | |
| | const currentTime = isPaused ? pauseTime : (audioContext.currentTime - startTime); |
| | const progress = totalDuration > 0 ? (currentTime / totalDuration) * 100 : 0; |
| | |
| | if (progressFill) { |
| | progressFill.style.width = `${Math.min(progress, 100)}%`; |
| | } |
| | if (currentTimeDisplay) { |
| | currentTimeDisplay.textContent = formatTime(Math.min(currentTime, totalDuration), { trimMobile: true }); |
| | } |
| | |
| | if (currentTime < totalDuration) { |
| | animationFrameId = requestAnimationFrame(updateProgress); |
| | } else { |
| | |
| | isPlaying = false; |
| | isPaused = false; |
| | if (playPauseBtn) { |
| | playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| | } |
| | } |
| | }; |
| | |
| | const togglePlayPause = () => { |
| | if (!audioContext || audioChunks.length === 0) return; |
| | |
| | if (isPaused) { |
| | |
| | pauseAllPlayersExcept(supertonicPlayerRecord); |
| | |
| | const seekTime = pauseTime; |
| | |
| | |
| | let accumulatedTime = 0; |
| | let startChunkIndex = 0; |
| | let offsetInChunk = seekTime; |
| | |
| | for (let i = 0; i < audioChunks.length; i++) { |
| | const chunkDuration = audioChunks[i].buffer.duration; |
| | if (accumulatedTime + chunkDuration > seekTime) { |
| | startChunkIndex = i; |
| | offsetInChunk = seekTime - accumulatedTime; |
| | break; |
| | } |
| | accumulatedTime += chunkDuration + 0.3; |
| | } |
| | |
| | |
| | scheduledSources.forEach(source => { |
| | try { |
| | source.stop(); |
| | } catch (e) { |
| | |
| | } |
| | }); |
| | scheduledSources = []; |
| | |
| | |
| | if (audioContext.state === 'suspended') { |
| | audioContext.resume(); |
| | } |
| | |
| | |
| | startTime = audioContext.currentTime - seekTime; |
| | let nextStartTime = audioContext.currentTime; |
| | |
| | for (let i = startChunkIndex; i < audioChunks.length; i++) { |
| | const source = audioContext.createBufferSource(); |
| | source.buffer = audioChunks[i].buffer; |
| | source.connect(audioContext.destination); |
| | |
| | if (i === startChunkIndex) { |
| | source.start(nextStartTime, offsetInChunk); |
| | nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); |
| | } else { |
| | source.start(nextStartTime); |
| | nextStartTime += audioChunks[i].buffer.duration; |
| | } |
| | |
| | if (i < audioChunks.length - 1) { |
| | nextStartTime += 0.3; |
| | } |
| | |
| | scheduledSources.push(source); |
| | } |
| | |
| | nextScheduledTime = nextStartTime; |
| | |
| | isPaused = false; |
| | isPlaying = true; |
| | playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
| | updateProgress(); |
| | } else if (isPlaying) { |
| | |
| | pauseTime = audioContext.currentTime - startTime; |
| | audioContext.suspend(); |
| | isPaused = true; |
| | playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| | if (animationFrameId) { |
| | cancelAnimationFrame(animationFrameId); |
| | } |
| | } else { |
| | |
| | pauseAllPlayersExcept(supertonicPlayerRecord); |
| | |
| | pauseTime = 0; |
| | |
| | |
| | if (audioContext.state === 'suspended') { |
| | audioContext.resume(); |
| | } |
| | |
| | |
| | scheduledSources.forEach(source => { |
| | try { |
| | source.stop(); |
| | } catch (e) { |
| | |
| | } |
| | }); |
| | scheduledSources = []; |
| | |
| | |
| | startTime = audioContext.currentTime; |
| | let nextStartTime = audioContext.currentTime; |
| | |
| | for (let i = 0; i < audioChunks.length; i++) { |
| | const source = audioContext.createBufferSource(); |
| | source.buffer = audioChunks[i].buffer; |
| | source.connect(audioContext.destination); |
| | source.start(nextStartTime); |
| | nextStartTime += audioChunks[i].buffer.duration; |
| | |
| | if (i < audioChunks.length - 1) { |
| | nextStartTime += 0.3; |
| | } |
| | |
| | scheduledSources.push(source); |
| | } |
| | |
| | nextScheduledTime = nextStartTime; |
| | |
| | isPlaying = true; |
| | isPaused = false; |
| | playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
| | updateProgress(); |
| | } |
| | }; |
| | |
| | const seekTo = (percentage) => { |
| | if (!audioContext || audioChunks.length === 0) return; |
| | |
| | const seekTime = (percentage / 100) * totalDuration; |
| | |
| | |
| | const wasPlaying = isPlaying; |
| | const wasPaused = isPaused; |
| | |
| | |
| | scheduledSources.forEach(source => { |
| | try { |
| | source.stop(); |
| | } catch (e) { |
| | |
| | } |
| | }); |
| | scheduledSources = []; |
| | |
| | |
| | if (animationFrameId) { |
| | cancelAnimationFrame(animationFrameId); |
| | } |
| | |
| | |
| | let accumulatedTime = 0; |
| | let startChunkIndex = 0; |
| | let offsetInChunk = seekTime; |
| | |
| | for (let i = 0; i < audioChunks.length; i++) { |
| | const chunkDuration = audioChunks[i].buffer.duration; |
| | if (accumulatedTime + chunkDuration > seekTime) { |
| | startChunkIndex = i; |
| | offsetInChunk = seekTime - accumulatedTime; |
| | break; |
| | } |
| | accumulatedTime += chunkDuration + 0.3; |
| | } |
| | |
| | |
| | if (wasPaused || !wasPlaying) { |
| | pauseTime = seekTime; |
| | |
| | |
| | if (progressFill) { |
| | const progress = (seekTime / totalDuration) * 100; |
| | progressFill.style.width = `${Math.min(progress, 100)}%`; |
| | } |
| | if (currentTimeDisplay) { |
| | currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true }); |
| | } |
| | |
| | |
| | isPaused = true; |
| | isPlaying = true; |
| | |
| | if (playPauseBtn) { |
| | playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| | } |
| | |
| | return; |
| | } |
| | |
| | |
| | if (audioContext.state === 'suspended') { |
| | audioContext.resume(); |
| | } |
| | |
| | |
| | startTime = audioContext.currentTime - seekTime; |
| | let nextStartTime = audioContext.currentTime; |
| | |
| | for (let i = startChunkIndex; i < audioChunks.length; i++) { |
| | const source = audioContext.createBufferSource(); |
| | source.buffer = audioChunks[i].buffer; |
| | source.connect(audioContext.destination); |
| | |
| | if (i === startChunkIndex) { |
| | |
| | source.start(nextStartTime, offsetInChunk); |
| | nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); |
| | } else { |
| | source.start(nextStartTime); |
| | nextStartTime += audioChunks[i].buffer.duration; |
| | } |
| | |
| | |
| | if (i < audioChunks.length - 1) { |
| | nextStartTime += 0.3; |
| | } |
| | |
| | scheduledSources.push(source); |
| | } |
| | |
| | |
| | nextScheduledTime = nextStartTime; |
| | |
| | |
| | isPlaying = true; |
| | isPaused = false; |
| | if (playPauseBtn) { |
| | playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
| | } |
| | |
| | |
| | updateProgress(); |
| | }; |
| | |
| | |
| | |
// Wrap raw Float32 PCM samples in a mono AudioBuffer at the given sample rate.
const createAudioBufferFromFloat32 = (audioData, sampleRate) => {
    const buffer = audioContext.createBuffer(1, audioData.length, sampleRate);
    const channel = buffer.getChannelData(0);
    channel.set(audioData);
    return buffer;
};
| | |
// First-chunk callback: seeds the stats display, builds the custom audio
// player UI, creates the Web Audio context, and starts playback of the
// first synthesized chunk. Later chunks are appended by onChunkAdded.
// Fix: `container`, `playPauseBtn`, and `progressBar` are now null-checked
// before use, matching the guard style applied to every other element here.
const onFirstChunkReady = async (audioData, sampleRate, duration, text, numChunks, firstChunkTime, processedChars) => {
    totalChunks = numChunks;
    firstChunkGenerationTime = firstChunkTime;

    const container = document.getElementById('demoResults');

    // First-chunk stats. In batch mode (long text) the time column shows
    // "first chunk / total", which are equal at this point.
    const textLength = currentGenerationTextLength > 0
        ? currentGenerationTextLength
        : (text ? text.length : 0);
    const isBatch = textLength >= getMaxChunkLength();
    const processingTimeStr = isBatch && firstChunkTime
        ? `${formatTimeDetailed(firstChunkTime)} / ${formatTimeDetailed(firstChunkTime)}`
        : formatTimeDetailed(firstChunkTime);
    const safeInitialChars = typeof processedChars === 'number' ? processedChars : 0;
    const displayedInitialChars = textLength > 0 ? Math.min(safeInitialChars, textLength) : safeInitialChars;
    const charsPerSec = firstChunkTime > 0 && displayedInitialChars > 0
        ? (displayedInitialChars / firstChunkTime).toFixed(1)
        : '0.0';
    const rtf = duration > 0 && firstChunkTime > 0 ? (firstChunkTime / duration).toFixed(3) : '-';
    const progressValue = textLength > 0 ? Math.min(100, (displayedInitialChars / textLength) * 100) : 0;

    const resultItemEl = document.getElementById('supertonic-result');
    if (!resultItemEl) {
        console.warn('Supertonic result container not found.');
        return;
    }

    resultItemEl.classList.remove('generating');
    resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);

    // Result-entry title.
    const titleMainEl = resultItemEl.querySelector('.title-main');
    if (titleMainEl) {
        titleMainEl.textContent = 'Supertonic';
        titleMainEl.style.color = 'var(--accent-yellow)';
    }
    const titleSubEl = resultItemEl.querySelector('.title-sub');
    if (titleSubEl) {
        titleSubEl.textContent = 'On-Device';
    }

    // Clear any stale error state and publish the first-chunk stats.
    const infoContainer = resultItemEl.querySelector('.demo-result-info');
    if (infoContainer) {
        infoContainer.classList.remove('error');
    }
    const timeElInitial = document.getElementById('supertonic-time');
    if (timeElInitial) {
        timeElInitial.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true });
    }
    const cpsElInitial = document.getElementById('supertonic-cps');
    if (cpsElInitial) {
        cpsElInitial.textContent = charsPerSec;
    }
    const rtfElInitial = document.getElementById('supertonic-rtf');
    if (rtfElInitial) {
        rtfElInitial.innerHTML = formatStatValueWithSuffix(rtf, 'x');
    }

    // Build the custom player controls.
    const playerContainer = resultItemEl.querySelector('.custom-audio-player');
    if (playerContainer) {
        playerContainer.style.display = '';
        playerContainer.innerHTML = `
            <button id="play-pause-btn" class="player-btn">${PAUSE_ICON_SVG}</button>
            <div class="time-display" id="current-time">0:00.00</div>
            <div class="progress-container" id="progress-container">
                <div class="progress-bar">
                    <div class="progress-fill" id="progress-fill"></div>
                </div>
            </div>
            <div class="time-display" id="total-duration">${formatTime(duration, { trimMobile: true })}</div>
            <div class="demo-result-actions" style="display: none;">
                <button class="demo-download-btn" id="supertonic-download" aria-label="Download WAV" title="Download WAV">
                    <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24">
                        <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
                        <polyline points="7 10 12 15 17 10"/>
                        <line x1="12" y1="15" x2="12" y2="3"/>
                    </svg>
                </button>
            </div>
        `;
    }

    // BUGFIX: guard against the results container being absent instead of
    // dereferencing it unconditionally.
    if (container) {
        container.style.display = 'flex';
    }
    latestSupertonicProcessedChars = displayedInitialChars;

    // Cache the just-created player elements (may be null if the player
    // container was not found above).
    playPauseBtn = document.getElementById('play-pause-btn');
    progressBar = document.getElementById('progress-container');
    currentTimeDisplay = document.getElementById('current-time');
    durationDisplay = document.getElementById('total-duration');
    progressFill = document.getElementById('progress-fill');

    // Fresh audio context for this generation.
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    startTime = audioContext.currentTime;
    totalDuration = duration;
    isPlaying = true;
    isPaused = false;

    // Pause: remember the elapsed offset, stop every scheduled source, and
    // suspend the context so nothing continues in the background.
    const pausePlayback = () => {
        if (!audioContext || audioContext.state === 'closed') return;
        if (isPlaying) {
            pauseTime = audioContext.currentTime - startTime;
            scheduledSources.forEach(source => {
                try {
                    source.stop();
                } catch (e) {
                    // Source may already have ended — safe to ignore.
                }
            });
            scheduledSources = [];
            audioContext.suspend();
            isPaused = true;
            isPlaying = false;
            if (playPauseBtn) {
                playPauseBtn.innerHTML = PLAY_ICON_SVG;
            }
            if (animationFrameId) {
                cancelAnimationFrame(animationFrameId);
            }
        }
    };

    supertonicPlayerRecord = {
        audioContext: audioContext,
        pausePlayback: pausePlayback
    };

    // Register this player, replacing any record for a stale context.
    customAudioPlayers = customAudioPlayers.filter(p => p !== supertonicPlayerRecord && p.audioContext !== audioContext);
    customAudioPlayers.push(supertonicPlayerRecord);

    // Only one player should be audible at a time.
    pauseAllPlayersExcept(supertonicPlayerRecord);

    // Queue the first chunk and start it immediately.
    const audioBuffer = createAudioBufferFromFloat32(audioData, sampleRate);

    audioChunks.push({ buffer: audioBuffer, duration: audioBuffer.duration });

    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioContext.destination);
    source.start(audioContext.currentTime);
    scheduledSources.push(source);

    // Reserve the next chunk's slot, with the 0.3 s inter-chunk gap used
    // throughout the scheduling/seek logic.
    nextScheduledTime = audioContext.currentTime + audioBuffer.duration + 0.3;

    // BUGFIX: wire controls only when the elements exist, matching the
    // null checks used for every other element in this handler.
    if (playPauseBtn) {
        playPauseBtn.addEventListener('click', togglePlayPause);
    }

    if (progressBar) {
        progressBar.addEventListener('click', (e) => {
            const rect = progressBar.getBoundingClientRect();
            const percentage = ((e.clientX - rect.left) / rect.width) * 100;
            seekTo(percentage);
        });
    }

    // Kick off the progress animation loop.
    updateProgress();
};
| | |
| | |
// Streaming callback for every chunk after the first: wraps the new PCM
// samples, schedules them after the previously queued audio, and refreshes
// the duration/stats UI. `duration` is the cumulative audio length so far.
const onChunkAdded = async (audioData, sampleRate, duration, chunkIndex, totalChunks, currentProcessingTime, processedChars) => {
    // Player never initialized (or already torn down) — nothing to schedule into.
    if (!audioContext) return;

    // Decode the raw Float32 samples into a playable buffer.
    const audioBuffer = createAudioBufferFromFloat32(audioData, sampleRate);

    const chunkDuration = audioBuffer.duration;
    audioChunks.push({ buffer: audioBuffer, duration: chunkDuration });

    // Queue the chunk at the previously reserved start time.
    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioContext.destination);
    source.start(nextScheduledTime);
    scheduledSources.push(source);

    // Reserve the next slot, keeping the 0.3 s inter-chunk gap used by the
    // seek logic as well.
    nextScheduledTime = nextScheduledTime + audioBuffer.duration + 0.3;

    // Grow the known total duration as chunks arrive.
    totalDuration = duration;

    // Update the total-time label with a brief white flash as feedback.
    if (durationDisplay) {
        durationDisplay.textContent = formatTime(duration, { trimMobile: true });
        durationDisplay.style.transition = 'color 0.3s';
        durationDisplay.style.color = '#ffffff';
        setTimeout(() => {
            durationDisplay.style.color = '';
        }, 300);
    }

    // Recompute the stats shown under the result entry.
    const textLengthCandidate = currentGenerationTextLength > 0
        ? currentGenerationTextLength
        : (demoTextInput.textContent || demoTextInput.innerText || '').trim().length;
    const textLength = textLengthCandidate;
    const isBatch = textLength >= getMaxChunkLength();
    const timeEl = document.getElementById('supertonic-time');
    const durationEl = document.getElementById('supertonic-duration');
    const cpsEl = document.getElementById('supertonic-cps');
    const rtfEl = document.getElementById('supertonic-rtf');
    const effectiveProcessedChars = typeof processedChars === 'number' ? processedChars : latestSupertonicProcessedChars;

    // Ignore out-of-order progress callbacks.
    if (effectiveProcessedChars < latestSupertonicProcessedChars) {
        return;
    }

    const clampedProcessedChars = textLength > 0 ? Math.min(effectiveProcessedChars, textLength) : effectiveProcessedChars;
    const progressValue = textLength > 0 ? Math.min(100, (clampedProcessedChars / textLength) * 100) : 0;
    if (durationEl) {
        durationEl.textContent = formatTimeDetailed(duration);
    }
    // Batch mode shows "first-chunk time / running total time".
    if (timeEl && isBatch && firstChunkGenerationTime > 0 && currentProcessingTime) {
        const timeDisplay = `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(currentProcessingTime)}`;
        timeEl.innerHTML = formatStatValueWithSuffix(timeDisplay, 's', { firstLabel: true });
    }
    if (cpsEl && currentProcessingTime > 0 && clampedProcessedChars >= 0) {
        const charsPerSec = (clampedProcessedChars / currentProcessingTime).toFixed(1);
        cpsEl.textContent = charsPerSec;
    }
    // Real-time factor: processing time divided by generated audio length.
    if (rtfEl && duration > 0 && currentProcessingTime > 0) {
        const rtf = (currentProcessingTime / duration).toFixed(3);
        rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x');
    }
    const resultItemEl = document.getElementById('supertonic-result');
    if (resultItemEl) {
        resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
    }
    latestSupertonicProcessedChars = clampedProcessedChars;
};
| | |
| | |
| | const result = await generateSupertonicSpeechChunked( |
| | text, |
| | totalStep, |
| | durationFactor, |
| | onFirstChunkReady, |
| | onChunkAdded |
| | ); |
| | |
| | if (result.success) { |
| | const textLength = result.text ? result.text.length : 0; |
| | const isBatch = textLength >= getMaxChunkLength(); |
| | const processingTimeStr = isBatch && firstChunkGenerationTime > 0 |
| | ? `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(result.processingTime)}` |
| | : formatTimeDetailed(result.processingTime); |
| | const charsPerSec = result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '0.0'; |
| | const progressValue = textLength > 0 ? 100 : 0; |
| | |
| | const timeEl = document.getElementById('supertonic-time'); |
| | const durationEl = document.getElementById('supertonic-duration'); |
| | const cpsEl = document.getElementById('supertonic-cps'); |
| | const rtfEl = document.getElementById('supertonic-rtf'); |
| | |
| | if (timeEl) timeEl.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); |
| | if (durationEl) durationEl.textContent = formatTimeDetailed(result.audioDuration); |
| | latestSupertonicProcessedChars = textLength; |
| | if (cpsEl) cpsEl.textContent = charsPerSec; |
| | if (rtfEl) { |
| | const rtf = result.audioDuration > 0 ? (result.processingTime / result.audioDuration).toFixed(3) : '-'; |
| | rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x'); |
| | } |
| | const resultItemEl = document.getElementById('supertonic-result'); |
| | if (resultItemEl) { |
| | resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); |
| | } |
| | |
| | |
| | if (audioContext && audioChunks.length > 0) { |
| | totalDuration = result.audioDuration; |
| | if (durationDisplay) { |
| | durationDisplay.textContent = formatTime(result.audioDuration, { trimMobile: true }); |
| | } |
| | } |
| | |
| | |
| | const downloadBtn = document.getElementById('supertonic-download'); |
| | if (downloadBtn) { |
| | downloadBtn.parentElement.style.display = 'block'; |
| | downloadBtn.onclick = () => downloadDemoAudio(result.url, 'supertonic_speech.wav'); |
| | } |
| | } |
| | |
| | } catch (error) { |
| | showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error'); |
| | showDemoError(`Error during synthesis: ${error.message}`); |
| | console.error('Synthesis error:', error); |
| | |
| | |
| | demoResults.style.display = 'none'; |
| | demoResults.innerHTML = ` |
| | <div class="demo-placeholder"> |
| | <div class="demo-placeholder-icon">🎙️</div> |
| | <p>Your generated speech will appear here</p> |
| | </div> |
| | `; |
| | } finally { |
| | isGenerating = false; |
| | demoGenerateBtn.disabled = false; |
| | |
| | |
| | const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); |
| | speakerItemsForGeneration.forEach(item => item.classList.remove('disabled')); |
| | |
| | |
| | const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); |
| | languageItemsForGeneration.forEach(item => item.classList.remove('disabled')); |
| | } |
| | } |
| |
|
| | |
// Trigger a browser download of `url` by clicking a transient anchor.
window.downloadDemoAudio = function(url, filename) {
    const link = document.createElement('a');
    link.href = url;
    link.download = filename;
    link.click();
};
| |
|
| | |
// Convert a playback-speed multiplier into the synthesis duration factor.
// Higher speed → shorter duration; `offset` nudges the mapping so that the
// default 1.0x speed does not map exactly to a factor of 1.
function speedToDurationFactor(speed, offset = 0.05) {
    const adjusted = speed + offset;
    return 1 / adjusted;
}
| |
|
| | |
// Mirror the current slider positions into their text labels.
function updateSliderValues() {
    demoTotalStepsValue.textContent = `${demoTotalSteps.value} Steps`;

    const speed = parseFloat(demoSpeed.value);
    demoSpeedValue.textContent = `${speed.toFixed(2)}x`;
}
| | |
| | |
// Keep slider labels live while dragging.
demoTotalSteps.addEventListener('input', updateSliderValues);
demoSpeed.addEventListener('input', updateSliderValues);

// Initial label state.
updateSliderValues();

// Main generate action.
demoGenerateBtn.addEventListener('click', generateSpeech);
| |
|
| | |
// Preset text selection state.
const presetItems = document.querySelectorAll('.preset-item[data-preset]');
const freeformBtn = document.getElementById('freeformBtn');
let currentPreset = 'quote';

// Suppresses the input handler's automatic switch to "freeform" while a
// preset programmatically replaces the text.
let isPresetChanging = false;
| | |
| | |
// Highlight the requested preset button (if any), record it as the current
// preset, and toggle quote-specific result styling.
function updateActiveButton(presetType) {
    // Clear the existing highlight first.
    for (const item of presetItems) {
        item.classList.remove('active');
    }

    if (presetType) {
        const target = document.querySelector(`.preset-item[data-preset="${presetType}"]`);
        if (target) {
            target.classList.add('active');
        }
    }
    currentPreset = presetType;
    updateQuoteModeState(presetType === 'quote');
}
| |
|
// Reflect whether the "quote" preset is active on the results panel.
function updateQuoteModeState(isQuote) {
    if (demoResults) {
        demoResults.classList.toggle('quote-mode', Boolean(isQuote));
    }
}
| | |
| | |
// Default to the "quote" preset and seed the input with its text for the
// current language, when a translation exists.
updateActiveButton('quote');
if (presetTexts.quote && typeof presetTexts.quote === 'object' && presetTexts.quote[currentLanguage]) {
    demoTextInput.textContent = presetTexts.quote[currentLanguage];
    updateCharCounter();
}

presetItems.forEach(item => {
    item.addEventListener('click', () => {
        const presetType = item.getAttribute('data-preset');

        if (presetType === 'freeform') {
            // Freeform clears the editor; isPresetChanging suppresses the
            // input handler's automatic freeform switch during the swap.
            isPresetChanging = true;
            demoTextInput.textContent = '';
            updateCharCounter();
            updateActiveButton('freeform');
            isPresetChanging = false;
        } else {
            // Presets may be per-language maps or plain strings.
            const preset = presetTexts[presetType];
            if (preset && typeof preset === 'object' && preset[currentLanguage]) {
                const text = preset[currentLanguage];
                isPresetChanging = true;
                demoTextInput.textContent = text;
                updateCharCounter();
                updateActiveButton(presetType);
                isPresetChanging = false;
            } else if (preset && typeof preset === 'string') {
                // Single-language (legacy) preset.
                isPresetChanging = true;
                demoTextInput.textContent = preset;
                updateCharCounter();
                updateActiveButton(presetType);
                isPresetChanging = false;
            }
        }
    });
});
| |
|
| | |
// Force plain-text pasting into the contenteditable input: intercept the
// paste, insert the clipboard text manually, and restore the caret after it.
demoTextInput.addEventListener('paste', (e) => {
    e.preventDefault();
    const text = (e.clipboardData || window.clipboardData).getData('text/plain');
    const selection = window.getSelection();
    if (!selection.rangeCount) return;

    const range = selection.getRangeAt(0);
    range.deleteContents();
    const textNode = document.createTextNode(text);
    range.insertNode(textNode);
    range.setStartAfter(textNode);
    range.collapse(true);
    selection.removeAllRanges();
    selection.addRange(range);

    // Re-fire "input" so the counter and preset state update.
    demoTextInput.dispatchEvent(new Event('input', { bubbles: true }));
});
| |
|
| | |
// Last known input text, used by the input handler to detect real edits.
let previousTextValue = demoTextInput.textContent || demoTextInput.innerText || '';

const demoInputSection = document.querySelector('.demo-input-section');
// Expose the input section's height to CSS (used for the decorative border).
function updateLeftBorderHeight() {
    if (demoInputSection) {
        const height = demoInputSection.offsetHeight;
        demoInputSection.style.setProperty('--demo-text-input-height', `${height}px`);
    }
}

// Track size changes of the input section.
updateLeftBorderHeight();
const resizeObserver = new ResizeObserver(() => {
    updateLeftBorderHeight();
});
if (demoInputSection) {
    resizeObserver.observe(demoInputSection);
}
| | |
| | |
// Size the desktop text input so the whole demo fits the viewport: measure
// every fixed-height sibling, subtract the total from the window height,
// and give the remainder to the editable area.
function calculateTextInputHeight() {
    if (window.innerWidth <= 768) {
        // Mobile layout: let CSS drive the height.
        demoTextInput.style.height = '';
        return;
    }

    const viewportHeight = window.innerHeight;
    const interactiveDemoEl = document.querySelector('.interactive-demo');
    const containerEl = document.querySelector('.container');
    const headerWrapperEl = document.querySelector('.demo-header-wrapper');
    const controlsEl = document.querySelector('.demo-controls');
    const inputLabelEl = document.querySelector('.demo-input-label');
    const presetRowEl = document.querySelector('#presetControlsRow');
    const outputSectionEl = document.querySelector('.demo-output-section');
    const contentEl = document.querySelector('.demo-content');

    // Fall back to body styles when a wrapper element is missing.
    const interactiveDemoStyle = window.getComputedStyle(interactiveDemoEl || document.body);
    const containerStyle = window.getComputedStyle(containerEl || document.body);
    const contentStyle = window.getComputedStyle(contentEl || document.body);

    // Sum of everything that competes with the input for vertical space.
    let totalHeight = 0;

    // Outer section padding.
    const interactiveDemoPaddingTop = parseFloat(interactiveDemoStyle.paddingTop) || 0;
    const interactiveDemoPaddingBottom = parseFloat(interactiveDemoStyle.paddingBottom) || 0;
    totalHeight += interactiveDemoPaddingTop + interactiveDemoPaddingBottom;

    // Container padding.
    const containerPaddingTop = parseFloat(containerStyle.paddingTop) || 0;
    const containerPaddingBottom = parseFloat(containerStyle.paddingBottom) || 0;
    totalHeight += containerPaddingTop + containerPaddingBottom;

    // Header block.
    if (headerWrapperEl) {
        totalHeight += headerWrapperEl.offsetHeight;
    }

    // Sliders/controls row.
    if (controlsEl) {
        totalHeight += controlsEl.offsetHeight;
    }

    // Flex gap between content rows.
    const contentGap = parseFloat(contentStyle.gap) || 0;
    totalHeight += contentGap;

    // Input label.
    if (inputLabelEl) {
        totalHeight += inputLabelEl.offsetHeight;
    }

    // Preset buttons row.
    if (presetRowEl) {
        totalHeight += presetRowEl.offsetHeight;
    }

    // Second flex gap.
    totalHeight += contentGap;

    // Output/results section.
    if (outputSectionEl) {
        totalHeight += outputSectionEl.offsetHeight;
    }

    // NOTE(review): 275 presumably reserves space for page chrome that is
    // not measured above — confirm against the page layout.
    const availableHeight = viewportHeight - totalHeight - 275;

    const minHeight = 200;
    const maxHeight = availableHeight - 20;

    if (availableHeight > minHeight) {
        demoTextInput.style.height = `${Math.max(minHeight, maxHeight)}px`;
    } else {
        demoTextInput.style.height = `${minHeight}px`;
    }
}
| | |
| | |
// Initial sizing, plus re-sizing on window resize.
calculateTextInputHeight();
window.addEventListener('resize', calculateTextInputHeight);

// Re-run the height calculation whenever a measured sibling changes size.
const heightObserver = new ResizeObserver(() => {
    calculateTextInputHeight();
});

const headerWrapperEl = document.querySelector('.demo-header-wrapper');
const controlsEl = document.querySelector('.demo-controls');
const presetRowEl = document.querySelector('#presetControlsRow');
const outputSectionEl = document.querySelector('.demo-output-section');

if (headerWrapperEl) heightObserver.observe(headerWrapperEl);
if (controlsEl) heightObserver.observe(controlsEl);
if (presetRowEl) heightObserver.observe(presetRowEl);
if (outputSectionEl) heightObserver.observe(outputSectionEl);

// Show the custom scrollbar while the user scrolls the input, then hide it
// again 1.5 s after scrolling stops.
let scrollbarTimeout;
demoTextInput.addEventListener('scroll', () => {
    demoTextInput.classList.add('scrolling');

    if (scrollbarTimeout) {
        clearTimeout(scrollbarTimeout);
    }

    scrollbarTimeout = setTimeout(() => {
        demoTextInput.classList.remove('scrolling');
    }, 1500);
});
| | |
// Keep the char counter, preset highlighting, and language detection in
// sync with the editable text.
demoTextInput.addEventListener('input', () => {
    updateCharCounter();

    // Manual edits (not programmatic preset swaps) switch to "freeform".
    const currentText = demoTextInput.textContent || demoTextInput.innerText || '';
    if (!isPresetChanging && currentText !== previousTextValue) {
        updateActiveButton('freeform');
    }

    if (currentPreset === 'freeform') {
        // Auto-detect the language of freeform text and notify the user.
        const detectedLang = detectLanguage(currentText);
        if (detectedLang && detectedLang !== currentLanguage) {
            const previousLang = currentLanguage;
            currentLanguage = detectedLang;
            window.updateActiveLanguage(currentLanguage);
            showLanguageToast(previousLang, detectedLang);
        }
    }

    previousTextValue = currentText;
});

// Debounced counter refresh on resize (its layout depends on width).
let resizeTimeout;
window.addEventListener('resize', () => {
    clearTimeout(resizeTimeout);
    resizeTimeout = setTimeout(() => {
        updateCharCounter();
    }, 100);
});

// Initial counter state.
updateCharCounter();
| |
|
| | |
// Speaker (voice) selection UI elements.
const speakerList = document.getElementById('speakerList');
const speakerItems = speakerList ? speakerList.querySelectorAll('.speaker-item[data-voice]') : [];
const createVoiceBtn = document.getElementById('createVoiceBtn');
const comingSoonModal = document.getElementById('comingSoonModal');
const comingSoonCloseBtn = document.getElementById('comingSoonCloseBtn');
// Blocks voice clicks while a voice switch is in flight.
let voiceSelectDisabled = false;
| | |
| | |
// Highlight the speaker entry matching `voice` and clear all others.
window.updateActiveSpeaker = function(voice) {
    if (!speakerList || !speakerItems) return;
    for (const item of speakerItems) {
        item.classList.toggle('active', item.dataset.voice === voice);
    }
};
| | |
| | |
// Highlight the initially selected voice.
if (speakerList && speakerItems.length > 0) {
    window.updateActiveSpeaker(currentVoice);
}

// Shared tooltip element for voice descriptions.
const speakerTooltip = document.getElementById('speakerTooltip');
| | |
if (speakerList) {
    speakerItems.forEach(item => {
        // Set by touchend when it synthesizes a click, so the click handler
        // can tell touch-driven clicks apart from real mouse clicks.
        let clickFromTouch = false;

        // Voice selection (mouse click, or click synthesized from touchend).
        item.addEventListener('click', async (e) => {
            // On touch devices in the mobile layout, only accept clicks
            // synthesized by the touch handlers below.
            if (isTouchDevice() && isMobileViewport() && !clickFromTouch) {
                return;
            }

            // Consume the one-shot flag.
            clickFromTouch = false;

            if (voiceSelectDisabled || modelsLoading || isGenerating) return;

            const selectedVoice = item.dataset.voice;

            // Re-selecting the active voice just regenerates speech.
            if (selectedVoice === currentVoice) {
                const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
                if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
                    generateSpeech();
                }
                return;
            }

            // Lock the UI while the voice is swapped.
            const wasDisabled = demoGenerateBtn.disabled;
            demoGenerateBtn.disabled = true;
            voiceSelectDisabled = true;

            // Optimistically highlight the requested voice.
            window.updateActiveSpeaker(selectedVoice);

            try {
                await switchVoice(selectedVoice);

                if (models && cfgs && processors) {
                    demoGenerateBtn.disabled = false;
                    voiceSelectDisabled = false;

                    // Auto-generate once the new voice is ready.
                    const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
                    if (text.length >= 10 && !isGenerating) {
                        generateSpeech();
                    }
                }
            } catch (error) {
                console.error('Failed to switch voice:', error);
                // Roll the highlight back and restore the UI.
                window.updateActiveSpeaker(currentVoice);
                voiceSelectDisabled = false;
                if (!wasDisabled) demoGenerateBtn.disabled = false;
            }
        });

        // Voice-description tooltip (desktop hover + mobile touch).
        if (speakerTooltip) {
            // Desktop: show and follow the cursor while hovering.
            item.addEventListener('mouseenter', (e) => {
                if (isTouchDevice() && isMobileViewport()) return;
                const voice = item.dataset.voice;
                if (voice && VOICE_DESCRIPTIONS[voice]) {
                    speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice];
                    speakerTooltip.style.display = 'block';
                    updateTooltipPosition(e, speakerTooltip);
                }
            });

            item.addEventListener('mousemove', (e) => {
                if (isTouchDevice() && isMobileViewport()) return;
                if (speakerTooltip.style.display === 'block') {
                    updateTooltipPosition(e, speakerTooltip);
                }
            });

            item.addEventListener('mouseleave', () => {
                if (isTouchDevice() && isMobileViewport()) return;
                speakerTooltip.style.display = 'none';
            });

            // Mobile touch state: distinguish a tap (select voice) from a
            // press/scroll (show tooltip only).
            let touchStartTime = 0;
            let touchHandled = false;
            let touchStartY = 0;
            const TOUCH_MOVE_THRESHOLD = 10;

            item.addEventListener('touchstart', (e) => {
                if (!isTouchDevice() || !isMobileViewport()) return;

                touchHandled = false;
                const touch = e.touches[0];
                touchStartTime = Date.now();
                touchStartY = touch.clientY;

                const voice = item.dataset.voice;
                if (voice && VOICE_DESCRIPTIONS[voice]) {
                    // Block scrolling/zooming while the tooltip is shown.
                    e.preventDefault();

                    // Show the description immediately on touch.
                    speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice];
                    speakerTooltip.style.display = 'block';
                    updateTooltipPositionMobile(speakerTooltip, touch.clientY);
                }
            }, { passive: false });

            item.addEventListener('touchmove', (e) => {
                if (!isTouchDevice() || !isMobileViewport()) return;

                const touch = e.touches[0];
                const deltaY = Math.abs(touch.clientY - touchStartY);

                // Significant vertical movement means a scroll, not a tap.
                if (deltaY > TOUCH_MOVE_THRESHOLD) {
                    touchHandled = true;
                    speakerTooltip.style.display = 'none';
                }

                e.preventDefault();
            }, { passive: false });

            item.addEventListener('touchend', (e) => {
                if (!isTouchDevice() || !isMobileViewport()) return;

                const touchEndTime = Date.now();
                const touchDuration = touchEndTime - touchStartTime;

                speakerTooltip.style.display = 'none';

                e.preventDefault();

                // A short, stationary touch counts as a tap: synthesize a
                // click after a brief delay so the click handler accepts it.
                if (!touchHandled && touchDuration < 500) {
                    clickFromTouch = true;
                    setTimeout(() => {
                        const clickEvent = new MouseEvent('click', {
                            bubbles: true,
                            cancelable: true,
                            view: window
                        });
                        item.dispatchEvent(clickEvent);
                    }, 50);
                } else {
                    touchHandled = true;
                    e.stopPropagation();
                }
            }, { passive: false });

            item.addEventListener('touchcancel', (e) => {
                if (!isTouchDevice() || !isMobileViewport()) return;

                speakerTooltip.style.display = 'none';
                touchHandled = true;

                e.preventDefault();
            }, { passive: false });

            // Suppress the long-press context menu on mobile.
            item.addEventListener('contextmenu', (e) => {
                if (isTouchDevice() && isMobileViewport()) {
                    e.preventDefault();
                    return false;
                }
            });
        }
    });
}
| |
|
| | |
// Place the tooltip slightly above the pointer, then nudge it back inside
// the viewport if it overflows any edge.
function updateTooltipPosition(event, tooltip) {
    tooltip.style.left = `${event.clientX}px`;
    tooltip.style.top = `${event.clientY - 40}px`;

    const rect = tooltip.getBoundingClientRect();
    const viewportWidth = window.innerWidth;
    const viewportHeight = window.innerHeight;

    if (rect.right > viewportWidth) {
        tooltip.style.left = `${viewportWidth - rect.width - 10}px`;
    }
    if (rect.left < 0) {
        tooltip.style.left = '10px';
    }
    if (rect.top < 0) {
        // No room above the cursor — flip below it instead.
        tooltip.style.top = `${event.clientY + 40}px`;
    }
    if (rect.bottom > viewportHeight) {
        tooltip.style.top = `${viewportHeight - rect.height - 10}px`;
    }
}
| | |
| | |
// Mobile tooltip placement: stretch across the viewport, center the text,
// and keep the tooltip vertically near the touch point.
function updateTooltipPositionMobile(tooltip, touchY) {
    const viewportWidth = window.innerWidth;
    const viewportHeight = window.innerHeight;

    // Full-width layout with centered, wrapping text.
    tooltip.style.width = '90%';
    tooltip.style.left = '5%';
    tooltip.style.right = 'auto';
    tooltip.style.marginLeft = '0';
    tooltip.style.marginRight = '0';
    tooltip.style.whiteSpace = 'normal';
    tooltip.style.textAlign = 'center';

    // Prefer showing the tooltip above the touch point.
    tooltip.style.top = `${touchY - 75}px`;

    const rect = tooltip.getBoundingClientRect();

    if (rect.top < 10) {
        // Too close to the top edge — flip below the finger instead.
        tooltip.style.top = `${touchY + 20}px`;
    }
    if (rect.bottom > viewportHeight - 10) {
        // Clamp to the bottom edge.
        tooltip.style.top = `${viewportHeight - rect.height - 10}px`;
    }
}
| | |
| | |
// "Create voice" opens the coming-soon modal.
if (createVoiceBtn && comingSoonModal) {
    createVoiceBtn.addEventListener('click', () => {
        comingSoonModal.classList.add('show');
    });
}

// Close button hides the modal.
if (comingSoonCloseBtn && comingSoonModal) {
    comingSoonCloseBtn.addEventListener('click', () => {
        comingSoonModal.classList.remove('show');
    });
}

// Clicking the dimmed overlay also dismisses the modal.
if (comingSoonModal) {
    const overlay = comingSoonModal.querySelector('.coming-soon-modal-overlay');
    if (overlay) {
        overlay.addEventListener('click', () => {
            comingSoonModal.classList.remove('show');
        });
    }
}
| |
|
| | |
// Language selection UI elements.
const languageList = document.getElementById('languageList');
const languageItems = languageList ? languageList.querySelectorAll('.speaker-item[data-language]') : [];
| | |
| | |
// Highlight the language entry matching `language` and clear all others.
window.updateActiveLanguage = function(language) {
    if (!languageList || !languageItems) return;
    for (const item of languageItems) {
        item.classList.toggle('active', item.dataset.language === language);
    }
};
| | |
| | |
// Highlight the initially selected language.
if (languageList && languageItems.length > 0) {
    window.updateActiveLanguage(currentLanguage);
}

// Language switching.
if (languageList) {
    languageItems.forEach(item => {
        item.addEventListener('click', async (e) => {
            // Ignore clicks while models are loading or synthesis runs.
            if (modelsLoading || isGenerating) return;

            const selectedLanguage = item.dataset.language;

            // Re-selecting the current language just regenerates speech.
            if (selectedLanguage === currentLanguage) {
                const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
                if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
                    generateSpeech();
                }
                return;
            }

            currentLanguage = selectedLanguage;
            window.updateActiveLanguage(currentLanguage);

            // Swap the preset text to the newly selected language, when the
            // active preset has a translation.
            if (currentPreset && currentPreset !== 'freeform' && presetTexts[currentPreset]) {
                const preset = presetTexts[currentPreset];
                if (preset && typeof preset === 'object' && preset[currentLanguage]) {
                    isPresetChanging = true;
                    demoTextInput.textContent = preset[currentLanguage];
                    updateCharCounter();
                    isPresetChanging = false;
                }
            }

            // Give the DOM a moment to settle before reading the text back
            // and auto-generating.
            await new Promise(resolve => setTimeout(resolve, 100));
            const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
            if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
                generateSpeech();
            }
        });
    });
}
| |
|
| | |
// Decorative title animations.
const demoTitleLeft = document.querySelector('.demo-title-left');
const demoTitleRight = document.querySelector('.demo-title-right');
const demoOutputSection = document.querySelector('.demo-output-section');

// Wrap each character of the left title in a span so letters can be
// animated individually (spaces are left as-is).
if (demoTitleLeft) {
    const text = demoTitleLeft.textContent.trim();
    demoTitleLeft.innerHTML = text.split('').map(char =>
        char === ' ' ? ' ' : `<span class="letter visible">${char}</span>`
    ).join('');
}

// Clicking the input section replays the letter-by-letter reveal.
if (demoInputSection && demoTitleLeft) {
    demoInputSection.addEventListener('click', () => {
        const letters = demoTitleLeft.querySelectorAll('.letter');

        letters.forEach(letter => {
            letter.classList.remove('visible');
        });

        // Stagger each letter by 62.5 ms.
        letters.forEach((letter, index) => {
            setTimeout(() => {
                letter.classList.add('visible');
            }, index * 0.0625 * 1000);
        });
    });
}

// Clicking the output section (except the generate button) replays the
// right title's speech animation.
if (demoOutputSection && demoTitleRight) {
    demoOutputSection.addEventListener('click', (event) => {
        if (event.target.closest('#demoGenerateBtn')) {
            return;
        }
        demoTitleRight.classList.remove('animate-speech');
        // Force a reflow so re-adding the class restarts the CSS animation.
        void demoTitleRight.offsetWidth;
        demoTitleRight.classList.add('animate-speech');
    });
}
| |
|
| | |
// Kick off model loading as soon as the demo UI is wired up.
initializeModels();
| | })(); |
| |
|