import { useState, useRef, useCallback, useEffect } from "react"; import { useTranscriber } from "./transcriberContext.ts"; import Confetti, { type ConfettiHandle } from "./Confetti.tsx"; import { langToFlag } from "./utils.ts"; import { CohereLogo, UploadIcon, MicrophoneIcon, CopyIcon, DownloadIcon, CheckIcon, FileIcon, MicSmallIcon, } from "./icons.tsx"; type Screen = "landing" | "loading" | "transcription"; type TranscriptionMode = "idle" | "file" | "microphone"; // ---- Constants ---- const SCREEN_TRANSITION_MS = 600; // must match .screen CSS transition duration in index.css const COPY_FEEDBACK_MS = 2000; const POST_LOAD_DELAY_MS = 500; const AUDIO_SAMPLE_RATE = 16000; const LANGUAGES: { code: string; label: string; native: string }[] = [ { code: "en", label: "English", native: "English" }, { code: "fr", label: "French", native: "Français" }, { code: "de", label: "German", native: "Deutsch" }, { code: "es", label: "Spanish", native: "Español" }, { code: "it", label: "Italian", native: "Italiano" }, { code: "pt", label: "Portuguese", native: "Português" }, { code: "nl", label: "Dutch", native: "Nederlands" }, { code: "pl", label: "Polish", native: "Polski" }, { code: "el", label: "Greek", native: "Ελληνικά" }, { code: "ar", label: "Arabic", native: "العربية" }, { code: "ja", label: "Japanese", native: "日本語" }, { code: "zh", label: "Chinese", native: "中文" }, { code: "vi", label: "Vietnamese", native: "Tiếng Việt" }, { code: "ko", label: "Korean", native: "한국어" }, ]; // ---- Formatting helpers ---- function formatDuration(seconds: number): string { if (seconds < 60) return `${seconds.toFixed(1)}s`; const mins = Math.floor(seconds / 60); const secs = seconds % 60; return secs > 0 ? `${mins}m ${secs.toFixed(0)}s` : `${mins}m`; } // ---- Audio helpers ---- async function decodeAudio(arrayBuffer: ArrayBuffer): Promise { const audioCtx = new AudioContext({ sampleRate: AUDIO_SAMPLE_RATE }); const decoded = await audioCtx.decodeAudioData(arrayBuffer); const float32 = decoded.getChannelData(0); await audioCtx.close(); return float32; } // ---- Main App ---- function App() { const [screen, setScreen] = useState("landing"); const [prevScreen, setPrevScreen] = useState(null); const [mode, setMode] = useState("idle"); const [language, setLanguage] = useState("en"); const [transcriptionText, setTranscriptionText] = useState(""); const [streamedText, setStreamedText] = useState(""); const [isTranscribing, setIsTranscribing] = useState(false); const [audioFileName, setAudioFileName] = useState(null); const [isRecording, setIsRecording] = useState(false); const [copied, setCopied] = useState(false); const [isDragging, setIsDragging] = useState(false); const [stats, setStats] = useState<{ audioDuration: number; elapsed: number; } | null>(null); const fileInputRef = useRef(null); const videoRef = useRef(null); const mediaRecorderRef = useRef(null); const audioChunksRef = useRef([]); const outputRef = useRef(null); const streamedTextRef = useRef(""); const confettiRef = useRef(null); const transcriber = useTranscriber(); const displayText = isTranscribing ? streamedText : transcriptionText; // ---- Screen transitions ---- const transitionTo = useCallback( (next: Screen) => { setPrevScreen(screen); setScreen(next); setTimeout(() => setPrevScreen(null), SCREEN_TRANSITION_MS); }, [screen], ); const getScreenClass = useCallback( (s: Screen) => { if (s === screen) return "screen screen-enter"; if (s === prevScreen) return "screen screen-exit"; return "screen screen-hidden"; }, [screen, prevScreen], ); // ---- Video autoplay fallback ---- useEffect(() => { if (screen === "landing" && videoRef.current) { videoRef.current.play().catch(() => {}); } }, [screen]); // ---- Model loading: start when entering loading screen ---- useEffect(() => { if (screen !== "loading") return; transcriber.load().then(() => { setTimeout(() => transitionTo("transcription"), POST_LOAD_DELAY_MS); }); }, [screen, transcriber, transitionTo]); // ---- Auto-scroll output during streaming ---- useEffect(() => { if (isTranscribing && outputRef.current) { outputRef.current.scrollTop = outputRef.current.scrollHeight; } }, [streamedText, isTranscribing]); // ---- Streaming callback ---- const onToken = useCallback((token: string) => { streamedTextRef.current += token; setStreamedText(streamedTextRef.current); }, []); // ---- Run transcription (shared by file + mic) ---- const runTranscription = useCallback( async (audio: Float32Array) => { setIsTranscribing(true); setTranscriptionText(""); setStreamedText(""); setStats(null); streamedTextRef.current = ""; const audioDuration = audio.length / AUDIO_SAMPLE_RATE; const startTime = performance.now(); try { const finalText = await transcriber.transcribe( audio, language, onToken, ); const elapsed = (performance.now() - startTime) / 1000; setTranscriptionText(finalText); setStats({ audioDuration, elapsed }); } catch (err) { setTranscriptionText( `Error: ${err instanceof Error ? err.message : "Transcription failed"}`, ); } finally { setIsTranscribing(false); } }, [transcriber, language, onToken], ); // ---- File handling (shared by input + drag-and-drop) ---- const processFile = useCallback( async (file: File) => { setAudioFileName(file.name); setMode("file"); const audioData = await decodeAudio(await file.arrayBuffer()); runTranscription(audioData); }, [runTranscription], ); const handleFileSelect = useCallback( (e: React.ChangeEvent) => { const file = e.target.files?.[0]; if (!file) return; processFile(file); }, [processFile], ); // ---- Drag and drop ---- const dragCounter = useRef(0); const handleDragEnter = useCallback( (e: React.DragEvent) => { e.preventDefault(); if (screen !== "transcription" || mode !== "idle") return; dragCounter.current++; if (dragCounter.current === 1) setIsDragging(true); }, [screen, mode], ); const handleDragLeave = useCallback((e: React.DragEvent) => { e.preventDefault(); dragCounter.current--; if (dragCounter.current === 0) setIsDragging(false); }, []); const handleDragOver = useCallback((e: React.DragEvent) => { e.preventDefault(); }, []); const handleDrop = useCallback( (e: React.DragEvent) => { e.preventDefault(); dragCounter.current = 0; setIsDragging(false); if (screen !== "transcription" || mode !== "idle") return; const file = e.dataTransfer.files?.[0]; if (!file) return; processFile(file); }, [screen, mode, processFile], ); // ---- Microphone ---- const startRecording = useCallback(async () => { setMode("microphone"); setIsRecording(true); setTranscriptionText(""); setStreamedText(""); audioChunksRef.current = []; try { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); const recorder = new MediaRecorder(stream); mediaRecorderRef.current = recorder; recorder.ondataavailable = (e) => { if (e.data.size > 0) { audioChunksRef.current.push(e.data); } }; recorder.onstop = async () => { stream.getTracks().forEach((t) => t.stop()); setIsRecording(false); try { const blob = new Blob(audioChunksRef.current, { type: "audio/webm" }); const float32 = await decodeAudio(await blob.arrayBuffer()); runTranscription(float32); } catch (err) { setTranscriptionText( `Error: ${err instanceof Error ? err.message : "Transcription failed"}`, ); } }; recorder.start(); } catch (err) { setIsRecording(false); setMode("idle"); console.error("Microphone access denied:", err); } }, [runTranscription]); const stopRecording = useCallback(() => { mediaRecorderRef.current?.stop(); }, []); // ---- Copy to clipboard ---- const copyToClipboard = useCallback(() => { navigator.clipboard.writeText(transcriptionText).then(() => { setCopied(true); setTimeout(() => setCopied(false), COPY_FEEDBACK_MS); }); }, [transcriptionText]); // ---- Download as .txt ---- const downloadText = useCallback(() => { const blob = new Blob([transcriptionText], { type: "text/plain" }); const url = URL.createObjectURL(blob); const a = document.createElement("a"); a.href = url; a.download = "transcription.txt"; a.click(); URL.revokeObjectURL(url); }, [transcriptionText]); // ---- Reset ---- const resetTranscription = useCallback(() => { setMode("idle"); setTranscriptionText(""); setStreamedText(""); streamedTextRef.current = ""; setIsTranscribing(false); setAudioFileName(null); setIsRecording(false); setCopied(false); setStats(null); if (fileInputRef.current) fileInputRef.current.value = ""; }, []); // ---- Render ---- const isDone = !isTranscribing && !isRecording && !!transcriptionText; return (
{/* ==================== Screen 1: Landing ==================== */}
screen === "landing" && transitionTo("loading")} > {/* Video background */}
{/* ==================== Screen 2: Loading ==================== */}
{/* Spinner */}
{/* Status text */}

Loading model...

{/* Progress bar */}

{transcriber.statusText}

{/* Footer */}

Powered by Transformers.js

{/* ==================== Screen 3: Transcription ==================== */}
{/* Header */}
Cohere
{/* Main content */}
{mode === "idle" ? ( /* ---- Mode Selection + Language ---- */
{/* Upload / Record cards */}
{/* Upload File Card */} {/* Record Audio Card */}
{/* Language selector */}
Language
{LANGUAGES.map((lang) => ( ))}
) : ( /* ---- Transcription Area ---- */
{/* Source indicator + status */}
{mode === "file" ? : }
{mode === "file" ? audioFileName : "Microphone recording"} {/* Recording controls */} {isRecording && (
Recording...
)} {/* Status badge */} {isTranscribing && !isRecording && ( Transcribing... )} {isDone && ( {stats ? `Transcribed ${formatDuration(stats.audioDuration)} of audio in ${formatDuration(stats.elapsed)}` : "Complete"} )}
{/* Transcription output */}
{displayText ? (

{displayText.trim()}

) : isRecording ? (

Listening... Press stop when you're done speaking.

) : isTranscribing ? (
) : null}
{/* Actions */}
{isDone && ( <> )}
)}
{/* Footer */}

Runs 100% locally in your browser with WebGPU

{/* Drag overlay */} {isDragging && (

Drop audio/video file here

)} {/* Confetti overlay */} {/* Hidden file input */}
); } export default App;