Gradio migration
- README.md +1 -1
- app.py +289 -259
- packages.txt +1 -0
- requirements.txt +3 -3
README.md
CHANGED
@@ -3,7 +3,7 @@ title: SimpleViva
 emoji: 🧬
 colorFrom: blue
 colorTo: indigo
-sdk:
+sdk: gradio
 pinned: false
 license: mit
 ---
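Spaces reads the sdk field of this front matter to pick the runtime: with sdk: gradio the platform installs requirements.txt and launches app.py itself on port 7860, so the hand-rolled FastAPI server and static-file mount removed below are no longer needed.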
app.py
CHANGED
@@ -1,45 +1,23 @@
-from fastapi import FastAPI
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import FileResponse, JSONResponse
-from fastapi.middleware.cors import CORSMiddleware
-import json
-import time
-import base64
-import io
-import os
-from typing import Dict, Optional
+import gradio as gr
 import torch
 import numpy as np
+import io
 from scipy.io.wavfile import write
+from transformers import pipeline
+import time
+from typing import Dict, List, Tuple
 
-#
-app = FastAPI(title="Anatomy Viva Voice App", version="1.0.0")
-
-# Mount static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
-
-# CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
+# --- TTS Engine ---
 class FreeVoiceTTS:
     def __init__(self):
        self.model = None
        self.device = "cpu"
+       self.sample_rate = 24000
 
     def load_silero_tts(self):
         """Load Silero TTS - lightweight and reliable"""
         try:
-            import torch
-
-            device = torch.device('cpu')
             torch.set_num_threads(4)
-
             model, example_text = torch.hub.load(
                 repo_or_dir='snakers4/silero-models',
                 model='silero_tts',
@@ -52,269 +30,321 @@ class FreeVoiceTTS:
             print(f"Silero TTS loading failed: {e}")
             return False
 
-    def text_to_speech_silero(self, text):
-        """Convert text to speech
+    def text_to_speech(self, text: str) -> Tuple[int, np.ndarray]:
+        """Convert text to speech, returning (sample_rate, audio_numpy)"""
         try:
             if not hasattr(self, 'silero_model'):
                 if not self.load_silero_tts():
+                    return None
 
             # Generate audio using Silero
             audio = self.silero_model.apply_tts(
                 text=text,
                 speaker='en_0',  # English female voice
-                sample_rate=
+                sample_rate=self.sample_rate
             )
 
-            # Convert to
-            return audio_buffer.getvalue()
+            # Convert to numpy array for Gradio
+            # Silero returns a torch tensor, we convert to numpy
+            return (self.sample_rate, audio.numpy())
 
         except Exception as e:
             print(f"Silero TTS failed: {e}")
-            return
+            return None
+
+# --- STT Engine ---
+class SpeechToText:
+    def __init__(self):
+        self.transcriber = None
+
+    def load_model(self):
+        try:
+            self.transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+            return True
+        except Exception as e:
+            print(f"STT loading failed: {e}")
+            return False
+
+    def transcribe(self, audio_path: str) -> str:
+        if not self.transcriber:
+            self.load_model()
+
+        if not audio_path:
+            return ""
+
+        try:
+            result = self.transcriber(audio_path)
+            return result["text"]
+        except Exception as e:
+            print(f"Transcription failed: {e}")
+            return ""
+
+# --- Application Logic ---
+
+# Initialize Engines
 tts_engine = FreeVoiceTTS()
+stt_engine = SpeechToText()
 
-    def get_next_question(self):
-        """Get the next question for the current topic"""
-        if not self.current_topic or self.current_topic not in self.question_bank:
-            return {"error": "Invalid topic selected"}
-
-        asked_indices = [conv.get("question_index", -1) for conv in self.conversation_history]
-
-        for i, question_data in enumerate(self.question_bank[self.current_topic]):
-            if i not in asked_indices:
-                return {
-                    "question": question_data["question"],
-                    "question_index": i,
-                    "key_points": question_data["key_points"],
-                    "difficulty": question_data["difficulty"]
-                }
-
-        return {"question": "You have completed all questions for this topic. Excellent work!", "completed": True}
-
-        answer_lower = answer.lower()
-        covered_points = sum(1 for point in key_points if any(word in answer_lower for word in point.lower().split()))
-        return min(10, (covered_points / len(key_points)) * 10)
-
-# Global professor instance
-professor = AnatomyProfessor()
-
-#
-@app.get("/")
-async def read_index():
-    return FileResponse('static/index.html')
-
-async def start_session(topic: str):
-    """Start a new viva session"""
-    professor.set_topic(topic)
-    first_question = professor.get_next_question()
-
-    return JSONResponse({
-        "topic": topic,
-        "first_question": first_question,
-        "message": f"Viva session started on {topic}"
-    })
-
-@app.post("/api/text_to_speech")
-async def text_to_speech(text: str):
-    """Convert text to speech using free TTS"""
-    try:
-        audio_data = tts_engine.text_to_speech_silero(text)
-
-        return JSONResponse({
-            "audio_data": base64.b64encode(audio_data).decode('utf-8'),
-            "text": text,
-            "format": "wav"
-        })
-
-    success = tts_engine.load_silero_tts()
-    if success:
-        print("Silero TTS initialized successfully")
-    else:
-        print("TTS initialization failed - will use fallbacks")
+# Pre-load models
+print("Loading AI Models...")
+tts_engine.load_silero_tts()
+stt_engine.load_model()
+print("Models Loaded.")
+
+QUESTION_BANK = {
+    "upper_limb": [
+        {
+            "question": "Describe the course and distribution of the median nerve from its origin to the hand.",
+            "key_points": ["brachial plexus roots C5-T1", "medial and lateral cords", "carpal tunnel", "LOAF muscles"],
+            "follow_up": "What clinical condition results from median nerve compression at the wrist?",
+            "difficulty": "medium"
+        },
+        {
+            "question": "Explain the brachial plexus in detail, including its major branches.",
+            "key_points": ["roots, trunks, divisions, cords, branches", "mnemonic: Real Texans Drink Cold Beer", "musculocutaneous, axillary, radial, median, ulnar nerves"],
+            "follow_up": "Which cord of the brachial plexus is most vulnerable in shoulder dislocations?",
+            "difficulty": "hard"
+        },
+        {
+            "question": "What are the muscles of the rotator cuff and their functions?",
+            "key_points": ["supraspinatus", "infraspinatus", "teres minor", "subscapularis", "SITS mnemonic"],
+            "follow_up": "Which rotator cuff muscle is most commonly injured?",
+            "difficulty": "medium"
+        }
+    ],
+    "lower_limb": [
+        {
+            "question": "Trace the course of the sciatic nerve from its origin to its terminal branches.",
+            "key_points": ["L4-S3 roots", "passes through greater sciatic foramen", "divides into tibial and common fibular nerves", "innervates hamstrings"],
+            "follow_up": "What are the clinical manifestations of sciatic nerve injury?",
+            "difficulty": "medium"
+        },
+        {
+            "question": "Describe the boundaries and contents of the femoral triangle.",
+            "key_points": ["inguinal ligament", "sartorius", "adductor longus", "femoral nerve, artery, vein", "NAVY arrangement"],
+            "follow_up": "Why is the femoral triangle important clinically?",
+            "difficulty": "medium"
+        }
+    ],
+    "cardiology": [
+        {
+            "question": "Describe the blood supply to the heart and the coronary circulation.",
+            "key_points": ["left and right coronary arteries", "circumflex artery", "left anterior descending", "coronary sinus"],
+            "follow_up": "Which coronary artery is most commonly involved in myocardial infarction?",
+            "difficulty": "medium"
+        },
+        {
+            "question": "Explain the conduction system of the heart.",
+            "key_points": ["SA node", "AV node", "bundle of His", "bundle branches", "Purkinje fibers"],
+            "follow_up": "What is the clinical significance of the AV node?",
+            "difficulty": "hard"
+        }
+    ],
+    "neuroanatomy": [
+        {
+            "question": "Describe the blood supply of the brain.",
+            "key_points": ["internal carotid arteries", "vertebral arteries", "circle of Willis", "anterior, middle, posterior cerebral arteries"],
+            "follow_up": "What is the clinical consequence of middle cerebral artery occlusion?",
+            "difficulty": "hard"
+        },
+        {
+            "question": "Name the twelve cranial nerves and their basic functions.",
+            "key_points": ["olfactory, optic, oculomotor, trochlear, trigeminal, abducens, facial, vestibulocochlear, glossopharyngeal, vagus, accessory, hypoglossal"],
+            "follow_up": "Which cranial nerve has the longest intracranial course?",
+            "difficulty": "medium"
+        }
+    ]
+}
+
+def start_session(topic):
+    if not topic:
+        return (
+            None,
+            [],
+            "Please select a topic first.",
+            gr.update(visible=False),
+            gr.update(visible=True),
+            None  # no question audio yet
+        )
+
+    session_state = {
+        "topic": topic,
+        "question_index": 0,
+        "score": 0,
+        "history": [],
+        "current_question_data": QUESTION_BANK[topic][0]
+    }
+
+    first_question = session_state["current_question_data"]["question"]
+
+    # Generate audio for first question
+    audio = tts_engine.text_to_speech(first_question)
+
+    return (
+        session_state,
+        [(None, first_question)],  # Chat history
+        f"Topic: {topic.replace('_', ' ').title()}",
+        gr.update(visible=True),   # Show session
+        gr.update(visible=False),  # Hide topic selection
+        audio                      # Auto-play question
+    )
+
+def process_response(audio_input, text_input, session_state, history):
+    if not session_state:
+        return session_state, history, "Error: No active session", None, None
+
+    # Determine user answer (audio takes precedence)
+    user_answer = ""
+    if audio_input:
+        user_answer = stt_engine.transcribe(audio_input)
+    elif text_input:
+        user_answer = text_input
+
+    if not user_answer:
+        return session_state, history, "", None, None  # No input
+
+    # Evaluate Answer
+    question_data = session_state["current_question_data"]
+    score, feedback = evaluate_answer(user_answer, question_data)
+
+    # Update State
+    session_state["score"] += score
+    session_state["history"].append({
+        "question": question_data["question"],
+        "answer": user_answer,
+        "feedback": feedback,
+        "score": score
+    })
+
+    # Update Chat History
+    history.append((user_answer, feedback))
+
+    # Prepare Next Question
+    session_state["question_index"] += 1
+    topic_questions = QUESTION_BANK[session_state["topic"]]
+
+    next_audio = None
+
+    if session_state["question_index"] < len(topic_questions):
+        next_question_data = topic_questions[session_state["question_index"]]
+        session_state["current_question_data"] = next_question_data
+        next_q_text = next_question_data["question"]
+        history.append((None, next_q_text))
+
+        # Generate audio for next question
+        next_audio = tts_engine.text_to_speech(next_q_text)
+
+    else:
+        # End of session
+        final_score = session_state["score"]
+        count = len(topic_questions)
+        avg = final_score / count if count > 0 else 0
+        end_msg = f"Session Complete! Final Score: {final_score:.1f}/{count*10} (Avg: {avg:.1f})"
+        history.append((None, end_msg))
+        next_audio = tts_engine.text_to_speech(end_msg)
+        session_state = None  # Reset state
+
+    return (
+        session_state,
+        history,
+        "",    # Clear text input
+        None,  # Clear audio input
+        next_audio
+    )
+
+def evaluate_answer(answer: str, question_data: Dict) -> Tuple[float, str]:
+    """Simple keyword matching evaluation"""
+    answer_lower = answer.lower()
+    key_points = question_data["key_points"]
+
+    covered_points = sum(1 for point in key_points if any(word in answer_lower for word in point.lower().split()))
+    score = min(10, (covered_points / len(key_points)) * 10)
+
+    if score >= 8:
+        feedback = f"Excellent! {question_data.get('follow_up', '')}"
+    elif score >= 5:
+        feedback = f"Good. You missed some details. {question_data.get('follow_up', '')}"
+    else:
+        missed = [p for p in key_points if not any(w in answer_lower for w in p.lower().split())]
+        feedback = f"Key points missed: {', '.join(missed[:2])}. {question_data.get('follow_up', '')}"
+
+    return score, feedback
+
+# --- Gradio UI ---
+
+with gr.Blocks(title="Anatomy Viva Voce", theme=gr.themes.Soft()) as demo:
+    state = gr.State(None)  # Session state
+
+    gr.Markdown("# 🧠 Anatomy Viva Voce Simulator")
+    gr.Markdown("Practice medical anatomy with an AI Professor. Speak or type your answers!")
+
+    # Topic Selection View
+    with gr.Group(visible=True) as topic_view:
+        gr.Markdown("### Select a Topic to Begin")
+        with gr.Row():
+            btn_upper = gr.Button("Upper Limb", variant="primary")
+            btn_lower = gr.Button("Lower Limb", variant="primary")
+            btn_cardio = gr.Button("Cardiology", variant="primary")
+            btn_neuro = gr.Button("Neuroanatomy", variant="primary")
+
+    # Session View
+    with gr.Group(visible=False) as session_view:
+        session_info = gr.Markdown("Topic: ...")
+
+        chatbot = gr.Chatbot(label="Viva Session", height=400)
+
+        # Professor Audio Output (hidden player, auto-played via return)
+        professor_audio = gr.Audio(label="Professor's Voice", autoplay=True, visible=False)
+
+        with gr.Row():
+            with gr.Column(scale=4):
+                txt_input = gr.Textbox(
+                    show_label=False,
+                    placeholder="Type your answer here...",
+                    lines=2
+                )
+            with gr.Column(scale=1):
+                audio_input = gr.Audio(
+                    source="microphone",
+                    type="filepath",
+                    label="Voice Answer",
+                    show_label=False
+                )
+
+        with gr.Row():
+            submit_btn = gr.Button("Submit Answer", variant="primary")
+            end_btn = gr.Button("End Session", variant="stop")
+
+    # Event Handlers
+    topic_buttons = [btn_upper, btn_lower, btn_cardio, btn_neuro]
+    topics = ["upper_limb", "lower_limb", "cardiology", "neuroanatomy"]
+
+    for btn, topic in zip(topic_buttons, topics):
+        btn.click(
+            fn=start_session,
+            inputs=[gr.State(topic)],
+            outputs=[state, chatbot, session_info, session_view, topic_view, professor_audio]
+        )
+
+    # Submit via Text or Audio
+    submit_inputs = [audio_input, txt_input, state, chatbot]
+    submit_outputs = [state, chatbot, txt_input, audio_input, professor_audio]
+
+    submit_btn.click(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
+    txt_input.submit(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
+    # NOTE: `change` fires when a recording finishes (and when the input is
+    # cleared), so voice answers auto-submit; drop this listener if users
+    # should be able to review or re-record before submitting.
+    audio_input.change(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
+
+    def reset_ui():
+        return None, [], gr.update(visible=False), gr.update(visible=True)
+
+    end_btn.click(
+        fn=reset_ui,
+        inputs=None,
+        outputs=[state, chatbot, session_view, topic_view]
+    )
 
-# For Hugging Face Spaces - they look for this
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
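The heart of the migration is text_to_speech's return type: instead of base64-encoding WAV bytes into a JSON payload for the browser to decode, the Gradio version hands back a plain (sample_rate, numpy array) tuple that gr.Audio plays directly. A minimal standalone sketch of that contract (not part of the commit; a synthetic tone stands in for Silero, and the text argument is ignored):

import numpy as np
import gradio as gr

SAMPLE_RATE = 24000  # same rate FreeVoiceTTS uses above

def speak(text: str):
    # Stand-in for tts_engine.text_to_speech(): half a second of A440,
    # just to demonstrate the (sample_rate, np.ndarray) shape gr.Audio expects.
    t = np.linspace(0, 0.5, int(SAMPLE_RATE * 0.5), endpoint=False)
    tone = (0.2 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
    return (SAMPLE_RATE, tone)

with gr.Blocks() as demo:
    txt = gr.Textbox(label="Say something")
    out = gr.Audio(autoplay=True)  # plays each returned (rate, array) tuple
    txt.submit(speak, inputs=txt, outputs=out)

if __name__ == "__main__":
    demo.launch()

Returning None from the handler, as app.py does when TTS fails, simply leaves the player empty rather than raising.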
packages.txt
CHANGED
@@ -0,0 +1 @@
+ffmpeg
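packages.txt lists system packages Spaces installs at build time; ffmpeg is needed here because the transformers speech-recognition pipeline uses it to decode the microphone recordings that Gradio saves to disk.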
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
-fastapi
-uvicorn
+gradio
 torch
 numpy
 scipy
+transformers
 torchaudio
+omegaconf
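The dependency swap mirrors the code: gradio replaces fastapi and uvicorn since Gradio bundles its own server, transformers provides the whisper-tiny STT pipeline, and omegaconf is apparently pinned because the snakers4/silero-models torch.hub loader uses it to read its model registry. With these installed, python app.py serves the app on port 7860, the port Spaces expects.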