gladguy committed
Commit 2464a55 · 1 Parent(s): e677348

Gradio migration

Files changed (4):
  1. README.md +1 -1
  2. app.py +289 -259
  3. packages.txt +1 -0
  4. requirements.txt +3 -3
README.md CHANGED
@@ -3,7 +3,7 @@ title: SimpleViva
  emoji: 🧬
  colorFrom: blue
  colorTo: indigo
- sdk: docker
+ sdk: gradio
  pinned: false
  license: mit
  ---
app.py CHANGED
@@ -1,45 +1,23 @@
- from fastapi import FastAPI, HTTPException
- from fastapi.staticfiles import StaticFiles
- from fastapi.responses import FileResponse, JSONResponse
- from fastapi.middleware.cors import CORSMiddleware
- import json
- import time
- import base64
- import io
- import os
- from typing import Dict, Optional
  import torch
  import numpy as np
  from scipy.io.wavfile import write

- # Initialize FastAPI app
- app = FastAPI(title="Anatomy Viva Voice App", version="1.0.0")
-
- # Mount static files
- app.mount("/static", StaticFiles(directory="static"), name="static")
-
- # CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
  class FreeVoiceTTS:
      def __init__(self):
          self.model = None
          self.device = "cpu"

      def load_silero_tts(self):
          """Load Silero TTS - lightweight and reliable"""
          try:
-             import torch
-
-             device = torch.device('cpu')
              torch.set_num_threads(4)
-
              model, example_text = torch.hub.load(
                  repo_or_dir='snakers4/silero-models',
                  model='silero_tts',
@@ -52,269 +30,321 @@ class FreeVoiceTTS:
              print(f"Silero TTS loading failed: {e}")
              return False

-     def text_to_speech_silero(self, text: str) -> bytes:
-         """Convert text to speech using Silero TTS"""
          try:
              if not hasattr(self, 'silero_model'):
                  if not self.load_silero_tts():
-                     raise Exception("Silero TTS not available")

              # Generate audio using Silero
              audio = self.silero_model.apply_tts(
                  text=text,
                  speaker='en_0', # English female voice
-                 sample_rate=24000
              )

-             # Convert to bytes
-             audio_buffer = io.BytesIO()
-             write(audio_buffer, 24000, (audio * 32767).numpy().astype(np.int16))
-
-             return audio_buffer.getvalue()

          except Exception as e:
              print(f"Silero TTS failed: {e}")
-             return self.generate_silence()
-
-     def generate_silence(self) -> bytes:
-         """Generate 1 second of silence as fallback"""
-         silence = np.zeros(24000, dtype=np.int16)
-         audio_buffer = io.BytesIO()
-         write(audio_buffer, 24000, silence)
-         return audio_buffer.getvalue()

- # Initialize TTS
  tts_engine = FreeVoiceTTS()

- class AnatomyProfessor:
-     def __init__(self):
-         self.current_topic = None
-         self.conversation_history = []
-         self.question_bank = self._initialize_question_bank()
-
-     def _initialize_question_bank(self):
-         return {
-             "upper_limb": [
-                 {
-                     "question": "Describe the course and distribution of the median nerve from its origin to the hand.",
-                     "key_points": ["brachial plexus roots C5-T1", "medial and lateral cords", "carpal tunnel", "LOAF muscles"],
-                     "follow_up": "What clinical condition results from median nerve compression at the wrist?",
-                     "difficulty": "medium"
-                 },
-                 {
-                     "question": "Explain the brachial plexus in detail, including its major branches.",
-                     "key_points": ["roots, trunks, divisions, cords, branches", "mnemonic: Real Texans Drink Cold Beer", "musculocutaneous, axillary, radial, median, ulnar nerves"],
-                     "follow_up": "Which cord of the brachial plexus is most vulnerable in shoulder dislocations?",
-                     "difficulty": "hard"
-                 },
-                 {
-                     "question": "What are the muscles of the rotator cuff and their functions?",
-                     "key_points": ["supraspinatus", "infraspinatus", "teres minor", "subscapularis", "SITS mnemonic"],
-                     "follow_up": "Which rotator cuff muscle is most commonly injured?",
-                     "difficulty": "medium"
-                 }
-             ],
-             "lower_limb": [
-                 {
-                     "question": "Trace the course of the sciatic nerve from its origin to its terminal branches.",
-                     "key_points": ["L4-S3 roots", "passes through greater sciatic foramen", "divides into tibial and common fibular nerves", "innervates hamstrings"],
-                     "follow_up": "What are the clinical manifestations of sciatic nerve injury?",
-                     "difficulty": "medium"
-                 },
-                 {
-                     "question": "Describe the boundaries and contents of the femoral triangle.",
-                     "key_points": ["inguinal ligament", "sartorius", "adductor longus", "femoral nerve, artery, vein", "NAVY arrangement"],
-                     "follow_up": "Why is the femoral triangle important clinically?",
-                     "difficulty": "medium"
-                 }
-             ],
-             "cardiology": [
-                 {
-                     "question": "Describe the blood supply to the heart and the coronary circulation.",
-                     "key_points": ["left and right coronary arteries", "circumflex artery", "left anterior descending", "coronary sinus"],
-                     "follow_up": "Which coronary artery is most commonly involved in myocardial infarction?",
-                     "difficulty": "medium"
-                 },
-                 {
-                     "question": "Explain the conduction system of the heart.",
-                     "key_points": ["SA node", "AV node", "bundle of His", "bundle branches", "Purkinje fibers"],
-                     "follow_up": "What is the clinical significance of the AV node?",
-                     "difficulty": "hard"
-                 }
-             ],
-             "neuroanatomy": [
-                 {
-                     "question": "Describe the blood supply of the brain.",
-                     "key_points": ["internal carotid arteries", "vertebral arteries", "circle of Willis", "anterior, middle, posterior cerebral arteries"],
-                     "follow_up": "What is the clinical consequence of middle cerebral artery occlusion?",
-                     "difficulty": "hard"
-                 },
-                 {
-                     "question": "Name the twelve cranial nerves and their basic functions.",
-                     "key_points": ["olfactory, optic, oculomotor, trochlear, trigeminal, abducens, facial, vestibulocochlear, glossopharyngeal, vagus, accessory, hypoglossal"],
-                     "follow_up": "Which cranial nerve has the longest intracranial course?",
-                     "difficulty": "medium"
-                 }
-             ]
          }
-     def set_topic(self, topic: str):
-         self.current_topic = topic
-         self.conversation_history = []
-
-     def get_next_question(self) -> Dict:
-         """Get the next question for the current topic"""
-         if not self.current_topic or self.current_topic not in self.question_bank:
-             return {"error": "Invalid topic selected"}
-
-         asked_indices = [conv.get("question_index", -1) for conv in self.conversation_history]
-
-         for i, question_data in enumerate(self.question_bank[self.current_topic]):
-             if i not in asked_indices:
-                 return {
-                     "question": question_data["question"],
-                     "question_index": i,
-                     "key_points": question_data["key_points"],
-                     "difficulty": question_data["difficulty"]
-                 }
-
-         return {"question": "You have completed all questions for this topic. Excellent work!", "completed": True}
-
-     def evaluate_answer(self, question_index: int, student_answer: str) -> Dict:
-         """Evaluate student's answer and provide feedback"""
-         if self.current_topic not in self.question_bank or question_index >= len(self.question_bank[self.current_topic]):
-             return {"error": "Invalid question index"}
-
-         question_data = self.question_bank[self.current_topic][question_index]
-
-         # Enhanced evaluation
-         evaluation = self._evaluate_answer_comprehensive(question_data, student_answer)
-
-         self.conversation_history.append({
-             "question_index": question_index,
-             "question": question_data["question"],
-             "answer": student_answer,
-             "feedback": evaluation["feedback"],
-             "score": evaluation["score"],
-             "timestamp": time.time()
-         })
-
-         return {
-             "feedback": evaluation["feedback"],
-             "score": evaluation["score"],
-             "next_question": self.get_next_question(),
-             "conversation_history": self.conversation_history
-         }
-
-     def _evaluate_answer_comprehensive(self, question_data: Dict, answer: str) -> Dict:
-         """Comprehensive answer evaluation"""
-         base_score = self._calculate_comprehensiveness(answer, question_data["key_points"])
-
-         # Generate appropriate feedback
-         if base_score >= 8:
-             feedback = f"Excellent! You demonstrated thorough understanding. {question_data.get('follow_up', '')}"
-         elif base_score >= 6:
-             feedback = f"Good attempt. You covered main concepts well. {question_data.get('follow_up', 'Consider the clinical applications.')}"
-         else:
-             missed_points = self._get_missed_points(answer, question_data["key_points"])
-             feedback = f"Let me help you improve. Key aspects: {', '.join(missed_points)}. {question_data.get('follow_up', '')}"
-
-         return {
-             "feedback": feedback,
-             "score": base_score
-         }
-
-     def _calculate_comprehensiveness(self, answer: str, key_points: list) -> float:
-         """Calculate score based on coverage of key points"""
-         answer_lower = answer.lower()
-         covered_points = sum(1 for point in key_points if any(word in answer_lower for word in point.lower().split()))
-         return min(10, (covered_points / len(key_points)) * 10)
-
-     def _get_missed_points(self, answer: str, key_points: list) -> list:
-         """Get points that were missed in the answer"""
-         answer_lower = answer.lower()
-         missed = []
-         for point in key_points:
-             if not any(word in answer_lower for word in point.lower().split()):
-                 missed.append(point)
-         return missed if missed else key_points[:2]  # Return first two if all covered
-
- # Global professor instance
- professor = AnatomyProfessor()

- # Serve the main page
- @app.get("/")
- async def read_index():
-     return FileResponse('static/index.html')
-
- # API Routes
- @app.post("/api/start_session")
- async def start_session(topic: str):
-     """Start a new viva session"""
-     professor.set_topic(topic)
-     first_question = professor.get_next_question()
-
-     return JSONResponse({
-         "status": "started",
-         "topic": topic,
-         "first_question": first_question,
-         "message": f"Viva session started on {topic}"
-     })
-
- @app.post("/api/text_to_speech")
- async def text_to_speech(text: str):
-     """Convert text to speech using free TTS"""
-     try:
-         audio_data = tts_engine.text_to_speech_silero(text)
-
-         return JSONResponse({
-             "audio_data": base64.b64encode(audio_data).decode('utf-8'),
-             "text": text,
-             "format": "wav"
-         })
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"TTS failed: {str(e)}")
-
- @app.post("/api/evaluate_answer")
- async def evaluate_answer(question_index: int, answer: str):
-     """Evaluate student's answer"""
-     try:
-         evaluation = professor.evaluate_answer(question_index, answer)
-         return JSONResponse(evaluation)
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Evaluation failed: {str(e)}")
-
- @app.get("/api/topics")
- async def get_topics():
-     """Get available anatomy topics"""
-     return JSONResponse({
-         "topics": {
-             "upper_limb": "Upper Limb Anatomy",
-             "lower_limb": "Lower Limb Anatomy",
-             "cardiology": "Cardiac Anatomy",
-             "neuroanatomy": "Neuroanatomy"
-         }
-     })
-
- @app.get("/api/health")
- async def health_check():
-     """Health check endpoint"""
-     return JSONResponse({"status": "healthy", "timestamp": time.time()})
-
- # Initialize TTS on startup
- @app.on_event("startup")
- async def startup_event():
-     """Initialize TTS models on app startup"""
-     print("Initializing Anatomy Viva App...")
-     success = tts_engine.load_silero_tts()
-     if success:
-         print("Silero TTS initialized successfully")
-     else:
-         print("TTS initialization failed - will use fallbacks")
-
- # For Hugging Face Spaces - they look for this
  if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=7860)

+ import gradio as gr
  import torch
  import numpy as np
+ import io
  from scipy.io.wavfile import write
+ from transformers import pipeline
+ import time
+ from typing import Dict, List, Tuple
+
+ # --- TTS Engine ---
  class FreeVoiceTTS:
      def __init__(self):
          self.model = None
          self.device = "cpu"
+         self.sample_rate = 24000

      def load_silero_tts(self):
          """Load Silero TTS - lightweight and reliable"""
          try:
              torch.set_num_threads(4)
              model, example_text = torch.hub.load(
                  repo_or_dir='snakers4/silero-models',
                  model='silero_tts',
              print(f"Silero TTS loading failed: {e}")
              return False

+     def text_to_speech(self, text: str) -> Tuple[int, np.ndarray]:
+         """Convert text to speech, returning (sample_rate, audio_numpy)"""
          try:
              if not hasattr(self, 'silero_model'):
                  if not self.load_silero_tts():
+                     return None

              # Generate audio using Silero
              audio = self.silero_model.apply_tts(
                  text=text,
                  speaker='en_0', # English female voice
+                 sample_rate=self.sample_rate
              )

+             # Convert to numpy array for Gradio
+             # Silero returns a torch tensor, we convert to numpy
+             return (self.sample_rate, audio.numpy())

          except Exception as e:
              print(f"Silero TTS failed: {e}")
+             return None
+
+ # --- STT Engine ---
+ class SpeechToText:
+     def __init__(self):
+         self.transcriber = None
+
+     def load_model(self):
+         try:
+             self.transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+             return True
+         except Exception as e:
+             print(f"STT loading failed: {e}")
+             return False
+
+     def transcribe(self, audio_path: str) -> str:
+         if not self.transcriber:
+             self.load_model()
+
+         if not audio_path:
+             return ""
+
+         try:
+             result = self.transcriber(audio_path)
+             return result["text"]
+         except Exception as e:
+             print(f"Transcription failed: {e}")
+             return ""
+
+ # --- Application Logic ---
+
+ # Initialize Engines
  tts_engine = FreeVoiceTTS()
+ stt_engine = SpeechToText()

+ # Pre-load models
+ print("Loading AI Models...")
+ tts_engine.load_silero_tts()
+ stt_engine.load_model()
+ print("Models Loaded.")
+
+ QUESTION_BANK = {
+     "upper_limb": [
+         {
+             "question": "Describe the course and distribution of the median nerve from its origin to the hand.",
+             "key_points": ["brachial plexus roots C5-T1", "medial and lateral cords", "carpal tunnel", "LOAF muscles"],
+             "follow_up": "What clinical condition results from median nerve compression at the wrist?",
+             "difficulty": "medium"
+         },
+         {
+             "question": "Explain the brachial plexus in detail, including its major branches.",
+             "key_points": ["roots, trunks, divisions, cords, branches", "mnemonic: Real Texans Drink Cold Beer", "musculocutaneous, axillary, radial, median, ulnar nerves"],
+             "follow_up": "Which cord of the brachial plexus is most vulnerable in shoulder dislocations?",
+             "difficulty": "hard"
+         },
+         {
+             "question": "What are the muscles of the rotator cuff and their functions?",
+             "key_points": ["supraspinatus", "infraspinatus", "teres minor", "subscapularis", "SITS mnemonic"],
+             "follow_up": "Which rotator cuff muscle is most commonly injured?",
+             "difficulty": "medium"
          }
+     ],
+     "lower_limb": [
+         {
+             "question": "Trace the course of the sciatic nerve from its origin to its terminal branches.",
+             "key_points": ["L4-S3 roots", "passes through greater sciatic foramen", "divides into tibial and common fibular nerves", "innervates hamstrings"],
+             "follow_up": "What are the clinical manifestations of sciatic nerve injury?",
+             "difficulty": "medium"
+         },
+         {
+             "question": "Describe the boundaries and contents of the femoral triangle.",
+             "key_points": ["inguinal ligament", "sartorius", "adductor longus", "femoral nerve, artery, vein", "NAVY arrangement"],
+             "follow_up": "Why is the femoral triangle important clinically?",
+             "difficulty": "medium"
+         }
+     ],
+     "cardiology": [
+         {
+             "question": "Describe the blood supply to the heart and the coronary circulation.",
+             "key_points": ["left and right coronary arteries", "circumflex artery", "left anterior descending", "coronary sinus"],
+             "follow_up": "Which coronary artery is most commonly involved in myocardial infarction?",
+             "difficulty": "medium"
+         },
+         {
+             "question": "Explain the conduction system of the heart.",
+             "key_points": ["SA node", "AV node", "bundle of His", "bundle branches", "Purkinje fibers"],
+             "follow_up": "What is the clinical significance of the AV node?",
+             "difficulty": "hard"
+         }
+     ],
+     "neuroanatomy": [
+         {
+             "question": "Describe the blood supply of the brain.",
+             "key_points": ["internal carotid arteries", "vertebral arteries", "circle of Willis", "anterior, middle, posterior cerebral arteries"],
+             "follow_up": "What is the clinical consequence of middle cerebral artery occlusion?",
+             "difficulty": "hard"
+         },
+         {
+             "question": "Name the twelve cranial nerves and their basic functions.",
+             "key_points": ["olfactory, optic, oculomotor, trochlear, trigeminal, abducens, facial, vestibulocochlear, glossopharyngeal, vagus, accessory, hypoglossal"],
+             "follow_up": "Which cranial nerve has the longest intracranial course?",
+             "difficulty": "medium"
+         }
+     ]
+ }
+
+ def start_session(topic):
+     if not topic:
+         return (
+             None,
+             [],
+             "Please select a topic first.",
+             gr.update(visible=False),
+             gr.update(visible=True),
+             None  # no audio output; matches the six outputs wired to the topic buttons
+         )
+     session_state = {
+         "topic": topic,
+         "question_index": 0,
+         "score": 0,
+         "history": [],
+         "current_question_data": QUESTION_BANK[topic][0]
+     }
+
+     first_question = session_state["current_question_data"]["question"]
+
+     # Generate audio for first question
+     audio = tts_engine.text_to_speech(first_question)
+
+     return (
+         session_state,
+         [(None, first_question)],  # Chat history
+         f"Topic: {topic.replace('_', ' ').title()}",
+         gr.update(visible=True),   # Show session
+         gr.update(visible=False),  # Hide topic selection
+         audio                      # Auto-play question
+     )
+
+ def process_response(audio_input, text_input, session_state, history):
+     if not session_state:
+         return session_state, history, "Error: No active session", None, None
+
+     # Determine user answer (Audio takes precedence)
+     user_answer = ""
+     if audio_input:
+         user_answer = stt_engine.transcribe(audio_input)
+     elif text_input:
+         user_answer = text_input
+
+     if not user_answer:
+         return session_state, history, "", None, None  # No input
+
+     # Evaluate Answer
+     question_data = session_state["current_question_data"]
+     score, feedback = evaluate_answer(user_answer, question_data)
+
+     # Update State
+     session_state["score"] += score
+     session_state["history"].append({
+         "question": question_data["question"],
+         "answer": user_answer,
+         "feedback": feedback,
+         "score": score
+     })
+
+     # Update Chat History
+     history.append((user_answer, feedback))
+
+     # Prepare Next Question
+     session_state["question_index"] += 1
+     topic_questions = QUESTION_BANK[session_state["topic"]]
+
+     next_audio = None
+
+     if session_state["question_index"] < len(topic_questions):
+         next_question_data = topic_questions[session_state["question_index"]]
+         session_state["current_question_data"] = next_question_data
+         next_q_text = next_question_data["question"]
+         history.append((None, next_q_text))
+
+         # Generate audio for next question
+         next_audio = tts_engine.text_to_speech(next_q_text)
+
+     else:
+         # End of session
+         final_score = session_state["score"]
+         count = len(topic_questions)
+         avg = final_score / count if count > 0 else 0
+         end_msg = f"Session Complete! Final Score: {final_score:.1f}/{count*10} (Avg: {avg:.1f})"
+         history.append((None, end_msg))
+         next_audio = tts_engine.text_to_speech(end_msg)
+         session_state = None  # Reset state
+
+     return (
+         session_state,
+         history,
+         "",    # Clear text input
+         None,  # Clear audio input
+         next_audio
+     )
+
+ def evaluate_answer(answer: str, question_data: Dict) -> Tuple[float, str]:
+     """Simple keyword matching evaluation"""
+     answer_lower = answer.lower()
+     key_points = question_data["key_points"]
+
+     covered_points = sum(1 for point in key_points if any(word in answer_lower for word in point.lower().split()))
+     score = min(10, (covered_points / len(key_points)) * 10)
+
+     if score >= 8:
+         feedback = f"Excellent! {question_data.get('follow_up', '')}"
+     elif score >= 5:
+         feedback = f"Good. You missed some details. {question_data.get('follow_up', '')}"
+     else:
+         missed = [p for p in key_points if not any(w in answer_lower for w in p.lower().split())]
+         feedback = f"Key points missed: {', '.join(missed[:2])}. {question_data.get('follow_up', '')}"
+
+     return score, feedback

+ # --- Gradio UI ---
+
+ with gr.Blocks(title="Anatomy Viva Voce", theme=gr.themes.Soft()) as demo:
+     state = gr.State(None)  # Session state
+
+     gr.Markdown("# 🧠 Anatomy Viva Voce Simulator")
+     gr.Markdown("Practice medical anatomy with an AI Professor. Speak or type your answers!")
+
+     # Topic Selection View
+     with gr.Group(visible=True) as topic_view:
+         gr.Markdown("### Select a Topic to Begin")
+         with gr.Row():
+             btn_upper = gr.Button("Upper Limb", variant="primary")
+             btn_lower = gr.Button("Lower Limb", variant="primary")
+             btn_cardio = gr.Button("Cardiology", variant="primary")
+             btn_neuro = gr.Button("Neuroanatomy", variant="primary")
+
+     # Session View
+     with gr.Group(visible=False) as session_view:
+         session_info = gr.Markdown("Topic: ...")
+
+         chatbot = gr.Chatbot(label="Viva Session", height=400)
+
+         # Professor Audio Output (Hidden player, auto-played via return)
+         professor_audio = gr.Audio(label="Professor's Voice", autoplay=True, visible=False)
+
+         with gr.Row():
+             with gr.Column(scale=4):
+                 txt_input = gr.Textbox(
+                     show_label=False,
+                     placeholder="Type your answer here...",
+                     lines=2
+                 )
+             with gr.Column(scale=1):
+                 audio_input = gr.Audio(
+                     source="microphone",
+                     type="filepath",
+                     label="Voice Answer",
+                     show_label=False
+                 )
+
+         with gr.Row():
+             submit_btn = gr.Button("Submit Answer", variant="primary")
+             end_btn = gr.Button("End Session", variant="stop")

+     # Event Handlers
+     topic_buttons = [btn_upper, btn_lower, btn_cardio, btn_neuro]
+     topics = ["upper_limb", "lower_limb", "cardiology", "neuroanatomy"]
+
+     for btn, topic in zip(topic_buttons, topics):
+         btn.click(
+             fn=start_session,
+             inputs=[gr.State(topic)],
+             outputs=[state, chatbot, session_info, session_view, topic_view, professor_audio]
+         )
+
+     # Submit via Text or Audio
+     submit_inputs = [audio_input, txt_input, state, chatbot]
+     submit_outputs = [state, chatbot, txt_input, audio_input, professor_audio]
+
+     submit_btn.click(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
+     txt_input.submit(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
+     # Auto-submit when a recording is finalized: `change` fires once the microphone
+     # recording stops and the file is updated. Drop this handler and rely on the
+     # Submit button instead if accidental submissions become a problem.
+     audio_input.change(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
+
+     def reset_ui():
+         return None, [], gr.update(visible=False), gr.update(visible=True)
+
+     end_btn.click(
+         fn=reset_ui,
+         inputs=None,
+         outputs=[state, chatbot, session_view, topic_view]
+     )

  if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
 
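For reviewers who want to sanity-check the new keyword-based grading without launching the Gradio UI, here is a minimal, hypothetical smoke test against the functions added above (importing app also runs the module-level Silero/Whisper pre-load, so the first run is slow):

# smoke_test.py - hypothetical helper, not part of this commit
from app import QUESTION_BANK, evaluate_answer

question = QUESTION_BANK["upper_limb"][0]
score, feedback = evaluate_answer(
    "The median nerve arises from the medial and lateral cords of the brachial "
    "plexus and passes through the carpal tunnel to supply the LOAF muscles.",
    question,
)
print(f"score={score:.1f}/10")  # keyword-coverage score out of 10
print(feedback)                 # feedback plus the follow-up prompt
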
packages.txt CHANGED
@@ -0,0 +1 @@
+ ffmpeg
requirements.txt CHANGED
@@ -1,7 +1,7 @@
- fastapi
- uvicorn
+ gradio
  torch
  numpy
  scipy
- omegaconf
+ transformers
  torchaudio
+ omegaconf