File size: 11,763 Bytes
ed147e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
"""

FastAPI application for YouTube study notes generation.

Provides REST API endpoints for note generation and status tracking.

"""

import asyncio
import uuid
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional
from enum import Enum
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel, HttpUrl, Field

from src.audio.downloader import YouTubeDownloader
from src.audio.processor import AudioProcessor
from src.transcription.whisper_transcriber import WhisperTranscriber
from src.summarization.segmenter import TranscriptSegmenter
from src.summarization.note_generator import NoteGenerator
from src.utils.logger import setup_logger
from src.utils.config import settings
from src.db.database import create_db_and_tables

logger = setup_logger(__name__)


# Pydantic Models
class TaskStatus(str, Enum):
    """Processing states a note-generation task can be in.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value.
    """

    PENDING = "pending"  # initial state assigned when a task is created
    DOWNLOADING = "downloading"  # set while video info and audio are fetched
    TRANSCRIBING = "transcribing"  # set around the transcription step
    GENERATING_NOTES = "generating_notes"  # set while notes are built and formatted
    COMPLETED = "completed"  # set once the notes file has been written
    FAILED = "failed"  # set when processing raises an exception


class GenerateNotesRequest(BaseModel):
    """Request body accepted by ``POST /generate-notes``."""

    # Required; typed as pydantic's HttpUrl so malformed URLs are rejected.
    youtube_url: HttpUrl = Field(..., description="YouTube video URL")
    # Optional, defaults to "en"; forwarded to the transcriber as ``language``.
    language: str = Field(default="en", description="Video language code")

    class Config:
        # Example payload attached to the generated JSON schema.
        # NOTE(review): ``json_schema_extra`` is the pydantic v2 key, while an
        # inner ``Config`` class is the v1 convention (v2 prefers
        # ``model_config``) - confirm against the pinned pydantic version.
        json_schema_extra = {
            "example": {
                "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
                "language": "en",
            }
        }


class TaskResponse(BaseModel):
    """Body returned by ``POST /generate-notes`` once a task has been queued."""

    # All three fields are required (``...`` marks them as having no default).
    task_id: str = Field(..., description="Unique task identifier")
    status: TaskStatus = Field(..., description="Current task status")
    message: str = Field(..., description="Status message")


class TaskStatusResponse(BaseModel):
    """Body returned by ``GET /status/{task_id}``: a snapshot of one task's state."""

    task_id: str
    status: TaskStatus
    message: str  # latest human-readable progress or error text
    video_title: Optional[str] = None  # None until the video info has been fetched
    progress: Optional[int] = Field(None, description="Progress percentage (0-100)")
    notes_file: Optional[str] = None  # path of the saved notes; set on completion
    created_at: datetime
    updated_at: datetime  # refreshed each time update_task() modifies the task


# Global in-memory task storage, keyed by task_id (in production, use a database).
# Each value is the per-task state dict created by generate_notes() and mutated
# by update_task(); it lives only in this process, so a restart discards it.
tasks: Dict[str, Dict] = {}


# --- Lifespan Event Handler (Fixes Windows Event Loop Issue) ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests and log on shutdown.

    Passed to ``FastAPI(lifespan=...)``. The code before ``yield`` runs at
    startup and awaits ``create_db_and_tables()``; the code after ``yield``
    runs at shutdown and only writes a log line.

    Args:
        app: The FastAPI application instance (not used in the body).
    """
    logger.info("Lifespan: Initializing database tables...")
    await create_db_and_tables()
    logger.info("Lifespan: Database tables initialized successfully")
    yield
    logger.info("Lifespan: Server shutting down...")


# FastAPI app
# FastAPI app; startup/shutdown work is delegated to the lifespan handler.
app = FastAPI(
    title="YouTube Study Notes AI",
    description="Generate structured study notes from YouTube educational videos",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware: every origin, method and header is currently allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site issue credentialed requests; restrict allow_origins before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify allowed origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers. These imports sit below the app definition instead of at
# the top of the file.
# NOTE(review): presumably this avoids a circular import - confirm before
# moving them.
from src.api.auth_routes import router as auth_router
from src.api.notes_routes import router as notes_router
from src.api.analytics_routes import router as analytics_router

app.include_router(auth_router)
app.include_router(notes_router)
app.include_router(analytics_router)


@app.get("/")
async def root():
    """Describe the API: its name, version, endpoint map and docs links."""
    auth_endpoints = {
        "signup": "POST /auth/signup",
        "login": "POST /auth/login",
    }
    notes_endpoints = {
        "create": "POST /notes",
        "list": "GET /notes",
        "get": "GET /notes/{note_id}",
        "delete": "DELETE /notes/{note_id}",
    }
    analytics_endpoints = {"user_stats": "GET /analytics"}
    generation_endpoints = {
        "generate_notes": "POST /generate-notes",
        "check_status": "GET /status/{task_id}",
        "download_notes": "GET /download/{task_id}",
    }

    # Key order is kept stable because it determines the JSON output order.
    endpoint_map = {
        "authentication": auth_endpoints,
        "notes": notes_endpoints,
        "analytics": analytics_endpoints,
        "generation": generation_endpoints,
    }
    docs_links = {"swagger_ui": "/docs", "redoc": "/redoc"}

    return {
        "name": "YouTube Study Notes AI",
        "version": "1.0.0",
        "description": "Generate structured study notes from YouTube videos with user management",
        "endpoints": endpoint_map,
        "documentation": docs_links,
    }


@app.post("/generate-notes", response_model=TaskResponse)
async def generate_notes(
    request: GenerateNotesRequest, background_tasks: BackgroundTasks
):
    """Create a note-generation task for a YouTube video and schedule it.

    The task is registered in the in-memory ``tasks`` store with status
    ``PENDING`` and ``process_video`` is queued as a background task, so this
    endpoint returns without waiting for processing. Use the returned
    ``task_id`` to check status and download results.

    Args:
        request: Validated body carrying the video URL and language code.
        background_tasks: FastAPI hook used to queue ``process_video``.

    Returns:
        TaskResponse with the new ``task_id`` and ``PENDING`` status.

    Raises:
        HTTPException: 500 if the task could not be created or scheduled.
    """
    # Generate unique task ID
    task_id = str(uuid.uuid4())
    youtube_url = str(request.youtube_url)
    # Single clock read so a fresh task has identical created/updated stamps.
    created = datetime.now()

    try:
        # Initialize task
        tasks[task_id] = {
            "status": TaskStatus.PENDING,
            "message": "Task created, starting processing...",
            "youtube_url": youtube_url,
            "language": request.language,
            "video_title": None,
            "progress": 0,
            "notes_file": None,
            "created_at": created,
            "updated_at": created,
        }

        # Start background processing
        background_tasks.add_task(
            process_video, task_id, youtube_url, request.language
        )

        logger.info(f"Created task {task_id} for URL: {request.youtube_url}")

        return TaskResponse(
            task_id=task_id,
            status=TaskStatus.PENDING,
            message="Processing started. Use task_id to check status.",
        )

    except Exception as e:
        # Drop the entry so /status never reports a PENDING task that nothing
        # will ever process.
        tasks.pop(task_id, None)
        # logger.exception records the traceback along with the message.
        logger.exception(f"Failed to create task: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/status/{task_id}", response_model=TaskStatusResponse)
async def get_status(task_id: str):
    """Return the stored state of a task; respond 404 when the id is unknown."""
    record = tasks.get(task_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Task not found")

    # Required keys are indexed directly; optional ones fall back to None.
    snapshot = {
        "task_id": task_id,
        "status": record["status"],
        "message": record["message"],
        "video_title": record.get("video_title"),
        "progress": record.get("progress"),
        "notes_file": record.get("notes_file"),
        "created_at": record["created_at"],
        "updated_at": record["updated_at"],
    }
    return TaskStatusResponse(**snapshot)


@app.get("/download/{task_id}")
async def download_notes(task_id: str):
    """Download the generated notes file for a completed task.

    Args:
        task_id: Identifier returned by ``POST /generate-notes``.

    Returns:
        FileResponse streaming the markdown notes file.

    Raises:
        HTTPException: 404 if the task or its notes file is missing,
            400 if the task has not reached ``COMPLETED`` yet.
    """
    if task_id not in tasks:
        raise HTTPException(status_code=404, detail="Task not found")

    task = tasks[task_id]
    status = task["status"]

    if status != TaskStatus.COMPLETED:
        # Format the plain value: f-string rendering of a ``str``-mixin Enum
        # differs across Python versions ("pending" vs "TaskStatus.PENDING").
        status_text = getattr(status, "value", status)
        raise HTTPException(
            status_code=400, detail=f"Notes not ready. Current status: {status_text}"
        )

    notes_file = task.get("notes_file")
    notes_path = Path(notes_file) if notes_file else None
    # is_file() rather than exists(): a directory cannot be served as a file.
    if notes_path is None or not notes_path.is_file():
        raise HTTPException(status_code=404, detail="Notes file not found")

    return FileResponse(
        notes_file, media_type="text/markdown", filename=notes_path.name
    )


def _process_video_sync(task_id: str, youtube_url: str, language: str) -> None:
    """Run the blocking download -> transcribe -> generate-notes pipeline."""
    audio_file = None

    try:
        # Update status: Downloading
        update_task(task_id, TaskStatus.DOWNLOADING, "Downloading video...", 10)

        # Download video and extract audio
        downloader = YouTubeDownloader()

        # Get video info
        video_info = downloader.get_video_info(youtube_url)
        video_title = video_info["title"]
        video_duration = video_info["duration"]

        update_task(
            task_id,
            TaskStatus.DOWNLOADING,
            f"Downloading: {video_title}",
            20,
            video_title=video_title,
        )

        audio_file = downloader.download_audio(youtube_url, task_id)

        # Validate audio
        processor = AudioProcessor()
        if not processor.validate_audio_file(audio_file):
            raise ValueError("Invalid audio file")

        # Update status: Transcribing
        update_task(task_id, TaskStatus.TRANSCRIBING, "Transcribing audio...", 40)

        # Transcribe audio
        transcriber = WhisperTranscriber()
        transcript_data = transcriber.transcribe(audio_file, language=language)

        update_task(task_id, TaskStatus.TRANSCRIBING, "Transcription complete", 60)

        # Update status: Generating notes
        update_task(
            task_id, TaskStatus.GENERATING_NOTES, "Generating structured notes...", 70
        )

        note_gen = NoteGenerator()

        # For shorter transcripts, process as a whole
        # For longer ones, segment first
        word_count = len(transcript_data["text"].split())

        if word_count < 2000:
            # Short video: process full transcript
            logger.info("Processing short video (full transcript)")
            notes = note_gen.generate_notes_from_full_transcript(
                transcript_data["text"], video_title
            )
        else:
            # Long video: segment and process
            logger.info("Processing long video (segmented)")
            # The segmenter is only needed on this branch.
            segmenter = TranscriptSegmenter()
            segments = segmenter.segment_transcript(transcript_data, method="time")
            notes = note_gen.generate_notes_from_segments(segments)

            # Add title
            notes = f"# {video_title}\n\n{notes}"

        update_task(task_id, TaskStatus.GENERATING_NOTES, "Formatting notes...", 90)

        # Format final notes with metadata
        final_notes = note_gen.format_final_notes(
            notes, video_title, youtube_url, video_duration
        )

        # Save notes to file; create the output directory if it is missing so
        # a fresh deployment does not fail at the very last step.
        notes_file = settings.output_dir / f"{task_id}_notes.md"
        notes_file.parent.mkdir(parents=True, exist_ok=True)
        notes_file.write_text(final_notes, encoding="utf-8")

        # Update status: Completed
        update_task(
            task_id,
            TaskStatus.COMPLETED,
            "Notes generated successfully!",
            100,
            notes_file=str(notes_file),
        )

        logger.info(f"Task {task_id} completed successfully")

    except Exception as e:
        # Top-level boundary of the background job: record the traceback and
        # surface the failure through the task status instead of raising.
        logger.exception(f"Task {task_id} failed: {e}")
        update_task(task_id, TaskStatus.FAILED, f"Processing failed: {str(e)}", 0)

    finally:
        # Cleanup audio file. ``downloader`` is always bound here because
        # ``audio_file`` is only assigned after the downloader was created.
        if audio_file and audio_file.exists():
            try:
                downloader.cleanup(audio_file)
            except Exception as e:
                logger.warning(f"Cleanup failed: {e}")


async def process_video(task_id: str, youtube_url: str, language: str):
    """
    Background task to process video and generate notes.

    The pipeline (download, transcription, note generation) consists of
    blocking calls. An ``async def`` background task runs on the event loop,
    so calling them inline would stall every other request, including
    ``/status`` polling, until the job finished. The work is therefore handed
    to the loop's default thread pool and awaited from here.

    Progress and failures are reported through ``update_task``, which is
    called from the worker thread; this coroutine does not raise for
    pipeline errors.

    Args:
        task_id: Unique task identifier
        youtube_url: YouTube video URL
        language: Video language code
    """
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(
        None, _process_video_sync, task_id, youtube_url, language
    )


def update_task(
    task_id: str, status: TaskStatus, message: str, progress: int, **kwargs
):
    """Record new status, message and progress on a stored task.

    Unknown task ids are ignored. Extra keyword arguments are merged into
    the task record after the standard fields, and ``updated_at`` is set
    to the current time.
    """
    if task_id not in tasks:
        return

    changes = {
        "status": status,
        "message": message,
        "progress": progress,
        "updated_at": datetime.now(),
    }
    # Extras are applied last so they win over the standard fields above.
    changes.update(kwargs)
    tasks[task_id].update(changes)