Spaces:

ALI7ADEL
/

study-notes-api

Sleeping

File size: 11,763 Bytes

ed147e2

"""

FastAPI application for YouTube study notes generation.

Provides REST API endpoints for note generation and status tracking.

"""

import asyncio
import uuid
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional
from enum import Enum
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel, HttpUrl, Field

from src.audio.downloader import YouTubeDownloader
from src.audio.processor import AudioProcessor
from src.transcription.whisper_transcriber import WhisperTranscriber
from src.summarization.segmenter import TranscriptSegmenter
from src.summarization.note_generator import NoteGenerator
from src.utils.logger import setup_logger
from src.utils.config import settings
from src.db.database import create_db_and_tables

# Module-level logger obtained from the project's setup_logger helper, keyed
# by this module's import name.
logger = setup_logger(__name__)


# Pydantic Models
class TaskStatus(str, Enum):
    """Lifecycle states of a note-generation task.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value.
    """

    # Assigned by generate_notes() when the task record is first created.
    PENDING = "pending"
    # Set by process_video() while video info and audio are being fetched.
    DOWNLOADING = "downloading"
    # Set by process_video() around the transcription step.
    TRANSCRIBING = "transcribing"
    # Set by process_video() while notes are generated and formatted.
    GENERATING_NOTES = "generating_notes"
    # Set by process_video() after the notes file has been written.
    COMPLETED = "completed"
    # Set by process_video() when any step raises an exception.
    FAILED = "failed"


class GenerateNotesRequest(BaseModel):
    """Request body for ``POST /generate-notes``.

    Attributes:
        youtube_url: URL of the video to process, validated by pydantic's
            ``HttpUrl`` type.
        language: Language code forwarded to the transcription step;
            defaults to ``"en"``.
    """

    youtube_url: HttpUrl = Field(..., description="YouTube video URL")
    language: str = Field(default="en", description="Video language code")

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 and emits a deprecation warning at class-creation time;
    # ``json_schema_extra`` is already a v2-only key, so the same setting is
    # declared through ``model_config`` (a plain dict is accepted there).
    model_config = {
        "json_schema_extra": {
            "example": {
                "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
                "language": "en",
            }
        }
    }


class TaskResponse(BaseModel):
    """Response body returned when a generation task has been accepted.

    Used as the response model of ``POST /generate-notes``. ``task_id`` is
    the key clients pass to the status and download endpoints.
    """

    task_id: str = Field(..., description="Unique task identifier")
    status: TaskStatus = Field(..., description="Current task status")
    message: str = Field(..., description="Status message")


class TaskStatusResponse(BaseModel):
    """Response body for ``GET /status/{task_id}``: a snapshot of one task record."""

    task_id: str
    status: TaskStatus
    message: str
    # None until process_video() has fetched the video info.
    video_title: Optional[str] = None
    progress: Optional[int] = Field(None, description="Progress percentage (0-100)")
    # Path of the generated notes file as a string; set only on completion.
    notes_file: Optional[str] = None
    # Both timestamps come from datetime.now() (naive, server-local time).
    created_at: datetime
    updated_at: datetime


# Global in-memory task registry, keyed by task_id (in production, use a
# database). Records are created by generate_notes() and mutated by
# update_task(); nothing shown in this module removes them, and the contents
# do not survive a process restart.
tasks: Dict[str, Dict] = {}


# --- Lifespan Event Handler (Fixes Windows Event Loop Issue) ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown logic around the application's lifetime.

    Before the ``yield`` this awaits ``create_db_and_tables()`` (which, per
    its name and the log messages, initializes the database tables). After
    the ``yield`` it only logs a shutdown message.

    Args:
        app: The FastAPI application this handler is attached to; it is not
            used inside the body.
    """
    # Startup phase: everything before `yield` runs before the app is served.
    logger.info("Lifespan: Initializing database tables...")
    await create_db_and_tables()
    logger.info("Lifespan: Database tables initialized successfully")
    yield
    # Shutdown phase: execution resumes here when the context manager exits.
    logger.info("Lifespan: Server shutting down...")


# FastAPI app; startup/shutdown work is delegated to the `lifespan` handler.
app = FastAPI(
    title="YouTube Study Notes AI",
    description="Generate structured study notes from YouTube educational videos",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): a wildcard origin is combined with allow_credentials=True.
# FastAPI's CORS documentation states that origins, methods and headers must
# be listed explicitly when credentials are allowed — confirm the intended
# origins and tighten this before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify allowed origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
# NOTE(review): these imports run after `app` is created instead of at the top
# of the file — presumably to avoid a circular import; confirm before moving.
from src.api.auth_routes import router as auth_router
from src.api.notes_routes import router as notes_router
from src.api.analytics_routes import router as analytics_router

app.include_router(auth_router)
app.include_router(notes_router)
app.include_router(analytics_router)


@app.get("/")
async def root():
    """Return a static description of the API together with a route map."""
    # Route catalogue grouped by feature area. Groups are added in the order
    # they should appear in the JSON response.
    endpoints = {}
    endpoints["authentication"] = {
        "signup": "POST /auth/signup",
        "login": "POST /auth/login",
    }
    endpoints["notes"] = {
        "create": "POST /notes",
        "list": "GET /notes",
        "get": "GET /notes/{note_id}",
        "delete": "DELETE /notes/{note_id}",
    }
    endpoints["analytics"] = {"user_stats": "GET /analytics"}
    endpoints["generation"] = {
        "generate_notes": "POST /generate-notes",
        "check_status": "GET /status/{task_id}",
        "download_notes": "GET /download/{task_id}",
    }

    # Locations of the auto-generated interactive documentation.
    documentation = {"swagger_ui": "/docs", "redoc": "/redoc"}

    description = (
        "Generate structured study notes from YouTube videos with user management"
    )
    return {
        "name": "YouTube Study Notes AI",
        "version": "1.0.0",
        "description": description,
        "endpoints": endpoints,
        "documentation": documentation,
    }


@app.post("/generate-notes", response_model=TaskResponse)
async def generate_notes(
    request: GenerateNotesRequest, background_tasks: BackgroundTasks
):
    """Register a note-generation task and schedule it in the background.

    The task record is stored in the module-level ``tasks`` registry with
    status ``PENDING``, and ``process_video`` is queued as a background task.
    Use the returned ``task_id`` to check status and download results.

    Args:
        request: Validated request body carrying the video URL and language.
        background_tasks: FastAPI background-task queue for this request.

    Returns:
        A ``TaskResponse`` with the new task id and ``PENDING`` status.

    Raises:
        HTTPException: 500 if the task could not be created; the detail is
            the string form of the underlying error.
    """
    task_id = None
    try:
        # Generate unique task ID
        task_id = str(uuid.uuid4())
        youtube_url = str(request.youtube_url)

        # One timestamp for both fields, so a freshly created task reports
        # created_at == updated_at instead of two slightly different times.
        now = datetime.now()

        # Initialize task
        tasks[task_id] = {
            "status": TaskStatus.PENDING,
            "message": "Task created, starting processing...",
            "youtube_url": youtube_url,
            "language": request.language,
            "video_title": None,
            "progress": 0,
            "notes_file": None,
            "created_at": now,
            "updated_at": now,
        }

        # Start background processing
        background_tasks.add_task(
            process_video, task_id, youtube_url, request.language
        )

        logger.info(f"Created task {task_id} for URL: {youtube_url}")

        return TaskResponse(
            task_id=task_id,
            status=TaskStatus.PENDING,
            message="Processing started. Use task_id to check status.",
        )

    except Exception as e:
        # The client never learns the id of a task whose creation failed, so
        # drop the half-registered record rather than leave it PENDING forever.
        if task_id is not None:
            tasks.pop(task_id, None)
        logger.error(f"Failed to create task: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/status/{task_id}", response_model=TaskStatusResponse)
async def get_status(task_id: str):
    """Return a snapshot of the task record identified by ``task_id``.

    Raises:
        HTTPException: 404 when no task with that id is registered.
    """
    record = tasks.get(task_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Task not found")

    # Fields that may legitimately be absent are read with .get() and default
    # to None; the remaining fields are required on every record.
    optional_fields = {
        name: record.get(name) for name in ("video_title", "progress", "notes_file")
    }

    return TaskStatusResponse(
        task_id=task_id,
        status=record["status"],
        message=record["message"],
        created_at=record["created_at"],
        updated_at=record["updated_at"],
        **optional_fields,
    )


@app.get("/download/{task_id}")
async def download_notes(task_id: str):
    """Download the generated notes file of a completed task.

    Args:
        task_id: Identifier returned by ``POST /generate-notes``.

    Returns:
        A ``FileResponse`` serving the notes file as ``text/markdown``.

    Raises:
        HTTPException: 404 if the task is unknown or its notes file is
            missing; 400 if the task has not reached ``COMPLETED``.
    """
    task = tasks.get(task_id)
    if task is None:
        raise HTTPException(status_code=404, detail="Task not found")

    status = task["status"]
    if status != TaskStatus.COMPLETED:
        # Formatting a (str, Enum) member inside an f-string yields "pending"
        # on Python <= 3.10 but "TaskStatus.PENDING" on 3.11+, so the plain
        # value is used to keep the message stable across versions.
        status_text = status.value if isinstance(status, Enum) else str(status)
        raise HTTPException(
            status_code=400, detail=f"Notes not ready. Current status: {status_text}"
        )

    notes_file = task.get("notes_file")
    # is_file() rather than exists(): a directory at that path cannot be served.
    if not notes_file or not Path(notes_file).is_file():
        raise HTTPException(status_code=404, detail="Notes file not found")

    return FileResponse(
        notes_file, media_type="text/markdown", filename=Path(notes_file).name
    )


# Transcripts with fewer words than this are summarized in a single pass;
# longer ones are segmented first.
_SHORT_TRANSCRIPT_WORD_LIMIT = 2000


def _process_video_blocking(task_id: str, youtube_url: str, language: str) -> None:
    """Run the download -> transcribe -> summarize pipeline synchronously.

    Every step here is a blocking call, so this must run in a worker thread
    and never directly on the event loop. Failures are not raised: they are
    logged and recorded on the task as ``FAILED``.
    """
    audio_file = None
    downloader = None

    try:
        # Update status: Downloading
        update_task(task_id, TaskStatus.DOWNLOADING, "Downloading video...", 10)

        # Download video and extract audio
        downloader = YouTubeDownloader()

        # Get video info
        video_info = downloader.get_video_info(youtube_url)
        video_title = video_info["title"]
        video_duration = video_info["duration"]

        update_task(
            task_id,
            TaskStatus.DOWNLOADING,
            f"Downloading: {video_title}",
            20,
            video_title=video_title,
        )

        audio_file = downloader.download_audio(youtube_url, task_id)

        # Validate audio
        processor = AudioProcessor()
        if not processor.validate_audio_file(audio_file):
            raise ValueError("Invalid audio file")

        # Update status: Transcribing
        update_task(task_id, TaskStatus.TRANSCRIBING, "Transcribing audio...", 40)

        # Transcribe audio
        transcriber = WhisperTranscriber()
        transcript_data = transcriber.transcribe(audio_file, language=language)

        update_task(task_id, TaskStatus.TRANSCRIBING, "Transcription complete", 60)

        # Update status: Generating notes
        update_task(
            task_id, TaskStatus.GENERATING_NOTES, "Generating structured notes...", 70
        )

        # One generator serves both branches and the final formatting step.
        note_gen = NoteGenerator()
        transcript_text = transcript_data["text"]
        word_count = len(transcript_text.split())

        if word_count < _SHORT_TRANSCRIPT_WORD_LIMIT:
            # Short video: process full transcript
            logger.info("Processing short video (full transcript)")
            notes = note_gen.generate_notes_from_full_transcript(
                transcript_text, video_title
            )
        else:
            # Long video: segment and process. The segmenter is only needed
            # on this path, so it is built here.
            logger.info("Processing long video (segmented)")
            segmenter = TranscriptSegmenter()
            segments = segmenter.segment_transcript(transcript_data, method="time")
            notes = note_gen.generate_notes_from_segments(segments)

            # Add title
            notes = f"# {video_title}\n\n{notes}"

        update_task(task_id, TaskStatus.GENERATING_NOTES, "Formatting notes...", 90)

        # Format final notes with metadata
        final_notes = note_gen.format_final_notes(
            notes, video_title, youtube_url, video_duration
        )

        # Save notes to file; create the output directory first so a fresh
        # deployment does not fail on the write.
        notes_file = settings.output_dir / f"{task_id}_notes.md"
        notes_file.parent.mkdir(parents=True, exist_ok=True)
        notes_file.write_text(final_notes, encoding="utf-8")

        # Update status: Completed
        update_task(
            task_id,
            TaskStatus.COMPLETED,
            "Notes generated successfully!",
            100,
            notes_file=str(notes_file),
        )

        logger.info(f"Task {task_id} completed successfully")

    except Exception as e:
        # Top-level boundary of the background job: the exception goes no
        # further, so log it with its traceback and record the failure.
        logger.exception(f"Task {task_id} failed: {e}")
        update_task(task_id, TaskStatus.FAILED, f"Processing failed: {str(e)}", 0)

    finally:
        # Cleanup audio file (best effort). audio_file is only set after the
        # downloader has been constructed, so `downloader` is bound here.
        if audio_file and audio_file.exists():
            try:
                downloader.cleanup(audio_file)
            except Exception as e:
                logger.warning(f"Cleanup failed: {e}")


async def process_video(task_id: str, youtube_url: str, language: str):
    """Background task to process a video and generate notes.

    The pipeline consists solely of blocking calls. Because this coroutine is
    awaited on the server's event loop when scheduled as a background task,
    running those calls inline would stall every other request until the
    video finished processing. The work is therefore handed to the event
    loop's default thread-pool executor and only awaited here.

    NOTE(review): tasks that previously ran one at a time can now overlap in
    separate worker threads — confirm the transcriber and note generator
    tolerate concurrent use, or bound the concurrency.

    Args:
        task_id: Unique task identifier.
        youtube_url: YouTube video URL.
        language: Video language code.
    """
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(
        None, _process_video_blocking, task_id, youtube_url, language
    )


def update_task(
    task_id: str, status: TaskStatus, message: str, progress: int, **kwargs
):
    """Apply a status change to a registered task; unknown ids are ignored.

    Args:
        task_id: Key of the record in the module-level ``tasks`` registry.
        status: New task status.
        message: New status message.
        progress: New progress value.
        **kwargs: Extra fields to merge into the record; on a key clash these
            override the standard fields.
    """
    if task_id not in tasks:
        return

    changes = {
        "status": status,
        "message": message,
        "progress": progress,
        "updated_at": datetime.now(),
    }
    # Merged last so caller-supplied extras take precedence.
    changes.update(kwargs)
    tasks[task_id].update(changes)