Spaces:

iemafzalhassan
/

prism-enhancer

Sleeping

App Files Files Community

GitHub Actions Bot committed on Jan 25

Commit

eaa0562

1 Parent(s): d59f4dd

Deploy: c6cdf9e43825d598e4e3b553c1069ab77675c9fb

Browse files

Files changed (12) hide show

.env +0 -2
.env.example +0 -2
README.md +34 -15
app/__pycache__/main.cpython-314.pyc +0 -0
app/main.py +72 -76
app/services/__pycache__/model_service.cpython-314.pyc +0 -0
app/services/llm_service.py +46 -0
app/services/model_service.py +0 -64
app/services/prompt_logic.py +27 -0
app/services/storage_service.py +51 -0
app/services/vision_service.py +41 -0
requirements.txt +10 -6

.env DELETED Viewed

	@@ -1,2 +0,0 @@
1	- SUPABASE_URL=https://muftiyckgkedprgjdbhj.supabase.co
2	- SUPABASE_KEY=sb_publishable_zlFaPiRisg0rA_-4NGG31g_8VrLFxJu

.env.example DELETED Viewed

	@@ -1,2 +0,0 @@
1	- SUPABASE_URL=your_supabase_url_here
2	- SUPABASE_KEY=your_supabase_anon_key_here

README.md CHANGED Viewed

@@ -8,23 +8,42 @@ pinned: false
 app_port: 7860
 ---
-# Prism Enhancer - Backend
-This is the backend for the Prism AI Prompt Enhancer.
-It runs a quantized version of **Gemma 2 (2B)** completely offline on the CPU using `llama-cpp-python`.
-## Features
-- **Offline Inference**: No external APIs.
-- **MLOps Flywheel**: Synchronizes training data to Supabase (if configured).
-- **FastAPI**: High-performance async API.
-## API Usage
-POST `/api/v1/enhance`
-```json
-{
-  "text": "Make this better",
-  "mode": "creative",
-  "platform": "chatgpt"
-}
 ```

 app_port: 7860
 ---
+# Prism Enhancer v3 - Multimodal Engine
+This backend runs a dual-model pipeline for text and image understanding.
+- **Vision Brain**: `vikhyatk/moondream2` (CPU Optimized)
+- **Logic Brain**: `Qwen2.5-1.5B-Instruct` (GGUF)
+## 🛠️ Supabase SQL Setup
+Run this in your Supabase SQL Editor:
+```sql
+-- 1. Create Storage Bucket for User Uploads
+insert into storage.buckets (id, name, public)
+values ('user_uploads', 'user_uploads', true);
+-- 2. Create MLOps Training Logs Table
+create table training_logs (
+  id bigint generated by default as identity primary key,
+  created_at timestamp with time zone default timezone('utc'::text, now()) not null,
+  original_text text,
+  enhanced_text text,
+  vision_description text,
+  image_path text,
+  category text
+);
+-- 3. Set up Storage Policies (Allow Public Read and Public Upload)
+create policy "Public Access" on storage.objects for select using ( bucket_id = 'user_uploads' );
+create policy "Allow Upload" on storage.objects for insert with check ( bucket_id = 'user_uploads' );
 ```
+## 🚀 API Deployment
+The backend is configured for Hugging Face Spaces (Docker). It uses `llama-cpp-python` for text logic and `transformers` for vision.
+**Endpoint**: `POST /api/v1/enhance` (Multipart/FormData)
+- `prompt`: Text input
+- `category`: [General, 3D Logo, Cartoon Logo, Future Avatar, Video]
+- `file`: Optional image upload for visual reference

app/__pycache__/main.cpython-314.pyc DELETED Viewed

Binary file (6.14 kB)

app/main.py CHANGED Viewed

@@ -1,99 +1,95 @@
 import os
-from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from dotenv import load_dotenv
-from supabase import create_client, Client
-from app.services.model_service import ModelService
-# Load environment variables
 load_dotenv()
-# Supabase Configuration
-SUPABASE_URL = os.getenv("SUPABASE_URL")
-SUPABASE_KEY = os.getenv("SUPABASE_KEY")
-# Initialize Supabase Client
-supabase: Client = None
-if SUPABASE_URL and SUPABASE_KEY:
-    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
-else:
-    print("WARNING: Supabase URL/KEY not found. Logging will be disabled.")
-app = FastAPI(title="Prism v2 - Offline AI & MLOps", version="2.0.0")
-# Data Models
-class EnhanceRequest(BaseModel):
-    text: str
-    mode: str
-    platform: str # chatgpt | midjourney
 class EnhanceResponse(BaseModel):
-    original_text: str
     enhanced_prompt: str
-    mode: str
-    platform: str
-    log_id: int | None = None
-class FeedbackRequest(BaseModel):
-    log_id: int
-    liked: bool
-# Helper for logging interaction to Supabase and getting the ID
-def log_interaction(original_text: str, enhanced_text: str, mode: str, platform: str) -> int | None:
-    if not supabase:
-        return None
     try:
         data = {
-            "original_text": original_text,
-            "enhanced_text": enhanced_text,
-            "mode": mode,
-            "platform": platform,
-            "liked": False
         }
-        # Execute synchronously to retrieve the ID for the feedback loop
-        res = supabase.table("training_logs").insert(data).execute()
-        if res.data and len(res.data) > 0:
-            return res.data[0]['id']
     except Exception as e:
-        print(f"Log Error: {e}")
-    return None
-@app.on_event("startup")
-def startup_event():
-    # Pre-load model
-    ModelService.get_model()
-@app.post("/api/v1/enhance", response_model=EnhanceResponse)
-def enhance_prompt(request: Request, body: EnhanceRequest):
-    # 1. Inference
-    enhanced_text = ModelService.enhance(body.text, body.mode, body.platform)
-    # 2. Logging
-    # Performed synchronously to capture the ID required for the client-side feedback loop.
-    log_id = log_interaction(body.text, enhanced_text, body.mode, body.platform)
-    return EnhanceResponse(
-        original_text=body.text,
-        enhanced_prompt=enhanced_text,
-        mode=body.mode,
-        platform=body.platform,
-        log_id=log_id
     )
-@app.post("/api/v1/feedback")
-def feedback_loop(body: FeedbackRequest):
-    if not supabase:
-        raise HTTPException(status_code=503, detail="Supabase not configured")
-    try:
-        # Update the log entry
-        supabase.table("training_logs").update({"liked": body.liked}).eq("id", body.log_id).execute()
-        return {"status": "success", "message": "Feedback recorded"}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 @app.get("/")
-def health_check():
-    return {"status": "running", "model": ModelService.MODEL_FILE}

 import os
+import io
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException, UploadFile, File, Form, BackgroundTasks
 from pydantic import BaseModel
 from dotenv import load_dotenv
+from app.services.vision_service import get_vision_service
+from app.services.storage_service import storage_service
+from app.services.llm_service import llm_service
+from app.services.prompt_logic import prompt_logic
 load_dotenv()
+# Lifecycle Manager
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Pre-load models once at startup
+    print("Initializing Dual-Brain Engine...")
+    get_vision_service()
+    llm_service.get_model()
+    yield
+    print("Shutting down Dual-Brain Engine...")
+app = FastAPI(title="Prism v3 - Multimodal AI Engine", version="3.0.0", lifespan=lifespan)
+# Response Model
 class EnhanceResponse(BaseModel):
+    # Payload returned by POST /api/v1/enhance.
     enhanced_prompt: str
+    # Vision-model description of the uploaded image; None when no file was sent.
+    vision_analysis: str | None = None
+    # Value returned by storage_service.upload_image(); None when no file was sent.
+    image_url: str | None = None
+def log_to_supabase(original, enhanced, vision_desc, img_url, category):
+    """Insert one enhancement record into the ``training_logs`` table.
+
+    Best-effort: any exception is caught and printed, never raised.
+    """
     try:
+        # Keys are the column names written to training_logs.
         data = {
+            "original_text": original,
+            "enhanced_text": enhanced,
+            "vision_description": vision_desc,
+            "image_path": img_url,
+            "category": category
         }
+        storage_service.supabase.table("training_logs").insert(data).execute()
     except Exception as e:
+        print(f"MLOps Log Error: {e}")
+@app.post("/api/v1/enhance")
+async def enhance_pipeline(
+    background_tasks: BackgroundTasks,
+    prompt: str = Form(...),
+    category: str = Form("general"),
+    file: UploadFile | None = File(None)
+):
+    vision_desc = None
+    img_url = None
+    opt_bytes = None
+    # Step 1 & 2: Process Image if exists
+    if file:
+        file_bytes = await file.read()
+        # 1. Optimize
+        opt_bytes = storage_service.optimize_image(file_bytes)
+        # 2. Analyze (Vision Brain)
+        vision_desc = get_vision_service().analyze_image(opt_bytes)
+        # 3. Upload (Vault) - Background Task to keep API fast
+        # Note: We actually need the URL for the response if possible,
+        # but let's upload and return the path/url as requested.
+        img_url = storage_service.upload_image(opt_bytes)
+    # Step 3: Enhance (Logic Brain)
+    # Construct combined instruction
+    master_prompt = prompt_logic.construct_master_prompt(
+        user_text=prompt,
+        vision_desc=vision_desc,
+        category=category
+    )
+    final_prompt = llm_service.generate(master_prompt)
+    # Step 4: Data Flywheel Logging
+    background_tasks.add_task(
+        log_to_supabase, prompt, final_prompt, vision_desc, img_url, category
     )
+    return EnhanceResponse(
+        enhanced_prompt=final_prompt,
+        vision_analysis=vision_desc,
+        image_url=img_url
+    )
 @app.get("/")
+def health():
+    # Static liveness payload; does not touch the models or Supabase.
+    return {"status": "running", "engine": "Prism-v3-Multimodal"}

app/services/__pycache__/model_service.cpython-314.pyc DELETED Viewed

Binary file (3.18 kB)

app/services/llm_service.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import os
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+class LLMService:
+    _llm = None
+    # Qwen2.5-1.5B is great for CPU and logic
+    MODEL_REPO = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
+    MODEL_FILE = "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
+    @classmethod
+    def get_model(cls):
+        if cls._llm is None:
+            print(f"Loading LLM: {cls.MODEL_REPO}...")
+            try:
+                model_path = hf_hub_download(repo_id=cls.MODEL_REPO, filename=cls.MODEL_FILE)
+                cls._llm = Llama(
+                    model_path=model_path,
+                    n_ctx=4096,
+                    verbose=False
+                )
+                print("LLM loaded successfully.")
+            except Exception as e:
+                print(f"Error loading LLM: {e}")
+                raise e
+        return cls._llm
+    @classmethod
+    def generate(cls, master_prompt: str) -> str:
+        llm = cls.get_model()
+        # Qwen2.5 chat template
+        prompt = f"<|im_start|>system\nYou are a professional Prompt Architect. Refine and enhance input prompts based on category and visual evidence.<|im_end|>\n<|im_start|>user\n{master_prompt}<|im_end|>\n<|im_start|>assistant\n"
+        output = llm.create_completion(
+            prompt,
+            max_tokens=600,
+            temperature=0.7,
+            stop=["<|im_end|>"]
+        )
+        return output['choices'][0]['text'].strip()
+# Singleton helper
+llm_service = LLMService()

app/services/model_service.py DELETED Viewed

@@ -1,64 +0,0 @@
-import os
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-class ModelService:
-    _instance = None
-    _llm = None
-    # Configuration
-    # Using a stable quantized GGUF of Gemma 2 2B
-    MODEL_REPO = "bartowski/gemma-2-2b-it-GGUF"
-    MODEL_FILE = "gemma-2-2b-it-Q4_K_M.gguf"
-    @classmethod
-    def get_model(cls):
-        if cls._llm is None:
-            print(f"Loading model... {cls.MODEL_REPO}/{cls.MODEL_FILE}")
-            # Download/Load Model
-            try:
-                model_path = hf_hub_download(
-                    repo_id=cls.MODEL_REPO,
-                    filename=cls.MODEL_FILE,
-                )
-                print(f"Model path: {model_path}")
-                # Initialize Llama
-                cls._llm = Llama(
-                    model_path=model_path,
-                    n_ctx=2048,
-                    verbose=False
-                )
-                print("Model loaded successfully.")
-            except Exception as e:
-                print(f"Error loading model: {e}")
-                raise e
-        return cls._llm
-    @staticmethod
-    def construct_system_prompt(mode: str, platform: str) -> str:
-        return (
-            f"You are an expert Prompt Engineer. Rewrite the user's input to be clear, structured, "
-            f"and optimized for {platform}. Use a {mode} tone. "
-            f"Return ONLY the enhanced prompt, no conversational filler."
-        )
-    @classmethod
-    def enhance(cls, text: str, mode: str, platform: str) -> str:
-        llm = cls.get_model()
-        system_prompt = cls.construct_system_prompt(mode, platform)
-        # Gemma 2 formatting
-        full_prompt = f"<start_of_turn>user\nSystem: {system_prompt}\nUser Input: {text}<end_of_turn>\n<start_of_turn>model\n"
-        output = llm.create_completion(
-            full_prompt,
-            max_tokens=512,
-            stop=["<end_of_turn>"],
-            temperature=0.7
-        )
-        return output['choices'][0]['text'].strip()

app/services/prompt_logic.py ADDED Viewed

	@@ -0,0 +1,27 @@

+class PromptLogic:
+    SYSTEM_INSTRUCTIONS = {
+        "general": "Enhance the user's prompt to be more descriptive and professional while maintaining the core intent.",
+        "3d logo": "Professional 3D logo design, high resolution, isometric view, Octane Render, Unreal Engine 5, minimalist, clean lines, professional color palette.",
+        "cartoon logo": "Playful cartoon logo, vibrant colors, vector style, bold outlines, 2D minimalist, sticker aesthetic.",
+        "future avatar": "Cyberpunk aesthetic, neon lighting, hyper-realistic, 8k resolution, ray tracing textures, metallic and glowing accents, futuristic clothing.",
+        "video": "Cinematic video prompt, 4k ultra HD, 60fps, professional lighting, dolly zoom effect, anamorphic lens, ISO 400, golden hour lighting, hyper-detailed textures.",
+    }
+    @staticmethod
+    def construct_master_prompt(user_text: str, vision_desc: str = None, category: str = "general") -> str:
+        category = category.lower()
+        instruction = PromptLogic.SYSTEM_INSTRUCTIONS.get(category, PromptLogic.SYSTEM_INSTRUCTIONS["general"])
+        master_prompt = f"System Instruction: {instruction}\n\n"
+        if vision_desc:
+            master_prompt += f"Visual Reference Analysis: {vision_desc}\n\n"
+            master_prompt += "Ensure the final prompt incorporates the visual details (face, hair, clothing, lighting) to maintain exact character consistency.\n\n"
+        master_prompt += f"User Input: {user_text}\n\n"
+        master_prompt += "Modified Output Prompt:"
+        return master_prompt
+# Helper
+prompt_logic = PromptLogic()

app/services/storage_service.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import io
+import uuid
+import os
+from PIL import Image
+from supabase import create_client, Client
+from dotenv import load_dotenv
+load_dotenv()
+class StorageService:
+    def __init__(self):
+        url = os.getenv("SUPABASE_URL")
+        key = os.getenv("SUPABASE_KEY")
+        self.supabase: Client = create_client(url, key)
+        self.bucket_name = "user_uploads"
+    def optimize_image(self, file_bytes: bytes) -> bytes:
+        img = Image.open(io.BytesIO(file_bytes)).convert("RGB")
+        # Resize if > 1024px
+        max_size = 1024
+        if max(img.size) > max_size:
+            ratio = max_size / max(img.size)
+            new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
+            img = img.resize(new_size, Image.LANCZOS)
+        # Compress and save as WebP
+        output = io.BytesIO()
+        img.save(output, format="WEBP", quality=85)
+        return output.getvalue()
+    def upload_image(self, file_bytes: bytes) -> str:
+        try:
+            filename = f"{uuid.uuid4()}.webp"
+            # Binary upload to Supabase Storage
+            self.supabase.storage.from_(self.bucket_name).upload(
+                path=filename,
+                file=file_bytes,
+                file_options={"content-type": "image/webp"}
+            )
+            # Get Public URL
+            res = self.supabase.storage.from_(self.bucket_name).get_public_url(filename)
+            return res
+        except Exception as e:
+            print(f"Upload Error: {e}")
+            return ""
+# Singleton
+storage_service = StorageService()

app/services/vision_service.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from PIL import Image
+import io
+class VisionService:
+    def __init__(self):
+        self.model_id = "vikhyatk/moondream2"
+        self.revision = "2024-08-26" # Use a stable revision
+        self.device = "cpu"
+        print(f"Loading Vision Model: {self.model_id}...")
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, revision=self.revision)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_id,
+            trust_remote_code=True,
+            revision=self.revision
+        ).to(self.device)
+        self.model.eval()
+    def analyze_image(self, image_bytes: bytes) -> str:
+        try:
+            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+            enc_image = self.model.encode_image(image)
+            prompt = "Describe this person's face, hair, body type, lighting, and clothing in extreme detail for a 3D character reference."
+            description = self.model.answer_question(enc_image, prompt, self.tokenizer)
+            return description
+        except Exception as e:
+            print(f"Vision Analysis Error: {e}")
+            return "No image description available."
+# Singleton instance
+vision_service = None
+def get_vision_service():
+    global vision_service
+    if vision_service is None:
+        vision_service = VisionService()
+    return vision_service

requirements.txt CHANGED Viewed

@@ -1,8 +1,12 @@
 fastapi==0.109.0
 uvicorn==0.27.0
-pydantic==2.6.0
-python-dotenv==1.0.0
-supabase==2.3.0
-huggingface-hub==0.20.3
-llama-cpp-python==0.2.90
-python-multipart==0.0.9

+--index-url https://download.pytorch.org/whl/cpu
 fastapi==0.109.0
 uvicorn==0.27.0
+python-multipart
+torch
+transformers
+einops
+pillow
+supabase
+huggingface-hub
+llama-cpp-python
+python-dotenv