File size: 2,769 Bytes
82d3f8a
 
 
f8ffbef
82d3f8a
be56d43
 
 
82d3f8a
be56d43
 
82d3f8a
 
be56d43
f8ffbef
e597996
be56d43
 
 
 
 
82d3f8a
 
be56d43
82d3f8a
be56d43
82d3f8a
 
 
be56d43
82d3f8a
 
be56d43
 
 
 
 
 
 
 
f8ffbef
 
 
 
 
 
 
 
be56d43
 
 
 
 
 
 
 
 
 
 
 
 
 
82d3f8a
be56d43
 
82d3f8a
 
 
 
 
 
 
 
 
e597996
be56d43
 
e597996
82d3f8a
 
 
 
f8ffbef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import tempfile

import gradio as gr
import requests
from PIL import Image

# API credentials are read from the environment; set "Vision" and "ImageAPI"
# before launching (secrets are never hard-coded in source).
nvidia_api_key = os.environ.get("Vision")     # NVIDIA API key
imagebb_api_key = os.environ.get("ImageAPI")  # ImgBB API key

# Chat-completions endpoint for Llama 3.2 90B Vision Instruct on NVIDIA's API.
invoke_url = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions"

def upload_image_to_imgbb(image_path):
    """Upload a local image file to ImgBB and return its public URL.

    Args:
        image_path: Path of the image file on disk.

    Returns:
        The hosted image URL reported by ImgBB.

    Raises:
        ValueError: If the upload fails or the response body is not the
            expected JSON shape.
        requests.RequestException: On network errors or timeout.
    """
    url = f"https://api.imgbb.com/1/upload?key={imagebb_api_key}"
    with open(image_path, "rb") as image_file:
        # Bound the request so a stalled upload cannot hang the app forever.
        response = requests.post(url, files={"image": image_file}, timeout=30)
    if response.status_code == 200:
        try:
            return response.json()["data"]["url"]
        except (ValueError, KeyError) as exc:
            raise ValueError(f"Unexpected ImgBB response: {response.text}") from exc
    # Use response.text, not response.json(): error bodies may not be JSON,
    # and calling .json() here would mask the real failure with a decode error.
    raise ValueError(f"Image upload failed: {response.text}")

def analyze_image(image, instruction):
    """Analyze *image* with NVIDIA's Llama 3.2 90B Vision Instruct model.

    The image is written to a unique temporary file, uploaded to ImgBB to
    obtain a publicly reachable URL, and that URL plus *instruction* are sent
    to the NVIDIA chat-completions endpoint.

    Args:
        image: A PIL image (as provided by the Gradio ``Image`` input).
        instruction: Free-form analysis prompt for the model.

    Returns:
        The model's text answer, or an error-message string. Gradio displays
        whatever is returned, so failures are reported as text rather than
        raised.
    """
    image_path = None
    try:
        # Use a unique temp file instead of a fixed "uploaded_image.png":
        # avoids clobbering when two requests run concurrently, and lets us
        # clean up afterwards instead of leaking the file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            image_path = tmp.name
        image.save(image_path)

        # Upload to ImgBB first; the NVIDIA API needs a public image URL.
        image_url = upload_image_to_imgbb(image_path)

        # NVIDIA API request.
        headers = {
            "Authorization": f"Bearer {nvidia_api_key}",
            "Accept": "application/json"
        }
        payload = {
            "model": "meta/llama-3.2-90b-vision-instruct",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {"type": "image_url", "image_url": {"url": image_url}}
                    ]
                }
            ],
            "max_tokens": 512,
            "temperature": 0.1,
            "top_p": 0.1
        }

        # Bound the call so a stalled backend cannot hang the UI handler.
        response = requests.post(invoke_url, headers=headers, json=payload,
                                 timeout=120)
        response_data = response.json()

        # Extract the model's answer; surface the raw payload on failure.
        if "choices" in response_data:
            return response_data["choices"][0]["message"]["content"]
        return f"Error in response: {response_data}"

    except Exception as e:
        # Deliberate best-effort boundary: report the failure in the UI
        # instead of crashing the Gradio handler.
        return f"Error: {str(e)}"
    finally:
        # Remove the temporary file whether or not the request succeeded.
        if image_path and os.path.exists(image_path):
            os.remove(image_path)

# --- Gradio UI -------------------------------------------------------------
# Two inputs (the picture and a free-form prompt), one text output.
_ui_inputs = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Instruction", placeholder="Enter your analysis instruction here.", lines=2),
]

_ui_description = (
    "Upload an image and provide instructions to analyze it using Llama 3.2 90B Vision. "
    "You can upload and analyze multiple pictures, but one at a time."
)

iface = gr.Interface(
    fn=analyze_image,
    inputs=_ui_inputs,
    outputs="text",
    title="Deep Image Analysis using LLM",
    description=_ui_description,
    live=False,  # run only on submit, not on every input change
)

# Start the web app.
iface.launch()