"""Gradio app that analyzes an uploaded image with Llama 3.2 90B Vision.

Flow: the uploaded image is written to a temporary PNG, uploaded to ImgBB to
obtain a URL, and that URL is sent together with the user's instruction to
NVIDIA's chat-completions endpoint. The model's reply is shown as text.
"""
import os
import tempfile

import gradio as gr
import requests
from PIL import Image  # noqa: F401  (not referenced directly here; retained from the original imports)

# API keys are read from environment variables at import time.
nvidia_api_key = os.getenv("Vision")  # NVIDIA API key
imagebb_api_key = os.getenv("ImageAPI")  # ImgBB API key

# NVIDIA API endpoint
invoke_url = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions"

# ImgBB upload endpoint; the API key is passed as the ``key`` query parameter.
IMGBB_UPLOAD_URL = "https://api.imgbb.com/1/upload"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled server and the Gradio worker never returns.
REQUEST_TIMEOUT = (10, 120)


def upload_image_to_imgbb(image_path):
    """Upload an image file to ImgBB and return the hosted image URL.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        The URL found at ``["data"]["url"]`` in ImgBB's JSON response.

    Raises:
        ValueError: If the ImgBB API key is not configured, the server answers
            with a non-200 status, or the response body lacks the expected URL.
        OSError: If ``image_path`` cannot be opened.
        requests.RequestException: On network failure or timeout.
    """
    if not imagebb_api_key:
        raise ValueError(
            "Image upload failed: ImgBB API key is not set "
            "(environment variable 'ImageAPI')."
        )

    with open(image_path, "rb") as image_file:
        response = requests.post(
            IMGBB_UPLOAD_URL,
            params={"key": imagebb_api_key},
            files={"image": image_file},
            timeout=REQUEST_TIMEOUT,
        )

    if response.status_code != 200:
        # The error body is not guaranteed to be JSON; fall back to raw text so
        # the real failure is reported instead of a JSON decoding error.
        try:
            details = response.json()
        except ValueError:
            details = response.text
        raise ValueError(f"Image upload failed: {details}")

    try:
        return response.json()["data"]["url"]
    except (ValueError, KeyError, TypeError) as exc:
        raise ValueError(
            f"Image upload failed: unexpected response {response.text!r}"
        ) from exc


def analyze_image(image, instruction):
    """Analyze an image with NVIDIA's Llama 3.2 Vision Instruct model.

    Args:
        image: The uploaded image as a PIL image (the Gradio input uses
            ``type="pil"``), or ``None`` when nothing was uploaded.
        instruction: Text instruction sent to the model alongside the image.

    Returns:
        The model's reply text on success; otherwise a human-readable string
        starting with ``"Error"``. This function never raises, because its
        return value is displayed directly in the Gradio text output.
    """
    if image is None:
        return "Error: Please upload an image before submitting."
    if not nvidia_api_key:
        # Checked before the upload so the image is not pushed to a public
        # host when the analysis request cannot succeed anyway.
        return "Error: NVIDIA API key is not set (environment variable 'Vision')."

    try:
        # A per-call temporary directory keeps concurrent requests from
        # overwriting each other's file and removes it afterwards. The
        # basename is unchanged so the uploaded filename stays the same.
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = os.path.join(tmp_dir, "uploaded_image.png")
            image.save(image_path)
            image_url = upload_image_to_imgbb(image_path)

        # NVIDIA API request
        headers = {
            "Authorization": f"Bearer {nvidia_api_key}",
            "Accept": "application/json",
        }
        payload = {
            "model": "meta/llama-3.2-90b-vision-instruct",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
            "max_tokens": 512,
            "temperature": 0.1,
            "top_p": 0.1,
        }

        response = requests.post(
            invoke_url,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT,
        )
        response_data = response.json()

        # Extract the reply; a missing or empty "choices" list is reported with
        # the full response body rather than as an IndexError.
        if isinstance(response_data, dict) and response_data.get("choices"):
            return response_data["choices"][0]["message"]["content"]
        return f"Error in response: {response_data}"
    except Exception as e:
        # UI boundary: turn any failure into a message shown to the user.
        return f"Error: {str(e)}"


# Gradio interface
iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(
            label="Instruction",
            placeholder="Enter your analysis instruction here.",
            lines=2,
        ),
    ],
    outputs="text",
    title="Deep Image Analysis using LLM",
    description=(
        "Upload an image and provide instructions to analyze it using Llama 3.2 90B Vision. "
        "You can upload and analyze multiple pictures, but one at a time."
    ),
    live=False,
)

# Launch the app
iface.launch()