import requests
import streamlit as st
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
|
|
@st.cache_resource
def load_clip():
    """Load the CLIP model and processor once, cached across Streamlit reruns.

    Without caching, Streamlit re-executes the whole script on every user
    interaction and would re-instantiate (and potentially re-download) the
    model each time.
    """
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    return model, processor


model, processor = load_clip()

# Fetch a sample image. timeout prevents the app hanging forever on a stalled
# connection; raise_for_status fails loudly instead of handing PIL an HTML
# error page as "image" bytes.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
response = requests.get(url, stream=True, timeout=10)
response.raise_for_status()
image = Image.open(response.raw)

# Candidate captions for zero-shot classification.
labels = ["a photo of a cat", "a photo of a dog"]
inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)

# Inference only — no_grad avoids building the autograd graph (saves memory/time).
with torch.no_grad():
    outputs = model(**inputs)

# logits_per_image holds image-text similarity scores; softmax over the text
# axis turns them into probabilities for each candidate label.
probs = outputs.logits_per_image.softmax(dim=1)[0]

st.image(image, caption="Input image")
st.write({label: float(p) for label, p in zip(labels, probs)})
|
|