| from ops_mm_embedding_v1 import OpsMMEmbeddingV1, fetch_image |
|
|
|
|
# Hugging Face model id for the 7B Ops-MM embedding checkpoint.
MODEL_ID = "OpenSearch-AI/Ops-MM-embedding-v1-7B"

# Load the multimodal embedding model onto the GPU.
# NOTE(review): flash_attention_2 requires the flash-attn package and a
# compatible GPU — confirm the target environment provides both.
model = OpsMMEmbeddingV1(
    MODEL_ID,
    device="cuda",
    attn_implementation="flash_attention_2",
)
|
|
# Instruction prepended to text queries for text-to-image retrieval.
t2i_prompt = "Find an image that matches the given text."

# Captions aligned index-for-index with the image URLs below: one Cybertruck
# description, then the same "Alibaba office." caption twice (two different
# photos of Alibaba offices share one caption).
texts = [
    "The Tesla Cybertruck is a battery electric pickup truck built by Tesla, Inc. since 2023.",
    *["Alibaba office."] * 2,
]

# Publicly hosted sample images (Wikimedia Commons).
images = [
    "https://upload.wikimedia.org/wikipedia/commons/e/e9/Tesla_Cybertruck_damaged_window.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/e/e0/TaobaoCity_Alibaba_Xixi_Park.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Alibaba_Binjiang_Park.jpg/1024px-Alibaba_Binjiang_Park.jpg",
]
|
|
| images = [fetch_image(image) for image in images] |
|
|
# Embed texts and images independently, then score every text against every
# image with a dot-product similarity matrix (rows: texts, cols: images).
text_embeddings = model.get_text_embeddings(texts)
image_embeddings = model.get_image_embeddings(images)
similarity = text_embeddings @ image_embeddings.T
print('Text and image embeddings', similarity.tolist())
|
|
# Fused text+image query embeddings, guided by the retrieval instruction.
text_with_image_embeddings = model.get_fused_embeddings(texts=texts, images=images, instruction=t2i_prompt)
# BUG FIX: the original line re-printed `text_embeddings @ image_embeddings.T`
# (a copy-paste of the previous print), so the fused embeddings computed above
# were never used. Print the fused-query-vs-image similarity instead.
print('Fused text and image embeddings', (text_with_image_embeddings @ image_embeddings.T).tolist())
|
|
| |
# Multi-image inputs: each entry is a *list* of images that the model fuses
# into a single embedding (entry 0: the Cybertruck photo alone; entry 1: both
# Alibaba photos together).
multi_images = [
    images[:1],
    images[1:3],
]
multi_image_embeddings = model.get_image_embeddings(multi_images)
print('Multi-image embeddings', (multi_image_embeddings @ multi_image_embeddings.T).tolist())
|
|