The example Python code just doesn't work anymore?
#9
by
TheoryOfMadness
- opened
"""
Test cross-modal text-to-image search with Nomic embeddings.
This test verifies that text embeddings with 'search_query:' prefix
can properly match against image embeddings.
"""
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
from PIL import Image
import requests
import sys
from pathlib import Path
# Put the directory two levels above this file (the backend root, per the
# original note) at the front of the import search path.
backend_dir = Path(__file__).parent.parent
sys.path[:0] = [str(backend_dir)]
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over dimension 1, ignoring masked-out positions.

    Args:
        model_output: Indexable model result; element 0 is used as the
            token-embedding tensor (assumed (batch, seq_len, dim) -- confirm
            against the model's output type).
        attention_mask: Mask with one fewer dimension than the token
            embeddings; it is broadcast across the last dimension. Positions
            with value 0 contribute nothing to the average.

    Returns:
        Tensor of the mask-weighted sum over dimension 1 divided by the mask
        total, with the divisor clamped to at least 1e-9 so an all-zero mask
        yields zeros instead of a division by zero.
    """
    hidden_states = model_output[0]
    # Stretch the mask to the embedding shape so it can weight every feature.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = torch.sum(hidden_states * mask, 1)
    token_counts = torch.clamp(mask.sum(1), min=1e-9)
    return weighted_sum / token_counts
def test_nomic_cross_modal_search():
    """Test Nomic's cross-modal text-to-image search.

    Downloads one test image, embeds it with the Nomic vision model, embeds
    three ``search_query:``-prefixed sentences with the Nomic text model, and
    prints the dot product between the L2-normalised image embedding and each
    L2-normalised text embedding.

    Returns:
        bool: True when the better of the two cat-related query scores is
        strictly greater than the dog query score, otherwise False.

    Raises:
        requests.HTTPError: If the image download returns an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # NOTE(review): these are absolute cut-offs used only for the printed
    # GOOD/MEDIUM/LOW label. The run pasted with this script shows all three
    # scores below 0.1 while the cat queries still outrank the dog query, so
    # the labels may be miscalibrated for this model pair -- confirm the
    # expected score range before relying on them.
    strong_threshold = 0.3
    moderate_threshold = 0.1
    banner = "=" * 60

    print("\n" + banner)
    print("Testing Nomic Cross-Modal Text-to-Image Search")
    print(banner)

    # Load vision model
    print("\n[1/4] Loading vision model...")
    processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
    vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
    vision_model.eval()
    print("✅ Vision model loaded")

    # Load image
    print("\n[2/4] Loading test image...")
    url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
    # Bound the wait, fail loudly on HTTP errors, and release the connection.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        # convert() forces the pixel data to be read while the stream is
        # still open and guarantees a 3-channel image for the processor.
        image = Image.open(response.raw).convert("RGB")
    print("✅ Image loaded (cats)")

    # Generate image embedding
    print("\n[3/4] Generating image embedding...")
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        img_emb = vision_model(**inputs).last_hidden_state
    # Use position 0 along dimension 1 as the image vector (presumably the
    # CLS token -- confirm against the model card), then L2-normalise.
    img_embeddings = F.normalize(img_emb[:, 0], p=2, dim=1)
    print(f"✅ Image embedding generated: shape={img_embeddings.shape}")

    # Load text model
    print("\n[4/4] Loading text model and generating query embeddings...")
    tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
    text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
    text_model.eval()

    # Test queries with search_query prefix
    sentences = [
        'search_query: What are cute animals to cuddle with?',
        'search_query: What do cats look like?',
        'search_query: Dogs playing in a park',  # Negative example
    ]
    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = text_model(**encoded_input)

    # Apply mean pooling + layer norm + L2 norm
    text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
    text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
    print(f"✅ Text embeddings generated: shape={text_embeddings.shape}")

    # Compute similarity scores
    print("\n" + banner)
    print("Cross-Modal Similarity Scores")
    print(banner)
    # Both sides are unit-length, so each entry is a cosine similarity.
    scores = torch.matmul(img_embeddings, text_embeddings.T)
    for i, query in enumerate(sentences):
        score = scores[0, i].item()
        query_text = query.replace('search_query: ', '')
        print(f"\nQuery: {query_text}")
        print(f"Score: {score:.4f}")
        if score > strong_threshold:
            print("✅ GOOD - Strong similarity")
        elif score > moderate_threshold:
            print("⚠️ MEDIUM - Moderate similarity")
        else:
            print("❌ LOW - Weak similarity")

    print("\n" + banner)
    print("Test complete!")
    print(banner)

    # Verify that cat-related queries have higher scores than dog query
    cat_score = max(scores[0, 0].item(), scores[0, 1].item())
    dog_score = scores[0, 2].item()
    print(f"\nBest cat-related score: {cat_score:.4f}")
    print(f"Dog-related score: {dog_score:.4f}")
    if cat_score > dog_score:
        print("✅ PASS: Cat queries score higher than dog query")
        return True
    print("❌ FAIL: Expected cat queries to score higher")
    return False
if __name__ == "__main__":
    # Translate the test's truthy/falsy result into a process exit status:
    # 0 when the test reports success, 1 otherwise.
    sys.exit(int(not test_nomic_cross_modal_search()))
✅ Text embeddings generated: shape=torch.Size([3, 768])
============================================================
Cross-Modal Similarity Scores
Query: What are cute animals to cuddle with?
Score: 0.0751
❌ LOW - Weak similarity
Query: What do cats look like?
Score: 0.0684
❌ LOW - Weak similarity
Query: Dogs playing in a park
Score: 0.0535
❌ LOW - Weak similarity