import io

import requests
import torch
from omegaconf import OmegaConf
from PIL import Image

from lavis.common.registry import registry
from lavis.models import load_model, load_preprocess

from generate import generate
|
|
# Fetch the example image once at startup.
# - timeout: avoid hanging forever on a dead connection.
# - raise_for_status: fail fast instead of handing an HTTP error page to PIL.
# - BytesIO(response.content): requests decodes any transfer encoding (gzip etc.),
#   unlike the raw stream, so PIL always sees valid JPEG bytes.
url = "https://iliad.stanford.edu/pg-vlm/example_images/ceramic_bowl.jpg"
response = requests.get(url, timeout=30)
response.raise_for_status()
example_image = Image.open(io.BytesIO(response.content)).convert("RGB")
|
|
# Run on GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    run_device = "cuda"
else:
    run_device = "cpu"

# Load the instruction-tuned BLIP-2 (Flan-T5 XXL) model in eval mode,
# with weights taken from the local PG-VLM checkpoint file.
vlm = load_model(
    name='blip2_t5_instruct',
    model_type='flant5xxl',
    checkpoint='pgvlm_weights.bin',
    is_eval=True,
    device=run_device,
)

# Feed no text into the Q-Former — presumably how this checkpoint was
# trained/intended to be queried; TODO confirm against the generate() helper.
vlm.qformer_text_input = False
|
|
# Rebuild the visual preprocessing pipeline that matches the loaded model,
# from the model class's default config file.
blip2_cls = registry.get_model_class('blip2_t5_instruct')
default_cfg = OmegaConf.load(blip2_cls.default_config_path('flant5xxl'))
vis_processors, _ = load_preprocess(default_cfg.preprocess)
# Use the eval-time transform (presumably deterministic, without training
# augmentations — TODO confirm in the LAVIS preprocess config).
processor = vis_processors["eval"]
|
|
# Classification prompt; the two literals concatenate to the exact original string.
prompt = (
    'Question: Classify this object as transparent, translucent, or opaque? '
    'Respond unknown if you are not sure. Short answer:'
)

# Batch of one preprocessed image, moved to the model's device.
# unsqueeze(0) adds the leading batch dimension.
image_batch = processor(example_image).unsqueeze(0).to(vlm.device)

question_samples = {
    'prompt': prompt,
    'image': image_batch,
}

# Sample three candidate answers along with their scores and show them.
answers, scores = generate(
    vlm,
    question_samples,
    length_penalty=0,
    repetition_penalty=1,
    num_captions=3,
)
print(answers, scores)
| |
|
|