| import json |
| import os |
| from glob import glob |
| imagefile = open('dataset/SBU_captioned_photo_dataset_urls.txt', 'r').readlines() |
| captionfile = open('dataset/SBU_captioned_photo_dataset_captions.txt', 'r').readlines() |
|
|
| valid_list = list(glob("images/*")) |
| valid_list = [ i.split('/')[-1] for i in valid_list] |
| |
|
|
| name2cap = {} |
| for imageurl, caption in zip(imagefile, captionfile): |
| filename = imageurl.strip().split('/')[-1] |
| name2cap[filename] = caption.strip() |
|
|
| data_list = {} |
| for valid_img in valid_list: |
| data_list[valid_img]=name2cap[valid_img] |
|
|
| fp = open('annotations/subcaption.json', 'w') |
| json.dump(data_list, fp) |
|
|
| print(len(data_list)) |
|
|