| import pandas as pd |
| import numpy as np |
| import pickle |
| from tqdm import tqdm |
| from model.feature_extractor import FeatureExtractor |
| from utils.image_utils import load_image_from_url |
|
|
def main(
    csv_path: str = "data/product_data.csv",
    embeddings_path: str = "data/embeddings.npy",
    urls_path: str = "data/image_urls.pkl",
    url_column: str = "IMAGE_URL",
) -> None:
    """Extract an embedding for each loadable product image and save the results.

    Reads the CSV at ``csv_path``, passes every non-missing value of
    ``url_column`` to ``load_image_from_url``, and runs
    ``FeatureExtractor.extract`` on each image that loads. URLs for which the
    loader returns ``None`` are skipped.

    Two files are written:

    * ``embeddings_path`` -- the collected embeddings, converted with
      ``np.array`` and stored with ``np.save``.
    * ``urls_path`` -- a pickled list of the URLs that produced an embedding,
      in the same order, so entry ``i`` of the embeddings belongs to URL ``i``.

    Args:
        csv_path: CSV file containing the product data.
        embeddings_path: Destination for the embeddings array.
        urls_path: Destination for the pickled list of URLs.
        url_column: Name of the CSV column holding the image URLs.

    Raises:
        KeyError: If ``url_column`` is not a column of the CSV.
    """
    df = pd.read_csv(csv_path)
    fe = FeatureExtractor()

    # Empty CSV cells are read as NaN (a float), which is not a URL; drop them
    # up front instead of handing them to the image loader.
    urls = df[url_column].dropna()

    embeddings = []
    valid_urls = []
    for url in tqdm(urls, desc="Extracting embeddings"):
        img = load_image_from_url(url)
        if img is None:
            # The loader signals an unusable image with None; skip it so the
            # two output lists stay aligned.
            continue
        embeddings.append(fe.extract(img))
        valid_urls.append(url)

    np.save(embeddings_path, np.array(embeddings))

    with open(urls_path, "wb") as f:
        pickle.dump(valid_urls, f)

    print(f"Saved {len(valid_urls)} embeddings and URLs.")
|
|
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()