"""Module-level configuration constants.

Every setting is a plain module attribute so that other modules can read it
as ``<module>.<name>``.  Importing this module queries CUDA availability once
(to pick ``device``) and reads the current working directory once (for
``captions_path``); it has no other side effects.
"""

import os

import torch

# --- Run / data settings -----------------------------------------------------
debug = True
# Left empty here; presumably must be pointed at the image directory before
# use -- TODO confirm against the data-loading code.
image_path = ""
# Resolved once at import time to the directory the process was started from.
captions_path = os.getcwd()
batch_size = 20
num_workers = 0

# --- Optimisation settings ---------------------------------------------------
lr = 1e-3
weight_decay = 1e-3
# NOTE(review): `patience` and `factor` look like learning-rate-scheduler
# arguments (e.g. reduce-on-plateau) -- confirm against the training script.
patience = 2
factor = 0.5
epochs = 5
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Encoder settings --------------------------------------------------------
model_name = 'resnet50'
# NOTE(review): presumably the feature width produced by `model_name`;
# keep in sync if the image encoder changes.
image_embedding = 2048
text_encoder_model = "distilbert/distilbert-base-uncased"
# NOTE(review): presumably the hidden size of `text_encoder_model`;
# keep in sync if the text encoder changes.
text_embedding = 768
text_tokenizer = "distilbert/distilbert-base-uncased"
max_length = 200

# NOTE(review): presumably applied to the encoders (load pretrained weights /
# allow fine-tuning) -- confirm where these flags are consumed.
pretrained = False
trainable = False
# NOTE(review): presumably the temperature used to scale similarity logits.
temperature = 1.0

# NOTE(review): presumably the side length images are resized to -- confirm.
size = 224

# --- Projection settings -----------------------------------------------------
# NOTE(review): presumably shared by the image and text projection heads.
num_projection_layers = 1
projection_dim = 256
dropout = 0.1