| { |
| "module": "keras_hub.src.models.clip.clip_backbone", |
| "class_name": "CLIPBackbone", |
| "config": { |
| "name": "clip_backbone", |
| "trainable": true, |
| "dtype": { |
| "module": "keras", |
| "class_name": "DTypePolicy", |
| "config": { |
| "name": "float32" |
| }, |
| "registered_name": null |
| }, |
| "vision_encoder": { |
| "module": "keras_hub.src.models.clip.clip_vision_encoder", |
| "class_name": "CLIPVisionEncoder", |
| "config": { |
| "name": "clip_vision_encoder", |
| "trainable": true, |
| "dtype": { |
| "module": "keras", |
| "class_name": "DTypePolicy", |
| "config": { |
| "name": "float32" |
| }, |
| "registered_name": null |
| }, |
| "patch_size": 14, |
| "hidden_dim": 1024, |
| "num_layers": 24, |
| "num_heads": 16, |
| "intermediate_dim": 4096, |
| "intermediate_activation": "quick_gelu", |
| "intermediate_output_index": null, |
| "image_shape": [ |
| 336, |
| 336, |
| 3 |
| ] |
| }, |
| "registered_name": "keras_hub>CLIPVisionEncoder" |
| }, |
| "text_encoder": { |
| "module": "keras_hub.src.models.clip.clip_text_encoder", |
| "class_name": "CLIPTextEncoder", |
| "config": { |
| "name": "clip_text_encoder", |
| "trainable": true, |
| "dtype": { |
| "module": "keras", |
| "class_name": "DTypePolicy", |
| "config": { |
| "name": "float32" |
| }, |
| "registered_name": null |
| }, |
| "vocabulary_size": 49408, |
| "embedding_dim": 768, |
| "hidden_dim": 768, |
| "num_layers": 12, |
| "num_heads": 12, |
| "intermediate_dim": 3072, |
| "intermediate_activation": "quick_gelu", |
| "intermediate_output_index": null, |
| "max_sequence_length": 77 |
| }, |
| "registered_name": "keras_hub>CLIPTextEncoder" |
| }, |
| "projection_dim": 768 |
| }, |
| "registered_name": "keras_hub>CLIPBackbone" |
| } |