| { |
| "_name_or_path": "Math-PUMA_DeepSeek-Math-VL-7B", |
| "aligner_config": { |
| "cls": "MlpProjector", |
| "model_type": "aligner", |
| "params": { |
| "depth": 2, |
| "input_dim": 1024, |
| "n_embed": 4096, |
| "projector_type": "low_high_hybrid_split_mlp_gelu" |
| } |
| }, |
| "architectures": [ |
| "MultiModalityCausalLM" |
| ], |
| "language_config": { |
| "max_position_embeddings": 16384, |
| "model_type": "llama", |
| "num_hidden_layers": 30, |
| "torch_dtype": "float16", |
| "vocab_size": 102400 |
| }, |
| "model_type": "multi_modality", |
| "torch_dtype": "float32", |
| "transformers_version": "4.42.0", |
| "vision_config": { |
| "cls": "HybridVisionTower", |
| "model_type": "vision", |
| "params": { |
| "concat_type": "tuple", |
| "freeze_high": true, |
| "freeze_low": true, |
| "high_res_cfg": { |
| "ckpt_path": "", |
| "image_size": 1024, |
| "model_name": "sam_b_downsample", |
| "output_dim": 1024, |
| "pixel_mean": [ |
| 0.48145466, |
| 0.4578275, |
| 0.40821073 |
| ], |
| "pixel_std": [ |
| 0.26862954, |
| 0.26130258, |
| 0.27577711 |
| ], |
| "select_feature": "same", |
| "select_layer": -1 |
| }, |
| "low_res_cfg": { |
| "ckpt_path": "", |
| "image_size": 384, |
| "model_name": "siglip_large_patch16_384", |
| "output_dim": 1024, |
| "pixel_mean": [ |
| 0.5, |
| 0.5, |
| 0.5 |
| ], |
| "pixel_std": [ |
| 0.5, |
| 0.5, |
| 0.5 |
| ], |
| "select_feature": "same", |
| "select_layer": -1 |
| } |
| } |
| } |
| } |
|
|