{
  "eval_type_id": "sparse_probing",
  "eval_config": {
    "random_seed": 42,
    "dataset_names": [
      "LabHC/bias_in_bios_class_set1",
      "LabHC/bias_in_bios_class_set2",
      "LabHC/bias_in_bios_class_set3",
      "canrager/amazon_reviews_mcauley_1and5",
      "canrager/amazon_reviews_mcauley_1and5_sentiment",
      "codeparrot/github-code",
      "fancyzhx/ag_news",
      "Helsinki-NLP/europarl"
    ],
    "probe_train_set_size": 4000,
    "probe_test_set_size": 1000,
    "context_length": 128,
    "sae_batch_size": 125,
    "llm_batch_size": 32,
    "llm_dtype": "bfloat16",
    "model_name": "gemma-2-2b",
    "k_values": [
      1,
      2,
      5
    ],
    "lower_vram_usage": false
  },
  "eval_id": "23b811f8-60d2-4669-9569-74fb61ee47a0",
  "datetime_epoch_millis": 1745753616182,
  "eval_result_metrics": {
    "llm": {
      "llm_test_accuracy": 0.9424125477671622,
      "llm_top_1_test_accuracy": 0.68273125,
      "llm_top_2_test_accuracy": 0.7352375,
      "llm_top_5_test_accuracy": 0.8027937499999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null
    },
    "sae": {
      "sae_test_accuracy": 0.9407375391572714,
      "sae_top_1_test_accuracy": 0.74713125,
      "sae_top_2_test_accuracy": 0.8054499999999999,
      "sae_top_5_test_accuracy": 0.850675,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  },
  "eval_result_details": [
    {
      "dataset_name": "LabHC/bias_in_bios_class_set1_results",
      "llm_test_accuracy": 0.9590000510215759,
      "llm_top_1_test_accuracy": 0.6634,
      "llm_top_2_test_accuracy": 0.6988000000000001,
      "llm_top_5_test_accuracy": 0.7651999999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9594000458717347,
      "sae_top_1_test_accuracy": 0.792,
      "sae_top_2_test_accuracy": 0.8103999999999999,
      "sae_top_5_test_accuracy": 0.8722,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set2_results",
      "llm_test_accuracy": 0.9476000547409058,
      "llm_top_1_test_accuracy": 0.692,
      "llm_top_2_test_accuracy": 0.7265999999999999,
      "llm_top_5_test_accuracy": 0.7922,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9402000427246093,
      "sae_top_1_test_accuracy": 0.713,
      "sae_top_2_test_accuracy": 0.7872,
      "sae_top_5_test_accuracy": 0.8347999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "LabHC/bias_in_bios_class_set3_results",
      "llm_test_accuracy": 0.9198000431060791,
      "llm_top_1_test_accuracy": 0.6822,
      "llm_top_2_test_accuracy": 0.7003999999999999,
      "llm_top_5_test_accuracy": 0.7762,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9170000433921814,
      "sae_top_1_test_accuracy": 0.6900000000000001,
      "sae_top_2_test_accuracy": 0.771,
      "sae_top_5_test_accuracy": 0.8182,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
      "llm_test_accuracy": 0.9044000506401062,
      "llm_top_1_test_accuracy": 0.631,
      "llm_top_2_test_accuracy": 0.6616,
      "llm_top_5_test_accuracy": 0.7238,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.8958000540733337,
      "sae_top_1_test_accuracy": 0.6629999999999999,
      "sae_top_2_test_accuracy": 0.7424,
      "sae_top_5_test_accuracy": 0.7847999999999999,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
      "llm_test_accuracy": 0.8935000598430634,
      "llm_top_1_test_accuracy": 0.609,
      "llm_top_2_test_accuracy": 0.648,
      "llm_top_5_test_accuracy": 0.734,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.8955000340938568,
      "sae_top_1_test_accuracy": 0.679,
      "sae_top_2_test_accuracy": 0.742,
      "sae_top_5_test_accuracy": 0.777,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "codeparrot/github-code_results",
      "llm_test_accuracy": 0.9724000453948974,
      "llm_top_1_test_accuracy": 0.6896,
      "llm_top_2_test_accuracy": 0.745,
      "llm_top_5_test_accuracy": 0.8248000000000001,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9710000395774842,
      "sae_top_1_test_accuracy": 0.7312000000000001,
      "sae_top_2_test_accuracy": 0.7809999999999999,
      "sae_top_5_test_accuracy": 0.8586,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "fancyzhx/ag_news_results",
      "llm_test_accuracy": 0.9430000483989716,
      "llm_top_1_test_accuracy": 0.71925,
      "llm_top_2_test_accuracy": 0.7835,
      "llm_top_5_test_accuracy": 0.8487499999999999,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9480000287294388,
      "sae_top_1_test_accuracy": 0.7922500000000001,
      "sae_top_2_test_accuracy": 0.837,
      "sae_top_5_test_accuracy": 0.883,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    },
    {
      "dataset_name": "Helsinki-NLP/europarl_results",
      "llm_test_accuracy": 0.9996000289916992,
      "llm_top_1_test_accuracy": 0.7754,
      "llm_top_2_test_accuracy": 0.9179999999999999,
      "llm_top_5_test_accuracy": 0.9574,
      "llm_top_10_test_accuracy": null,
      "llm_top_20_test_accuracy": null,
      "llm_top_50_test_accuracy": null,
      "llm_top_100_test_accuracy": null,
      "sae_test_accuracy": 0.9990000247955322,
      "sae_top_1_test_accuracy": 0.9166000000000001,
      "sae_top_2_test_accuracy": 0.9725999999999999,
      "sae_top_5_test_accuracy": 0.9768000000000001,
      "sae_top_10_test_accuracy": null,
      "sae_top_20_test_accuracy": null,
      "sae_top_50_test_accuracy": null,
      "sae_top_100_test_accuracy": null
    }
  ],
  "sae_bench_commit_hash": "Unknown",
  "sae_lens_id": "blocks.3.hook_resid_post",
  "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc",
  "sae_lens_version": "5.9.1",
  "sae_cfg_dict": {
    "architecture": "jumprelu",
    "d_in": 2304,
    "d_sae": 32768,
    "activation_fn_str": "relu",
    "apply_b_dec_to_input": true,
    "finetuning_scaling_factor": false,
    "context_size": 1024,
    "model_name": "gemma-2-2b",
    "hook_name": "blocks.3.hook_resid_post",
    "hook_layer": 3,
    "hook_head_index": null,
    "prepend_bos": true,
    "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
    "dataset_trust_remote_code": true,
    "normalize_activations": "none",
    "dtype": "torch.bfloat16",
    "device": "cuda",
    "sae_lens_training_version": "5.5.1",
    "activation_fn_kwargs": {
      "k": 40
    },
    "neuronpedia_id": null,
    "model_from_pretrained_kwargs": {
      "center_writing_weights": false
    },
    "seqpos_slice": [
      null
    ]
  },
  "eval_result_unstructured": {
    "LabHC/bias_in_bios_class_set1_results": {
      "sae_test_accuracy": {
        "0": 0.9310000538825989,
        "1": 0.9570000171661377,
        "2": 0.9510000348091125,
        "6": 0.9860000610351562,
        "9": 0.9720000624656677
      },
      "llm_test_accuracy": {
        "0": 0.9350000619888306,
        "1": 0.9540000557899475,
        "2": 0.9420000314712524,
        "6": 0.9920000433921814,
        "9": 0.9720000624656677
      },
      "llm_top_1_test_accuracy": {
        "0": 0.607,
        "1": 0.592,
        "2": 0.579,
        "6": 0.776,
        "9": 0.763
      },
      "llm_top_2_test_accuracy": {
        "0": 0.612,
        "1": 0.656,
        "2": 0.686,
        "6": 0.789,
        "9": 0.751
      },
      "llm_top_5_test_accuracy": {
        "0": 0.702,
        "1": 0.724,
        "2": 0.721,
        "6": 0.883,
        "9": 0.796
      },
      "sae_top_1_test_accuracy": {
        "0": 0.811,
        "1": 0.622,
        "2": 0.852,
        "6": 0.753,
        "9": 0.922
      },
      "sae_top_2_test_accuracy": {
        "0": 0.809,
        "1": 0.66,
        "2": 0.854,
        "6": 0.788,
        "9": 0.941
      },
      "sae_top_5_test_accuracy": {
        "0": 0.815,
        "1": 0.776,
        "2": 0.873,
        "6": 0.95,
        "9": 0.947
      }
    },
    "LabHC/bias_in_bios_class_set2_results": {
      "sae_test_accuracy": {
        "11": 0.9590000510215759,
        "13": 0.9410000443458557,
        "14": 0.9450000524520874,
        "18": 0.9000000357627869,
        "19": 0.956000030040741
      },
      "llm_test_accuracy": {
        "11": 0.9480000734329224,
        "13": 0.9510000348091125,
        "14": 0.9480000734329224,
        "18": 0.9320000410079956,
        "19": 0.9590000510215759
      },
      "llm_top_1_test_accuracy": {
        "11": 0.661,
        "13": 0.678,
        "14": 0.657,
        "18": 0.702,
        "19": 0.762
      },
      "llm_top_2_test_accuracy": {
        "11": 0.766,
        "13": 0.731,
        "14": 0.653,
        "18": 0.7,
        "19": 0.783
      },
      "llm_top_5_test_accuracy": {
        "11": 0.863,
        "13": 0.81,
        "14": 0.733,
        "18": 0.721,
        "19": 0.834
      },
      "sae_top_1_test_accuracy": {
        "11": 0.817,
        "13": 0.671,
        "14": 0.661,
        "18": 0.675,
        "19": 0.741
      },
      "sae_top_2_test_accuracy": {
        "11": 0.869,
        "13": 0.754,
        "14": 0.829,
        "18": 0.713,
        "19": 0.771
      },
      "sae_top_5_test_accuracy": {
        "11": 0.864,
        "13": 0.753,
        "14": 0.84,
        "18": 0.829,
        "19": 0.888
      }
    },
    "LabHC/bias_in_bios_class_set3_results": {
      "sae_test_accuracy": {
        "20": 0.9410000443458557,
        "21": 0.909000039100647,
        "22": 0.9020000696182251,
        "25": 0.956000030040741,
        "26": 0.8770000338554382
      },
      "llm_test_accuracy": {
        "20": 0.940000057220459,
        "21": 0.9080000519752502,
        "22": 0.9100000262260437,
        "25": 0.9600000381469727,
        "26": 0.8810000419616699
      },
      "llm_top_1_test_accuracy": {
        "20": 0.773,
        "21": 0.723,
        "22": 0.624,
        "25": 0.651,
        "26": 0.64
      },
      "llm_top_2_test_accuracy": {
        "20": 0.782,
        "21": 0.73,
        "22": 0.593,
        "25": 0.77,
        "26": 0.627
      },
      "llm_top_5_test_accuracy": {
        "20": 0.85,
        "21": 0.798,
        "22": 0.753,
        "25": 0.802,
        "26": 0.678
      },
      "sae_top_1_test_accuracy": {
        "20": 0.718,
        "21": 0.754,
        "22": 0.584,
        "25": 0.698,
        "26": 0.696
      },
      "sae_top_2_test_accuracy": {
        "20": 0.862,
        "21": 0.767,
        "22": 0.842,
        "25": 0.691,
        "26": 0.693
      },
      "sae_top_5_test_accuracy": {
        "20": 0.879,
        "21": 0.763,
        "22": 0.832,
        "25": 0.834,
        "26": 0.783
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_results": {
      "sae_test_accuracy": {
        "1": 0.9120000600814819,
        "2": 0.9180000424385071,
        "3": 0.893000066280365,
        "5": 0.8980000615119934,
        "6": 0.8580000400543213
      },
      "llm_test_accuracy": {
        "1": 0.9350000619888306,
        "2": 0.9130000472068787,
        "3": 0.909000039100647,
        "5": 0.9070000648498535,
        "6": 0.8580000400543213
      },
      "llm_top_1_test_accuracy": {
        "1": 0.678,
        "2": 0.683,
        "3": 0.571,
        "5": 0.576,
        "6": 0.647
      },
      "llm_top_2_test_accuracy": {
        "1": 0.706,
        "2": 0.76,
        "3": 0.612,
        "5": 0.571,
        "6": 0.659
      },
      "llm_top_5_test_accuracy": {
        "1": 0.778,
        "2": 0.769,
        "3": 0.664,
        "5": 0.71,
        "6": 0.698
      },
      "sae_top_1_test_accuracy": {
        "1": 0.648,
        "2": 0.837,
        "3": 0.583,
        "5": 0.523,
        "6": 0.724
      },
      "sae_top_2_test_accuracy": {
        "1": 0.761,
        "2": 0.834,
        "3": 0.596,
        "5": 0.791,
        "6": 0.73
      },
      "sae_top_5_test_accuracy": {
        "1": 0.77,
        "2": 0.874,
        "3": 0.691,
        "5": 0.859,
        "6": 0.73
      }
    },
    "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
      "sae_test_accuracy": {
        "1.0": 0.8950000405311584,
        "5.0": 0.8960000276565552
      },
      "llm_test_accuracy": {
        "1.0": 0.8940000534057617,
        "5.0": 0.893000066280365
      },
      "llm_top_1_test_accuracy": {
        "1.0": 0.609,
        "5.0": 0.609
      },
      "llm_top_2_test_accuracy": {
        "1.0": 0.648,
        "5.0": 0.648
      },
      "llm_top_5_test_accuracy": {
        "1.0": 0.734,
        "5.0": 0.734
      },
      "sae_top_1_test_accuracy": {
        "1.0": 0.679,
        "5.0": 0.679
      },
      "sae_top_2_test_accuracy": {
        "1.0": 0.742,
        "5.0": 0.742
      },
      "sae_top_5_test_accuracy": {
        "1.0": 0.777,
        "5.0": 0.777
      }
    },
    "codeparrot/github-code_results": {
      "sae_test_accuracy": {
        "C": 0.9640000462532043,
        "Python": 0.984000027179718,
        "HTML": 0.9860000610351562,
        "Java": 0.9660000205039978,
        "PHP": 0.9550000429153442
      },
      "llm_test_accuracy": {
        "C": 0.9580000638961792,
        "Python": 0.987000048160553,
        "HTML": 0.987000048160553,
        "Java": 0.9730000495910645,
        "PHP": 0.9570000171661377
      },
      "llm_top_1_test_accuracy": {
        "C": 0.64,
        "Python": 0.644,
        "HTML": 0.919,
        "Java": 0.651,
        "PHP": 0.594
      },
      "llm_top_2_test_accuracy": {
        "C": 0.712,
        "Python": 0.697,
        "HTML": 0.935,
        "Java": 0.728,
        "PHP": 0.653
      },
      "llm_top_5_test_accuracy": {
        "C": 0.838,
        "Python": 0.821,
        "HTML": 0.935,
        "Java": 0.776,
        "PHP": 0.754
      },
      "sae_top_1_test_accuracy": {
        "C": 0.645,
        "Python": 0.664,
        "HTML": 0.796,
        "Java": 0.643,
        "PHP": 0.908
      },
      "sae_top_2_test_accuracy": {
        "C": 0.628,
        "Python": 0.9,
        "HTML": 0.826,
        "Java": 0.643,
        "PHP": 0.908
      },
      "sae_top_5_test_accuracy": {
        "C": 0.681,
        "Python": 0.966,
        "HTML": 0.944,
        "Java": 0.773,
        "PHP": 0.929
      }
    },
    "fancyzhx/ag_news_results": {
      "sae_test_accuracy": {
        "0": 0.9330000281333923,
        "1": 0.984000027179718,
        "2": 0.9240000247955322,
        "3": 0.9510000348091125
      },
      "llm_test_accuracy": {
        "0": 0.9390000700950623,
        "1": 0.9850000739097595,
        "2": 0.9140000343322754,
        "3": 0.9340000152587891
      },
      "llm_top_1_test_accuracy": {
        "0": 0.756,
        "1": 0.726,
        "2": 0.66,
        "3": 0.735
      },
      "llm_top_2_test_accuracy": {
        "0": 0.783,
        "1": 0.879,
        "2": 0.702,
        "3": 0.77
      },
      "llm_top_5_test_accuracy": {
        "0": 0.858,
        "1": 0.89,
        "2": 0.815,
        "3": 0.832
      },
      "sae_top_1_test_accuracy": {
        "0": 0.778,
        "1": 0.943,
        "2": 0.804,
        "3": 0.644
      },
      "sae_top_2_test_accuracy": {
        "0": 0.798,
        "1": 0.951,
        "2": 0.819,
        "3": 0.78
      },
      "sae_top_5_test_accuracy": {
        "0": 0.844,
        "1": 0.956,
        "2": 0.839,
        "3": 0.893
      }
    },
    "Helsinki-NLP/europarl_results": {
      "sae_test_accuracy": {
        "en": 0.9980000257492065,
        "fr": 1.0,
        "de": 0.999000072479248,
        "es": 0.9980000257492065,
        "nl": 1.0
      },
      "llm_test_accuracy": {
        "en": 0.999000072479248,
        "fr": 0.999000072479248,
        "de": 1.0,
        "es": 1.0,
        "nl": 1.0
      },
      "llm_top_1_test_accuracy": {
        "en": 0.851,
        "fr": 0.651,
        "de": 0.843,
        "es": 0.664,
        "nl": 0.868
      },
      "llm_top_2_test_accuracy": {
        "en": 0.982,
        "fr": 0.903,
        "de": 0.853,
        "es": 0.951,
        "nl": 0.901
      },
      "llm_top_5_test_accuracy": {
        "en": 0.996,
        "fr": 0.979,
        "de": 0.885,
        "es": 0.981,
        "nl": 0.946
      },
      "sae_top_1_test_accuracy": {
        "en": 0.998,
        "fr": 0.863,
        "de": 0.994,
        "es": 0.874,
        "nl": 0.854
      },
      "sae_top_2_test_accuracy": {
        "en": 1.0,
        "fr": 0.995,
        "de": 1.0,
        "es": 0.997,
        "nl": 0.871
      },
      "sae_top_5_test_accuracy": {
        "en": 0.997,
        "fr": 0.996,
        "de": 0.999,
        "es": 0.997,
        "nl": 0.895
      }
    }
  }
}