| { |
| "eval_type_id": "tpp", |
| "eval_config": { |
| "random_seed": 42, |
| "dataset_names": [ |
| "LabHC/bias_in_bios_class_set1", |
| "canrager/amazon_reviews_mcauley_1and5" |
| ], |
| "perform_scr": false, |
| "early_stopping_patience": 20, |
| "train_set_size": 4000, |
| "test_set_size": 1000, |
| "context_length": 128, |
| "probe_train_batch_size": 16, |
| "probe_test_batch_size": 500, |
| "probe_epochs": 20, |
| "probe_lr": 0.001, |
| "probe_l1_penalty": 0.001, |
| "sae_batch_size": 125, |
| "llm_batch_size": 32, |
| "llm_dtype": "bfloat16", |
| "lower_vram_usage": false, |
| "model_name": "gemma-2-2b", |
| "n_values": [ |
| 2, |
| 5, |
| 10, |
| 20, |
| 50, |
| 100, |
| 500 |
| ], |
| "column1_vals_lookup": { |
| "LabHC/bias_in_bios_class_set1": [ |
| [ |
| "professor", |
| "nurse" |
| ], |
| [ |
| "architect", |
| "journalist" |
| ], |
| [ |
| "surgeon", |
| "psychologist" |
| ], |
| [ |
| "attorney", |
| "teacher" |
| ] |
| ], |
| "canrager/amazon_reviews_mcauley_1and5": [ |
| [ |
| "Books", |
| "CDs_and_Vinyl" |
| ], |
| [ |
| "Software", |
| "Electronics" |
| ], |
| [ |
| "Pet_Supplies", |
| "Office_Products" |
| ], |
| [ |
| "Industrial_and_Scientific", |
| "Toys_and_Games" |
| ] |
| ] |
| } |
| }, |
| "eval_id": "91ffb92c-ec01-47b1-9f45-773ec12047b3", |
| "datetime_epoch_millis": 1745752326730, |
| "eval_result_metrics": { |
| "tpp_metrics": { |
| "tpp_threshold_2_total_metric": 0.04567500650882721, |
| "tpp_threshold_2_intended_diff_only": 0.06270001530647278, |
| "tpp_threshold_2_unintended_diff_only": 0.01702500879764557, |
| "tpp_threshold_5_total_metric": 0.1245500087738037, |
| "tpp_threshold_5_intended_diff_only": 0.183100026845932, |
| "tpp_threshold_5_unintended_diff_only": 0.0585500180721283, |
| "tpp_threshold_10_total_metric": 0.152550008893013, |
| "tpp_threshold_10_intended_diff_only": 0.2551000237464905, |
| "tpp_threshold_10_unintended_diff_only": 0.10255001485347748, |
| "tpp_threshold_20_total_metric": 0.19025001674890518, |
| "tpp_threshold_20_intended_diff_only": 0.3256000280380249, |
| "tpp_threshold_20_unintended_diff_only": 0.13535001128911972, |
| "tpp_threshold_50_total_metric": 0.22027501165866853, |
| "tpp_threshold_50_intended_diff_only": 0.4040000319480896, |
| "tpp_threshold_50_unintended_diff_only": 0.18372502028942106, |
| "tpp_threshold_100_total_metric": 0.22200002819299697, |
| "tpp_threshold_100_intended_diff_only": 0.4347000479698181, |
| "tpp_threshold_100_unintended_diff_only": 0.21270001977682113, |
| "tpp_threshold_500_total_metric": 0.16905003488063813, |
| "tpp_threshold_500_intended_diff_only": 0.4365000545978546, |
| "tpp_threshold_500_unintended_diff_only": 0.2674500197172165 |
| } |
| }, |
| "eval_result_details": [ |
| { |
| "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results", |
| "tpp_threshold_2_total_metric": 0.06430000960826873, |
| "tpp_threshold_2_intended_diff_only": 0.09200001955032348, |
| "tpp_threshold_2_unintended_diff_only": 0.027700009942054748, |
| "tpp_threshold_5_total_metric": 0.19450001418590546, |
| "tpp_threshold_5_intended_diff_only": 0.30460003614425657, |
| "tpp_threshold_5_unintended_diff_only": 0.11010002195835114, |
| "tpp_threshold_10_total_metric": 0.20154999792575837, |
| "tpp_threshold_10_intended_diff_only": 0.3920000195503235, |
| "tpp_threshold_10_unintended_diff_only": 0.19045002162456512, |
| "tpp_threshold_20_total_metric": 0.18830000460147858, |
| "tpp_threshold_20_intended_diff_only": 0.4388000249862671, |
| "tpp_threshold_20_unintended_diff_only": 0.2505000203847885, |
| "tpp_threshold_50_total_metric": 0.12605000734329225, |
| "tpp_threshold_50_intended_diff_only": 0.462600040435791, |
| "tpp_threshold_50_unintended_diff_only": 0.33655003309249876, |
| "tpp_threshold_100_total_metric": 0.09565002620220184, |
| "tpp_threshold_100_intended_diff_only": 0.4664000511169434, |
| "tpp_threshold_100_unintended_diff_only": 0.3707500249147415, |
| "tpp_threshold_500_total_metric": 0.05740003287792206, |
| "tpp_threshold_500_intended_diff_only": 0.46680005788803103, |
| "tpp_threshold_500_unintended_diff_only": 0.40940002501010897 |
| }, |
| { |
| "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results", |
| "tpp_threshold_2_total_metric": 0.02705000340938568, |
| "tpp_threshold_2_intended_diff_only": 0.03340001106262207, |
| "tpp_threshold_2_unintended_diff_only": 0.006350007653236389, |
| "tpp_threshold_5_total_metric": 0.05460000336170197, |
| "tpp_threshold_5_intended_diff_only": 0.061600017547607425, |
| "tpp_threshold_5_unintended_diff_only": 0.0070000141859054565, |
| "tpp_threshold_10_total_metric": 0.10355001986026764, |
| "tpp_threshold_10_intended_diff_only": 0.11820002794265747, |
| "tpp_threshold_10_unintended_diff_only": 0.014650008082389832, |
| "tpp_threshold_20_total_metric": 0.19220002889633178, |
| "tpp_threshold_20_intended_diff_only": 0.21240003108978273, |
| "tpp_threshold_20_unintended_diff_only": 0.020200002193450927, |
| "tpp_threshold_50_total_metric": 0.3145000159740448, |
| "tpp_threshold_50_intended_diff_only": 0.3454000234603882, |
| "tpp_threshold_50_unintended_diff_only": 0.030900007486343382, |
| "tpp_threshold_100_total_metric": 0.3483500301837921, |
| "tpp_threshold_100_intended_diff_only": 0.40300004482269286, |
| "tpp_threshold_100_unintended_diff_only": 0.054650014638900755, |
| "tpp_threshold_500_total_metric": 0.2807000368833542, |
| "tpp_threshold_500_intended_diff_only": 0.4062000513076782, |
| "tpp_threshold_500_unintended_diff_only": 0.12550001442432404 |
| } |
| ], |
| "sae_bench_commit_hash": "Unknown", |
| "sae_lens_id": "blocks.6.hook_resid_post", |
| "sae_lens_release_id": "gemma-2-2b-res-matryoshka-dc", |
| "sae_lens_version": "5.9.1", |
| "sae_cfg_dict": { |
| "architecture": "jumprelu", |
| "d_in": 2304, |
| "d_sae": 32768, |
| "activation_fn_str": "relu", |
| "apply_b_dec_to_input": true, |
| "finetuning_scaling_factor": false, |
| "context_size": 1024, |
| "model_name": "gemma-2-2b", |
| "hook_name": "blocks.6.hook_resid_post", |
| "hook_layer": 6, |
| "hook_head_index": null, |
| "prepend_bos": true, |
| "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B", |
| "dataset_trust_remote_code": true, |
| "normalize_activations": "none", |
| "dtype": "torch.bfloat16", |
| "device": "cuda", |
| "sae_lens_training_version": "5.5.1", |
| "activation_fn_kwargs": { |
| "k": 40 |
| }, |
| "neuronpedia_id": null, |
| "model_from_pretrained_kwargs": { |
| "center_writing_weights": false |
| }, |
| "seqpos_slice": [ |
| null |
| ] |
| }, |
| "eval_result_unstructured": { |
| "LabHC/bias_in_bios_class_set1": { |
| "0": { |
| "tpp_threshold_2_total_metric": 0.03899998962879181, |
| "tpp_threshold_2_intended_diff_only": 0.07899999618530273, |
| "tpp_threshold_2_unintended_diff_only": 0.040000006556510925, |
| "tpp_threshold_5_total_metric": 0.20350004732608795, |
| "tpp_threshold_5_intended_diff_only": 0.3240000605583191, |
| "tpp_threshold_5_unintended_diff_only": 0.12050001323223114, |
| "tpp_threshold_10_total_metric": 0.12725000083446503, |
| "tpp_threshold_10_intended_diff_only": 0.4010000228881836, |
| "tpp_threshold_10_unintended_diff_only": 0.27375002205371857, |
| "tpp_threshold_20_total_metric": 0.11599999666213989, |
| "tpp_threshold_20_intended_diff_only": 0.42000001668930054, |
| "tpp_threshold_20_unintended_diff_only": 0.30400002002716064, |
| "tpp_threshold_50_total_metric": 0.08625003695487976, |
| "tpp_threshold_50_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_50_unintended_diff_only": 0.3527500331401825, |
| "tpp_threshold_100_total_metric": 0.06375002861022949, |
| "tpp_threshold_100_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_100_unintended_diff_only": 0.3802500367164612, |
| "tpp_threshold_500_total_metric": 0.030000045895576477, |
| "tpp_threshold_500_intended_diff_only": 0.4440000653266907, |
| "tpp_threshold_500_unintended_diff_only": 0.4140000194311142 |
| }, |
| "1": { |
| "tpp_threshold_2_total_metric": 0.12575003504753113, |
| "tpp_threshold_2_intended_diff_only": 0.14400005340576172, |
| "tpp_threshold_2_unintended_diff_only": 0.01825001835823059, |
| "tpp_threshold_5_total_metric": 0.20650003850460052, |
| "tpp_threshold_5_intended_diff_only": 0.3380000591278076, |
| "tpp_threshold_5_unintended_diff_only": 0.1315000206232071, |
| "tpp_threshold_10_total_metric": 0.22225001454353333, |
| "tpp_threshold_10_intended_diff_only": 0.37300002574920654, |
| "tpp_threshold_10_unintended_diff_only": 0.15075001120567322, |
| "tpp_threshold_20_total_metric": 0.19025000929832458, |
| "tpp_threshold_20_intended_diff_only": 0.4360000491142273, |
| "tpp_threshold_20_unintended_diff_only": 0.2457500398159027, |
| "tpp_threshold_50_total_metric": 0.11899998784065247, |
| "tpp_threshold_50_intended_diff_only": 0.4610000252723694, |
| "tpp_threshold_50_unintended_diff_only": 0.3420000374317169, |
| "tpp_threshold_100_total_metric": 0.07925006747245789, |
| "tpp_threshold_100_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_100_unintended_diff_only": 0.382750004529953, |
| "tpp_threshold_500_total_metric": 0.04375004768371582, |
| "tpp_threshold_500_intended_diff_only": 0.4620000720024109, |
| "tpp_threshold_500_unintended_diff_only": 0.41825002431869507 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.14000003039836884, |
| "tpp_threshold_2_intended_diff_only": 0.21400004625320435, |
| "tpp_threshold_2_unintended_diff_only": 0.07400001585483551, |
| "tpp_threshold_5_total_metric": 0.1640000194311142, |
| "tpp_threshold_5_intended_diff_only": 0.3450000286102295, |
| "tpp_threshold_5_unintended_diff_only": 0.1810000091791153, |
| "tpp_threshold_10_total_metric": 0.17099998891353607, |
| "tpp_threshold_10_intended_diff_only": 0.3790000081062317, |
| "tpp_threshold_10_unintended_diff_only": 0.20800001919269562, |
| "tpp_threshold_20_total_metric": 0.18050000071525574, |
| "tpp_threshold_20_intended_diff_only": 0.41100001335144043, |
| "tpp_threshold_20_unintended_diff_only": 0.2305000126361847, |
| "tpp_threshold_50_total_metric": 0.12075001001358032, |
| "tpp_threshold_50_intended_diff_only": 0.44600003957748413, |
| "tpp_threshold_50_unintended_diff_only": 0.3252500295639038, |
| "tpp_threshold_100_total_metric": 0.09925000369548798, |
| "tpp_threshold_100_intended_diff_only": 0.45600003004074097, |
| "tpp_threshold_100_unintended_diff_only": 0.356750026345253, |
| "tpp_threshold_500_total_metric": 0.046750038862228394, |
| "tpp_threshold_500_intended_diff_only": 0.4580000638961792, |
| "tpp_threshold_500_unintended_diff_only": 0.4112500250339508 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.00550001859664917, |
| "tpp_threshold_2_intended_diff_only": 0.008000016212463379, |
| "tpp_threshold_2_unintended_diff_only": 0.002499997615814209, |
| "tpp_threshold_5_total_metric": 0.23625002801418304, |
| "tpp_threshold_5_intended_diff_only": 0.3200000524520874, |
| "tpp_threshold_5_unintended_diff_only": 0.08375002443790436, |
| "tpp_threshold_10_total_metric": 0.2997500151395798, |
| "tpp_threshold_10_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_10_unintended_diff_only": 0.13225002586841583, |
| "tpp_threshold_20_total_metric": 0.2580000311136246, |
| "tpp_threshold_20_intended_diff_only": 0.4790000319480896, |
| "tpp_threshold_20_unintended_diff_only": 0.22100000083446503, |
| "tpp_threshold_50_total_metric": 0.1770000159740448, |
| "tpp_threshold_50_intended_diff_only": 0.487000048160553, |
| "tpp_threshold_50_unintended_diff_only": 0.3100000321865082, |
| "tpp_threshold_100_total_metric": 0.13550004363059998, |
| "tpp_threshold_100_intended_diff_only": 0.49000006914138794, |
| "tpp_threshold_100_unintended_diff_only": 0.35450002551078796, |
| "tpp_threshold_500_total_metric": 0.09275004267692566, |
| "tpp_threshold_500_intended_diff_only": 0.49000006914138794, |
| "tpp_threshold_500_unintended_diff_only": 0.3972500264644623 |
| }, |
| "9": { |
| "tpp_threshold_2_total_metric": 0.011249974370002747, |
| "tpp_threshold_2_intended_diff_only": 0.014999985694885254, |
| "tpp_threshold_2_unintended_diff_only": 0.0037500113248825073, |
| "tpp_threshold_5_total_metric": 0.16224993765354156, |
| "tpp_threshold_5_intended_diff_only": 0.19599997997283936, |
| "tpp_threshold_5_unintended_diff_only": 0.03375004231929779, |
| "tpp_threshold_10_total_metric": 0.1874999701976776, |
| "tpp_threshold_10_intended_diff_only": 0.375, |
| "tpp_threshold_10_unintended_diff_only": 0.1875000298023224, |
| "tpp_threshold_20_total_metric": 0.1967499852180481, |
| "tpp_threshold_20_intended_diff_only": 0.4480000138282776, |
| "tpp_threshold_20_unintended_diff_only": 0.2512500286102295, |
| "tpp_threshold_50_total_metric": 0.12724998593330383, |
| "tpp_threshold_50_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_50_unintended_diff_only": 0.3527500331401825, |
| "tpp_threshold_100_total_metric": 0.10049998760223389, |
| "tpp_threshold_100_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_100_unintended_diff_only": 0.37950003147125244, |
| "tpp_threshold_500_total_metric": 0.07374998927116394, |
| "tpp_threshold_500_intended_diff_only": 0.48000001907348633, |
| "tpp_threshold_500_unintended_diff_only": 0.4062500298023224 |
| } |
| }, |
| "canrager/amazon_reviews_mcauley_1and5": { |
| "1": { |
| "tpp_threshold_2_total_metric": 0.006750032305717468, |
| "tpp_threshold_2_intended_diff_only": 0.012000024318695068, |
| "tpp_threshold_2_unintended_diff_only": 0.0052499920129776, |
| "tpp_threshold_5_total_metric": 0.010500028729438782, |
| "tpp_threshold_5_intended_diff_only": 0.01500004529953003, |
| "tpp_threshold_5_unintended_diff_only": 0.0045000165700912476, |
| "tpp_threshold_10_total_metric": 0.058500051498413086, |
| "tpp_threshold_10_intended_diff_only": 0.08400005102157593, |
| "tpp_threshold_10_unintended_diff_only": 0.025499999523162842, |
| "tpp_threshold_20_total_metric": 0.12325005233287811, |
| "tpp_threshold_20_intended_diff_only": 0.15000003576278687, |
| "tpp_threshold_20_unintended_diff_only": 0.026749983429908752, |
| "tpp_threshold_50_total_metric": 0.2677500247955322, |
| "tpp_threshold_50_intended_diff_only": 0.2980000376701355, |
| "tpp_threshold_50_unintended_diff_only": 0.03025001287460327, |
| "tpp_threshold_100_total_metric": 0.3725000470876694, |
| "tpp_threshold_100_intended_diff_only": 0.4320000410079956, |
| "tpp_threshold_100_unintended_diff_only": 0.05949999392032623, |
| "tpp_threshold_500_total_metric": 0.2937500476837158, |
| "tpp_threshold_500_intended_diff_only": 0.43900007009506226, |
| "tpp_threshold_500_unintended_diff_only": 0.14525002241134644 |
| }, |
| "2": { |
| "tpp_threshold_2_total_metric": 0.09274999797344208, |
| "tpp_threshold_2_intended_diff_only": 0.10100001096725464, |
| "tpp_threshold_2_unintended_diff_only": 0.008250012993812561, |
| "tpp_threshold_5_total_metric": 0.12799997627735138, |
| "tpp_threshold_5_intended_diff_only": 0.14899998903274536, |
| "tpp_threshold_5_unintended_diff_only": 0.021000012755393982, |
| "tpp_threshold_10_total_metric": 0.1902499943971634, |
| "tpp_threshold_10_intended_diff_only": 0.20800000429153442, |
| "tpp_threshold_10_unintended_diff_only": 0.017750009894371033, |
| "tpp_threshold_20_total_metric": 0.3370000422000885, |
| "tpp_threshold_20_intended_diff_only": 0.3670000433921814, |
| "tpp_threshold_20_unintended_diff_only": 0.030000001192092896, |
| "tpp_threshold_50_total_metric": 0.37450000643730164, |
| "tpp_threshold_50_intended_diff_only": 0.4240000247955322, |
| "tpp_threshold_50_unintended_diff_only": 0.04950001835823059, |
| "tpp_threshold_100_total_metric": 0.34800001978874207, |
| "tpp_threshold_100_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_100_unintended_diff_only": 0.07900002598762512, |
| "tpp_threshold_500_total_metric": 0.2905000299215317, |
| "tpp_threshold_500_intended_diff_only": 0.4270000457763672, |
| "tpp_threshold_500_unintended_diff_only": 0.1365000158548355 |
| }, |
| "3": { |
| "tpp_threshold_2_total_metric": -0.0012500286102294922, |
| "tpp_threshold_2_intended_diff_only": 0.0009999871253967285, |
| "tpp_threshold_2_unintended_diff_only": 0.0022500157356262207, |
| "tpp_threshold_5_total_metric": 0.007749989628791809, |
| "tpp_threshold_5_intended_diff_only": 0.0040000081062316895, |
| "tpp_threshold_5_unintended_diff_only": -0.0037499815225601196, |
| "tpp_threshold_10_total_metric": 0.04074998199939728, |
| "tpp_threshold_10_intended_diff_only": 0.046000003814697266, |
| "tpp_threshold_10_unintended_diff_only": 0.005250021815299988, |
| "tpp_threshold_20_total_metric": 0.09550002217292786, |
| "tpp_threshold_20_intended_diff_only": 0.10500001907348633, |
| "tpp_threshold_20_unintended_diff_only": 0.009499996900558472, |
| "tpp_threshold_50_total_metric": 0.23999999463558197, |
| "tpp_threshold_50_intended_diff_only": 0.2630000114440918, |
| "tpp_threshold_50_unintended_diff_only": 0.023000016808509827, |
| "tpp_threshold_100_total_metric": 0.3487499952316284, |
| "tpp_threshold_100_intended_diff_only": 0.3960000276565552, |
| "tpp_threshold_100_unintended_diff_only": 0.04725003242492676, |
| "tpp_threshold_500_total_metric": 0.29775001108646393, |
| "tpp_threshold_500_intended_diff_only": 0.4050000309944153, |
| "tpp_threshold_500_unintended_diff_only": 0.10725001990795135 |
| }, |
| "5": { |
| "tpp_threshold_2_total_metric": 0.014250010251998901, |
| "tpp_threshold_2_intended_diff_only": 0.021000027656555176, |
| "tpp_threshold_2_unintended_diff_only": 0.006750017404556274, |
| "tpp_threshold_5_total_metric": 0.0507500022649765, |
| "tpp_threshold_5_intended_diff_only": 0.05900001525878906, |
| "tpp_threshold_5_unintended_diff_only": 0.008250012993812561, |
| "tpp_threshold_10_total_metric": 0.10675004124641418, |
| "tpp_threshold_10_intended_diff_only": 0.1170000433921814, |
| "tpp_threshold_10_unintended_diff_only": 0.010250002145767212, |
| "tpp_threshold_20_total_metric": 0.20550000667572021, |
| "tpp_threshold_20_intended_diff_only": 0.2250000238418579, |
| "tpp_threshold_20_unintended_diff_only": 0.019500017166137695, |
| "tpp_threshold_50_total_metric": 0.3680000305175781, |
| "tpp_threshold_50_intended_diff_only": 0.40000003576278687, |
| "tpp_threshold_50_unintended_diff_only": 0.03200000524520874, |
| "tpp_threshold_100_total_metric": 0.36625005304813385, |
| "tpp_threshold_100_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_100_unintended_diff_only": 0.041749998927116394, |
| "tpp_threshold_500_total_metric": 0.26450003683567047, |
| "tpp_threshold_500_intended_diff_only": 0.40800005197525024, |
| "tpp_threshold_500_unintended_diff_only": 0.14350001513957977 |
| }, |
| "6": { |
| "tpp_threshold_2_total_metric": 0.02275000512599945, |
| "tpp_threshold_2_intended_diff_only": 0.03200000524520874, |
| "tpp_threshold_2_unintended_diff_only": 0.00925000011920929, |
| "tpp_threshold_5_total_metric": 0.07600001990795135, |
| "tpp_threshold_5_intended_diff_only": 0.08100003004074097, |
| "tpp_threshold_5_unintended_diff_only": 0.005000010132789612, |
| "tpp_threshold_10_total_metric": 0.12150003015995026, |
| "tpp_threshold_10_intended_diff_only": 0.13600003719329834, |
| "tpp_threshold_10_unintended_diff_only": 0.014500007033348083, |
| "tpp_threshold_20_total_metric": 0.19975002110004425, |
| "tpp_threshold_20_intended_diff_only": 0.21500003337860107, |
| "tpp_threshold_20_unintended_diff_only": 0.015250012278556824, |
| "tpp_threshold_50_total_metric": 0.32225002348423004, |
| "tpp_threshold_50_intended_diff_only": 0.34200000762939453, |
| "tpp_threshold_50_unintended_diff_only": 0.01974998414516449, |
| "tpp_threshold_100_total_metric": 0.30625003576278687, |
| "tpp_threshold_100_intended_diff_only": 0.35200005769729614, |
| "tpp_threshold_100_unintended_diff_only": 0.04575002193450928, |
| "tpp_threshold_500_total_metric": 0.25700005888938904, |
| "tpp_threshold_500_intended_diff_only": 0.35200005769729614, |
| "tpp_threshold_500_unintended_diff_only": 0.0949999988079071 |
| } |
| } |
| } |
| } |