| { |
| "_name_or_path": "answerdotai/ModernBERT-base", |
| "architectures": [ |
| "ModernBertForSequenceClassification" |
| ], |
| "attention_bias": false, |
| "attention_dropout": 0.0, |
| "bos_token_id": 50281, |
| "classifier_activation": "gelu", |
| "classifier_bias": false, |
| "classifier_dropout": 0.0, |
| "classifier_pooling": "mean", |
| "classifiers_size": [ |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 1, |
| 2, |
| 3, |
| 2, |
| 2, |
| 2, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 2, |
| 2, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 6, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 16, |
| 100, |
| 13, |
| 100, |
| 8, |
| 3, |
| 3, |
| 2, |
| 3, |
| 2, |
| 4, |
| 3, |
| 2, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 3, |
| 2, |
| 3, |
| 2, |
| 4, |
| 3, |
| 3, |
| 3, |
| 2, |
| 3, |
| 1, |
| 2, |
| 2, |
| 3, |
| 13, |
| 2, |
| 2, |
| 3, |
| 2, |
| 2, |
| 3, |
| 3, |
| 3, |
| 3, |
| 2, |
| 3, |
| 3, |
| 2, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 3, |
| 4, |
| 3, |
| 3, |
| 2, |
| 2, |
| 3, |
| 3, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2, |
| 4, |
| 3, |
| 2, |
| 2, |
| 2, |
| 3, |
| 3, |
| 3, |
| 2, |
| 3 |
| ], |
| "cls_token_id": 50281, |
| "decoder_bias": true, |
| "deterministic_flash_attn": false, |
| "embedding_dropout": 0.0, |
| "eos_token_id": 50282, |
| "global_attn_every_n_layers": 3, |
| "global_rope_theta": 160000.0, |
| "gradient_checkpointing": false, |
| "hidden_activation": "gelu", |
| "hidden_size": 768, |
| "id2label": { |
| "0": "entailment", |
| "1": "neutral", |
| "2": "contradiction" |
| }, |
| "initializer_cutoff_factor": 2.0, |
| "initializer_range": 0.02, |
| "intermediate_size": 1152, |
| "label2id": { |
| "contradiction": 2, |
| "entailment": 0, |
| "neutral": 1 |
| }, |
| "layer_norm_eps": 1e-05, |
| "local_attention": 128, |
| "local_rope_theta": 10000.0, |
| "max_position_embeddings": 2048, |
| "mlp_bias": false, |
| "mlp_dropout": 0.0, |
| "model_type": "modernbert", |
| "norm_bias": false, |
| "norm_eps": 1e-05, |
| "num_attention_heads": 12, |
| "num_hidden_layers": 22, |
| "pad_token_id": 50283, |
| "position_embedding_type": "absolute", |
| "problem_type": "single_label_classification", |
| "reference_compile": true, |
| "sep_token_id": 50282, |
| "sparse_pred_ignore_index": -100, |
| "sparse_prediction": false, |
| "tasks": [ |
| "glue/mnli", |
| "glue/qnli", |
| "glue/rte", |
| "glue/wnli", |
| "glue/mrpc", |
| "glue/qqp", |
| "glue/stsb", |
| "super_glue/boolq", |
| "super_glue/cb", |
| "super_glue/multirc", |
| "super_glue/wic", |
| "super_glue/axg", |
| "anli/a1", |
| "anli/a2", |
| "anli/a3", |
| "sick/label", |
| "sick/entailment_AB", |
| "snli", |
| "scitail/snli_format", |
| "hans", |
| "WANLI", |
| "recast/recast_ner", |
| "recast/recast_sentiment", |
| "recast/recast_verbnet", |
| "recast/recast_megaveridicality", |
| "recast/recast_verbcorner", |
| "recast/recast_kg_relations", |
| "recast/recast_factuality", |
| "recast/recast_puns", |
| "probability_words_nli/reasoning_1hop", |
| "probability_words_nli/usnli", |
| "probability_words_nli/reasoning_2hop", |
| "nan-nli", |
| "nli_fever", |
| "breaking_nli", |
| "conj_nli", |
| "fracas", |
| "dialogue_nli", |
| "mpe", |
| "dnc", |
| "recast_white/fnplus", |
| "recast_white/sprl", |
| "recast_white/dpr", |
| "robust_nli/IS_CS", |
| "robust_nli/LI_LI", |
| "robust_nli/ST_WO", |
| "robust_nli/PI_SP", |
| "robust_nli/PI_CD", |
| "robust_nli/ST_SE", |
| "robust_nli/ST_NE", |
| "robust_nli/ST_LM", |
| "robust_nli_is_sd", |
| "robust_nli_li_ts", |
| "add_one_rte", |
| "paws/labeled_final", |
| "glue/cola", |
| "glue/sst2", |
| "pragmeval/pdtb", |
| "lex_glue/eurlex", |
| "lex_glue/scotus", |
| "lex_glue/ledgar", |
| "lex_glue/unfair_tos", |
| "dynasent/dynabench.dynasent.r1.all/r1", |
| "dynasent/dynabench.dynasent.r2.all/r2", |
| "cycic_classification", |
| "lingnli", |
| "monotonicity-entailment", |
| "scinli", |
| "naturallogic", |
| "dynahate", |
| "syntactic-augmentation-nli", |
| "autotnli", |
| "defeasible-nli/atomic", |
| "defeasible-nli/snli", |
| "help-nli", |
| "nli-veridicality-transitivity", |
| "lonli", |
| "dadc-limit-nli", |
| "folio", |
| "tomi-nli", |
| "puzzte", |
| "temporal-nli", |
| "counterfactually-augmented-snli", |
| "cnli", |
| "boolq-natural-perturbations", |
| "equate", |
| "chaos-mnli-ambiguity", |
| "logiqa-2.0-nli", |
| "mindgames", |
| "ConTRoL-nli", |
| "logical-fallacy", |
| "cladder", |
| "conceptrules_v2", |
| "zero-shot-label-nli", |
| "scone", |
| "monli", |
| "SpaceNLI", |
| "propsegment/nli", |
| "FLD.v2/default", |
| "FLD.v2/star", |
| "SDOH-NLI", |
| "scifact_entailment", |
| "AdjectiveScaleProbe-nli", |
| "resnli", |
| "semantic_fragments_nli", |
| "dataset_train_nli", |
| "nlgraph", |
| "ruletaker", |
| "PARARULE-Plus", |
| "logical-entailment", |
| "nope", |
| "LogicNLI", |
| "contract-nli/contractnli_a/seg", |
| "contract-nli/contractnli_b/full", |
| "nli4ct_semeval2024", |
| "biosift-nli", |
| "SIGA-nli", |
| "FOL-nli", |
| "doc-nli", |
| "mctest-nli", |
| "natural-language-satisfiability", |
| "idioms-nli", |
| "lifecycle-entailment", |
| "MSciNLI", |
| "hover-3way/nli", |
| "seahorse_summarization_evaluation", |
| "missing-item-prediction/contrastive", |
| "Pol_NLI", |
| "synthetic-retrieval-NLI/count", |
| "synthetic-retrieval-NLI/position", |
| "synthetic-retrieval-NLI/binary", |
| "babi_nli", |
| "gen_debiased_nli" |
| ], |
| "torch_dtype": "float32", |
| "transformers_version": "4.48.0.dev0", |
| "vocab_size": 50368 |
| } |
|
|