Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

README.md +64 -0
config.json +24 -0
generation_config.json +5 -0
pytorch_model.bin +3 -0
special_tokens_map.json +7 -0
tokenizer.json +252 -0
tokenizer_config.json +15 -0

README.md CHANGED Viewed

@@ -1,3 +1,67 @@
 ---
 license: mit
 ---

 ---
 license: mit
+tags:
+- automated-planning
+- masked-language-modeling
+- bert
+---
+# BERT for Automated Planning (Driverlog)
+This is a BERT model pretrained with the Masked Language Modelling (MLM) objective and developed specifically for Automated Planning tasks in the Driverlog domain.
+You can find its full description, methodology, and experimental results in our paper: **[A Preliminary Study on BERT applied to Automated Planning](https://ceur-ws.org/Vol-3345/paper7_3460.pdf)**.
+## Usage
+You can easily load the model and the tokenizer using the Hugging Face `transformers` library:
+```python
+from transformers import BertForMaskedLM, PreTrainedTokenizerFast
+tokenizer = PreTrainedTokenizerFast.from_pretrained("lore-seri97/bert-driverlog")
+model = BertForMaskedLM.from_pretrained("lore-seri97/bert-driverlog")
+```
+## Citation
+If you use this model in your research, please cite our work using the following BibTeX entry:
+```bibtex
+@inproceedings{DBLP:conf/aiia/SerinaCGPS22,
+  author       = {Lorenzo Serina and
+                  Mattia Chiari and
+                  Alfonso Emilio Gerevini and
+                  Luca Putelli and
+                  Ivan Serina},
+  editor       = {Riccardo De Benedictis and
+                  Nicola Gatti and
+                  Marco Maratea and
+                  Andrea Micheli and
+                  Aniello Murano and
+                  Enrico Scala and
+                  Luciano Serafini and
+                  Ivan Serina and
+                  Alessandro Umbrico and
+                  Mauro Vallati},
+  title        = {A Preliminary Study on {BERT} applied to Automated Planning},
+  booktitle    = {Proceedings of the 10th Italian workshop on Planning and Scheduling
+                  {(IPS} 2022), {RCRA} Incontri {E} Confronti (RiCeRcA 2022), and the
+                  workshop on Strategies, Prediction, Interaction, and Reasoning in
+                  Italy {(SPIRIT} 2022) co-located with 21st International Conference
+                  of the Italian Association for Artificial Intelligence (AIxIA 2022),
+                  November 28 - December 2, 2022, University of Udine, Udine, Italy},
+  series       = {{CEUR} Workshop Proceedings},
+  volume       = {3345},
+  publisher    = {CEUR-WS.org},
+  year         = {2022},
+  url          = {https://ceur-ws.org/Vol-3345/paper7_3460.pdf},
+  timestamp    = {Fri, 10 Mar 2023 16:23:01 +0100},
+  biburl       = {https://dblp.org/rec/conf/aiia/SerinaCGPS22.bib},
+  bibsource    = {dblp computer science bibliography, https://dblp.org}
+}
+```
 ---

config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 8,
+  "num_hidden_layers": 8,
+  "pad_token_id": 3,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 85
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "_from_model_config": true,
+  "pad_token_id": 3,
+  "transformers_version": "4.30.0.dev0"
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cb3fa4a68612a6c4bcf36ccdc8745d2fe6d1041b5ea0703b85ca553c35aae4d
+size 180714041

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,252 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "[UNK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "[CLS]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "[SEP]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "[PAD]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 4,
+      "content": "[MASK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 5,
+      "content": "[A]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 6,
+      "content": "[SI]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 7,
+      "content": "[SG]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": {
+    "type": "Lowercase"
+  },
+  "pre_tokenizer": {
+    "type": "WhitespaceSplit"
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "[CLS]": {
+        "id": "[CLS]",
+        "ids": [
+          1
+        ],
+        "tokens": [
+          "[CLS]"
+        ]
+      },
+      "[SEP]": {
+        "id": "[SEP]",
+        "ids": [
+          2
+        ],
+        "tokens": [
+          "[SEP]"
+        ]
+      }
+    }
+  },
+  "decoder": null,
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      "[UNK]": 0,
+      "[CLS]": 1,
+      "[SEP]": 2,
+      "[PAD]": 3,
+      "[MASK]": 4,
+      "[A]": 5,
+      "[SI]": 6,
+      "[SG]": 7,
+      "at": 8,
+      "truck2": 9,
+      "truck1": 10,
+      "truck3": 11,
+      "drive-truck": 12,
+      "driver1": 13,
+      "driver2": 14,
+      "driver3": 15,
+      "walk": 16,
+      "s6": 17,
+      "s8": 18,
+      "s5": 19,
+      "s0": 20,
+      "s4": 21,
+      "s9": 22,
+      "s2": 23,
+      "s7": 24,
+      "s11": 25,
+      "s3": 26,
+      "s10": 27,
+      "s1": 28,
+      "load-truck": 29,
+      "unload-truck": 30,
+      "package4": 31,
+      "package6": 32,
+      "package1": 33,
+      "package7": 34,
+      "package2": 35,
+      "package3": 36,
+      "package5": 37,
+      "empty": 38,
+      "board-truck": 39,
+      "disembark-truck": 40,
+      "[a]": 41,
+      "[sg]": 42,
+      "[si]": 43,
+      "p3-7": 44,
+      "p9-7": 45,
+      "p6-7": 46,
+      "p7-1": 47,
+      "p8-4": 48,
+      "p4-7": 49,
+      "p6-4": 50,
+      "p0-8": 51,
+      "p2-0": 52,
+      "p7-10": 53,
+      "p11-0": 54,
+      "p11-1": 55,
+      "p5-0": 56,
+      "p0-2": 57,
+      "p6-10": 58,
+      "p11-10": 59,
+      "p8-11": 60,
+      "p2-6": 61,
+      "p8-3": 62,
+      "p1-4": 63,
+      "p1-0": 64,
+      "p1-2": 65,
+      "p2-1": 66,
+      "p6-2": 67,
+      "p6-8": 68,
+      "p8-5": 69,
+      "p11-4": 70,
+      "p4-6": 71,
+      "p2-3": 72,
+      "p6-11": 73,
+      "p2-5": 74,
+      "p3-9": 75,
+      "p4-0": 76,
+      "p2-7": 77,
+      "p6-5": 78,
+      "p10-0": 79,
+      "p9-4": 80,
+      "p3-2": 81,
+      "p7-9": 82,
+      "p3-1": 83,
+      "p0-1": 84
+    },
+    "unk_token": "[UNK]"
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "action_token": "[A]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "goal_token": "[SG]",
+  "init_token": "[SI]",
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "padding": "max_length",
+  "sep_token": "[SEP]",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation": true,
+  "unk_token": "[UNK]"
+}