{
  "batch_size": 18,
  "num_epochs": 4,
  "lr": 5e-5,
  "seq_len": 256,
  "d_model": 256,
  "N": 4,
  "h": 4,
  "train": "dataset/openweb_fine.jsonl",
  "test": "dataset/openweb_fine.jsonl",
  "d_ff": 1024,
  "dropout": 0.1,
  "model_folder": "openweb2",
  "model_basename": "openweb2-",
  "preload": "03",
  "tokenizer_file": "openweb2.tokenizer.json",
  "experiment_name": "runs/openweb2",
  "dataset": "dataset/dataset_general.jsonl",
  "loss_file": "openweb2/losses.jsonl",
  "fine_dataset": "dataset/fine_tune.jsonl",
  "fine_epochs": 0
}