| import numpy as np |
| import dataclasses as dc |
|
|
|
|
@dc.dataclass
class CtrlArguments:
    """Bundle of data/tokenization settings, each with a default value.

    Every field stores a human-readable description under the ``"help"``
    key of its dataclass metadata (presumably consumed by an argument
    parser elsewhere -- confirm against the caller).
    """

    # Location(s) of the training data.
    train_data: str = dc.field(
        default="data/training_cunique_with_distractors.json",
        metadata=dict(help="A CSV list of training data files"),
    )

    # Which problem formulation to use.
    formulation: str = dc.field(
        default="areg_ltr",
        metadata=dict(
            help=(
                "Type of problem definition: autoregressive (areg) or "
                "u-PMLM (upmlm) or mixed (if predict_questions is set)"
            ),
        ),
    )

    # Policy applied to over-long contexts.
    context_strategy: str = dc.field(
        default="take_first",
        metadata=dict(
            help="How to deal with contexts greater than a specified length",
        ),
    )

    # Path to the serialized tokenizer.
    tokenizer_file: str = dc.field(
        default="tokenizer.json",
        metadata=dict(
            help=(
                "A JSON file (in the format provided by HuggingFace's "
                "tokenizers library) with a trained tokenizer"
            ),
        ),
    )

    # Upper bound on sequence length.
    sequence_length: int = dc.field(
        default=256,
        metadata=dict(help="The max sequence length"),
    )

    # Whether every sliding window gets the control code prepended.
    force_prepend_control: bool = dc.field(
        default=False,
        metadata=dict(
            help=(
                "If the control code should be prepended for all sliding "
                "windows. Otherwise, it is only prepended at the start of "
                "the sequence"
            ),
        ),
    )
|
|
|
|
class GradientPrinter:
    """Callable that prints a gradient tensor with its norm and mean.

    Presumably meant to be registered as a tensor gradient hook -- confirm
    against the caller. Calling an instance returns ``None``.
    """

    def __init__(self, name):
        """Remember ``name``, the label shown in the printed header."""
        self.name = name

    def __call__(self, grad):
        """Print ``grad`` followed by its norm and its mean.

        Args:
            grad: A tensor-like object supporting
                ``.detach().cpu().numpy()``.

        Returns:
            None.
        """
        # Detach first: ``Tensor.numpy()`` raises a RuntimeError when the
        # tensor requires grad (e.g. backward with ``create_graph=True``).
        np_grad = grad.detach().cpu().numpy()
        print("======== GRAD FOR {} ========".format(self.name))
        print("\tGRAD {}".format(grad))
        print("\tGRAD NORM {}".format(np.linalg.norm(np_grad)))
        print("\tGRAD MEAN {}".format(np.mean(np_grad)))
        print()