# Data configuration: the loader to use, the tensor files read for each dataset,
# batching limits, and the train/validation dataset directories.
dataset:
  dataset_kwargs:
    # Names one of the keys under files_to_load below.
    # NOTE(review): presumably the field used to group samples by process
    # dimension - confirm in the data loader.
    field_name_for_dimension_grouping: base_intensity_functions
    # Logical field name -> file name.
    # NOTE(review): presumably resolved inside every directory listed under
    # `path`, and the `.pt` files are presumably torch-serialized - confirm.
    files_to_load:
      base_intensity_functions: base_intensity_functions.pt
      event_times: event_times.pt
      event_types: event_types.pt
      kernel_functions: kernel_functions.pt
      time_offsets: time_offsets.pt
    shuffle: true
  loader_kwargs:
    # Training batch size; validation/test uses test_batch_size further down.
    batch_size: 6
    # NOTE(review): meaning not visible in this file - presumably the share of
    # batches kept at full sequence length; confirm in the loader.
    full_len_ratio: 0.1
    max_number_of_minibatch_sizes: 8
    # Path-count bounds (min_path_count .. max_path_count); presumably only
    # sampled from when variable_num_of_paths is true - TODO confirm.
    max_path_count: 2000
    # Sequence-length bounds (min_sequence_len .. max_sequence_len); presumably
    # only sampled from where variable_sequence_lens is true - TODO confirm.
    max_sequence_len: 100
    min_path_count: 400
    min_sequence_len: 15
    num_inference_paths: 1
    num_inference_times: 2000
    num_workers: 16
    test_batch_size: 2
    variable_num_of_paths: true
    # Per-split switch: enabled for training, disabled for validation.
    variable_sequence_lens:
      train: true
      validation: false
  name: HawkesDataLoader
  path:
    # `!!python/tuple` is a PyYAML-specific tag that builds a Python tuple.
    # Safe loaders (e.g. yaml.safe_load) do not define it, so this file needs a
    # loader that does.
    train: !!python/tuple
    # Directory names below: exp-kernel and poisson datasets, 1D up to 22D.
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
    # Directory names below: `_sparse` variants of the exp-kernel datasets.
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
    # Directory names below: rayleigh-kernel datasets, dense then `_sparse`.
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
    # Validation uses only the `val` splits of 5k_22D datasets.
    # NOTE(review): train also lists 5k_22D_2k_paths_const_base_exp_kernel_sparse,
    # which has no `val` counterpart here - confirm this is intended.
    validation: !!python/tuple
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
# Distributed-training options. `enabled` is false, so the other keys
# presumably have no effect on this run - confirm in the trainer.
distributed:
  # NOTE(review): key spelling ("chekpoint") is left untouched; it is presumably
  # the exact name the consuming code looks up - verify before renaming.
  activation_chekpoint: false
  checkpoint_type: full_state
  enabled: false
  # Plain integer on purpose: PyYAML's YAML 1.1 resolver only treats a scalar
  # as a float when it has a decimal point and a signed exponent, so the
  # exponent-only form `1e5` is loaded as the string "1e5", not a number.
  min_num_params: 100000
  sharding_strategy: NO_SHARD
  # NOTE(review): value spelling ("BAZED") is left untouched; it is presumably
  # matched verbatim by the consuming code - verify before correcting.
  wrap_policy: SIZE_BAZED
# Run identification and reproducibility settings.
experiment:
  device_map: auto
  # Run name; it embeds the seed ("seed-10") and the dimension-grouping field
  # that are also set elsewhere in this file.
  name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
  # NOTE(review): presumably appends the launch date to `name` - confirm.
  name_add_date: true
  seed: 10
# Model architecture for model_type `fimhawkes`.
# Each `name` is a dotted Python path. NOTE(review): presumably the class is
# instantiated with its sibling keys as keyword arguments - confirm in the
# model factory.
model:
  # alpha/beta/mu decoders share one shape: an MLP with two hidden layers of
  # 256 units and GELU activations.
  alpha_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
    - 256
    - 256
    name: fim.models.blocks.base.MLP
  beta_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
    - 256
    - 256
    name: fim.models.blocks.base.MLP
  # 2-layer transformer encoder, 4 heads, no dropout.
  # NOTE(review): no d_model is given for any transformer layer in this file;
  # presumably it is filled in from hidden_dim - confirm.
  context_summary_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 2
  context_summary_pooling:
    attention:
      nhead: 4
    name: fim.models.blocks.neural_operators.AttentionOperator
    num_res_layers: 1
    paths_block_attention: false
  # 4-layer transformer encoder, 4 heads, no dropout.
  context_ts_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 4
  # 4-layer transformer decoder, 4 heads, no dropout.
  decoder_ts:
    decoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerDecoderLayer
      nhead: 4
    name: torch.nn.TransformerDecoder
    num_layers: 4
  delta_time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
  # NOTE(review): unlike mark_encoder below, no out_features is set here;
  # presumably the model supplies the Linear dimensions - confirm.
  evaluation_mark_encoder:
    name: torch.nn.Linear
  hidden_act:
    name: torch.nn.GELU
  # Same width (256) as every out_features and MLP hidden layer in this block.
  hidden_dim: 256
  # Only `nll` has a non-zero weight (1.0); all other entries are 0.0.
  loss_weights:
    alpha: 0.0
    mu: 0.0
    nll: 1.0
    relative_spike: 0.0
    smape: 0.0
  mark_encoder:
    name: torch.nn.Linear
    out_features: 256
  mark_fusion_attention: null
  # Equals the largest dimension among the dataset paths in this file (22D).
  max_num_marks: 22
  model_type: fimhawkes
  mu_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
    - 256
    - 256
    name: fim.models.blocks.base.MLP
  # NLL settings: method `monte_carlo` with 200 integration points.
  nll:
    method: monte_carlo
    num_integration_points: 200
  normalize_by_max_time: false
  normalize_times: true
  thinning: null
  time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
# Tuple of optimizer configurations (PyYAML-specific `!!python/tuple` tag, not
# defined by safe loaders). It holds a single entry, keyed `optimizer_d`.
optimizers: !!python/tuple
- optimizer_d:
    # Decimal point plus signed exponent, so PyYAML resolves this as a float.
    lr: 5.0e-05
    name: torch.optim.AdamW
    weight_decay: 0.0001
# Training-loop settings: duration, evaluation hook, logging, precision and
# parameter schedulers.
trainer:
  # NOTE(review): presumably the metric used to select the best checkpoint -
  # confirm in the trainer.
  best_metric: loss
  debug_iterations: null
  detect_anomaly: false
  epochs: 100000
  evaluation_epoch:
    # Plotting is off, so plot_frequency below presumably has no effect -
    # TODO confirm.
    enable_plotting: false
    inference_path_idx: 0
    iterator_name: validation
    path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
    plot_frequency: 10
  experiment_dir: ./results/
  # With dataset.loader_kwargs.batch_size of 6 this presumably gives 36 samples
  # per optimizer step per process - verify against the trainer.
  gradient_accumulation_steps: 6
  # %-style logging format string; it uses a non-standard `rank` field, which
  # the logging setup must supply.
  logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
  name: Trainer
  precision: bf16_mixed
  # NOTE(review): unit not visible here (presumably epochs) - confirm.
  save_every: 1
  # Three constant schedulers, each with beta fixed at 1.0.
  # NOTE(review): the labels (gauss_nll, init_cross_entropy, missing_link) match
  # none of the model.loss_weights keys (alpha, mu, nll, relative_spike, smape);
  # possibly carried over from another config - confirm they are still used.
  schedulers: !!python/tuple
  - beta: 1.0
    label: gauss_nll
    name: fim.utils.param_scheduler.ConstantScheduler
  - beta: 1.0
    label: init_cross_entropy
    name: fim.utils.param_scheduler.ConstantScheduler
  - beta: 1.0
    label: missing_link
    name: fim.utils.param_scheduler.ConstantScheduler