---
# Experiment configuration with six top-level sections:
# dataset, distributed, experiment, model, optimizers, trainer.
#
# NOTE: the `!!python/tuple` tags below are PyYAML-specific. `yaml.safe_load`
# rejects them, so this file must be read with a loader that constructs
# Python tuples.

# Data loading: loader name, train/validation directories, and the keyword
# arguments forwarded to the dataset and the loader.
dataset:
  dataset_kwargs:
    field_name_for_dimension_grouping: base_intensity_functions
    # Logical field name -> tensor file name.
    # presumably resolved inside each directory listed under `path` — confirm
    files_to_load:
      base_intensity_functions: base_intensity_functions.pt
      event_times: event_times.pt
      event_types: event_types.pt
      kernel_functions: kernel_functions.pt
      time_offsets: time_offsets.pt
    shuffle: true
  loader_kwargs:
    batch_size: 6
    full_len_ratio: 0.1
    max_number_of_minibatch_sizes: 8
    max_path_count: 2000
    max_sequence_len: 100
    min_path_count: 400
    min_sequence_len: 15
    num_inference_paths: 1
    num_inference_times: 2000
    num_workers: 16
    test_batch_size: 2
    variable_num_of_paths: true
    # Per-split switch: enabled for train, disabled for validation.
    variable_sequence_lens:
      train: true
      validation: false
  name: HawkesDataLoader
  path:
    # 36 training directories (1D, 5D, 10D, 15D and 22D variants).
    train: !!python/tuple
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
    # Validation uses only the 5k_22D datasets.
    validation: !!python/tuple
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val

# Distributed-training options; `enabled: false` here.
# NOTE(review): `activation_chekpoint` and `SIZE_BAZED` look misspelled but
# are kept verbatim — presumably they match the identifiers the consuming
# code expects. Confirm before renaming.
distributed:
  activation_chekpoint: false
  checkpoint_type: full_state
  enabled: false
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # parsers (PyYAML) load a float; a bare `1e5` is loaded as the string "1e5".
  min_num_params: 1.0e+5
  sharding_strategy: NO_SHARD
  wrap_policy: SIZE_BAZED

# Run identity and seeding.
experiment:
  device_map: auto
  name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
  name_add_date: true
  seed: 10

# Model architecture. Sub-blocks carry a `name` holding a dotted class path
# (presumably instantiated by the consumer with the sibling keys as
# arguments — confirm).
model:
  alpha_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
      - 256
      - 256
    name: fim.models.blocks.base.MLP
  beta_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
      - 256
      - 256
    name: fim.models.blocks.base.MLP
  context_summary_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 2
  context_summary_pooling:
    attention:
      nhead: 4
    name: fim.models.blocks.neural_operators.AttentionOperator
    num_res_layers: 1
    paths_block_attention: false
  context_ts_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 4
  decoder_ts:
    decoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerDecoderLayer
      nhead: 4
    name: torch.nn.TransformerDecoder
    num_layers: 4
  delta_time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
  evaluation_mark_encoder:
    name: torch.nn.Linear
  hidden_act:
    name: torch.nn.GELU
  hidden_dim: 256
  # Only `nll` has a non-zero weight in this configuration.
  loss_weights:
    alpha: 0.0
    mu: 0.0
    nll: 1.0
    relative_spike: 0.0
    smape: 0.0
  mark_encoder:
    name: torch.nn.Linear
    out_features: 256
  mark_fusion_attention: null
  # presumably tied to the largest dimensionality in the dataset paths (22D)
  # — TODO confirm
  max_num_marks: 22
  model_type: fimhawkes
  mu_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
      - 256
      - 256
    name: fim.models.blocks.base.MLP
  nll:
    method: monte_carlo
    num_integration_points: 200
  normalize_by_max_time: false
  normalize_times: true
  thinning: null
  time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256

# One optimizer entry, keyed `optimizer_d`.
optimizers: !!python/tuple
  - optimizer_d:
      lr: 5.0e-05
      name: torch.optim.AdamW
      weight_decay: 0.0001

# Training-loop settings.
trainer:
  best_metric: loss
  debug_iterations: null
  detect_anomaly: false
  epochs: 100000
  evaluation_epoch:
    enable_plotting: false
    inference_path_idx: 0
    iterator_name: validation
    path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
    plot_frequency: 10
  experiment_dir: ./results/
  gradient_accumulation_steps: 6
  # %-style logging format string; quoted so the value is taken literally.
  logging_format: 'RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s'
  name: Trainer
  precision: bf16_mixed
  save_every: 1
  # NOTE(review): these labels (gauss_nll, init_cross_entropy, missing_link)
  # do not match any key under model.loss_weights in this file — confirm
  # they are actually consumed.
  schedulers: !!python/tuple
    - beta: 1.0
      label: gauss_nll
      name: fim.utils.param_scheduler.ConstantScheduler
    - beta: 1.0
      label: init_cross_entropy
      name: fim.utils.param_scheduler.ConstantScheduler
    - beta: 1.0
      label: missing_link
      name: fim.utils.param_scheduler.ConstantScheduler