Tags: Feature Extraction · Transformers · PyTorch · Safetensors · fimhawkes · time-series · temporal-point-processes · hawkes-processes · scientific-ml · custom_code
Instructions for using FIM4Science/FIM-PP with libraries, inference providers, notebooks, and local apps.

- Libraries
  - Transformers

How to use FIM4Science/FIM-PP with Transformers:

```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="FIM4Science/FIM-PP", trust_remote_code=True)
```

```python
# Load the model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("FIM4Science/FIM-PP", trust_remote_code=True, dtype="auto")
```

- Notebooks
  - Google Colab
  - Kaggle
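FIM-PP is trained on synthetic Hawkes-process paths; the dataset names in the training configuration (e.g. `const_base_exp_kernel`) indicate a constant base intensity with an exponential excitation kernel. As background, here is a minimal sketch of how such paths can be generated with Ogata's thinning algorithm for a 1D Hawkes process. The function and parameter names (`mu`, `alpha`, `beta`) are illustrative, not the repository's API.

```python
import math
import random

def simulate_hawkes(mu, alpha, beta, t_max, seed=0):
    """Simulate a 1D Hawkes process with constant base intensity `mu` and
    exponential kernel alpha * beta * exp(-beta * dt) via Ogata's thinning."""
    rng = random.Random(seed)
    events = []
    t = 0.0
    while t < t_max:
        # The intensity only decays until the next event, so its value at the
        # current time is a valid upper bound for the interval ahead.
        lam_bar = mu + sum(alpha * beta * math.exp(-beta * (t - ti)) for ti in events)
        t += rng.expovariate(lam_bar)
        if t >= t_max:
            break
        # Accept the candidate time with probability lambda(t) / lam_bar.
        lam_t = mu + sum(alpha * beta * math.exp(-beta * (t - ti)) for ti in events)
        if rng.random() <= lam_t / lam_bar:
            events.append(t)
    return events
```

With `alpha < 1` the process is stationary; e.g. `simulate_hawkes(mu=0.5, alpha=0.5, beta=1.0, t_max=50.0)` yields roughly `mu / (1 - alpha) * t_max` events on average.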
Training configuration (as shipped with the model):

```yaml
dataset:
  dataset_kwargs:
    field_name_for_dimension_grouping: base_intensity_functions
    files_to_load:
      base_intensity_functions: base_intensity_functions.pt
      event_times: event_times.pt
      event_types: event_types.pt
      kernel_functions: kernel_functions.pt
      time_offsets: time_offsets.pt
    shuffle: true
  loader_kwargs:
    batch_size: 6
    full_len_ratio: 0.1
    max_number_of_minibatch_sizes: 8
    max_path_count: 2000
    max_sequence_len: 100
    min_path_count: 400
    min_sequence_len: 15
    num_inference_paths: 1
    num_inference_times: 2000
    num_workers: 16
    test_batch_size: 2
    variable_num_of_paths: true
    variable_sequence_lens:
      train: true
      validation: false
  name: HawkesDataLoader
  path:
    train: !!python/tuple
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
    validation: !!python/tuple
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
distributed:
  activation_chekpoint: false
  checkpoint_type: full_state
  enabled: false
  min_num_params: 1e5
  sharding_strategy: NO_SHARD
  wrap_policy: SIZE_BAZED
experiment:
  device_map: auto
  name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
  name_add_date: true
  seed: 10
model:
  alpha_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
      - 256
      - 256
    name: fim.models.blocks.base.MLP
  beta_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
      - 256
      - 256
    name: fim.models.blocks.base.MLP
  context_summary_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 2
  context_summary_pooling:
    attention:
      nhead: 4
    name: fim.models.blocks.neural_operators.AttentionOperator
    num_res_layers: 1
    paths_block_attention: false
  context_ts_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 4
  decoder_ts:
    decoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerDecoderLayer
      nhead: 4
    name: torch.nn.TransformerDecoder
    num_layers: 4
  delta_time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
  evaluation_mark_encoder:
    name: torch.nn.Linear
  hidden_act:
    name: torch.nn.GELU
  hidden_dim: 256
  loss_weights:
    alpha: 0.0
    mu: 0.0
    nll: 1.0
    relative_spike: 0.0
    smape: 0.0
  mark_encoder:
    name: torch.nn.Linear
    out_features: 256
  mark_fusion_attention: null
  max_num_marks: 22
  model_type: fimhawkes
  mu_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
      - 256
      - 256
    name: fim.models.blocks.base.MLP
  nll:
    method: monte_carlo
    num_integration_points: 200
  normalize_by_max_time: false
  normalize_times: true
  thinning: null
  time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
optimizers: !!python/tuple
  - optimizer_d:
      lr: 5.0e-05
      name: torch.optim.AdamW
      weight_decay: 0.0001
trainer:
  best_metric: loss
  debug_iterations: null
  detect_anomaly: false
  epochs: 100000
  evaluation_epoch:
    enable_plotting: false
    inference_path_idx: 0
    iterator_name: validation
    path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
    plot_frequency: 10
  experiment_dir: ./results/
  gradient_accumulation_steps: 6
  logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
  name: Trainer
  precision: bf16_mixed
  save_every: 1
  schedulers: !!python/tuple
    - beta: 1.0
      label: gauss_nll
      name: fim.utils.param_scheduler.ConstantScheduler
    - beta: 1.0
      label: init_cross_entropy
      name: fim.utils.param_scheduler.ConstantScheduler
    - beta: 1.0
      label: missing_link
      name: fim.utils.param_scheduler.ConstantScheduler
```
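The training objective above sets `nll` with `method: monte_carlo` and `num_integration_points: 200`: the compensator integral in the point-process negative log-likelihood, NLL = ∫₀ᵀ λ(s) ds − Σᵢ log λ(tᵢ), is estimated by uniform sampling over [0, T]. Below is a minimal sketch of such an estimator for a 1D Hawkes intensity; the function and parameter names are illustrative assumptions, not the repository's implementation.

```python
import math
import random

def intensity(t, events, mu, alpha, beta):
    """Hawkes intensity lambda(t) = mu + sum over t_i < t of alpha*beta*exp(-beta*(t - t_i))."""
    return mu + sum(alpha * beta * math.exp(-beta * (t - ti)) for ti in events if ti < t)

def hawkes_nll_mc(events, t_max, mu, alpha, beta, num_integration_points=200, seed=0):
    """NLL = integral_0^T lambda(s) ds - sum_i log lambda(t_i), with the
    integral estimated by plain Monte Carlo over uniform sample times."""
    rng = random.Random(seed)
    log_term = sum(math.log(intensity(t, events, mu, alpha, beta)) for t in events)
    samples = [rng.uniform(0.0, t_max) for _ in range(num_integration_points)]
    integral = t_max * sum(intensity(s, events, mu, alpha, beta) for s in samples) / num_integration_points
    return integral - log_term
```

For the exponential kernel the integral also has a closed form (mu*T + alpha*Σᵢ(1 − exp(−beta*(T − tᵢ)))); Monte Carlo is the choice that generalizes to intensities the model parameterizes with neural networks.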