{ "lr_mp": 5e-05, "lr_vision_backbone": 0.0, "lr_language_backbone": 5e-05, "lr_full_decoder": 0.0, "batch_size": 16, "gradient_accumulation_steps": 8, "max_grad_norm": 1.0, "max_training_steps": 5000, "stop_after_step": 5000, "warmup_ratio": 0.03, "stats_log_interval": 100, "precision": "bf16", "compile": false, "log_train_samples": false, "do_eval": true, "eval_interval": 500, "max_val_batches": 32, "max_images_per_example": 1, "max_sample_length": 6144, "train_dataset_path": "patrickamadeus/the_cauldron", "train_dataset_name": [ "all" ], "train_split": "train", "val_split": "validation", "stream_dataset": true, "dataloader_num_workers": 2, "dataloader_pin_memory": true, "enable_source_filter": true, "allowed_dataset_sources": [ "dvqa", "tallyqa", "figureqa", "ocrvqa", "vqav2", "clevr", "cocoqa", "textvqa", "visual7w", "st_vqa", "docvqa", "aokvqa", "chartqa" ], "relevance_min_rating": 1, "image_correspondence_min_rating": 1, "visual_dependency_min_rating": 1, "formatting_min_rating": 1, "wandb_entity": "HuggingFace", "log_wandb": false, "push_checkpoints_to_hub": true, "save_training_state_to_hub": true, "checkpoint_repo_pattern": "patrickamadeus/stackvlm-{i}", "hf_private": false, "push_final_model_to_hub": false, "resume_from_vlm_checkpoint": false, "resume_checkpoint_path": "patrickamadeus/stackvlm-1500", "resume_start_step": 1500 }