| """ |
| Fine-tune Qwen2.5-0.5B to solve competitive programming problems |
| with chain-of-thought reasoning using the codeforces-cots dataset. |
| """ |
|
|
| import os |
| from datasets import load_dataset |
| from transformers import ( |
| AutoTokenizer, |
| AutoModelForCausalLM, |
| TrainingArguments, |
| Trainer, |
| DataCollatorForLanguageModeling |
| ) |
| import torch |
|

# Configuration
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
DATASET_NAME = "open-r1/codeforces-cots"
OUTPUT_DIR = "./qwen-codeforces-coder"
HF_REPO = "mgbam/qwen-codeforces-coder"

print(f"🚀 Starting fine-tuning: {MODEL_NAME}")
print(f"📊 Dataset: {DATASET_NAME}")
print(f"💾 Output: {HF_REPO}")
print()

# Load tokenizer and model
print("Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
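
# Optional sanity check (an addition, not in the original script): confirm the
# parameter count and dtype before committing to a full training run.
n_params = sum(p.numel() for p in model.parameters())
print(f"Loaded {n_params / 1e6:.1f}M parameters in {model.dtype}")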

# Some tokenizers ship without a pad token; fall back to EOS so batching works
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

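# Note (an added caveat, worth verifying against your transformers version):
# when pad_token is aliased to eos_token, DataCollatorForLanguageModeling masks
# every pad position's label to -100, which also hides the genuine end-of-turn
# token, so the model gets no direct signal to stop generating.
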
# Load the dataset
print(f"Loading dataset: {DATASET_NAME}...")
dataset = load_dataset(DATASET_NAME, split="train")

# Subsample so the demo run finishes quickly
dataset = dataset.select(range(min(1000, len(dataset))))
print(f"Training on {len(dataset)} examples")

# Hold out 10% of the sample for evaluation
dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]


def format_prompt(example):
    """Format the dataset into instruction-following format."""
    # Column names vary between dataset configs, so fall back gracefully
    problem = example.get('problem', example.get('text', ''))
    solution = example.get('solution', example.get('output', ''))

    # Qwen2.5 chat markup (ChatML): system / user / assistant turns
    prompt = f"""<|im_start|>system
You are a competitive programming expert. Solve problems with clear chain-of-thought reasoning.<|im_end|>
<|im_start|>user
{problem}<|im_end|>
<|im_start|>assistant
{solution}<|im_end|>"""

    return {"text": prompt}

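# Alternative (a sketch, not part of the original script): the Qwen2.5-Instruct
# tokenizer ships a chat template that emits this same ChatML layout, so the
# prompt could equivalently be built with:
#
#   messages = [
#       {"role": "system", "content": "You are a competitive programming expert. "
#                                     "Solve problems with clear chain-of-thought reasoning."},
#       {"role": "user", "content": problem},
#       {"role": "assistant", "content": solution},
#   ]
#   text = tokenizer.apply_chat_template(messages, tokenize=False)
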
# Apply the chat formatting, dropping the original columns
print("Formatting dataset...")
train_dataset = train_dataset.map(format_prompt, remove_columns=train_dataset.column_names)
eval_dataset = eval_dataset.map(format_prompt, remove_columns=eval_dataset.column_names)


def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=2048,
        padding="max_length",
    )


| print("Tokenizing...") |
| train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=["text"]) |
| eval_dataset = eval_dataset.map(tokenize_function, batched=True, remove_columns=["text"]) |
|
|
# Keep only the model inputs; labels are added by the data collator below
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
eval_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
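
# Quick shape check (an illustrative addition): each example should now be a
# fixed 2048-token block with a matching attention mask.
sample = train_dataset[0]
print(f"Example block: {len(sample['input_ids'])} input ids, "
      f"{int(sample['attention_mask'].sum())} non-padding tokens")
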
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    warmup_steps=100,
    logging_steps=10,
    eval_steps=50,
    save_steps=100,
    eval_strategy="steps",
    save_strategy="steps",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    fp16=False,
    bf16=True,
    push_to_hub=True,
    hub_model_id=HF_REPO,
    hub_strategy="every_save",
    report_to=["tensorboard"],
    logging_first_step=True,
)

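# Effective batch size: per_device_train_batch_size (4) x
# gradient_accumulation_steps (4) = 16 sequences per optimizer step, per device.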

# mlm=False yields causal-LM labels: input_ids are copied to labels and
# padding positions are masked out with -100
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

print("Initializing trainer...")
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

print("\n" + "="*50)
print("🔥 Starting training!")
print("="*50 + "\n")

trainer.train()

print("\n" + "="*50)
print("💾 Saving final model...")
print("="*50 + "\n")

trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"🤗 Pushing to Hub: {HF_REPO}")
trainer.push_to_hub()

| print("\n" + "="*50) |
| print("β
Training complete!") |
| print(f"π― Model available at: https://huggingface.co/{HF_REPO}") |
| print("="*50) |
|
|
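
# Usage sketch (an addition; the prompt and generation settings are
# assumptions): load the pushed checkpoint back from the Hub and solve a fresh
# problem with the text-generation pipeline.
#
#   from transformers import pipeline
#   pipe = pipeline("text-generation", model=HF_REPO, torch_dtype=torch.bfloat16)
#   messages = [
#       {"role": "system", "content": "You are a competitive programming expert."},
#       {"role": "user", "content": "Given an array of n integers, ..."},
#   ]
#   print(pipe(messages, max_new_tokens=512)[0]["generated_text"])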