| """## Import libraries""" |
|
|
| import torch |
| from datasets import load_dataset |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| from trl import SFTConfig, SFTTrainer, setup_chat_format |
| from peft import LoraConfig |
|
|
| """# Load Dataset""" |
|
|
| dataset_name = "allenai/tulu-3-sft-personas-code" |
|
|
| |
| dataset = load_dataset(dataset_name, split="train") |
| print(f"Dataset loaded: {dataset}") |
|
|
| |
| print("\nSample data:") |
| print(dataset[0]) |
|
|
# Drop the raw "prompt" column; the "messages" column already holds the full
# conversation that SFTTrainer trains on.
dataset = dataset.remove_columns("prompt")
dataset = dataset.train_test_split(test_size=0.2)


print(
    f"Train Samples: {len(dataset['train'])}\nTest Samples: {len(dataset['test'])}"
)

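"""Each record stores its conversation under a `messages` key. A minimal
sketch of the expected shape (the text values below are illustrative, not
actual dataset rows):
"""

# Illustrative only: structure of one record after dropping "prompt".
example_record = {
    "messages": [
        {"role": "user", "content": "Write a function that reverses a string."},
        {"role": "assistant", "content": "def reverse(s):\n    return s[::-1]"},
    ]
}
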
| """## Configuration |
| |
| Set up the configuration parameters for the fine-tuning process. |
| """ |
|
|
| |
| model_name = "Qwen/Qwen3-30B-A3B" |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| output_dir = "./tmp/sft-model" |
| num_train_epochs = 1 |
| per_device_train_batch_size = 1 |
| gradient_accumulation_steps = 1 |
| learning_rate = 2e-4 |
|
|
| """## Load model and tokenizer""" |
|
|
| |
| model = AutoModelForCausalLM.from_pretrained( |
| model_name, |
| torch_dtype=torch.bfloat16, |
| use_cache=False, |
| device_map="auto", |
| ) |
|
|
| |
| tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
|
| |
| |
| |
|
|
| |
| |
| |
|
|
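"""Loading a 30B-parameter model in bf16 takes roughly 60 GB of accelerator
memory. If that does not fit, a common alternative (not used in this script)
is 4-bit QLoRA-style loading with bitsandbytes; a minimal sketch, assuming
`bitsandbytes` is installed:
"""

from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize the frozen base weights to 4-bit
    bnb_4bit_quant_type="nf4",              # NormalFloat4, the usual QLoRA choice
    bnb_4bit_compute_dtype=torch.bfloat16,  # do the matmuls in bf16
)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, quantization_config=bnb_config, device_map="auto"
# )
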
| """## Configure PEFT (if enabled)""" |
|
|
| |
| peft_config = LoraConfig( |
| r=32, |
| lora_alpha=16, |
| lora_dropout=0.05, |
| bias="none", |
| task_type="CAUSAL_LM", |
| target_modules="all-linear", |
| ) |
|
|
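"""Note the scaling convention: LoRA multiplies the learned low-rank update
by `lora_alpha / r` before adding it to the frozen weight, so this
configuration applies W' = W + (16 / 32) * B @ A = W + 0.5 * B @ A.
"""

lora_scaling = peft_config.lora_alpha / peft_config.r
print(f"LoRA scaling factor: {lora_scaling}")  # 16 / 32 = 0.5
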
| """## Configure SFT Trainer""" |
|
|
| |
| training_args = SFTConfig( |
| output_dir=output_dir, |
| num_train_epochs=num_train_epochs, |
| per_device_train_batch_size=per_device_train_batch_size, |
| gradient_accumulation_steps=gradient_accumulation_steps, |
| learning_rate=learning_rate, |
| gradient_checkpointing=True, |
| logging_steps=25, |
| save_strategy="epoch", |
| optim="adamw_torch", |
| lr_scheduler_type="cosine", |
| warmup_ratio=0.1, |
| max_length=1024, |
| packing=True, |
| eos_token=tokenizer.eos_token, |
| bf16=True, |
| fp16=False, |
| max_steps=1000, |
| report_to="wandb", |
| ) |
|
|
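"""With these settings each optimizer step sees a small effective batch; a
quick sanity check (a single process is assumed here, scale by the number of
GPUs when training distributed):
"""

# effective batch = per-device batch * gradient accumulation * #processes
world_size = 1  # assumption: single-process training
effective_batch_size = (
    per_device_train_batch_size * gradient_accumulation_steps * world_size
)
print(f"Effective batch size: {effective_batch_size}")  # 1 * 1 * 1 = 1
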
| """## Initialize and run the SFT Trainer""" |
|
|
| |
| trainer = SFTTrainer( |
| model=model, |
| args=training_args, |
| train_dataset=dataset["train"], |
| eval_dataset=dataset["test"] if "test" in dataset else None, |
| peft_config=peft_config, |
| processing_class=tokenizer, |
| ) |
|
|
| |
| trainer.train() |
|
|
| """## Save the fine-tuned model""" |
|
|
| |
| trainer.save_model(output_dir) |
|
|
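"""`trainer.save_model` writes the LoRA adapter (not the full base weights) to
`output_dir`. It can be handy to store the tokenizer alongside it so the
directory is self-contained; a small optional addition:
"""

# Save the tokenizer next to the adapter so the output directory can be
# reloaded without fetching the tokenizer from the Hub again.
tokenizer.save_pretrained(output_dir)
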
| """## Test the fine-tuned model""" |
|
|
| from peft import PeftModel, PeftConfig |
|
|
| |
| base_model = AutoModelForCausalLM.from_pretrained( |
| model_name, trust_remote_code=True, torch_dtype=torch.bfloat16 |
| ) |
|
|
| |
| model = PeftModel.from_pretrained(base_model, output_dir) |
| tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) |
prompt = """Write a function called is_palindrome that takes a single string as input and returns True if the string is a palindrome, and False otherwise.

Palindrome Definition:

A palindrome is a word, phrase, number, or other sequence of characters that reads the same forward and backward, ignoring spaces, punctuation, and capitalization.

Example:
```
is_palindrome("racecar") # Returns True
is_palindrome("hello") # Returns False
is_palindrome("A man, a plan, a canal: Panama") # Returns True
```
"""

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
# Render the conversation with the model's chat template
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(f"Formatted prompt: {formatted_prompt}")

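"""One Qwen3-specific detail: its chat template accepts an `enable_thinking`
flag that toggles the model's reasoning trace. A variant that requests a
direct answer without the <think> block (check the model card for the exact
template behavior of your checkpoint):
"""

formatted_prompt_no_think = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
)
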
model.eval()
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\nGenerated Response:")
print(response)

# Push the trained LoRA adapter to the Hugging Face Hub
model.push_to_hub("burtenshaw/Qwen3-30B-A3B-python-code")
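"""`push_to_hub` on a PeftModel uploads only the adapter weights. To publish a
standalone checkpoint instead, the adapter can first be merged into the base
weights; a minimal sketch (the repo name below is a placeholder, not a real
repo):
"""

# Merge the LoRA update into the base weights and push a full model.
merged_model = model.merge_and_unload()
merged_model.push_to_hub("your-username/Qwen3-30B-A3B-python-code-merged")
tokenizer.push_to_hub("your-username/Qwen3-30B-A3B-python-code-merged")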