I am fine-tuning a model with DPO on top of a saved SFT LoRA checkpoint. Here is my training script:

```python
import torch
from datasets import load_dataset, Dataset
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
)
from peft import AutoPeftModelForCausalLM, LoraConfig
from trl import DPOTrainer

hf_auth = ""
peft_model_path = "test/"

dataset = load_dataset("test_classification")
print("Dataset loaded:", dataset)


def format_instruction(vignette: str):
    return f"""<s>[INST]{vignette.strip()} Generate given Vignette class and explain the reason for class.[/INST] """.strip()


def generate_instruction_dataset(data_point):
    # DPOTrainer expects "prompt", "chosen", and "rejected" columns.
    return {
        "chosen": data_point["chosen"],
        "rejected": data_point["rejected"],
        "prompt": format_instruction(data_point["prompt"]),
    }


def process_dataset(data: Dataset):
    return data.shuffle(seed=42).map(generate_instruction_dataset)


dataset = process_dataset(dataset)
print("Dataset processed:", dataset)

compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

print("Loading base model:")
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_path,  # location of the saved SFT model
    device_map="auto",
    quantization_config=bnb_config,
)

print("Loading reference model:")
model_ref = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_path,  # same checkpoint as the policy model
    device_map="auto",
    quantization_config=bnb_config,
)

print("Loading tokenizer:")
tokenizer = AutoTokenizer.from_pretrained(
    peft_model_path,
    use_auth_token=hf_auth,
    trust_remote_code=True,
)

output_dir = "dpo/output/"
training_args = TrainingArguments(
    output_dir=output_dir,
    remove_unused_columns=True,
    per_device_train_batch_size=4,
)

print("LoRA config added")
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

print("DPO trainer initialized:")
dpo_trainer = DPOTrainer(
    model,
    model_ref,
    args=training_args,
    beta=0.1,
    train_dataset=dataset["train"],
    # eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    max_length=1024,
    max_prompt_length=512,
)

torch.set_grad_enabled(True)
print("DPO trainer started:")
dpo_trainer.train()
print("Training done")
```
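For context, the processed records look roughly like this; the vignette text and class labels below are made-up placeholders, not real data from my set:

```python
# Toy illustration of the record format fed to DPOTrainer; the vignette
# and completions here are invented purely for this example.
def format_instruction(vignette: str):
    return f"""<s>[INST]{vignette.strip()} Generate given Vignette class and explain the reason for class.[/INST] """.strip()

example = {
    "prompt": "A 45-year-old patient presents with ...",  # hypothetical vignette
    "chosen": "Class A, because ...",                     # preferred completion
    "rejected": "Class B, because ...",                   # dispreferred completion
}
print(format_instruction(example["prompt"]))
```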
I am using a g5.12xlarge instance for this training. It has the following GPUs:

GPU 0: NVIDIA A10G
GPU 1: NVIDIA A10G
GPU 2: NVIDIA A10G
GPU 3: NVIDIA A10G
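A quick sanity check confirms all four devices are visible to PyTorch (standard torch.cuda calls, nothing specific to my script):

```python
import torch

# On a g5.12xlarge this should report four NVIDIA A10G cards (cuda:0..cuda:3).
print("CUDA available:", torch.cuda.is_available())
for i in range(torch.cuda.device_count()):
    print(f"cuda:{i} -> {torch.cuda.get_device_name(i)}")
```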
But as soon as dpo_trainer.train() starts, the following error occurs:
```
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:3!
```
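My suspicion is that `device_map="auto"` shards each quantized model across all four GPUs, so the policy and reference models end up with layers on different devices (hence cuda:0 vs cuda:3). Below is a minimal sketch of how I am trying to narrow it down; `hf_device_map` is the placement map accelerate records when a device map is used (I access it via getattr in case the PEFT wrapper does not proxy it), and pinning everything to one GPU is only a workaround that assumes the 4-bit model fits on a single A10G:

```python
# Inspect where accelerate placed each submodule of the two models.
print("policy model map:", getattr(model, "hf_device_map", None))
print("reference model map:", getattr(model_ref, "hf_device_map", None))

# Workaround under test: load both models entirely on cuda:0 instead of
# letting device_map="auto" spread them over cuda:0..cuda:3.
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_path,
    device_map={"": 0},  # place every module on cuda:0
    quantization_config=bnb_config,
)
model_ref = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_path,
    device_map={"": 0},
    quantization_config=bnb_config,
)
```

Is there a way to make DPOTrainer work with both models sharded across the four GPUs, or do I have to fall back to a single device like this?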