I'm trying to fine-tune a Llama 2 model with DPO on a multi-GPU setup with two V100s. The code is structured as follows (unnecessary code omitted):
# Imports
device = {"": Accelerator().local_process_index}
model_name = "meta-llama/Llama-2-7b-chat-hf"
system_prompt = "You're an AI assistant that tries to help the user as much as you can."
# Configs
# DPO config
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    output_dir="args/",
    evaluation_strategy="no",
    do_eval=False,
    use_cpu=False,
    logging_steps=1,
    num_train_epochs=3,
)
# Peft config
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05
)
# BnB config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)
# LLMs and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
# Settings training amount
number_of_rounds = 10 # How many times each model is finetuned once
minibatch_size = 5 # How many data entries there are for DPO update
num_guesses = 5
test_size = 3
def main():
    model_a = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
        quantization_config=bnb_config,
        temperature=0.1
    )
    model_b = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
        quantization_config=bnb_config,
        temperature=0.1
    )
    dir_a = "finetuned/a"
    dir_b = "finetuned/b"
    try:
        os.makedirs(dir_a)
        os.makedirs(dir_b)
    except:
        print("Couldn't make a directory")
    explainer = model_a
    guesser = model_b
    """
    explainer.save_pretrained(dir_a)
    guesser.save_pretrained(dir_b)
    """
    round = 0
    for i in tqdm(range(number_of_rounds), desc="Playing Alias"):
        # Make models play the game
        explainer.add_adapter(peft_config)
        guesser.add_adapter(peft_config)
        dataset, ex_dataset, ex_skipped = generate_dataset(minibatch_size, num_guesses, explainer, guesser)
        testdata, ex_testdata, _ = generate_dataset(test_size, num_guesses, explainer, guesser)
        # FT the explainer
        guesser = get_peft_model(
            guesser,
            peft_config,
        )
        explainer = get_peft_model(
            explainer,
            peft_config,
        )
        guesser = get_peft_model(
            guesser,
            peft_config,
        )
        #print(str(dataset))
        #print(device)
        trainer = DPOTrainer(
            guesser,
            explainer,
            args=training_args,
            beta=0.1,
            train_dataset=dataset,
            eval_dataset=testdata,
            tokenizer=tokenizer,
        )
        trainer.train()
        wandb.log({"round": round, "explainer_skipped_frac": ex_skipped / minibatch_size})
        wandb.finish()
        ex_trainer = DPOTrainer(
            explainer,
            guesser,
            args=training_args,
            beta=0.1,
            train_dataset=ex_dataset,
            eval_dataset=ex_testdata,
            tokenizer=tokenizer,
        )
        ex_trainer.train()
        wandb.log({"round": round, "isExplainer": 1})
        wandb.finish()
        # Save the model and switch roles by loading them
        guesser, explainer = explainer, guesser
        if (round % 2 == 0):
            #print(f"\nSaved to dir. {dir_a}\n")
            guesser.save_pretrained(dir_a)
            guesser = AutoModelForCausalLM.from_pretrained(
                dir_b,
                device_map=device,
                local_files_only=True
            )
            explainer = AutoModelForCausalLM.from_pretrained(
                dir_a,
                device_map=device,
                local_files_only=True
            )
        else:
            guesser.save_pretrained(dir_b)
            guesser = AutoModelForCausalLM.from_pretrained(
                dir_a,
                device_map=device,
                local_files_only=True
            )
            explainer = AutoModelForCausalLM.from_pretrained(
                dir_b,
                device_map=device,
                local_files_only=True
            )
        round += 1
    # Automatic evaluation for the latest model
    eval.test_model(guesser, tokenizer, device)

if __name__ == "__main__":
    main()
Here, generate_dataset builds the datasets for both the guesser and the explainer. When I launch the script with the command
accelerate launch script.py
I get the following error:
ValueError: DistributedDataParallel's input module must be on the same type of devices, but input module parameters locate in {'cpu', 'cuda'}.
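The error says that when the trainer wraps a model in DistributedDataParallel, some of its parameters are on the GPU and some are still on the CPU. A quick way to narrow this down is to print where each model's parameters live right before constructing the DPOTrainer. Below is a minimal diagnostic sketch; the helper name report_param_devices and where it is called are my own additions, not part of the script above, and it only inspects placement rather than fixing it:

from collections import Counter

def report_param_devices(model, name="model"):
    # Count how many parameters sit on each device; a mix of
    # 'cpu' and 'cuda:...' here is what DDP refuses to wrap.
    devices = Counter(str(p.device) for p in model.parameters())
    print(f"{name}: {dict(devices)}")
    # Show a few parameter names that are still on the CPU, if any
    offenders = [n for n, p in model.named_parameters() if p.device.type == "cpu"]
    if offenders:
        print(f"{name}: first CPU params -> {offenders[:5]}")

# Hypothetical placement: call this on both models just before
# building the DPOTrainer, e.g.
# report_param_devices(guesser, "guesser")
# report_param_devices(explainer, "explainer")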