# LLM Fine-tuning with LoRA

## SFT with LoRA

### Arguments
MODE: 'train'BASE_DIR: "/home/minsung/workspace/nas_data/"# base dirSAVE_DIR: "/home/minsung/workspace/nas_data/mistral_7b_sft"# directory for saving LLM's checkpointMODEL_PATH: "mistralai/Mistral-7B-v0.1"# Huggingface Model or local path for a checkpointLLM:
is_hf: true # load Huggingface modelbits: 4# x-bit quantizationcache_dir: "${BASE_DIR}"# cache_dir for saving Huggingface model DATASET:
dataset: "kyujinpy/KOpen-platypus"# dataset in Huggingface Datasetsis_hf: true # load Huggingface datasetinstruction_key: "instruction"# name of key to extract instruction text in jsonlanswer_key: "output"# name of key to extract output text in jsonltrain_path: none # custom train dataset (jsonl format)eval_path: none # custom eval dataset (jsonl format)cache_dir: ${BASE_DIR} # cache directory for saving Huggingface Datasetsignore_bp_ins_token: true # ignore gradient of instruction tokens in the SFT step TRAINER:
optim: "paged_adamw_8bit"# optimizermodel_max_length: 2048# maximum sequence length. Sequences will be right padded (and possibly truncated)per_device_train_batch_size: 2# batch size per GPU/TPU/MPS/NPU core/CPU for trainingper_device_eval_batch_size: 2# batch size per GPU/TPU/MPS/NPU core/CPU for evaluationnum_train_epochs: 3# total number of training epochs to performwarmup_steps: 100# linear warmup over warmup_stepslogging_steps: 1# log every X updates steps. Should be an integer or a float in range `[0,1)`lr_scheduler_type: 'cosine'# learning rate schedulerfp16: false # whether to use fp16 (mixed) precision instead of 32-bitbf16: true # whether to use bp16 (mixed) precision instead of 32-bitlearning_rate: 1e-5# the initial learning rate for the optimizerreport_to: "wandb"# choices = {'tensorboard', 'wandb', none}save_steps: 50000# save ckptgradient_checkpointing: true # if True, use gradient checkpointing to save memory at the expense of slower backward passuse_neft: trueWANDB:
PROJECT_NAME: "Mistral_LORA"# wandb project nameNAME: "LLaMA_7B_QLoRA_Test"# wandb experiment name in projectSAVE_CKPT: false
### Run
# Mistral 7B with LoRA
sh scripts/run_mistral_lora_sft.sh
## DPO with LoRA
### Arguments
---
MODE: 'train'
BASE_DIR: "/home/minsung/workspace/nas_data/"  # base dir
SAVE_DIR: "/home/minsung/workspace/nas_data/mistral_7b_dpo"  # directory for saving LLM's checkpoint
MODEL_PATH: "mistralai/Mistral-7B-v0.1"  # Huggingface Model or local path for a checkpoint

LLM:
  is_hf: true  # load Huggingface model
  bits: 4  # x-bit quantization
  cache_dir: "${BASE_DIR}"  # cache_dir for saving Huggingface model

DATASET:
  dataset: "Intel/orca_dpo_pairs"  # dataset in Huggingface Datasets
  is_hf: true  # load Huggingface dataset
  chosen_key: "chosen"  # name of key to extract chosen (preferred) text in jsonl
  rejected_key: "rejected"  # name of key to extract rejected text in jsonl
  # NOTE(review): `none` is the YAML *string* "none", not null — presumably the
  # loader compares against this sentinel string; confirm before changing to null.
  train_path: none  # custom train dataset (jsonl format)
  eval_path: none  # custom eval dataset (jsonl format)
  cache_dir: "${BASE_DIR}"  # cache directory for saving Huggingface Datasets

TRAINER:
  optim: "paged_adamw_8bit"  # optimizer
  model_max_length: 2048  # maximum sequence length. Sequences will be right padded (and possibly truncated)
  per_device_train_batch_size: 2  # batch size per GPU/TPU/MPS/NPU core/CPU for training
  per_device_eval_batch_size: 2  # batch size per GPU/TPU/MPS/NPU core/CPU for evaluation
  num_train_epochs: 3  # total number of training epochs to perform
  warmup_steps: 100  # linear warmup over warmup_steps
  logging_steps: 1  # log every X updates steps. Should be an integer or a float in range `[0,1)`
  lr_scheduler_type: 'cosine'  # learning rate scheduler
  fp16: false  # whether to use fp16 (mixed) precision instead of 32-bit
  bf16: true  # whether to use bf16 (mixed) precision instead of 32-bit
  # Written as 1.0e-5 (not 1e-5): PyYAML's YAML-1.1 float resolver requires a
  # dot, so a bare 1e-5 is silently loaded as the string "1e-5".
  learning_rate: 1.0e-5  # the initial learning rate for the optimizer
  report_to: "wandb"  # choices = {'tensorboard', 'wandb', none}
  save_steps: 50000  # save ckpt
  gradient_checkpointing: true  # if True, use gradient checkpointing to save memory at the expense of slower backward pass
  use_neft: true  # NEFTUNE: NOISY EMBEDDINGS IMPROVE INSTRUCTION FINETUNING

DPO:
  beta: 0.1  # the beta factor in DPO loss. Higher beta means less divergence from the initial policy.

WANDB:
  PROJECT_NAME: "Mistral_LORA"  # wandb project name
  NAME: "LLaMA_7B_DPO_Test"  # wandb experiment name in project
  SAVE_CKPT: false
### Run
# Mistral 7B with LoRA
sh scripts/run_mistral_lora_dpo.sh