Support jointly trained Medusa + LoRA adapters (#482)
tgaddair committed May 22, 2024
1 parent 97ede52 commit a1ff52d
Showing 5 changed files with 145 additions and 19 deletions.
server/lorax_server/adapters/__init__.py (23 changes: 17 additions & 6 deletions)
@@ -1,25 +1,36 @@
 import json
 from pathlib import Path
-from typing import Optional
+from typing import Dict, Optional
 
 from lorax_server.adapters.config import AdapterConfig
 from lorax_server.adapters.lora import LoraConfig
 from lorax_server.adapters.medusa import MedusaConfig
+from lorax_server.adapters.medusa_lora import MedusaLoraConfig
 from lorax_server.adapters.weights import AdapterBatchData, AdapterBatchMetadata
 
 
+def load_medusa_config(config_path: Optional[Path]) -> Optional[Dict]:
+    if config_path is not None and config_path.exists():
+        config = json.load(config_path.open())
+        if "medusa_num_heads" in config:
+            return config
+    return None
+
+
 def load_adapter_config(
     config_path: Optional[Path],
     adapter_config_path: Optional[Path],
     api_token: str,
 ) -> AdapterConfig:
+    medusa_config = load_medusa_config(config_path)
     if adapter_config_path is not None and adapter_config_path.exists():
-        return LoraConfig.load(str(adapter_config_path.parent), api_token)
+        if medusa_config is not None:
+            return MedusaLoraConfig.load(str(adapter_config_path.parent), medusa_config, api_token)
+        else:
+            return LoraConfig.load(str(adapter_config_path.parent), api_token)
 
-    if config_path is not None and config_path.exists():
-        config = json.load(config_path.open())
-        if "medusa_num_heads" in config:
-            return MedusaConfig.load(config)
+    if medusa_config is not None:
+        return MedusaConfig.load(medusa_config)
 
     raise ValueError(f"No valid adapter config file found: " f"tried {adapter_config_path} and {config_path}")
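
For reference, a minimal sketch of how the reworked dispatch can be exercised. The directory layout and file names below are illustrative assumptions, not paths taken from the repository or its tests:

from pathlib import Path

from lorax_server.adapters import load_adapter_config

# Hypothetical layout for a jointly trained Medusa + LoRA adapter: the directory
# ships both a PEFT adapter_config.json and a config.json containing "medusa_num_heads".
adapter_dir = Path("/data/adapters/my-joint-adapter")  # assumed path
config_path = adapter_dir / "config.json"
adapter_config_path = adapter_dir / "adapter_config.json"

adapter_config = load_adapter_config(config_path, adapter_config_path, api_token="")
# Dispatch implied by the diff above:
#   adapter_config.json present and config.json has "medusa_num_heads" -> MedusaLoraConfig
#   only adapter_config.json present                                   -> LoraConfig
#   only config.json with "medusa_num_heads" present                   -> MedusaConfig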

server/lorax_server/adapters/lora.py (17 changes: 13 additions & 4 deletions)
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type, Union
 
 import torch
 from peft import LoraConfig as _LoraConfig
@@ -138,8 +138,8 @@ def _transpose_weights(self):
         self._is_transposed = not self._is_transposed
 
     @classmethod
-    def get_batch_type(cls) -> BatchAdapterWeights:
-        return BatchLoraWeights
+    def get_batch_types(cls) -> List[Type[BatchAdapterWeights]]:
+        return [BatchLoraWeights]
 
     @classmethod
     def load(
@@ -238,8 +238,11 @@ def key(cls) -> str:
     @classmethod
     def load(
         self, adapter_weights: Dict[int, AdapterWeights], meta: AdapterBatchMetadata, prefill: bool
-    ) -> "BatchLoraWeights":
+    ) -> Optional["BatchLoraWeights"]:
+        adapter_weights = {k: _convert_lora(v) for k, v in adapter_weights.items()}
         adapter_weights = {k: v for k, v in adapter_weights.items() if isinstance(v, LoraWeights)}
+        if not adapter_weights:
+            return None
 
         first_weights = list(adapter_weights.values())[0]
         device = first_weights.weights_a.device
@@ -347,3 +350,9 @@ def get_scaling_factor(
     if uses_rslora:
         return lora_alpha / (r**0.5)
     return lora_alpha / r
+
+
+def _convert_lora(v: AdapterWeights) -> AdapterWeights:
+    if hasattr(v, "lora_weights"):
+        return v.lora_weights
+    return v
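
The unwrap-then-filter step added to BatchLoraWeights.load is easier to see in isolation. Below is a self-contained toy version; the Fake* classes are invented stand-ins (real LoraWeights need tensors and a module map), not types from the server:

class FakeLoraWeights:
    """Stand-in for LoraWeights."""


class FakeJointWeights:
    """Stand-in for a joint adapter that exposes nested .lora_weights."""

    def __init__(self):
        self.lora_weights = FakeLoraWeights()


def unwrap(v):
    # Same shape as _convert_lora above: unwrap nested LoRA weights, else pass through.
    return v.lora_weights if hasattr(v, "lora_weights") else v


adapters = {0: FakeLoraWeights(), 1: FakeJointWeights(), 2: object()}
unwrapped = {k: unwrap(v) for k, v in adapters.items()}
lora_only = {k: v for k, v in unwrapped.items() if isinstance(v, FakeLoraWeights)}
print(sorted(lora_only))  # [0, 1]; adapter 2 is dropped, and an empty result maps to "return None"
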
server/lorax_server/adapters/medusa.py (18 changes: 14 additions & 4 deletions)
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
 
 import torch
 import torch.distributed
@@ -229,8 +229,8 @@ def __init__(self, config: MedusaConfig, module_map: ModuleMap, model: "Model"):
         self.process_group = model.process_group
 
     @classmethod
-    def get_batch_type(cls) -> BatchAdapterWeights:
-        return BatchMedusaWeights
+    def get_batch_types(cls) -> List[Type[BatchAdapterWeights]]:
+        return [BatchMedusaWeights]
 
     @property
     def speculative_tokens(self) -> int:
@@ -272,8 +272,12 @@ def __call__(self, x, lm_head):
     @classmethod
     def load(
         cls, adapter_weights: Dict[int, AdapterWeights], meta: "AdapterBatchMetadata", prefill: bool
-    ) -> "BatchMedusaWeights":
+    ) -> Optional["BatchMedusaWeights"]:
+        adapter_weights = {k: _convert_medusa(v) for k, v in adapter_weights.items()}
         adapter_weights = {k: v for k, v in adapter_weights.items() if isinstance(v, MedusaWeights)}
+        if not adapter_weights:
+            return None
+
         default_medusa = adapter_weights.get(0)
 
         segments = meta.adapter_segments
@@ -313,3 +317,9 @@ def load(
                 s_end=segments[[i + 1 for i in indices]],
             ),
         )
+
+
+def _convert_medusa(v: AdapterWeights) -> AdapterWeights:
+    if hasattr(v, "medusa_weights"):
+        return v.medusa_weights
+    return v
server/lorax_server/adapters/medusa_lora.py (93 changes: 93 additions & 0 deletions)
@@ -0,0 +1,93 @@
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
+
+import torch
+
+from lorax_server.adapters.config import AdapterConfig, ModuleMap
+from lorax_server.adapters.lora import BatchLoraWeights, LoraConfig, LoraWeights
+from lorax_server.adapters.medusa import BatchMedusaWeights, MedusaConfig, MedusaWeights
+from lorax_server.adapters.weights import AdapterWeights, BatchAdapterWeights
+
+if TYPE_CHECKING:
+    from lorax_server.models.model import Model
+
+EMPTY_TENSOR = torch.tensor([])
+
+
+@dataclass
+class MedusaLoraModuleMap:
+    lora_module_map: ModuleMap
+    medusa_module_map: ModuleMap
+
+
+@dataclass
+class MedusaLoraConfig(AdapterConfig):
+    lora_config: LoraConfig
+    medusa_config: MedusaConfig
+
+    def map_weights_for_model(
+        self,
+        adapter_weights: Dict,
+        weight_names: Tuple[str],
+    ) -> Tuple[MedusaLoraModuleMap, Set[str]]:
+        lora_module_map, weight_names = self.lora_config.map_weights_for_model(adapter_weights, weight_names)
+        medusa_module_map, _ = self.medusa_config.map_weights_for_model(adapter_weights, weight_names)
+        return MedusaLoraModuleMap(lora_module_map, medusa_module_map), weight_names
+
+    def load_batched_adapter_weights(
+        self,
+        model: "Model",
+        module_map: MedusaLoraModuleMap,
+        layer_type: str,
+        unused_weight_names: Set[str],
+        dynamic: bool,
+    ) -> Optional[AdapterWeights]:
+        lora_weights = self.lora_config.load_batched_adapter_weights(
+            model, module_map.lora_module_map, layer_type, unused_weight_names, dynamic
+        )
+        medusa_weights = self.medusa_config.load_batched_adapter_weights(
+            model, module_map.medusa_module_map, layer_type, unused_weight_names, dynamic
+        )
+        return MedusaLoraWeights.load(
+            lora_weights,
+            medusa_weights,
+        )
+
+    @classmethod
+    def load(cls, adapter_id: str, config: dict, api_token: str) -> "MedusaLoraConfig":
+        lora_config = LoraConfig.load(adapter_id, api_token)
+        medusa_config = MedusaConfig.load(config)
+        return cls(
+            base_model_name_or_path=lora_config.base_model_name_or_path,
+            lora_config=lora_config,
+            medusa_config=medusa_config,
+        )
+
+
+class MedusaLoraWeights(AdapterWeights):
+    def __init__(
+        self,
+        lora_weights: LoraWeights,
+        medusa_weights: MedusaWeights,
+    ):
+        self.lora_weights = lora_weights
+        self.medusa_weights = medusa_weights
+
+    @classmethod
+    def get_batch_types(cls) -> List[Type[BatchAdapterWeights]]:
+        return [BatchLoraWeights, BatchMedusaWeights]
+
+    @property
+    def speculative_tokens(self) -> int:
+        return self.medusa_weights.speculative_tokens
+
+    @classmethod
+    def load(
+        cls,
+        lora_weights: LoraWeights,
+        medusa_weights: MedusaWeights,
+    ) -> Optional[AdapterWeights]:
+        return MedusaLoraWeights(
+            lora_weights,
+            medusa_weights,
+        )
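
A rough usage sketch of the new class, showing how it composes the two existing adapter types. The medusa_config dict and adapter path below are assumptions for illustration; a real Medusa config.json carries more fields:

from lorax_server.adapters.medusa_lora import MedusaLoraConfig

# Assumed minimal Medusa head config; real configs include additional fields.
medusa_config = {"medusa_num_heads": 4, "medusa_num_layers": 1}

config = MedusaLoraConfig.load(
    adapter_id="/data/adapters/my-joint-adapter",  # assumed local adapter path
    config=medusa_config,
    api_token="",
)
# config.lora_config drives LoRA weight mapping/loading, config.medusa_config drives
# the Medusa heads, and the resulting MedusaLoraWeights registers under both batch
# types: MedusaLoraWeights.get_batch_types() == [BatchLoraWeights, BatchMedusaWeights]
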
server/lorax_server/adapters/weights.py (13 changes: 8 additions & 5 deletions)
@@ -1,7 +1,7 @@
 from abc import ABC, abstractclassmethod
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Dict, List, Set, Type
+from typing import Dict, List, Optional, Set, Type
 
 import torch

@@ -27,7 +27,7 @@ class AdapterBatchMetadata:
 
 class AdapterWeights(ABC):
     @abstractclassmethod
-    def get_batch_type(cls) -> "BatchAdapterWeights":
+    def get_batch_types(cls) -> List[Type["BatchAdapterWeights"]]:
         pass
 
     @property
@@ -47,7 +47,7 @@ def key(cls) -> str:
     @abstractclassmethod
     def load(
         cls, adapter_weights: Dict[int, AdapterWeights], meta: "AdapterBatchMetadata", prefill: bool
-    ) -> "BatchAdapterWeights":
+    ) -> Optional["BatchAdapterWeights"]:
         pass


@@ -76,11 +76,14 @@ def get_data(self, meta: AdapterBatchMetadata, prefill: bool) -> Dict[str, Batch
         # bucket adapters by batch class
         adapter_batch_types: Dict[Type[BatchAdapterWeights], Dict[int, AdapterWeights]] = defaultdict(dict)
         for adapter_index, adapter_weights in self.adapter_weights.items():
-            adapter_batch_types[adapter_weights.get_batch_type()][adapter_index] = adapter_weights
+            for batch_type in adapter_weights.get_batch_types():
+                adapter_batch_types[batch_type][adapter_index] = adapter_weights
 
         batch_data = {}
         for batch_type, adapter_weights in adapter_batch_types.items():
-            batch_data[batch_type.key()] = batch_type.load(adapter_weights, meta, prefill)
+            batched_weights = batch_type.load(adapter_weights, meta, prefill)
+            if batched_weights is not None:
+                batch_data[batch_type.key()] = batched_weights
         return batch_data
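
To make the new bucketing concrete, here is a toy re-implementation of the loop above with stub classes; every Fake* name is invented for illustration and is not part of the server:

from collections import defaultdict


class FakeBatchLora:
    @classmethod
    def key(cls):
        return "lora"

    @classmethod
    def load(cls, weights, meta=None, prefill=False):
        # Mirrors the Optional return: nothing to batch means None, which get_data skips.
        return {"lora_adapters": sorted(weights)} if weights else None


class FakeBatchMedusa:
    @classmethod
    def key(cls):
        return "medusa"

    @classmethod
    def load(cls, weights, meta=None, prefill=False):
        return {"medusa_adapters": sorted(weights)} if weights else None


class FakeJointAdapter:
    @classmethod
    def get_batch_types(cls):
        # Like MedusaLoraWeights: a single adapter feeds both batch classes.
        return [FakeBatchLora, FakeBatchMedusa]


class FakeLoraAdapter:
    @classmethod
    def get_batch_types(cls):
        return [FakeBatchLora]


adapter_weights = {0: FakeJointAdapter(), 1: FakeLoraAdapter()}

buckets = defaultdict(dict)
for adapter_index, weights in adapter_weights.items():
    for batch_type in weights.get_batch_types():
        buckets[batch_type][adapter_index] = weights

batch_data = {}
for batch_type, weights in buckets.items():
    batched = batch_type.load(weights)
    if batched is not None:
        batch_data[batch_type.key()] = batched

print(batch_data)
# {'lora': {'lora_adapters': [0, 1]}, 'medusa': {'medusa_adapters': [0]}}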


