fix: Triton usage for GPT-Q (#140)

predibase · Dec 18, 2023 · 9ae65b3 · 9ae65b3
1 parent 5080877
commit 9ae65b3
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/server/lorax_server/utils/gptq/custom_autotune.py b/server/lorax_server/utils/gptq/custom_autotune.py
@@ -88,9 +88,9 @@ def kernel_call():
  # In testings using only 40 reps seems to be close enough and it appears to be what PyTorch uses
  # PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
  return triton.testing.do_bench(
- kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40
+ kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
  )
- except triton.compiler.OutOfResources:
+ except triton.OutOfResources:
  return (float("inf"), float("inf"), float("inf"))
 
  def run(self, *args, **kwargs):