feat: Add Support for Devices other than CUDA in Telemetry and Web UI #902

Status: Open — wants to merge 31 commits into main (changes shown from 11 commits).

Commits
aa6bf9d
Added build configurations for Intel and AMD hardware
cromefire Nov 25, 2023
5732d7d
Improved rocm build
cromefire Nov 25, 2023
1e05350
Added options for OneAPI and ROCm
cromefire Nov 25, 2023
3496bb9
Build llama using icx
cromefire Nov 26, 2023
6098475
[autofix.ci] apply automated fixes
autofix-ci[bot] Nov 26, 2023
7d38fc4
Fixed rocm image
cromefire Nov 26, 2023
4f90c98
Build ROCm
cromefire Nov 26, 2023
c0804cb
Tried to adjust compile flags for SYCL
cromefire Nov 26, 2023
8d56d37
Removed references to oneAPI
cromefire Nov 26, 2023
dd72b82
Provide info about the used device for ROCm
cromefire Nov 26, 2023
b4a02d1
Added ROCm documentation
cromefire Nov 26, 2023
7b40db6
Addressed review comments
cromefire Nov 27, 2023
55c7884
Refactored to expose generic accelerator information
cromefire Nov 28, 2023
cbbdf22
Pull request cleanup
cromefire Nov 28, 2023
c2ddb59
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
f80007c
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 10, 2023
dd80a3a
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
b993d5d
Tried to fix most build issues
cromefire Dec 10, 2023
1bc6f48
Fixed the rest of the build issues
cromefire Dec 10, 2023
c73b518
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
94d22c4
Added rocm binary build
cromefire Dec 10, 2023
7e58915
Merge branch 'main' into rocm-support
cromefire Dec 22, 2023
b3ef2e2
Added ROCm 6.0 build
cromefire Dec 22, 2023
df62a7d
Fixed build and slimmed down container
cromefire Dec 22, 2023
5f6a63d
Merge branch 'main' into rocm-support
cromefire Jan 2, 2024
234f58a
Fixed certificates in docker file
cromefire Jan 2, 2024
28ce9f1
Fixed merge issue
cromefire Jan 2, 2024
fd0891b
Fixed merge issue in workflow
cromefire Jan 2, 2024
b471554
Merge branch 'main' into rocm-support
cromefire Jan 14, 2024
b13bbec
Added support for specifying targets for docker build
cromefire Jan 14, 2024
99f63f3
Fixed docker build
cromefire Jan 15, 2024
6 changes: 6 additions & 0 deletions .dockerignore
@@ -1,2 +1,8 @@
.idea
ci
clients
.github
python
**/target
**/node_modules
website
45 changes: 40 additions & 5 deletions .github/workflows/docker.yml
@@ -50,7 +50,10 @@ jobs:

# Workaround: https://github.com/docker/build-push-action/issues/461
- name: Setup Docker buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
with:
# Needed to support OCI annotations
version: v0.12.0

# Login against a Docker registry except on PR
# https://github.com/docker/login-action
@@ -78,7 +81,7 @@ jobs:

- name: Docker meta
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5.0.0
with:
# list of Docker images to use as base name for tags
images: |
@@ -91,17 +94,50 @@
type=schedule,pattern={{date 'YYYYMMDD'}}
type=semver,pattern={{version}}

- name: Docker meta for ROCm
id: meta-rocm
uses: docker/metadata-action@v5.0.0
with:
# list of Docker images to use as base name for tags
images: |
ghcr.io/${{ env.IMAGE_NAME }}/rocm
${{ env.IMAGE_NAME }}-rocm
# generate Docker tags based on the following events/attributes
variant: rocm
tags: |
type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
type=schedule,pattern=nightly
type=schedule,pattern={{date 'YYYYMMDD'}}
type=semver,pattern={{version}}

# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v3.1.1
uses: docker/build-push-action@v5.1.0
with:
file: Dockerfile
file: cuda.Dockerfile
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
annotations: ${{ steps.meta.outputs.labels }}
cache-from: ${{ steps.cache.outputs.cache-from }}
cache-to: ${{ steps.cache.outputs.cache-to }}
build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}

# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image for ROCm
id: build-and-push-rocm
uses: docker/build-push-action@v5.1.0
with:
file: rocm.Dockerfile
push: true
context: .
tags: ${{ steps.meta-rocm.outputs.tags }}
labels: ${{ steps.meta-rocm.outputs.labels }}
annotations: ${{ steps.meta-rocm.outputs.labels }}
cache-from: ${{ steps.cache.outputs.cache-from }}
cache-to: ${{ steps.cache.outputs.cache-to }}
build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
@@ -112,4 +148,3 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
repository: tabbyml/tabby
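To try the new ROCm image definition locally before CI runs, a rough equivalent of the workflow's build step would look like the following; the tag and `RUST_TOOLCHAIN` value below are placeholders, not what CI publishes:

```bash
# Sketch of a local build of the ROCm variant from the repository root.
# CI injects RUST_TOOLCHAIN from the workflow environment; 1.73.0 is a placeholder.
docker build \
  -f rocm.Dockerfile \
  -t tabby-rocm:dev \
  --build-arg RUST_TOOLCHAIN=1.73.0 \
  .
```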

21 changes: 21 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

13 changes: 12 additions & 1 deletion README.md
@@ -49,14 +49,25 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta
- ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration)

### Run Tabby in 1 Minute
The easiest way to start a Tabby server is by using the following Docker command:
The easiest way to start a Tabby server is by using the following Docker command...

...with cuda:
```bash
docker run -it \
--gpus all -p 8080:8080 -v $HOME/.tabby:/data \
tabbyml/tabby \
serve --model TabbyML/StarCoder-1B --device cuda
```

...with ROCm (Linux only):
```bash
docker run -it \
--device /dev/dri --device /dev/kfd \
-p 8080:8080 -v $HOME/.tabby:/data \
tabbyml/tabby-rocm \
serve --model TabbyML/StarCoder-1B --device rocm
```

For additional options (e.g inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby).

## 🤝 Contributing
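Beyond the command shown in the README diff above, ROCm containers commonly also need the container user to have access to `/dev/kfd` and `/dev/dri`, which many Linux distributions gate behind the `video`/`render` groups. A hedged variant of the same invocation under that assumption:

```bash
# Same command as the README example, with group access added for hosts
# where /dev/kfd and /dev/dri are restricted to the video/render groups.
docker run -it \
  --device /dev/dri --device /dev/kfd \
  --group-add video --group-add render \
  -p 8080:8080 -v $HOME/.tabby:/data \
  tabbyml/tabby-rocm \
  serve --model TabbyML/StarCoder-1B --device rocm
```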
1 change: 1 addition & 0 deletions crates/llama-cpp-bindings/Cargo.toml
@@ -5,6 +5,7 @@ edition = "2021"

[features]
cuda = []
rocm = []

[build-dependencies]
cxx-build = "1.0"
22 changes: 20 additions & 2 deletions crates/llama-cpp-bindings/build.rs
@@ -4,6 +4,7 @@ use cmake::Config;

fn main() {
const LLAMA_CMAKE_PATH: &str = "llama.cpp/CMakeLists.txt";
const AMDGPU_TARGETS: &str = "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102";

assert!(
Path::new(LLAMA_CMAKE_PATH).exists(),
@@ -31,14 +32,31 @@ fn main() {
println!("cargo:rustc-link-lib=cublas");
println!("cargo:rustc-link-lib=cublasLt");
}
if cfg!(feature = "rocm") {
let rocm_root = "/opt/rocm";
config.define("LLAMA_HIPBLAS", "ON");
config.define("CMAKE_C_COMPILER", format!("{}/llvm/bin/clang", rocm_root));
config.define(
"CMAKE_CXX_COMPILER",
format!("{}/llvm/bin/clang++", rocm_root),
);
config.define("AMDGPU_TARGETS", AMDGPU_TARGETS);
println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries");
println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root);
println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root);
println!("cargo:rustc-link-search=native={}/hipblas/lib", rocm_root);
println!("cargo:rustc-link-lib=amdhip64");
println!("cargo:rustc-link-lib=rocblas");
println!("cargo:rustc-link-lib=hipblas");
}

let dst = config.build();
println!("cargo:rustc-link-search=native={}/build", dst.display());

cxx_build::bridge("src/lib.rs")
.file("src/engine.cc")
.flag_if_supported("-Iinclude")
.flag_if_supported("-Illama.cpp")
.include("include")
.include("llama.cpp")
.flag_if_supported("-std=c++14")
.compile("cxxbridge");
}
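Because the ROCm paths above are hard-coded to `/opt/rocm`, it can be worth confirming that a `rocm`-feature build actually linked the HIP and BLAS libraries; a minimal check, assuming a standard release build layout for the final `tabby` binary:

```bash
# Inspect the dynamic dependencies of the built binary for the ROCm libraries
# named in build.rs (amdhip64, rocblas, hipblas).
ldd target/release/tabby | grep -E 'amdhip64|rocblas|hipblas'
```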
6 changes: 4 additions & 2 deletions crates/tabby/Cargo.toml
@@ -6,7 +6,8 @@ edition = "2021"
[features]
default = ["ee"]
ee = ["dep:tabby-webserver"]
cuda = ["llama-cpp-bindings/cuda"]
cuda = ["llama-cpp-bindings/cuda", "dep:nvml-wrapper"]
rocm = ["llama-cpp-bindings/rocm", "dep:rocm_smi_lib"]
experimental-http = ["dep:http-api-bindings"]

[dependencies]
@@ -36,7 +37,8 @@ tracing-opentelemetry = "0.18.0"
tantivy = { workspace = true }
anyhow = { workspace = true }
sysinfo = "0.29.8"
nvml-wrapper = "0.9.0"
nvml-wrapper = { version = "0.9.0", optional = true }
rocm_smi_lib = { version = "0.1.14", optional = true }
http-api-bindings = { path = "../http-api-bindings", optional = true } # included when build with `experimental-http` feature
async-stream = { workspace = true }
axum-streams = { version = "0.9.1", features = ["json"] }
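With the feature and optional dependencies wired up this way, a native (non-Docker) ROCm build would presumably be enabled along these lines, assuming ROCm is installed under `/opt/rocm` as the build scripts expect:

```bash
# Illustrative only: build the tabby crate with the new rocm feature enabled.
cargo build --release -p tabby --features rocm
```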
4 changes: 4 additions & 0 deletions crates/tabby/build.rs
@@ -3,6 +3,10 @@ use std::error::Error;
use vergen::EmitBuilder;

fn main() -> Result<(), Box<dyn Error>> {
if cfg!(feature = "rocm") {
let rocm_root = "/opt/rocm";
println!("cargo:rustc-link-search=native={}/lib", rocm_root);
}
// touch
EmitBuilder::builder()
.all_build()
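The extra link-search path only affects linking at build time; at runtime the dynamic loader still has to locate the ROCm shared libraries. On hosts where `/opt/rocm/lib` is not already in the loader's search path (an assumption about the local setup, not something this PR changes), something like the following may be needed:

```bash
# Make the ROCm runtime libraries discoverable by the dynamic loader.
export LD_LIBRARY_PATH=/opt/rocm/lib:${LD_LIBRARY_PATH}
```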
36 changes: 25 additions & 11 deletions crates/tabby/src/main.rs
@@ -1,22 +1,23 @@
mod routes;
mod services;

mod download;
mod serve;

#[cfg(feature = "ee")]
mod worker;

use clap::{Parser, Subcommand};
use opentelemetry::{
global,
sdk::{propagation::TraceContextPropagator, trace, trace::Sampler, Resource},
KeyValue,
};
use opentelemetry_otlp::WithExportConfig;
use tabby_common::config::Config;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer};

use tabby_common::config::Config;

mod routes;
mod services;

mod download;
mod serve;

#[cfg(feature = "ee")]
mod worker;

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
#[command(propagate_version = true)]
@@ -69,6 +70,10 @@ pub enum Device {
#[strum(serialize = "cuda")]
Cuda,

#[cfg(feature = "rocm")]
#[strum(serialize = "rocm")]
Rocm,

#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
#[strum(serialize = "metal")]
Metal,
@@ -89,7 +94,16 @@ impl Device {
*self == Device::Cuda
}

#[cfg(not(any(all(target_os = "macos", target_arch = "aarch64"), feature = "cuda")))]
#[cfg(feature = "rocm")]
pub fn ggml_use_gpu(&self) -> bool {
*self == Device::Rocm
}

#[cfg(not(any(
all(target_os = "macos", target_arch = "aarch64"),
feature = "cuda",
feature = "rocm",
)))]
pub fn ggml_use_gpu(&self) -> bool {
false
}
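With the `Device::Rocm` variant compiled in behind the `rocm` feature, the device becomes selectable on the CLI just like `cuda`; an illustrative invocation of a locally built binary (model name reused from the README example):

```bash
# Run a ROCm-enabled local build; assumes the binary was built with --features rocm.
./target/release/tabby serve --model TabbyML/StarCoder-1B --device rocm
```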
31 changes: 27 additions & 4 deletions crates/tabby/src/services/health.rs
@@ -1,7 +1,12 @@
use std::env::consts::ARCH;

use anyhow::Result;
#[cfg(feature = "cuda")]
use nvml_wrapper::Nvml;
#[cfg(feature = "rocm")]
use rocm_smi_lib::error::RocmErr;
#[cfg(feature = "rocm")]
use rocm_smi_lib::RocmSmi;
use serde::{Deserialize, Serialize};
use sysinfo::{CpuExt, System, SystemExt};
use utoipa::ToSchema;
@@ -18,15 +23,15 @@ pub struct HealthState {
arch: String,
cpu_info: String,
cpu_count: usize,
cuda_devices: Vec<String>,
gpu_devices: Vec<String>,
version: Version,
}

impl HealthState {
pub fn new(model: Option<&str>, chat_model: Option<&str>, device: &Device) -> Self {
let (cpu_info, cpu_count) = read_cpu_info();

let cuda_devices = match read_cuda_devices() {
let cuda_devices = match read_gpu_devices() {
Ok(s) => s,
Err(_) => vec![],
};
@@ -38,7 +43,7 @@ impl HealthState {
arch: ARCH.to_string(),
cpu_info,
cpu_count,
cuda_devices,
gpu_devices: cuda_devices,
version: Version::new(),
}
}
@@ -59,7 +64,8 @@ pub fn read_cpu_info() -> (String, usize) {
(info, count)
}

pub fn read_cuda_devices() -> Result<Vec<String>> {
#[cfg(feature = "cuda")]
pub fn read_gpu_devices() -> Result<Vec<String>> {
// In cases of MacOS or docker containers where --gpus are not specified,
// the Nvml::init() would return an error. In these scenarios, we
// assign cuda_devices to be empty, indicating that the current runtime
@@ -74,6 +80,23 @@ pub fn read_cuda_devices() -> Result<Vec<String>> {
Ok(cuda_devices)
}

#[cfg(feature = "rocm")]
pub fn read_gpu_devices() -> Result<Vec<String>, RocmErr> {
let rocm = RocmSmi::init()?;
let mut rocm_devices = vec![];
let device_count = rocm.get_device_count();
for i in 0..device_count {
let name = rocm.get_device_identifiers(i)?.name;
rocm_devices.push(name);
}
Ok(rocm_devices)
}

#[cfg(not(any(feature = "cuda", feature = "rocm",)))]
pub fn read_gpu_devices() -> Result<Vec<String>> {
Ok(vec![])
}

#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
pub struct Version {
build_date: String,
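Since the health payload now reports `gpu_devices` instead of `cuda_devices`, existing consumers of the old field would need to adjust for the rename. A quick way to see which accelerators the server detected, assuming a local instance on port 8080 exposing the `/v1/health` route:

```bash
# Print the accelerators reported by the health endpoint.
curl -s http://localhost:8080/v1/health | jq '.gpu_devices'
```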