feat: Add Support for Devices other than CUDA in Telemetry and Web UI #902

Open · wants to merge 31 commits into main from rocm-support (diff shown is from 12 of the 31 commits)

Commits:
- aa6bf9d: Added build configurations for Intel and AMD hardware (cromefire, Nov 25, 2023)
- 5732d7d: Improved rocm build (cromefire, Nov 25, 2023)
- 1e05350: Added options for OneAPI and ROCm (cromefire, Nov 25, 2023)
- 3496bb9: Build llama using icx (cromefire, Nov 26, 2023)
- 6098475: [autofix.ci] apply automated fixes (autofix-ci[bot], Nov 26, 2023)
- 7d38fc4: Fixed rocm image (cromefire, Nov 26, 2023)
- 4f90c98: Build ROCm (cromefire, Nov 26, 2023)
- c0804cb: Tried to adjust compile flags for SYCL (cromefire, Nov 26, 2023)
- 8d56d37: Removed references to oneAPI (cromefire, Nov 26, 2023)
- dd72b82: Provide info about the used device for ROCm (cromefire, Nov 26, 2023)
- b4a02d1: Added ROCm documentation (cromefire, Nov 26, 2023)
- 7b40db6: Addressed review comments (cromefire, Nov 27, 2023)
- 55c7884: Refactored to expose generic accelerator information (cromefire, Nov 28, 2023)
- cbbdf22: Pull request cleanup (cromefire, Nov 28, 2023)
- c2ddb59: Merge branch 'main' into rocm-support (cromefire, Dec 10, 2023)
- f80007c: [autofix.ci] apply automated fixes (autofix-ci[bot], Dec 10, 2023)
- dd80a3a: Merge branch 'main' into rocm-support (cromefire, Dec 10, 2023)
- b993d5d: Tried to fix most build issues (cromefire, Dec 10, 2023)
- 1bc6f48: Fixed the rest of the build issues (cromefire, Dec 10, 2023)
- c73b518: Merge branch 'main' into rocm-support (cromefire, Dec 10, 2023)
- 94d22c4: Added rocm binary build (cromefire, Dec 10, 2023)
- 7e58915: Merge branch 'main' into rocm-support (cromefire, Dec 22, 2023)
- b3ef2e2: Added ROCm 6.0 build (cromefire, Dec 22, 2023)
- df62a7d: Fixed build and slimmed down container (cromefire, Dec 22, 2023)
- 5f6a63d: Merge branch 'main' into rocm-support (cromefire, Jan 2, 2024)
- 234f58a: Fixed certificates in docker file (cromefire, Jan 2, 2024)
- 28ce9f1: Fixed merge issue (cromefire, Jan 2, 2024)
- fd0891b: Fixed merge issue in workflow (cromefire, Jan 2, 2024)
- b471554: Merge branch 'main' into rocm-support (cromefire, Jan 14, 2024)
- b13bbec: Added support for specifying targets for docker build (cromefire, Jan 14, 2024)
- 99f63f3: Fixed docker build (cromefire, Jan 15, 2024)
6 changes: 6 additions & 0 deletions .dockerignore
@@ -1,2 +1,8 @@
.idea
ci
clients
.github
python
**/target
**/node_modules
website
@@ -1,4 +1,4 @@
-name: Create and publish docker image
+name: Create and publish CUDA docker image
 
 on:
   workflow_dispatch:
@@ -50,7 +50,10 @@ jobs:
 
       # Workaround: https://github.com/docker/build-push-action/issues/461
       - name: Setup Docker buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3.0.0
+        with:
+          # Needed to support OCI annotations
+          version: v0.12.0
 
       # Login against a Docker registry except on PR
       # https://github.com/docker/login-action
@@ -78,7 +81,7 @@
 
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5.0.0
         with:
           # list of Docker images to use as base name for tags
           images: |
@@ -95,13 +98,14 @@
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
-        uses: docker/build-push-action@v3.1.1
+        uses: docker/build-push-action@v5.1.0
        with:
-          file: Dockerfile
+          file: cuda.Dockerfile
          push: true
          context: .
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
+          annotations: ${{ steps.meta.outputs.labels }}
          cache-from: ${{ steps.cache.outputs.cache-from }}
          cache-to: ${{ steps.cache.outputs.cache-to }}
          build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
@@ -112,4 +116,3 @@
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          repository: tabbyml/tabby
-
119 changes: 119 additions & 0 deletions .github/workflows/docker-rocm.yml
@@ -0,0 +1,119 @@
name: Create and publish ROCm docker image

on:
  workflow_dispatch:
  schedule:
    - cron: '0 20 */1 * *'
  push:
    tags:
      - 'v*'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
  # If this is enabled, it will cancel the current run and start the latest
  cancel-in-progress: true

env:
  RUST_TOOLCHAIN: 1.73.0

jobs:
  release-docker:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write

    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed
          # if set to "true", but frees about 6 GB
          tool-cache: true

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          swap-storage: true

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      # Workaround: https://github.com/docker/build-push-action/issues/461
      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v3.0.0
        with:
          # Needed to support OCI annotations
          version: v0.12.0

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into GitHub Container registry
        uses: docker/login-action@v3.0.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Log into Docker Hub
        uses: docker/login-action@v3.0.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Generate image name
        run: |
          echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV}

      - uses: int128/docker-build-cache-config-action@v1
        id: cache
        with:
          image: ghcr.io/${{ env.IMAGE_NAME }}/cache

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5.0.0
        with:
          # list of Docker images to use as base name for tags
          images: |
            ghcr.io/${{ env.IMAGE_NAME }}/rocm
            ${{ env.IMAGE_NAME }}-rocm
          # generate Docker tags based on the following events/attributes
          variant: rocm
          tags: |
            type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
            type=schedule,pattern=nightly
            type=schedule,pattern={{date 'YYYYMMDD'}}
            type=semver,pattern={{version}}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5.1.0
        with:
          file: rocm.Dockerfile
          push: true
          context: .
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          annotations: ${{ steps.meta.outputs.labels }}
          cache-from: ${{ steps.cache.outputs.cache-from }}
          cache-to: ${{ steps.cache.outputs.cache-to }}
          build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}

      - name: Docker Hub Description
        uses: peter-evans/dockerhub-description@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          repository: tabbyml/tabby
21 changes: 21 additions & 0 deletions Cargo.lock


13 changes: 12 additions & 1 deletion README.md
@@ -49,14 +49,25 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-started)
 - ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration)
 
 ### Run Tabby in 1 Minute
-The easiest way to start a Tabby server is by using the following Docker command:
+The easiest way to start a Tabby server is by using the following Docker command...
+
+...with CUDA:
 ```bash
 docker run -it \
   --gpus all -p 8080:8080 -v $HOME/.tabby:/data \
   tabbyml/tabby \
   serve --model TabbyML/StarCoder-1B --device cuda
 ```
+
+...with ROCm (Linux only):
+```bash
+docker run -it \
+  --device /dev/dri --device /dev/kfd \
+  -p 8080:8080 -v $HOME/.tabby:/data \
+  tabbyml/tabby-rocm \
+  serve --model TabbyML/StarCoder-1B --device rocm
+```
 
 For additional options (e.g. inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby).
 
 ## 🤝 Contributing
1 change: 1 addition & 0 deletions crates/llama-cpp-bindings/Cargo.toml
@@ -5,6 +5,7 @@ edition = "2021"
 
 [features]
 cuda = []
+rocm = []
 
 [build-dependencies]
 cxx-build = "1.0"
23 changes: 21 additions & 2 deletions crates/llama-cpp-bindings/build.rs
@@ -31,14 +31,33 @@ fn main() {
         println!("cargo:rustc-link-lib=cublas");
         println!("cargo:rustc-link-lib=cublasLt");
     }
+    if cfg!(feature = "rocm") {
+        const AMDGPU_TARGETS: &str = "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102";
+
+        let rocm_root = "/opt/rocm";
+        config.define("LLAMA_HIPBLAS", "ON");
+        config.define("CMAKE_C_COMPILER", format!("{}/llvm/bin/clang", rocm_root));
+        config.define(
+            "CMAKE_CXX_COMPILER",
+            format!("{}/llvm/bin/clang++", rocm_root),
+        );
+        config.define("AMDGPU_TARGETS", AMDGPU_TARGETS);
+        println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries");
+        println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root);
+        println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root);
+        println!("cargo:rustc-link-search=native={}/hipblas/lib", rocm_root);
+        println!("cargo:rustc-link-lib=amdhip64");
+        println!("cargo:rustc-link-lib=rocblas");
+        println!("cargo:rustc-link-lib=hipblas");
+    }
 
     let dst = config.build();
     println!("cargo:rustc-link-search=native={}/build", dst.display());
 
     cxx_build::bridge("src/lib.rs")
         .file("src/engine.cc")
-        .flag_if_supported("-Iinclude")
-        .flag_if_supported("-Illama.cpp")
+        .include("include")
+        .include("llama.cpp")
         .flag_if_supported("-std=c++14")
         .compile("cxxbridge");
 }
6 changes: 4 additions & 2 deletions crates/tabby/Cargo.toml
@@ -6,7 +6,8 @@ edition = "2021"
 
 [features]
 default = ["ee"]
 ee = ["dep:tabby-webserver"]
-cuda = ["llama-cpp-bindings/cuda"]
+cuda = ["llama-cpp-bindings/cuda", "dep:nvml-wrapper"]
+rocm = ["llama-cpp-bindings/rocm", "dep:rocm_smi_lib"]
 experimental-http = ["dep:http-api-bindings"]
 
 [dependencies]
@@ -36,7 +37,8 @@ tracing-opentelemetry = "0.18.0"
 tantivy = { workspace = true }
 anyhow = { workspace = true }
 sysinfo = "0.29.8"
-nvml-wrapper = "0.9.0"
+nvml-wrapper = { version = "0.9.0", optional = true }
+rocm_smi_lib = { version = "0.1.14", optional = true }
 http-api-bindings = { path = "../http-api-bindings", optional = true } # included when built with the `experimental-http` feature
 async-stream = { workspace = true }
 axum-streams = { version = "0.9.1", features = ["json"] }
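Note: the optional dependencies above are what feed accelerator names into telemetry and the web UI, one probe per backend feature. Below is a minimal, hypothetical sketch of how such feature-gated probing can be wired; the function name and structure are illustrative and not code from this PR. The `nvml-wrapper` calls follow its 0.9 API, while the ROCm arm is left as a stub because `rocm_smi_lib`'s API is not shown in this diff.

```rust
// Sketch only: feature-gated accelerator discovery for telemetry.

#[cfg(feature = "cuda")]
fn accelerator_names() -> Vec<String> {
    use nvml_wrapper::Nvml;

    // Enumerate NVIDIA GPUs via NVML; return an empty list on any failure.
    let Ok(nvml) = Nvml::init() else {
        return Vec::new();
    };
    let count = nvml.device_count().unwrap_or(0);
    (0..count)
        .filter_map(|i| nvml.device_by_index(i).ok())
        .filter_map(|device| device.name().ok())
        .collect()
}

#[cfg(feature = "rocm")]
fn accelerator_names() -> Vec<String> {
    // Stub: a real implementation would query ROCm SMI through
    // rocm_smi_lib for device names; its API is not reproduced here.
    todo!("query rocm_smi_lib for AMD GPU names")
}

#[cfg(not(any(feature = "cuda", feature = "rocm")))]
fn accelerator_names() -> Vec<String> {
    Vec::new()
}
```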
4 changes: 4 additions & 0 deletions crates/tabby/build.rs
@@ -3,6 +3,10 @@ use std::error::Error;
 use vergen::EmitBuilder;
 
 fn main() -> Result<(), Box<dyn Error>> {
+    if cfg!(feature = "rocm") {
+        let rocm_root = "/opt/rocm";
+        println!("cargo:rustc-link-search=native={}/lib", rocm_root);
+    }
     // touch
     EmitBuilder::builder()
         .all_build()
36 changes: 25 additions & 11 deletions crates/tabby/src/main.rs
@@ -1,22 +1,23 @@
-mod routes;
-mod services;
-
-mod download;
-mod serve;
-
-#[cfg(feature = "ee")]
-mod worker;
-
 use clap::{Parser, Subcommand};
 use opentelemetry::{
     global,
     sdk::{propagation::TraceContextPropagator, trace, trace::Sampler, Resource},
     KeyValue,
 };
 use opentelemetry_otlp::WithExportConfig;
-use tabby_common::config::Config;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer};
+
+use tabby_common::config::Config;
+
+mod routes;
+mod services;
+
+mod download;
+mod serve;
+
+#[cfg(feature = "ee")]
+mod worker;
 
 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
 #[command(propagate_version = true)]
@@ -69,6 +70,10 @@ pub enum Device {
     #[strum(serialize = "cuda")]
     Cuda,
 
+    #[cfg(feature = "rocm")]
+    #[strum(serialize = "rocm")]
+    Rocm,
+
     #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
     #[strum(serialize = "metal")]
     Metal,
@@ -89,7 +94,16 @@ impl Device {
         *self == Device::Cuda
     }
 
-    #[cfg(not(any(all(target_os = "macos", target_arch = "aarch64"), feature = "cuda")))]
+    #[cfg(feature = "rocm")]
+    pub fn ggml_use_gpu(&self) -> bool {
+        *self == Device::Rocm
+    }
+
+    #[cfg(not(any(
+        all(target_os = "macos", target_arch = "aarch64"),
+        feature = "cuda",
+        feature = "rocm",
+    )))]
     pub fn ggml_use_gpu(&self) -> bool {
         false
    }
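Note: the `#[strum(serialize = "rocm")]` attribute above is what lets a CLI argument like `--device rocm` resolve to the new variant, via the `FromStr` impl that strum derives. A standalone sketch of that mechanism (simplified variants, cfg gates omitted; assumes strum with its `derive` feature, not tabby's actual enum):

```rust
use strum::EnumString; // assumes strum = { version = "0.25", features = ["derive"] }

// Simplified stand-in for the Device enum above; cfg gates omitted.
#[derive(Debug, PartialEq, EnumString)]
enum Device {
    #[strum(serialize = "cpu")]
    Cpu,
    #[strum(serialize = "cuda")]
    Cuda,
    #[strum(serialize = "rocm")]
    Rocm,
}

fn main() {
    // A CLI flag like "--device rocm" ultimately boils down to this parse:
    let device: Device = "rocm".parse().unwrap();
    assert_eq!(device, Device::Rocm);
}
```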