Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Support for Devices other than CUDA in Telemetry and Web UI #902

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
aa6bf9d
Added build configurations for Intel and AMD hardware
cromefire Nov 25, 2023
5732d7d
Improved rocm build
cromefire Nov 25, 2023
1e05350
Added options for OneAPI and ROCm
cromefire Nov 25, 2023
3496bb9
Build llama using icx
cromefire Nov 26, 2023
6098475
[autofix.ci] apply automated fixes
autofix-ci[bot] Nov 26, 2023
7d38fc4
Fixed rocm image
cromefire Nov 26, 2023
4f90c98
Build ROCm
cromefire Nov 26, 2023
c0804cb
Tried to adjust compile flags for SYCL
cromefire Nov 26, 2023
8d56d37
Removed references to oneAPI
cromefire Nov 26, 2023
dd72b82
Provide info about the used device for ROCm
cromefire Nov 26, 2023
b4a02d1
Added ROCm documentation
cromefire Nov 26, 2023
7b40db6
Addressed review comments
cromefire Nov 27, 2023
55c7884
Refactored to expose generic accelerator information
cromefire Nov 28, 2023
cbbdf22
Pull request cleanup
cromefire Nov 28, 2023
c2ddb59
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
f80007c
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 10, 2023
dd80a3a
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
b993d5d
Tried to fix most build issues
cromefire Dec 10, 2023
1bc6f48
Fixed the rest of the build issues
cromefire Dec 10, 2023
c73b518
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
94d22c4
Added rocm binary build
cromefire Dec 10, 2023
7e58915
Merge branch 'main' into rocm-support
cromefire Dec 22, 2023
b3ef2e2
Added ROCm 6.0 build
cromefire Dec 22, 2023
df62a7d
Fixed build and slimmed down container
cromefire Dec 22, 2023
5f6a63d
Merge branch 'main' into rocm-support
cromefire Jan 2, 2024
234f58a
Fixed certificates in docker file
cromefire Jan 2, 2024
28ce9f1
Fixed merge issue
cromefire Jan 2, 2024
fd0891b
Fixed merge issue in workflow
cromefire Jan 2, 2024
b471554
Merge branch 'main' into rocm-support
cromefire Jan 14, 2024
b13bbec
Added support for specifying targets for docker build
cromefire Jan 14, 2024
99f63f3
Fixed docker build
cromefire Jan 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ python
**/target
**/node_modules
website
*.log
*.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Create and publish docker image
name: Create and publish CUDA docker image

on:
workflow_dispatch:
Expand Down Expand Up @@ -50,7 +50,10 @@ jobs:

# Workaround: https://github.com/docker/build-push-action/issues/461
- name: Setup Docker buildx
uses: docker/[email protected]
uses: docker/[email protected]
with:
# Needed to support OCI annotations
version: v0.12.0

# Login against a Docker registry except on PR
# https://github.com/docker/login-action
Expand Down Expand Up @@ -78,7 +81,7 @@ jobs:

- name: Docker meta
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5.0.0
with:
# list of Docker images to use as base name for tags
images: |
Expand All @@ -95,13 +98,14 @@ jobs:
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v3.1.1
uses: docker/build-push-action@v5.1.0
with:
file: Dockerfile
file: cuda.Dockerfile
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
annotations: ${{ steps.meta.outputs.labels }}
cache-from: ${{ steps.cache.outputs.cache-from }}
cache-to: ${{ steps.cache.outputs.cache-to }}
build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
Expand All @@ -112,4 +116,3 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
repository: tabbyml/tabby

119 changes: 119 additions & 0 deletions .github/workflows/docker-rocm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
name: Create and publish ROCm docker image

on:
  workflow_dispatch:
  schedule:
    # Nightly build at 20:00 UTC every day
    - cron: '0 20 */1 * *'
  push:
    tags:
      - 'v*'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}

  # Cancel any in-progress run for the same ref and start the latest one
  cancel-in-progress: true

env:
  RUST_TOOLCHAIN: 1.73.0

jobs:
  release-docker:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write

    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: true

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          swap-storage: true

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      # Workaround: https://github.com/docker/build-push-action/issues/461
      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v3.0.0
        with:
          # Needed to support OCI annotations
          version: v0.12.0

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into GitHub Container registry
        uses: docker/login-action@v3.0.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Log into Docker Hub
        uses: docker/login-action@v3.0.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Lowercase the repository name so it is usable as an image name
      - name: Generate image name
        run: |
          echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV}

      - uses: int128/docker-build-cache-config-action@v1
        id: cache
        with:
          image: ghcr.io/${{ env.IMAGE_NAME }}/cache

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5.0.0
        with:
          # list of Docker images to use as base name for tags
          images: |
            ghcr.io/${{ env.IMAGE_NAME }}/rocm
            ${{ env.IMAGE_NAME }}-rocm
          # NOTE(review): `variant` is not a documented input of
          # docker/metadata-action — confirm; `flavor: suffix=-rocm`
          # may be what was intended here.
          variant: rocm
          # generate Docker tags based on the following events/attributes
          tags: |
            type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
            type=schedule,pattern=nightly
            type=schedule,pattern={{date 'YYYYMMDD'}}
            type=semver,pattern={{version}}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5.1.0
        with:
          file: rocm.Dockerfile
          push: true
          context: .
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # FIX: use the dedicated `annotations` output of metadata-action.
          # The original passed `labels` here, which emits image label
          # key/value pairs where OCI manifest annotations are expected.
          annotations: ${{ steps.meta.outputs.annotations }}
          cache-from: ${{ steps.cache.outputs.cache-from }}
          cache-to: ${{ steps.cache.outputs.cache-to }}
          build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}

      - name: Docker Hub Description
        uses: peter-evans/dockerhub-description@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          # NOTE(review): this pushes the README to tabbyml/tabby, not the
          # ROCm image repository — confirm tabbyml/tabby-rocm isn't intended.
          repository: tabbyml/tabby
11 changes: 8 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
- '.github/workflows/release.yml'

concurrency:
group: ${{ github.workflow_ref }}-${{ github.head_ref || github.ref_name }}
group: ${{ github.workflow_ref }}-${{ github.head_ref || github.ref_name }}

# If this is enabled it will cancel current running and start latest
cancel-in-progress: true
Expand All @@ -26,7 +26,7 @@ jobs:
container: ${{ matrix.container }}
strategy:
matrix:
binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-windows-msvc, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-cuda122, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
binary: [ aarch64-apple-darwin, x86_64-manylinux2014, x86_64-windows-msvc, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-cuda122, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57, x86_64-manylinux2014-rocm60 ]
include:
- os: macos-latest
target: aarch64-apple-darwin
Expand Down Expand Up @@ -67,6 +67,11 @@ jobs:
binary: x86_64-manylinux2014-rocm57
container: ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest
build_args: --features static-ssl --features rocm
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
binary: x86_64-manylinux2014-rocm60
container: ghcr.io/cromefire/hipblas-manylinux/2014/6.0:latest
build_args: --features static-ssl --features rocm

env:
SCCACHE_GHA_ENABLED: true
Expand Down Expand Up @@ -146,7 +151,7 @@ jobs:

- uses: ncipollo/release-action@v1
with:
allowUpdates: true
allowUpdates: true
prerelease: true
artifacts: "tabby_*/tabby_*"
tag: ${{ github.ref_name }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ node_modules
.DS_Store
.vscode/
__pycache__
*.log
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,25 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta
- ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration)

### Run Tabby in 1 Minute
The easiest way to start a Tabby server is by using the following Docker command:
The easiest way to start a Tabby server is by using the following Docker command...

...with CUDA:
```bash
docker run -it \
--gpus all -p 8080:8080 -v $HOME/.tabby:/data \
tabbyml/tabby \
serve --model TabbyML/StarCoder-1B --device cuda
```

...with ROCm (Linux only):
```bash
docker run -it \
--device /dev/dri --device /dev/kfd \
-p 8080:8080 -v $HOME/.tabby:/data \
tabbyml/tabby-rocm \
serve --model TabbyML/StarCoder-1B --device rocm
```

For additional options (e.g. inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby).

## 🤝 Contributing
Expand Down
13 changes: 9 additions & 4 deletions crates/llama-cpp-bindings/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ fn main() {

fn build_llama_cpp() {
let mut config = Config::new("llama.cpp");
config.define("LLAMA_BUILD_TESTS", "OFF");
config.define("LLAMA_BUILD_EXAMPLES", "OFF");
config.define("LLAMA_BUILD_SERVER", "OFF");
if cfg!(target_os = "macos") {
config.define("LLAMA_METAL", "ON");
println!("cargo:rustc-link-lib=framework=Foundation");
Expand All @@ -45,7 +48,7 @@ fn build_llama_cpp() {
println!("cargo:rustc-link-lib=cublasLt");
}
if cfg!(feature = "rocm") {
let amd_gpu_targets: Vec<&str> = vec![
let amd_gpu_default_targets: Vec<&str> = vec![
"gfx803",
"gfx900",
"gfx906:xnack-",
Expand All @@ -64,6 +67,8 @@ fn build_llama_cpp() {
"gfx1102",
"gfx1103",
];
let amd_gpu_targets =
env::var("AMDGPU_TARGETS").unwrap_or(amd_gpu_default_targets.join(";"));

let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string());
config.define("LLAMA_HIPBLAS", "ON");
Expand All @@ -72,7 +77,7 @@ fn build_llama_cpp() {
"CMAKE_CXX_COMPILER",
format!("{}/llvm/bin/clang++", rocm_root),
);
config.define("AMDGPU_TARGETS", amd_gpu_targets.join(";"));
config.define("AMDGPU_TARGETS", amd_gpu_targets);
println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries");
println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root);
println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root);
Expand Down Expand Up @@ -104,8 +109,8 @@ fn build_llama_cpp() {
fn build_cxx_binding() {
cxx_build::bridge("src/lib.rs")
.file("src/engine.cc")
.flag_if_supported("-Iinclude")
.flag_if_supported("-Illama.cpp")
.include("include")
.include("llama.cpp")
.flag_if_supported("-std=c++14")
.compile("cxxbridge");
}
1 change: 1 addition & 0 deletions crates/tabby-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ uuid = { version = "1.4.1", features = ["v4"] }
tantivy.workspace = true
anyhow.workspace = true
glob = "0.3.1"
juniper.workspace = true
utoipa.workspace = true
serde_json.workspace = true
async-trait.workspace = true
Expand Down
21 changes: 21 additions & 0 deletions crates/tabby-common/src/api/accelerator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use juniper::{GraphQLEnum, GraphQLObject};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

/// Kind of compute device backing an accelerator.
///
/// The derives make this (de)serializable via serde, expose it as a
/// GraphQL enum via juniper, and include it in the OpenAPI schema via
/// utoipa, so the same type serves both the web API and serialization.
#[derive(Serialize, Deserialize, GraphQLEnum, ToSchema, PartialEq, Clone, Debug)]
pub enum DeviceType {
/// NVIDIA CUDA device.
Cuda,
/// AMD ROCm device.
Rocm,
}

#[derive(Serialize, Deserialize, GraphQLObject, ToSchema, Clone, Debug)]
pub struct Accelerator {
/// Universally unique ID of the accelerator, if available
pub uuid: Option<String>,
Comment on lines +13 to +14
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The biggest issue now is whether we can prevent this field from being part of the telemetry. If not, it definitely has to be removed, as it's basically almost equivalent to a serial number. It's nice for tracking a GPU in the accelerators, but it's a huge issue if that data leaves your control.

/// Technical name of the underlying hardware chip, if available
pub chip_name: Option<String>,
/// User readable name for the accelerator
pub display_name: String,
/// Type of the accelerator device
pub device_type: DeviceType,
}
1 change: 1 addition & 0 deletions crates/tabby-common/src/api/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod accelerator;
pub mod code;
pub mod event;
5 changes: 3 additions & 2 deletions crates/tabby-inference/src/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
}

pub fn trim_stop_words(&self, language: &'static Language, text: &str) -> Option<String> {
let Some(re) = self.get_re(language) else {

Check warning on line 55 in crates/tabby-inference/src/decoding.rs

View workflow job for this annotation

GitHub Actions / autofix

this `let...else` may be rewritten with the `?` operator
return None;
};

Expand All @@ -75,7 +75,8 @@
.iter()
.map(|x| regex::escape(&reverse(x)))
.collect();
let regex_string = r"(?m)\A".to_owned() + "((" + &reversed_stop_words.join(")|(") + "))";
let regex_string =
r"(?m)\A".to_owned() + "((" + reversed_stop_words.join(")|(").as_str() + "))";
Regex::new(&regex_string).expect("Failed to create regex")
}

Expand All @@ -98,7 +99,7 @@

pub fn should_stop(&mut self, new_text: &str) -> bool {
if !new_text.is_empty() {
self.reversed_text = reverse(new_text) + &self.reversed_text;
self.reversed_text = reverse(new_text) + self.reversed_text.as_str();

if let Some(re) = &self.stop_re {
if re.is_match(&self.reversed_text) {
Expand Down
4 changes: 4 additions & 0 deletions crates/tabby/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ use std::error::Error;
use vergen::EmitBuilder;

fn main() -> Result<(), Box<dyn Error>> {
if cfg!(feature = "rocm") {
let rocm_root = "/opt/rocm";
println!("cargo:rustc-link-search=native={}/lib", rocm_root);
}
// touch
EmitBuilder::builder()
.all_build()
Expand Down
4 changes: 3 additions & 1 deletion crates/tabby/src/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use clap::Args;
use hyper::StatusCode;
use tabby_common::{
api,
api::{code::CodeSearch, event::EventLogger},
api::{accelerator, code::CodeSearch, event::EventLogger},
config::Config,
usage,
};
Expand Down Expand Up @@ -61,6 +61,8 @@ Install following IDE / Editor extensions to get started with [Tabby](https://gi
chat::ChatCompletionRequest,
chat::Message,
chat::ChatCompletionChunk,
accelerator::DeviceType,
accelerator::Accelerator,
health::HealthState,
health::Version,
api::code::SearchResponse,
Expand Down