From 6ef90408f7a36c863118bc70f9a8b8361037555a Mon Sep 17 00:00:00 2001
From: Cromefire_
Date: Wed, 13 Dec 2023 08:59:04 +0100
Subject: [PATCH] feat: Add rocm builds and documentation (#1012)

* Added rocm builds and documentation
* Pulled build improvements from #902
* Fixed build container for rocm build
* Install git in rocm container
* Fixed github step
* Try to fix if statement
* Added more generic dependency installation
* upgraded rustup action
* Update sccache
* Try pytorch manylinux image
* Switched location for toolchain parameter
* Downgraded to deprecated action again
* Readded set default step
* Install minimal rocm on the fly
* fixed typo in binary name
* Downgraded checkout action
* Use curl to download
* Add -y flag to yum
* Also install rocblas
* Update release.yml
* Update release.yml
* Update prepare_build_environment.sh
* Update prepare_build_environment.sh
* Update build.rs
* Update build.rs
* Update README.md
* Update website/docs/faq.mdx
* Update index.md
* Update and rename docker-cuda.yml to docker.yml
* Delete .github/workflows/docker-rocm.yml
* Delete rocm.Dockerfile
* Rename cuda.Dockerfile to Dockerfile
* Update docker.yml
* Update website/docs/installation/docker.mdx
* Update website/docs/installation/docker-compose.mdx
* Update docker-compose.mdx
* Update docker-compose.mdx
* Update docker.mdx
* Update docker.mdx
* Update website/docs/faq.mdx

---------

Co-authored-by: Meng Zhang
---
 .dockerignore                              |  6 ++++++
 .github/workflows/release.yml              | 11 ++++++++---
 Dockerfile                                 |  3 ++-
 website/docs/extensions/troubleshooting.md |  6 +++---
 website/docs/faq.mdx                       | 19 +++++++++++++++----
 website/docs/installation/apple.md         |  2 +-
 6 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index de70e0d1677..bfbb41f4fe5 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,8 @@
+.idea
+ci
+clients
+.github
+python
 **/target
 **/node_modules
+website
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 87b0d03f809..14171731eb8 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -26,8 +26,7 @@ jobs:
     container: ${{ matrix.container }}
     strategy:
       matrix:
-        binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117,
-          x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122]
+        binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
         include:
           - os: macos-latest
             target: aarch64-apple-darwin
@@ -53,6 +52,11 @@
             ext: .exe
             build_args: --features cuda
             windows_cuda: '12.2.0'
+          - os: ubuntu-latest
+            target: x86_64-unknown-linux-gnu
+            binary: x86_64-manylinux2014-rocm57
+            container: ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest
+            build_args: --features rocm

     env:
       SCCACHE_GHA_ENABLED: true
@@ -72,7 +76,8 @@
           target: ${{ matrix.target }}
           components: clippy

-      - run: rustup default ${{ env.RUST_TOOLCHAIN }}
+      - name: Set default rust version
+        run: rustup default ${{ env.RUST_TOOLCHAIN }}

       - name: Sccache cache
         uses: mozilla-actions/sccache-action@v0.0.3
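The new matrix entry above builds the ROCm flavor inside a hipBLAS-enabled manylinux container with `--features rocm`. As a rough, untested sketch of an equivalent local build (the bind mount, working directory, and the assumption that a Rust toolchain is already available inside the container are all illustrative; CI installs the toolchain via rustup):

```bash
# Minimal sketch of reproducing the CI ROCm build outside the workflow.
# Assumes a Rust toolchain is present (or installed first) in the container.
docker run --rm -v "$PWD":/workspace -w /workspace \
  ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest \
  cargo build --release --package tabby --features rocm
```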
diff --git a/Dockerfile b/Dockerfile
index 711d886fe59..fd808683f2c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,12 +29,13 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLC
 ENV PATH="/root/.cargo/bin:${PATH}"

 WORKDIR /root/workspace
-COPY . .

 RUN mkdir -p /opt/tabby/bin
 RUN mkdir -p /opt/tabby/lib
 RUN mkdir -p target

+COPY . .
+
 RUN --mount=type=cache,target=/usr/local/cargo/registry \
     --mount=type=cache,target=/root/workspace/target \
     cargo build --features cuda --release --package tabby && \
diff --git a/website/docs/extensions/troubleshooting.md b/website/docs/extensions/troubleshooting.md
index 94f42a667f5..bf4ee1cc9be 100644
--- a/website/docs/extensions/troubleshooting.md
+++ b/website/docs/extensions/troubleshooting.md
@@ -112,9 +112,9 @@ for the current code context.
 If your completion requests are timing out, Tabby may display a warning message.
 This could be due to network issues or poor server performance, especially when
 running a large model on a CPU. To improve performance, consider running the model
-on a GPU with CUDA support or on Apple M1/M2 with Metal support. When running
-the server, make sure to specify the device in the arguments using `--device cuda`
-or `--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/).
+on a GPU with CUDA or ROCm support or on Apple M1/M2 with Metal support. When running
+the server, make sure to specify the device in the arguments using `--device cuda`, `--device rocm` or
+`--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/).

 By default, the timeout for automatically triggered completion requests is set to 4 seconds.
 You can adjust this timeout value in the `~/.tabby-client/agent/config.toml` configuration file.
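As the troubleshooting change above notes, the backend is selected with the `--device` flag when the server is started. A minimal sketch (the model name is only an example):

```bash
# AMD GPU via ROCm
tabby serve --device rocm --model TabbyML/StarCoder-1B

# NVIDIA GPU via CUDA
tabby serve --device cuda --model TabbyML/StarCoder-1B

# Apple M1/M2 via Metal
tabby serve --device metal --model TabbyML/StarCoder-1B
```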
diff --git a/website/docs/faq.mdx b/website/docs/faq.mdx
index 3dc6ecb632f..031f8555fe1 100644
--- a/website/docs/faq.mdx
+++ b/website/docs/faq.mdx
@@ -1,10 +1,11 @@
-import CodeBlock from '@theme/CodeBlock';
-
 # ⁉️ Frequently Asked Questions

 <details>
   <summary>How much VRAM a LLM model consumes?</summary>
-  <div>By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.</div>
+  <div>
+    <p>By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.</p>
+    <p>For ROCm the actual limits are currently largely untested, but the same CodeLlama-7B seems to use about 8GB of VRAM as well on an AMD Radeon™ RX 7900 XTX, according to the ROCm monitoring tools.</p>
+  </div>
 </details>
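The entry above cites the ROCm monitoring tools for the roughly 8GB figure. One way to check VRAM usage while Tabby is serving, assuming the vendor tools are installed (flags shown are the standard query options, not anything specific to Tabby):

```bash
# NVIDIA: per-GPU memory usage
nvidia-smi --query-gpu=memory.used,memory.total --format=csv

# AMD / ROCm: VRAM usage
rocm-smi --showmeminfo vram
```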
@@ -24,7 +25,17 @@ import CodeBlock from '@theme/CodeBlock';
 <details>
   <summary>How to utilize multiple NVIDIA GPUs?</summary>
   <div>
-    Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES accordingly.
+    Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES (for CUDA) or HIP_VISIBLE_DEVICES (for ROCm) accordingly.
+  </div>
+</details>
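A sketch of the multi-instance setup described above, with one Tabby process per GPU; the `--port` values are arbitrary and the model name is only an example:

```bash
# CUDA: one instance per NVIDIA GPU
CUDA_VISIBLE_DEVICES=0 tabby serve --device cuda --model TabbyML/StarCoder-1B --port 8080 &
CUDA_VISIBLE_DEVICES=1 tabby serve --device cuda --model TabbyML/StarCoder-1B --port 8081 &

# ROCm: one instance per AMD GPU
HIP_VISIBLE_DEVICES=0 tabby serve --device rocm --model TabbyML/StarCoder-1B --port 8080 &
HIP_VISIBLE_DEVICES=1 tabby serve --device rocm --model TabbyML/StarCoder-1B --port 8081 &
```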
+
+<details>
+  <summary>My AMD GPU isn't supported by ROCm</summary>
+  <div>
+    If a similar GPU is supported by ROCm, you can set the HSA_OVERRIDE_GFX_VERSION variable to that GPU's version.
+    For example, you can set it to 10.3.0 for RDNA2 and 11.0.0 for RDNA3.
   </div>
 </details>
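A sketch of the override described in the new FAQ entry; the version values come from the entry itself and the model name is only an example:

```bash
# RDNA2 card that ROCm does not list as supported
HSA_OVERRIDE_GFX_VERSION=10.3.0 tabby serve --device rocm --model TabbyML/StarCoder-1B

# RDNA3 card
HSA_OVERRIDE_GFX_VERSION=11.0.0 tabby serve --device rocm --model TabbyML/StarCoder-1B
```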
diff --git a/website/docs/installation/apple.md b/website/docs/installation/apple.md
index 90bd2f6de1b..8fed35ce4ed 100644
--- a/website/docs/installation/apple.md
+++ b/website/docs/installation/apple.md
@@ -14,4 +14,4 @@ brew install tabbyml/tabby/tabby
 tabby serve --device metal --model TabbyML/StarCoder-1B
 ```

-The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA. You can find more information about Docker [here](./docker).
+The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA or ROCm. You can find more information about Docker [here](./docker).
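For the team-hosting recommendation above, a CUDA Docker invocation along the lines of Tabby's Docker guide might look like the sketch below; the image tag, port, data path, and model are illustrative. A ROCm deployment is assumed to additionally need a ROCm-enabled image and GPU passthrough via `--device /dev/kfd --device /dev/dri` instead of `--gpus all`, which is common ROCm container practice rather than something this patch documents.

```bash
# Shared Tabby instance on a CUDA-capable host (illustrative tag/model/paths)
docker run -it --gpus all \
  -p 8080:8080 -v $HOME/.tabby:/data \
  tabbyml/tabby \
  serve --model TabbyML/StarCoder-1B --device cuda
```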