Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Support for Devices other than CUDA in Telemetry and Web UI #902

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
aa6bf9d
Added build configurations for Intel and AMD hardware
cromefire Nov 25, 2023
5732d7d
Improved rocm build
cromefire Nov 25, 2023
1e05350
Added options for OneAPI and ROCm
cromefire Nov 25, 2023
3496bb9
Build llama using icx
cromefire Nov 26, 2023
6098475
[autofix.ci] apply automated fixes
autofix-ci[bot] Nov 26, 2023
7d38fc4
Fixed rocm image
cromefire Nov 26, 2023
4f90c98
Build ROCm
cromefire Nov 26, 2023
c0804cb
Tried to adjust compile flags for SYCL
cromefire Nov 26, 2023
8d56d37
Removed references to oneAPI
cromefire Nov 26, 2023
dd72b82
Provide info about the used device for ROCm
cromefire Nov 26, 2023
b4a02d1
Added ROCm documentation
cromefire Nov 26, 2023
7b40db6
Addressed review comments
cromefire Nov 27, 2023
55c7884
Refactored to expose generic accelerator information
cromefire Nov 28, 2023
cbbdf22
Pull request cleanup
cromefire Nov 28, 2023
c2ddb59
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
f80007c
[autofix.ci] apply automated fixes
autofix-ci[bot] Dec 10, 2023
dd80a3a
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
b993d5d
Tried to fix most build issues
cromefire Dec 10, 2023
1bc6f48
Fixed the rest of the build issues
cromefire Dec 10, 2023
c73b518
Merge branch 'main' into rocm-support
cromefire Dec 10, 2023
94d22c4
Added rocm binary build
cromefire Dec 10, 2023
7e58915
Merge branch 'main' into rocm-support
cromefire Dec 22, 2023
b3ef2e2
Added ROCm 6.0 build
cromefire Dec 22, 2023
df62a7d
Fixed build and slimmed down container
cromefire Dec 22, 2023
5f6a63d
Merge branch 'main' into rocm-support
cromefire Jan 2, 2024
234f58a
Fixed certificates in docker file
cromefire Jan 2, 2024
28ce9f1
Fixed merge issue
cromefire Jan 2, 2024
fd0891b
Fixed merge issue in workflow
cromefire Jan 2, 2024
b471554
Merge branch 'main' into rocm-support
cromefire Jan 14, 2024
b13bbec
Added support for specifying targets for docker build
cromefire Jan 14, 2024
99f63f3
Fixed docker build
cromefire Jan 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ python
**/target
**/node_modules
website
*.log
*.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Create and publish docker image
name: Create and publish CUDA docker image

on:
workflow_dispatch:
Expand Down Expand Up @@ -50,7 +50,10 @@ jobs:

# Workaround: https://github.com/docker/build-push-action/issues/461
- name: Setup Docker buildx
uses: docker/[email protected]
uses: docker/[email protected]
with:
# Needed to support OCI annotations
version: v0.12.0

# Login against a Docker registry except on PR
# https://github.com/docker/login-action
Expand Down Expand Up @@ -78,7 +81,7 @@ jobs:

- name: Docker meta
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5.0.0
with:
# list of Docker images to use as base name for tags
images: |
Expand All @@ -95,13 +98,14 @@ jobs:
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v3.1.1
uses: docker/build-push-action@v5.1.0
with:
file: Dockerfile
file: cuda.Dockerfile
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
annotations: ${{ steps.meta.outputs.labels }}
cache-from: ${{ steps.cache.outputs.cache-from }}
cache-to: ${{ steps.cache.outputs.cache-to }}
build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}
Expand All @@ -112,4 +116,3 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
repository: tabbyml/tabby

119 changes: 119 additions & 0 deletions .github/workflows/docker-rocm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
name: Create and publish ROCm docker image

on:
  workflow_dispatch:
  schedule:
    # Nightly build at 20:00 UTC every day
    - cron: '0 20 */1 * *'
  push:
    tags:
      - 'v*'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}

  # Cancel any in-progress run for the same ref and start the latest one
  cancel-in-progress: true

env:
  RUST_TOOLCHAIN: 1.73.0

jobs:
  release-docker:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write

    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: true

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          swap-storage: true

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      # Workaround: https://github.com/docker/build-push-action/issues/461
      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v3.0.0
        with:
          # Needed to support OCI annotations
          version: v0.12.0

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into GitHub Container registry
        uses: docker/login-action@v3.0.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Log into Docker Hub
        uses: docker/login-action@v3.0.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Lowercase the repository name so it is usable as an image name
      - name: Generate image name
        run: |
          echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV}

      - uses: int128/docker-build-cache-config-action@v1
        id: cache
        with:
          image: ghcr.io/${{ env.IMAGE_NAME }}/cache

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5.0.0
        with:
          # list of Docker images to use as base name for tags
          images: |
            ghcr.io/${{ env.IMAGE_NAME }}/rocm
            ${{ env.IMAGE_NAME }}-rocm
          # NOTE(review): `variant` is not a documented input of
          # docker/metadata-action — confirm; `flavor: suffix=-rocm`
          # may be what was intended here.
          variant: rocm
          # generate Docker tags based on the following events/attributes
          tags: |
            type=raw,value={{branch}}-{{sha}},enable=${{ startsWith(github.ref, 'refs/heads') }}
            type=schedule,pattern=nightly
            type=schedule,pattern={{date 'YYYYMMDD'}}
            type=semver,pattern={{version}}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v5.1.0
        with:
          file: rocm.Dockerfile
          push: true
          context: .
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # FIX: use the dedicated `annotations` output of metadata-action.
          # The original passed `labels` here, which emits image label
          # key/value pairs where OCI manifest annotations are expected.
          annotations: ${{ steps.meta.outputs.annotations }}
          cache-from: ${{ steps.cache.outputs.cache-from }}
          cache-to: ${{ steps.cache.outputs.cache-to }}
          build-args: RUST_TOOLCHAIN=${{ env.RUST_TOOLCHAIN }}

      - name: Docker Hub Description
        uses: peter-evans/dockerhub-description@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          # NOTE(review): this pushes the README to tabbyml/tabby, not the
          # ROCm image repository — confirm tabbyml/tabby-rocm isn't intended.
          repository: tabbyml/tabby
11 changes: 8 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
- '.github/workflows/release.yml'

concurrency:
group: ${{ github.workflow_ref }}-${{ github.head_ref || github.ref_name }}
group: ${{ github.workflow_ref }}-${{ github.head_ref || github.ref_name }}

# If this is enabled it will cancel current running and start latest
cancel-in-progress: true
Expand All @@ -26,7 +26,7 @@ jobs:
container: ${{ matrix.container }}
strategy:
matrix:
binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-windows-msvc, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-cuda122, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
binary: [ aarch64-apple-darwin, x86_64-manylinux2014, x86_64-windows-msvc, x86_64-manylinux2014-cuda117, x86_64-manylinux2014-cuda122, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57, x86_64-manylinux2014-rocm60 ]
include:
- os: macos-latest
target: aarch64-apple-darwin
Expand Down Expand Up @@ -67,6 +67,11 @@ jobs:
binary: x86_64-manylinux2014-rocm57
container: ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest
build_args: --features static-ssl --features rocm
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
binary: x86_64-manylinux2014-rocm60
container: ghcr.io/cromefire/hipblas-manylinux/2014/6.0:latest
build_args: --features static-ssl --features rocm

env:
SCCACHE_GHA_ENABLED: true
Expand Down Expand Up @@ -146,7 +151,7 @@ jobs:

- uses: ncipollo/release-action@v1
with:
allowUpdates: true
allowUpdates: true
prerelease: true
artifacts: "tabby_*/tabby_*"
tag: ${{ github.ref_name }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ node_modules
.DS_Store
.vscode/
__pycache__
*.log
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,25 @@ You can find our documentation [here](https://tabby.tabbyml.com/docs/getting-sta
- ⚙️ [Configuration](https://tabby.tabbyml.com/docs/configuration)

### Run Tabby in 1 Minute
The easiest way to start a Tabby server is by using the following Docker command:
The easiest way to start a Tabby server is by using the following Docker command...

...with CUDA:
```bash
docker run -it \
--gpus all -p 8080:8080 -v $HOME/.tabby:/data \
tabbyml/tabby \
serve --model TabbyML/StarCoder-1B --device cuda
```

...with ROCm (Linux only):
```bash
docker run -it \
--device /dev/dri --device /dev/kfd \
-p 8080:8080 -v $HOME/.tabby:/data \
tabbyml/tabby-rocm \
serve --model TabbyML/StarCoder-1B --device rocm
```

For additional options (e.g. inference type, parallelism), please refer to the [documentation page](https://tabbyml.github.io/tabby).

## 🤝 Contributing
Expand Down
13 changes: 9 additions & 4 deletions crates/llama-cpp-bindings/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ fn main() {

fn build_llama_cpp() {
let mut config = Config::new("llama.cpp");
config.define("LLAMA_BUILD_TESTS", "OFF");
config.define("LLAMA_BUILD_EXAMPLES", "OFF");
config.define("LLAMA_BUILD_SERVER", "OFF");
if cfg!(target_os = "macos") {
config.define("LLAMA_METAL", "ON");
println!("cargo:rustc-link-lib=framework=Foundation");
Expand All @@ -45,7 +48,7 @@ fn build_llama_cpp() {
println!("cargo:rustc-link-lib=cublasLt");
}
if cfg!(feature = "rocm") {
let amd_gpu_targets: Vec<&str> = vec![
let amd_gpu_default_targets: Vec<&str> = vec![
"gfx803",
"gfx900",
"gfx906:xnack-",
Expand All @@ -64,6 +67,8 @@ fn build_llama_cpp() {
"gfx1102",
"gfx1103",
];
let amd_gpu_targets =
env::var("AMDGPU_TARGETS").unwrap_or(amd_gpu_default_targets.join(";"));

let rocm_root = env::var("ROCM_ROOT").unwrap_or("/opt/rocm".to_string());
config.define("LLAMA_HIPBLAS", "ON");
Expand All @@ -72,7 +77,7 @@ fn build_llama_cpp() {
"CMAKE_CXX_COMPILER",
format!("{}/llvm/bin/clang++", rocm_root),
);
config.define("AMDGPU_TARGETS", amd_gpu_targets.join(";"));
config.define("AMDGPU_TARGETS", amd_gpu_targets);
println!("cargo:rustc-link-arg=-Wl,--copy-dt-needed-entries");
println!("cargo:rustc-link-search=native={}/hip/lib", rocm_root);
println!("cargo:rustc-link-search=native={}/rocblas/lib", rocm_root);
Expand Down Expand Up @@ -104,8 +109,8 @@ fn build_llama_cpp() {
fn build_cxx_binding() {
cxx_build::bridge("src/lib.rs")
.file("src/engine.cc")
.flag_if_supported("-Iinclude")
.flag_if_supported("-Illama.cpp")
.include("include")
.include("llama.cpp")
.flag_if_supported("-std=c++14")
.compile("cxxbridge");
}
1 change: 1 addition & 0 deletions crates/tabby-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ uuid = { version = "1.4.1", features = ["v4"] }
tantivy.workspace = true
anyhow.workspace = true
glob = "0.3.1"
juniper.workspace = true
utoipa.workspace = true
serde_json.workspace = true
async-trait.workspace = true
Expand Down
21 changes: 21 additions & 0 deletions crates/tabby-common/src/api/accelerator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use juniper::{GraphQLEnum, GraphQLObject};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

/// Kind of compute device backing an accelerator.
///
/// The derives make this (de)serializable via serde, expose it as a
/// GraphQL enum via juniper, and include it in the OpenAPI schema via
/// utoipa, so the same type serves both the web API and serialization.
#[derive(Serialize, Deserialize, GraphQLEnum, ToSchema, PartialEq, Clone, Debug)]
pub enum DeviceType {
/// NVIDIA CUDA device.
Cuda,
/// AMD ROCm device.
Rocm,
}

#[derive(Serialize, Deserialize, GraphQLObject, ToSchema, Clone, Debug)]
pub struct Accelerator {
/// Universally unique ID of the accelerator, if available
pub uuid: Option<String>,
Comment on lines +13 to +14
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The biggest issue now is whether we can prevent this field from being part of the telemetry. If not, it definitely has to be removed, as it's basically almost equivalent to a serial number. It's nice for tracking a GPU in the accelerators, but it's a huge issue if that data leaves your control.

/// Technical name of the underlying hardware chip, if available
pub chip_name: Option<String>,
/// User readable name for the accelerator
pub display_name: String,
/// Type of the accelerator device
pub device_type: DeviceType,
}
1 change: 1 addition & 0 deletions crates/tabby-common/src/api/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod accelerator;
pub mod code;
pub mod event;
5 changes: 3 additions & 2 deletions crates/tabby-inference/src/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
}

pub fn trim_stop_words(&self, language: &'static Language, text: &str) -> Option<String> {
let Some(re) = self.get_re(language) else {

Check warning on line 55 in crates/tabby-inference/src/decoding.rs

View workflow job for this annotation

GitHub Actions / autofix

this `let...else` may be rewritten with the `?` operator
return None;
};

Expand All @@ -75,7 +75,8 @@
.iter()
.map(|x| regex::escape(&reverse(x)))
.collect();
let regex_string = r"(?m)\A".to_owned() + "((" + &reversed_stop_words.join(")|(") + "))";
let regex_string =
r"(?m)\A".to_owned() + "((" + reversed_stop_words.join(")|(").as_str() + "))";
Regex::new(&regex_string).expect("Failed to create regex")
}

Expand All @@ -98,7 +99,7 @@

pub fn should_stop(&mut self, new_text: &str) -> bool {
if !new_text.is_empty() {
self.reversed_text = reverse(new_text) + &self.reversed_text;
self.reversed_text = reverse(new_text) + self.reversed_text.as_str();

if let Some(re) = &self.stop_re {
if re.is_match(&self.reversed_text) {
Expand Down
4 changes: 4 additions & 0 deletions crates/tabby/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ use std::error::Error;
use vergen::EmitBuilder;

fn main() -> Result<(), Box<dyn Error>> {
if cfg!(feature = "rocm") {
let rocm_root = "/opt/rocm";
println!("cargo:rustc-link-search=native={}/lib", rocm_root);
}
// touch
EmitBuilder::builder()
.all_build()
Expand Down
4 changes: 3 additions & 1 deletion crates/tabby/src/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use clap::Args;
use hyper::StatusCode;
use tabby_common::{
api,
api::{code::CodeSearch, event::EventLogger},
api::{accelerator, code::CodeSearch, event::EventLogger},
config::Config,
usage,
};
Expand Down Expand Up @@ -61,6 +61,8 @@ Install following IDE / Editor extensions to get started with [Tabby](https://gi
chat::ChatCompletionRequest,
chat::Message,
chat::ChatCompletionChunk,
accelerator::DeviceType,
accelerator::Accelerator,
health::HealthState,
health::Version,
api::code::SearchResponse,
Expand Down