Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: Renaming comments #97

Merged
merged 2 commits
Dec 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,14 @@ COPY server/punica_kernels/ .
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX"
RUN python setup.py build

# Text Generation Inference base image
# LoRAX base image
FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base

# Conda env
ENV PATH=/opt/conda/bin:$PATH \
CONDA_PREFIX=/opt/conda

# Text Generation Inference base env
# LoRAX base env
ENV HUGGINGFACE_HUB_CACHE=/data \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PORT=80
Expand Down
4 changes: 2 additions & 2 deletions clients/python/lorax/errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Dict


# Text Generation Inference Errors
# LoRAX Errors
class ValidationError(Exception):
def __init__(self, message: str):
super().__init__(message)
Expand Down Expand Up @@ -74,7 +74,7 @@ def parse_error(status_code: int, payload: Dict[str, str]) -> Exception:
Exception: parsed exception

"""
# Try to parse a Text Generation Inference error
# Try to parse a LoRAX error
message = payload["error"]
if "error_type" in payload:
error_type = payload["error_type"]
Expand Down
42 changes: 0 additions & 42 deletions clients/python/tests/test_inference_api.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<!-- Load the latest Swagger UI code and style from npm using unpkg.com -->
<script src="https://unpkg.com/swagger-ui-dist@3/swagger-ui-bundle.js"></script>
<link rel="stylesheet" type="text/css" href="https://unpkg.com/swagger-ui-dist@3/swagger-ui.css"/>
<title>Text Generation Inference API</title>
<title>LoRAX API</title>
</head>
<body>
<div id="swagger-ui"></div> <!-- Div to hold the UI component -->
Expand Down
28 changes: 14 additions & 14 deletions docs/openapi.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"openapi": "3.0.3",
"info": {
"title": "Text Generation Inference",
"description": "Text Generation Webserver",
"title": "LoRAX",
"description": "LoRAX",
"contact": {
"name": "Olivier Dehaene"
"name": "Travis Addair"
},
"license": {
"name": "Apache 2.0",
Expand All @@ -16,7 +16,7 @@
"/": {
"post": {
"tags": [
"Text Generation Inference"
"LoRAX"
],
"summary": "Generate tokens if `stream == false` or a stream of tokens if `stream == true`",
"description": "Generate tokens if `stream == false` or a stream of tokens if `stream == true`",
Expand Down Expand Up @@ -105,7 +105,7 @@
"/generate": {
"post": {
"tags": [
"Text Generation Inference"
"LoRAX"
],
"summary": "Generate tokens",
"description": "Generate tokens",
Expand Down Expand Up @@ -189,7 +189,7 @@
"/generate_stream": {
"post": {
"tags": [
"Text Generation Inference"
"LoRAX"
],
"summary": "Generate a stream of tokens using Server-Sent Events",
"description": "Generate a stream of tokens using Server-Sent Events",
Expand Down Expand Up @@ -273,7 +273,7 @@
"/health": {
"get": {
"tags": [
"Text Generation Inference"
"LoRAX"
],
"summary": "Health check method",
"description": "Health check method",
Expand All @@ -283,7 +283,7 @@
"description": "Everything is working fine"
},
"503": {
"description": "Text generation inference is down",
"description": "LoRAX is down",
"content": {
"application/json": {
"schema": {
Expand All @@ -302,10 +302,10 @@
"/info": {
"get": {
"tags": [
"Text Generation Inference"
"LoRAX"
],
"summary": "Text Generation Inference endpoint info",
"description": "Text Generation Inference endpoint info",
"summary": "LoRAX endpoint info",
"description": "LoRAX endpoint info",
"operationId": "get_model_info",
"responses": {
"200": {
Expand All @@ -324,7 +324,7 @@
"/metrics": {
"get": {
"tags": [
"Text Generation Inference"
"LoRAX"
],
"summary": "Prometheus metrics scrape endpoint",
"description": "Prometheus metrics scrape endpoint",
Expand Down Expand Up @@ -850,8 +850,8 @@
},
"tags": [
{
"name": "Text Generation Inference",
"description": "Hugging Face Text Generation Inference API"
"name": "LoRAX",
"description": "LoRAX API"
}
]
}
2 changes: 1 addition & 1 deletion launcher/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lorax-launcher"
description = "Text Generation Launcher"
description = "LoRAX Launcher"
version.workspace = true
edition.workspace = true
authors.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion router/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lorax-router"
description = "Text Generation Webserver"
description = "LoRAX Webserver"
build = "build.rs"
version.workspace = true
edition.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion router/client/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::cmp::min;
use tonic::transport::{Channel, Uri};
use tracing::instrument;

/// Text Generation Inference gRPC client
/// LoRAX gRPC client
#[derive(Debug, Clone)]
pub struct Client {
stub: LoraxServiceClient<Channel>,
Expand Down
4 changes: 2 additions & 2 deletions router/client/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Text Generation gRPC client library
//! LoRAX gRPC client library

mod client;
#[allow(clippy::derive_partial_eq_without_eq)]
Expand All @@ -19,7 +19,7 @@ use tonic::Status;

#[derive(Error, Debug, Clone)]
pub enum ClientError {
#[error("Could not connect to Text Generation server: {0}")]
#[error("Could not connect to LoRAX server: {0}")]
Connection(String),
#[error("Server error: {0}")]
Generation(String),
Expand Down
2 changes: 1 addition & 1 deletion router/client/src/sharded_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use tonic::transport::Uri;
use tracing::instrument;

#[derive(Debug, Clone)]
/// Text Generation Inference gRPC multi client
/// LoRAX gRPC multi client
pub struct ShardedClient {
clients: Vec<Client>,
}
Expand Down
2 changes: 1 addition & 1 deletion router/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/// Text Generation Inference Webserver
/// LoRAX Webserver
mod adapter;
mod health;
mod infer;
Expand Down
2 changes: 1 addition & 1 deletion router/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/// Text Generation Inference webserver entrypoint
/// LoRAX webserver entrypoint
use axum::http::HeaderValue;
use clap::Parser;
use lorax_client::{ClientError, ShardedClient};
Expand Down
20 changes: 10 additions & 10 deletions router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use utoipa_swagger_ui::SwaggerUi;
/// Generate tokens if `stream == false` or a stream of token if `stream == true`
#[utoipa::path(
post,
tag = "Text Generation Inference",
tag = "LoRAX",
path = "/",
request_body = CompatGenerateRequest,
responses(
Expand Down Expand Up @@ -77,10 +77,10 @@ async fn compat_generate(
}
}

/// Text Generation Inference endpoint info
/// LoRAX endpoint info
#[utoipa::path(
get,
tag = "Text Generation Inference",
tag = "LoRAX",
path = "/info",
responses((status = 200, description = "Served model info", body = Info))
)]
Expand All @@ -91,11 +91,11 @@ async fn get_model_info(info: Extension<Info>) -> Json<Info> {

#[utoipa::path(
get,
tag = "Text Generation Inference",
tag = "LoRAX",
path = "/health",
responses(
(status = 200, description = "Everything is working fine"),
(status = 503, description = "Text generation inference is down", body = ErrorResponse,
(status = 503, description = "LoRAX is down", body = ErrorResponse,
example = json ! ({"error": "unhealthy", "error_type": "healthcheck"})),
)
)]
Expand All @@ -117,7 +117,7 @@ async fn health(mut health: Extension<Health>) -> Result<(), (StatusCode, Json<E
/// Generate tokens
#[utoipa::path(
post,
tag = "Text Generation Inference",
tag = "LoRAX",
path = "/generate",
request_body = GenerateRequest,
responses(
Expand Down Expand Up @@ -307,7 +307,7 @@ async fn generate(
/// Generate a stream of token using Server-Sent Events
#[utoipa::path(
post,
tag = "Text Generation Inference",
tag = "LoRAX",
path = "/generate_stream",
request_body = GenerateRequest,
responses(
Expand Down Expand Up @@ -503,7 +503,7 @@ async fn generate_stream(
/// Prometheus metrics scrape endpoint
#[utoipa::path(
get,
tag = "Text Generation Inference",
tag = "LoRAX",
path = "/metrics",
responses((status = 200, description = "Prometheus Metrics", body = String))
)]
Expand Down Expand Up @@ -566,10 +566,10 @@ pub async fn run(
)
),
tags(
(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API")
(name = "LoRAX", description = "LoRAX API")
),
info(
title = "Text Generation Inference",
title = "LoRAX",
license(
name = "Apache 2.0",
url = "https://www.apache.org/licenses/LICENSE-2.0"
Expand Down
4 changes: 2 additions & 2 deletions server/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Text Generation Inference Python gRPC Server
# LoRAX Python gRPC Server

A Python gRPC server for Text Generation Inference
A Python gRPC server for LoRAX

## Install

Expand Down
Loading