Skip to content

Commit

Permalink
ENH: convert command-r to chat (#1537)
Browse files Browse the repository at this point in the history
  • Loading branch information
qinxuye committed May 24, 2024
1 parent d4c4fa9 commit c3925ac
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 17 deletions.
2 changes: 1 addition & 1 deletion doc/source/getting_started/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ Currently, supported models include:
- ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
- ``deepseek``, ``deepseek-coder``, ``deepseek-chat``, ``deepseek-coder-instruct``
- ``codeqwen1.5``, ``codeqwen1.5-chat``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
- ``vicuna-v1.3``, ``vicuna-v1.5``
- ``internlm2-chat``
- ``qwen-chat``
Expand All @@ -56,6 +55,7 @@ Currently, supported models include:
- ``qwen1.5-chat``, ``qwen1.5-moe-chat``
- ``gemma-it``
- ``orion-chat``, ``orion-chat-rag``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
.. vllm_end
To install Xinference and vLLM::
Expand Down
4 changes: 2 additions & 2 deletions doc/source/models/builtin/llm/c4ai-command-r-v01.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ c4ai-command-r-v01
- **Context Length:** 131072
- **Model Name:** c4ai-command-r-v01
- **Languages:** en, fr, de, es, it, pt, ja, ko, zh, ar
- **Abilities:** generate
- **Description:** C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.
- **Abilities:** chat
- **Description:** C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.

Specifications
^^^^^^^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions doc/source/models/builtin/llm/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ The following is a list of built-in LLM in Xinference:
- Baichuan-chat is a fine-tuned version of the Baichuan LLM, specializing in chatting.

* - :ref:`c4ai-command-r-v01 <models_llm_c4ai-command-r-v01>`
- generate
- chat
- 131072
- C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.
- C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.

* - :ref:`c4ai-command-r-v01-4bit <models_llm_c4ai-command-r-v01-4bit>`
- chat
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ Currently, supported model includes:
- ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
- ``deepseek``, ``deepseek-coder``, ``deepseek-chat``, ``deepseek-coder-instruct``
- ``codeqwen1.5``, ``codeqwen1.5-chat``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
- ``vicuna-v1.3``, ``vicuna-v1.5``
- ``internlm2-chat``
- ``qwen-chat``
Expand All @@ -63,6 +62,7 @@ Currently, supported model includes:
- ``qwen1.5-chat``, ``qwen1.5-moe-chat``
- ``gemma-it``
- ``orion-chat``, ``orion-chat-rag``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
.. vllm_end
SGLang
Expand Down
36 changes: 32 additions & 4 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -5833,9 +5833,9 @@
"ar"
],
"model_ability": [
"generate"
"chat"
],
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
"model_description": "C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.",
"model_specs": [
{
"model_format": "pytorch",
Expand Down Expand Up @@ -5884,7 +5884,21 @@
"model_id": "alpindale/c4ai-command-r-plus-GPTQ",
"model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand Down Expand Up @@ -5925,7 +5939,21 @@
"model_id": "CohereForAI/c4ai-command-r-plus-4bit",
"model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand Down
39 changes: 34 additions & 5 deletions xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -3500,7 +3500,7 @@
"ar"
],
"model_ability": [
"generate"
"chat"
],
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
"model_specs": [
Expand All @@ -3519,11 +3519,12 @@
"model_size_in_billions": 35,
"quantizations": [
"Q2_K",
"Q3_K_M",
"Q4_K_M",
"Q5_K_M"
],
"model_id": "mirror013/C4AI-Command-R-v01-GGUF",
"model_file_name_template": "c4ai-command-r-v01.{quantization}.gguf",
"model_file_name_template": "c4ai-command-r-v01-{quantization}.gguf",
"model_hub": "modelscope",
"model_revision": "master"
},
Expand All @@ -3537,7 +3538,21 @@
"model_id": "AI-ModelScope/c4ai-command-r-plus",
"model_revision": "master"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand All @@ -3556,7 +3571,7 @@
"ar"
],
"model_ability": [
"generate"
"chat"
],
"model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
"model_specs": [
Expand All @@ -3570,7 +3585,21 @@
"model_id": "mirror013/c4ai-command-r-v01-4bit",
"model_revision": "master"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand Down
13 changes: 13 additions & 0 deletions xinference/model/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,19 @@ def get_role(role_name: str):
ret += f"<|{role}|>{prompt_style.intra_message_sep}"
ret += "<|assistant|>\n"
return ret
elif prompt_style.style_name == "c4ai-command-r":
ret = (
f"<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>"
f"{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
)
for i, message in enumerate(chat_history):
role = get_role(message["role"])
content = message["content"]
if content:
ret += f"{role}{content}{prompt_style.inter_message_sep}"
else:
ret += role
return ret
else:
raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")

Expand Down
4 changes: 2 additions & 2 deletions xinference/model/llm/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ class VLLMGenerateConfig(TypedDict, total=False):

if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-moe-chat")
VLLM_SUPPORTED_MODELS.append("c4ai-command-r-v01")
VLLM_SUPPORTED_MODELS.append("c4ai-command-r-v01-4bit")
VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01-4bit")


class VLLMModel(LLM):
Expand Down

0 comments on commit c3925ac

Please sign in to comment.