Skip to content

Commit

Permalink
ENH: convert command-r to chat (#1537)
Browse files Browse the repository at this point in the history
  • Loading branch information
qinxuye committed May 24, 2024
1 parent d4c4fa9 commit c3925ac
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 17 deletions.
2 changes: 1 addition & 1 deletion doc/source/getting_started/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ Currently, supported models include:
- ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
- ``deepseek``, ``deepseek-coder``, ``deepseek-chat``, ``deepseek-coder-instruct``
- ``codeqwen1.5``, ``codeqwen1.5-chat``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
- ``vicuna-v1.3``, ``vicuna-v1.5``
- ``internlm2-chat``
- ``qwen-chat``
Expand All @@ -56,6 +55,7 @@ Currently, supported models include:
- ``qwen1.5-chat``, ``qwen1.5-moe-chat``
- ``gemma-it``
- ``orion-chat``, ``orion-chat-rag``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
.. vllm_end
To install Xinference and vLLM::
Expand Down
4 changes: 2 additions & 2 deletions doc/source/models/builtin/llm/c4ai-command-r-v01.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ c4ai-command-r-v01
- **Context Length:** 131072
- **Model Name:** c4ai-command-r-v01
- **Languages:** en, fr, de, es, it, pt, ja, ko, zh, ar
- **Abilities:** generate
- **Description:** C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.
- **Abilities:** chat
- **Description:** C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.

Specifications
^^^^^^^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions doc/source/models/builtin/llm/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ The following is a list of built-in LLM in Xinference:
- Baichuan-chat is a fine-tuned version of the Baichuan LLM, specializing in chatting.

* - :ref:`c4ai-command-r-v01 <models_llm_c4ai-command-r-v01>`
- generate
- chat
- 131072
- C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.
- C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.

* - :ref:`c4ai-command-r-v01-4bit <models_llm_c4ai-command-r-v01-4bit>`
- chat
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ Currently, supported model includes:
- ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
- ``deepseek``, ``deepseek-coder``, ``deepseek-chat``, ``deepseek-coder-instruct``
- ``codeqwen1.5``, ``codeqwen1.5-chat``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
- ``vicuna-v1.3``, ``vicuna-v1.5``
- ``internlm2-chat``
- ``qwen-chat``
Expand All @@ -63,6 +62,7 @@ Currently, supported model includes:
- ``qwen1.5-chat``, ``qwen1.5-moe-chat``
- ``gemma-it``
- ``orion-chat``, ``orion-chat-rag``
- ``c4ai-command-r-v01``, ``c4ai-command-r-v01-4bit``
.. vllm_end
SGLang
Expand Down
36 changes: 32 additions & 4 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -5833,9 +5833,9 @@
"ar"
],
"model_ability": [
"generate"
"chat"
],
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
"model_description": "C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.",
"model_specs": [
{
"model_format": "pytorch",
Expand Down Expand Up @@ -5884,7 +5884,21 @@
"model_id": "alpindale/c4ai-command-r-plus-GPTQ",
"model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand Down Expand Up @@ -5925,7 +5939,21 @@
"model_id": "CohereForAI/c4ai-command-r-plus-4bit",
"model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand Down
39 changes: 34 additions & 5 deletions xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -3500,7 +3500,7 @@
"ar"
],
"model_ability": [
"generate"
"chat"
],
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
"model_specs": [
Expand All @@ -3519,11 +3519,12 @@
"model_size_in_billions": 35,
"quantizations": [
"Q2_K",
"Q3_K_M",
"Q4_K_M",
"Q5_K_M"
],
"model_id": "mirror013/C4AI-Command-R-v01-GGUF",
"model_file_name_template": "c4ai-command-r-v01.{quantization}.gguf",
"model_file_name_template": "c4ai-command-r-v01-{quantization}.gguf",
"model_hub": "modelscope",
"model_revision": "master"
},
Expand All @@ -3537,7 +3538,21 @@
"model_id": "AI-ModelScope/c4ai-command-r-plus",
"model_revision": "master"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand All @@ -3556,7 +3571,7 @@
"ar"
],
"model_ability": [
"generate"
"chat"
],
"model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
"model_specs": [
Expand All @@ -3570,7 +3585,21 @@
"model_id": "mirror013/c4ai-command-r-v01-4bit",
"model_revision": "master"
}
]
],
"prompt_style": {
"style_name": "c4ai-command-r",
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
"roles": [
"<|USER_TOKEN|>",
"<|CHATBOT_TOKEN|>"
],
"intra_message_sep": "",
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
"stop_token_ids": [
6,
255001
]
}
},
{
"version": 1,
Expand Down
13 changes: 13 additions & 0 deletions xinference/model/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,19 @@ def get_role(role_name: str):
ret += f"<|{role}|>{prompt_style.intra_message_sep}"
ret += "<|assistant|>\n"
return ret
elif prompt_style.style_name == "c4ai-command-r":
ret = (
f"<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>"
f"{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
)
for i, message in enumerate(chat_history):
role = get_role(message["role"])
content = message["content"]
if content:
ret += f"{role}{content}{prompt_style.inter_message_sep}"
else:
ret += role
return ret
else:
raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")

Expand Down
4 changes: 2 additions & 2 deletions xinference/model/llm/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ class VLLMGenerateConfig(TypedDict, total=False):

if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-moe-chat")
VLLM_SUPPORTED_MODELS.append("c4ai-command-r-v01")
VLLM_SUPPORTED_MODELS.append("c4ai-command-r-v01-4bit")
VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01-4bit")


class VLLMModel(LLM):
Expand Down

0 comments on commit c3925ac

Please sign in to comment.