Refactor default chat template warnings (#30551)
* Temporarily silence warnings in apply_chat_template until we can properly deprecate default chat templates

* make fixup

* Move the default chat template warning into apply_chat_template itself

* make fixup
Rocketknight1 authored and Ita Zaporozhets committed May 14, 2024
1 parent 137e5c6 commit aa973ac
Showing 20 changed files with 30 additions and 135 deletions.
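Most hunks below delete the per-class `logger.warning_once` call from each `default_chat_template` property; the idefics2 processor hunk shows the counterpart, where the warning is raised once at the single fallback site inside `apply_chat_template`. The following is a minimal sketch of the resulting pattern, with simplified names and a stand-in `warning_once` helper rather than the exact Transformers internals:

```python
import logging

logger = logging.getLogger(__name__)
_seen_warnings = set()


def warning_once(message: str) -> None:
    # Stand-in for transformers' logger.warning_once: emit each distinct
    # message only once per process.
    if message not in _seen_warnings:
        _seen_warnings.add(message)
        logger.warning(message)


class ChatTokenizerSketch:
    # Set explicitly by users or loaded from the tokenizer config.
    chat_template = None

    @property
    def default_chat_template(self):
        # Legacy class-level fallback; after this refactor it no longer warns itself.
        return "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"

    def apply_chat_template(self, conversation, chat_template=None, **kwargs):
        if chat_template is None:
            if self.chat_template is not None:
                chat_template = self.chat_template
            else:
                # The deprecation warning now lives at the one place where the
                # class-level default is actually used.
                warning_once(
                    "No chat template is set for this tokenizer, falling back to a "
                    "default class-level template. Default chat templates are a "
                    "legacy feature and will be removed in a future release."
                )
                chat_template = self.default_chat_template
        # ...the real implementation renders the Jinja template here...
        return chat_template
```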
7 changes: 0 additions & 7 deletions src/transformers/models/blenderbot/tokenization_blenderbot.py
@@ -411,13 +411,6 @@ def default_chat_template(self):
"""
A very simple chat template that just adds whitespace between messages.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{% for message in messages %}"
"{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
@@ -293,13 +293,6 @@ def default_chat_template(self):
"""
A very simple chat template that just adds whitespace between messages.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{% for message in messages %}"
"{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
@@ -224,13 +224,6 @@ def default_chat_template(self):
"""
A very simple chat template that just adds whitespace between messages.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{% for message in messages %}"
"{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
@@ -104,13 +104,6 @@ def default_chat_template(self):
"""
A very simple chat template that just adds whitespace between messages.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{% for message in messages %}"
"{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
7 changes: 0 additions & 7 deletions src/transformers/models/bloom/tokenization_bloom_fast.py
@@ -155,11 +155,4 @@ def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
7 changes: 0 additions & 7 deletions src/transformers/models/code_llama/tokenization_code_llama.py
@@ -456,13 +456,6 @@ def default_chat_template(self):
snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
in the original repository.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
template = (
"{% if messages[0]['role'] == 'system' %}"
"{% set loop_messages = messages[1:] %}" # Extract system message if it's present
@@ -369,13 +369,6 @@ def default_chat_template(self):
snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
in the original repository.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
template = (
"{% if messages[0]['role'] == 'system' %}"
"{% set loop_messages = messages[1:] %}" # Extract system message if it's present
7 changes: 0 additions & 7 deletions src/transformers/models/cohere/tokenization_cohere_fast.py
@@ -247,13 +247,6 @@ def default_chat_template(self):
'<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
default_template = (
"{{ bos_token }}"
"{% if messages[0]['role'] == 'system' %}"
7 changes: 0 additions & 7 deletions src/transformers/models/gpt2/tokenization_gpt2.py
@@ -336,11 +336,4 @@ def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
8 changes: 1 addition & 7 deletions src/transformers/models/gpt2/tokenization_gpt2_fast.py
@@ -147,11 +147,5 @@ def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)

return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
@@ -234,11 +234,4 @@ def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
@@ -165,13 +165,6 @@ def default_chat_template(self):
"""
A simple chat template that just adds BOS/EOS tokens around messages while discarding role information.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{% for message in messages %}"
"{{ bos_token + eos_token + message.content + eos_token }}"
7 changes: 0 additions & 7 deletions src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
@@ -301,13 +301,6 @@ def default_chat_template(self):
This chat template formats messages like an instant messenger chat log, with "User:" and "Bot:" strings
preceding messages. BOS tokens are added between all messages.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{{ eos_token }}{{ bos_token }}"
"{% for message in messages %}"
@@ -246,13 +246,6 @@ def default_chat_template(self):
A simple chat template that adds standard BOS, SEP and EOS tokens between messages while discarding role
information.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return (
"{% for message in messages %}"
"{% if not loop.first %}{{ bos_token}}{% endif %}"
8 changes: 7 additions & 1 deletion src/transformers/models/idefics2/processing_idefics2.py
@@ -284,8 +284,14 @@ def apply_chat_template(
if self.chat_template is not None:
chat_template = self.chat_template
else:
logger.warning_once(
"No chat template is set for this processor, falling back to a default class-level template. This is "
"very error-prone, because models are often trained with templates different from the class default! "
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
chat_template = self.default_chat_template

return self.tokenizer.apply_chat_template(
conversation, chat_template=chat_template, tokenize=tokenize, **kwargs
)
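To avoid the fallback (and the warning) entirely, callers can set an explicit template before calling `apply_chat_template`. A hedged usage sketch; the checkpoint name and template string below are illustrative placeholders, not the idefics2 defaults:

```python
from transformers import AutoProcessor

# Illustrative checkpoint; substitute the one you actually use.
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")

# An explicit chat_template takes priority over the class-level default,
# so the fallback branch above (and its warning) is never reached.
processor.chat_template = (
    "{% for message in messages %}"
    "{{ message['role'] }}: {{ message['content'] }}\n"
    "{% endfor %}"
)

prompt = processor.apply_chat_template(
    [{"role": "user", "content": "Describe this image."}],
    tokenize=False,
)
print(prompt)
```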
7 changes: 0 additions & 7 deletions src/transformers/models/llama/tokenization_llama.py
@@ -429,13 +429,6 @@ def default_chat_template(self):
snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
in the original repository.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
template = (
"{% if messages[0]['role'] == 'system' %}"
"{% set loop_messages = messages[1:] %}" # Extract system message if it's present
7 changes: 0 additions & 7 deletions src/transformers/models/llama/tokenization_llama_fast.py
@@ -261,13 +261,6 @@ def default_chat_template(self):
snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
in the original repository.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
template = (
"{% if messages[0]['role'] == 'system' %}"
"{% set loop_messages = messages[1:] %}" # Extract system message if it's present
7 changes: 0 additions & 7 deletions src/transformers/models/whisper/tokenization_whisper.py
@@ -815,13 +815,6 @@ def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"

def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True):
7 changes: 0 additions & 7 deletions src/transformers/models/whisper/tokenization_whisper_fast.py
@@ -544,13 +544,6 @@ def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. "
"This is very error-prone, because models are often trained with templates different from the class "
"default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"

# Copied from transformers.models.whisper.tokenization_whisper.WhisperTokenizer.get_decoder_prompt_ids