fix ci

speechbrain · Apr 19, 2024 · b350fad · b350fad
1 parent 9d8bf34
commit b350fad
Show file tree

Hide file tree

Showing 13 changed files with 90 additions and 70 deletions.
diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/extra_requirements.txt b/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/extra_requirements.txt
@@ -1,4 +1,4 @@
+ftfy
 # k2 # It is better to install k2 with the procedure listed here: https://k2-fsa.github.io/k2/installation/from_wheels.html
 num2words
 soundfile
-ftfy
diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/hparams/train_wav2vec_char_ctc_k2.yaml b/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/hparams/train_wav2vec_char_ctc_k2.yaml
@@ -1,6 +1,6 @@
 # ################################
 # Model: wav2vec2 + DNN + CTC + LM (k2)
-# Augmentation: SpecAugment
+# Augmentation: SpecAugment + Speed
 # Authors: Pierre Champion 2023
 # ################################
 
@@ -21,7 +21,7 @@ stm_directory: !ref <data_folder>/**/[^\.ne_e2\.|\.ne\.|\.spk\.|part\.]*.stm
 wav_directory: !ref <data_folder>/**/*.wav
 train_splits: {"train_ESTER2":["/ESTER2/train_trans_rapide/*", "/ESTER2/train/*"], "train_ESTER1":["/ESTER1/train/*"], "train_EPAC":["/EPAC/train/*"], "train_ETAPE":["/ETAPE/train/*"], "train_REPERE":["/REPERE/train/*"]}
 dev_splits: {"dev_ESTER2":["/ESTER2/dev/*"], "dev_ESTER1":["/ESTER1/dev/*"], "dev_ETAPE":["/ETAPE/dev/*"], "dev_REPERE2014":["/REPERE/dev2014/*"]}
-test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"] , "test_REPERE2014":["/REPERE/test2014/*"]}
+test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"], "test_REPERE2014":["/REPERE/test2014/*"]}
 merge_train_csv: "train_ESTER2+train_ESTER1+train_EPAC+train_ETAPE+train_REPERE"
 prep_save_folder: !ref <output_folder>
 skip_prep: False
@@ -66,7 +66,6 @@ rescoring_lm_scale: 0.4
 lm_dir: ../LM/results/n_gram_lm
 
 G_arpa: 3-for-char-gram.arpa
-# G_arpa: pguyot.arpa
 G_rescoring_arpa: 4-for-char-gram.arpa
 
 # Training parameters
@@ -152,7 +151,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
  augmentations: [
  !ref <speed_perturb>,
  !ref <drop_freq>,
-  ]
+ ]
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
  input_shape: [null, null, 1280]

diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/hparams/train_wav2vec_phone_ctc_k2.yaml b/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/hparams/train_wav2vec_phone_ctc_k2.yaml
@@ -1,6 +1,6 @@
 # ################################
 # Model: wav2vec2 + DNN + CTC + LM (k2)
-# Augmentation: SpecAugment
+# Augmentation: SpecAugment + Speed
 # Authors: Pierre Champion 2023
 # ################################
 
@@ -21,7 +21,7 @@ stm_directory: !ref <data_folder>/**/[^\.ne_e2\.|\.ne\.|\.spk\.|part\.]*.stm
 wav_directory: !ref <data_folder>/**/*.wav
 train_splits: {"train_ESTER2":["/ESTER2/train_trans_rapide/*", "/ESTER2/train/*"], "train_ESTER1":["/ESTER1/train/*"], "train_EPAC":["/EPAC/train/*"], "train_ETAPE":["/ETAPE/train/*"], "train_REPERE":["/REPERE/train/*"]}
 dev_splits: {"dev_ESTER2":["/ESTER2/dev/*"], "dev_ESTER1":["/ESTER1/dev/*"], "dev_ETAPE":["/ETAPE/dev/*"], "dev_REPERE2014":["/REPERE/dev2014/*"]}
-test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"] , "test_REPERE2014":["/REPERE/test2014/*"]}
+test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"], "test_REPERE2014":["/REPERE/test2014/*"]}
 merge_train_csv: "train_ESTER2+train_ESTER1+train_EPAC+train_ETAPE+train_REPERE"
 prep_save_folder: !ref <output_folder>
 skip_prep: False
@@ -66,7 +66,6 @@ rescoring_lm_scale: 0.4
 lm_dir: ../LM/results/n_gram_lm
 
 G_arpa: 3-for-phone-gram.arpa
-# G_arpa: pguyot.arpa
 G_rescoring_arpa: 4-for-phone-gram.arpa
 
 # Training parameters
@@ -150,7 +149,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
  augmentations: [
  !ref <speed_perturb>,
  !ref <drop_freq>,
-  ]
+ ]
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
  input_shape: [null, null, 1280]

diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/train_with_wav2vec_ctc_k2.py b/recipes/ESTER+EPAC+ETAPE+REPERE/ASR/train_with_wav2vec_ctc_k2.py
@@ -18,21 +18,21 @@
  import phonemizer
 
  phonemizer.phonemize("c'est", language="fr-fr")
-except Exception as e:
+except Exception:
  pass
 
+import logging
 import os
 import sys
-import torch
-import logging
-import speechbrain as sb
-from speechbrain.utils.distributed import run_on_main, if_main_process
-from speechbrain.utils.data_utils import download_file
-from hyperpyyaml import load_hyperpyyaml
 from collections import defaultdict
 from pathlib import Path
 
+import torch
+from hyperpyyaml import load_hyperpyyaml
+
+import speechbrain as sb
 import speechbrain.k2_integration as sbk2
+from speechbrain.utils.distributed import if_main_process, run_on_main
 
 logger = logging.getLogger(__name__)
 
@@ -186,7 +186,8 @@ def on_stage_end(self, stage, stage_loss, epoch):
  valid_stats=stage_stats,
  )
  self.checkpointer.save_and_keep_only(
- meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
+ meta={"WER": stage_stats["WER"]},
+ min_keys=["WER"],
  )
  elif stage == sb.Stage.TEST:
  self.hparams.train_logger.log_stats(
@@ -237,7 +238,8 @@ def dataio_prepare(hparams):
  data_folder = hparams["data_folder"]
 
  train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
- csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
+ csv_path=hparams["train_csv"],
+ replacements={"data_root": data_folder},
  )
 
  if hparams["sorting"] == "ascending":
@@ -270,7 +272,8 @@ def dataio_prepare(hparams):
  )
 
  valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
- csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
+ csv_path=hparams["valid_csv"],
+ replacements={"data_root": data_folder},
  )
  valid_data = valid_data.filtered_sorted(
  sort_key="duration",
@@ -317,7 +320,8 @@ def text_pipeline(wrd):
 
  # 4. Set output:
  sb.dataio.dataset.set_output_keys(
- datasets, ["id", "sig", "wrd"],
+ datasets,
+ ["id", "sig", "wrd"],
  )
 
  return train_data, valid_data, test_datasets
@@ -384,9 +388,11 @@ def text_pipeline(wrd):
  "lang_dir": hparams["lang_dir"],
  "vocab_files": [],
  # "vocab_files": [hparams["vocab_file"]],
- "csv_files": [hparams["output_folder"] + "/train.csv"]
- if not hparams["skip_prep"]
- else [],
+ "csv_files": (
+ [hparams["output_folder"] + "/train.csv"]
+ if not hparams["skip_prep"]
+ else []
+ ),
  "add_word_boundary": hparams["add_word_boundary"],
  "column_text_key": "wrd",
  },
@@ -399,17 +405,19 @@ def text_pipeline(wrd):
  "lang_dir": hparams["lang_dir"],
  "vocab_files": [],
  # "vocab_files": [hparams["vocab_file"]],
- "csv_files": [hparams["output_folder"] + "/train.csv"]
- if not hparams["skip_prep"]
- else [],
+ "csv_files": (
+ [hparams["output_folder"] + "/train.csv"]
+ if not hparams["skip_prep"]
+ else []
+ ),
  "add_word_boundary": hparams["add_word_boundary"],
  "column_text_key": "wrd",
  "lang": "fr-fr",
  },
  )
  else:
  raise NotImplementedError(
- f"token_type={token_type} not not implemented"
+ f"token_type={hparams['token_type']} not not implemented"
  )
 
  caching = (
@@ -438,7 +446,8 @@ def text_pipeline(wrd):
 
  lexicon = sbk2.lexicon.Lexicon(hparams["lang_dir"])
  graph_compiler = sbk2.graph_compiler.CtcGraphCompiler(
- lexicon, device=asr_brain.device,
+ lexicon,
+ device=asr_brain.device,
  )
 
  decoding_params = {}

diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/LM/extra_requirements.txt b/recipes/ESTER+EPAC+ETAPE+REPERE/LM/extra_requirements.txt
@@ -1,4 +1,4 @@
+ftfy
 # k2 # It is better to install k2 with the procedure listed here: https://k2-fsa.github.io/k2/installation/from_wheels.html
 num2words
 soundfile
-ftfy
diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/LM/hparams/train_ngram.yaml b/recipes/ESTER+EPAC+ETAPE+REPERE/LM/hparams/train_ngram.yaml
@@ -1,5 +1,5 @@
 #########
-# Recipe for Training kenLM on stm formated Data.
+# Recipe for Training kenLM on stm formatted Data.
 #
 # Author:
 # - Pierre Champion 2024

diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/LM/train_ngram.py b/recipes/ESTER+EPAC+ETAPE+REPERE/LM/train_ngram.py
@@ -9,14 +9,15 @@
  * Pierre Champion 2023
 """
 
+import logging
 import os
 import sys
-import logging
-import speechbrain as sb
-from speechbrain.utils.distributed import run_on_main
 from hyperpyyaml import load_hyperpyyaml
+
+import speechbrain as sb
 import speechbrain.k2_integration as sbk2
 from speechbrain.utils.data_utils import get_list_from_csv
+from speechbrain.utils.distributed import run_on_main
 
 logger = logging.getLogger(__name__)
 
@@ -118,9 +119,11 @@ def dataprep_lm_training(
  kwargs={
  "lang_dir": hparams["lang_dir"],
  "vocab_files": [hparams["vocab_file"]],
- "extra_csv_files": [hparams["output_folder"] + "/train.csv"]
- if not hparams["skip_prep"]
- else [],
+ "csv_files": (
+ [hparams["output_folder"] + "/train.csv"]
+ if not hparams["skip_prep"]
+ else []
+ ),
  "add_word_boundary": hparams["add_word_boundary"],
  },
  )

diff --git a/recipes/ESTER+EPAC+ETAPE+REPERE/stm_prepare.py b/recipes/ESTER+EPAC+ETAPE+REPERE/stm_prepare.py
@@ -13,26 +13,22 @@
 Pierre Champion
 """
 
-import logging
-import re
+import csv
 import glob
+import logging
 import os
-import csv
+import re
+import string
 import sys
 
-from num2words import num2words
 import ftfy
+import soundfile
+from num2words import num2words
 from tqdm import tqdm
 
-from speechbrain.dataio.dataio import (
- load_pkl,
- save_pkl,
-)
+from speechbrain.dataio.dataio import load_pkl, save_pkl
 from speechbrain.utils.data_utils import get_list_from_csv
 
-import soundfile
-import string
-
 logger = logging.getLogger(__name__)
 OPT_FILE = "opt_stm_prepare.pkl"
 SAMPLERATE = 16000
@@ -193,7 +189,7 @@ def prepare_stm( # noqa
  new_word_on_apostrophe=new_word_on_apostrophe,
  )
 
- # No transcription, might be only rire/jingle anotation
+ # No transcription, might be only rire/jingle annotation
  if text == "":
  continue
 
@@ -391,7 +387,7 @@ def normalize_text(text, new_word_on_apostrophe=True):
  text = ftfy.fix_text(text)
 
  # Names
- text = re.sub(r"Franç§ois", "François", text)
+ text = re.sub(r"Franç§ois", "François", text) # codespell:ignore
  text = re.sub(r"Schrà ¶der", "Schràder", text)
 
  text = re.sub(r"«", "", text)
@@ -419,7 +415,7 @@ def normalize_text(text, new_word_on_apostrophe=True):
  delset = delset.replace(char, "")
  text = text.translate(str.maketrans("", "", delset))
 
- # Undecidable variant heared like on (n') en:
+ # Undecidable variant heard like on (n') en:
  text = re.sub(r"\(.+?\)", "", text)
  text = re.sub(r"\(\)", "", text)
  text = re.sub(r"(O.K.)", "ok", text)
@@ -484,7 +480,7 @@ def normalize_text(text, new_word_on_apostrophe=True):
  # ã is used in place of à in most cases
  text = re.sub(r"ã", "à", text)
 
- # replace n succesive spaces with one space.
+ # replace n successive spaces with one space.
  text = re.sub(r"\s{2,}", " ", text)
  text = re.sub("^ ", "", text)
  text = re.sub(" $", "", text)

diff --git a/recipes/LibriSpeech/LM/train_ngram.py b/recipes/LibriSpeech/LM/train_ngram.py
@@ -139,7 +139,7 @@ def dataprep_lm_training(
  kwargs={
  "lang_dir": hparams["lang_dir"],
  "vocab_files": [hparams["vocab_file"]],
- "extra_csv_files": (
+ "csv_files": (
  [hparams["output_folder"] + "/train.csv"]
  if not hparams["skip_prep"]
  else []

diff --git a/speechbrain/k2_integration/graph_compiler.py b/speechbrain/k2_integration/graph_compiler.py
@@ -51,7 +51,10 @@ def device(self):
 
  @abc.abstractmethod
  def compile(
- self, texts: List[str], is_training: bool = True, max_len: torch.float = torch.inf
+ self,
+ texts: List[str],
+ is_training: bool = True,
+ max_len: torch.float = torch.inf,
  ) -> Tuple[k2.Fsa, torch.Tensor, Optional[torch.Tensor]]:
  """
  Compile the graph for the given texts.
@@ -314,7 +317,10 @@ def device(self):
  return self._device
 
  def compile(
- self, texts: List[str], is_training: bool = True, max_len: torch.float = torch.inf
+ self,
+ texts: List[str],
+ is_training: bool = True,
+ max_len: torch.float = torch.inf,
  ) -> Tuple[k2.Fsa, torch.Tensor, Optional[torch.Tensor]]:
  """
  Build decoding graphs by composing ctc_topo with given transcripts.
@@ -365,8 +371,10 @@ def compile(
 
  mask = target_lens < max_len - 2
  if torch.any(~mask):
- logger.debug("Removing elements from batch for CTC loss.\n"
- "Output label length is greater than input length.")
+ logger.debug(
+ "Removing elements from batch for CTC loss.\n"
+ "Output label length is greater than input length."
+ )
  target_lens = target_lens[mask]
  word_idx = [id for i, id in enumerate(word_idx) if mask[i]]