Skip to content

Commit

Permalink
fix ci
Browse files Browse the repository at this point in the history
  • Loading branch information
pchampio committed Apr 19, 2024
1 parent 9d8bf34 commit b350fad
Show file tree
Hide file tree
Showing 13 changed files with 90 additions and 70 deletions.
2 changes: 1 addition & 1 deletion recipes/ESTER+EPAC+ETAPE+REPERE/ASR/extra_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ftfy
# k2 # It is better to install k2 with the procedure listed here: https://k2-fsa.github.io/k2/installation/from_wheels.html
num2words
soundfile
ftfy
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ################################
# Model: wav2vec2 + DNN + CTC + LM (k2)
# Augmentation: SpecAugment
# Augmentation: SpecAugment + Speed
# Authors: Pierre Champion 2023
# ################################

Expand All @@ -21,7 +21,7 @@ stm_directory: !ref <data_folder>/**/[^\.ne_e2\.|\.ne\.|\.spk\.|part\.]*.stm
wav_directory: !ref <data_folder>/**/*.wav
train_splits: {"train_ESTER2":["/ESTER2/train_trans_rapide/*", "/ESTER2/train/*"], "train_ESTER1":["/ESTER1/train/*"], "train_EPAC":["/EPAC/train/*"], "train_ETAPE":["/ETAPE/train/*"], "train_REPERE":["/REPERE/train/*"]}
dev_splits: {"dev_ESTER2":["/ESTER2/dev/*"], "dev_ESTER1":["/ESTER1/dev/*"], "dev_ETAPE":["/ETAPE/dev/*"], "dev_REPERE2014":["/REPERE/dev2014/*"]}
test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"] , "test_REPERE2014":["/REPERE/test2014/*"]}
test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"], "test_REPERE2014":["/REPERE/test2014/*"]}
merge_train_csv: "train_ESTER2+train_ESTER1+train_EPAC+train_ETAPE+train_REPERE"
prep_save_folder: !ref <output_folder>
skip_prep: False
Expand Down Expand Up @@ -66,7 +66,6 @@ rescoring_lm_scale: 0.4
lm_dir: ../LM/results/n_gram_lm

G_arpa: 3-for-char-gram.arpa
# G_arpa: pguyot.arpa
G_rescoring_arpa: 4-for-char-gram.arpa

# Training parameters
Expand Down Expand Up @@ -152,7 +151,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
augmentations: [
!ref <speed_perturb>,
!ref <drop_freq>,
]
]

enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
input_shape: [null, null, 1280]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ################################
# Model: wav2vec2 + DNN + CTC + LM (k2)
# Augmentation: SpecAugment
# Augmentation: SpecAugment + Speed
# Authors: Pierre Champion 2023
# ################################

Expand All @@ -21,7 +21,7 @@ stm_directory: !ref <data_folder>/**/[^\.ne_e2\.|\.ne\.|\.spk\.|part\.]*.stm
wav_directory: !ref <data_folder>/**/*.wav
train_splits: {"train_ESTER2":["/ESTER2/train_trans_rapide/*", "/ESTER2/train/*"], "train_ESTER1":["/ESTER1/train/*"], "train_EPAC":["/EPAC/train/*"], "train_ETAPE":["/ETAPE/train/*"], "train_REPERE":["/REPERE/train/*"]}
dev_splits: {"dev_ESTER2":["/ESTER2/dev/*"], "dev_ESTER1":["/ESTER1/dev/*"], "dev_ETAPE":["/ETAPE/dev/*"], "dev_REPERE2014":["/REPERE/dev2014/*"]}
test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"] , "test_REPERE2014":["/REPERE/test2014/*"]}
test_splits: {"test_ESTER2":["/ESTER2/test/*"], "test_ESTER1":["/ESTER1/test/*"], "test_ETAPE":["/ETAPE/test/*"], "test_EPAC":["/EPAC/test/*"], "test_REPERE2014":["/REPERE/test2014/*"]}
merge_train_csv: "train_ESTER2+train_ESTER1+train_EPAC+train_ETAPE+train_REPERE"
prep_save_folder: !ref <output_folder>
skip_prep: False
Expand Down Expand Up @@ -66,7 +66,6 @@ rescoring_lm_scale: 0.4
lm_dir: ../LM/results/n_gram_lm

G_arpa: 3-for-phone-gram.arpa
# G_arpa: pguyot.arpa
G_rescoring_arpa: 4-for-phone-gram.arpa

# Training parameters
Expand Down Expand Up @@ -150,7 +149,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
augmentations: [
!ref <speed_perturb>,
!ref <drop_freq>,
]
]

enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
input_shape: [null, null, 1280]
Expand Down
47 changes: 28 additions & 19 deletions recipes/ESTER+EPAC+ETAPE+REPERE/ASR/train_with_wav2vec_ctc_k2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@
import phonemizer

phonemizer.phonemize("c'est", language="fr-fr")
except Exception as e:
except Exception:
pass

import logging
import os
import sys
import torch
import logging
import speechbrain as sb
from speechbrain.utils.distributed import run_on_main, if_main_process
from speechbrain.utils.data_utils import download_file
from hyperpyyaml import load_hyperpyyaml
from collections import defaultdict
from pathlib import Path

import torch
from hyperpyyaml import load_hyperpyyaml

import speechbrain as sb
import speechbrain.k2_integration as sbk2
from speechbrain.utils.distributed import if_main_process, run_on_main

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -186,7 +186,8 @@ def on_stage_end(self, stage, stage_loss, epoch):
valid_stats=stage_stats,
)
self.checkpointer.save_and_keep_only(
meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
meta={"WER": stage_stats["WER"]},
min_keys=["WER"],
)
elif stage == sb.Stage.TEST:
self.hparams.train_logger.log_stats(
Expand Down Expand Up @@ -237,7 +238,8 @@ def dataio_prepare(hparams):
data_folder = hparams["data_folder"]

train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
csv_path=hparams["train_csv"],
replacements={"data_root": data_folder},
)

if hparams["sorting"] == "ascending":
Expand Down Expand Up @@ -270,7 +272,8 @@ def dataio_prepare(hparams):
)

valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
csv_path=hparams["valid_csv"],
replacements={"data_root": data_folder},
)
valid_data = valid_data.filtered_sorted(
sort_key="duration",
Expand Down Expand Up @@ -317,7 +320,8 @@ def text_pipeline(wrd):

# 4. Set output:
sb.dataio.dataset.set_output_keys(
datasets, ["id", "sig", "wrd"],
datasets,
["id", "sig", "wrd"],
)

return train_data, valid_data, test_datasets
Expand Down Expand Up @@ -384,9 +388,11 @@ def text_pipeline(wrd):
"lang_dir": hparams["lang_dir"],
"vocab_files": [],
# "vocab_files": [hparams["vocab_file"]],
"csv_files": [hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else [],
"csv_files": (
[hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else []
),
"add_word_boundary": hparams["add_word_boundary"],
"column_text_key": "wrd",
},
Expand All @@ -399,17 +405,19 @@ def text_pipeline(wrd):
"lang_dir": hparams["lang_dir"],
"vocab_files": [],
# "vocab_files": [hparams["vocab_file"]],
"csv_files": [hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else [],
"csv_files": (
[hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else []
),
"add_word_boundary": hparams["add_word_boundary"],
"column_text_key": "wrd",
"lang": "fr-fr",
},
)
else:
raise NotImplementedError(
f"token_type={token_type} not not implemented"
f"token_type={hparams['token_type']} not not implemented"
)

caching = (
Expand Down Expand Up @@ -438,7 +446,8 @@ def text_pipeline(wrd):

lexicon = sbk2.lexicon.Lexicon(hparams["lang_dir"])
graph_compiler = sbk2.graph_compiler.CtcGraphCompiler(
lexicon, device=asr_brain.device,
lexicon,
device=asr_brain.device,
)

decoding_params = {}
Expand Down
2 changes: 1 addition & 1 deletion recipes/ESTER+EPAC+ETAPE+REPERE/LM/extra_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ftfy
# k2 # It is better to install k2 with the procedure listed here: https://k2-fsa.github.io/k2/installation/from_wheels.html
num2words
soundfile
ftfy
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#########
# Recipe for Training kenLM on stm formated Data.
# Recipe for Training kenLM on stm formatted Data.
#
# Author:
# - Pierre Champion 2024
Expand Down
15 changes: 9 additions & 6 deletions recipes/ESTER+EPAC+ETAPE+REPERE/LM/train_ngram.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
* Pierre Champion 2023
"""

import logging
import os
import sys
import logging
import speechbrain as sb
from speechbrain.utils.distributed import run_on_main
from hyperpyyaml import load_hyperpyyaml

import speechbrain as sb
import speechbrain.k2_integration as sbk2
from speechbrain.utils.data_utils import get_list_from_csv
from speechbrain.utils.distributed import run_on_main

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -118,9 +119,11 @@ def dataprep_lm_training(
kwargs={
"lang_dir": hparams["lang_dir"],
"vocab_files": [hparams["vocab_file"]],
"extra_csv_files": [hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else [],
"csv_files": (
[hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else []
),
"add_word_boundary": hparams["add_word_boundary"],
},
)
Expand Down
26 changes: 11 additions & 15 deletions recipes/ESTER+EPAC+ETAPE+REPERE/stm_prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,22 @@
Pierre Champion
"""

import logging
import re
import csv
import glob
import logging
import os
import csv
import re
import string
import sys

from num2words import num2words
import ftfy
import soundfile
from num2words import num2words
from tqdm import tqdm

from speechbrain.dataio.dataio import (
load_pkl,
save_pkl,
)
from speechbrain.dataio.dataio import load_pkl, save_pkl
from speechbrain.utils.data_utils import get_list_from_csv

import soundfile
import string

logger = logging.getLogger(__name__)
OPT_FILE = "opt_stm_prepare.pkl"
SAMPLERATE = 16000
Expand Down Expand Up @@ -193,7 +189,7 @@ def prepare_stm( # noqa
new_word_on_apostrophe=new_word_on_apostrophe,
)

# No transcription, might be only rire/jingle anotation
# No transcription, might be only rire/jingle annotation
if text == "":
continue

Expand Down Expand Up @@ -391,7 +387,7 @@ def normalize_text(text, new_word_on_apostrophe=True):
text = ftfy.fix_text(text)

# Names
text = re.sub(r"Franç§ois", "François", text)
text = re.sub(r"Franç§ois", "François", text) # codespell:ignore
text = re.sub(r"Schrà ¶der", "Schràder", text)

text = re.sub(r"«", "", text)
Expand Down Expand Up @@ -419,7 +415,7 @@ def normalize_text(text, new_word_on_apostrophe=True):
delset = delset.replace(char, "")
text = text.translate(str.maketrans("", "", delset))

# Undecidable variant heared like on (n') en:
# Undecidable variant heard like on (n') en:
text = re.sub(r"\(.+?\)", "", text)
text = re.sub(r"\(\)", "", text)
text = re.sub(r"(O.K.)", "ok", text)
Expand Down Expand Up @@ -484,7 +480,7 @@ def normalize_text(text, new_word_on_apostrophe=True):
# ã used as à in most cases
text = re.sub(r"ã", "à", text)

# replace n succesive spaces with one space.
# replace n successive spaces with one space.
text = re.sub(r"\s{2,}", " ", text)
text = re.sub("^ ", "", text)
text = re.sub(" $", "", text)
Expand Down
2 changes: 1 addition & 1 deletion recipes/LibriSpeech/LM/train_ngram.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def dataprep_lm_training(
kwargs={
"lang_dir": hparams["lang_dir"],
"vocab_files": [hparams["vocab_file"]],
"extra_csv_files": (
"csv_files": (
[hparams["output_folder"] + "/train.csv"]
if not hparams["skip_prep"]
else []
Expand Down
16 changes: 12 additions & 4 deletions speechbrain/k2_integration/graph_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ def device(self):

@abc.abstractmethod
def compile(
self, texts: List[str], is_training: bool = True, max_len: torch.float = torch.inf
self,
texts: List[str],
is_training: bool = True,
max_len: torch.float = torch.inf,
) -> Tuple[k2.Fsa, torch.Tensor, Optional[torch.Tensor]]:
"""
Compile the graph for the given texts.
Expand Down Expand Up @@ -314,7 +317,10 @@ def device(self):
return self._device

def compile(
self, texts: List[str], is_training: bool = True, max_len: torch.float = torch.inf
self,
texts: List[str],
is_training: bool = True,
max_len: torch.float = torch.inf,
) -> Tuple[k2.Fsa, torch.Tensor, Optional[torch.Tensor]]:
"""
Build decoding graphs by composing ctc_topo with given transcripts.
Expand Down Expand Up @@ -365,8 +371,10 @@ def compile(

mask = target_lens < max_len - 2
if torch.any(~mask):
logger.debug("Removing elements from batch for CTC loss.\n"
"Output label length is greater than input length.")
logger.debug(
"Removing elements from batch for CTC loss.\n"
"Output label length is greater than input length."
)
target_lens = target_lens[mask]
word_idx = [id for i, id in enumerate(word_idx) if mask[i]]

Expand Down

0 comments on commit b350fad

Please sign in to comment.