Bases: Module
Stand-alone module of the autoregressive language model.
This module has two usages:
1. language model training by speechain.model.lm.LM
2. ASR-LM joint decoding by speechain.model.ar_asr.ARASR
Source code in speechain/module/standalone/lm.py
```python
class LanguageModel(Module):
    """Stand-alone module of the autoregressive language model.

    This module has two usages:
        1. language model training by speechain.model.lm.LM
        2. ASR-LM joint decoding by speechain.model.ar_asr.ARASR
    """

    embedding_class_dict = dict(embed=EmbedPrenet)
    encoder_class_dict = dict(transformer=TransformerEncoder)

    def module_init(self, vocab_size: int, emb: Dict, encoder: Dict):
        """
        Args:
            vocab_size: Size of the token vocabulary.
            emb: Embedding layer configuration ('type' is required, 'conf' is optional).
            encoder: Encoder configuration ('type' is required, 'conf' is optional).
        """
        # LM embedding layer
        assert (
            "type" in emb.keys()
        ), "There must be a key named 'type' in model['module_conf']['embedding']!"
        embedding_class = self.embedding_class_dict[emb["type"]]
        emb["conf"] = dict() if "conf" not in emb.keys() else emb["conf"]
        self.embedding = embedding_class(vocab_size=vocab_size, **emb["conf"])

        # LM encoder part
        assert (
            "type" in encoder.keys()
        ), "There must be a key named 'type' in model['module_conf']['encoder']!"
        encoder_class = self.encoder_class_dict[encoder["type"]]
        encoder["conf"] = dict() if "conf" not in encoder.keys() else encoder["conf"]
        # the LM encoder is automatically set to unidirectional
        encoder["conf"]["uni_direction"] = True
        self.encoder = encoder_class(
            input_size=self.embedding.output_size, **encoder["conf"]
        )

        # LM token prediction layer
        self.postnet = TokenPostnet(
            input_size=self.encoder.output_size, vocab_size=vocab_size
        )

    def forward(self, text: torch.Tensor, text_len: torch.Tensor):
        """
        Args:
            text: Padded token id sequences, shape (batch, seq_len).
            text_len: True length of each sequence, shape (batch,).

        Returns:
            The token prediction logits, the encoder output mask, and the
            encoder self-attention matrices (None for RNN-based encoders).
        """
        # Text Embedding
        emb_text = self.embedding(text)

        # mask generation for the input text
        text_mask = make_mask_from_len(text_len)
        if text.is_cuda:
            text_mask = text_mask.cuda(text.device)

        # Encoding
        enc_returns = self.encoder(src=emb_text, mask=text_mask)
        # Transformer-based encoder additionally returns the encoder self-attention
        if len(enc_returns) == 4:
            enc_feat, enc_feat_mask, enc_attmat, enc_hidden = enc_returns
        # RNN-based encoder doesn't return any attention
        elif len(enc_returns) == 3:
            (enc_feat, enc_feat_mask, enc_hidden), enc_attmat = enc_returns, None
        else:
            raise RuntimeError

        # Token prediction
        logits = self.postnet(enc_feat)
        return logits, enc_feat_mask, enc_attmat
```
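For orientation, the sketch below shows a forward pass through an already-constructed instance. It is a minimal sketch, not part of the library: it assumes the surrounding speechain framework has built the module (running `module_init` with a vocabulary of 1000 tokens, an `embed` prenet, and a `transformer` encoder), that calling the instance invokes `forward()` as with a `torch.nn.Module`, and that 0 is the padding id.

```python
import torch

# `lm` is assumed to be a LanguageModel that the speechain framework has
# already initialised; only the call pattern of forward() is shown here.

text = torch.tensor([[5, 17, 42, 9, 0],
                     [7, 3, 11, 0, 0]])   # padded token ids (0 = pad, assumed)
text_len = torch.tensor([4, 3])           # true lengths before padding

logits, enc_feat_mask, enc_attmat = lm(text, text_len)
# logits:        (batch, seq_len, vocab_size) token prediction scores
# enc_feat_mask: mask over the encoder outputs, derived from text_len
# enc_attmat:    encoder self-attention matrices (Transformer) or None (RNN)
```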
forward(text, text_len)
Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| text | Tensor | Padded token id sequences, shape (batch, seq_len). | required |
| text_len | Tensor | True length of each sequence in text, shape (batch,). | required |

Returns:

The token prediction logits, the encoder output mask, and the encoder self-attention matrices (None for RNN-based encoders).
Source code in speechain/module/standalone/lm.py
```python
def forward(self, text: torch.Tensor, text_len: torch.Tensor):
    """
    Args:
        text: Padded token id sequences, shape (batch, seq_len).
        text_len: True length of each sequence, shape (batch,).

    Returns:
        The token prediction logits, the encoder output mask, and the
        encoder self-attention matrices (None for RNN-based encoders).
    """
    # Text Embedding
    emb_text = self.embedding(text)

    # mask generation for the input text
    text_mask = make_mask_from_len(text_len)
    if text.is_cuda:
        text_mask = text_mask.cuda(text.device)

    # Encoding
    enc_returns = self.encoder(src=emb_text, mask=text_mask)
    # Transformer-based encoder additionally returns the encoder self-attention
    if len(enc_returns) == 4:
        enc_feat, enc_feat_mask, enc_attmat, enc_hidden = enc_returns
    # RNN-based encoder doesn't return any attention
    elif len(enc_returns) == 3:
        (enc_feat, enc_feat_mask, enc_hidden), enc_attmat = enc_returns, None
    else:
        raise RuntimeError

    # Token prediction
    logits = self.postnet(enc_feat)
    return logits, enc_feat_mask, enc_attmat
```
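The module stops at the logits; turning them into a training objective is the job of speechain.model.lm.LM. As a rough illustration of how the returned logits are typically consumed for autoregressive training, the helper below computes a next-token cross-entropy. It is a sketch made for this page, not the library's loss code: the name lm_cross_entropy, the shift-by-one convention, and the exclusion of padded positions via text_len are all assumptions chosen here.

```python
import torch
import torch.nn.functional as F


def lm_cross_entropy(logits: torch.Tensor, text: torch.Tensor,
                     text_len: torch.Tensor) -> torch.Tensor:
    """Next-token cross-entropy over the non-padded positions (sketch).

    logits:   (batch, seq_len, vocab_size) as returned by forward()
    text:     (batch, seq_len) token ids that were fed to forward()
    text_len: (batch,) true sequence lengths (assumed on the same device as text)
    """
    # the encoder output at position t predicts the token at position t + 1
    pred = logits[:, :-1].reshape(-1, logits.size(-1))
    target = text[:, 1:].reshape(-1)

    # keep only positions whose target lies inside the true sequence length
    pos = torch.arange(text.size(1) - 1, device=text.device)
    valid = (pos[None, :] < (text_len - 1)[:, None]).reshape(-1)

    return F.cross_entropy(pred[valid], target[valid])
```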
module_init(vocab_size, emb, encoder)
Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| vocab_size | int | Size of the token vocabulary (output dimension of the prediction layer). | required |
| emb | Dict | Embedding layer configuration; requires a 'type' key, with an optional 'conf' dict passed to the embedding class. | required |
| encoder | Dict | Encoder configuration; requires a 'type' key, with an optional 'conf' dict passed to the encoder class. | required |
Source code in speechain/module/standalone/lm.py
```python
def module_init(self, vocab_size: int, emb: Dict, encoder: Dict):
    """
    Args:
        vocab_size: Size of the token vocabulary.
        emb: Embedding layer configuration ('type' is required, 'conf' is optional).
        encoder: Encoder configuration ('type' is required, 'conf' is optional).
    """
    # LM embedding layer
    assert (
        "type" in emb.keys()
    ), "There must be a key named 'type' in model['module_conf']['embedding']!"
    embedding_class = self.embedding_class_dict[emb["type"]]
    emb["conf"] = dict() if "conf" not in emb.keys() else emb["conf"]
    self.embedding = embedding_class(vocab_size=vocab_size, **emb["conf"])

    # LM encoder part
    assert (
        "type" in encoder.keys()
    ), "There must be a key named 'type' in model['module_conf']['encoder']!"
    encoder_class = self.encoder_class_dict[encoder["type"]]
    encoder["conf"] = dict() if "conf" not in encoder.keys() else encoder["conf"]
    # the LM encoder is automatically set to unidirectional
    encoder["conf"]["uni_direction"] = True
    self.encoder = encoder_class(
        input_size=self.embedding.output_size, **encoder["conf"]
    )

    # LM token prediction layer
    self.postnet = TokenPostnet(
        input_size=self.encoder.output_size, vocab_size=vocab_size
    )
```
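To make the expected argument structure concrete, here is one possible shape of emb and encoder. Only the keys that module_init itself reads ('type', optional 'conf') are guaranteed by the code above; anything placed inside 'conf' is forwarded as keyword arguments to EmbedPrenet or TransformerEncoder, whose accepted options are not documented here, so the empty dicts below are deliberate placeholders.

```python
vocab_size = 1000  # illustrative value

emb = dict(
    type="embed",        # looked up in embedding_class_dict -> EmbedPrenet
    conf=dict(),         # forwarded as **kwargs to EmbedPrenet
)
encoder = dict(
    type="transformer",  # looked up in encoder_class_dict -> TransformerEncoder
    conf=dict(),         # forwarded as **kwargs; 'uni_direction' is forced to True
)
```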