Skip to content

eval_util

Author: Heli Qi; Affiliation: NAIST; Date: 2022.07

get_word_edit_alignment(hypo, real)

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| hypo | str  |             | required |
| real | str  |             | required |

Returns:

Source code in speechain/utilbox/eval_util.py
def get_word_edit_alignment(hypo: str, real: str) -> "tuple[int, int, int, str]":
    """Align the words of a hypothesis against a reference and count the edits.

    Both strings are split on whitespace and the two word lists are aligned
    with ``edit_distance.SequenceMatcher`` (hypothesis as ``a``, reference as
    ``b``). Every opcode contributes one column to the alignment table.

    Args:
        hypo: The hypothesis sentence; its words are separated by whitespace.
        real: The reference sentence; its words are separated by whitespace.

    Returns:
        A 4-tuple ``(insertion, deletion, substitution, align_table)``:

        * insertion: number of ``"insert"`` opcodes, i.e. columns where a
          reference word is shown against a blank hypothesis cell.
        * deletion: number of ``"delete"`` opcodes, i.e. columns where a
          hypothesis word is shown against a blank reference cell.
        * substitution: number of ``"replace"`` opcodes.
        * align_table: whatever ``get_table_strings`` returns for the rows
          "Hypothesis", "Alignment" and "Reference" (annotated as ``str``).
    """
    # The return annotation is quoted so that it is never evaluated when the
    # function is defined; no extra import is needed for it.

    # Note that split(" ") is not equivalent to split() here:
    # split(" ") gives an extra '' at the end of the list if the string ends
    # with a " ", while split() doesn't.
    hypo_word_list, real_word_list = hypo.split(), real.split()
    opcodes = edit_distance.SequenceMatcher(
        a=hypo_word_list, b=real_word_list
    ).get_opcodes()

    insertion, deletion, substitution = 0, 0, 0
    hypo_words, real_words, word_ops = [], [], []
    # NOTE(review): only op[1] and op[3] are read, so each opcode is assumed to
    # describe exactly one word step -- confirm against the edit_distance package.
    for op in opcodes:
        tag, hypo_idx, real_idx = op[0], op[1], op[3]

        if tag == "insert":
            # a reference word with no counterpart in the hypothesis
            insertion += 1
            marker, hypo_cell, real_cell = "I", " ", real_word_list[real_idx]
        elif tag == "delete":
            # a hypothesis word with no counterpart in the reference
            deletion += 1
            marker, hypo_cell, real_cell = "D", hypo_word_list[hypo_idx], " "
        else:
            # "replace" and every remaining tag (treated as a match) show both words
            if tag == "replace":
                substitution += 1
                marker = "S"
            else:
                marker = " "
            hypo_cell = hypo_word_list[hypo_idx]
            real_cell = real_word_list[real_idx]

        word_ops.append(marker)
        hypo_words.append(hypo_cell)
        real_words.append(real_cell)

    align_table = get_table_strings(
        contents=[hypo_words, word_ops, real_words],
        first_col=["Hypothesis", "Alignment", "Reference"],
    )

    return insertion, deletion, substitution, align_table