CER / WER metrics

atr_ner_eval.metrics.cer

Compute CER and WER from a label/prediction dataset.
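
A minimal end-to-end sketch of how the classes below fit together. The import path is the module documented on this page, and the expected values are inferred from the doctests further down; treat it as an illustration rather than verbatim library output.

>>> from atr_ner_eval.metrics.cer import TextEval, TotalScore
>>> score = TotalScore()
>>> score.update("total", TextEval("I really like cats", "I like cats"))
>>> score.cer
{'total': 38.9}
>>> score.wer
{'total': 25.0}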

Attributes

logger module-attribute

logger = logging.getLogger(__name__)

Classes

TextEval

Bases: NamedTuple

Compute text errors between a label and prediction.

Attributes
label instance-attribute
label: str

Label text.

prediction instance-attribute
prediction: str

Predicted text.

char_errors property
char_errors: int

Compute character errors between the label and prediction.

Returns:
    int: Character errors.

Examples:

>>> TextEval("I really like cats", "I love cats").char_errors
9
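
The count matches a character-level Levenshtein (edit) distance between the two strings, presumably computed after whitespace normalization via format_string_for_cer (an assumption; the property's implementation is not shown on this page). A plain dynamic-programming sketch reproduces the doctest above:

def levenshtein(reference: str, hypothesis: str) -> int:
    """Classic edit distance: insertions, deletions and substitutions all cost 1."""
    previous = list(range(len(hypothesis) + 1))
    for i, ref_char in enumerate(reference, start=1):
        current = [i]
        for j, hyp_char in enumerate(hypothesis, start=1):
            current.append(
                min(
                    previous[j] + 1,                           # deletion
                    current[j - 1] + 1,                        # insertion
                    previous[j - 1] + (ref_char != hyp_char),  # substitution
                )
            )
        previous = current
    return previous[-1]

>>> levenshtein("I really like cats", "I love cats")
9
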
word_errors property
word_errors: int

Compute word errors between the label and prediction.

Returns:
    int: Word errors.

Examples:

>>> TextEval("I really like cats", "I love cats").word_errors
2
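
Consistent with format_string_for_wer below, the word-level count is the same edit distance taken over whitespace-separated tokens: ['I', 'really', 'like', 'cats'] versus ['I', 'love', 'cats'] costs one deletion ('really') plus one substitution ('like' → 'love'), hence 2.
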
char_totals property
char_totals: int

Compute the max number of characters in the label or prediction.

Returns:
    int: Number of characters.

Examples:

>>> TextEval("I really like cats", "I love cats").char_totals
18
word_totals property
word_totals: int

Compute the max number of words in the label or prediction.

Returns:
    int: Number of words.

Examples:

>>> TextEval("I really like cats", "I love cats").word_totals
4
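
In both cases the total is simply the larger of the two lengths, so the denominator stays meaningful when the prediction is shorter than the label:

>>> max(len("I really like cats"), len("I love cats"))
18
>>> max(len("I really like cats".split()), len("I love cats".split()))
4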

TotalScore

TotalScore()

Compute total evaluation scores.

Initialize errors and counts.

Examples:

>>> score = TotalScore()
Source code in atr_ner_eval/metrics/cer.py
def __init__(self):
    """Initialize errors and counts.

    Examples:
        >>> score = TotalScore()
    """
    self.char_errors = defaultdict(int)
    self.word_errors = defaultdict(int)
    self.char_totals = defaultdict(int)
    self.word_totals = defaultdict(int)
    self.count = defaultdict(int)
Attributes
char_errors instance-attribute
char_errors = defaultdict(int)
word_errors instance-attribute
word_errors = defaultdict(int)
char_totals instance-attribute
char_totals = defaultdict(int)
word_totals instance-attribute
word_totals = defaultdict(int)
count instance-attribute
count = defaultdict(int)
categories property
categories: list[str]

List of semantic categories for which scores are computed.

Returns:
    list[str]: The list of categories.

The examples for this and the following properties assume score has already been updated as shown under update below.

Examples:

>>> score.categories
['total', 'animal']
cer property
cer: defaultdict(float)

Compute the Character Error Rate (%).

Returns:
    defaultdict(float): The Character Error Rate.

Examples:

>>> score.cer
{'total': 38.9, 'animal': 0.0}
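
These figures are consistent with accumulating the per-key errors and totals from update: 7 character errors over 18 characters gives 7 / 18 × 100 ≈ 38.9 for 'total', and 0 / 4 × 100 = 0.0 for 'animal'.
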
wer property
wer: defaultdict(float)

Compute the Word Error Rate (%).

Returns:
    defaultdict(float): The Word Error Rate.

Examples:

>>> score.wer
{'total': 25.0, 'animal': 0.0}
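
Likewise at the word level: 1 / 4 × 100 = 25.0 for 'total' and 0 / 1 × 100 = 0.0 for 'animal'.
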
Functions
update
update(key, score: TextEval)

Update the score with the current evaluation for a given key.

Parameters:
    key (str): Category to update. Required.
    score (TextEval): Current score. Required.

Examples:

>>> score.update("total", TextEval("I really like cats", "I like cats"))
>>> score.update("animal", TextEval("cats", "cats"))
>>> score.char_errors
defaultdict(<class 'int'>, {'total': 7, 'animal': 0})
>>> score.word_errors
defaultdict(<class 'int'>, {'total': 1, 'animal': 0})
>>> score.char_totals
defaultdict(<class 'int'>, {'total': 18, 'animal': 4})
>>> score.word_totals
defaultdict(<class 'int'>, {'total': 4, 'animal': 1})
>>> score.count
defaultdict(<class 'int'>, {'total': 1, 'animal': 1})
Source code in atr_ner_eval/metrics/cer.py
def update(self, key, score: TextEval):
    """Update the score with the current evaluation for a given key.

    Args:
        key (str): Category to update.
        score (TextEval): Current score.

    Examples:
        >>> score.update("total", TextEval("I really like cats", "I like cats"))
        >>> score.update("animal", TextEval("cats", "cats"))
        >>> score.char_errors
        defaultdict(<class 'int'>, {'total': 7, 'animal': 0})
        >>> score.word_errors
        defaultdict(<class 'int'>, {'total': 1, 'animal': 0})
        >>> score.char_totals
        defaultdict(<class 'int'>, {'total': 18, 'animal': 4})
        >>> score.word_totals
        defaultdict(<class 'int'>, {'total': 4, 'animal': 1})
        >>> score.count
        defaultdict(<class 'int'>, {'total': 1, 'animal': 1})
    """
    self.char_errors[key] += score.char_errors
    self.word_errors[key] += score.word_errors
    self.char_totals[key] += score.char_totals
    self.word_totals[key] += score.word_totals
    self.count[key] += 1

Functions

format_string_for_wer

format_string_for_wer(text: str) -> list[str]

Format string for WER computation.

Parameters:
    text (str): The text to format. Required.

Returns:
    list[str]: A list of words formatted for WER computation.

Examples:

>>> format_string_for_wer(text="this is a string to evaluate")
['this', 'is', 'a', 'string', 'to', 'evaluate']
>>> format_string_for_wer(text="this is    another string to   evaluate")
['this', 'is', 'another', 'string', 'to', 'evaluate']
Source code in atr_ner_eval/metrics/cer.py
def format_string_for_wer(text: str) -> list[str]:
    """Format string for WER computation.

    Args:
        text (str): The text to format.

    Returns:
        A list of words formatted for WER computation.

    Examples:
        >>> format_string_for_wer(text="this is a string to evaluate")
        ['this', 'is', 'a', 'string', 'to', 'evaluate']
        >>> format_string_for_wer(text="this is    another string to   evaluate")
        ['this', 'is', 'another', 'string', 'to', 'evaluate']
    """
    return text.strip().split()

format_string_for_cer

format_string_for_cer(text: str) -> str

Format string for CER computation.

Parameters:
    text (str): The text to format. Required.

Returns:
    str: The formatted text for CER computation.

Examples:

>>> format_string_for_cer(text="this is a string to evaluate")
'this is a string to evaluate'
>>> format_string_for_cer(text="this is    another string to   evaluate")
'this is another string to evaluate'
Source code in atr_ner_eval/metrics/cer.py
def format_string_for_cer(text: str) -> str:
    """Format string for CER computation.

    Args:
        text (str): The text to format.

    Returns:
        The formatted text for CER computation.

    Examples:
        >>> format_string_for_cer(text="this is a string to evaluate")
        'this is a string to evaluate'
        >>> format_string_for_cer(text="this is    another string to   evaluate")
        'this is another string to evaluate'
    """
    return " ".join(text.strip().split())

make_prettytable

make_prettytable(score: TotalScore) -> PrettyTable

Format and display results using PrettyTable.

Parameters:
    score (TotalScore): Total scores. Required.

Returns:
    PrettyTable: The evaluation table formatted in Markdown.

Source code in atr_ner_eval/metrics/cer.py
def make_prettytable(score: TotalScore) -> PrettyTable:
    """Format and display results using PrettyTable.

    Args:
        score (TotalScore): Total scores.

    Returns:
        The evaluation table formatted in Markdown.
    """
    table = PrettyTable()
    table.set_style(MARKDOWN)
    table.field_names = ["Category", "CER (%)", "WER (%)", "Support"]
    table.align["Category"] = "l"
    table.align["Support"] = "r"

    rows = []
    for tag in score.categories:
        rows.append(
            [
                tag,
                "%.2f" % score.cer[tag],
                "%.2f" % score.wer[tag],
                score.count[tag],
            ],
        )

    table.add_rows(sort_categories(rows))
    return table

merge_entities

merge_entities(
    entities: list[tuple[str, str]]
) -> dict[str, str]

Iterate over entities and merge text for each entity type.

Parameters:
    entities (list[tuple[str, str]]): A list of (entity type, text) tuples. Required.

Returns:
    dict[str, str]: A dictionary with entity types as keys and the corresponding merged text as values.

Source code in atr_ner_eval/metrics/cer.py
def merge_entities(entities: list[tuple[str, str]]) -> dict[str, str]:
    """Iterate over entities and merge text for each entity type.

    Args:
        entities (list[tuple[str, str]]): A list of entities.

    Returns:
        A dictionary with entity types as keys and the corresponding text as values.
    """
    entity_text = defaultdict(list)
    for tag, text in entities:
        entity_text[tag].append(text)
    return {k: " ".join(v) for k, v in entity_text.items()}
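
A quick illustration, following directly from the source above (the entity tuples are made up for the example):

>>> merge_entities([("animal", "cats"), ("animal", "dogs"), ("person", "Ada")])
{'animal': 'cats dogs', 'person': 'Ada'}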

compute_cer_wer

compute_cer_wer(
    label_dir: Path,
    prediction_dir: Path,
    by_category: bool = False,
) -> PrettyTable

Read BIO files and compute Character and Word Error Rates globally or for each NER category.

Parameters:
    label_dir (Path): Path to the reference BIO file. Required.
    prediction_dir (Path): Path to the prediction BIO file. Required.
    by_category (bool): Whether to display CER/WER by category. Defaults to False.

Returns:
    PrettyTable: A Markdown-formatted table containing evaluation results.

Source code in atr_ner_eval/metrics/cer.py
def compute_cer_wer(
    label_dir: Path,
    prediction_dir: Path,
    by_category: bool = False,
) -> PrettyTable:
    """Read BIO files and compute Character and Word Error Rates globally or for each NER category.

    Args:
        label_dir (Path): Path to the reference BIO file.
        prediction_dir (Path): Path to the prediction BIO file.
        by_category (bool): Whether to display CER/WER by category.

    Returns:
        A Markdown formatted table containing evaluation results.
    """
    # Initialize scores
    score = TotalScore()
    # Load the dataset
    dataset = load_dataset(label_dir, prediction_dir)
    # Iterate over the dataset
    for label, prediction in dataset:
        # Compute global CER and WER
        score.update(GLOBAL_STAT_NAME, TextEval(label.text, prediction.text))

        # Compute CER and WER by category
        if not by_category:
            continue
        label_text = merge_entities(label.entities)
        pred_text = merge_entities(prediction.entities)
        for tag in label_text:
            score.update(
                tag,
                TextEval(
                    label_text[tag],
                    pred_text[tag] if tag in pred_text else "",
                ),
            )

    # Format and display results
    table = make_prettytable(score)
    print(table)  # noqa: T201
    return table
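
A minimal usage sketch; the directory names are placeholders, and the call both prints the Markdown table and returns it:

>>> from pathlib import Path
>>> from atr_ner_eval.metrics.cer import compute_cer_wer
>>> table = compute_cer_wer(
...     label_dir=Path("labels"),
...     prediction_dir=Path("predictions"),
...     by_category=True,
... )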