Training a Mini-GPT to Learn Two-Digit Addition#

Twitter Handle LinkedIn Profile GitHub Profile Tag Code

Motivation#

Generative Pre-trained Transformer (GPT) models are well known to perform badly on arithmetic tasks such as addition. This should not come as a surprise, since GPT is a language model and not a math model. It is designed to train on a large corpus of text and learn the patterns and structure of natural language. While we do encounter many arithmetic operations in the corpus, these operations are usually encoded as text rather than as mathematical objects. After all, what GPT does best is to predict the next token as a distribution over the entire vocabulary.

In one of the examples provided from the repository minGPT, Karpathy demonstrates training a GPT model to learn the addition of two numbers presented as strings. This is a simple task designed to illustrate how a decoder-only model can be trained to learn “addition”. Thus, the input is a sequence of characters representing an addition operation (like “12 + 35”) and the output is the sequence of characters representing the result of the addition (like “47”).

To this end, we replicate his example, which serves as a proof-of-concept to show that decoder-only models, which are often used for language-related tasks, can learn other patterns or “languages,” such as the “language” of arithmetic.

from __future__ import annotations

from tqdm.auto import tqdm

import inspect
import math
import os
import sys
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import matplotlib.pyplot as plt
import rich
import seaborn as sns
import torch
from omegaconf import OmegaConf as om
from rich.pretty import pprint
from torch import nn
from torch.optim import Optimizer
from torch.optim.lr_scheduler import LRScheduler
from torch.utils.data import DataLoader, Dataset, Subset, random_split
def find_root_dir(current_path: Path | None = None, marker: str = '.git') -> Path | None:
    """
    Locate the closest ancestor directory (or the start directory itself)
    that contains an entry named ``marker``.

    Parameters
    ----------
    current_path : Path | None
        Where the upward search begins. Falls back to `Path.cwd()` when
        nothing is supplied.
    marker : str
        File or directory name whose presence identifies the root.

    Returns
    -------
    Path | None
        The first directory, walking from the start towards the filesystem
        root, in which ``marker`` exists; None when no such directory is found.
    """
    start = (current_path if current_path else Path.cwd()).resolve()
    # Check the starting directory first, then every ancestor in order.
    search_order = (start, *start.parents)
    return next((folder for folder in search_order if (folder / marker).exists()), None)

# Start the search from the process working directory (despite the variable
# name, this is not the location of the current file).
current_file_path = Path(os.getcwd())
# Walk upwards until a directory containing an entry named `omnivault` is found.
root_dir          = find_root_dir(current_file_path, marker='omnivault')

if root_dir is not None:
    # Put the discovered root on `sys.path` so the `omnivault.*` imports below resolve.
    sys.path.append(str(root_dir))
    from omnivault._types._alias import Accuracy, Loss
    from omnivault.core.logger import RichLogger
    from omnivault.transformer.config.composer import Composer, DataConfig
    from omnivault.transformer.config.constants import MaybeConstant
    from omnivault.transformer.config.decoder import (
        AddNormConfig,
        DecoderBlockConfig,
        DecoderConfig,
        MultiHeadedAttentionConfig,
        PositionwiseFeedForwardConfig,
    )
    from omnivault.transformer.config.generator import GeneratorConfig
    from omnivault.transformer.config.global_ import MaybeGlobal
    from omnivault.transformer.config.optim import OPTIMIZER_REGISTRY, AdamConfig, OptimizerConfig
    from omnivault.transformer.config.scheduler import SCHEDULER_REGISTRY, LambdaLRConfig
    from omnivault.transformer.config.trainer import TrainerConfig
    from omnivault.transformer.core.callbacks import save_state
    from omnivault.transformer.core.dataset import (
        AdderDataset,
        construct_dummy_batch_future_masks,
        construct_dummy_batch_target_padding_masks,
        create_loader,
        split_dataset,
    )
    from omnivault.transformer.core.optim import apply_weight_decay_to_different_param_groups
    from omnivault.transformer.core.tokenizer import AdderTokenizer
    from omnivault.transformer.core.trainer import Trainer, TrainerEvent
    from omnivault.transformer.core.vocabulary import AdderVocabulary
    from omnivault.transformer.decoder.core import GPTDecoder
    from omnivault.transformer.modules.attention.core import ScaledDotProductAttention
    from omnivault.transformer.projects.adder.main import evaluate_and_generate_on_valid_epoch_end
    from omnivault.transformer.utils.general_utils import create_directory, download_file
    from omnivault.transformer.utils.visualization import show_attention_heatmaps
    from omnivault.utils.config_management.omegaconf import load_yaml_config, merge_configs
    from omnivault.utils.inspector.core import get_field_annotations
    from omnivault.utils.reproducibility.seed import seed_all
else:
    # No ancestor contains `omnivault`, so the project imports cannot be resolved.
    raise ImportError("Root directory not found.")

Config#

# Load the project's YAML config; `args_list=[]` means no extra overrides are
# passed to `merge_configs` here.
yaml_cfg = load_yaml_config(yaml_path=root_dir / "omnivault/transformer/projects/adder/config.yaml")
cfg = merge_configs(yaml_cfg, args_list=[])
om.resolve(cfg)  # inplace ops
# Token list for the adder task; the vocabulary printed later in the notebook
# assigns ids in this order (e.g. "<PAD>" -> 16).
constants: MaybeConstant = MaybeConstant(
    NUM_DIGITS=2,
    TOKENS=[
        "0",
        "1",
        "2",
        "3",
        "4",
        "5",
        "6",
        "7",
        "8",
        "9",
        "+",
        "*",
        "-",
        "=",
        "<BOS>",
        "<EOS>",
        "<PAD>",
        "<UNK>",
    ],
)
global_config: MaybeGlobal = MaybeGlobal(seed=42, debug=True, debug_samples=100)
data_config: DataConfig = DataConfig(**cfg.data)
# NOTE(review): lr=0.2 is very large for plain Adam; presumably a scheduler
# configured later scales it down -- confirm.
optimizer_config = AdamConfig(name="torch.optim.Adam", lr=0.2, betas=(0.9, 0.98), eps=1e-9)
# Override the YAML trainer settings for this notebook run: CPU only, 9 epochs.
cfg.trainer.device = "cpu"
cfg.trainer.max_epochs = 9

trainer_config = TrainerConfig(**cfg.trainer)
generate_config = GeneratorConfig(**cfg.generator)
# Only a subset of the Composer fields is supplied here; the printed output
# shows `model`, `criterion` and `scheduler` as MISSING at this point.
composer = Composer(
    constants=constants,
    global_=global_config,
    data=data_config,
    optimizer=optimizer_config,
    trainer=trainer_config,
    generator=generate_config,
)
pprint(composer)

# Build the notebook-wide logger from the composer's logger configuration.
LOGGER = RichLogger(**composer.logger.model_dump(mode="python")).logger
Composer(
constants=MaybeConstant(
│   │   NUM_DIGITS=2,
│   │   TOKENS=[
│   │   │   '0',
│   │   │   '1',
│   │   │   '2',
│   │   │   '3',
│   │   │   '4',
│   │   │   '5',
│   │   │   '6',
│   │   │   '7',
│   │   │   '8',
│   │   │   '9',
│   │   │   '+',
│   │   │   '*',
│   │   │   '-',
│   │   │   '=',
│   │   │   '<BOS>',
│   │   │   '<EOS>',
│   │   │   '<PAD>',
│   │   │   '<UNK>'
│   │   ]
),
logger=LoggerConfig(
│   │   log_file=None,
│   │   module_name=None,
│   │   propagate=False,
│   │   log_root_dir=None,
│   │   rich_handler_config={
│   │   │   'level': 'INFO',
│   │   │   'console': MISSING,
│   │   │   'show_level': True,
│   │   │   'show_path': True,
│   │   │   'show_time': True,
│   │   │   'rich_tracebacks': True,
│   │   │   'markup': True,
│   │   │   'log_time_format': '[%Y-%m-%d %H:%M:%S]'
│   │   }
),
global_=MaybeGlobal(seed=42, debug=True, debug_samples=100),
data=DataConfig(
│   │   context_length=11,
│   │   dataset_name='adder_dataset',
│   │   dataset_size=10000,
│   │   dataset_path='./data/adder/adder_dataset.txt',
│   │   dataset_dir='./data/adder',
│   │   dataset_url='https://raw.githubusercontent.com/gao-hongnan/omniverse/dev/omnivault/transformer/projects/adder/assets/adder_dataset.txt',
│   │   split=[0.7, 0.2, 0.1],
│   │   collate_fn={'batch_first': True, 'pad_token_id': 16},
│   │   train_loader={
│   │   │   'batch_size': 32,
│   │   │   'shuffle': True,
│   │   │   'num_workers': 0,
│   │   │   'pin_memory': False,
│   │   │   'drop_last': False
│   │   },
│   │   valid_loader={
│   │   │   'batch_size': 32,
│   │   │   'shuffle': False,
│   │   │   'num_workers': 0,
│   │   │   'pin_memory': False,
│   │   │   'drop_last': False
│   │   },
│   │   test_loader={
│   │   │   'batch_size': 128,
│   │   │   'shuffle': False,
│   │   │   'num_workers': 0,
│   │   │   'pin_memory': False,
│   │   │   'drop_last': False
│   │   }
),
model=MISSING,
optimizer=AdamConfig(name='torch.optim.Adam', lr=0.2, betas=(0.9, 0.98), eps=1e-09, weight_decay=0.0),
criterion=MISSING,
scheduler=MISSING,
trainer=TrainerConfig(
│   │   device=device(type='cpu'),
│   │   max_epochs=9,
│   │   log_every_n_steps=100,
│   │   eval_every_n_steps=4,
│   │   step_scheduler_on_batch_or_epoch='epoch',
│   │   use_amp=False,
│   │   autocast_config={'enabled': False, 'dtype': None, 'cache_enabled': None},
│   │   scaler_config={
│   │   │   'enabled': False,
│   │   │   'init_scale': 65536.0,
│   │   │   'growth_factor': 2.0,
│   │   │   'backoff_factor': 0.5,
│   │   │   'growth_interval': 2000
│   │   },
│   │   gradient_accumulation_steps=1,
│   │   clip_grad_norm={'max_norm': 1.0, 'norm_type': 2.0, 'error_if_nonfinite': False, 'foreach': None},
│   │   apply_weight_decay_to_different_param_groups=False,
│   │   save_dir='./data/adder/checkpoints/2024-09-20_14-10-04',
│   │   save_every_epoch=False,
│   │   save_best_only=True,
│   │   monitor='valid_this_epoch_average_loss',
│   │   mode='min'
),
generator=GeneratorConfig(max_tokens=4, temperature=1.0, greedy=True, top_k=None, top_p=None),
distributed=DistributedConfig(
│   │   log_dir='logs_distributed',
│   │   log_level=20,
│   │   log_on_master_or_all=True,
│   │   master_addr='localhost',
│   │   master_port='29500',
│   │   nnodes=1,
│   │   nproc_per_node=1,
│   │   node_rank=0,
│   │   world_size=1,
│   │   backend='gloo',
│   │   init_method='env://'
)
)

Reproducibility#

Reproducibility in deep learning ensures that experiments can be repeated with identical results, critical for verifying research findings and deploying reliable models. Distributed training introduces complexity because it involves multiple computation units which may not synchronize their random states perfectly. If training is paused and resumed, ensuring each unit starts with the correct seed to reproduce the exact computational path becomes challenging. To address this, one can find more sophisticated examples in libraries like Composer, where the whole library’s core is built around training deep neural nets in any environment (distributed or not) with reproducibility in mind.

# Show the parameter annotations/defaults and the docstring of `seed_all`
# before calling it.
print(get_field_annotations(func_or_method = seed_all)[0])
print("\n")
print(inspect.getdoc(seed_all))

# Seed with the configured global seed (42); deterministic mode is explicitly
# turned off here, overriding the default of True shown in the printed signature.
seed_all(composer.global_.seed, seed_torch=True, set_torch_deterministic=False)
[('seed', <class 'int'>, 1992), ('seed_torch', <class 'bool'>, True), ('set_torch_deterministic', <class 'bool'>, True)]


Seeds all relevant random number generators to ensure reproducible
outcomes. Optionally seeds PyTorch and activates deterministic
behavior in PyTorch based on the flags provided.

Parameters
----------
seed : int, default=1992
    The seed number for reproducibility.
seed_torch : bool, default=True
    If True, seeds PyTorch's RNGs.
set_torch_deterministic : bool, default=True
    If True, activates deterministic mode in PyTorch.

Returns
-------
seed : int
    The seed number used for reproducibility.
42

Vocabulary#

# Build the vocabulary from the configured token list and digit count.
vocabulary = AdderVocabulary.from_tokens(tokens=constants.TOKENS, num_digits=constants.NUM_DIGITS)  # type: ignore[attr-defined]
# Module-level aliases for the two lookup tables; several helper functions
# later in this notebook read these names.
token_to_index = vocabulary.token_to_index
index_to_token = vocabulary.index_to_token
vocab_size = vocabulary.vocab_size
pprint(token_to_index)
pprint(index_to_token)
pprint(vocab_size)
{
'0': 0,
'1': 1,
'2': 2,
'3': 3,
'4': 4,
'5': 5,
'6': 6,
'7': 7,
'8': 8,
'9': 9,
'+': 10,
'*': 11,
'-': 12,
'=': 13,
'<BOS>': 14,
'<EOS>': 15,
'<PAD>': 16,
'<UNK>': 17
}
{
0: '0',
1: '1',
2: '2',
3: '3',
4: '4',
5: '5',
6: '6',
7: '7',
8: '8',
9: '9',
10: '+',
11: '*',
12: '-',
13: '=',
14: '<BOS>',
15: '<EOS>',
16: '<PAD>',
17: '<UNK>'
}
18

Assign vocab_size to composer.model because we don’t want to hardcode vocab_size beforehand, and want to derive concrete values from the Vocabulary object.

# `composer.model` is still the MISSING sentinel here, so this assignment fails;
# the error is logged instead of raised so the notebook keeps running.
try:
    composer.model.vocab_size = vocab_size
except AttributeError as err:
    LOGGER.error(err)
[2024-09-20 14:10:04] ERROR    _Missing instances are immutable                                     2890644827.py:4

Ah okay haha, this is the price of writing overly complex and useless code to look fancy and you end up a mess. Anyways, we will handle this later on where we can explicitly instantiate the model config class.

Tokenization#

tokenizer = AdderTokenizer(vocabulary=vocabulary)
# Sanity check: the tokenizer exposes the same lookup tables as the vocabulary.
assert tokenizer.vocabulary.token_to_index == token_to_index
assert tokenizer.vocabulary.index_to_token == index_to_token
# Per the printed output, `encode` wraps the sequence with <BOS> (14) and <EOS> (15).
pprint(tokenizer.encode("1"))
[14, 1, 15]
# Round-trip a single equation string through encode -> decode.
sequence = "15+57=072"
sequences = ["15+57=072", "01+02=003"]
encoded_sentence = tokenizer.encode(sequence)
print(f"Encoded sentence: {encoded_sentence}")

decoded_sentence = tokenizer.decode(encoded_sentence)
print(f"Decoded sentence: {decoded_sentence}")
Encoded sentence: [14, 1, 5, 10, 5, 7, 13, 0, 7, 2, 15]
Decoded sentence: 15+57=072
# Same round trip, but for a list of equation strings at once.
encoded_sentences = tokenizer.encode_batch(sequences)  # type: ignore[attr-defined]
print(f"Encoded sentences: {encoded_sentences}")
decoded_sentences = tokenizer.decode_batch(encoded_sentences)  # type: ignore[attr-defined]
print(f"Decoded sentences: {decoded_sentences}")
Encoded sentences: [[14, 1, 5, 10, 5, 7, 13, 0, 7, 2, 15], [14, 0, 1, 10, 0, 2, 13, 0, 0, 3, 15]]
Decoded sentences: ['15+57=072', '01+02=003']

Dataset#

Create Dataset#

def pad_number(num: int, length: int) -> str:
    """
    Left-pad the decimal form of a number with zeros up to a given width.

    In an equation a + b = c with two-digit operands, a and b are padded
    to width 2 while c is padded to width 3.

    Example
    -------
    6 + 90 = 96 -> 06 + 90 = 096

    Parameters
    ----------
    num : int
        The number to render.
    length : int
        Minimum width of the returned string; numbers that are already at
        least this wide are returned without padding.

    Returns
    -------
    str
        The zero-padded number as a string.
    """
    digits = str(num)
    return digits.zfill(length)


def equation_to_string(a: int, b: int, c: int, num_digits: int) -> str:
    """
    Render an addition as the fixed-width string ``"<a>+<b>=<c>"``.

    Parameters
    ----------
    a : int
        First addend.
    b : int
        Second addend.
    c : int
        Value written after the equal sign (the sum of a and b).
    num_digits : int
        Width to which each addend is zero-padded; the result is padded
        to one digit more.

    Returns
    -------
    str
        The formatted equation, e.g. ``"06+90=096"`` for num_digits=2.
    """
    operand_width = num_digits
    # The sum can carry into one extra digit, hence the wider field for `c`.
    result_width = num_digits + 1
    left, right = pad_number(a, operand_width), pad_number(b, operand_width)
    total = pad_number(c, result_width)
    return "".join((left, "+", right, "=", total))

def decode_equation(vocab: AdderVocabulary, equation: torch.Tensor | List[int], show_special_tokens: bool = False) -> str:
    """
    Convert an equation in list format to string format.

    The lookup table is taken from ``vocab`` itself rather than from a
    notebook-level global, and indices that are not in the vocabulary are
    rendered as the ``<UNK>`` token (not as the integer id of ``<UNK>``).

    Parameters
    ----------
    vocab : AdderVocabulary
        Vocabulary providing ``index_to_token`` and the special tokens
        ``BOS``, ``EOS``, ``PAD`` and ``UNK``.
    equation : torch.Tensor | List[int]
        The equation as a sequence of token indices.
    show_special_tokens : bool, default=False
        If True, keep the special tokens in the returned string; otherwise
        they are removed.

    Returns
    -------
    str
        The equation in string format.
    """
    if isinstance(equation, torch.Tensor):
        equation = equation.tolist()

    index_to_token = vocab.index_to_token
    # Fall back to the UNK *token* so unknown ids are stripped like any other
    # special token below.
    decoded_equation = "".join(str(index_to_token.get(x, vocab.UNK)) for x in equation)
    if show_special_tokens:
        return decoded_equation

    for special_token in (vocab.BOS, vocab.EOS, vocab.PAD, vocab.UNK):
        decoded_equation = decoded_equation.replace(special_token, "")
    return decoded_equation

def batch_decode_equation(vocab: AdderVocabulary, equations: torch.Tensor | List[List[int]]) -> List[str]:
    """
    Decode each equation of a batch into its string form.

    Parameters
    ----------
    vocab : AdderVocabulary
        Vocabulary handed to `decode_equation` for every item.
    equations : torch.Tensor | List[List[int]]
        Iterable of equations, each a sequence of token indices.

    Returns
    -------
    List[str]
        One decoded string per equation, in input order.
    """
    return [decode_equation(vocab, single_equation) for single_equation in equations]

def encode_equation(vocab: AdderVocabulary, equation: str, num_digits: int, device: torch.device) -> torch.Tensor:
    """
    Convert an equation (up to and including the equal sign) in string format
    to a tensor of token indices, prefixed with ``<BOS>``.

    Anything after the equal sign is ignored, and both operands are
    re-padded to ``num_digits``. The token lookup uses ``vocab`` itself
    rather than a notebook-level global.

    Parameters
    ----------
    vocab : AdderVocabulary
        Vocabulary providing ``token_to_index`` and the ``ADD``, ``EQUAL``,
        ``BOS`` and ``UNK`` tokens.
    equation : str
        The equation in string format, e.g. ``"15+57=072"`` or ``"15+57="``.
    num_digits : int
        Number of digits each operand in the equation should have.
    device : torch.device
        The device to which the tensor should be sent.

    Returns
    -------
    torch.Tensor
        1-D ``torch.int`` tensor ``[BOS, *a, ADD, *b, EQUAL]``.

    Raises
    ------
    ValueError
        If the plus or equal sign is missing from ``equation`` or an operand
        is not an integer literal.
    """
    token_to_index = vocab.token_to_index

    plus_idx = equation.index(vocab.ADD)
    equal_idx = equation.index(vocab.EQUAL)

    BOS = token_to_index[vocab.BOS]
    UNK = token_to_index[vocab.UNK]

    a = pad_number(int(equation[:plus_idx]), num_digits)
    b = pad_number(int(equation[plus_idx + 1:equal_idx]), num_digits)

    # Rebuild the prompt with the same symbols that were used to locate the operands.
    new_equation = f"{a}{vocab.ADD}{b}{vocab.EQUAL}"

    return torch.tensor(
        [BOS] + [token_to_index.get(n, UNK) for n in new_equation],
        dtype=torch.int
    ).to(device)
def create_add_dataset(
    vocab: AdderVocabulary, num_digits: int, dataset_size: int, rng_seed: int = 1337
) -> Tuple[List[torch.Tensor], List[str]]:
    """
    Generate random addition equations as both token tensors and strings.

    The token lookup uses ``vocab`` itself rather than a notebook-level
    global, so the function works with whichever vocabulary it is given.

    Parameters
    ----------
    vocab : AdderVocabulary
        Vocabulary providing ``token_to_index`` and the ``BOS``, ``EOS``
        and ``UNK`` tokens.
    num_digits : int
        Maximum number of digits of each operand.
    dataset_size : int
        Number of equations to generate.
    rng_seed : int, default=1337
        Seed of the dedicated generator used to sample the operands.

    Returns
    -------
    Tuple[List[torch.Tensor], List[str]]
        The encoded equations (``[BOS, ..., EOS]`` each) and the matching
        equation strings, in the same order.
    """
    token_to_index = vocab.token_to_index
    BOS = token_to_index[vocab.BOS]
    EOS = token_to_index[vocab.EOS]
    UNK = token_to_index[vocab.UNK]

    # A private generator keeps the sampling independent of the global RNG state.
    rng = torch.Generator()
    rng.manual_seed(rng_seed)

    max_num = 10**num_digits - 1

    dataset_str = []
    for _ in range(dataset_size):
        # `high` is exclusive in torch.randint, so operands lie in [0, max_num].
        a = torch.randint(low=0, high=max_num + 1, size=(1,), generator=rng).item()
        b = torch.randint(low=0, high=max_num + 1, size=(1,), generator=rng).item()
        c = a + b

        dataset_str.append(equation_to_string(a, b, c, num_digits))

    dataset_tensor = [
        torch.tensor([BOS] + [token_to_index.get(n, UNK) for n in x] + [EOS])
        for x in dataset_str
    ]
    return dataset_tensor, dataset_str
# Generate a tiny 4-sample dataset to inspect both representations side by side.
dataset_tensor, dataset_str = create_add_dataset(vocab=vocabulary, num_digits=2, dataset_size=4)
pprint(dataset_tensor)
pprint(dataset_str)
[
tensor([14,  1,  5, 10,  5,  7, 13,  0,  7,  2, 15]),
tensor([14,  9,  2, 10,  0,  0, 13,  0,  9,  2, 15]),
tensor([14,  9,  5, 10,  5,  3, 13,  1,  4,  8, 15]),
tensor([14,  1,  5, 10,  1,  0, 13,  0,  2,  5, 15])
]
['15+57=072', '92+00=092', '95+53=148', '15+10=025']
print(f"Decoded equation: {decode_equation(vocabulary, dataset_tensor[0])}")
# The third operand has the <BOS> (14) and <EOS> (15) ids swapped; it still
# decodes to the same string because special tokens are stripped wherever they occur.
assert (
    decode_equation(vocabulary, dataset_tensor[0])
    == dataset_str[0]
    == decode_equation(vocabulary, [15, 1, 5, 10, 5, 7, 13, 0, 7, 2, 14])
)
Decoded equation: 15+57=072

If we encode an equation, we encode it only up to (and including) the equal sign, as shown below.

print(f"Encoded equation: {encode_equation(vocabulary, dataset_str[0], num_digits=2, device=composer.trainer.device)}")

# Expected prompt encoding: <BOS>, "1", "5", "+", "5", "7", "=" -- the answer digits are dropped.
torch.testing.assert_close(
    encode_equation(vocabulary, dataset_str[0], num_digits=2, device=composer.trainer.device),
    torch.tensor([14, 1, 5, 10, 5, 7, 13], dtype=torch.int32),
)
Encoded equation: tensor([14,  1,  5, 10,  5,  7, 13], dtype=torch.int32)

Uncomment the below code to generate the dataset into a text file and yes, I am lazy to add a config variable for whether to generate the dataset or not.

# dataset, dataset_str = create_add_dataset(vocab, self.num_digits, self.dataset_size)

# write dataset_str to a file
# with open("dataset_str.txt", "w") as f:
#     for item in dataset_str:
#         f.write("%s\n" % item)

Encoding Strategy Overview#

Our strategy for encoding arithmetic expressions is pretty self-explanatory, where given a string D1 + D2 = D3, we encode it as <BOS>D1+D2=0D3<EOS>. However, this is verbose for clarity sake. In fact, Karpathy’s encoding strategy simplifies arithmetic expressions by concatenating the digits of operands and the result into a single string without explicit symbols for operations or equality. This method relies on a fixed number of digits (num_digits) for operands, which streamlines the model’s interpretation of the sequence. For example, if num_digits is set to 2, every encoded expression is structured to follow a predictable pattern: the first two digits represent the first operand, the next two digits represent the second operand, and the final digits are encoded as 3 digits because the max sum of two 2-digit numbers is 199, which is 3 digits. The digits of the result are encoded in reverse order. This counterintuitive approach is designed to align with the GPT model’s learning algorithm, facilitating easier learning of the addition operation by mimicking the traditional right-to-left calculation process in addition.

To illustrate, let’s examine the encoding of arithmetic expressions with num_digits=2:

For the expression 6 + 39 = 45, we have the following:

  • The first two digits 06 represent the number 6, zero-padded to adhere to the num_digits=2 requirement.

  • The next two digits 39 represent the number 39, already fitting the digit requirement.

  • The final part 054 represents the result 45, reversed to 54 and preceded by a zero to maintain the total length of \(2n + (n + 1) = 7 \) digits for num_digits=2.

Constructing PyTorch Dataset#

# Create the dataset directory, then fetch the dataset file from the
# configured URL to the configured path.
create_directory(composer.data.dataset_dir)
download_file(url=composer.data.dataset_url, output_path=composer.data.dataset_path)
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  8   97k    8  8259    0     0  52703      0  0:00:01 --:--:--  0:00:01 52605
100   97k  100   97k    0     0   574k      0 --:--:-- --:--:-- --:--:--  574k
# Read the downloaded file into a list with one stripped line per entry
# (presumably one equation string per line -- confirm against the file).
with open(composer.data.dataset_path, "r") as file:
    sequences = [line.strip() for line in file]

dataset = AdderDataset(data=sequences, tokenizer=tokenizer)

# Peek at the first sample: per the printed output it is a 4-tuple of
# (input, target, target padding mask, future mask).
pprint(next(iter(dataset)))
(
tensor([14,  1,  5, 10,  5,  7, 13,  0,  7,  2]),
tensor([16, 16, 16, 16, 16, 16,  0,  7,  2, 15]),
tensor([True, True, True, True, True, True, True, True, True, True]),
tensor([[ True, False, False, False, False, False, False, False, False, False],
│   │   [ True,  True, False, False, False, False, False, False, False, False],
│   │   [ True,  True,  True, False, False, False, False, False, False, False],
│   │   [ True,  True,  True,  True, False, False, False, False, False, False],
│   │   [ True,  True,  True,  True,  True, False, False, False, False, False],
│   │   [ True,  True,  True,  True,  True,  True, False, False, False, False],
│   │   [ True,  True,  True,  True,  True,  True,  True, False, False, False],
│   │   [ True,  True,  True,  True,  True,  True,  True,  True, False, False],
│   │   [ True,  True,  True,  True,  True,  True,  True,  True,  True, False],
│   │   [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True]])
)

Construct Batches, Collate Function and DataLoader#

We first reverse engineer what our dataset is returning. The disclaimer here is that for decoder only models like GPT, many people often omit the padding mask since all the samples \(\mathbf{x}\) are chunked to sequence/context length of window size \(T\), and future masks are usually handled within the Attention class since we will never attend to the future tokens. However, for the sake of clarity, we will include the padding and future mask in the dataset (i.e. actually it is for the sake of my own understanding when I started to implement decoder from scratch).

# Unpack the first sample into its four components.
# NOTE(review): `input` shadows the Python builtin of the same name from here on.
input, target, target_padding_mask, future_mask = next(iter(dataset))

Input and Target#

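If you’ve read my section here, you will see that given an input sequence \(\mathbf{x}\), the target sequence \(\mathbf{y}\) is simply the input sequence \(\mathbf{x}\) shifted by one time step to the left. In this dataset, the target positions that correspond to the prompt (everything up to and including the = sign) are additionally replaced by the PAD index 16, so only the answer digits and <EOS> remain as actual targets, as the output below shows.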

# Print input and target side by side to compare them position by position.
print(f"Input : {input}")
print(f"Target: {target}")
Input : tensor([14,  1,  5, 10,  5,  7, 13,  0,  7,  2])
Target: tensor([16, 16, 16, 16, 16, 16,  0,  7,  2, 15])

Target Padding Mask#

When you’re dealing with sequences of different lengths, you pad the shorter sequences with a special token PAD (usually \(0\) or \(-100\)) to make them the same length as the longest one in the batch. These paddings should not contribute to the model’s learning, so you need to mask them out. In practice, you’ll often see a mask argument in Attention layers in PyTorch where if True, the attention scores are set to -inf for the padded positions so that these positions become zero after the softmax operation, thereby not contributing to the weighted sum of the input sequence.

In a decoder-only model like GPT, the input sequence is essentially the target. The model aims to generate tokens that come after the given input, treating it as the “history” or “context” for the task of text generation. Unlike encoder-decoder models like the original Transformer, where the encoder processes a source sequence and the decoder generates a target sequence, a decoder-only model works solely with what would traditionally be considered the target sequence.

Consequently, the terminology “target padding mask” is more intuitive in the context of encoder-decoder models, where the distinction between source (input) and target (output) sequences is clear. The distinction is blurred in decoder-only models like GPT, as the model processes input to predict the next token in a sequence. Here, the source is essentially the target at different stages of processing: the model uses previous tokens (source) to predict the next token (target). However, during my implementation, I was mainly referring to transformer models that use encoder-decoder architecture, and the terminology therefore stemmed from that context.

A target padding mask is a binary mask that ignores pad tokens in the source input (in a decoder-only model, the source is the target). Its shape is \((\mathcal{B}, T)\).

Let’s illustrate the target padding mask with an example. Suppose we have a batch of sequences with different lengths:

# Toy batch of four sequences with different lengths (3, 2, 5 and 4 tokens).
target_batch = [
    [5, 7, 9],
    [8, 6],
    [3, 12, 4, 11, 17],
    [2, 1, 4, 5],
]
pprint(target_batch)
[[5, 7, 9], [8, 6], [3, 12, 4, 11, 17], [2, 1, 4, 5]]

If we try to “batch” these sequences, PyTorch would throw an error indicating that you need all sequences to have the same length.

# Building a tensor from ragged lists fails; the ValueError is logged so the
# notebook can continue and demonstrate the padding fix.
try:
    target_batch = torch.tensor(target_batch, dtype=torch.int64)
except ValueError as err:
    LOGGER.error(err)
                      ERROR    expected sequence of length 3 at dim 1 (got 2)                       1205213247.py:4

To address this issue, we could pad the sequences to the same length and create a mask to indicate which positions are padded. We pad the shorter sequences with a special token PAD to make them the same length as the longest one in the batch.

# Integer id of the padding token (16 in this vocabulary).
PAD = vocabulary.token_to_index[vocabulary.PAD]

# Right-pad every sequence with PAD up to the length of the longest one.
max_len = max(len(seq) for seq in target_batch)
target_batch = [seq + [PAD] * (max_len - len(seq)) for seq in target_batch]
pprint(target_batch)

# All rows now have equal length, so the conversion to a tensor succeeds.
target_batch = torch.tensor(target_batch, dtype=torch.int64)
pprint(target_batch)
[[5, 7, 9, 16, 16], [8, 6, 16, 16, 16], [3, 12, 4, 11, 17], [2, 1, 4, 5, 16]]
tensor([[ 5,  7,  9, 16, 16],
│   │   [ 8,  6, 16, 16, 16],
│   │   [ 3, 12,  4, 11, 17],
│   │   [ 2,  1,  4,  5, 16]])
batch_size, seq_len = target_batch.size()

# True where the position holds a real token, False where it holds PAD.
target_padding_mask = target_batch != PAD

pprint(target_padding_mask)

# The mask has the same (batch, sequence) shape as the padded batch.
assert target_padding_mask.size() == (batch_size, seq_len) == (4, 5)
tensor([[ True,  True,  True, False, False],
│   │   [ True,  True, False, False, False],
│   │   [ True,  True,  True,  True,  True],
│   │   [ True,  True,  True,  True, False]])

Of course, we would need a batch of these masks, so we would have a shape of \((\mathcal{B}, T)\) like mentioned above. As we will see later, we will still need to broadcast the shape to \((\mathcal{B}, 1, T, T)\) to match the shape of the attention scores.

Theoretically speaking, it is possible for the sequence length \(T\) to vary across samples \(\mathbf{x}\). However, we usually have the same length for all samples in GPT, and in this particular case, we do know that each sample necessarily has the same length by design. For the sake of explanation, though, note that our Dataset only generates one single sample at a time and does not worry about different sequence lengths across the other samples in the dataset \(\mathcal{S}\); but in deep learning we train in mini-batches \(\mathcal{B}\), and with different sequence lengths inside a batch we may encounter issues (e.g. matrix multiplication may not work).

Future Mask#

In the decoder, each position can only attend to positions that come before it in the sequence to maintain the auto-regressive property. This is different from the encoder, where all positions can attend to all other positions.

The future mask is basically a look-ahead mask that ensures each position only attends to positions before it in the sequence: we mask out future positions (i.e., positions that come after the current position) so that they don’t contribute to the current attention scores. Before the softmax operation, we set these positions to -inf so that they become zero after the softmax operation - effectively zeroing out the attention weights for future positions. What does zeroing out these masked logits actually do? Basically, the attention mechanism can be thought of as a weighted average of all the tokens in the input sequence. Each token is assigned a weight, with higher weights indicating more relevance to the token under consideration. If a certain token should not be considered at all (e.g., it’s a future token that should not be visible to the current decoder step, or it’s a padding token), its weight should be zero.

The shape of the future mask is \((T, T)\) for a target sequence/sample \(\mathbf{x}\) of length \(T\). Let’s see a concrete example to illustrate the future mask.

seq_len = 5
# `triu(..., diagonal=1)` keeps ones strictly above the main diagonal, i.e. at
# the future positions.
future_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1)
# Invert: True on and below the diagonal (allowed), False above it (future).
future_mask = future_mask == 0

pprint(future_mask)
assert future_mask.size() == (seq_len, seq_len) == (5, 5)
tensor([[ True, False, False, False, False],
│   │   [ True,  True, False, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True,  True, False],
│   │   [ True,  True,  True,  True,  True]])

Merge Padding and Future Masks#

We see from our decoder implementation below that one of the methods creates the target masks. In other words, we create the target padding masks and future masks, and merge them together.

 1def create_target_masks(
 2    self,
 3    batch_size: int,
 4    seq_len: int,
 5    target_padding_masks: torch.BoolTensor | NotGiven = NOT_GIVEN,
 6    future_masks: torch.BoolTensor | NotGiven = NOT_GIVEN,
 7) -> torch.BoolTensor:
 8    target_masks_shape = (batch_size, 1, seq_len, seq_len)
 9    if target_padding_masks is NOT_GIVEN and future_masks is NOT_GIVEN:
10        target_padding_masks = cast(
11            torch.BoolTensor, construct_dummy_batch_target_padding_masks(batch_size, seq_len)
12        )
13        future_masks = cast(torch.BoolTensor, construct_dummy_batch_future_masks(batch_size, seq_len))
14
15    if target_padding_masks is NOT_GIVEN:
16        target_padding_masks = cast(
17            torch.BoolTensor, construct_dummy_batch_target_padding_masks(batch_size, seq_len)
18        )
19
20    if future_masks is NOT_GIVEN:
21        future_masks = cast(torch.BoolTensor, construct_dummy_batch_future_masks(batch_size, seq_len))
22
23    assert target_padding_masks.shape == future_masks.shape == target_masks_shape  # type: ignore[union-attr]
24
25    return cast(
26        torch.BoolTensor,
27        torch.logical_and(cast(torch.Tensor, target_padding_masks), cast(torch.Tensor, future_masks)).bool(),
28    )

The purpose of applying logical_and between target_padding_mask and future_mask is to combine the constraints from both masks when calculating self-attention scores in the transformer’s decoder. The target_padding_mask is designed to mask out the padding tokens in the input sequence, while the future_mask ensures that a given position cannot attend to future positions in the sequence. By combining these masks, you can perform the necessary masking for both padding and future tokens in a single step.

Here’s how it works:

  1. target_padding_mask: Masks out the padding tokens so that they don’t contribute to the attention calculations. True values mean “attend to this token,” and False values mean “ignore this token.”

  2. future_mask: The future mask is created as a lower triangular matrix, where the lower triangle, including the diagonal, is filled with ones, and the upper triangle is filled with zeros. Masks out future tokens in a sequence so that a token at a given position can only attend to positions that come before it (and itself). True values mean “attend to this token,” and False values mean “ignore this token.”

  3. logical_and(target_padding_mask, future_mask): Combines the two masks. A True in the resulting mask means that the condition for both padding and future attention is satisfied.

By combining these two masks, the decoder obeys the autoregressive property, ensuring it doesn’t see future tokens, while also ignoring padding tokens in the input sequence. We may term it the target_mask.

First Sample First Token#

  • target_padding_mask has size of [4, 5].

    • We zoom in to the first row (sample) which is of length 5.

    • This length 5 is the sequence length, which is T, T, T, F, F indicating the last 2 tokens being padded.

  • future_mask has size of [5, 5].

    • We note that this is independent of batch size. Each sample should have the same future mask shape of [L, L].

    • This L=5 must necessarily be the same as the sequence length in target_padding_mask.

  • First, let’s consider one batch of 4 samples. What we do first is to broadcast future_mask to [4, 5, 5] because we want each sample/row in the batch to have the same future mask. As shown below:

# Show the 2-D future mask before it is given a batch axis.
pprint(future_mask)
# Add a leading axis of size 1, then expand it to batch_size so every sample
# shares the same future mask; -1 keeps the two sequence axes as they are.
future_mask = future_mask.view(1, seq_len, seq_len).expand(size=(batch_size, -1, -1))
pprint(future_mask)
pprint(future_mask.shape)
tensor([[ True, False, False, False, False],
│   │   [ True,  True, False, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True,  True, False],
│   │   [ True,  True,  True,  True,  True]])
tensor([[[ True, False, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True,  True]],
│   │   
│   │   [[ True, False, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True,  True]],
│   │   
│   │   [[ True, False, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True,  True]],
│   │   
│   │   [[ True, False, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True,  True]]])
torch.Size([4, 5, 5])
  • Now, we can zoom in to one particular sample since both target_padding_mask and future_mask have the same first dimension of batch size.

  • What is incomplete is that we need to broadcast target_padding_mask’s last dimension to have the same dimensions as future_mask. This means we broadcast [4, 5] to [4, 5, 5]. But why?

  • For simplicity, we slice the first sample of both below.

  • The first row of the future_mask of the first sample is T, F, F, F, F. This corresponds to what? This is the future mask of the first token in the sequence. Well, that is confusing, because it apparently has 5 elements and carries “information” about the other 4 tokens in the sequence. Let’s explain in detail below:

    • Regarding the first row of the future_mask in the first sample, which is [T, F, F, F, F], it might initially seem confusing why there are 5 elements. Each of these elements, in fact, corresponds to whether the first token can attend to other tokens at each respective position in the sequence. Here’s how to interpret it:

      • The first element (True) indicates that the first token can attend to itself.

      • The next four elements (False) specify that the first token should not attend to any of the future tokens in the sequence.

  • Consequently, what is the first token in the sequence of the target_padding_mask? Recall earlier we mentioned that the first sample’s target_padding_mask is T, T, T, F, F and therefore the first token in the sequence is T.

  • What do we want to achieve here? We want to make sure that the model does not attend to tokens in the sequence that are masked with False.

  • In other words, the first token in the sequence of the first sample has target_padding_mask of T and future_masks of T, F, F, F, F.

  • We need to broadcast this T to T, T, T, T, T to align with T, F, F, F, F because? Because we need to ensure that this first token in the sequence can also be considered in relation to every other token in the sequence.

  • So the first token is not a padded token, which is T, similarly, the first token needs to attend to itself at the first position, hence T and T give T. But for the second T in the now broadcasted target_padding_mask, it is still representing the first token or?

  • Broadcasting the first token’s target_padding_mask value of T to [T, T, T, T, T] ensures that when this first token is being considered for attention computations, it is free to attend to any position, barring any restrictions set by future_mask.

  • Tricky: after broadcasting, each T in [T, T, T, T, T] is still representing the first token. They indicate that when the first token is compared with any token in the sequence (including itself), it is not a padding token. The element-wise AND with the future_mask then further refines this by restricting it from attending to future tokens.

# Show the per-sample padding mask, then the first sample's row on its own.
pprint(target_padding_mask)
pprint(target_padding_mask[0])

# Insert a size-1 axis in the middle and expand it to seq_len, so each row of a
# sample's matrix repeats that sample's padding pattern.
target_padding_mask = target_padding_mask.view(batch_size, 1, seq_len).expand(size=(batch_size, seq_len, seq_len))
pprint(target_padding_mask)
pprint(target_padding_mask.shape)
tensor([[ True,  True,  True, False, False],
│   │   [ True,  True, False, False, False],
│   │   [ True,  True,  True,  True,  True],
│   │   [ True,  True,  True,  True, False]])
tensor([ True,  True,  True, False, False])
tensor([[[ True,  True,  True, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True, False, False],
│   │    [ True,  True,  True, False, False]],
│   │   
│   │   [[ True,  True, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True, False, False, False],
│   │    [ True,  True, False, False, False]],
│   │   
│   │   [[ True,  True,  True,  True,  True],
│   │    [ True,  True,  True,  True,  True],
│   │    [ True,  True,  True,  True,  True],
│   │    [ True,  True,  True,  True,  True],
│   │    [ True,  True,  True,  True,  True]],
│   │   
│   │   [[ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True, False],
│   │    [ True,  True,  True,  True, False]]])
torch.Size([4, 5, 5])
# First sample only: broadcast padding mask, future mask, and their element-wise AND.
pprint(target_padding_mask[0])
pprint(future_mask[0])
pprint(target_padding_mask[0] & future_mask[0])
tensor([[ True,  True,  True, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True, False, False]])
tensor([[ True, False, False, False, False],
│   │   [ True,  True, False, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True,  True, False],
│   │   [ True,  True,  True,  True,  True]])
tensor([[ True, False, False, False, False],
│   │   [ True,  True, False, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True, False, False],
│   │   [ True,  True,  True, False, False]])

First Sample Fourth Token#

Now let’s look at another example—the 4th token in the sequence, where target_padding_mask = [T, T, T, F, F] and future_mask is a lower triangular matrix with Trues.

  1. 4th Token’s target_padding_mask: The 4th token has a value of F in target_padding_mask, indicating it’s a padding token.

  2. 4th Row of future_mask: The 4th row in future_mask is [True, True, True, True, False]. This means that if this token were not a padding token, it would be allowed to attend to all the previous tokens in the sequence and itself, but not to any future token.

  3. Broadcast target_padding_mask: The expand call shown earlier repeats the sample’s padding pattern [T, T, T, F, F] along the row dimension, so the 4th row of the broadcast target_padding_mask is also [T, T, T, F, F]. Each entry in that row says whether the token at that column position is a real token (T) or a padding token (F).

  4. Element-wise AND with future_mask: After broadcasting, you’d perform an element-wise AND between [T, T, T, F, F] and [True, True, True, True, False], resulting in [T, T, T, F, F], which matches the 4th row of the combined mask printed above.

  5. Interpretation: No token attends to the padded 4th and 5th positions, because their columns are False in every row. The 4th row itself still marks the three real tokens as attendable, so what this mask removes is attention to padding positions, not the row that belongs to a padding position.

So, the masks are doing their jobs correctly: the target_padding_mask indicates whether each token is a padding token or not, and future_mask dictates the “rules” of attention regarding what each token can attend to. Combining them ensures that both conditions are met.

Further Add a Singleton Dimension in Target Masks#

Now both masks are of shape (B, L, L), but we need to insert a singleton dimension at position 1 to make them (B, 1, L, L).

In deep learning frameworks like PyTorch, the dimensions of the tensors involved in operations like matrix multiplication or attention mechanisms often have specific semantic meanings. In the context of attention mechanisms, especially in the transformer architecture, the attention mask usually has a shape that is compatible with the attention logits for element-wise multiplication.

In the transformer model, the attention logits are often computed as a dot product between query and key vectors, resulting in a tensor of shape (Batch size, Num heads, Sequence length, Sequence length) or (B, H, L, L). Here, B is the batch size, H is the number of attention heads, and L is the sequence length.

To make the mask tensor compatible for element-wise operations with this 4D tensor, it needs to have a shape that can be broadcasted to (B, H, L, L). A mask of shape (B, 1, L, L) fulfills this requirement.

The singleton dimension is added so that the mask can be easily broadcast to the shape of the attention logits tensor during the computation. When a tensor with shape (B, 1, L, L) is element-wise multiplied with a tensor of shape (B, H, L, L), the singleton dimension (the 1) allows the mask to be used for each attention head without explicitly replicating the mask H times. This is more memory-efficient and often faster.

Thus, adding a singleton dimension in masks is a preparatory step that allows for efficient element-wise operations later in the model’s forward pass.

# Insert a size-1 axis at position 1 of each mask.
target_padding_mask = target_padding_mask.unsqueeze(1)
pprint(target_padding_mask.shape)

future_mask = future_mask.unsqueeze(1)
pprint(future_mask.shape)

# Element-wise AND of the two 4-D masks gives the final target mask.
target_mask = target_padding_mask & future_mask
pprint(target_mask.shape)
torch.Size([4, 1, 5, 5])
torch.Size([4, 1, 5, 5])
torch.Size([4, 1, 5, 5])

Why mask our target in Adder?#

If you see the source code of how the AdderDataset is constructed, you will see that we masked out all the tokens before (and including) the equal sign.

For example, if our sequence is 12+97=109, the input sequence will be tokenized to the following:

input = [BOS, 1, 2, +, 9, 7, =, 1, 0, 9]
target = [1, 2, +, 9, 7, =, 1, 0, 9, EOS]

What our code below does is to mask out the tokens before the equal sign for the target sequence.

target = [MASK, MASK, MASK, MASK, MASK, MASK, 1, 0, 9, EOS]
def construct_target_tensor(self, input_sequence: torch.Tensor) -> torch.LongTensor:
    """Build the training target for one tokenized equation.

    Every position up to and including the equal-sign token is overwritten
    with ``self.pad_token_id``, then the first element is dropped so the
    target is the input shifted left by one. The input tensor is not modified.
    """
    equal_positions = torch.where(input_sequence == self.equal_token_id)[0]
    # .item() only succeeds when exactly one equal-sign token was found.
    last_masked_index = int(equal_positions.item())

    masked = input_sequence.clone()
    masked[: last_masked_index + 1] = self.pad_token_id
    return torch.LongTensor(masked[1:])

Simply put, we do not care what the model predicts for anything before the equal sign. By masking out (or ignoring) the tokens before the =, we are asking the model to “focus” on generating the correct answer after the equal sign.

Split to Train-Valid-Test#

batch_size   = 256

# Use the same batch size for all three loader configurations.
composer.data.train_loader["batch_size"] = batch_size
composer.data.valid_loader["batch_size"] = batch_size
composer.data.test_loader["batch_size"] = batch_size

# Split the full dataset using the configured split and the global seed.
train_dataset, valid_dataset, test_dataset = split_dataset(
    dataset=dataset, split=composer.data.split, seed=composer.global_.seed
)

train_size, valid_size, test_size = len(train_dataset), len(valid_dataset), len(test_dataset)
# Bare expression so the notebook displays the three sizes.
train_size, valid_size, test_size
(7000, 2000, 1000)
# max_seq_len = 1 + num_digits + 1 + num_digits + 1 + num_digits + 1 + 1,
# where the 1s stand for BOS, the plus sign, the equal sign, the extra digit in the sum, and EOS, respectively.
max_seq_len = 1 + 1 + 1 + 1 + 2 * composer.constants.NUM_DIGITS + (composer.constants.NUM_DIGITS + 1)
# The derived length must agree with the context length in the config.
assert max_seq_len == composer.data.context_length

Create DataLoader#

# One loader per split; each uses its own loader config and the shared collate_fn config.
train_loader = create_loader(
    dataset=train_dataset,
    loader_config=composer.data.train_loader,
    collate_fn_config=composer.data.collate_fn,
)

valid_loader = create_loader(
    dataset=valid_dataset,
    loader_config=composer.data.valid_loader,
    collate_fn_config=composer.data.collate_fn,
)

test_loader = create_loader(
    dataset=test_dataset,
    loader_config=composer.data.test_loader,
    collate_fn_config=composer.data.collate_fn,
)

The collate_fn defines how to combine these variable-length samples into a batch. This usually involves padding the sequences in the batch to a common length, which is typically the length of the longest sequence in the batch. Note here the padding in collate is “redundant” since in our earlier code we ensured that all samples have the same number of characters by way of padding zeros in front. For example, 23 + 3 = 26 will become 23 + 03 = 026. Consequently, all samples in the mini-batch will have the same length by definition.

# Seed torch's RNG with the global seed before iterating
# (presumably so the batches printed below are reproducible; confirm the loader shuffles).
torch.manual_seed(composer.global_.seed)

# Counts batches seen so far; the loop stops after four.
batch_index = 0
for batch in train_loader:
    # Each batch is a tuple containing all elements for the batch
    inputs_padded, targets_padded, padding_masks_padded_and_expanded, future_masks_expanded = batch

    # Print the length of each component in the batch
    print("Batch Size:", len(inputs_padded))

    # Now you can print shapes or other properties of each batch element
    print("Inputs Shape:", inputs_padded.shape)
    print("Targets Shape:", targets_padded.shape)

    # Decoding and other processing can be done here
    # For example, decoding the first sequence in the batch
    print("Decoded First Equation/Sample of the Batch:", decode_equation(vocabulary, inputs_padded[0].tolist()))

    print("-" * 80)

    batch_index += 1
    if batch_index == 4: break
Batch Size: 256
Inputs Shape: torch.Size([256, 10])
Targets Shape: torch.Size([256, 10])
Decoded First Equation/Sample of the Batch: 31+04=035
--------------------------------------------------------------------------------
Batch Size: 256
Inputs Shape: torch.Size([256, 10])
Targets Shape: torch.Size([256, 10])
Decoded First Equation/Sample of the Batch: 37+49=086
--------------------------------------------------------------------------------
Batch Size: 256
Inputs Shape: torch.Size([256, 10])
Targets Shape: torch.Size([256, 10])
Decoded First Equation/Sample of the Batch: 47+26=073
--------------------------------------------------------------------------------
Batch Size: 256
Inputs Shape: torch.Size([256, 10])
Targets Shape: torch.Size([256, 10])
Decoded First Equation/Sample of the Batch: 53+05=058
--------------------------------------------------------------------------------

Model#

We went into extensive detail on the implementation of the decoder in the implementation section. We will not repeat the concepts here; instead, we will just compile the model with the configurations.

# Create individual component configurations
# The width 128 is repeated in every sub-config (d_model / feature_dim) so the pieces compose.
masked_self_attention_mha_config = MultiHeadedAttentionConfig(
     attention=ScaledDotProductAttention(),
    d_model=128, H=4, dropout=0.1
)

feed_forward_config = PositionwiseFeedForwardConfig(
    d_model=128, d_ff=256, activation=nn.GELU(approximate="tanh"), dropout=0.1, bias=True
)

add_norm_config_1 = AddNormConfig(feature_dim=128, dropout=0.1)
add_norm_config_2 = AddNormConfig(feature_dim=128, dropout=0.1)

# Create DecoderBlockConfig
decoder_block_config = DecoderBlockConfig(
    masked_self_attention_mha=masked_self_attention_mha_config,
    feed_forward=feed_forward_config,
    add_norm_1=add_norm_config_1,
    add_norm_2=add_norm_config_2,
)

# Create the overall DecoderConfig
model_config = DecoderConfig(
    d_model=128,
    vocab_size=vocab_size,
    context_length=max_seq_len,
    num_decoder_blocks=2,
    dropout=0.1,
    decoder_block=decoder_block_config,
)

model = GPTDecoder(model_config)
# dtype is read back from the model's own first parameter, so this call only changes the device.
model = model.to(device=composer.trainer.device, dtype=next(model.parameters()).dtype, non_blocking=True)

model_size = model.total_trainable_parameters
print(f'model_size: {model_size}, train_set_size: {train_size}')

# Store the model configuration on the composer.
composer.model = model_config
model_size: 270226, train_set_size: 7000

Training Paradigm#

Here, we would list some of the training paradigms that we would be using in this project.

Optimizer#

We start off by defining the optimizer for GPT-2. A common choice used is the Adam [Kingma and Ba, 2014] or AdamW [Loshchilov and Hutter, 2017]. We conveniently take the configuration provided in Karpathy’s nanoGPT.

\[\begin{split} \begin{aligned} \eta_{\max} &= 6 \times 10^{-4} \\ \beta_1 &= 0.9 \\ \beta_2 &= 0.95 \\ \epsilon &= 10^{-8} \\ \lambda &= 10^{-1} \end{aligned} \end{split}\]

Furthermore, we briefly mention that Karpathy applies weight decay to different parameter groups - which is quite a common practice. As we can see from the code below, we define whitelisted modules whose weights receive weight decay and blacklisted modules whose weights do not. The whitelist modules are nn.Linear and the blacklist modules are nn.LayerNorm, nn.Embedding.

Weight decay, which is basically L2 regularization, penalizes the square of the weights, encouraging smaller weight values. This can lead to a “spreading out” effect, as it discourages the model from relying too heavily on a small number of input features, promoting a more even distribution of weight values and, by extension, a more balanced consideration of input dimensions. This regularization technique is particularly beneficial for layers that perform matrix multiplication, as it helps in ensuring that the model utilizes a broader range of input features rather than becoming overly dependent on a few dominant ones. We can find more intuition in the discussions Why not perform weight decay on layernorm/embedding?, Weight decay in the optimizers is a bad idea (especially with BatchNorm), and Weight decay exclusions (Karpathy).

def apply_weight_decay_to_different_param_groups(
    model: nn.Module, weight_decay: float
) -> List[Dict[Literal["params", "weight_decay"], List[torch.nn.Parameter] | float]]:
    """Split the model's parameters into a decayed and a non-decayed optimizer group.

    Parameters are classified by name suffix and by the type of the module
    they are visited through:

    - decayed: ``weight`` of whitelisted modules (``nn.Linear``) and any
      parameter whose name ends with ``in_proj_weight``;
    - not decayed: every ``bias``, ``weight`` of blacklisted modules
      (``nn.LayerNorm``, ``nn.Embedding``, the custom ``LayerNorm``),
      ``gamma``/``beta`` of the custom ``LayerNorm``, and ``pos_embed``.

    Args:
        model: Module whose parameters are grouped.
        weight_decay: Weight decay value assigned to the decayed group.

    Returns:
        Two param-group dicts: the decayed parameters with ``weight_decay``,
        then the non-decayed parameters with ``0.0``. Parameters within each
        group are ordered by sorted name.

    Raises:
        AssertionError: If a parameter lands in both sets, or if a parameter
            of ``model`` is not categorized at all.
    """
    decay: Set[str] = set()
    no_decay: Set[str] = set()
    whitelist_weight_modules: Tuple[Type[nn.Module], ...] = (nn.Linear,)
    blacklist_weight_modules: Tuple[Type[nn.Module], ...] = (nn.LayerNorm, nn.Embedding, LayerNorm)

    # NOTE(review): named_parameters() recurses by default, so a parameter is also
    # visited through its ancestor modules. The name-only rules can fire on those
    # visits; using sets makes the repeated additions harmless.
    for module_name, module in model.named_modules():
        for parameter_name, _parameter in module.named_parameters():
            # The root module has an empty name, so no dot prefix is added for it.
            full_parameter_name = f"{module_name}.{parameter_name}" if module_name else parameter_name
            if parameter_name.endswith("bias"):
                # biases of all modules are not decayed
                no_decay.add(full_parameter_name)
            elif parameter_name.endswith("weight") and isinstance(module, whitelist_weight_modules):
                # weights of whitelisted modules are decayed
                decay.add(full_parameter_name)
            elif parameter_name.endswith("in_proj_weight"):
                # MHA projection layer, does not exist in my implementation
                decay.add(full_parameter_name)
            elif parameter_name.endswith("weight") and isinstance(module, blacklist_weight_modules):
                # weights of blacklisted modules are not decayed
                no_decay.add(full_parameter_name)
            elif (parameter_name.endswith("gamma") or parameter_name.endswith("beta")) and isinstance(
                module, LayerNorm
            ):
                # weights of LayerNorm modules are not decayed
                # NOTE: this branch is needed because the custom LayerNorm names its
                # "weight" and "bias" parameters gamma and beta, respectively.
                no_decay.add(full_parameter_name)
            elif parameter_name.endswith("pos_embed"):
                # positional embedding parameter is not decayed
                no_decay.add(full_parameter_name)

    # Look up parameters by full name, then check the two sets partition them exactly.
    param_dict = {parameter_name: parameter for parameter_name, parameter in model.named_parameters()}  # noqa: C416
    inter_params = decay & no_decay
    union_params = decay | no_decay
    assert not inter_params, f"Parameters {inter_params} are in both decay and no_decay sets."
    assert not (
        param_dict.keys() - union_params
    ), f"Parameters {param_dict.keys() - union_params} were not categorized."

    # sorted() gives each group a deterministic parameter order.
    optim_groups: List[Dict[Literal["params", "weight_decay"], List[torch.nn.Parameter] | float]] = [
        {"params": [param_dict[parameter_name] for parameter_name in sorted(decay)], "weight_decay": weight_decay},
        {"params": [param_dict[parameter_name] for parameter_name in sorted(no_decay)], "weight_decay": 0.0},
    ]

    return optim_groups

We won’t go into too much technical rigour on the optimizer, but note that more modern variations exist, for instance DecoupledAdamW, which further decouples the weight decay term \(\lambda\) from the learning rate, as well as RAdam [Liu et al., 2019], which is intended to address bias correction factors leading to higher variance in the adaptive learning rate for the initial training iterations.

To this end, we create the optimizer in code as follows, noting that we would not use the exact same configuration as Karpathy, but rather use what is deemed fit for the case at hand.

pprint(composer.optimizer)
# Look up the config class registered under the optimizer's name, then rebuild it
# from the composer's optimizer settings dumped as plain Python values.
optimizer_config_cls = OPTIMIZER_REGISTRY[composer.optimizer.name]
optimizer_pydantic_config = optimizer_config_cls(**composer.optimizer.model_dump(mode="python"))
pprint(optimizer_pydantic_config)
AdamConfig(name='torch.optim.Adam', lr=0.2, betas=(0.9, 0.98), eps=1e-09, weight_decay=0.0)
AdamConfig(name='torch.optim.Adam', lr=0.2, betas=(0.9, 0.98), eps=1e-09, weight_decay=0.0)
# The grouping helper below reads composer.optimizer.weight_decay, so make sure it exists.
assert hasattr(composer.optimizer, "weight_decay")

# Build the optimizer over the decayed / non-decayed parameter groups.
optimizer = optimizer_pydantic_config.build(
    params=apply_weight_decay_to_different_param_groups(
        model=model, weight_decay=composer.optimizer.weight_decay
    )
)
pprint(optimizer)
Adam (
Parameter Group 0
amsgrad: False
betas: (0.9, 0.98)
capturable: False
differentiable: False
eps: 1e-09
foreach: None
fused: None
lr: 0.2
maximize: False
weight_decay: 0.0

Parameter Group 1
amsgrad: False
betas: (0.9, 0.98)
capturable: False
differentiable: False
eps: 1e-09
foreach: None
fused: None
lr: 0.2
maximize: False
weight_decay: 0.0
)

Learning Rate Scheduler#

Motivation#

In training deep neural networks, learning rate is definitely one of the most important parameters to tune. Optimization algorithms like Adam and SGD tell us how the weights \(\boldsymbol{\theta} \in \boldsymbol{\Theta}\) should be updated, but the learning rate \(\eta\) tells us the rate at which the weights are being updated.

Theoretically and empirically, the magnitude of the learning rate \(\eta\) can have a significant impact on the training process. If the learning rate is too large, we might experience divergence; on the other hand, if the learning rate is too small, the model might take longer to converge or might get stuck in a local minimum. The condition number of the problem also impacts optimization efficiency, as discussed in the momentum section, where the concept can be understood as the ratio between the smallest and largest changes possible in response to adjustments in different directions of the parameter space, reflecting the variance in sensitivity across these directions[^1] [Zhang et al., 2023]. As we progress through the training steps, it is also equally important to apply a learning rate scheduler to adjust (may not be monotonic decay) the learning rate discriminatively.

In the paper SGDR: Stochastic Gradient Descent with Restarts by Loshchilov and Hutter, they introduced a heuristic that relies on the empirical observation that we can improve the convergence of the model (usually in ill-conditioned situations) if we follow an annealing process over the learning rate. This means that at the beginning of training, we do not want to decrease the learning rate too drastically. My (potentially wrong) intuition is that this may allow the model to consider exploring a larger parameter space without too many constraints than if we were to rapidly decrease the learning rate. The authors further claim that as we progress towards the end of the training, we would want to “fine-tune” the model parameters with a very small learning rate, as it could potentially help “refine” the solution space to find a “more optimal” set of parameters [Loshchilov and Hutter, 2016]. This idea naturally leads us to using a cosine function because the cosine curve starts with a gentle slope, which coincides with the idea of gradual decrease in learning rate in the beginning, and the cosine curve naturally flattens and approaches zero towards the end as it reaches the end of its cycle, which again coincides with the idea of fine-tuning the model parameters with a very small learning rate.

Consequently, a cosine decaying scheduler has the below function form for learning rates in the range \(t \in [0, T]\):

\[ \eta_t=\eta_T+\frac{\eta_0-\eta_T}{2}\left(1+\cos \left(\frac{\pi t}{T}\right)\right) \]

Here \(\eta_0\) is the initial learning rate, \(\eta_T\) is the target rate at time \(T\). Furthermore, for \(t>T\) we simply pin the value to \(\eta_T\) without increasing it again. \(T\) represents the end of the learning rate annealing phase rather than the absolute end of training. It’s the point in time when the learning rate reaches \(\eta_T\), the target rate, and beyond which the learning rate is maintained constant at \(\eta_T\).

  • During \(0 \leq t < T\): The learning rate \(\eta_t\) is actively adjusted according to the cosine annealing formula. It transitions from the initial learning rate \(\eta_0\) towards the target rate \(\eta_T\), following a half-cosine wave.

  • For \(t \geq T\): The learning rate is set to \(\eta_T\) and no longer changes. This doesn’t necessarily mean that training must stop at \(t = T\). Training can continue beyond \(T\) with the learning rate fixed at \(\eta_T\).

In code, we can observe the behavior of the cosine annealing scheduler as follows:

from __future__ import annotations

from typing import Any, List

import matplotlib.pyplot as plt
import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import CosineAnnealingLR, _LRScheduler

def get_learning_rates(optimizer: Optimizer, scheduler: _LRScheduler, steps: int) -> List[float]:
    """Record the first param group's learning rate over ``steps`` iterations.

    Each iteration reads the current rate before stepping the optimizer and
    then the scheduler, so the first recorded value is the rate in effect
    before any scheduler step.
    """
    history: List[float] = []
    remaining = steps
    while remaining > 0:
        first_group = optimizer.param_groups[0]
        history.append(first_group["lr"])
        # Optimizer first, then scheduler.
        optimizer.step()
        scheduler.step()
        remaining -= 1
    return history

def plot_learning_rates(
    lrs: List[float], title: str, marker: str = "o", ax: plt.Axes | None = None, **kwargs: Any
) -> None:
    """Plot a learning-rate curve on ``ax``, or on the current axes if none is given.

    ``title`` is used both as the line's legend label and as the axes title;
    extra keyword arguments are forwarded to ``ax.plot``.
    """
    target_axes = ax if ax else plt.gca()

    target_axes.plot(lrs, label=title, marker=marker, **kwargs)
    # Title and axis labels, applied in a fixed order.
    for setter_name, text in (
        ("set_title", title),
        ("set_xlabel", "Step"),
        ("set_ylabel", "Learning Rate"),
    ):
        getattr(target_axes, setter_name)(text)
    target_axes.legend()

def main() -> None:
    """Plot cosine annealing with one long period next to one with a short, repeating period.

    Both schedules start from the same learning rate and run for the same
    number of steps; only ``T_max`` differs.
    """
    base_lr = 0.1
    floor_lr = 0
    num_steps = 100
    net = torch.nn.Linear(2, 1)

    # (plot title, T_max) for each schedule, in plotting order.
    schedules = (
        ("Non-Cyclic Cosine Annealing", num_steps),
        ("Cyclic Cosine Annealing", num_steps // 8),
    )

    curves = []
    for plot_title, period in schedules:
        # A fresh optimizer per schedule so each curve starts from base_lr.
        sgd = torch.optim.SGD(net.parameters(), lr=base_lr)
        annealer = CosineAnnealingLR(sgd, T_max=period, eta_min=floor_lr)
        curves.append((plot_title, get_learning_rates(sgd, annealer, num_steps)))

    # Plotting
    _, axes = plt.subplots(1, 2, figsize=(12, 4))
    for axis, (plot_title, curve) in zip(axes, curves):
        plot_learning_rates(curve, plot_title, ax=axis)

    plt.tight_layout()
    plt.show()

main()
../../_images/fb309fdb16ab31ac353909a2013c64448baaf84a92a7c9d94320b6d32da2d003.png

Warmup#

Our motivation would have ended here, but in practice, we often see that the cosine annealing scheduler is often combined with a warmup phase. In Fig. 18, we can see that the loss curve is relatively smooth and converges way better than the ones without warmup.

../../_images/warmup_loss_plot_uvadlc.svg

Fig. 4 Training loss v.s. # of iterations of Transformers on the De-En IWSLT’14 dataset.#

It might be worth having some intuition on why warmup works so well in practice, and in particular, in language models like Transformers.

Firstly, the RAdam paper suggests warmup works as a variance reduction technique, which overcomes the problem of bias correction factors in optimizers like Adam, where having these bias correction factors would lead to larger variance in the adaptive learning rate during the initial training iterations [Lippe, 2023]. More concretely, Adam estimates the first and second moments of the gradient to change the learning rate of each individual parameter (hence adaptive), and having high variance between adaptive learning rates may de-stabilize the training. If we don’t want to swap out Adam, then this calls for a warmup phase to stabilize the learning rate and reduce the variance in the early stages of training.

Secondly, language models like Transformers use iteratively applied Layer Normalization across layers, which can lead to very high gradients during the first iterations. This can be solved by using Pre-Layer Normalization (similar to Pre-Activation ResNet), which applies normalization before the layer’s main operations, contributing to gradient stabilization and reducing the necessity for a warm-up phase, or by replacing Layer Normalization with other techniques (Adaptive Normalization, Power Normalization) [Lippe, 2023].

However, even though there are solutions to the problem, certain setups still use the Adam optimizer, and therefore warmup is still a simple and effective technique to stabilize the learning rate in the early stages of training - solving the aforementioned problems (i.e. stabilize the bias correction factors, moving averages of gradients and squared gradients).

To this end, we end our discussion on the motivation behind 1) using cosine annealing schedulers and 2) using warmup phases, often coupled with cosine annealing schedulers. In what follows, we will provide a more formal definition of the cosine annealing scheduler with warmup, and provide a running example to illustrate the behavior of the scheduler.

Definition#

The CosineAnnealingWithWarmupScheduler decays the learning rate \(\eta\) according to the decreasing part of a cosine curve, with an initial warmup \(t_{\text{warmup}}\).

This scheduler modulates \(\eta\) within defined upper and lower bounds over a predetermined interval, employing a cosine function. The formula for cosine annealing reflects the shape of a half-cosine wave, which decreases from a maximum value to a minimum and then increases back to the maximum. This cycle can repeat multiple times over the training process, depending on how the scheduler is configured. Although this approach suggests cyclic adjustments (oscillations) within the training duration, for simplicity’s sake, our specific implementation, inspired by MosaicML’s Composer’s CosineAnnealingWithWarmupScheduler, explicitly excludes considerations for such cycles/oscillations.

Definition 17 (Cosine Annealing With Warmup)

The CosineAnnealingWithWarmupScheduler modulates the learning rate \(\eta\) according to a two-phase process: a warmup phase followed by a cosine annealing phase. The learning rate multiplier[^lr-multiplier] \(\alpha_{t}\) at any given time (step) \(t\) is given by:

\[\begin{split} \begin{equation} \alpha_{t} = \begin{cases} \frac{t}{t_{\text{warmup}}}, & \text{if } t < t_{\text{warmup}} \\ \alpha_f + (1 - \alpha_f) \times \frac{1}{2} \left[1 + \cos(\pi \times \tau_w) \right], & \text{otherwise} \end{cases} \end{equation} \end{split}\]

where we denote:

  • \(t\) represents the current training step or epoch.

  • \(\eta_{\max}\) as the maximum learning rate reached during training, and often is the initial learning rate given into an optimizer.

  • \(t_{\text{warmup}}\) denotes the duration of the warmup period, in terms of the number of steps or epochs, during which the learning rate linearly increases to the maximum learning rate \(\eta_{\max}\).

  • \(t_{\max}\) as the maximum number of training steps, or maximum number of iterations in an epoch (see here).

  • \(\tau_w = \frac{t - t_{\text{warmup}}}{t_{\max}}\), the fraction of post-warmup time elapsed,

  • \(\alpha_f\) is a scaling factor that determines the final learning rate multiplier to decay to (a value between \(0\) and \(1\)), and this is a fixed value. For example, if \(\alpha_f = 0.1\) and the initial learning rate is \(\eta_{\max} = 3e-4\), then the final learning rate will be \(\eta_{\min} = 3e-4 \times 0.1 = 3e-5\).

The actual learning rate \(\eta_{t}\) at time (step) \(t\) is then computed as:

\[ \begin{equation} \eta_{t} = \alpha_{t} \times \eta_{\max} \end{equation} \]

where we emphasize again that \(\eta_{\max}\) is the maximum learning rate reached during training.

A Word on Oscillations

Note that if you set \(t_{\max}\) to the total number of training steps that is needed for the entire dataset \(\mathcal{S}\), the scheduler will only decay the learning rate after the warmup phase and not oscillate further. This configuration means that after completing the linear increase during the warmup, the learning rate will decrease following a cosine curve until it reaches the final learning rate determined by \(\alpha_f\).

  • Single Cycle (No Oscillation): If \(t_{\max}\) is set to cover exactly one half-cycle of the cosine function from the end of the warmup phase to the conclusion of training, the learning rate will monotonically decrease from its maximum value (at the end of warmup) to its minimum value (as determined by \(\alpha_f\)) without oscillating. This is because the scheduler’s active period only spans a single descent phase of the cosine wave.

  • Multiple Cycles (Oscillation): If \(t_{\max}\) is set to allow for a longer duration than what is needed for a single half-cycle descent, the cosine annealing function can complete its initial descent and then begin to ascend as part of a new cycle. This leads to oscillations in the learning rate—after decreasing, it will start to increase again, potentially multiple times, depending on the total number of cycles fitted within \(t_{\max}\). This is where the term “oscillation” comes into play; it describes the periodic increase and decrease in the learning rate according to the cosine function over multiple cycles.

True oscillation, where the learning rate decreases and then increases within a training regime, typically requires either a restart mechanism (as seen in Cosine Annealing with Warm Restarts) or an explicit multi-cycle configuration. A standard cosine annealing scheduler, especially with a warmup phase, generally only supports a monotonic decrease within a single cycle, unless it is specifically designed to handle restarts or multiple cycles.

Implementation#

from __future__ import annotations

import math
from functools import partial

from torch.optim.lr_scheduler import LambdaLR
from torch.optim.optimizer import Optimizer

def _get_cosine_schedule_with_warmup_lr_lambda(
    current_step: int, *, num_warmup_steps: int, num_training_steps: int, alpha_f: float
) -> float:
    """
    Helper function for calculating the learning rate multiplier using cosine
    annealing with warmup.

    The value returned is a multiplier, not a learning rate: it is meant to be
    applied on top of a base learning rate (for example as the ``lr_lambda``
    of a ``LambdaLR`` scheduler).

    Parameters
    ----------
    current_step: int
        The current step in the training process.
    num_warmup_steps: int
        The number of steps for the warmup phase, during which the multiplier
        grows linearly as ``current_step / num_warmup_steps``.
    num_training_steps: int
        The total number of training steps, used as the denominator of the
        post-warmup progress fraction. Values below 1 are treated as 1 so
        that a degenerate schedule does not divide by zero.
    alpha_f: float
        The final learning rate multiplier that the cosine phase decays to.

    Returns
    -------
    float
        The learning rate multiplier for ``current_step``.
    """

    if current_step < num_warmup_steps:
        # Linear warmup; the max() guards the denominator.
        return current_step / max(1, num_warmup_steps)

    # Guard the denominator the same way as the warmup branch so that
    # `num_training_steps == 0` no longer raises ZeroDivisionError.
    tau_w = (current_step - num_warmup_steps) / max(1, num_training_steps)
    # Clamp the progress fraction so the multiplier stays at `alpha_f` once the
    # half-cosine has been fully traversed instead of rising again.
    tau_w = min(1.0, tau_w)
    return alpha_f + (1 - alpha_f) * (1 + math.cos(math.pi * tau_w)) / 2


def get_cosine_annealing_with_warmup(
    optimizer: Optimizer,
    num_warmup_steps: int,
    num_training_steps: int,
    alpha_f: float = 0.1,
    last_epoch: int = -1,
    verbose: bool = False,
) -> LambdaLR:
    """
    Build a `LambdaLR` scheduler whose multiplier follows a linear warmup and
    then the decreasing part of a cosine curve.

    The multiplier at each step is computed by
    `_get_cosine_schedule_with_warmup_lr_lambda`: it ramps up linearly during
    the warmup steps and afterwards anneals along a cosine towards `alpha_f`.

    Parameters
    ----------
    optimizer: `~torch.optim.Optimizer`
        The optimizer whose learning rate is scheduled.
    num_warmup_steps: int
        The number of steps for the warmup phase.
    num_training_steps: int
        The total number of training steps.
    alpha_f: float
        The final learning rate multiplier reached at the end of the cosine
        phase, by default 0.1.
    last_epoch: int
        The index of the last epoch when resuming training, by default -1.
    verbose: bool
        Forwarded to `LambdaLR` as its `verbose` argument, by default False.

    Returns
    -------
    `torch.optim.lr_scheduler.LambdaLR`
        The scheduler with the appropriate schedule.

    Examples
    --------
    >>> from torch import nn
    >>> from torch.optim import Adam
    >>> dummy_model = nn.Linear(1, 1)
    >>> optimizer = Adam(dummy_model.parameters(), lr=3e-4)
    >>> scheduler = get_cosine_annealing_with_warmup(optimizer, num_warmup_steps=5, num_training_steps=10, alpha_f=0.5)
    >>> assert isinstance(scheduler, LambdaLR)
    """

    # Freeze the schedule hyperparameters so that the resulting callable only
    # takes the current step, which is what `LambdaLR` supplies.
    schedule_kwargs = {
        "num_warmup_steps": num_warmup_steps,
        "num_training_steps": num_training_steps,
        "alpha_f": alpha_f,
    }
    multiplier_fn = partial(_get_cosine_schedule_with_warmup_lr_lambda, **schedule_kwargs)
    return LambdaLR(optimizer, multiplier_fn, last_epoch, verbose)
# Warm up for three passes over the training loader.
num_warmup_steps = len(train_loader) * 3
num_training_steps = (
    len(train_dataset) // composer.data.train_loader["batch_size"]
) * composer.trainer.max_epochs
alpha_f = 1  # as if no decay
scheduler = get_cosine_annealing_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
    alpha_f=alpha_f,
)
from omnivault.transformer.core.scheduler import noam_lr_decay
# Warm up for three passes over the training loader.
warmup_steps = len(train_loader) * 3

# lr first increases in the warmup steps, and then decays
noam = lambda step: noam_lr_decay(step, d_model=128, warmup_steps=warmup_steps)  # noqa: E731

scheduler_config_cls = SCHEDULER_REGISTRY[cfg.scheduler.name]

# Configs derived from LambdaLRConfig are additionally given the schedule
# callable; every other config is built from `cfg.scheduler` alone.
scheduler_pydantic_config = (
    scheduler_config_cls(lr_lambda=noam, **cfg.scheduler)
    if issubclass(scheduler_config_cls, LambdaLRConfig)
    else scheduler_config_cls(**cfg.scheduler)
)

composer.scheduler = scheduler_pydantic_config
scheduler = scheduler_pydantic_config.build(optimizer=optimizer)

Criterion#

The Cross Entropy Loss function calculates the difference between two probability distributions - the predicted probability distribution output by the model (logits) and the actual distribution (target labels). It’s primarily used in classification tasks involving \(C\) classes.

  • \(\mathcal{B}\) : Denotes batch size,

  • \(K\) : The number of additional dimensions beyond batch and class, representing spatial or other feature dimensions in the input tensor,

  • \(N=\mathcal{B} \times d_1 \times \ldots \times d_K\) : Total count of individual elements across all dimensions, including batch and spatial dimensions. This value adjusts as per the dimensional complexity:

  • For \(K=0, N=\mathcal{B}\),

  • For \(K=1, N=\mathcal{B} \times d_1\),

  • For \(K>1, N\) scales accordingly.

  • \(C\) : The total number of classification categories,

  • \(x\) : Represents the input logits tensor,

  • \(y\) : Denotes the target tensor,

  • \(w\) : An optional tensor assigning weights to each class,

  • \(\mathcal{L}\) : Symbolizes the aggregate loss prior to any reduction,

  • \(l_b\) : The loss corresponding to the \(b\) th element within the batch, ranging over \(b=1\) to \(\mathcal{B}\).

Inputs and Targets#

  • Inputs (Logits): The function expects unnormalized logits for each class per input. These logits do not necessarily need to be positive values nor sum to 1. The shape of the input tensor can be:

    • For unbatched input: \((C)\),

    • For batched input: \((\mathcal{B}, C)\),

    • For \(K\)-dimensional input: \((\mathcal{B}, C, d_1, d_2, \ldots, d_K)\), suitable for tasks like pixel-wise classification in images where \(K \geq 1\).

  • Targets: When configuring the targets for the Cross Entropy Loss function, their expected shapes vary based on the nature of the targets (class indices vs. probabilities) and the dimensionality of the input:

    • For Class Indices as Targets:

      • Unbatched input: The shape should be a scalar representing a single class index in \([0, C)\).

      • Batched input: The shape should be \((\mathcal{B},)\), where each element is a class index for the corresponding input in the batch.

      • \(K\)-dimensional input: The shape should be \((\mathcal{B}, d_1, d_2, \ldots, d_K)\) for the \(K\)-dimensional case, with each element representing a class index for the corresponding spatial location.

    • For Probabilities as Targets (applicable in advanced scenarios like label smoothing or multi-label classification):

      • The shape of the targets must match the shape of the input logits tensor: \((\mathcal{B}, C)\) for batched input or \((\mathcal{B}, C, d_1, d_2, \ldots, d_K)\) for \(K\)-dimensional input. Each element in this tensor should be a probability corresponding to the likelihood of the class, with values in \([0, 1]\).

Loss Computation#

  1. For Class Indices as Targets:

    The loss for each element \(n\), accurately spanning across all considered dimensions, is calculated as:

    \[ \ell(x, y) = \mathcal{L} = \{l_1, \ldots, l_{N}\}^{\top}, \quad l_n = -w_{y_n} \cdot \log \left( \frac{\exp(x_{n, y_n})}{\sum_{c=1}^{C} \exp(x_{n, c})} \right) \cdot \mathbb{1}\{y_n \neq \text{ignore_index}\} \]

    Here, \(N\) correctly reflects the aggregate count of elements when considering \(\mathcal{B}\) and the \(K\)-dimensional context. Consequently, if \(K=0\), \(N\) reduces to \(\mathcal{B}\).

  2. For Probabilities as Targets:

    In cases where the targets are probabilities, the calculation over each element \(n\), aligning with \(N\)’s definition, should be:

    \[ \ell(x, y) = \mathcal{L} = \{l_1, \ldots, l_{N}\}^{\top}, \quad l_n = -\sum_{c=1}^{C} w_c \cdot y_{n, c} \cdot \log \left( \frac{\exp(x_{n, c})}{\sum_{i=1}^{C} \exp(x_{n, i})} \right) \]

Reduction#

  • No Reduction (reduction='none'):

    When the reduction is set to ‘none’, the loss computation preserves the original dimensionality of the input, effectively returning a tensor that maps directly to each input element’s loss. This tensor has the shape \((\mathcal{B}, d_1, \ldots, d_K)\), where each element \(l_{n}\) within this tensor represents the computed loss for the corresponding input element across all dimensions, including the batch and any \(K\)-dimensional space:

    \[ \mathcal{L} = \{l_1, \ldots, l_N\} \]

    This preserves the granularity of loss across the dataset, allowing for detailed analysis or custom reduction post hoc.

  • Mean Reduction (reduction='mean'):

    For the ‘mean’ reduction, the losses across all elements are averaged to yield a single scalar value. This operation accounts for the total count of elements (\(N\)), including those spanning batch and additional dimensions, and is not merely an average over the batch size \(\mathcal{B}\), but over all \(N\) elements:

    \[ \mathcal{L}_{mean} = \frac{1}{N} \sum_{n=1}^{N} l_n \]

    Here, traditionally we think of \(N\) as just the number of elements in the batch, but in the implementation, it spans all elements across the batch and \(K\)-dimensional spaces.

  • Sum Reduction (reduction='sum'):

    With ‘sum’ reduction, the losses for all elements are aggregated into a single scalar through summation, without averaging. This sums the losses across all elements, including those across the batch and \(K\)-dimensional spaces:

    \[ \mathcal{L}_{sum} = \sum_{n=1}^{N} l_n \]

    This scalar represents the total loss accumulated across the entire input set, providing a measure of overall loss magnitude without normalization by the number of elements.

Simple Binary Classification Example#

  • nn.CrossEntropyLoss in PyTorch expects the input logits to be of shape [N, C, d1, d2, ..., dK] (where N is the batch size, C is the number of classes, and d1 to dK are optional additional dimensions) and the target to be of shape [N, d1, d2, ..., dK].

  • Let’s look at a simplified example in image classification. The target is a single integer representing the class label, and the input logits are a vector of length C (the number of classes).

  • More concretely, in the below example, we have \(\mathcal{B} = 4\) (batch size), \(C = 2\) (number of classes), \(K = 0\) (no additional dimensions), and \(N = 4\) (total number of elements across all dimensions).

    • Our inputs (logits) are of shape \((\mathcal{B}, C) = (4, 2)\).

    • Our targets are of shape \((\mathcal{B}) = (4)\).

# Seeded generator; NOTE: it is not consumed in this cell because the tensors
# below are hard-coded.
rng = torch.Generator().manual_seed(composer.global_.seed)
criterion = nn.CrossEntropyLoss(reduction="mean")
B, C = 4, 2
targets = torch.tensor([1, 0, 0, 0]) # class index per sample: sample 1 is class 1, samples 2-4 are class 0
logits  = torch.tensor([[0.1, 0.9], [0.9, 0.1], [0.8, 0.2], [0.3, 0.7]])
assert logits.size() == (B, C)
# Mean of the four per-sample cross entropy losses.
loss   = criterion(logits, targets)
pprint(loss)
tensor(0.5232)

GPT Example#

First we set up the criterion for the model. We use the nn.CrossEntropyLoss criterion, which is commonly used for classification tasks.

# Mean-reduced cross entropy; the index of the PAD token is passed as
# `ignore_index`.
criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=vocabulary.token_to_index[vocabulary.PAD])

In scenarios involving classification tasks, targets and inputs (logits) usually align in a straightforward manner where each target is a single integer that signifies the class label, and the corresponding input logits form a vector of length \(C\) (the number of classes).

However, complexity arises when dealing with sequences, such as decoder outputs, because we are predicting a class label for every token in the sequence. So if a sequence \(\mathbf{x}\) has \(10\) tokens \(x_1, x_2, \ldots, x_{10}\), the target is a sequence of class labels \(y_1, y_2, \ldots, y_{10}\). The target shape is therefore \((\mathcal{B}, T)\), and each sample can be thought of as \(T = 10\) samples embedded within, each with a single class label corresponding to the token at that step. Consequently, the logits shape becomes \((\mathcal{B}, T, \mathcal{V})\), where \(\mathcal{V}\) plays the role of \(C\), the number of classes. In other words, this seemingly counter-intuitive structure can be easily reduced to our normal classification problem if we remove the batch dimension to have \((T, )\) and \((T, \mathcal{V})\) respectively for the target and logits - in which case we can treat \(T\) as the batch size \(\mathcal{B}\) and proceed as usual. To this end, if we want to introduce the batch dimension back, then a simple idea is to flatten the target and logits to \((\mathcal{B} \times T, )\) and \((\mathcal{B} \times T, \mathcal{V})\) respectively. This way, say we have a batch size of \(2\) and a sequence length of \(3\), we can easily think of it as \(6\) samples in total, each with a single class label. This idea wasn’t obvious to me at first because I did not have the fundamentals from earlier models such as RNNs and LSTMs.

Let’s consider the example below, with batch size \(\mathcal{B} = 2\), sequence length \(T = 10\), and number of classes \(V = 18\).

# Seeded generator; NOTE: it is not passed to anything in this cell.
rng = torch.Generator().manual_seed(composer.global_.seed)
# Take one batch from the training loader; only the first two items of the
# batch tuple are used here.
input_tokens, targets, _, _ = next(iter(train_loader))

B, T, V = 2, 10, 18

# get only first two samples
input_tokens = input_tokens[0:B, :]
targets = targets[0:B, :]
assert input_tokens.size() == (B, T)
assert targets.size() == (B, T)

# get logits
logits = model(input_tokens)
assert logits.size() == (B, T, V)  # 2, 10, 18
# Display the flattened shapes: logits collapse to one row per token, targets
# to one label per token.
logits.view(-1, logits.size(-1)).size(), targets.view(-1).size()
(torch.Size([20, 18]), torch.Size([20]))
# Collapse every leading dimension so that each token becomes one row of
# logits and one target label.
logits_flattened = logits.view(-1, logits.size(-1))
targets_flattened = targets.view(-1)

assert logits_flattened.size() == (B * T, V)

loss = criterion(logits_flattened, targets_flattened)
pprint(loss)
tensor(2.8830, grad_fn=<NllLossBackward0>)

The second way we will use is essentially the same as the first way, but more implicit.

# Swap the last two dimensions of the logits and display the resulting shape.
logits.permute(0, 2, 1).contiguous().shape
torch.Size([2, 18, 10])

We first permute the logits tensor to have the shape \((\mathcal{B}, \mathcal{V}, T)\), why so? Because recall our earlier definition in PyTorch’s nn.CrossEntropyLoss where the input logits should be of shape [B, C, d1, d2, ..., dK] (where B is the batch size, C is the number of classes, and d1 to dK are optional additional dimensions) and the target to be of shape [B, d1, d2, ..., dK]. In our case, we have \(\mathcal{B} = 2\) (batch size), \(C = 18\) (number of classes), \(K = 1\) (additional dimension), but our dimensions are in the wrong order: PyTorch expects the first dimension to be the batch size, which is fine, but the second dimension must be the class dimension, which in our case it is not, because our second dimension is the sequence length \(T\). So we swap the second and third dimensions to have the shape \((\mathcal{B}, \mathcal{V}, T)\), and PyTorch’s nn.CrossEntropyLoss then effectively treats the logits as \((\mathcal{B} \times T, \mathcal{V})\) and the targets as \((\mathcal{B} \times T, )\) - coinciding with our earlier discussion.

# Same criterion applied to the permuted logits and the un-flattened targets.
loss = criterion(logits.permute(0, 2, 1).contiguous(), targets.contiguous())
pprint(loss) # same
tensor(2.8830, grad_fn=<NllLoss2DBackward0>)

In a GPT-like model, each token in the sequence is used to predict the next token. The model takes a sequence of tokens and produces a new sequence of the same length where each new token is predicted based on all the preceding tokens in the input sequence. The loss is then computed between the predicted sequence and the target sequence.

Let’s take a closer look at an example:

  • The original tensor: [15, 9, 0, 10, 3, 8, 13, 1, 2, 8, 14] which corresponds to <BOS>90+38=128<EOS>

  • Input tensor: [15, 9, 0, 10, 3, 8, 13, 1, 2, 8], which corresponds to <BOS>90+38=128 without EOS

  • Target tensor: [9, 0, 10, 3, 8, 13, 1, 2, 8, 14], which after masking the tokens up to and including = (with the masking token 16) becomes [16, 16, 16, 16, 16, 16, 1, 2, 8, 14]

During training:

  1. First Timestep: The model takes [15] (or [<BOS>] if 15 is your BOS token) and tries to predict the next token. Ideally, it should predict 9. But here, your target sequence starts with masked tokens (16, if 16 is your masking token). So the loss is computed between the predicted token and the masked token 16. But since CrossEntropyLoss has an ignore_index argument (now you know what it is for!), you can set it to 16 (the default is -100, so to rely on the default you would need to change the padding number), which tells the loss that whenever the ground truth is 16, that position is excluded from the loss computation. This allows the model to focus on learning from the relevant parts of the sequence while ignoring the masked portions.

  2. Second Timestep: The model takes [15, 9] and predicts the next token, which should be 0. Again, the target is a masked token 16.

  3. Seventh Timestep: The model takes [15, 9, 0, 10, 3, 8, 13] (which is <BOS>90+38=) and predicts the next token. Now the target is 1, so the loss is computed between the predicted token and 1. There is no mask anymore here, so the loss will be computed.

  4. Eighth Timestep: The model takes [15, 9, 0, 10, 3, 8, 13, 1] (which is <BOS>90+38=1) and predicts the next token. Now the target is 2, so the loss is computed between the predicted token and 2.

    1. Here’s an important thing for beginners (me): in a typical GPT-like architecture used for sequence-to-sequence tasks like this one, the model doesn’t use its own predictions as input during training. Instead, it uses the original, ground-truth input sequence. This is known as “teacher forcing.” In teacher forcing, even if the model predicts a wrong token at some timestep, it doesn’t affect the input sequence for subsequent timesteps. The model continues to get the original input sequence for the entire training epoch.

    2. So if the model predicts a 3 during the seventh timestep, where the ground truth is 1, the model would simply incur a higher loss for that prediction. However, the input for the eighth timestep would still be the ground truth sequence up to that point, regardless of what the model predicted at the seventh timestep.

    3. But it is noted that this behaviour is still autoregressive.

  5. Ninth Timestep: The model takes [15, 9, 0, 10, 3, 8, 13, 1, 2] and predicts the next token which is 8.

  6. Last: The model takes [15, 9, 0, 10, 3, 8, 13, 1, 2, 8] and predicts the next token which is 14 the EOS.

    1. The reason you need to predict EOS is simple intuitively, consider the case where there’s no need for EOS, then the model will not know when to stop.

This goes on until the entire sequence is processed. Note that the model never actually “sees” the target tokens during the prediction. It is solely relying on the tokens that came before the current token in the input sequence. After the model makes its prediction, then the predicted tokens are compared to the target tokens to compute the loss, which is then backpropagated to update the model weights.

A Smaller Example for Illustration#

The above example has too big of a dimension, let’s scale down \(T=10\) to \(T=3\) and \(V=18\) to \(V=4\).

# fmt: off
rng        = torch.Generator().manual_seed(composer.global_.seed)

B, T, V    = 2, 3, 4                                                   # Assuming we have B = batch size, T = sequence length, V = vocab size

# Both draws consume the same seeded generator, logits first and targets second.
logits     = torch.randn(B, T, V, generator=rng)                       # random stand-in for the logits from the head
targets    = torch.randint(low=0, high=V, size=(B, T), generator=rng)  # targets are the labels
# fmt: on

pprint(logits)
pprint(targets)
pprint(logits[0]) # logits for the first sequence [T=3, V=4]
pprint(targets[0]) # target for the first sequence [T=3]
tensor([[[ 1.9269,  1.4873,  0.9007, -2.1055],
│   │    [ 0.6784, -1.2345, -0.0431, -1.6047],
│   │    [ 0.3559, -0.6866, -0.4934,  0.2415]],
│   │   
│   │   [[-1.1109,  0.0915, -2.3169, -0.2168],
│   │    [-0.3097, -0.3957,  0.8034, -0.6216],
│   │    [-0.5920, -0.0631, -0.8286,  0.3309]]])
tensor([[2, 2, 1],
│   │   [3, 3, 3]])
tensor([[ 1.9269,  1.4873,  0.9007, -2.1055],
│   │   [ 0.6784, -1.2345, -0.0431, -1.6047],
│   │   [ 0.3559, -0.6866, -0.4934,  0.2415]])
tensor([2, 2, 1])

We establish some conceptual understanding first:

  • Each sample in the batch has the following characteristics:

    • Denote target and logit as the target and logits for a particular sample in the batch.

    • The target is of shape [T] = [3] and each element is the class/vocab label for each token in the sequence.

    • The logit is of shape [T, V] = [3, 4] and each row is the logits for each token in the sequence.

    • Therefore, we want to compare each row in logit with each element in target to compute the loss.

    • We can think of each row in logit as the prediction for each token in the sequence, and each element in target as the ground truth for each token in the sequence.

    • Intuitively this means that within each sample, there are many “sub-samples” where each sub-sample is a token in the sequence. If you can visualize this, then there should be no confusion.

  • In code, we can do so with the following manner:

    • Calculate loss for each token in each sample individually and then sum them up.

    • Reduction by mean will mean we need to divide our total_loss by the total number of samples in the batch. But remember that even though technically we have 2 samples in the batch, we are actually treating each token in each sample as a sub-sample, so the total samples is B * T where B is the batch size and T is the sequence length.

criterion = nn.CrossEntropyLoss(reduction="mean")

# Score every (sample, position) pair as its own single-element batch and
# accumulate the per-token losses.
total_loss = 0
for b in range(B):
    for t in range(T):
        logit = logits[b, t][None, :]  # add a leading batch dimension of size 1
        target = targets[b, t][None]  # matching single-element target
        total_loss = total_loss + criterion(logit, target)

pprint(total_loss)
# Mean reduction over all B * T tokens.
total_loss = total_loss / (B * T)
pprint(total_loss)
tensor(9.0105)
tensor(1.5017)

In PyTorch however, if you have a logits tensor of shape [B, S, V], you need to permute it to [B, V, S] to align with the format that CrossEntropyLoss wants, so that V (vocab size) is treated as C (number of classes), and S (sequence length) is treated as one of the additional dimensions d1, d2, ..., dK.

But all in all, if you understood the previous loop to calculate the loss for each token in each sample individually and then sum them up, then dividing to fulfill reduction of mean, then you should be fine.

Masking and Ignore Index#

# fmt: off
rng        = torch.Generator().manual_seed(composer.global_.seed)

B, T, V    = 2, 3, 4                                                   # Assuming we have B = batch size, T = sequence length, V = vocab size

# Both draws consume the same seeded generator, logits first and targets second.
logits     = torch.randn(B, T, V, generator=rng)                       # random stand-in for the logits from the head
targets    = torch.randint(low=0, high=V, size=(B, T), generator=rng)  # targets are the labels
# fmt: on

pprint(logits)
pprint(targets)
pprint(logits[0]) # logits for the first sequence [T=3, V=4]
pprint(targets[0]) # target for the first sequence [T=3]
tensor([[[ 1.9269,  1.4873,  0.9007, -2.1055],
│   │    [ 0.6784, -1.2345, -0.0431, -1.6047],
│   │    [ 0.3559, -0.6866, -0.4934,  0.2415]],
│   │   
│   │   [[-1.1109,  0.0915, -2.3169, -0.2168],
│   │    [-0.3097, -0.3957,  0.8034, -0.6216],
│   │    [-0.5920, -0.0631, -0.8286,  0.3309]]])
tensor([[2, 2, 1],
│   │   [3, 3, 3]])
tensor([[ 1.9269,  1.4873,  0.9007, -2.1055],
│   │   [ 0.6784, -1.2345, -0.0431, -1.6047],
│   │   [ 0.3559, -0.6866, -0.4934,  0.2415]])
tensor([2, 2, 1])
PAD_ = -123
targets[:, 0] = PAD_  # overwrite the first position of every sequence with the ignore value

criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=PAD_)

NON_IGNORE_COUNT = 0
total_loss = 0
for b in range(B):
    for t in range(T):
        logit = logits[b, t][None, :]  # add a leading batch dimension of size 1
        target = targets[b, t][None]  # matching single-element target
        # Only positions whose target is not the ignore value contribute.
        if target.item() != PAD_:
            total_loss = total_loss + criterion(logit, target)
            NON_IGNORE_COUNT = NON_IGNORE_COUNT + 1

pprint(total_loss)
# Average over the counted (non-ignored) positions only.
total_loss = total_loss / NON_IGNORE_COUNT
pprint(total_loss)
tensor(6.2188)
tensor(1.5547)

NOTE: NON_IGNORE_COUNT is used instead of BxT, why? Cause we are averaging over all non-ignored guys!

# Permute logits from [B, S, V] to [B, V, S] so the class dimension comes second
logits_permuted = logits.permute(0, 2, 1)

# Instantiate the CrossEntropyLoss
# With reduction="mean" the losses are averaged; targets equal to PAD_ are
# passed as ignore_index.
criterion  = nn.CrossEntropyLoss(reduction="mean", ignore_index=PAD_)

loss = criterion(logits_permuted, targets)
pprint(loss)
tensor(1.5547)
# Permute logits from [B, S, V] to [B, V, S] so the class dimension comes second
logits_permuted = logits.permute(0, 2, 1)

# Instantiate the CrossEntropyLoss
# With reduction="mean" the losses are averaged; targets equal to PAD_ are
# passed as ignore_index.
criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=PAD_)

loss = criterion(logits_permuted, targets)
pprint(loss)
tensor(1.5547)

Initializing Criterion With Composer#

from omnivault.transformer.config.criterion import CRITERION_REGISTRY

# Look up the criterion config class by the name given in the config, then
# build it from the config's criterion section.
criterion_config_cls = CRITERION_REGISTRY[cfg.criterion.name]
criterion_pydantic_config = criterion_config_cls(**cfg.criterion)

criterion = criterion_pydantic_config.create_instance()
# Sanity check: the configured ignore_index must be the PAD token's index.
assert criterion.ignore_index == vocabulary.token_to_index[vocabulary.PAD]

pprint(criterion.ignore_index)
pprint(criterion.reduction)
16
'mean'

State#

from omnivault.transformer.core.state import State

# Gather the objects built in the previous sections into one State container,
# which is what the Trainer is constructed from.
state = State(
    # text side
    vocabulary=vocabulary,
    tokenizer=tokenizer,
    # optimisation side
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)
state.pretty_print()
State(
model=GPTDecoder(
  (tok_embed): Embedding(18, 128)
  (decoder_blocks): ModuleList(
(0-1): 2 x GPTDecoderBlock(
(masked_self_attention_mha): MultiHeadedAttention(
│   │   (W_Q): Linear(in_features=128, out_features=128, bias=False)
│   │   (W_K): Linear(in_features=128, out_features=128, bias=False)
│   │   (W_V): Linear(in_features=128, out_features=128, bias=False)
│   │   (W_O): Linear(in_features=128, out_features=128, bias=False)
│   │   (attention): ScaledDotProductAttention(
│   │     (dropout): Dropout(p=0.0, inplace=False)
│   │   )
│   │   (dropout): Dropout(p=0.1, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
│   │   (ffn): ModuleDict(
│   │     (context_fc): Linear(in_features=128, out_features=256, bias=True)
│   │     (activation): GELU(approximate='tanh')
│   │     (context_projection): Linear(in_features=256, out_features=128, bias=True)
│   │     (dropout): Dropout(p=0.1, inplace=False)
│   │   )
)
(add_norm_1): AddNorm(
│   │   (dropout): Dropout(p=0.1, inplace=False)
│   │   (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)
(add_norm_2): AddNorm(
│   │   (dropout): Dropout(p=0.1, inplace=False)
│   │   (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)
)
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (head): Linear(in_features=128, out_features=18, bias=True)
),
criterion=CrossEntropyLoss(),
optimizer=Adam (
Parameter Group 0
amsgrad: False
betas: (0.9, 0.98)
capturable: False
differentiable: False
eps: 1e-09
foreach: None
fused: None
initial_lr: 0.2
lr: 2.2961808030073203e-05
maximize: False
weight_decay: 0.0

Parameter Group 1
amsgrad: False
betas: (0.9, 0.98)
capturable: False
differentiable: False
eps: 1e-09
foreach: None
fused: None
initial_lr: 0.2
lr: 2.2961808030073203e-05
maximize: False
weight_decay: 0.0
),
scheduler=<torch.optim.lr_scheduler.LambdaLR object at 0x7f084aecb250>,
epoch_index=0,
train_batch_index=0,
step_index=0,
history={},
vocabulary=<omnivault.transformer.core.vocabulary.AdderVocabulary object at 0x7f078ee128e0>,
tokenizer=<omnivault.transformer.core.tokenizer.AdderTokenizer object at 0x7f078ee12790>,
tokens_per_iter=None
)

Trainer#

trainer = Trainer(
    state=state,
    composer=composer,
    # logger=LOGGER,
    device=composer.trainer.device,  # type: ignore[arg-type]
)

# Both callback changes below target the end-of-validation-epoch event.
valid_epoch_end_event = TrainerEvent.ON_VALID_EPOCH_END.value

# Unregister `save_state` from that event.
trainer.remove_callback(event=valid_epoch_end_event, callback=save_state)


def _evaluate_on_valid_epoch_end(trainer):
    """Run evaluation/generation for `trainer` with `num_batches_to_eval=None`."""
    return evaluate_and_generate_on_valid_epoch_end(trainer, num_batches_to_eval=None)


trainer.add_callback(valid_epoch_end_event, _evaluate_on_valid_epoch_end)

# Train, then keep the history recorded on the returned state.
_trained_state = trainer.fit(
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
)
# _trained_state.pretty_print()
history = _trained_state.history
/home/runner/work/omniverse/omniverse/omnivault/transformer/core/trainer.py:173: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
  self.scaler = torch.cuda.amp.GradScaler(**self.scaler_config)
2024-09-20 14:10:06,059 - root - INFO - Total Parameters:                   270226
2024-09-20 14:10:06,059 - root - INFO - Trainable Parameters:               270226
2024-09-20 14:10:06,060 - root - INFO - Vocabulary Size:                    18
2024-09-20 14:10:06,060 - root - INFO - Context Length:                     11
2024-09-20 14:10:06,061 - root - INFO - Device:                             cpu
2024-09-20 14:10:06,061 - root - INFO - Tokens per Iteration:               2816
2024-09-20 14:10:06,062 - root - INFO - Total Tokens Consumed In Training:  126720
2024-09-20 14:10:06,062 - root - INFO - Initial Learning Rate(s):           0.000022962, 0.000022962
2024-09-20 14:10:06,062 - root - INFO - 
2024-09-20 14:10:06,065 - root - INFO - ====================================================== Starting Train Epoch: 1/9 ======================================================
2024-09-20 14:10:06,066 - root - INFO - Learning rates for each parameter group: 0.00002296180803007320, 0.00002296180803007320
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 1, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 1, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=747.79352, average_batch_loss=2.92107, average_batch_perplexity=18.56111, lr=0.000022962, 0.000022962]
Epoch: 1, Step: 1:   4%|▎         | 1/28 [00:00<00:02,  9.92it/s, total_batch_loss=747.79352, average_batch_loss=2.92107, average_batch_perplexity=18.56111, lr=0.000022962, 0.000022962]
Epoch: 1, Step: 2:   4%|▎         | 1/28 [00:00<00:02,  9.92it/s, total_batch_loss=747.79352, average_batch_loss=2.92107, average_batch_perplexity=18.56111, lr=0.000022962, 0.000022962]
Epoch: 1, Step: 2:   4%|▎         | 1/28 [00:00<00:02,  9.92it/s, total_batch_loss=739.30005, average_batch_loss=2.88789, average_batch_perplexity=17.95540, lr=0.000045924, 0.000045924]
Epoch: 1, Step: 3:   4%|▎         | 1/28 [00:00<00:02,  9.92it/s, total_batch_loss=739.30005, average_batch_loss=2.88789, average_batch_perplexity=17.95540, lr=0.000045924, 0.000045924]
Epoch: 1, Step: 3:   4%|▎         | 1/28 [00:00<00:02,  9.92it/s, total_batch_loss=720.04199, average_batch_loss=2.81266, average_batch_perplexity=16.65422, lr=0.000068885, 0.000068885]
Epoch: 1, Step: 3:  11%|█         | 3/28 [00:00<00:02, 10.82it/s, total_batch_loss=720.04199, average_batch_loss=2.81266, average_batch_perplexity=16.65422, lr=0.000068885, 0.000068885]
Epoch: 1, Step: 4:  11%|█         | 3/28 [00:00<00:02, 10.82it/s, total_batch_loss=720.04199, average_batch_loss=2.81266, average_batch_perplexity=16.65422, lr=0.000068885, 0.000068885]
Epoch: 1, Step: 4:  11%|█         | 3/28 [00:00<00:02, 10.82it/s, total_batch_loss=695.30609, average_batch_loss=2.71604, average_batch_perplexity=15.12032, lr=0.000091847, 0.000091847]
Epoch: 1, Step: 5:  11%|█         | 3/28 [00:00<00:02, 10.82it/s, total_batch_loss=695.30609, average_batch_loss=2.71604, average_batch_perplexity=15.12032, lr=0.000091847, 0.000091847]
Epoch: 1, Step: 5:  11%|█         | 3/28 [00:00<00:02, 10.82it/s, total_batch_loss=662.83459, average_batch_loss=2.58920, average_batch_perplexity=13.31908, lr=0.000114809, 0.000114809]
Epoch: 1, Step: 5:  18%|█▊        | 5/28 [00:00<00:01, 11.63it/s, total_batch_loss=662.83459, average_batch_loss=2.58920, average_batch_perplexity=13.31908, lr=0.000114809, 0.000114809]
Epoch: 1, Step: 6:  18%|█▊        | 5/28 [00:00<00:01, 11.63it/s, total_batch_loss=662.83459, average_batch_loss=2.58920, average_batch_perplexity=13.31908, lr=0.000114809, 0.000114809]
Epoch: 1, Step: 6:  18%|█▊        | 5/28 [00:00<00:01, 11.63it/s, total_batch_loss=639.73022, average_batch_loss=2.49895, average_batch_perplexity=12.16967, lr=0.000137771, 0.000137771]
Epoch: 1, Step: 7:  18%|█▊        | 5/28 [00:00<00:01, 11.63it/s, total_batch_loss=639.73022, average_batch_loss=2.49895, average_batch_perplexity=12.16967, lr=0.000137771, 0.000137771]
Epoch: 1, Step: 7:  18%|█▊        | 5/28 [00:00<00:01, 11.63it/s, total_batch_loss=617.32874, average_batch_loss=2.41144, average_batch_perplexity=11.15001, lr=0.000160733, 0.000160733]
Epoch: 1, Step: 7:  25%|██▌       | 7/28 [00:00<00:01, 11.90it/s, total_batch_loss=617.32874, average_batch_loss=2.41144, average_batch_perplexity=11.15001, lr=0.000160733, 0.000160733]
Epoch: 1, Step: 8:  25%|██▌       | 7/28 [00:00<00:01, 11.90it/s, total_batch_loss=617.32874, average_batch_loss=2.41144, average_batch_perplexity=11.15001, lr=0.000160733, 0.000160733]
Epoch: 1, Step: 8:  25%|██▌       | 7/28 [00:00<00:01, 11.90it/s, total_batch_loss=597.33167, average_batch_loss=2.33333, average_batch_perplexity=10.31219, lr=0.000183694, 0.000183694]
Epoch: 1, Step: 9:  25%|██▌       | 7/28 [00:00<00:01, 11.90it/s, total_batch_loss=597.33167, average_batch_loss=2.33333, average_batch_perplexity=10.31219, lr=0.000183694, 0.000183694]
Epoch: 1, Step: 9:  25%|██▌       | 7/28 [00:00<00:01, 11.90it/s, total_batch_loss=581.93115, average_batch_loss=2.27317, average_batch_perplexity=9.71012, lr=0.000206656, 0.000206656] 
Epoch: 1, Step: 9:  32%|███▏      | 9/28 [00:00<00:01, 12.02it/s, total_batch_loss=581.93115, average_batch_loss=2.27317, average_batch_perplexity=9.71012, lr=0.000206656, 0.000206656]
Epoch: 1, Step: 10:  32%|███▏      | 9/28 [00:00<00:01, 12.02it/s, total_batch_loss=581.93115, average_batch_loss=2.27317, average_batch_perplexity=9.71012, lr=0.000206656, 0.000206656]
Epoch: 1, Step: 10:  32%|███▏      | 9/28 [00:00<00:01, 12.02it/s, total_batch_loss=566.71552, average_batch_loss=2.21373, average_batch_perplexity=9.14981, lr=0.000229618, 0.000229618]
Epoch: 1, Step: 11:  32%|███▏      | 9/28 [00:00<00:01, 12.02it/s, total_batch_loss=566.71552, average_batch_loss=2.21373, average_batch_perplexity=9.14981, lr=0.000229618, 0.000229618]
Epoch: 1, Step: 11:  32%|███▏      | 9/28 [00:00<00:01, 12.02it/s, total_batch_loss=552.24475, average_batch_loss=2.15721, average_batch_perplexity=8.64694, lr=0.000252580, 0.000252580]
Epoch: 1, Step: 11:  39%|███▉      | 11/28 [00:00<00:01, 11.95it/s, total_batch_loss=552.24475, average_batch_loss=2.15721, average_batch_perplexity=8.64694, lr=0.000252580, 0.000252580]
Epoch: 1, Step: 12:  39%|███▉      | 11/28 [00:01<00:01, 11.95it/s, total_batch_loss=552.24475, average_batch_loss=2.15721, average_batch_perplexity=8.64694, lr=0.000252580, 0.000252580]
Epoch: 1, Step: 12:  39%|███▉      | 11/28 [00:01<00:01, 11.95it/s, total_batch_loss=543.99951, average_batch_loss=2.12500, average_batch_perplexity=8.37288, lr=0.000275542, 0.000275542]
Epoch: 1, Step: 13:  39%|███▉      | 11/28 [00:01<00:01, 11.95it/s, total_batch_loss=543.99951, average_batch_loss=2.12500, average_batch_perplexity=8.37288, lr=0.000275542, 0.000275542]
Epoch: 1, Step: 13:  39%|███▉      | 11/28 [00:01<00:01, 11.95it/s, total_batch_loss=530.47626, average_batch_loss=2.07217, average_batch_perplexity=7.94206, lr=0.000298504, 0.000298504]
Epoch: 1, Step: 13:  46%|████▋     | 13/28 [00:01<00:01, 12.08it/s, total_batch_loss=530.47626, average_batch_loss=2.07217, average_batch_perplexity=7.94206, lr=0.000298504, 0.000298504]
Epoch: 1, Step: 14:  46%|████▋     | 13/28 [00:01<00:01, 12.08it/s, total_batch_loss=530.47626, average_batch_loss=2.07217, average_batch_perplexity=7.94206, lr=0.000298504, 0.000298504]
Epoch: 1, Step: 14:  46%|████▋     | 13/28 [00:01<00:01, 12.08it/s, total_batch_loss=522.51752, average_batch_loss=2.04108, average_batch_perplexity=7.69895, lr=0.000321465, 0.000321465]
Epoch: 1, Step: 15:  46%|████▋     | 13/28 [00:01<00:01, 12.08it/s, total_batch_loss=522.51752, average_batch_loss=2.04108, average_batch_perplexity=7.69895, lr=0.000321465, 0.000321465]
Epoch: 1, Step: 15:  46%|████▋     | 13/28 [00:01<00:01, 12.08it/s, total_batch_loss=515.61969, average_batch_loss=2.01414, average_batch_perplexity=7.49428, lr=0.000344427, 0.000344427]
Epoch: 1, Step: 15:  54%|█████▎    | 15/28 [00:01<00:01, 12.21it/s, total_batch_loss=515.61969, average_batch_loss=2.01414, average_batch_perplexity=7.49428, lr=0.000344427, 0.000344427]
Epoch: 1, Step: 16:  54%|█████▎    | 15/28 [00:01<00:01, 12.21it/s, total_batch_loss=515.61969, average_batch_loss=2.01414, average_batch_perplexity=7.49428, lr=0.000344427, 0.000344427]
Epoch: 1, Step: 16:  54%|█████▎    | 15/28 [00:01<00:01, 12.21it/s, total_batch_loss=504.18988, average_batch_loss=1.96949, average_batch_perplexity=7.16703, lr=0.000367389, 0.000367389]
Epoch: 1, Step: 17:  54%|█████▎    | 15/28 [00:01<00:01, 12.21it/s, total_batch_loss=504.18988, average_batch_loss=1.96949, average_batch_perplexity=7.16703, lr=0.000367389, 0.000367389]
Epoch: 1, Step: 17:  54%|█████▎    | 15/28 [00:01<00:01, 12.21it/s, total_batch_loss=500.77026, average_batch_loss=1.95613, average_batch_perplexity=7.07193, lr=0.000390351, 0.000390351]
Epoch: 1, Step: 17:  61%|██████    | 17/28 [00:01<00:00, 12.31it/s, total_batch_loss=500.77026, average_batch_loss=1.95613, average_batch_perplexity=7.07193, lr=0.000390351, 0.000390351]
Epoch: 1, Step: 18:  61%|██████    | 17/28 [00:01<00:00, 12.31it/s, total_batch_loss=500.77026, average_batch_loss=1.95613, average_batch_perplexity=7.07193, lr=0.000390351, 0.000390351]
Epoch: 1, Step: 18:  61%|██████    | 17/28 [00:01<00:00, 12.31it/s, total_batch_loss=489.87445, average_batch_loss=1.91357, average_batch_perplexity=6.77726, lr=0.000413313, 0.000413313]
Epoch: 1, Step: 19:  61%|██████    | 17/28 [00:01<00:00, 12.31it/s, total_batch_loss=489.87445, average_batch_loss=1.91357, average_batch_perplexity=6.77726, lr=0.000413313, 0.000413313]
Epoch: 1, Step: 19:  61%|██████    | 17/28 [00:01<00:00, 12.31it/s, total_batch_loss=484.02982, average_batch_loss=1.89074, average_batch_perplexity=6.62428, lr=0.000436274, 0.000436274]
Epoch: 1, Step: 19:  68%|██████▊   | 19/28 [00:01<00:00, 12.31it/s, total_batch_loss=484.02982, average_batch_loss=1.89074, average_batch_perplexity=6.62428, lr=0.000436274, 0.000436274]
Epoch: 1, Step: 20:  68%|██████▊   | 19/28 [00:01<00:00, 12.31it/s, total_batch_loss=484.02982, average_batch_loss=1.89074, average_batch_perplexity=6.62428, lr=0.000436274, 0.000436274]
Epoch: 1, Step: 20:  68%|██████▊   | 19/28 [00:01<00:00, 12.31it/s, total_batch_loss=479.82721, average_batch_loss=1.87433, average_batch_perplexity=6.51642, lr=0.000459236, 0.000459236]
Epoch: 1, Step: 21:  68%|██████▊   | 19/28 [00:01<00:00, 12.31it/s, total_batch_loss=479.82721, average_batch_loss=1.87433, average_batch_perplexity=6.51642, lr=0.000459236, 0.000459236]
Epoch: 1, Step: 21:  68%|██████▊   | 19/28 [00:01<00:00, 12.31it/s, total_batch_loss=467.47736, average_batch_loss=1.82608, average_batch_perplexity=6.20952, lr=0.000482198, 0.000482198]
Epoch: 1, Step: 21:  75%|███████▌  | 21/28 [00:01<00:00, 12.41it/s, total_batch_loss=467.47736, average_batch_loss=1.82608, average_batch_perplexity=6.20952, lr=0.000482198, 0.000482198]
Epoch: 1, Step: 22:  75%|███████▌  | 21/28 [00:01<00:00, 12.41it/s, total_batch_loss=467.47736, average_batch_loss=1.82608, average_batch_perplexity=6.20952, lr=0.000482198, 0.000482198]
Epoch: 1, Step: 22:  75%|███████▌  | 21/28 [00:01<00:00, 12.41it/s, total_batch_loss=458.11777, average_batch_loss=1.78952, average_batch_perplexity=5.98659, lr=0.000505160, 0.000505160]
Epoch: 1, Step: 23:  75%|███████▌  | 21/28 [00:01<00:00, 12.41it/s, total_batch_loss=458.11777, average_batch_loss=1.78952, average_batch_perplexity=5.98659, lr=0.000505160, 0.000505160]
Epoch: 1, Step: 23:  75%|███████▌  | 21/28 [00:01<00:00, 12.41it/s, total_batch_loss=454.91241, average_batch_loss=1.77700, average_batch_perplexity=5.91210, lr=0.000528122, 0.000528122]
Epoch: 1, Step: 23:  82%|████████▏ | 23/28 [00:01<00:00, 12.33it/s, total_batch_loss=454.91241, average_batch_loss=1.77700, average_batch_perplexity=5.91210, lr=0.000528122, 0.000528122]
Epoch: 1, Step: 24:  82%|████████▏ | 23/28 [00:01<00:00, 12.33it/s, total_batch_loss=454.91241, average_batch_loss=1.77700, average_batch_perplexity=5.91210, lr=0.000528122, 0.000528122]
Epoch: 1, Step: 24:  82%|████████▏ | 23/28 [00:01<00:00, 12.33it/s, total_batch_loss=440.47971, average_batch_loss=1.72062, average_batch_perplexity=5.58801, lr=0.000551083, 0.000551083]
Epoch: 1, Step: 25:  82%|████████▏ | 23/28 [00:02<00:00, 12.33it/s, total_batch_loss=440.47971, average_batch_loss=1.72062, average_batch_perplexity=5.58801, lr=0.000551083, 0.000551083]
Epoch: 1, Step: 25:  82%|████████▏ | 23/28 [00:02<00:00, 12.33it/s, total_batch_loss=433.07043, average_batch_loss=1.69168, average_batch_perplexity=5.42860, lr=0.000574045, 0.000574045]
Epoch: 1, Step: 25:  89%|████████▉ | 25/28 [00:02<00:00, 12.07it/s, total_batch_loss=433.07043, average_batch_loss=1.69168, average_batch_perplexity=5.42860, lr=0.000574045, 0.000574045]
Epoch: 1, Step: 26:  89%|████████▉ | 25/28 [00:02<00:00, 12.07it/s, total_batch_loss=433.07043, average_batch_loss=1.69168, average_batch_perplexity=5.42860, lr=0.000574045, 0.000574045]
Epoch: 1, Step: 26:  89%|████████▉ | 25/28 [00:02<00:00, 12.07it/s, total_batch_loss=419.93542, average_batch_loss=1.64037, average_batch_perplexity=5.15709, lr=0.000597007, 0.000597007]
Epoch: 1, Step: 27:  89%|████████▉ | 25/28 [00:02<00:00, 12.07it/s, total_batch_loss=419.93542, average_batch_loss=1.64037, average_batch_perplexity=5.15709, lr=0.000597007, 0.000597007]
Epoch: 1, Step: 27:  89%|████████▉ | 25/28 [00:02<00:00, 12.07it/s, total_batch_loss=411.37436, average_batch_loss=1.60693, average_batch_perplexity=4.98748, lr=0.000619969, 0.000619969]
Epoch: 1, Step: 27:  96%|█████████▋| 27/28 [00:02<00:00, 12.19it/s, total_batch_loss=411.37436, average_batch_loss=1.60693, average_batch_perplexity=4.98748, lr=0.000619969, 0.000619969]
Epoch: 1, Step: 28:  96%|█████████▋| 27/28 [00:02<00:00, 12.19it/s, total_batch_loss=411.37436, average_batch_loss=1.60693, average_batch_perplexity=4.98748, lr=0.000619969, 0.000619969]
Epoch: 1, Step: 28:  96%|█████████▋| 27/28 [00:02<00:00, 12.19it/s, total_batch_loss=136.92018, average_batch_loss=1.55591, average_batch_perplexity=4.73940, lr=0.000642931, 0.000642931]
                                                                                                                                                                                          
2024-09-20 14:10:08,339 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:08,340 - root - INFO - Total Batches:                   28
2024-09-20 14:10:08,340 - root - INFO - Average Epoch Train Loss:        2.13059
2024-09-20 14:10:08,341 - root - INFO - Average Epoch Train Perplexity:  8.41986
2024-09-20 14:10:08,341 - root - INFO - 
2024-09-20 14:10:08,342 - root - INFO - ====================================================== Starting Valid Epoch: 1/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 1, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 1, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=381.24884, average_batch_loss=1.48925, average_batch_perplexity=4.43378]
Epoch: 1, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=381.24884, average_batch_loss=1.48925, average_batch_perplexity=4.43378]
Epoch: 1, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=382.19833, average_batch_loss=1.49296, average_batch_perplexity=4.45026]
Epoch: 1, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=382.19833, average_batch_loss=1.49296, average_batch_perplexity=4.45026]
Epoch: 1, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=385.25818, average_batch_loss=1.50491, average_batch_perplexity=4.50377]
Epoch: 1, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=385.25818, average_batch_loss=1.50491, average_batch_perplexity=4.50377]
Epoch: 1, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=385.25818, average_batch_loss=1.50491, average_batch_perplexity=4.50377]
Epoch: 1, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=384.94681, average_batch_loss=1.50370, average_batch_perplexity=4.49829]
Epoch: 1, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=384.94681, average_batch_loss=1.50370, average_batch_perplexity=4.49829]
Epoch: 1, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=387.60138, average_batch_loss=1.51407, average_batch_perplexity=4.54518]
Epoch: 1, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=387.60138, average_batch_loss=1.51407, average_batch_perplexity=4.54518]
Epoch: 1, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.38it/s, total_batch_loss=382.18872, average_batch_loss=1.49292, average_batch_perplexity=4.45009]
Epoch: 1, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.45it/s, total_batch_loss=382.18872, average_batch_loss=1.49292, average_batch_perplexity=4.45009]
Epoch: 1, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.45it/s, total_batch_loss=382.18872, average_batch_loss=1.49292, average_batch_perplexity=4.45009]
Epoch: 1, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.45it/s, total_batch_loss=380.99832, average_batch_loss=1.48827, average_batch_perplexity=4.42945]
Epoch: 1, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.45it/s, total_batch_loss=380.99832, average_batch_loss=1.48827, average_batch_perplexity=4.42945]
Epoch: 1, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.45it/s, total_batch_loss=309.65563, average_batch_loss=1.48873, average_batch_perplexity=4.43146]
                                                                                                                                                          
2024-09-20 14:10:08,680 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:08,680 - root - INFO - Total Batches:                   8
2024-09-20 14:10:08,681 - root - INFO - Average Epoch Valid Loss:        1.49705
2024-09-20 14:10:08,681 - root - INFO - Average Epoch Valid Perplexity:  4.46848
2024-09-20 14:10:08,682 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0117]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0078]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.35it/s, accuracy: 0.0078]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.35it/s, accuracy: 0.0078]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.35it/s, accuracy: 0.0216]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.55it/s, accuracy: 0.0216]
                                                                                           
2024-09-20 14:10:09,068 - root - INFO - Correct/Total Samples:           12/1000
2024-09-20 14:10:09,069 - root - INFO - Eval Accuracy:                   0.012
2024-09-20 14:10:09,089 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     1            1             0 13+48=061 13+48=090    False
     1            1             1 16+55=071 16+55=090    False
     1            1             2 79+34=113 79+34=111    False
     1            1             3 35+44=079 35+44=091    False
     1            1             4 16+50=066 16+50=090    False
     1            1             5 28+47=075 28+47=109    False
     1            1             6 00+74=074 00+74=090    False
     1            1             7 15+20=035 15+20=090    False
     1            1             8 72+60=132 72+60=091    False
     1            1             9 63+68=131 63+68=111    False
     1            1            10 29+45=074 29+45=109    False
     1            1            11 34+60=094 34+60=091    False
     1            1            12 53+70=123 53+70=091    False
     1            1            13 70+50=120 70+50=091    False
     1            1            14 11+84=095 11+84=090    False
     1            1            15 42+71=113 42+71=090    False
     1            1            16 98+22=120 98+22=112    False
     1            1            17 02+02=004 02+02=090    False
     1            1            18 15+85=100 15+85=091    False
     1            1            19 21+78=099 21+78=091    False
     1            1            20 61+79=140 61+79=110    False
     1            1            21 25+99=124 25+99=110    False
     1            1            22 09+85=094 09+85=121    False
     1            1            23 60+91=151 60+91=091    False
     1            1            24 35+30=065 35+30=090    False
     1            1            25 24+51=075 24+51=090    False
     1            1            26 93+91=184 93+91=112    False
     1            1            27 39+96=135 39+96=111    False
     1            1            28 64+35=099 64+35=091    False
     1            1            29 36+22=058 36+22=090    False
     1            1            30 68+45=113 68+45=111    False
     1            1            31 16+84=100 16+84=091    False
     1            1            32 91+52=143 91+52=091    False
     1            1            33 97+36=133 97+36=111    False
     1            1            34 27+37=064 27+37=091    False
     1            1            35 99+82=181 99+82=111    False
     1            1            36 03+42=045 03+42=090    False
     1            1            37 18+38=056 18+38=091    False
     1            1            38 32+20=052 32+20=090    False
     1            1            39 38+13=051 38+13=091    False
     1            1            40 68+42=110 68+42=110     True
     1            1            41 64+00=064 64+00=091    False
     1            1            42 48+94=142 48+94=111    False
     1            1            43 58+36=094 58+36=111    False
     1            1            44 41+22=063 41+22=090    False
     1            1            45 23+58=081 23+58=091    False
     1            1            46 67+46=113 67+46=111    False
     1            1            47 40+78=118 40+78=091    False
     1            1            48 90+38=128 90+38=121    False
     1            1            49 89+52=141 89+52=111    False
     1            1            50 37+77=114 37+77=111    False
     1            1            51 29+76=105 29+76=112    False
     1            1            52 42+90=132 42+90=091    False
     1            1            53 45+82=127 45+82=109    False
     1            1            54 35+95=130 35+95=110    False
     1            1            55 92+98=190 92+98=111    False
     1            1            56 73+91=164 73+91=112    False
     1            1            57 53+97=150 53+97=111    False
     1            1            58 98+69=167 98+69=111    False
     1            1            59 20+46=066 20+46=090    False
     1            1            60 48+69=117 48+69=111    False
     1            1            61 62+31=093 62+31=090    False
     1            1            62 80+59=139 80+59=121    False
     1            1            63 58+12=070 58+12=091    False
     1            1            64 08+96=104 08+96=121    False
     1            1            65 67+06=073 67+06=110    False
     1            1            66 22+04=026 22+04=090    False
     1            1            67 61+87=148 61+87=110    False
     1            1            68 95+27=122 95+27=111    False
     1            1            69 49+83=132 49+83=111    False
     1            1            70 43+00=043 43+00=090    False
     1            1            71 01+85=086 01+85=090    False
     1            1            72 11+68=079 11+68=090    False
     1            1            73 80+03=083 80+03=091    False
     1            1            74 54+83=137 54+83=110    False
     1            1            75 73+47=120 73+47=110    False
     1            1            76 99+93=192 99+93=111    False
     1            1            77 99+13=112 99+13=112     True
     1            1            78 92+66=158 92+66=111    False
     1            1            79 90+31=121 90+31=091    False
     1            1            80 25+69=094 25+69=109    False
     1            1            81 25+44=069 25+44=090    False
     1            1            82 00+93=093 00+93=090    False
     1            1            83 88+87=175 88+87=111    False
     1            1            84 47+56=103 47+56=110    False
     1            1            85 43+59=102 43+59=109    False
     1            1            86 22+00=022 22+00=090    False
     1            1            87 34+04=038 34+04=090    False
     1            1            88 65+13=078 65+13=091    False
     1            1            89 39+82=121 39+82=112    False
     1            1            90 66+83=149 66+83=111    False
     1            1            91 51+69=120 51+69=091    False
     1            1            92 80+21=101 80+21=091    False
     1            1            93 36+79=115 36+79=111    False
     1            1            94 21+68=089 21+68=090    False
     1            1            95 11+66=077 11+66=090    False
     1            1            96 55+19=074 55+19=091    False
     1            1            97 51+61=112 51+61=090    False
     1            1            98 38+88=126 38+88=111    False
     1            1            99 37+27=064 37+27=109    False
     1            1           100 18+63=081 18+63=091    False
     1            1           101 48+11=059 48+11=090    False
     1            1           102 72+68=140 72+68=112    False
     1            1           103 37+39=076 37+39=112    False
     1            1           104 64+95=159 64+95=111    False
     1            1           105 49+75=124 49+75=111    False
     1            1           106 45+66=111 45+66=109    False
     1            1           107 34+87=121 34+87=110    False
     1            1           108 02+84=086 02+84=090    False
     1            1           109 95+00=095 95+00=091    False
     1            1           110 09+56=065 09+56=121    False
     1            1           111 22+66=088 22+66=090    False
     1            1           112 43+18=061 43+18=091    False
     1            1           113 61+35=096 61+35=091    False
     1            1           114 13+73=086 13+73=090    False
     1            1           115 25+95=120 25+95=109    False
     1            1           116 73+96=169 73+96=111    False
     1            1           117 03+96=099 03+96=091    False
     1            1           118 97+82=179 97+82=111    False
     1            1           119 18+42=060 18+42=090    False
     1            1           120 29+98=127 29+98=111    False
     1            1           121 61+00=061 61+00=090    False
     1            1           122 22+98=120 22+98=109    False
     1            1           123 12+50=062 12+50=090    False
     1            1           124 02+58=060 02+58=090    False
     1            1           125 75+86=161 75+86=111    False
     1            1           126 31+57=088 31+57=090    False
     1            1           127 49+82=131 49+82=112    False
     1            1           128 15+33=048 15+33=090    False
     1            1           129 49+57=106 49+57=111    False
     1            1           130 61+70=131 61+70=091    False
     1            1           131 91+51=142 91+51=091    False
     1            1           132 50+05=055 50+05=090    False
     1            1           133 44+16=060 44+16=090    False
     1            1           134 92+01=093 92+01=091    False
     1            1           135 85+82=167 85+82=111    False
     1            1           136 07+41=048 07+41=090    False
     1            1           137 40+06=046 40+06=090    False
     1            1           138 79+62=141 79+62=111    False
     1            1           139 95+62=157 95+62=111    False
     1            1           140 42+93=135 42+93=091    False
     1            1           141 32+73=105 32+73=091    False
     1            1           142 47+09=056 47+09=121    False
     1            1           143 59+50=109 59+50=113    False
     1            1           144 61+77=138 61+77=109    False
     1            1           145 64+06=070 64+06=091    False
     1            1           146 35+10=045 35+10=090    False
     1            1           147 32+88=120 32+88=110    False
     1            1           148 03+95=098 03+95=091    False
     1            1           149 11+38=049 11+38=090    False
     1            1           150 21+67=088 21+67=090    False
     1            1           151 33+25=058 33+25=090    False
     1            1           152 63+45=108 63+45=091    False
     1            1           153 56+12=068 56+12=090    False
     1            1           154 19+79=098 19+79=112    False
     1            1           155 60+43=103 60+43=091    False
     1            1           156 07+61=068 07+61=090    False
     1            1           157 58+03=061 58+03=091    False
     1            1           158 11+10=021 11+10=090    False
     1            1           159 49+89=138 49+89=111    False
     1            1           160 37+58=095 37+58=111    False
     1            1           161 59+78=137 59+78=111    False
     1            1           162 11+21=032 11+21=090    False
     1            1           163 37+43=080 37+43=091    False
     1            1           164 44+21=065 44+21=090    False
     1            1           165 22+97=119 22+97=091    False
     1            1           166 65+35=100 65+35=109    False
     1            1           167 06+51=057 06+51=090    False
     1            1           168 65+25=090 65+25=091    False
     1            1           169 74+94=168 74+94=111    False
     1            1           170 87+55=142 87+55=111    False
     1            1           171 90+67=157 90+67=121    False
     1            1           172 11+02=013 11+02=090    False
     1            1           173 01+66=067 01+66=090    False
     1            1           174 56+00=056 56+00=091    False
     1            1           175 58+52=110 58+52=110     True
     1            1           176 24+99=123 24+99=110    False
     1            1           177 97+13=110 97+13=112    False
     1            1           178 42+94=136 42+94=091    False
     1            1           179 60+15=075 60+15=090    False
     1            1           180 20+46=066 20+46=090    False
     1            1           181 40+70=110 40+70=090    False
     1            1           182 95+45=140 95+45=111    False
     1            1           183 96+95=191 96+95=111    False
     1            1           184 98+20=118 98+20=126    False
     1            1           185 43+19=062 43+19=091    False
     1            1           186 50+69=119 50+69=121    False
     1            1           187 27+53=080 27+53=091    False
     1            1           188 24+25=049 24+25=090    False
     1            1           189 65+92=157 65+92=110    False
     1            1           190 28+14=042 28+14=090    False
     1            1           191 20+57=077 20+57=090    False
     1            1           192 59+97=156 59+97=111    False
     1            1           193 98+32=130 98+32=112    False
     1            1           194 55+84=139 55+84=110    False
     1            1           195 20+39=059 20+39=090    False
     1            1           196 86+47=133 86+47=111    False
     1            1           197 92+36=128 92+36=110    False
     1            1           198 05+38=043 05+38=091    False
     1            1           199 77+36=113 77+36=111    False
     1            1           200 41+64=105 41+64=090    False
     1            1           201 74+51=125 74+51=091    False
     1            1           202 74+55=129 74+55=110    False
     1            1           203 64+64=128 64+64=109    False
     1            1           204 60+19=079 60+19=091    False
     1            1           205 77+96=173 77+96=111    False
     1            1           206 22+30=052 22+30=090    False
     1            1           207 82+49=131 82+49=112    False
     1            1           208 39+67=106 39+67=111    False
     1            1           209 62+40=102 62+40=090    False
     1            1           210 28+71=099 28+71=091    False
     1            1           211 47+26=073 47+26=091    False
     1            1           212 98+54=152 98+54=111    False
     1            1           213 38+70=108 38+70=121    False
     1            1           214 63+40=103 63+40=091    False
     1            1           215 86+62=148 86+62=111    False
     1            1           216 22+65=087 22+65=090    False
     1            1           217 41+17=058 41+17=090    False
     1            1           218 68+88=156 68+88=111    False
     1            1           219 96+70=166 96+70=113    False
     1            1           220 99+29=128 99+29=111    False
     1            1           221 83+39=122 83+39=111    False
     1            1           222 26+55=081 26+55=091    False
     1            1           223 53+70=123 53+70=091    False
     1            1           224 94+12=106 94+12=091    False
     1            1           225 00+37=037 00+37=090    False
     1            1           226 36+94=130 36+94=110    False
     1            1           227 40+58=098 40+58=091    False
     1            1           228 19+80=099 19+80=091    False
     1            1           229 49+44=093 49+44=110    False
     1            1           230 70+27=097 70+27=091    False
     1            1           231 52+80=132 52+80=091    False
     1            1           232 77+90=167 77+90=113    False
     1            1           233 13+92=105 13+92=090    False
     1            1           234 59+09=068 59+09=112    False
     1            1           235 33+55=088 33+55=091    False
     1            1           236 85+16=101 85+16=110    False
     1            1           237 25+65=090 25+65=091    False
     1            1           238 46+20=066 46+20=090    False
     1            1           239 29+52=081 29+52=091    False
     1            1           240 32+36=068 32+36=090    False
     1            1           241 47+08=055 47+08=121    False
     1            1           242 21+84=105 21+84=090    False
     1            1           243 24+45=069 24+45=090    False
     1            1           244 29+15=044 29+15=091    False
     1            1           245 83+03=086 83+03=091    False
     1            1           246 83+36=119 83+36=111    False
     1            1           247 58+95=153 58+95=111    False
     1            1           248 76+79=155 76+79=111    False
     1            1           249 63+30=093 63+30=091    False
     1            1           250 38+24=062 38+24=091    False
     1            1           251 19+46=065 19+46=091    False
     1            1           252 99+66=165 99+66=111    False
     1            1           253 95+73=168 95+73=111    False
     1            1           254 65+27=092 65+27=109    False
     1            1           255 91+83=174 91+83=112    False
     1            2             0 65+49=114 65+49=111    False
     1            2             1 03+08=011 03+08=090    False
     1            2             2 67+81=148 67+81=111    False
     1            2             3 47+23=070 47+23=091    False
     1            2             4 43+91=134 43+91=091    False
     1            2             5 41+67=108 41+67=091    False
     1            2             6 02+33=035 02+33=090    False
     1            2             7 64+84=148 64+84=110    False
     1            2             8 81+64=145 81+64=091    False
     1            2             9 80+11=091 80+11=090    False
     1            2            10 78+01=079 78+01=091    False
     1            2            11 89+18=107 89+18=112    False
     1            2            12 45+52=097 45+52=091    False
     1            2            13 35+30=065 35+30=090    False
     1            2            14 53+32=085 53+32=091    False
     1            2            15 49+90=139 49+90=121    False
     1            2            16 41+37=078 41+37=090    False
     1            2            17 35+14=049 35+14=090    False
     1            2            18 92+50=142 92+50=123    False
     1            2            19 37+60=097 37+60=091    False
     1            2            20 91+61=152 91+61=091    False
     1            2            21 80+77=157 80+77=121    False
     1            2            22 66+24=090 66+24=091    False
     1            2            23 81+07=088 81+07=091    False
     1            2            24 85+59=144 85+59=111    False
     1            2            25 19+69=088 19+69=112    False
     1            2            26 91+44=135 91+44=091    False
     1            2            27 25+29=054 25+29=091    False
     1            2            28 27+08=035 27+08=091    False
     1            2            29 66+14=080 66+14=091    False
     1            2            30 95+11=106 95+11=091    False
     1            2            31 13+97=110 13+97=091    False
     1            2            32 94+40=134 94+40=121    False
     1            2            33 74+31=105 74+31=091    False
     1            2            34 49+00=049 49+00=091    False
     1            2            35 59+18=077 59+18=112    False
     1            2            36 07+65=072 07+65=091    False
     1            2            37 83+55=138 83+55=111    False
     1            2            38 49+80=129 49+80=121    False
     1            2            39 64+17=081 64+17=091    False
     1            2            40 48+83=131 48+83=111    False
     1            2            41 95+44=139 95+44=111    False
     1            2            42 71+26=097 71+26=091    False
     1            2            43 06+74=080 06+74=091    False
     1            2            44 34+24=058 34+24=090    False
     1            2            45 59+71=130 59+71=112    False
     1            2            46 68+32=100 68+32=110    False
     1            2            47 38+81=119 38+81=110    False
     1            2            48 29+56=085 29+56=110    False
     1            2            49 54+55=109 54+55=091    False
     1            2            50 31+27=058 31+27=090    False
     1            2            51 97+89=186 97+89=111    False
     1            2            52 48+09=057 48+09=121    False
     1            2            53 86+76=162 86+76=111    False
     1            2            54 82+59=141 82+59=111    False
     1            2            55 01+67=068 01+67=090    False
     1            2            56 26+06=032 26+06=090    False
     1            2            57 22+46=068 22+46=090    False
     1            2            58 85+16=101 85+16=110    False
     1            2            59 29+08=037 29+08=123    False
     1            2            60 73+94=167 73+94=111    False
     1            2            61 19+62=081 19+62=091    False
     1            2            62 86+62=148 86+62=111    False
     1            2            63 38+99=137 38+99=111    False
     1            2            64 64+25=089 64+25=091    False
     1            2            65 61+72=133 61+72=091    False
     1            2            66 78+88=166 78+88=111    False
     1            2            67 43+66=109 43+66=091    False
     1            2            68 69+35=104 69+35=111    False
     1            2            69 33+77=110 33+77=110     True
     1            2            70 37+37=074 37+37=110    False
     1            2            71 87+54=141 87+54=111    False
     1            2            72 68+90=158 68+90=113    False
     1            2            73 83+44=127 83+44=110    False
     1            2            74 41+09=050 41+09=090    False
     1            2            75 13+48=061 13+48=090    False
     1            2            76 01+41=042 01+41=090    False
     1            2            77 19+74=093 19+74=109    False
     1            2            78 15+05=020 15+05=090    False
     1            2            79 55+46=101 55+46=109    False
     1            2            80 68+33=101 68+33=111    False
     1            2            81 44+40=084 44+40=090    False
     1            2            82 88+03=091 88+03=121    False
     1            2            83 81+79=160 81+79=112    False
     1            2            84 18+98=116 18+98=112    False
     1            2            85 70+64=134 70+64=091    False
     1            2            86 26+44=070 26+44=090    False
     1            2            87 98+87=185 98+87=111    False
     1            2            88 18+74=092 18+74=091    False
     1            2            89 50+68=118 50+68=091    False
     1            2            90 13+51=064 13+51=090    False
     1            2            91 90+89=179 90+89=113    False
     1            2            92 47+78=125 47+78=111    False
     1            2            93 81+57=138 81+57=110    False
     1            2            94 34+47=081 34+47=091    False
     1            2            95 94+23=117 94+23=110    False
     1            2            96 07+70=077 07+70=096    False
     1            2            97 56+33=089 56+33=091    False
     1            2            98 33+04=037 33+04=090    False
     1            2            99 26+09=035 26+09=091    False
     1            2           100 14+92=106 14+92=090    False
     1            2           101 78+54=132 78+54=111    False
     1            2           102 36+76=112 36+76=110    False
     1            2           103 17+47=064 17+47=091    False
     1            2           104 28+18=046 28+18=091    False
     1            2           105 78+54=132 78+54=111    False
     1            2           106 84+72=156 84+72=111    False
     1            2           107 00+44=044 00+44=090    False
     1            2           108 50+41=091 50+41=090    False
     1            2           109 87+88=175 87+88=111    False
     1            2           110 11+66=077 11+66=090    False
     1            2           111 80+60=140 80+60=091    False
     1            2           112 78+76=154 78+76=111    False
     1            2           113 24+74=098 24+74=091    False
     1            2           114 88+48=136 88+48=111    False
     1            2           115 38+31=069 38+31=091    False
     1            2           116 29+27=056 29+27=109    False
     1            2           117 08+45=053 08+45=091    False
     1            2           118 28+13=041 28+13=090    False
     1            2           119 53+99=152 53+99=111    False
     1            2           120 47+92=139 47+92=110    False
     1            2           121 76+21=097 76+21=091    False
     1            2           122 53+96=149 53+96=111    False
     1            2           123 93+91=184 93+91=112    False
     1            2           124 97+33=130 97+33=111    False
     1            2           125 67+78=145 67+78=111    False
     1            2           126 58+05=063 58+05=121    False
     1            2           127 00+16=016 00+16=090    False
     1            2           128 80+19=099 80+19=091    False
     1            2           129 98+22=120 98+22=112    False
     1            2           130 09+62=071 09+62=091    False
     1            2           131 06+23=029 06+23=090    False
     1            2           132 32+99=131 32+99=110    False
     1            2           133 17+02=019 17+02=090    False
     1            2           134 64+35=099 64+35=091    False
     1            2           135 35+83=118 35+83=110    False
     1            2           136 71+36=107 71+36=091    False
     1            2           137 75+06=081 75+06=091    False
     1            2           138 88+95=183 88+95=111    False
     1            2           139 19+98=117 19+98=112    False
     1            2           140 28+89=117 28+89=111    False
     1            2           141 33+11=044 33+11=090    False
     1            2           142 34+49=083 34+49=109    False
     1            2           143 90+35=125 90+35=121    False
     1            2           144 22+90=112 22+90=090    False
     1            2           145 98+89=187 98+89=111    False
     1            2           146 88+47=135 88+47=111    False
     1            2           147 30+86=116 30+86=091    False
     1            2           148 31+48=079 31+48=090    False
     1            2           149 39+21=060 39+21=091    False
     1            2           150 19+17=036 19+17=091    False
     1            2           151 27+60=087 27+60=091    False
     1            2           152 12+16=028 12+16=090    False
     1            2           153 51+75=126 51+75=091    False
     1            2           154 10+74=084 10+74=090    False
     1            2           155 42+63=105 42+63=090    False
     1            2           156 40+14=054 40+14=090    False
     1            2           157 23+93=116 23+93=091    False
     1            2           158 85+26=111 85+26=110    False
     1            2           159 28+46=074 28+46=109    False
     1            2           160 28+33=061 28+33=091    False
     1            2           161 43+30=073 43+30=090    False
     1            2           162 89+72=161 89+72=111    False
     1            2           163 52+21=073 52+21=090    False
     1            2           164 21+54=075 21+54=090    False
     1            2           165 69+13=082 69+13=109    False
     1            2           166 07+60=067 07+60=091    False
     1            2           167 63+83=146 63+83=110    False
     1            2           168 80+69=149 80+69=121    False
     1            2           169 27+28=055 27+28=091    False
     1            2           170 42+31=073 42+31=090    False
     1            2           171 51+99=150 51+99=110    False
     1            2           172 28+75=103 28+75=110    False
     1            2           173 38+57=095 38+57=111    False
     1            2           174 83+16=099 83+16=091    False
     1            2           175 92+94=186 92+94=111    False
     1            2           176 55+75=130 55+75=110    False
     1            2           177 59+51=110 59+51=110     True
     1            2           178 33+09=042 33+09=091    False
     1            2           179 53+13=066 53+13=090    False
     1            2           180 05+70=075 05+70=090    False
     1            2           181 12+20=032 12+20=090    False
     1            2           182 11+49=060 11+49=090    False
     1            2           183 63+45=108 63+45=091    False
     1            2           184 92+23=115 92+23=091    False
     1            2           185 82+45=127 82+45=109    False
     1            2           186 23+41=064 23+41=090    False
     1            2           187 64+26=090 64+26=091    False
     1            2           188 91+24=115 91+24=091    False
     1            2           189 20+32=052 20+32=090    False
     1            2           190 83+21=104 83+21=091    False
     1            2           191 07+20=027 07+20=090    False
     1            2           192 94+14=108 94+14=109    False
     1            2           193 96+89=185 96+89=111    False
     1            2           194 13+08=021 13+08=090    False
     1            2           195 32+05=037 32+05=090    False
     1            2           196 09+51=060 09+51=091    False
     1            2           197 26+29=055 26+29=091    False
     1            2           198 49+65=114 49+65=111    False
     1            2           199 32+66=098 32+66=091    False
     1            2           200 41+08=049 41+08=090    False
     1            2           201 26+79=105 26+79=110    False
     1            2           202 29+91=120 29+91=109    False
     1            2           203 51+00=051 51+00=090    False
     1            2           204 61+60=121 61+60=090    False
     1            2           205 45+78=123 45+78=111    False
     1            2           206 56+16=072 56+16=091    False
     1            2           207 66+68=134 66+68=111    False
     1            2           208 32+16=048 32+16=090    False
     1            2           209 84+49=133 84+49=111    False
     1            2           210 45+09=054 45+09=091    False
     1            2           211 96+78=174 96+78=111    False
     1            2           212 10+02=012 10+02=090    False
     1            2           213 36+60=096 36+60=091    False
     1            2           214 44+36=080 44+36=091    False
     1            2           215 12+86=098 12+86=090    False
     1            2           216 94+54=148 94+54=111    False
     1            2           217 64+73=137 64+73=110    False
     1            2           218 73+10=083 73+10=091    False
     1            2           219 14+62=076 14+62=090    False
     1            2           220 25+22=047 25+22=090    False
     1            2           221 94+22=116 94+22=109    False
     1            2           222 41+76=117 41+76=091    False
     1            2           223 38+46=084 38+46=110    False
     1            2           224 71+72=143 71+72=091    False
     1            2           225 74+79=153 74+79=111    False
     1            2           226 99+67=166 99+67=111    False
     1            2           227 78+71=149 78+71=112    False
     1            2           228 23+19=042 23+19=090    False
     1            2           229 51+65=116 51+65=091    False
     1            2           230 94+86=180 94+86=111    False
     1            2           231 09+79=088 09+79=126    False
     1            2           232 69+39=108 69+39=111    False
     1            2           233 84+13=097 84+13=091    False
     1            2           234 36+59=095 36+59=110    False
     1            2           235 87+47=134 87+47=111    False
     1            2           236 50+00=050 50+00=090    False
     1            2           237 76+96=172 76+96=111    False
     1            2           238 12+18=030 12+18=090    False
     1            2           239 99+95=194 99+95=111    False
     1            2           240 22+00=022 22+00=090    False
     1            2           241 96+18=114 96+18=112    False
     1            2           242 51+20=071 51+20=090    False
     1            2           243 66+81=147 66+81=110    False
     1            2           244 78+18=096 78+18=112    False
     1            2           245 09+78=087 09+78=126    False
     1            2           246 24+20=044 24+20=090    False
     1            2           247 76+13=089 76+13=091    False
     1            2           248 05+10=015 05+10=090    False
     1            2           249 97+14=111 97+14=112    False
     1            2           250 92+38=130 92+38=112    False
     1            2           251 77+13=090 77+13=109    False
     1            2           252 70+19=089 70+19=091    False
     1            2           253 90+45=135 90+45=121    False
     1            2           254 50+09=059 50+09=091    False
     1            2           255 78+06=084 78+06=121    False
     1            3             0 03+25=028 03+25=090    False
     1            3             1 48+43=091 48+43=110    False
     1            3             2 39+47=086 39+47=111    False
     1            3             3 48+19=067 48+19=110    False
     1            3             4 07+22=029 07+22=090    False
     1            3             5 73+68=141 73+68=111    False
     1            3             6 14+56=070 14+56=090    False
     1            3             7 96+95=191 96+95=111    False
     1            3             8 96+28=124 96+28=111    False
     1            3             9 82+05=087 82+05=091    False
     1            3            10 27+94=121 27+94=110    False
     1            3            11 87+86=173 87+86=111    False
     1            3            12 00+68=068 00+68=091    False
     1            3            13 11+37=048 11+37=090    False
     1            3            14 95+93=188 95+93=111    False
     1            3            15 75+82=157 75+82=111    False
     1            3            16 41+71=112 41+71=090    False
     1            3            17 60+14=074 60+14=090    False
     1            3            18 77+77=154 77+77=111    False
     1            3            19 31+84=115 31+84=091    False
     1            3            20 31+57=088 31+57=090    False
     1            3            21 27+87=114 27+87=112    False
     1            3            22 31+89=120 31+89=109    False
     1            3            23 22+18=040 22+18=090    False
     1            3            24 38+25=063 38+25=109    False
     1            3            25 64+54=118 64+54=109    False
     1            3            26 85+60=145 85+60=121    False
     1            3            27 14+71=085 14+71=090    False
     1            3            28 06+16=022 06+16=090    False
     1            3            29 78+61=139 78+61=112    False
     1            3            30 65+75=140 65+75=111    False
     1            3            31 13+83=096 13+83=090    False
     1            3            32 75+49=124 75+49=111    False
     1            3            33 05+78=083 05+78=091    False
     1            3            34 66+55=121 66+55=110    False
     1            3            35 03+05=008 03+05=090    False
     1            3            36 69+99=168 69+99=111    False
     1            3            37 52+82=134 52+82=091    False
     1            3            38 45+97=142 45+97=111    False
     1            3            39 66+17=083 66+17=091    False
     1            3            40 36+17=053 36+17=091    False
     1            3            41 92+74=166 92+74=112    False
     1            3            42 48+44=092 48+44=110    False
     1            3            43 34+17=051 34+17=090    False
     1            3            44 56+11=067 56+11=090    False
     1            3            45 77+23=100 77+23=110    False
     1            3            46 10+11=021 10+11=090    False
     1            3            47 32+65=097 32+65=091    False
     1            3            48 53+49=102 53+49=110    False
     1            3            49 68+86=154 68+86=111    False
     1            3            50 52+94=146 52+94=109    False
     1            3            51 97+71=168 97+71=112    False
     1            3            52 05+37=042 05+37=091    False
     1            3            53 58+75=133 58+75=111    False
     1            3            54 06+24=030 06+24=090    False
     1            3            55 15+44=059 15+44=090    False
     1            3            56 90+49=139 90+49=121    False
     1            3            57 50+37=087 50+37=091    False
     1            3            58 88+61=149 88+61=111    False
     1            3            59 21+57=078 21+57=090    False
     1            3            60 24+85=109 24+85=091    False
     1            3            61 01+66=067 01+66=090    False
     1            3            62 50+46=096 50+46=091    False
     1            3            63 76+65=141 76+65=111    False
     1            3            64 23+74=097 23+74=091    False
     1            3            65 76+16=092 76+16=109    False
     1            3            66 06+08=014 06+08=091    False
     1            3            67 69+25=094 69+25=111    False
     1            3            68 15+23=038 15+23=090    False
     1            3            69 41+02=043 41+02=090    False
     1            3            70 16+66=082 16+66=091    False
     1            3            71 59+94=153 59+94=111    False
     1            3            72 32+88=120 32+88=110    False
     1            3            73 46+21=067 46+21=090    False
     1            3            74 57+28=085 57+28=110    False
     1            3            75 00+31=031 00+31=090    False
     1            3            76 77+07=084 77+07=126    False
     1            3            77 28+70=098 28+70=091    False
     1            3            78 05+61=066 05+61=090    False
     1            3            79 22+09=031 22+09=090    False
     1            3            80 08+94=102 08+94=121    False
     1            3            81 40+11=051 40+11=090    False
     1            3            82 10+48=058 10+48=090    False
     1            3            83 27+56=083 27+56=091    False
     1            3            84 42+16=058 42+16=090    False
     1            3            85 69+43=112 69+43=111    False
     1            3            86 57+69=126 57+69=111    False
     1            3            87 18+86=104 18+86=110    False
     1            3            88 86+80=166 86+80=113    False
     1            3            89 30+85=115 30+85=091    False
     1            3            90 77+66=143 77+66=111    False
     1            3            91 39+64=103 39+64=110    False
     1            3            92 76+61=137 76+61=110    False
     1            3            93 42+61=103 42+61=090    False
     1            3            94 07+30=037 07+30=090    False
     1            3            95 35+93=128 35+93=110    False
     1            3            96 40+90=130 40+90=091    False
     1            3            97 08+91=099 08+91=091    False
     1            3            98 62+34=096 62+34=091    False
     1            3            99 86+49=135 86+49=111    False
     1            3           100 73+23=096 73+23=091    False
     1            3           101 87+35=122 87+35=111    False
     1            3           102 35+31=066 35+31=090    False
     1            3           103 07+13=020 07+13=090    False
     1            3           104 39+41=080 39+41=091    False
     1            3           105 44+63=107 44+63=091    False
     1            3           106 94+66=160 94+66=111    False
     1            3           107 49+54=103 49+54=110    False
     1            3           108 79+46=125 79+46=111    False
     1            3           109 53+12=065 53+12=090    False
     1            3           110 60+92=152 60+92=091    False
     1            3           111 25+60=085 25+60=090    False
     1            3           112 64+53=117 64+53=091    False
     1            3           113 41+02=043 41+02=090    False
     1            3           114 00+97=097 00+97=096    False
     1            3           115 12+52=064 12+52=090    False
     1            3           116 39+50=089 39+50=121    False
     1            3           117 87+21=108 87+21=110    False
     1            3           118 04+99=103 04+99=123    False
     1            3           119 19+75=094 19+75=109    False
     1            3           120 90+05=095 90+05=091    False
     1            3           121 54+39=093 54+39=110    False
     1            3           122 29+26=055 29+26=091    False
     1            3           123 82+95=177 82+95=111    False
     1            3           124 55+09=064 55+09=109    False
     1            3           125 02+62=064 02+62=090    False
     1            3           126 68+30=098 68+30=121    False
     1            3           127 99+16=115 99+16=112    False
     1            3           128 63+11=074 63+11=090    False
     1            3           129 42+92=134 42+92=091    False
     1            3           130 99+16=115 99+16=112    False
     1            3           131 50+31=081 50+31=090    False
     1            3           132 23+46=069 23+46=090    False
     1            3           133 45+73=118 45+73=109    False
     1            3           134 89+77=166 89+77=111    False
     1            3           135 45+78=123 45+78=111    False
     1            3           136 96+60=156 96+60=113    False
     1            3           137 74+61=135 74+61=091    False
     1            3           138 87+01=088 87+01=091    False
     1            3           139 63+88=151 63+88=111    False
     1            3           140 59+72=131 59+72=111    False
     1            3           141 17+96=113 17+96=110    False
     1            3           142 89+77=166 89+77=111    False
     1            3           143 24+69=093 24+69=091    False
     1            3           144 75+83=158 75+83=111    False
     1            3           145 50+54=104 50+54=091    False
     1            3           146 93+47=140 93+47=111    False
     1            3           147 20+55=075 20+55=090    False
     1            3           148 91+79=170 91+79=112    False
     1            3           149 15+13=028 15+13=090    False
     1            3           150 86+09=095 86+09=113    False
     1            3           151 29+58=087 29+58=112    False
     1            3           152 01+29=030 01+29=090    False
     1            3           153 65+48=113 65+48=111    False
     1            3           154 96+45=141 96+45=111    False
     1            3           155 58+69=127 58+69=111    False
     1            3           156 84+43=127 84+43=110    False
     1            3           157 90+38=128 90+38=121    False
     1            3           158 39+97=136 39+97=111    False
     1            3           159 74+84=158 74+84=111    False
     1            3           160 86+22=108 86+22=110    False
     1            3           161 01+86=087 01+86=090    False
     1            3           162 81+63=144 81+63=091    False
     1            3           163 80+94=174 80+94=121    False
     1            3           164 44+42=086 44+42=090    False
     1            3           165 72+60=132 72+60=091    False
     1            3           166 28+07=035 28+07=091    False
     1            3           167 69+54=123 69+54=111    False
     1            3           168 68+77=145 68+77=111    False
     1            3           169 90+16=106 90+16=091    False
     1            3           170 64+50=114 64+50=091    False
     1            3           171 46+88=134 46+88=111    False
     1            3           172 55+99=154 55+99=111    False
     1            3           173 31+97=128 31+97=109    False
     1            3           174 79+28=107 79+28=111    False
     1            3           175 81+43=124 81+43=091    False
     1            3           176 41+15=056 41+15=090    False
     1            3           177 38+77=115 38+77=111    False
     1            3           178 25+06=031 25+06=090    False
     1            3           179 01+93=094 01+93=090    False
     1            3           180 97+22=119 97+22=112    False
     1            3           181 71+84=155 71+84=109    False
     1            3           182 26+36=062 26+36=091    False
     1            3           183 60+92=152 60+92=091    False
     1            3           184 02+94=096 02+94=090    False
     1            3           185 31+58=089 31+58=091    False
     1            3           186 70+52=122 70+52=091    False
     1            3           187 19+42=061 19+42=090    False
     1            3           188 95+73=168 95+73=111    False
     1            3           189 21+25=046 21+25=090    False
     1            3           190 13+58=071 13+58=090    False
     1            3           191 62+28=090 62+28=091    False
     1            3           192 38+14=052 38+14=091    False
     1            3           193 66+75=141 66+75=111    False
     1            3           194 24+59=083 24+59=091    False
     1            3           195 97+66=163 97+66=111    False
     1            3           196 76+70=146 76+70=113    False
     1            3           197 08+40=048 08+40=091    False
     1            3           198 84+00=084 84+00=091    False
     1            3           199 54+73=127 54+73=109    False
     1            3           200 16+88=104 16+88=110    False
     1            3           201 99+47=146 99+47=111    False
     1            3           202 31+95=126 31+95=091    False
     1            3           203 01+79=080 01+79=091    False
     1            3           204 03+68=071 03+68=091    False
     1            3           205 10+05=015 10+05=090    False
     1            3           206 98+90=188 98+90=113    False
     1            3           207 58+53=111 58+53=111     True
     1            3           208 34+87=121 34+87=110    False
     1            3           209 07+31=038 07+31=090    False
     1            3           210 59+08=067 59+08=121    False
     1            3           211 51+38=089 51+38=091    False
     1            3           212 62+62=124 62+62=091    False
     1            3           213 80+32=112 80+32=091    False
     1            3           214 69+16=085 69+16=110    False
     1            3           215 01+17=018 01+17=090    False
     1            3           216 74+41=115 74+41=091    False
     1            3           217 20+89=109 20+89=091    False
     1            3           218 53+50=103 53+50=091    False
     1            3           219 82+85=167 82+85=111    False
     1            3           220 34+47=081 34+47=091    False
     1            3           221 34+45=079 34+45=091    False
     1            3           222 77+34=111 77+34=111     True
     1            3           223 56+33=089 56+33=091    False
     1            3           224 97+56=153 97+56=111    False
     1            3           225 29+06=035 29+06=091    False
     1            3           226 78+96=174 78+96=111    False
     1            3           227 28+65=093 28+65=109    False
     1            3           228 61+64=125 61+64=091    False
     1            3           229 32+64=096 32+64=090    False
     1            3           230 98+32=130 98+32=112    False
     1            3           231 25+35=060 25+35=090    False
     1            3           232 05+08=013 05+08=091    False
     1            3           233 05+26=031 05+26=090    False
     1            3           234 84+71=155 84+71=110    False
     1            3           235 33+10=043 33+10=090    False
     1            3           236 98+35=133 98+35=111    False
     1            3           237 68+98=166 68+98=111    False
     1            3           238 03+63=066 03+63=090    False
     1            3           239 12+96=108 12+96=090    False
     1            3           240 02+81=083 02+81=090    False
     1            3           241 83+13=096 83+13=091    False
     1            3           242 55+92=147 55+92=110    False
     1            3           243 96+09=105 96+09=113    False
     1            3           244 61+08=069 61+08=091    False
     1            3           245 39+75=114 39+75=111    False
     1            3           246 40+74=114 40+74=091    False
     1            3           247 39+80=119 39+80=121    False
     1            3           248 57+95=152 57+95=111    False
     1            3           249 92+97=189 92+97=111    False
     1            3           250 33+03=036 33+03=090    False
     1            3           251 74+92=166 74+92=111    False
     1            3           252 99+09=108 99+09=113    False
     1            3           253 98+10=108 98+10=123    False
     1            3           254 46+77=123 46+77=111    False
     1            3           255 85+78=163 85+78=111    False
     1            4             0 41+21=062 41+21=090    False
     1            4             1 49+13=062 49+13=091    False
     1            4             2 59+07=066 59+07=121    False
     1            4             3 31+11=042 31+11=090    False
     1            4             4 74+16=090 74+16=091    False
     1            4             5 43+38=081 43+38=091    False
     1            4             6 08+67=075 08+67=121    False
     1            4             7 31+66=097 31+66=090    False
     1            4             8 10+31=041 10+31=090    False
     1            4             9 34+59=093 34+59=109    False
     1            4            10 78+42=120 78+42=111    False
     1            4            11 13+41=054 13+41=090    False
     1            4            12 97+89=186 97+89=111    False
     1            4            13 15+62=077 15+62=090    False
     1            4            14 39+36=075 39+36=110    False
     1            4            15 21+25=046 21+25=090    False
     1            4            16 74+56=130 74+56=110    False
     1            4            17 85+47=132 85+47=111    False
     1            4            18 47+32=079 47+32=091    False
     1            4            19 37+66=103 37+66=110    False
     1            4            20 16+29=045 16+29=090    False
     1            4            21 86+77=163 86+77=111    False
     1            4            22 80+07=087 80+07=091    False
     1            4            23 87+05=092 87+05=121    False
     1            4            24 58+16=074 58+16=109    False
     1            4            25 52+79=131 52+79=110    False
     1            4            26 91+08=099 91+08=123    False
     1            4            27 47+78=125 47+78=111    False
     1            4            28 86+96=182 86+96=111    False
     1            4            29 90+22=112 90+22=091    False
     1            4            30 31+18=049 31+18=090    False
     1            4            31 86+15=101 86+15=110    False
     1            4            32 15+95=110 15+95=091    False
     1            4            33 42+11=053 42+11=090    False
     1            4            34 65+99=164 65+99=111    False
     1            4            35 89+29=118 89+29=111    False
     1            4            36 35+11=046 35+11=090    False
     1            4            37 71+41=112 71+41=090    False
     1            4            38 16+24=040 16+24=090    False
     1            4            39 77+82=159 77+82=111    False
     1            4            40 55+89=144 55+89=111    False
     1            4            41 17+88=105 17+88=112    False
     1            4            42 54+72=126 54+72=091    False
     1            4            43 34+98=132 34+98=112    False
     1            4            44 09+97=106 09+97=126    False
     1            4            45 91+07=098 91+07=091    False
     1            4            46 55+94=149 55+94=110    False
     1            4            47 22+58=080 22+58=090    False
     1            4            48 91+37=128 91+37=110    False
     1            4            49 16+10=026 16+10=090    False
     1            4            50 96+32=128 96+32=111    False
     1            4            51 35+75=110 35+75=109    False
     1            4            52 88+73=161 88+73=111    False
     1            4            53 35+18=053 35+18=091    False
     1            4            54 33+10=043 33+10=090    False
     1            4            55 08+50=058 08+50=091    False
     1            4            56 22+62=084 22+62=090    False
     1            4            57 26+37=063 26+37=091    False
     1            4            58 80+27=107 80+27=091    False
     1            4            59 68+28=096 68+28=111    False
     1            4            60 48+03=051 48+03=091    False
     1            4            61 40+18=058 40+18=090    False
     1            4            62 16+59=075 16+59=091    False
     1            4            63 02+19=021 02+19=090    False
     1            4            64 01+09=010 01+09=090    False
     1            4            65 62+68=130 62+68=110    False
     1            4            66 09+71=080 09+71=091    False
     1            4            67 00+58=058 00+58=090    False
     1            4            68 16+45=061 16+45=090    False
     1            4            69 24+98=122 24+98=110    False
     1            4            70 47+92=139 47+92=110    False
     1            4            71 94+84=178 94+84=111    False
     1            4            72 21+32=053 21+32=090    False
     1            4            73 29+82=111 29+82=110    False
     1            4            74 32+79=111 32+79=109    False
     1            4            75 13+98=111 13+98=109    False
     1            4            76 41+94=135 41+94=091    False
     1            4            77 51+84=135 51+84=091    False
     1            4            78 42+05=047 42+05=090    False
     1            4            79 39+03=042 39+03=091    False
     1            4            80 02+92=094 02+92=090    False
     1            4            81 99+81=180 99+81=111    False
     1            4            82 32+68=100 32+68=091    False
     1            4            83 52+17=069 52+17=090    False
     1            4            84 56+58=114 56+58=111    False
     1            4            85 21+48=069 21+48=090    False
     1            4            86 61+71=132 61+71=090    False
     1            4            87 17+01=018 17+01=090    False
     1            4            88 68+23=091 68+23=110    False
     1            4            89 00+37=037 00+37=090    False
     1            4            90 94+88=182 94+88=111    False
     1            4            91 06+31=037 06+31=090    False
     1            4            92 27+18=045 27+18=091    False
     1            4            93 41+81=122 41+81=090    False
     1            4            94 15+86=101 15+86=091    False
     1            4            95 36+87=123 36+87=111    False
     1            4            96 17+37=054 17+37=091    False
     1            4            97 13+86=099 13+86=091    False
     1            4            98 29+69=098 29+69=112    False
     1            4            99 31+99=130 31+99=109    False
     1            4           100 47+29=076 47+29=110    False
     1            4           101 08+81=089 08+81=091    False
     1            4           102 72+82=154 72+82=110    False
     1            4           103 46+91=137 46+91=109    False
     1            4           104 70+35=105 70+35=091    False
     1            4           105 90+55=145 90+55=121    False
     1            4           106 99+99=198 99+99=111    False
     1            4           107 60+97=157 60+97=121    False
     1            4           108 03+40=043 03+40=090    False
     1            4           109 35+49=084 35+49=109    False
     1            4           110 32+02=034 32+02=090    False
     1            4           111 70+18=088 70+18=091    False
     1            4           112 99+05=104 99+05=113    False
     1            4           113 78+73=151 78+73=111    False
     1            4           114 03+02=005 03+02=090    False
     1            4           115 50+14=064 50+14=090    False
     1            4           116 62+02=064 62+02=090    False
     1            4           117 16+74=090 16+74=091    False
     1            4           118 68+65=133 68+65=111    False
     1            4           119 74+81=155 74+81=110    False
     1            4           120 37+48=085 37+48=110    False
     1            4           121 63+04=067 63+04=091    False
     1            4           122 06+62=068 06+62=090    False
     1            4           123 95+75=170 95+75=111    False
     1            4           124 92+37=129 92+37=112    False
     1            4           125 81+32=113 81+32=091    False
     1            4           126 53+28=081 53+28=091    False
     1            4           127 52+42=094 52+42=090    False
     1            4           128 66+97=163 66+97=111    False
     1            4           129 00+48=048 00+48=090    False
     1            4           130 65+32=097 65+32=091    False
     1            4           131 60+89=149 60+89=112    False
     1            4           132 71+61=132 71+61=091    False
     1            4           133 98+50=148 98+50=113    False
     1            4           134 90+96=186 90+96=113    False
     1            4           135 02+96=098 02+96=091    False
     1            4           136 62+75=137 62+75=109    False
     1            4           137 41+28=069 41+28=090    False
     1            4           138 95+79=174 95+79=111    False
     1            4           139 48+41=089 48+41=091    False
     1            4           140 87+95=182 87+95=111    False
     1            4           141 75+38=113 75+38=111    False
     1            4           142 31+55=086 31+55=090    False
     1            4           143 54+63=117 54+63=091    False
     1            4           144 75+82=157 75+82=111    False
     1            4           145 46+45=091 46+45=091     True
     1            4           146 13+08=021 13+08=090    False
     1            4           147 77+97=174 77+97=111    False
     1            4           148 37+35=072 37+35=109    False
     1            4           149 21+89=110 21+89=091    False
     1            4           150 58+51=109 58+51=109     True
     1            4           151 91+48=139 91+48=112    False
     1            4           152 33+23=056 33+23=090    False
     1            4           153 80+96=176 80+96=121    False
     1            4           154 78+02=080 78+02=123    False
     1            4           155 38+95=133 38+95=111    False
     1            4           156 99+25=124 99+25=111    False
     1            4           157 30+76=106 30+76=091    False
     1            4           158 42+40=082 42+40=090    False
     1            4           159 85+58=143 85+58=111    False
     1            4           160 44+46=090 44+46=091    False
     1            4           161 06+41=047 06+41=090    False
     1            4           162 65+90=155 65+90=112    False
     1            4           163 43+83=126 43+83=109    False
     1            4           164 36+61=097 36+61=091    False
     1            4           165 61+51=112 61+51=090    False
     1            4           166 38+09=047 38+09=121    False
     1            4           167 21+97=118 21+97=091    False
     1            4           168 83+30=113 83+30=091    False
     1            4           169 11+79=090 11+79=090     True
     1            4           170 14+29=043 14+29=090    False
     1            4           171 21+11=032 21+11=090    False
     1            4           172 43+53=096 43+53=091    False
     1            4           173 02+58=060 02+58=090    False
     1            4           174 78+82=160 78+82=111    False
     1            4           175 91+11=102 91+11=090    False
     1            4           176 58+54=112 58+54=111    False
     1            4           177 00+15=015 00+15=090    False
     1            4           178 83+51=134 83+51=091    False
     1            4           179 44+72=116 44+72=091    False
     1            4           180 71+20=091 71+20=090    False
     1            4           181 24+99=123 24+99=110    False
     1            4           182 46+30=076 46+30=091    False
     1            4           183 08+67=075 08+67=121    False
     1            4           184 47+42=089 47+42=091    False
     1            4           185 95+67=162 95+67=111    False
     1            4           186 40+56=096 40+56=091    False
     1            4           187 17+95=112 17+95=109    False
     1            4           188 94+66=160 94+66=111    False
     1            4           189 14+58=072 14+58=090    False
     1            4           190 56+05=061 56+05=091    False
     1            4           191 70+01=071 70+01=090    False
     1            4           192 97+59=156 97+59=111    False
     1            4           193 94+67=161 94+67=111    False
     1            4           194 13+41=054 13+41=090    False
     1            4           195 85+15=100 85+15=109    False
     1            4           196 48+53=101 48+53=110    False
     1            4           197 62+75=137 62+75=109    False
     1            4           198 87+47=134 87+47=111    False
     1            4           199 31+88=119 31+88=091    False
     1            4           200 97+16=113 97+16=112    False
     1            4           201 48+45=093 48+45=110    False
     1            4           202 99+00=099 99+00=136    False
     1            4           203 15+01=016 15+01=090    False
     1            4           204 28+96=124 28+96=112    False
     1            4           205 20+11=031 20+11=090    False
     1            4           206 07+56=063 07+56=091    False
     1            4           207 06+08=014 06+08=091    False
     1            4           208 45+46=091 45+46=091     True
     1            4           209 48+85=133 48+85=111    False
     1            4           210 62+14=076 62+14=090    False
     1            4           211 82+31=113 82+31=091    False
     1            4           212 85+88=173 85+88=111    False
     1            4           213 77+08=085 77+08=126    False
     1            4           214 16+64=080 16+64=090    False
     1            4           215 00+27=027 00+27=090    False
     1            4           216 36+75=111 36+75=110    False
     1            4           217 38+38=076 38+38=112    False
     1            4           218 88+32=120 88+32=112    False
     1            4           219 09+88=097 09+88=126    False
     1            4           220 96+87=183 96+87=111    False
     1            4           221 71+29=100 71+29=091    False
     1            4           222 99+13=112 99+13=112     True
     1            4           223 03+13=016 03+13=090    False
     1            4           224 67+23=090 67+23=109    False
     1            4           225 15+98=113 15+98=109    False
     1            4           226 10+08=018 10+08=090    False
     1            4           227 46+24=070 46+24=091    False
     1            4           228 55+63=118 55+63=109    False
     1            4           229 28+06=034 28+06=091    False
     1            4           230 43+87=130 43+87=110    False
     1            4           231 34+05=039 34+05=090    False
2024-09-20 14:10:09,090 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:09,091 - root - INFO - ====================================================== Starting Train Epoch: 2/9 ======================================================
2024-09-20 14:10:09,092 - root - INFO - Learning rates for each parameter group: 0.00066589243287212298, 0.00066589243287212298
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 2, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 2, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=387.97672, average_batch_loss=1.51553, average_batch_perplexity=4.55185, lr=0.000665892, 0.000665892]
Epoch: 2, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=387.97672, average_batch_loss=1.51553, average_batch_perplexity=4.55185, lr=0.000665892, 0.000665892]
Epoch: 2, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=386.84814, average_batch_loss=1.51113, average_batch_perplexity=4.53183, lr=0.000688854, 0.000688854]
Epoch: 2, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=386.84814, average_batch_loss=1.51113, average_batch_perplexity=4.53183, lr=0.000688854, 0.000688854]
Epoch: 2, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=386.84814, average_batch_loss=1.51113, average_batch_perplexity=4.53183, lr=0.000688854, 0.000688854]
Epoch: 2, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=379.06183, average_batch_loss=1.48071, average_batch_perplexity=4.39607, lr=0.000711816, 0.000711816]
Epoch: 2, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=379.06183, average_batch_loss=1.48071, average_batch_perplexity=4.39607, lr=0.000711816, 0.000711816]
Epoch: 2, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=372.22934, average_batch_loss=1.45402, average_batch_perplexity=4.28029, lr=0.000734778, 0.000734778]
Epoch: 2, Step: 4:  14%|█▍        | 4/28 [00:00<00:02, 11.81it/s, total_batch_loss=372.22934, average_batch_loss=1.45402, average_batch_perplexity=4.28029, lr=0.000734778, 0.000734778]
Epoch: 2, Step: 5:  14%|█▍        | 4/28 [00:00<00:02, 11.81it/s, total_batch_loss=372.22934, average_batch_loss=1.45402, average_batch_perplexity=4.28029, lr=0.000734778, 0.000734778]
Epoch: 2, Step: 5:  14%|█▍        | 4/28 [00:00<00:02, 11.81it/s, total_batch_loss=369.99042, average_batch_loss=1.44528, average_batch_perplexity=4.24302, lr=0.000757740, 0.000757740]
Epoch: 2, Step: 6:  14%|█▍        | 4/28 [00:00<00:02, 11.81it/s, total_batch_loss=369.99042, average_batch_loss=1.44528, average_batch_perplexity=4.24302, lr=0.000757740, 0.000757740]
Epoch: 2, Step: 6:  14%|█▍        | 4/28 [00:00<00:02, 11.81it/s, total_batch_loss=359.53345, average_batch_loss=1.40443, average_batch_perplexity=4.07319, lr=0.000780701, 0.000780701]
Epoch: 2, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 12.01it/s, total_batch_loss=359.53345, average_batch_loss=1.40443, average_batch_perplexity=4.07319, lr=0.000780701, 0.000780701]
Epoch: 2, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.01it/s, total_batch_loss=359.53345, average_batch_loss=1.40443, average_batch_perplexity=4.07319, lr=0.000780701, 0.000780701]
Epoch: 2, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.01it/s, total_batch_loss=357.81116, average_batch_loss=1.39770, average_batch_perplexity=4.04588, lr=0.000803663, 0.000803663]
Epoch: 2, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.01it/s, total_batch_loss=357.81116, average_batch_loss=1.39770, average_batch_perplexity=4.04588, lr=0.000803663, 0.000803663]
Epoch: 2, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.01it/s, total_batch_loss=350.02820, average_batch_loss=1.36730, average_batch_perplexity=3.92473, lr=0.000826625, 0.000826625]
Epoch: 2, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 12.17it/s, total_batch_loss=350.02820, average_batch_loss=1.36730, average_batch_perplexity=3.92473, lr=0.000826625, 0.000826625]
Epoch: 2, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.17it/s, total_batch_loss=350.02820, average_batch_loss=1.36730, average_batch_perplexity=3.92473, lr=0.000826625, 0.000826625]
Epoch: 2, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.17it/s, total_batch_loss=346.96631, average_batch_loss=1.35534, average_batch_perplexity=3.87807, lr=0.000849587, 0.000849587]
Epoch: 2, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.17it/s, total_batch_loss=346.96631, average_batch_loss=1.35534, average_batch_perplexity=3.87807, lr=0.000849587, 0.000849587]
Epoch: 2, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.17it/s, total_batch_loss=338.66644, average_batch_loss=1.32292, average_batch_perplexity=3.75435, lr=0.000872549, 0.000872549]
Epoch: 2, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.12it/s, total_batch_loss=338.66644, average_batch_loss=1.32292, average_batch_perplexity=3.75435, lr=0.000872549, 0.000872549]
Epoch: 2, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.12it/s, total_batch_loss=338.66644, average_batch_loss=1.32292, average_batch_perplexity=3.75435, lr=0.000872549, 0.000872549]
Epoch: 2, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.12it/s, total_batch_loss=334.35333, average_batch_loss=1.30607, average_batch_perplexity=3.69163, lr=0.000895511, 0.000895511]
Epoch: 2, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.12it/s, total_batch_loss=334.35333, average_batch_loss=1.30607, average_batch_perplexity=3.69163, lr=0.000895511, 0.000895511]
Epoch: 2, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.12it/s, total_batch_loss=331.53091, average_batch_loss=1.29504, average_batch_perplexity=3.65115, lr=0.000918472, 0.000918472]
Epoch: 2, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.03it/s, total_batch_loss=331.53091, average_batch_loss=1.29504, average_batch_perplexity=3.65115, lr=0.000918472, 0.000918472]
Epoch: 2, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.03it/s, total_batch_loss=331.53091, average_batch_loss=1.29504, average_batch_perplexity=3.65115, lr=0.000918472, 0.000918472]
Epoch: 2, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.03it/s, total_batch_loss=327.52557, average_batch_loss=1.27940, average_batch_perplexity=3.59447, lr=0.000941434, 0.000941434]
Epoch: 2, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.03it/s, total_batch_loss=327.52557, average_batch_loss=1.27940, average_batch_perplexity=3.59447, lr=0.000941434, 0.000941434]
Epoch: 2, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.03it/s, total_batch_loss=324.04407, average_batch_loss=1.26580, average_batch_perplexity=3.54592, lr=0.000964396, 0.000964396]
Epoch: 2, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.00it/s, total_batch_loss=324.04407, average_batch_loss=1.26580, average_batch_perplexity=3.54592, lr=0.000964396, 0.000964396]
Epoch: 2, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.00it/s, total_batch_loss=324.04407, average_batch_loss=1.26580, average_batch_perplexity=3.54592, lr=0.000964396, 0.000964396]
Epoch: 2, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.00it/s, total_batch_loss=313.24115, average_batch_loss=1.22360, average_batch_perplexity=3.39940, lr=0.000987358, 0.000987358]
Epoch: 2, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.00it/s, total_batch_loss=313.24115, average_batch_loss=1.22360, average_batch_perplexity=3.39940, lr=0.000987358, 0.000987358]
Epoch: 2, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.00it/s, total_batch_loss=315.94284, average_batch_loss=1.23415, average_batch_perplexity=3.43546, lr=0.001010320, 0.001010320]
Epoch: 2, Step: 16:  57%|█████▋    | 16/28 [00:01<00:00, 12.12it/s, total_batch_loss=315.94284, average_batch_loss=1.23415, average_batch_perplexity=3.43546, lr=0.001010320, 0.001010320]
Epoch: 2, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.12it/s, total_batch_loss=315.94284, average_batch_loss=1.23415, average_batch_perplexity=3.43546, lr=0.001010320, 0.001010320]
Epoch: 2, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.12it/s, total_batch_loss=305.40536, average_batch_loss=1.19299, average_batch_perplexity=3.29692, lr=0.001033281, 0.001033281]
Epoch: 2, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.12it/s, total_batch_loss=305.40536, average_batch_loss=1.19299, average_batch_perplexity=3.29692, lr=0.001033281, 0.001033281]
Epoch: 2, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.12it/s, total_batch_loss=301.18091, average_batch_loss=1.17649, average_batch_perplexity=3.24296, lr=0.001056243, 0.001056243]
Epoch: 2, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.16it/s, total_batch_loss=301.18091, average_batch_loss=1.17649, average_batch_perplexity=3.24296, lr=0.001056243, 0.001056243]
Epoch: 2, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.16it/s, total_batch_loss=301.18091, average_batch_loss=1.17649, average_batch_perplexity=3.24296, lr=0.001056243, 0.001056243]
Epoch: 2, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.16it/s, total_batch_loss=299.07318, average_batch_loss=1.16825, average_batch_perplexity=3.21637, lr=0.001079205, 0.001079205]
Epoch: 2, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.16it/s, total_batch_loss=299.07318, average_batch_loss=1.16825, average_batch_perplexity=3.21637, lr=0.001079205, 0.001079205]
Epoch: 2, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.16it/s, total_batch_loss=304.44934, average_batch_loss=1.18926, average_batch_perplexity=3.28463, lr=0.001102167, 0.001102167]
Epoch: 2, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 12.09it/s, total_batch_loss=304.44934, average_batch_loss=1.18926, average_batch_perplexity=3.28463, lr=0.001102167, 0.001102167]
Epoch: 2, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.09it/s, total_batch_loss=304.44934, average_batch_loss=1.18926, average_batch_perplexity=3.28463, lr=0.001102167, 0.001102167]
Epoch: 2, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.09it/s, total_batch_loss=292.82129, average_batch_loss=1.14383, average_batch_perplexity=3.13878, lr=0.001125129, 0.001125129]
Epoch: 2, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.09it/s, total_batch_loss=292.82129, average_batch_loss=1.14383, average_batch_perplexity=3.13878, lr=0.001125129, 0.001125129]
Epoch: 2, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.09it/s, total_batch_loss=289.08966, average_batch_loss=1.12926, average_batch_perplexity=3.09336, lr=0.001148090, 0.001148090]
Epoch: 2, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.05it/s, total_batch_loss=289.08966, average_batch_loss=1.12926, average_batch_perplexity=3.09336, lr=0.001148090, 0.001148090]
Epoch: 2, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.05it/s, total_batch_loss=289.08966, average_batch_loss=1.12926, average_batch_perplexity=3.09336, lr=0.001148090, 0.001148090]
Epoch: 2, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.05it/s, total_batch_loss=285.48624, average_batch_loss=1.11518, average_batch_perplexity=3.05012, lr=0.001171052, 0.001171052]
Epoch: 2, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.05it/s, total_batch_loss=285.48624, average_batch_loss=1.11518, average_batch_perplexity=3.05012, lr=0.001171052, 0.001171052]
Epoch: 2, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.05it/s, total_batch_loss=277.67117, average_batch_loss=1.08465, average_batch_perplexity=2.95841, lr=0.001194014, 0.001194014]
Epoch: 2, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.20it/s, total_batch_loss=277.67117, average_batch_loss=1.08465, average_batch_perplexity=2.95841, lr=0.001194014, 0.001194014]
Epoch: 2, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=277.67117, average_batch_loss=1.08465, average_batch_perplexity=2.95841, lr=0.001194014, 0.001194014]
Epoch: 2, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=282.78699, average_batch_loss=1.10464, average_batch_perplexity=3.01813, lr=0.001216976, 0.001216976]
Epoch: 2, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=282.78699, average_batch_loss=1.10464, average_batch_perplexity=3.01813, lr=0.001216976, 0.001216976]
Epoch: 2, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=276.07965, average_batch_loss=1.07844, average_batch_perplexity=2.94008, lr=0.001239938, 0.001239938]
Epoch: 2, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.17it/s, total_batch_loss=276.07965, average_batch_loss=1.07844, average_batch_perplexity=2.94008, lr=0.001239938, 0.001239938]
Epoch: 2, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.17it/s, total_batch_loss=276.07965, average_batch_loss=1.07844, average_batch_perplexity=2.94008, lr=0.001239938, 0.001239938]
Epoch: 2, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.17it/s, total_batch_loss=276.06375, average_batch_loss=1.07837, average_batch_perplexity=2.93990, lr=0.001262899, 0.001262899]
Epoch: 2, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.17it/s, total_batch_loss=276.06375, average_batch_loss=1.07837, average_batch_perplexity=2.93990, lr=0.001262899, 0.001262899]
Epoch: 2, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.17it/s, total_batch_loss=92.09214, average_batch_loss=1.04650, average_batch_perplexity=2.84767, lr=0.001285861, 0.001285861] 
Epoch: 2, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.33it/s, total_batch_loss=92.09214, average_batch_loss=1.04650, average_batch_perplexity=2.84767, lr=0.001285861, 0.001285861]
                                                                                                                                                                                         
2024-09-20 14:10:11,361 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:11,362 - root - INFO - Total Batches:                   28
2024-09-20 14:10:11,363 - root - INFO - Average Epoch Train Loss:        1.26828
2024-09-20 14:10:11,363 - root - INFO - Average Epoch Train Perplexity:  3.55473
2024-09-20 14:10:11,363 - root - INFO - 
2024-09-20 14:10:11,364 - root - INFO - ====================================================== Starting Valid Epoch: 2/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 2, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 2, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=272.26123, average_batch_loss=1.06352, average_batch_perplexity=2.89655]
Epoch: 2, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=272.26123, average_batch_loss=1.06352, average_batch_perplexity=2.89655]
Epoch: 2, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=267.66589, average_batch_loss=1.04557, average_batch_perplexity=2.84502]
Epoch: 2, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=267.66589, average_batch_loss=1.04557, average_batch_perplexity=2.84502]
Epoch: 2, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=267.72772, average_batch_loss=1.04581, average_batch_perplexity=2.84571]
Epoch: 2, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=267.72772, average_batch_loss=1.04581, average_batch_perplexity=2.84571]
Epoch: 2, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=267.72772, average_batch_loss=1.04581, average_batch_perplexity=2.84571]
Epoch: 2, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=270.36462, average_batch_loss=1.05611, average_batch_perplexity=2.87517]
Epoch: 2, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=270.36462, average_batch_loss=1.05611, average_batch_perplexity=2.87517]
Epoch: 2, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=274.43536, average_batch_loss=1.07201, average_batch_perplexity=2.92125]
Epoch: 2, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=274.43536, average_batch_loss=1.07201, average_batch_perplexity=2.92125]
Epoch: 2, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.07it/s, total_batch_loss=267.62402, average_batch_loss=1.04541, average_batch_perplexity=2.84455]
Epoch: 2, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.33it/s, total_batch_loss=267.62402, average_batch_loss=1.04541, average_batch_perplexity=2.84455]
Epoch: 2, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.33it/s, total_batch_loss=267.62402, average_batch_loss=1.04541, average_batch_perplexity=2.84455]
Epoch: 2, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.33it/s, total_batch_loss=267.19635, average_batch_loss=1.04374, average_batch_perplexity=2.83981]
Epoch: 2, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.33it/s, total_batch_loss=267.19635, average_batch_loss=1.04374, average_batch_perplexity=2.83981]
Epoch: 2, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.33it/s, total_batch_loss=218.63084, average_batch_loss=1.05111, average_batch_perplexity=2.86082]
                                                                                                                                                          
2024-09-20 14:10:11,702 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:11,703 - root - INFO - Total Batches:                   8
2024-09-20 14:10:11,703 - root - INFO - Average Epoch Valid Loss:        1.05295
2024-09-20 14:10:11,704 - root - INFO - Average Epoch Valid Perplexity:  2.86610
2024-09-20 14:10:11,704 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0273]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0469]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.40it/s, accuracy: 0.0469]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.40it/s, accuracy: 0.0508]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.40it/s, accuracy: 0.0431]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.72it/s, accuracy: 0.0431]
                                                                                           
2024-09-20 14:10:12,085 - root - INFO - Correct/Total Samples:           42/1000
2024-09-20 14:10:12,085 - root - INFO - Eval Accuracy:                   0.042
2024-09-20 14:10:12,110 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     2            1             0 13+48=061 13+48=055    False
     2            1             1 16+55=071 16+55=075    False
     2            1             2 79+34=113 79+34=111    False
     2            1             3 35+44=079 35+44=085    False
     2            1             4 16+50=066 16+50=055    False
     2            1             5 28+47=075 28+47=085    False
     2            1             6 00+74=074 00+74=085    False
     2            1             7 15+20=035 15+20=031    False
     2            1             8 72+60=132 72+60=135    False
     2            1             9 63+68=131 63+68=125    False
     2            1            10 29+45=074 29+45=085    False
     2            1            11 34+60=094 34+60=111    False
     2            1            12 53+70=123 53+70=125    False
     2            1            13 70+50=120 70+50=125    False
     2            1            14 11+84=095 11+84=091    False
     2            1            15 42+71=113 42+71=111    False
     2            1            16 98+22=120 98+22=111    False
     2            1            17 02+02=004 02+02=031    False
     2            1            18 15+85=100 15+85=107    False
     2            1            19 21+78=099 21+78=107    False
     2            1            20 61+79=140 61+79=135    False
     2            1            21 25+99=124 25+99=111    False
     2            1            22 09+85=094 09+85=095    False
     2            1            23 60+91=151 60+91=135    False
     2            1            24 35+30=065 35+30=075    False
     2            1            25 24+51=075 24+51=085    False
     2            1            26 93+91=184 93+91=151    False
     2            1            27 39+96=135 39+96=135     True
     2            1            28 64+35=099 64+35=111    False
     2            1            29 36+22=058 36+22=075    False
     2            1            30 68+45=113 68+45=111    False
     2            1            31 16+84=100 16+84=107    False
     2            1            32 91+52=143 91+52=131    False
     2            1            33 97+36=133 97+36=135    False
     2            1            34 27+37=064 27+37=085    False
     2            1            35 99+82=181 99+82=175    False
     2            1            36 03+42=045 03+42=051    False
     2            1            37 18+38=056 18+38=075    False
     2            1            38 32+20=052 32+20=055    False
     2            1            39 38+13=051 38+13=055    False
     2            1            40 68+42=110 68+42=111    False
     2            1            41 64+00=064 64+00=085    False
     2            1            42 48+94=142 48+94=135    False
     2            1            43 58+36=094 58+36=111    False
     2            1            44 41+22=063 41+22=075    False
     2            1            45 23+58=081 23+58=085    False
     2            1            46 67+46=113 67+46=111    False
     2            1            47 40+78=118 40+78=125    False
     2            1            48 90+38=128 90+38=135    False
     2            1            49 89+52=141 89+52=135    False
     2            1            50 37+77=114 37+77=111    False
     2            1            51 29+76=105 29+76=107    False
     2            1            52 42+90=132 42+90=135    False
     2            1            53 45+82=127 45+82=135    False
     2            1            54 35+95=130 35+95=135    False
     2            1            55 92+98=190 92+98=177    False
     2            1            56 73+91=164 73+91=151    False
     2            1            57 53+97=150 53+97=155    False
     2            1            58 98+69=167 98+69=155    False
     2            1            59 20+46=066 20+46=075    False
     2            1            60 48+69=117 48+69=111    False
     2            1            61 62+31=093 62+31=111    False
     2            1            62 80+59=139 80+59=135    False
     2            1            63 58+12=070 58+12=075    False
     2            1            64 08+96=104 08+96=095    False
     2            1            65 67+06=073 67+06=085    False
     2            1            66 22+04=026 22+04=035    False
     2            1            67 61+87=148 61+87=135    False
     2            1            68 95+27=122 95+27=111    False
     2            1            69 49+83=132 49+83=135    False
     2            1            70 43+00=043 43+00=055    False
     2            1            71 01+85=086 01+85=095    False
     2            1            72 11+68=079 11+68=081    False
     2            1            73 80+03=083 80+03=095    False
     2            1            74 54+83=137 54+83=135    False
     2            1            75 73+47=120 73+47=125    False
     2            1            76 99+93=192 99+93=155    False
     2            1            77 99+13=112 99+13=111    False
     2            1            78 92+66=158 92+66=155    False
     2            1            79 90+31=121 90+31=131    False
     2            1            80 25+69=094 25+69=095    False
     2            1            81 25+44=069 25+44=085    False
     2            1            82 00+93=093 00+93=085    False
     2            1            83 88+87=175 88+87=155    False
     2            1            84 47+56=103 47+56=111    False
     2            1            85 43+59=102 43+59=111    False
     2            1            86 22+00=022 22+00=033    False
     2            1            87 34+04=038 34+04=055    False
     2            1            88 65+13=078 65+13=081    False
     2            1            89 39+82=121 39+82=125    False
     2            1            90 66+83=149 66+83=135    False
     2            1            91 51+69=120 51+69=111    False
     2            1            92 80+21=101 80+21=111    False
     2            1            93 36+79=115 36+79=111    False
     2            1            94 21+68=089 21+68=095    False
     2            1            95 11+66=077 11+66=081    False
     2            1            96 55+19=074 55+19=075    False
     2            1            97 51+61=112 51+61=111    False
     2            1            98 38+88=126 38+88=125    False
     2            1            99 37+27=064 37+27=085    False
     2            1           100 18+63=081 18+63=081     True
     2            1           101 48+11=059 48+11=051    False
     2            1           102 72+68=140 72+68=135    False
     2            1           103 37+39=076 37+39=085    False
     2            1           104 64+95=159 64+95=155    False
     2            1           105 49+75=124 49+75=125    False
     2            1           106 45+66=111 45+66=111     True
     2            1           107 34+87=121 34+87=125    False
     2            1           108 02+84=086 02+84=095    False
     2            1           109 95+00=095 95+00=095     True
     2            1           110 09+56=065 09+56=075    False
     2            1           111 22+66=088 22+66=095    False
     2            1           112 43+18=061 43+18=055    False
     2            1           113 61+35=096 61+35=111    False
     2            1           114 13+73=086 13+73=091    False
     2            1           115 25+95=120 25+95=111    False
     2            1           116 73+96=169 73+96=155    False
     2            1           117 03+96=099 03+96=095    False
     2            1           118 97+82=179 97+82=175    False
     2            1           119 18+42=060 18+42=055    False
     2            1           120 29+98=127 29+98=111    False
     2            1           121 61+00=061 61+00=085    False
     2            1           122 22+98=120 22+98=111    False
     2            1           123 12+50=062 12+50=051    False
     2            1           124 02+58=060 02+58=075    False
     2            1           125 75+86=161 75+86=155    False
     2            1           126 31+57=088 31+57=085    False
     2            1           127 49+82=131 49+82=135    False
     2            1           128 15+33=048 15+33=051    False
     2            1           129 49+57=106 49+57=111    False
     2            1           130 61+70=131 61+70=135    False
     2            1           131 91+51=142 91+51=131    False
     2            1           132 50+05=055 50+05=055     True
     2            1           133 44+16=060 44+16=055    False
     2            1           134 92+01=093 92+01=095    False
     2            1           135 85+82=167 85+82=155    False
     2            1           136 07+41=048 07+41=055    False
     2            1           137 40+06=046 40+06=055    False
     2            1           138 79+62=141 79+62=135    False
     2            1           139 95+62=157 95+62=155    False
     2            1           140 42+93=135 42+93=135     True
     2            1           141 32+73=105 32+73=111    False
     2            1           142 47+09=056 47+09=075    False
     2            1           143 59+50=109 59+50=111    False
     2            1           144 61+77=138 61+77=135    False
     2            1           145 64+06=070 64+06=085    False
     2            1           146 35+10=045 35+10=031    False
     2            1           147 32+88=120 32+88=125    False
     2            1           148 03+95=098 03+95=085    False
     2            1           149 11+38=049 11+38=053    False
     2            1           150 21+67=088 21+67=095    False
     2            1           151 33+25=058 33+25=075    False
     2            1           152 63+45=108 63+45=111    False
     2            1           153 56+12=068 56+12=055    False
     2            1           154 19+79=098 19+79=095    False
     2            1           155 60+43=103 60+43=111    False
     2            1           156 07+61=068 07+61=085    False
     2            1           157 58+03=061 58+03=075    False
     2            1           158 11+10=021 11+10=031    False
     2            1           159 49+89=138 49+89=135    False
     2            1           160 37+58=095 37+58=111    False
     2            1           161 59+78=137 59+78=135    False
     2            1           162 11+21=032 11+21=031    False
     2            1           163 37+43=080 37+43=085    False
     2            1           164 44+21=065 44+21=075    False
     2            1           165 22+97=119 22+97=111    False
     2            1           166 65+35=100 65+35=111    False
     2            1           167 06+51=057 06+51=055    False
     2            1           168 65+25=090 65+25=095    False
     2            1           169 74+94=168 74+94=155    False
     2            1           170 87+55=142 87+55=135    False
     2            1           171 90+67=157 90+67=155    False
     2            1           172 11+02=013 11+02=031    False
     2            1           173 01+66=067 01+66=085    False
     2            1           174 56+00=056 56+00=055    False
     2            1           175 58+52=110 58+52=111    False
     2            1           176 24+99=123 24+99=111    False
     2            1           177 97+13=110 97+13=111    False
     2            1           178 42+94=136 42+94=135    False
     2            1           179 60+15=075 60+15=081    False
     2            1           180 20+46=066 20+46=075    False
     2            1           181 40+70=110 40+70=111    False
     2            1           182 95+45=140 95+45=135    False
     2            1           183 96+95=191 96+95=155    False
     2            1           184 98+20=118 98+20=111    False
     2            1           185 43+19=062 43+19=055    False
     2            1           186 50+69=119 50+69=111    False
     2            1           187 27+53=080 27+53=085    False
     2            1           188 24+25=049 24+25=075    False
     2            1           189 65+92=157 65+92=155    False
     2            1           190 28+14=042 28+14=055    False
     2            1           191 20+57=077 20+57=085    False
     2            1           192 59+97=156 59+97=155    False
     2            1           193 98+32=130 98+32=135    False
     2            1           194 55+84=139 55+84=135    False
     2            1           195 20+39=059 20+39=075    False
     2            1           196 86+47=133 86+47=135    False
     2            1           197 92+36=128 92+36=135    False
     2            1           198 05+38=043 05+38=055    False
     2            1           199 77+36=113 77+36=111    False
     2            1           200 41+64=105 41+64=111    False
     2            1           201 74+51=125 74+51=125     True
     2            1           202 74+55=129 74+55=135    False
     2            1           203 64+64=128 64+64=125    False
     2            1           204 60+19=079 60+19=081    False
     2            1           205 77+96=173 77+96=155    False
     2            1           206 22+30=052 22+30=055    False
     2            1           207 82+49=131 82+49=135    False
     2            1           208 39+67=106 39+67=111    False
     2            1           209 62+40=102 62+40=111    False
     2            1           210 28+71=099 28+71=107    False
     2            1           211 47+26=073 47+26=085    False
     2            1           212 98+54=152 98+54=155    False
     2            1           213 38+70=108 38+70=111    False
     2            1           214 63+40=103 63+40=111    False
     2            1           215 86+62=148 86+62=135    False
     2            1           216 22+65=087 22+65=095    False
     2            1           217 41+17=058 41+17=053    False
     2            1           218 68+88=156 68+88=135    False
     2            1           219 96+70=166 96+70=155    False
     2            1           220 99+29=128 99+29=111    False
     2            1           221 83+39=122 83+39=125    False
     2            1           222 26+55=081 26+55=085    False
     2            1           223 53+70=123 53+70=125    False
     2            1           224 94+12=106 94+12=111    False
     2            1           225 00+37=037 00+37=055    False
     2            1           226 36+94=130 36+94=135    False
     2            1           227 40+58=098 40+58=111    False
     2            1           228 19+80=099 19+80=107    False
     2            1           229 49+44=093 49+44=111    False
     2            1           230 70+27=097 70+27=107    False
     2            1           231 52+80=132 52+80=135    False
     2            1           232 77+90=167 77+90=155    False
     2            1           233 13+92=105 13+92=111    False
     2            1           234 59+09=068 59+09=085    False
     2            1           235 33+55=088 33+55=085    False
     2            1           236 85+16=101 85+16=107    False
     2            1           237 25+65=090 25+65=095    False
     2            1           238 46+20=066 46+20=075    False
     2            1           239 29+52=081 29+52=085    False
     2            1           240 32+36=068 32+36=085    False
     2            1           241 47+08=055 47+08=075    False
     2            1           242 21+84=105 21+84=111    False
     2            1           243 24+45=069 24+45=085    False
     2            1           244 29+15=044 29+15=055    False
     2            1           245 83+03=086 83+03=095    False
     2            1           246 83+36=119 83+36=125    False
     2            1           247 58+95=153 58+95=155    False
     2            1           248 76+79=155 76+79=135    False
     2            1           249 63+30=093 63+30=111    False
     2            1           250 38+24=062 38+24=085    False
     2            1           251 19+46=065 19+46=075    False
     2            1           252 99+66=165 99+66=155    False
     2            1           253 95+73=168 95+73=155    False
     2            1           254 65+27=092 65+27=095    False
     2            1           255 91+83=174 91+83=151    False
     2            2             0 65+49=114 65+49=111    False
     2            2             1 03+08=011 03+08=035    False
     2            2             2 67+81=148 67+81=135    False
     2            2             3 47+23=070 47+23=085    False
     2            2             4 43+91=134 43+91=131    False
     2            2             5 41+67=108 41+67=111    False
     2            2             6 02+33=035 02+33=051    False
     2            2             7 64+84=148 64+84=135    False
     2            2             8 81+64=145 81+64=135    False
     2            2             9 80+11=091 80+11=091     True
     2            2            10 78+01=079 78+01=095    False
     2            2            11 89+18=107 89+18=107     True
     2            2            12 45+52=097 45+52=111    False
     2            2            13 35+30=065 35+30=075    False
     2            2            14 53+32=085 53+32=085     True
     2            2            15 49+90=139 49+90=135    False
     2            2            16 41+37=078 41+37=085    False
     2            2            17 35+14=049 35+14=051    False
     2            2            18 92+50=142 92+50=131    False
     2            2            19 37+60=097 37+60=111    False
     2            2            20 91+61=152 91+61=131    False
     2            2            21 80+77=157 80+77=135    False
     2            2            22 66+24=090 66+24=095    False
     2            2            23 81+07=088 81+07=095    False
     2            2            24 85+59=144 85+59=135    False
     2            2            25 19+69=088 19+69=095    False
     2            2            26 91+44=135 91+44=131    False
     2            2            27 25+29=054 25+29=075    False
     2            2            28 27+08=035 27+08=055    False
     2            2            29 66+14=080 66+14=081    False
     2            2            30 95+11=106 95+11=111    False
     2            2            31 13+97=110 13+97=111    False
     2            2            32 94+40=134 94+40=131    False
     2            2            33 74+31=105 74+31=111    False
     2            2            34 49+00=049 49+00=055    False
     2            2            35 59+18=077 59+18=081    False
     2            2            36 07+65=072 07+65=085    False
     2            2            37 83+55=138 83+55=135    False
     2            2            38 49+80=129 49+80=135    False
     2            2            39 64+17=081 64+17=081     True
     2            2            40 48+83=131 48+83=135    False
     2            2            41 95+44=139 95+44=135    False
     2            2            42 71+26=097 71+26=107    False
     2            2            43 06+74=080 06+74=085    False
     2            2            44 34+24=058 34+24=075    False
     2            2            45 59+71=130 59+71=125    False
     2            2            46 68+32=100 68+32=111    False
     2            2            47 38+81=119 38+81=125    False
     2            2            48 29+56=085 29+56=085     True
     2            2            49 54+55=109 54+55=111    False
     2            2            50 31+27=058 31+27=085    False
     2            2            51 97+89=186 97+89=177    False
     2            2            52 48+09=057 48+09=075    False
     2            2            53 86+76=162 86+76=155    False
     2            2            54 82+59=141 82+59=135    False
     2            2            55 01+67=068 01+67=085    False
     2            2            56 26+06=032 26+06=055    False
     2            2            57 22+46=068 22+46=085    False
     2            2            58 85+16=101 85+16=107    False
     2            2            59 29+08=037 29+08=055    False
     2            2            60 73+94=167 73+94=155    False
     2            2            61 19+62=081 19+62=081     True
     2            2            62 86+62=148 86+62=135    False
     2            2            63 38+99=137 38+99=135    False
     2            2            64 64+25=089 64+25=095    False
     2            2            65 61+72=133 61+72=135    False
     2            2            66 78+88=166 78+88=155    False
     2            2            67 43+66=109 43+66=111    False
     2            2            68 69+35=104 69+35=111    False
     2            2            69 33+77=110 33+77=111    False
     2            2            70 37+37=074 37+37=085    False
     2            2            71 87+54=141 87+54=135    False
     2            2            72 68+90=158 68+90=155    False
     2            2            73 83+44=127 83+44=135    False
     2            2            74 41+09=050 41+09=055    False
     2            2            75 13+48=061 13+48=055    False
     2            2            76 01+41=042 01+41=031    False
     2            2            77 19+74=093 19+74=095    False
     2            2            78 15+05=020 15+05=031    False
     2            2            79 55+46=101 55+46=111    False
     2            2            80 68+33=101 68+33=111    False
     2            2            81 44+40=084 44+40=111    False
     2            2            82 88+03=091 88+03=095    False
     2            2            83 81+79=160 81+79=135    False
     2            2            84 18+98=116 18+98=111    False
     2            2            85 70+64=134 70+64=135    False
     2            2            86 26+44=070 26+44=085    False
     2            2            87 98+87=185 98+87=177    False
     2            2            88 18+74=092 18+74=095    False
     2            2            89 50+68=118 50+68=111    False
     2            2            90 13+51=064 13+51=051    False
     2            2            91 90+89=179 90+89=155    False
     2            2            92 47+78=125 47+78=125     True
     2            2            93 81+57=138 81+57=135    False
     2            2            94 34+47=081 34+47=085    False
     2            2            95 94+23=117 94+23=111    False
     2            2            96 07+70=077 07+70=085    False
     2            2            97 56+33=089 56+33=111    False
     2            2            98 33+04=037 33+04=055    False
     2            2            99 26+09=035 26+09=055    False
     2            2           100 14+92=106 14+92=111    False
     2            2           101 78+54=132 78+54=135    False
     2            2           102 36+76=112 36+76=111    False
     2            2           103 17+47=064 17+47=075    False
     2            2           104 28+18=046 28+18=055    False
     2            2           105 78+54=132 78+54=135    False
     2            2           106 84+72=156 84+72=135    False
     2            2           107 00+44=044 00+44=055    False
     2            2           108 50+41=091 50+41=111    False
     2            2           109 87+88=175 87+88=155    False
     2            2           110 11+66=077 11+66=081    False
     2            2           111 80+60=140 80+60=135    False
     2            2           112 78+76=154 78+76=135    False
     2            2           113 24+74=098 24+74=107    False
     2            2           114 88+48=136 88+48=135    False
     2            2           115 38+31=069 38+31=085    False
     2            2           116 29+27=056 29+27=085    False
     2            2           117 08+45=053 08+45=055    False
     2            2           118 28+13=041 28+13=055    False
     2            2           119 53+99=152 53+99=155    False
     2            2           120 47+92=139 47+92=135    False
     2            2           121 76+21=097 76+21=107    False
     2            2           122 53+96=149 53+96=155    False
     2            2           123 93+91=184 93+91=151    False
     2            2           124 97+33=130 97+33=135    False
     2            2           125 67+78=145 67+78=135    False
     2            2           126 58+05=063 58+05=075    False
     2            2           127 00+16=016 00+16=033    False
     2            2           128 80+19=099 80+19=107    False
     2            2           129 98+22=120 98+22=111    False
     2            2           130 09+62=071 09+62=085    False
     2            2           131 06+23=029 06+23=055    False
     2            2           132 32+99=131 32+99=135    False
     2            2           133 17+02=019 17+02=033    False
     2            2           134 64+35=099 64+35=111    False
     2            2           135 35+83=118 35+83=125    False
     2            2           136 71+36=107 71+36=111    False
     2            2           137 75+06=081 75+06=095    False
     2            2           138 88+95=183 88+95=177    False
     2            2           139 19+98=117 19+98=111    False
     2            2           140 28+89=117 28+89=111    False
     2            2           141 33+11=044 33+11=031    False
     2            2           142 34+49=083 34+49=085    False
     2            2           143 90+35=125 90+35=135    False
     2            2           144 22+90=112 22+90=111    False
     2            2           145 98+89=187 98+89=177    False
     2            2           146 88+47=135 88+47=135     True
     2            2           147 30+86=116 30+86=125    False
     2            2           148 31+48=079 31+48=085    False
     2            2           149 39+21=060 39+21=075    False
     2            2           150 19+17=036 19+17=035    False
     2            2           151 27+60=087 27+60=095    False
     2            2           152 12+16=028 12+16=031    False
     2            2           153 51+75=126 51+75=125    False
     2            2           154 10+74=084 10+74=091    False
     2            2           155 42+63=105 42+63=111    False
     2            2           156 40+14=054 40+14=031    False
     2            2           157 23+93=116 23+93=111    False
     2            2           158 85+26=111 85+26=111     True
     2            2           159 28+46=074 28+46=085    False
     2            2           160 28+33=061 28+33=085    False
     2            2           161 43+30=073 43+30=085    False
     2            2           162 89+72=161 89+72=145    False
     2            2           163 52+21=073 52+21=085    False
     2            2           164 21+54=075 21+54=085    False
     2            2           165 69+13=082 69+13=081    False
     2            2           166 07+60=067 07+60=085    False
     2            2           167 63+83=146 63+83=135    False
     2            2           168 80+69=149 80+69=135    False
     2            2           169 27+28=055 27+28=085    False
     2            2           170 42+31=073 42+31=085    False
     2            2           171 51+99=150 51+99=135    False
     2            2           172 28+75=103 28+75=107    False
     2            2           173 38+57=095 38+57=111    False
     2            2           174 83+16=099 83+16=107    False
     2            2           175 92+94=186 92+94=151    False
     2            2           176 55+75=130 55+75=135    False
     2            2           177 59+51=110 59+51=111    False
     2            2           178 33+09=042 33+09=055    False
     2            2           179 53+13=066 53+13=051    False
     2            2           180 05+70=075 05+70=085    False
     2            2           181 12+20=032 12+20=031    False
     2            2           182 11+49=060 11+49=053    False
     2            2           183 63+45=108 63+45=111    False
     2            2           184 92+23=115 92+23=111    False
     2            2           185 82+45=127 82+45=135    False
     2            2           186 23+41=064 23+41=075    False
     2            2           187 64+26=090 64+26=095    False
     2            2           188 91+24=115 91+24=111    False
     2            2           189 20+32=052 20+32=055    False
     2            2           190 83+21=104 83+21=111    False
     2            2           191 07+20=027 07+20=055    False
     2            2           192 94+14=108 94+14=111    False
     2            2           193 96+89=185 96+89=177    False
     2            2           194 13+08=021 13+08=035    False
     2            2           195 32+05=037 32+05=055    False
     2            2           196 09+51=060 09+51=055    False
     2            2           197 26+29=055 26+29=075    False
     2            2           198 49+65=114 49+65=111    False
     2            2           199 32+66=098 32+66=111    False
     2            2           200 41+08=049 41+08=055    False
     2            2           201 26+79=105 26+79=107    False
     2            2           202 29+91=120 29+91=111    False
     2            2           203 51+00=051 51+00=051     True
     2            2           204 61+60=121 61+60=125    False
     2            2           205 45+78=123 45+78=125    False
     2            2           206 56+16=072 56+16=075    False
     2            2           207 66+68=134 66+68=125    False
     2            2           208 32+16=048 32+16=053    False
     2            2           209 84+49=133 84+49=135    False
     2            2           210 45+09=054 45+09=075    False
     2            2           211 96+78=174 96+78=155    False
     2            2           212 10+02=012 10+02=031    False
     2            2           213 36+60=096 36+60=111    False
     2            2           214 44+36=080 44+36=085    False
     2            2           215 12+86=098 12+86=095    False
     2            2           216 94+54=148 94+54=135    False
     2            2           217 64+73=137 64+73=135    False
     2            2           218 73+10=083 73+10=091    False
     2            2           219 14+62=076 14+62=081    False
     2            2           220 25+22=047 25+22=055    False
     2            2           221 94+22=116 94+22=111    False
     2            2           222 41+76=117 41+76=111    False
     2            2           223 38+46=084 38+46=085    False
     2            2           224 71+72=143 71+72=135    False
     2            2           225 74+79=153 74+79=135    False
     2            2           226 99+67=166 99+67=155    False
     2            2           227 78+71=149 78+71=135    False
     2            2           228 23+19=042 23+19=055    False
     2            2           229 51+65=116 51+65=111    False
     2            2           230 94+86=180 94+86=177    False
     2            2           231 09+79=088 09+79=095    False
     2            2           232 69+39=108 69+39=111    False
     2            2           233 84+13=097 84+13=107    False
     2            2           234 36+59=095 36+59=111    False
     2            2           235 87+47=134 87+47=135    False
     2            2           236 50+00=050 50+00=055    False
     2            2           237 76+96=172 76+96=155    False
     2            2           238 12+18=030 12+18=031    False
     2            2           239 99+95=194 99+95=177    False
     2            2           240 22+00=022 22+00=033    False
     2            2           241 96+18=114 96+18=111    False
     2            2           242 51+20=071 51+20=075    False
     2            2           243 66+81=147 66+81=135    False
     2            2           244 78+18=096 78+18=095    False
     2            2           245 09+78=087 09+78=095    False
     2            2           246 24+20=044 24+20=055    False
     2            2           247 76+13=089 76+13=091    False
     2            2           248 05+10=015 05+10=031    False
     2            2           249 97+14=111 97+14=111     True
     2            2           250 92+38=130 92+38=135    False
     2            2           251 77+13=090 77+13=091    False
     2            2           252 70+19=089 70+19=095    False
     2            2           253 90+45=135 90+45=135     True
     2            2           254 50+09=059 50+09=075    False
     2            2           255 78+06=084 78+06=095    False
     2            3             0 03+25=028 03+25=055    False
     2            3             1 48+43=091 48+43=111    False
     2            3             2 39+47=086 39+47=085    False
     2            3             3 48+19=067 48+19=075    False
     2            3             4 07+22=029 07+22=055    False
     2            3             5 73+68=141 73+68=135    False
     2            3             6 14+56=070 14+56=075    False
     2            3             7 96+95=191 96+95=155    False
     2            3             8 96+28=124 96+28=111    False
     2            3             9 82+05=087 82+05=095    False
     2            3            10 27+94=121 27+94=111    False
     2            3            11 87+86=173 87+86=155    False
     2            3            12 00+68=068 00+68=085    False
     2            3            13 11+37=048 11+37=055    False
     2            3            14 95+93=188 95+93=155    False
     2            3            15 75+82=157 75+82=135    False
     2            3            16 41+71=112 41+71=111    False
     2            3            17 60+14=074 60+14=081    False
     2            3            18 77+77=154 77+77=135    False
     2            3            19 31+84=115 31+84=111    False
     2            3            20 31+57=088 31+57=085    False
     2            3            21 27+87=114 27+87=111    False
     2            3            22 31+89=120 31+89=125    False
     2            3            23 22+18=040 22+18=053    False
     2            3            24 38+25=063 38+25=085    False
     2            3            25 64+54=118 64+54=111    False
     2            3            26 85+60=145 85+60=135    False
     2            3            27 14+71=085 14+71=091    False
     2            3            28 06+16=022 06+16=035    False
     2            3            29 78+61=139 78+61=135    False
     2            3            30 65+75=140 65+75=135    False
     2            3            31 13+83=096 13+83=091    False
     2            3            32 75+49=124 75+49=125    False
     2            3            33 05+78=083 05+78=095    False
     2            3            34 66+55=121 66+55=111    False
     2            3            35 03+05=008 03+05=035    False
     2            3            36 69+99=168 69+99=155    False
     2            3            37 52+82=134 52+82=135    False
     2            3            38 45+97=142 45+97=135    False
     2            3            39 66+17=083 66+17=085    False
     2            3            40 36+17=053 36+17=055    False
     2            3            41 92+74=166 92+74=155    False
     2            3            42 48+44=092 48+44=111    False
     2            3            43 34+17=051 34+17=055    False
     2            3            44 56+11=067 56+11=051    False
     2            3            45 77+23=100 77+23=107    False
     2            3            46 10+11=021 10+11=031    False
     2            3            47 32+65=097 32+65=111    False
     2            3            48 53+49=102 53+49=111    False
     2            3            49 68+86=154 68+86=135    False
     2            3            50 52+94=146 52+94=135    False
     2            3            51 97+71=168 97+71=155    False
     2            3            52 05+37=042 05+37=055    False
     2            3            53 58+75=133 58+75=135    False
     2            3            54 06+24=030 06+24=055    False
     2            3            55 15+44=059 15+44=051    False
     2            3            56 90+49=139 90+49=135    False
     2            3            57 50+37=087 50+37=085    False
     2            3            58 88+61=149 88+61=135    False
     2            3            59 21+57=078 21+57=085    False
     2            3            60 24+85=109 24+85=111    False
     2            3            61 01+66=067 01+66=085    False
     2            3            62 50+46=096 50+46=111    False
     2            3            63 76+65=141 76+65=135    False
     2            3            64 23+74=097 23+74=107    False
     2            3            65 76+16=092 76+16=095    False
     2            3            66 06+08=014 06+08=055    False
     2            3            67 69+25=094 69+25=095    False
     2            3            68 15+23=038 15+23=031    False
     2            3            69 41+02=043 41+02=051    False
     2            3            70 16+66=082 16+66=085    False
     2            3            71 59+94=153 59+94=155    False
     2            3            72 32+88=120 32+88=125    False
     2            3            73 46+21=067 46+21=075    False
     2            3            74 57+28=085 57+28=085     True
     2            3            75 00+31=031 00+31=031     True
     2            3            76 77+07=084 77+07=095    False
     2            3            77 28+70=098 28+70=107    False
     2            3            78 05+61=066 05+61=085    False
     2            3            79 22+09=031 22+09=055    False
     2            3            80 08+94=102 08+94=095    False
     2            3            81 40+11=051 40+11=031    False
     2            3            82 10+48=058 10+48=055    False
     2            3            83 27+56=083 27+56=085    False
     2            3            84 42+16=058 42+16=053    False
     2            3            85 69+43=112 69+43=111    False
     2            3            86 57+69=126 57+69=113    False
     2            3            87 18+86=104 18+86=107    False
     2            3            88 86+80=166 86+80=155    False
     2            3            89 30+85=115 30+85=111    False
     2            3            90 77+66=143 77+66=135    False
     2            3            91 39+64=103 39+64=111    False
     2            3            92 76+61=137 76+61=135    False
     2            3            93 42+61=103 42+61=111    False
     2            3            94 07+30=037 07+30=055    False
     2            3            95 35+93=128 35+93=135    False
     2            3            96 40+90=130 40+90=135    False
     2            3            97 08+91=099 08+91=085    False
     2            3            98 62+34=096 62+34=111    False
     2            3            99 86+49=135 86+49=135     True
     2            3           100 73+23=096 73+23=107    False
     2            3           101 87+35=122 87+35=125    False
     2            3           102 35+31=066 35+31=075    False
     2            3           103 07+13=020 07+13=031    False
     2            3           104 39+41=080 39+41=085    False
     2            3           105 44+63=107 44+63=111    False
     2            3           106 94+66=160 94+66=155    False
     2            3           107 49+54=103 49+54=111    False
     2            3           108 79+46=125 79+46=125     True
     2            3           109 53+12=065 53+12=051    False
     2            3           110 60+92=152 60+92=151    False
     2            3           111 25+60=085 25+60=085     True
     2            3           112 64+53=117 64+53=111    False
     2            3           113 41+02=043 41+02=051    False
     2            3           114 00+97=097 00+97=085    False
     2            3           115 12+52=064 12+52=051    False
     2            3           116 39+50=089 39+50=085    False
     2            3           117 87+21=108 87+21=111    False
     2            3           118 04+99=103 04+99=095    False
     2            3           119 19+75=094 19+75=095    False
     2            3           120 90+05=095 90+05=095     True
     2            3           121 54+39=093 54+39=111    False
     2            3           122 29+26=055 29+26=075    False
     2            3           123 82+95=177 82+95=155    False
     2            3           124 55+09=064 55+09=075    False
     2            3           125 02+62=064 02+62=085    False
     2            3           126 68+30=098 68+30=111    False
     2            3           127 99+16=115 99+16=111    False
     2            3           128 63+11=074 63+11=081    False
     2            3           129 42+92=134 42+92=131    False
     2            3           130 99+16=115 99+16=111    False
     2            3           131 50+31=081 50+31=085    False
     2            3           132 23+46=069 23+46=085    False
     2            3           133 45+73=118 45+73=125    False
     2            3           134 89+77=166 89+77=155    False
     2            3           135 45+78=123 45+78=125    False
     2            3           136 96+60=156 96+60=155    False
     2            3           137 74+61=135 74+61=135     True
     2            3           138 87+01=088 87+01=095    False
     2            3           139 63+88=151 63+88=135    False
     2            3           140 59+72=131 59+72=135    False
     2            3           141 17+96=113 17+96=111    False
     2            3           142 89+77=166 89+77=155    False
     2            3           143 24+69=093 24+69=095    False
     2            3           144 75+83=158 75+83=145    False
     2            3           145 50+54=104 50+54=111    False
     2            3           146 93+47=140 93+47=135    False
     2            3           147 20+55=075 20+55=085    False
     2            3           148 91+79=170 91+79=155    False
     2            3           149 15+13=028 15+13=031    False
     2            3           150 86+09=095 86+09=095     True
     2            3           151 29+58=087 29+58=085    False
     2            3           152 01+29=030 01+29=055    False
     2            3           153 65+48=113 65+48=111    False
     2            3           154 96+45=141 96+45=135    False
     2            3           155 58+69=127 58+69=113    False
     2            3           156 84+43=127 84+43=135    False
     2            3           157 90+38=128 90+38=135    False
     2            3           158 39+97=136 39+97=135    False
     2            3           159 74+84=158 74+84=135    False
     2            3           160 86+22=108 86+22=111    False
     2            3           161 01+86=087 01+86=095    False
     2            3           162 81+63=144 81+63=135    False
     2            3           163 80+94=174 80+94=155    False
     2            3           164 44+42=086 44+42=111    False
     2            3           165 72+60=132 72+60=135    False
     2            3           166 28+07=035 28+07=055    False
     2            3           167 69+54=123 69+54=111    False
     2            3           168 68+77=145 68+77=135    False
     2            3           169 90+16=106 90+16=111    False
     2            3           170 64+50=114 64+50=111    False
     2            3           171 46+88=134 46+88=135    False
     2            3           172 55+99=154 55+99=155    False
     2            3           173 31+97=128 31+97=135    False
     2            3           174 79+28=107 79+28=107     True
     2            3           175 81+43=124 81+43=131    False
     2            3           176 41+15=056 41+15=031    False
     2            3           177 38+77=115 38+77=111    False
     2            3           178 25+06=031 25+06=055    False
     2            3           179 01+93=094 01+93=085    False
     2            3           180 97+22=119 97+22=111    False
     2            3           181 71+84=155 71+84=135    False
     2            3           182 26+36=062 26+36=085    False
     2            3           183 60+92=152 60+92=151    False
     2            3           184 02+94=096 02+94=085    False
     2            3           185 31+58=089 31+58=085    False
     2            3           186 70+52=122 70+52=125    False
     2            3           187 19+42=061 19+42=055    False
     2            3           188 95+73=168 95+73=155    False
     2            3           189 21+25=046 21+25=055    False
     2            3           190 13+58=071 13+58=075    False
     2            3           191 62+28=090 62+28=095    False
     2            3           192 38+14=052 38+14=055    False
     2            3           193 66+75=141 66+75=135    False
     2            3           194 24+59=083 24+59=085    False
     2            3           195 97+66=163 97+66=155    False
     2            3           196 76+70=146 76+70=135    False
     2            3           197 08+40=048 08+40=055    False
     2            3           198 84+00=084 84+00=095    False
     2            3           199 54+73=127 54+73=125    False
     2            3           200 16+88=104 16+88=107    False
     2            3           201 99+47=146 99+47=135    False
     2            3           202 31+95=126 31+95=135    False
     2            3           203 01+79=080 01+79=095    False
     2            3           204 03+68=071 03+68=085    False
     2            3           205 10+05=015 10+05=031    False
     2            3           206 98+90=188 98+90=155    False
     2            3           207 58+53=111 58+53=111     True
     2            3           208 34+87=121 34+87=125    False
     2            3           209 07+31=038 07+31=055    False
     2            3           210 59+08=067 59+08=085    False
     2            3           211 51+38=089 51+38=111    False
     2            3           212 62+62=124 62+62=125    False
     2            3           213 80+32=112 80+32=111    False
     2            3           214 69+16=085 69+16=085     True
     2            3           215 01+17=018 01+17=033    False
     2            3           216 74+41=115 74+41=111    False
     2            3           217 20+89=109 20+89=111    False
     2            3           218 53+50=103 53+50=111    False
     2            3           219 82+85=167 82+85=155    False
     2            3           220 34+47=081 34+47=085    False
     2            3           221 34+45=079 34+45=085    False
     2            3           222 77+34=111 77+34=111     True
     2            3           223 56+33=089 56+33=111    False
     2            3           224 97+56=153 97+56=155    False
     2            3           225 29+06=035 29+06=055    False
     2            3           226 78+96=174 78+96=155    False
     2            3           227 28+65=093 28+65=095    False
     2            3           228 61+64=125 61+64=125     True
     2            3           229 32+64=096 32+64=111    False
     2            3           230 98+32=130 98+32=135    False
     2            3           231 25+35=060 25+35=075    False
     2            3           232 05+08=013 05+08=055    False
     2            3           233 05+26=031 05+26=055    False
     2            3           234 84+71=155 84+71=135    False
     2            3           235 33+10=043 33+10=031    False
     2            3           236 98+35=133 98+35=135    False
     2            3           237 68+98=166 68+98=155    False
     2            3           238 03+63=066 03+63=085    False
     2            3           239 12+96=108 12+96=111    False
     2            3           240 02+81=083 02+81=095    False
     2            3           241 83+13=096 83+13=107    False
     2            3           242 55+92=147 55+92=135    False
     2            3           243 96+09=105 96+09=095    False
     2            3           244 61+08=069 61+08=085    False
     2            3           245 39+75=114 39+75=111    False
     2            3           246 40+74=114 40+74=111    False
     2            3           247 39+80=119 39+80=125    False
     2            3           248 57+95=152 57+95=155    False
     2            3           249 92+97=189 92+97=155    False
     2            3           250 33+03=036 33+03=055    False
     2            3           251 74+92=166 74+92=155    False
     2            3           252 99+09=108 99+09=095    False
     2            3           253 98+10=108 98+10=111    False
     2            3           254 46+77=123 46+77=125    False
     2            3           255 85+78=163 85+78=155    False
     2            4             0 41+21=062 41+21=051    False
     2            4             1 49+13=062 49+13=055    False
     2            4             2 59+07=066 59+07=085    False
     2            4             3 31+11=042 31+11=031    False
     2            4             4 74+16=090 74+16=091    False
     2            4             5 43+38=081 43+38=085    False
     2            4             6 08+67=075 08+67=085    False
     2            4             7 31+66=097 31+66=111    False
     2            4             8 10+31=041 10+31=031    False
     2            4             9 34+59=093 34+59=111    False
     2            4            10 78+42=120 78+42=125    False
     2            4            11 13+41=054 13+41=031    False
     2            4            12 97+89=186 97+89=177    False
     2            4            13 15+62=077 15+62=081    False
     2            4            14 39+36=075 39+36=085    False
     2            4            15 21+25=046 21+25=055    False
     2            4            16 74+56=130 74+56=135    False
     2            4            17 85+47=132 85+47=135    False
     2            4            18 47+32=079 47+32=085    False
     2            4            19 37+66=103 37+66=111    False
     2            4            20 16+29=045 16+29=055    False
     2            4            21 86+77=163 86+77=155    False
     2            4            22 80+07=087 80+07=095    False
     2            4            23 87+05=092 87+05=095    False
     2            4            24 58+16=074 58+16=081    False
     2            4            25 52+79=131 52+79=135    False
     2            4            26 91+08=099 91+08=095    False
     2            4            27 47+78=125 47+78=125     True
     2            4            28 86+96=182 86+96=177    False
     2            4            29 90+22=112 90+22=111    False
     2            4            30 31+18=049 31+18=053    False
     2            4            31 86+15=101 86+15=107    False
     2            4            32 15+95=110 15+95=111    False
     2            4            33 42+11=053 42+11=031    False
     2            4            34 65+99=164 65+99=155    False
     2            4            35 89+29=118 89+29=111    False
     2            4            36 35+11=046 35+11=031    False
     2            4            37 71+41=112 71+41=111    False
     2            4            38 16+24=040 16+24=055    False
     2            4            39 77+82=159 77+82=145    False
     2            4            40 55+89=144 55+89=135    False
     2            4            41 17+88=105 17+88=107    False
     2            4            42 54+72=126 54+72=125    False
     2            4            43 34+98=132 34+98=135    False
     2            4            44 09+97=106 09+97=095    False
     2            4            45 91+07=098 91+07=095    False
     2            4            46 55+94=149 55+94=155    False
     2            4            47 22+58=080 22+58=085    False
     2            4            48 91+37=128 91+37=135    False
     2            4            49 16+10=026 16+10=031    False
     2            4            50 96+32=128 96+32=135    False
     2            4            51 35+75=110 35+75=111    False
     2            4            52 88+73=161 88+73=145    False
     2            4            53 35+18=053 35+18=055    False
     2            4            54 33+10=043 33+10=031    False
     2            4            55 08+50=058 08+50=055    False
     2            4            56 22+62=084 22+62=095    False
     2            4            57 26+37=063 26+37=085    False
     2            4            58 80+27=107 80+27=111    False
     2            4            59 68+28=096 68+28=095    False
     2            4            60 48+03=051 48+03=055    False
     2            4            61 40+18=058 40+18=055    False
     2            4            62 16+59=075 16+59=081    False
     2            4            63 02+19=021 02+19=033    False
     2            4            64 01+09=010 01+09=035    False
     2            4            65 62+68=130 62+68=125    False
     2            4            66 09+71=080 09+71=085    False
     2            4            67 00+58=058 00+58=055    False
     2            4            68 16+45=061 16+45=055    False
     2            4            69 24+98=122 24+98=111    False
     2            4            70 47+92=139 47+92=135    False
     2            4            71 94+84=178 94+84=155    False
     2            4            72 21+32=053 21+32=053     True
     2            4            73 29+82=111 29+82=111     True
     2            4            74 32+79=111 32+79=111     True
     2            4            75 13+98=111 13+98=111     True
     2            4            76 41+94=135 41+94=131    False
     2            4            77 51+84=135 51+84=135     True
     2            4            78 42+05=047 42+05=055    False
     2            4            79 39+03=042 39+03=055    False
     2            4            80 02+92=094 02+92=085    False
     2            4            81 99+81=180 99+81=155    False
     2            4            82 32+68=100 32+68=111    False
     2            4            83 52+17=069 52+17=075    False
     2            4            84 56+58=114 56+58=111    False
     2            4            85 21+48=069 21+48=085    False
     2            4            86 61+71=132 61+71=135    False
     2            4            87 17+01=018 17+01=033    False
     2            4            88 68+23=091 68+23=095    False
     2            4            89 00+37=037 00+37=055    False
     2            4            90 94+88=182 94+88=177    False
     2            4            91 06+31=037 06+31=055    False
     2            4            92 27+18=045 27+18=055    False
     2            4            93 41+81=122 41+81=131    False
     2            4            94 15+86=101 15+86=107    False
     2            4            95 36+87=123 36+87=125    False
     2            4            96 17+37=054 17+37=075    False
     2            4            97 13+86=099 13+86=095    False
     2            4            98 29+69=098 29+69=095    False
     2            4            99 31+99=130 31+99=135    False
     2            4           100 47+29=076 47+29=085    False
     2            4           101 08+81=089 08+81=095    False
     2            4           102 72+82=154 72+82=135    False
     2            4           103 46+91=137 46+91=135    False
     2            4           104 70+35=105 70+35=111    False
     2            4           105 90+55=145 90+55=135    False
     2            4           106 99+99=198 99+99=177    False
     2            4           107 60+97=157 60+97=155    False
     2            4           108 03+40=043 03+40=051    False
     2            4           109 35+49=084 35+49=085    False
     2            4           110 32+02=034 32+02=051    False
     2            4           111 70+18=088 70+18=091    False
     2            4           112 99+05=104 99+05=095    False
     2            4           113 78+73=151 78+73=135    False
     2            4           114 03+02=005 03+02=033    False
     2            4           115 50+14=064 50+14=051    False
     2            4           116 62+02=064 62+02=085    False
     2            4           117 16+74=090 16+74=095    False
     2            4           118 68+65=133 68+65=125    False
     2            4           119 74+81=155 74+81=135    False
     2            4           120 37+48=085 37+48=085     True
     2            4           121 63+04=067 63+04=085    False
     2            4           122 06+62=068 06+62=085    False
     2            4           123 95+75=170 95+75=155    False
     2            4           124 92+37=129 92+37=135    False
     2            4           125 81+32=113 81+32=111    False
     2            4           126 53+28=081 53+28=085    False
     2            4           127 52+42=094 52+42=111    False
     2            4           128 66+97=163 66+97=155    False
     2            4           129 00+48=048 00+48=055    False
     2            4           130 65+32=097 65+32=111    False
     2            4           131 60+89=149 60+89=135    False
     2            4           132 71+61=132 71+61=135    False
     2            4           133 98+50=148 98+50=135    False
     2            4           134 90+96=186 90+96=155    False
     2            4           135 02+96=098 02+96=095    False
     2            4           136 62+75=137 62+75=135    False
     2            4           137 41+28=069 41+28=085    False
     2            4           138 95+79=174 95+79=155    False
     2            4           139 48+41=089 48+41=111    False
     2            4           140 87+95=182 87+95=177    False
     2            4           141 75+38=113 75+38=111    False
     2            4           142 31+55=086 31+55=085    False
     2            4           143 54+63=117 54+63=111    False
     2            4           144 75+82=157 75+82=135    False
     2            4           145 46+45=091 46+45=111    False
     2            4           146 13+08=021 13+08=035    False
     2            4           147 77+97=174 77+97=155    False
     2            4           148 37+35=072 37+35=085    False
     2            4           149 21+89=110 21+89=111    False
     2            4           150 58+51=109 58+51=111    False
     2            4           151 91+48=139 91+48=135    False
     2            4           152 33+23=056 33+23=075    False
     2            4           153 80+96=176 80+96=155    False
     2            4           154 78+02=080 78+02=095    False
     2            4           155 38+95=133 38+95=135    False
     2            4           156 99+25=124 99+25=111    False
     2            4           157 30+76=106 30+76=111    False
     2            4           158 42+40=082 42+40=111    False
     2            4           159 85+58=143 85+58=135    False
     2            4           160 44+46=090 44+46=111    False
     2            4           161 06+41=047 06+41=055    False
     2            4           162 65+90=155 65+90=155     True
     2            4           163 43+83=126 43+83=135    False
     2            4           164 36+61=097 36+61=111    False
     2            4           165 61+51=112 61+51=111    False
     2            4           166 38+09=047 38+09=075    False
     2            4           167 21+97=118 21+97=111    False
     2            4           168 83+30=113 83+30=125    False
     2            4           169 11+79=090 11+79=095    False
     2            4           170 14+29=043 14+29=055    False
     2            4           171 21+11=032 21+11=031    False
     2            4           172 43+53=096 43+53=111    False
     2            4           173 02+58=060 02+58=075    False
     2            4           174 78+82=160 78+82=155    False
     2            4           175 91+11=102 91+11=111    False
     2            4           176 58+54=112 58+54=111    False
     2            4           177 00+15=015 00+15=031    False
     2            4           178 83+51=134 83+51=131    False
     2            4           179 44+72=116 44+72=111    False
     2            4           180 71+20=091 71+20=107    False
     2            4           181 24+99=123 24+99=111    False
     2            4           182 46+30=076 46+30=085    False
     2            4           183 08+67=075 08+67=085    False
     2            4           184 47+42=089 47+42=111    False
     2            4           185 95+67=162 95+67=155    False
     2            4           186 40+56=096 40+56=111    False
     2            4           187 17+95=112 17+95=111    False
     2            4           188 94+66=160 94+66=155    False
     2            4           189 14+58=072 14+58=075    False
     2            4           190 56+05=061 56+05=075    False
     2            4           191 70+01=071 70+01=085    False
     2            4           192 97+59=156 97+59=155    False
     2            4           193 94+67=161 94+67=155    False
     2            4           194 13+41=054 13+41=031    False
     2            4           195 85+15=100 85+15=107    False
     2            4           196 48+53=101 48+53=111    False
     2            4           197 62+75=137 62+75=135    False
     2            4           198 87+47=134 87+47=135    False
     2            4           199 31+88=119 31+88=125    False
     2            4           200 97+16=113 97+16=111    False
     2            4           201 48+45=093 48+45=111    False
     2            4           202 99+00=099 99+00=095    False
     2            4           203 15+01=016 15+01=031    False
     2            4           204 28+96=124 28+96=111    False
     2            4           205 20+11=031 20+11=031     True
     2            4           206 07+56=063 07+56=075    False
     2            4           207 06+08=014 06+08=055    False
     2            4           208 45+46=091 45+46=111    False
     2            4           209 48+85=133 48+85=135    False
     2            4           210 62+14=076 62+14=081    False
     2            4           211 82+31=113 82+31=111    False
     2            4           212 85+88=173 85+88=155    False
     2            4           213 77+08=085 77+08=095    False
     2            4           214 16+64=080 16+64=081    False
     2            4           215 00+27=027 00+27=055    False
     2            4           216 36+75=111 36+75=111     True
     2            4           217 38+38=076 38+38=085    False
     2            4           218 88+32=120 88+32=125    False
     2            4           219 09+88=097 09+88=095    False
     2            4           220 96+87=183 96+87=177    False
     2            4           221 71+29=100 71+29=107    False
     2            4           222 99+13=112 99+13=111    False
     2            4           223 03+13=016 03+13=031    False
     2            4           224 67+23=090 67+23=095    False
     2            4           225 15+98=113 15+98=111    False
     2            4           226 10+08=018 10+08=033    False
     2            4           227 46+24=070 46+24=085    False
     2            4           228 55+63=118 55+63=111    False
     2            4           229 28+06=034 28+06=055    False
     2            4           230 43+87=130 43+87=135    False
     2            4           231 34+05=039 34+05=055    False
2024-09-20 14:10:12,112 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:12,113 - root - INFO - ====================================================== Starting Train Epoch: 3/9 ======================================================
2024-09-20 14:10:12,114 - root - INFO - Learning rates for each parameter group: 0.00130882305771417282, 0.00130882305771417282
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 3, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 3, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=269.40225, average_batch_loss=1.05235, average_batch_perplexity=2.86438, lr=0.001308823, 0.001308823]
Epoch: 3, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=269.40225, average_batch_loss=1.05235, average_batch_perplexity=2.86438, lr=0.001308823, 0.001308823]
Epoch: 3, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=266.84833, average_batch_loss=1.04238, average_batch_perplexity=2.83595, lr=0.001331785, 0.001331785]
Epoch: 3, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 11.86it/s, total_batch_loss=266.84833, average_batch_loss=1.04238, average_batch_perplexity=2.83595, lr=0.001331785, 0.001331785]
Epoch: 3, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.86it/s, total_batch_loss=266.84833, average_batch_loss=1.04238, average_batch_perplexity=2.83595, lr=0.001331785, 0.001331785]
Epoch: 3, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.86it/s, total_batch_loss=266.31369, average_batch_loss=1.04029, average_batch_perplexity=2.83003, lr=0.001354747, 0.001354747]
Epoch: 3, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.86it/s, total_batch_loss=266.31369, average_batch_loss=1.04029, average_batch_perplexity=2.83003, lr=0.001354747, 0.001354747]
Epoch: 3, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.86it/s, total_batch_loss=260.87216, average_batch_loss=1.01903, average_batch_perplexity=2.77051, lr=0.001377708, 0.001377708]
Epoch: 3, Step: 4:  14%|█▍        | 4/28 [00:00<00:02, 11.78it/s, total_batch_loss=260.87216, average_batch_loss=1.01903, average_batch_perplexity=2.77051, lr=0.001377708, 0.001377708]
Epoch: 3, Step: 5:  14%|█▍        | 4/28 [00:00<00:02, 11.78it/s, total_batch_loss=260.87216, average_batch_loss=1.01903, average_batch_perplexity=2.77051, lr=0.001377708, 0.001377708]
Epoch: 3, Step: 5:  14%|█▍        | 4/28 [00:00<00:02, 11.78it/s, total_batch_loss=262.74243, average_batch_loss=1.02634, average_batch_perplexity=2.79083, lr=0.001400670, 0.001400670]
Epoch: 3, Step: 6:  14%|█▍        | 4/28 [00:00<00:02, 11.78it/s, total_batch_loss=262.74243, average_batch_loss=1.02634, average_batch_perplexity=2.79083, lr=0.001400670, 0.001400670]
Epoch: 3, Step: 6:  14%|█▍        | 4/28 [00:00<00:02, 11.78it/s, total_batch_loss=262.01190, average_batch_loss=1.02348, average_batch_perplexity=2.78287, lr=0.001423632, 0.001423632]
Epoch: 3, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 11.89it/s, total_batch_loss=262.01190, average_batch_loss=1.02348, average_batch_perplexity=2.78287, lr=0.001423632, 0.001423632]
Epoch: 3, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 11.89it/s, total_batch_loss=262.01190, average_batch_loss=1.02348, average_batch_perplexity=2.78287, lr=0.001423632, 0.001423632]
Epoch: 3, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 11.89it/s, total_batch_loss=255.61806, average_batch_loss=0.99851, average_batch_perplexity=2.71423, lr=0.001446594, 0.001446594]
Epoch: 3, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 11.89it/s, total_batch_loss=255.61806, average_batch_loss=0.99851, average_batch_perplexity=2.71423, lr=0.001446594, 0.001446594]
Epoch: 3, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 11.89it/s, total_batch_loss=252.61301, average_batch_loss=0.98677, average_batch_perplexity=2.68255, lr=0.001469556, 0.001469556]
Epoch: 3, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=252.61301, average_batch_loss=0.98677, average_batch_perplexity=2.68255, lr=0.001469556, 0.001469556]
Epoch: 3, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=252.61301, average_batch_loss=0.98677, average_batch_perplexity=2.68255, lr=0.001469556, 0.001469556]
Epoch: 3, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=254.40053, average_batch_loss=0.99375, average_batch_perplexity=2.70135, lr=0.001492518, 0.001492518]
Epoch: 3, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=254.40053, average_batch_loss=0.99375, average_batch_perplexity=2.70135, lr=0.001492518, 0.001492518]
Epoch: 3, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=247.41672, average_batch_loss=0.96647, average_batch_perplexity=2.62865, lr=0.001515479, 0.001515479]
Epoch: 3, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.05it/s, total_batch_loss=247.41672, average_batch_loss=0.96647, average_batch_perplexity=2.62865, lr=0.001515479, 0.001515479]
Epoch: 3, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.05it/s, total_batch_loss=247.41672, average_batch_loss=0.96647, average_batch_perplexity=2.62865, lr=0.001515479, 0.001515479]
Epoch: 3, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.05it/s, total_batch_loss=251.45728, average_batch_loss=0.98225, average_batch_perplexity=2.67047, lr=0.001538441, 0.001538441]
Epoch: 3, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.05it/s, total_batch_loss=251.45728, average_batch_loss=0.98225, average_batch_perplexity=2.67047, lr=0.001538441, 0.001538441]
Epoch: 3, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.05it/s, total_batch_loss=249.68826, average_batch_loss=0.97534, average_batch_perplexity=2.65208, lr=0.001561403, 0.001561403]
Epoch: 3, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.16it/s, total_batch_loss=249.68826, average_batch_loss=0.97534, average_batch_perplexity=2.65208, lr=0.001561403, 0.001561403]
Epoch: 3, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.16it/s, total_batch_loss=249.68826, average_batch_loss=0.97534, average_batch_perplexity=2.65208, lr=0.001561403, 0.001561403]
Epoch: 3, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.16it/s, total_batch_loss=250.21922, average_batch_loss=0.97742, average_batch_perplexity=2.65759, lr=0.001584365, 0.001584365]
Epoch: 3, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.16it/s, total_batch_loss=250.21922, average_batch_loss=0.97742, average_batch_perplexity=2.65759, lr=0.001584365, 0.001584365]
Epoch: 3, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.16it/s, total_batch_loss=248.51582, average_batch_loss=0.97076, average_batch_perplexity=2.63996, lr=0.001607327, 0.001607327]
Epoch: 3, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.10it/s, total_batch_loss=248.51582, average_batch_loss=0.97076, average_batch_perplexity=2.63996, lr=0.001607327, 0.001607327]
Epoch: 3, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.10it/s, total_batch_loss=248.51582, average_batch_loss=0.97076, average_batch_perplexity=2.63996, lr=0.001607327, 0.001607327]
Epoch: 3, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.10it/s, total_batch_loss=242.27272, average_batch_loss=0.94638, average_batch_perplexity=2.57636, lr=0.001630288, 0.001630288]
Epoch: 3, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.10it/s, total_batch_loss=242.27272, average_batch_loss=0.94638, average_batch_perplexity=2.57636, lr=0.001630288, 0.001630288]
Epoch: 3, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.10it/s, total_batch_loss=248.85542, average_batch_loss=0.97209, average_batch_perplexity=2.64347, lr=0.001653250, 0.001653250]
Epoch: 3, Step: 16:  57%|█████▋    | 16/28 [00:01<00:00, 12.22it/s, total_batch_loss=248.85542, average_batch_loss=0.97209, average_batch_perplexity=2.64347, lr=0.001653250, 0.001653250]
Epoch: 3, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.22it/s, total_batch_loss=248.85542, average_batch_loss=0.97209, average_batch_perplexity=2.64347, lr=0.001653250, 0.001653250]
Epoch: 3, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.22it/s, total_batch_loss=246.14943, average_batch_loss=0.96152, average_batch_perplexity=2.61567, lr=0.001676212, 0.001676212]
Epoch: 3, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.22it/s, total_batch_loss=246.14943, average_batch_loss=0.96152, average_batch_perplexity=2.61567, lr=0.001676212, 0.001676212]
Epoch: 3, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.22it/s, total_batch_loss=238.63667, average_batch_loss=0.93217, average_batch_perplexity=2.54003, lr=0.001699174, 0.001699174]
Epoch: 3, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.19it/s, total_batch_loss=238.63667, average_batch_loss=0.93217, average_batch_perplexity=2.54003, lr=0.001699174, 0.001699174]
Epoch: 3, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.19it/s, total_batch_loss=238.63667, average_batch_loss=0.93217, average_batch_perplexity=2.54003, lr=0.001699174, 0.001699174]
Epoch: 3, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.19it/s, total_batch_loss=238.24759, average_batch_loss=0.93065, average_batch_perplexity=2.53617, lr=0.001722136, 0.001722136]
Epoch: 3, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.19it/s, total_batch_loss=238.24759, average_batch_loss=0.93065, average_batch_perplexity=2.53617, lr=0.001722136, 0.001722136]
Epoch: 3, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.19it/s, total_batch_loss=236.54965, average_batch_loss=0.92402, average_batch_perplexity=2.51940, lr=0.001745097, 0.001745097]
Epoch: 3, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 12.18it/s, total_batch_loss=236.54965, average_batch_loss=0.92402, average_batch_perplexity=2.51940, lr=0.001745097, 0.001745097]
Epoch: 3, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.18it/s, total_batch_loss=236.54965, average_batch_loss=0.92402, average_batch_perplexity=2.51940, lr=0.001745097, 0.001745097]
Epoch: 3, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.18it/s, total_batch_loss=233.92700, average_batch_loss=0.91378, average_batch_perplexity=2.49372, lr=0.001768059, 0.001768059]
Epoch: 3, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.18it/s, total_batch_loss=233.92700, average_batch_loss=0.91378, average_batch_perplexity=2.49372, lr=0.001768059, 0.001768059]
Epoch: 3, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.18it/s, total_batch_loss=234.13594, average_batch_loss=0.91459, average_batch_perplexity=2.49576, lr=0.001791021, 0.001791021]
Epoch: 3, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.14it/s, total_batch_loss=234.13594, average_batch_loss=0.91459, average_batch_perplexity=2.49576, lr=0.001791021, 0.001791021]
Epoch: 3, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.14it/s, total_batch_loss=234.13594, average_batch_loss=0.91459, average_batch_perplexity=2.49576, lr=0.001791021, 0.001791021]
Epoch: 3, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.14it/s, total_batch_loss=242.58105, average_batch_loss=0.94758, average_batch_perplexity=2.57947, lr=0.001813983, 0.001813983]
Epoch: 3, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.14it/s, total_batch_loss=242.58105, average_batch_loss=0.94758, average_batch_perplexity=2.57947, lr=0.001813983, 0.001813983]
Epoch: 3, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.14it/s, total_batch_loss=236.37764, average_batch_loss=0.92335, average_batch_perplexity=2.51771, lr=0.001836945, 0.001836945]
Epoch: 3, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.18it/s, total_batch_loss=236.37764, average_batch_loss=0.92335, average_batch_perplexity=2.51771, lr=0.001836945, 0.001836945]
Epoch: 3, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.18it/s, total_batch_loss=236.37764, average_batch_loss=0.92335, average_batch_perplexity=2.51771, lr=0.001836945, 0.001836945]
Epoch: 3, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.18it/s, total_batch_loss=234.06291, average_batch_loss=0.91431, average_batch_perplexity=2.49505, lr=0.001859906, 0.001859906]
Epoch: 3, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.18it/s, total_batch_loss=234.06291, average_batch_loss=0.91431, average_batch_perplexity=2.49505, lr=0.001859906, 0.001859906]
Epoch: 3, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.18it/s, total_batch_loss=233.00504, average_batch_loss=0.91018, average_batch_perplexity=2.48476, lr=0.001882868, 0.001882868]
Epoch: 3, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.15it/s, total_batch_loss=233.00504, average_batch_loss=0.91018, average_batch_perplexity=2.48476, lr=0.001882868, 0.001882868]
Epoch: 3, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.15it/s, total_batch_loss=233.00504, average_batch_loss=0.91018, average_batch_perplexity=2.48476, lr=0.001882868, 0.001882868]
Epoch: 3, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.15it/s, total_batch_loss=232.18446, average_batch_loss=0.90697, average_batch_perplexity=2.47681, lr=0.001905830, 0.001905830]
Epoch: 3, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.15it/s, total_batch_loss=232.18446, average_batch_loss=0.90697, average_batch_perplexity=2.47681, lr=0.001905830, 0.001905830]
Epoch: 3, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.15it/s, total_batch_loss=78.54578, average_batch_loss=0.89257, average_batch_perplexity=2.44139, lr=0.001928792, 0.001928792] 
Epoch: 3, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 10.33it/s, total_batch_loss=78.54578, average_batch_loss=0.89257, average_batch_perplexity=2.44139, lr=0.001928792, 0.001928792]
                                                                                                                                                                                         
2024-09-20 14:10:14,526 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:14,527 - root - INFO - Total Batches:                   28
2024-09-20 14:10:14,527 - root - INFO - Average Epoch Train Loss:        0.96766
2024-09-20 14:10:14,528 - root - INFO - Average Epoch Train Perplexity:  2.63179
2024-09-20 14:10:14,528 - root - INFO - 
2024-09-20 14:10:14,529 - root - INFO - ====================================================== Starting Valid Epoch: 3/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 3, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 3, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=226.52034, average_batch_loss=0.88485, average_batch_perplexity=2.42261]
Epoch: 3, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=226.52034, average_batch_loss=0.88485, average_batch_perplexity=2.42261]
Epoch: 3, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=220.53197, average_batch_loss=0.86145, average_batch_perplexity=2.36660]
Epoch: 3, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=220.53197, average_batch_loss=0.86145, average_batch_perplexity=2.36660]
Epoch: 3, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=222.36171, average_batch_loss=0.86860, average_batch_perplexity=2.38357]
Epoch: 3, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=222.36171, average_batch_loss=0.86860, average_batch_perplexity=2.38357]
Epoch: 3, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=222.36171, average_batch_loss=0.86860, average_batch_perplexity=2.38357]
Epoch: 3, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=226.95125, average_batch_loss=0.88653, average_batch_perplexity=2.42669]
Epoch: 3, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=226.95125, average_batch_loss=0.88653, average_batch_perplexity=2.42669]
Epoch: 3, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=228.85178, average_batch_loss=0.89395, average_batch_perplexity=2.44477]
Epoch: 3, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=228.85178, average_batch_loss=0.89395, average_batch_perplexity=2.44477]
Epoch: 3, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.85it/s, total_batch_loss=222.37132, average_batch_loss=0.86864, average_batch_perplexity=2.38366]
Epoch: 3, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.13it/s, total_batch_loss=222.37132, average_batch_loss=0.86864, average_batch_perplexity=2.38366]
Epoch: 3, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.13it/s, total_batch_loss=222.37132, average_batch_loss=0.86864, average_batch_perplexity=2.38366]
Epoch: 3, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.13it/s, total_batch_loss=223.47276, average_batch_loss=0.87294, average_batch_perplexity=2.39394]
Epoch: 3, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.13it/s, total_batch_loss=223.47276, average_batch_loss=0.87294, average_batch_perplexity=2.39394]
Epoch: 3, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.13it/s, total_batch_loss=178.52605, average_batch_loss=0.85830, average_batch_perplexity=2.35914]
                                                                                                                                                          
2024-09-20 14:10:14,871 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:14,872 - root - INFO - Total Batches:                   8
2024-09-20 14:10:14,873 - root - INFO - Average Epoch Valid Loss:        0.87479
2024-09-20 14:10:14,873 - root - INFO - Average Epoch Valid Perplexity:  2.39838
2024-09-20 14:10:14,874 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0430]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0352]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.16it/s, accuracy: 0.0352]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.16it/s, accuracy: 0.0703]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.16it/s, accuracy: 0.0517]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.48it/s, accuracy: 0.0517]
                                                                                           
2024-09-20 14:10:15,262 - root - INFO - Correct/Total Samples:           50/1000
2024-09-20 14:10:15,263 - root - INFO - Eval Accuracy:                   0.05
2024-09-20 14:10:15,281 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     3            1             0 13+48=061 13+48=058    False
     3            1             1 16+55=071 16+55=078    False
     3            1             2 79+34=113 79+34=108    False
     3            1             3 35+44=079 35+44=078    False
     3            1             4 16+50=066 16+50=078    False
     3            1             5 28+47=075 28+47=078    False
     3            1             6 00+74=074 00+74=078    False
     3            1             7 15+20=035 15+20=050    False
     3            1             8 72+60=132 72+60=128    False
     3            1             9 63+68=131 63+68=128    False
     3            1            10 29+45=074 29+45=078    False
     3            1            11 34+60=094 34+60=098    False
     3            1            12 53+70=123 53+70=128    False
     3            1            13 70+50=120 70+50=113    False
     3            1            14 11+84=095 11+84=108    False
     3            1            15 42+71=113 42+71=113     True
     3            1            16 98+22=120 98+22=113    False
     3            1            17 02+02=004 02+02=020    False
     3            1            18 15+85=100 15+85=108    False
     3            1            19 21+78=099 21+78=098    False
     3            1            20 61+79=140 61+79=138    False
     3            1            21 25+99=124 25+99=128    False
     3            1            22 09+85=094 09+85=098    False
     3            1            23 60+91=151 60+91=158    False
     3            1            24 35+30=065 35+30=068    False
     3            1            25 24+51=075 24+51=078    False
     3            1            26 93+91=184 93+91=178    False
     3            1            27 39+96=135 39+96=158    False
     3            1            28 64+35=099 64+35=098    False
     3            1            29 36+22=058 36+22=058     True
     3            1            30 68+45=113 68+45=108    False
     3            1            31 16+84=100 16+84=108    False
     3            1            32 91+52=143 91+52=158    False
     3            1            33 97+36=133 97+36=138    False
     3            1            34 27+37=064 27+37=068    False
     3            1            35 99+82=181 99+82=188    False
     3            1            36 03+42=045 03+42=050    False
     3            1            37 18+38=056 18+38=058    False
     3            1            38 32+20=052 32+20=050    False
     3            1            39 38+13=051 38+13=058    False
     3            1            40 68+42=110 68+42=108    False
     3            1            41 64+00=064 64+00=078    False
     3            1            42 48+94=142 48+94=158    False
     3            1            43 58+36=094 58+36=098    False
     3            1            44 41+22=063 41+22=078    False
     3            1            45 23+58=081 23+58=088    False
     3            1            46 67+46=113 67+46=108    False
     3            1            47 40+78=118 40+78=113    False
     3            1            48 90+38=128 90+38=138    False
     3            1            49 89+52=141 89+52=158    False
     3            1            50 37+77=114 37+77=108    False
     3            1            51 29+76=105 29+76=108    False
     3            1            52 42+90=132 42+90=148    False
     3            1            53 45+82=127 45+82=138    False
     3            1            54 35+95=130 35+95=138    False
     3            1            55 92+98=190 92+98=188    False
     3            1            56 73+91=164 73+91=178    False
     3            1            57 53+97=150 53+97=168    False
     3            1            58 98+69=167 98+69=178    False
     3            1            59 20+46=066 20+46=078    False
     3            1            60 48+69=117 48+69=113    False
     3            1            61 62+31=093 62+31=098    False
     3            1            62 80+59=139 80+59=158    False
     3            1            63 58+12=070 58+12=078    False
     3            1            64 08+96=104 08+96=108    False
     3            1            65 67+06=073 67+06=078    False
     3            1            66 22+04=026 22+04=030    False
     3            1            67 61+87=148 61+87=158    False
     3            1            68 95+27=122 95+27=113    False
     3            1            69 49+83=132 49+83=138    False
     3            1            70 43+00=043 43+00=050    False
     3            1            71 01+85=086 01+85=098    False
     3            1            72 11+68=079 11+68=088    False
     3            1            73 80+03=083 80+03=098    False
     3            1            74 54+83=137 54+83=140    False
     3            1            75 73+47=120 73+47=113    False
     3            1            76 99+93=192 99+93=188    False
     3            1            77 99+13=112 99+13=108    False
     3            1            78 92+66=158 92+66=178    False
     3            1            79 90+31=121 90+31=128    False
     3            1            80 25+69=094 25+69=098    False
     3            1            81 25+44=069 25+44=078    False
     3            1            82 00+93=093 00+93=108    False
     3            1            83 88+87=175 88+87=178    False
     3            1            84 47+56=103 47+56=108    False
     3            1            85 43+59=102 43+59=108    False
     3            1            86 22+00=022 22+00=020    False
     3            1            87 34+04=038 34+04=040    False
     3            1            88 65+13=078 65+13=088    False
     3            1            89 39+82=121 39+82=128    False
     3            1            90 66+83=149 66+83=158    False
     3            1            91 51+69=120 51+69=113    False
     3            1            92 80+21=101 80+21=108    False
     3            1            93 36+79=115 36+79=113    False
     3            1            94 21+68=089 21+68=098    False
     3            1            95 11+66=077 11+66=088    False
     3            1            96 55+19=074 55+19=078    False
     3            1            97 51+61=112 51+61=112     True
     3            1            98 38+88=126 38+88=128    False
     3            1            99 37+27=064 37+27=068    False
     3            1           100 18+63=081 18+63=088    False
     3            1           101 48+11=059 48+11=058    False
     3            1           102 72+68=140 72+68=138    False
     3            1           103 37+39=076 37+39=088    False
     3            1           104 64+95=159 64+95=168    False
     3            1           105 49+75=124 49+75=113    False
     3            1           106 45+66=111 45+66=113    False
     3            1           107 34+87=121 34+87=113    False
     3            1           108 02+84=086 02+84=098    False
     3            1           109 95+00=095 95+00=108    False
     3            1           110 09+56=065 09+56=068    False
     3            1           111 22+66=088 22+66=098    False
     3            1           112 43+18=061 43+18=058    False
     3            1           113 61+35=096 61+35=098    False
     3            1           114 13+73=086 13+73=098    False
     3            1           115 25+95=120 25+95=128    False
     3            1           116 73+96=169 73+96=178    False
     3            1           117 03+96=099 03+96=108    False
     3            1           118 97+82=179 97+82=178    False
     3            1           119 18+42=060 18+42=058    False
     3            1           120 29+98=127 29+98=128    False
     3            1           121 61+00=061 61+00=078    False
     3            1           122 22+98=120 22+98=128    False
     3            1           123 12+50=062 12+50=068    False
     3            1           124 02+58=060 02+58=058    False
     3            1           125 75+86=161 75+86=168    False
     3            1           126 31+57=088 31+57=088     True
     3            1           127 49+82=131 49+82=138    False
     3            1           128 15+33=048 15+33=050    False
     3            1           129 49+57=106 49+57=108    False
     3            1           130 61+70=131 61+70=128    False
     3            1           131 91+51=142 91+51=158    False
     3            1           132 50+05=055 50+05=050    False
     3            1           133 44+16=060 44+16=058    False
     3            1           134 92+01=093 92+01=108    False
     3            1           135 85+82=167 85+82=178    False
     3            1           136 07+41=048 07+41=058    False
     3            1           137 40+06=046 40+06=058    False
     3            1           138 79+62=141 79+62=138    False
     3            1           139 95+62=157 95+62=168    False
     3            1           140 42+93=135 42+93=158    False
     3            1           141 32+73=105 32+73=108    False
     3            1           142 47+09=056 47+09=058    False
     3            1           143 59+50=109 59+50=113    False
     3            1           144 61+77=138 61+77=138     True
     3            1           145 64+06=070 64+06=078    False
     3            1           146 35+10=045 35+10=050    False
     3            1           147 32+88=120 32+88=128    False
     3            1           148 03+95=098 03+95=108    False
     3            1           149 11+38=049 11+38=058    False
     3            1           150 21+67=088 21+67=098    False
     3            1           151 33+25=058 33+25=050    False
     3            1           152 63+45=108 63+45=108     True
     3            1           153 56+12=068 56+12=078    False
     3            1           154 19+79=098 19+79=098     True
     3            1           155 60+43=103 60+43=108    False
     3            1           156 07+61=068 07+61=078    False
     3            1           157 58+03=061 58+03=068    False
     3            1           158 11+10=021 11+10=030    False
     3            1           159 49+89=138 49+89=148    False
     3            1           160 37+58=095 37+58=098    False
     3            1           161 59+78=137 59+78=138    False
     3            1           162 11+21=032 11+21=030    False
     3            1           163 37+43=080 37+43=088    False
     3            1           164 44+21=065 44+21=078    False
     3            1           165 22+97=119 22+97=128    False
     3            1           166 65+35=100 65+35=098    False
     3            1           167 06+51=057 06+51=050    False
     3            1           168 65+25=090 65+25=098    False
     3            1           169 74+94=168 74+94=178    False
     3            1           170 87+55=142 87+55=158    False
     3            1           171 90+67=157 90+67=168    False
     3            1           172 11+02=013 11+02=020    False
     3            1           173 01+66=067 01+66=078    False
     3            1           174 56+00=056 56+00=068    False
     3            1           175 58+52=110 58+52=113    False
     3            1           176 24+99=123 24+99=128    False
     3            1           177 97+13=110 97+13=108    False
     3            1           178 42+94=136 42+94=140    False
     3            1           179 60+15=075 60+15=088    False
     3            1           180 20+46=066 20+46=078    False
     3            1           181 40+70=110 40+70=113    False
     3            1           182 95+45=140 95+45=158    False
     3            1           183 96+95=191 96+95=188    False
     3            1           184 98+20=118 98+20=113    False
     3            1           185 43+19=062 43+19=068    False
     3            1           186 50+69=119 50+69=113    False
     3            1           187 27+53=080 27+53=088    False
     3            1           188 24+25=049 24+25=050    False
     3            1           189 65+92=157 65+92=168    False
     3            1           190 28+14=042 28+14=058    False
     3            1           191 20+57=077 20+57=088    False
     3            1           192 59+97=156 59+97=178    False
     3            1           193 98+32=130 98+32=138    False
     3            1           194 55+84=139 55+84=150    False
     3            1           195 20+39=059 20+39=068    False
     3            1           196 86+47=133 86+47=138    False
     3            1           197 92+36=128 92+36=138    False
     3            1           198 05+38=043 05+38=050    False
     3            1           199 77+36=113 77+36=108    False
     3            1           200 41+64=105 41+64=108    False
     3            1           201 74+51=125 74+51=128    False
     3            1           202 74+55=129 74+55=120    False
     3            1           203 64+64=128 64+64=120    False
     3            1           204 60+19=079 60+19=088    False
     3            1           205 77+96=173 77+96=178    False
     3            1           206 22+30=052 22+30=050    False
     3            1           207 82+49=131 82+49=138    False
     3            1           208 39+67=106 39+67=108    False
     3            1           209 62+40=102 62+40=108    False
     3            1           210 28+71=099 28+71=098    False
     3            1           211 47+26=073 47+26=078    False
     3            1           212 98+54=152 98+54=168    False
     3            1           213 38+70=108 38+70=108     True
     3            1           214 63+40=103 63+40=108    False
     3            1           215 86+62=148 86+62=158    False
     3            1           216 22+65=087 22+65=098    False
     3            1           217 41+17=058 41+17=058     True
     3            1           218 68+88=156 68+88=178    False
     3            1           219 96+70=166 96+70=178    False
     3            1           220 99+29=128 99+29=118    False
     3            1           221 83+39=122 83+39=113    False
     3            1           222 26+55=081 26+55=088    False
     3            1           223 53+70=123 53+70=128    False
     3            1           224 94+12=106 94+12=108    False
     3            1           225 00+37=037 00+37=038    False
     3            1           226 36+94=130 36+94=138    False
     3            1           227 40+58=098 40+58=098     True
     3            1           228 19+80=099 19+80=108    False
     3            1           229 49+44=093 49+44=108    False
     3            1           230 70+27=097 70+27=098    False
     3            1           231 52+80=132 52+80=148    False
     3            1           232 77+90=167 77+90=178    False
     3            1           233 13+92=105 13+92=113    False
     3            1           234 59+09=068 59+09=078    False
     3            1           235 33+55=088 33+55=088     True
     3            1           236 85+16=101 85+16=108    False
     3            1           237 25+65=090 25+65=098    False
     3            1           238 46+20=066 46+20=078    False
     3            1           239 29+52=081 29+52=088    False
     3            1           240 32+36=068 32+36=078    False
     3            1           241 47+08=055 47+08=058    False
     3            1           242 21+84=105 21+84=108    False
     3            1           243 24+45=069 24+45=078    False
     3            1           244 29+15=044 29+15=050    False
     3            1           245 83+03=086 83+03=098    False
     3            1           246 83+36=119 83+36=113    False
     3            1           247 58+95=153 58+95=178    False
     3            1           248 76+79=155 76+79=158    False
     3            1           249 63+30=093 63+30=098    False
     3            1           250 38+24=062 38+24=068    False
     3            1           251 19+46=065 19+46=068    False
     3            1           252 99+66=165 99+66=178    False
     3            1           253 95+73=168 95+73=178    False
     3            1           254 65+27=092 65+27=098    False
     3            1           255 91+83=174 91+83=178    False
     3            2             0 65+49=114 65+49=108    False
     3            2             1 03+08=011 03+08=020    False
     3            2             2 67+81=148 67+81=158    False
     3            2             3 47+23=070 47+23=078    False
     3            2             4 43+91=134 43+91=148    False
     3            2             5 41+67=108 41+67=108     True
     3            2             6 02+33=035 02+33=030    False
     3            2             7 64+84=148 64+84=150    False
     3            2             8 81+64=145 81+64=158    False
     3            2             9 80+11=091 80+11=098    False
     3            2            10 78+01=079 78+01=078    False
     3            2            11 89+18=107 89+18=108    False
     3            2            12 45+52=097 45+52=088    False
     3            2            13 35+30=065 35+30=068    False
     3            2            14 53+32=085 53+32=088    False
     3            2            15 49+90=139 49+90=158    False
     3            2            16 41+37=078 41+37=088    False
     3            2            17 35+14=049 35+14=050    False
     3            2            18 92+50=142 92+50=158    False
     3            2            19 37+60=097 37+60=098    False
     3            2            20 91+61=152 91+61=158    False
     3            2            21 80+77=157 80+77=158    False
     3            2            22 66+24=090 66+24=098    False
     3            2            23 81+07=088 81+07=098    False
     3            2            24 85+59=144 85+59=158    False
     3            2            25 19+69=088 19+69=098    False
     3            2            26 91+44=135 91+44=148    False
     3            2            27 25+29=054 25+29=058    False
     3            2            28 27+08=035 27+08=048    False
     3            2            29 66+14=080 66+14=088    False
     3            2            30 95+11=106 95+11=108    False
     3            2            31 13+97=110 13+97=113    False
     3            2            32 94+40=134 94+40=148    False
     3            2            33 74+31=105 74+31=108    False
     3            2            34 49+00=049 49+00=058    False
     3            2            35 59+18=077 59+18=088    False
     3            2            36 07+65=072 07+65=078    False
     3            2            37 83+55=138 83+55=158    False
     3            2            38 49+80=129 49+80=138    False
     3            2            39 64+17=081 64+17=088    False
     3            2            40 48+83=131 48+83=138    False
     3            2            41 95+44=139 95+44=148    False
     3            2            42 71+26=097 71+26=098    False
     3            2            43 06+74=080 06+74=078    False
     3            2            44 34+24=058 34+24=050    False
     3            2            45 59+71=130 59+71=128    False
     3            2            46 68+32=100 68+32=098    False
     3            2            47 38+81=119 38+81=128    False
     3            2            48 29+56=085 29+56=088    False
     3            2            49 54+55=109 54+55=108    False
     3            2            50 31+27=058 31+27=058     True
     3            2            51 97+89=186 97+89=188    False
     3            2            52 48+09=057 48+09=058    False
     3            2            53 86+76=162 86+76=178    False
     3            2            54 82+59=141 82+59=158    False
     3            2            55 01+67=068 01+67=078    False
     3            2            56 26+06=032 26+06=038    False
     3            2            57 22+46=068 22+46=078    False
     3            2            58 85+16=101 85+16=108    False
     3            2            59 29+08=037 29+08=058    False
     3            2            60 73+94=167 73+94=178    False
     3            2            61 19+62=081 19+62=088    False
     3            2            62 86+62=148 86+62=158    False
     3            2            63 38+99=137 38+99=158    False
     3            2            64 64+25=089 64+25=098    False
     3            2            65 61+72=133 61+72=128    False
     3            2            66 78+88=166 78+88=178    False
     3            2            67 43+66=109 43+66=108    False
     3            2            68 69+35=104 69+35=108    False
     3            2            69 33+77=110 33+77=108    False
     3            2            70 37+37=074 37+37=078    False
     3            2            71 87+54=141 87+54=158    False
     3            2            72 68+90=158 68+90=178    False
     3            2            73 83+44=127 83+44=128    False
     3            2            74 41+09=050 41+09=058    False
     3            2            75 13+48=061 13+48=058    False
     3            2            76 01+41=042 01+41=050    False
     3            2            77 19+74=093 19+74=098    False
     3            2            78 15+05=020 15+05=020     True
     3            2            79 55+46=101 55+46=098    False
     3            2            80 68+33=101 68+33=098    False
     3            2            81 44+40=084 44+40=088    False
     3            2            82 88+03=091 88+03=098    False
     3            2            83 81+79=160 81+79=158    False
     3            2            84 18+98=116 18+98=113    False
     3            2            85 70+64=134 70+64=128    False
     3            2            86 26+44=070 26+44=078    False
     3            2            87 98+87=185 98+87=188    False
     3            2            88 18+74=092 18+74=098    False
     3            2            89 50+68=118 50+68=113    False
     3            2            90 13+51=064 13+51=068    False
     3            2            91 90+89=179 90+89=178    False
     3            2            92 47+78=125 47+78=113    False
     3            2            93 81+57=138 81+57=158    False
     3            2            94 34+47=081 34+47=088    False
     3            2            95 94+23=117 94+23=113    False
     3            2            96 07+70=077 07+70=078    False
     3            2            97 56+33=089 56+33=088    False
     3            2            98 33+04=037 33+04=030    False
     3            2            99 26+09=035 26+09=048    False
     3            2           100 14+92=106 14+92=113    False
     3            2           101 78+54=132 78+54=128    False
     3            2           102 36+76=112 36+76=108    False
     3            2           103 17+47=064 17+47=068    False
     3            2           104 28+18=046 28+18=058    False
     3            2           105 78+54=132 78+54=128    False
     3            2           106 84+72=156 84+72=158    False
     3            2           107 00+44=044 00+44=050    False
     3            2           108 50+41=091 50+41=088    False
     3            2           109 87+88=175 87+88=178    False
     3            2           110 11+66=077 11+66=088    False
     3            2           111 80+60=140 80+60=158    False
     3            2           112 78+76=154 78+76=158    False
     3            2           113 24+74=098 24+74=098     True
     3            2           114 88+48=136 88+48=138    False
     3            2           115 38+31=069 38+31=078    False
     3            2           116 29+27=056 29+27=058    False
     3            2           117 08+45=053 08+45=050    False
     3            2           118 28+13=041 28+13=058    False
     3            2           119 53+99=152 53+99=178    False
     3            2           120 47+92=139 47+92=158    False
     3            2           121 76+21=097 76+21=098    False
     3            2           122 53+96=149 53+96=158    False
     3            2           123 93+91=184 93+91=178    False
     3            2           124 97+33=130 97+33=138    False
     3            2           125 67+78=145 67+78=158    False
     3            2           126 58+05=063 58+05=068    False
     3            2           127 00+16=016 00+16=020    False
     3            2           128 80+19=099 80+19=108    False
     3            2           129 98+22=120 98+22=113    False
     3            2           130 09+62=071 09+62=078    False
     3            2           131 06+23=029 06+23=030    False
     3            2           132 32+99=131 32+99=148    False
     3            2           133 17+02=019 17+02=028    False
     3            2           134 64+35=099 64+35=098    False
     3            2           135 35+83=118 35+83=113    False
     3            2           136 71+36=107 71+36=108    False
     3            2           137 75+06=081 75+06=078    False
     3            2           138 88+95=183 88+95=188    False
     3            2           139 19+98=117 19+98=113    False
     3            2           140 28+89=117 28+89=113    False
     3            2           141 33+11=044 33+11=050    False
     3            2           142 34+49=083 34+49=088    False
     3            2           143 90+35=125 90+35=128    False
     3            2           144 22+90=112 22+90=113    False
     3            2           145 98+89=187 98+89=188    False
     3            2           146 88+47=135 88+47=138    False
     3            2           147 30+86=116 30+86=113    False
     3            2           148 31+48=079 31+48=088    False
     3            2           149 39+21=060 39+21=068    False
     3            2           150 19+17=036 19+17=048    False
     3            2           151 27+60=087 27+60=098    False
     3            2           152 12+16=028 12+16=030    False
     3            2           153 51+75=126 51+75=120    False
     3            2           154 10+74=084 10+74=098    False
     3            2           155 42+63=105 42+63=108    False
     3            2           156 40+14=054 40+14=050    False
     3            2           157 23+93=116 23+93=128    False
     3            2           158 85+26=111 85+26=108    False
     3            2           159 28+46=074 28+46=078    False
     3            2           160 28+33=061 28+33=058    False
     3            2           161 43+30=073 43+30=078    False
     3            2           162 89+72=161 89+72=178    False
     3            2           163 52+21=073 52+21=078    False
     3            2           164 21+54=075 21+54=078    False
     3            2           165 69+13=082 69+13=098    False
     3            2           166 07+60=067 07+60=078    False
     3            2           167 63+83=146 63+83=150    False
     3            2           168 80+69=149 80+69=158    False
     3            2           169 27+28=055 27+28=058    False
     3            2           170 42+31=073 42+31=078    False
     3            2           171 51+99=150 51+99=158    False
     3            2           172 28+75=103 28+75=108    False
     3            2           173 38+57=095 38+57=098    False
     3            2           174 83+16=099 83+16=108    False
     3            2           175 92+94=186 92+94=178    False
     3            2           176 55+75=130 55+75=120    False
     3            2           177 59+51=110 59+51=113    False
     3            2           178 33+09=042 33+09=050    False
     3            2           179 53+13=066 53+13=078    False
     3            2           180 05+70=075 05+70=078    False
     3            2           181 12+20=032 12+20=030    False
     3            2           182 11+49=060 11+49=068    False
     3            2           183 63+45=108 63+45=108     True
     3            2           184 92+23=115 92+23=113    False
     3            2           185 82+45=127 82+45=128    False
     3            2           186 23+41=064 23+41=068    False
     3            2           187 64+26=090 64+26=098    False
     3            2           188 91+24=115 91+24=113    False
     3            2           189 20+32=052 20+32=050    False
     3            2           190 83+21=104 83+21=108    False
     3            2           191 07+20=027 07+20=038    False
     3            2           192 94+14=108 94+14=108     True
     3            2           193 96+89=185 96+89=188    False
     3            2           194 13+08=021 13+08=030    False
     3            2           195 32+05=037 32+05=030    False
     3            2           196 09+51=060 09+51=068    False
     3            2           197 26+29=055 26+29=058    False
     3            2           198 49+65=114 49+65=113    False
     3            2           199 32+66=098 32+66=098     True
     3            2           200 41+08=049 41+08=058    False
     3            2           201 26+79=105 26+79=108    False
     3            2           202 29+91=120 29+91=128    False
     3            2           203 51+00=051 51+00=050    False
     3            2           204 61+60=121 61+60=113    False
     3            2           205 45+78=123 45+78=113    False
     3            2           206 56+16=072 56+16=078    False
     3            2           207 66+68=134 66+68=138    False
     3            2           208 32+16=048 32+16=058    False
     3            2           209 84+49=133 84+49=138    False
     3            2           210 45+09=054 45+09=058    False
     3            2           211 96+78=174 96+78=178    False
     3            2           212 10+02=012 10+02=020    False
     3            2           213 36+60=096 36+60=098    False
     3            2           214 44+36=080 44+36=088    False
     3            2           215 12+86=098 12+86=108    False
     3            2           216 94+54=148 94+54=158    False
     3            2           217 64+73=137 64+73=130    False
     3            2           218 73+10=083 73+10=088    False
     3            2           219 14+62=076 14+62=088    False
     3            2           220 25+22=047 25+22=050    False
     3            2           221 94+22=116 94+22=113    False
     3            2           222 41+76=117 41+76=113    False
     3            2           223 38+46=084 38+46=088    False
     3            2           224 71+72=143 71+72=138    False
     3            2           225 74+79=153 74+79=158    False
     3            2           226 99+67=166 99+67=178    False
     3            2           227 78+71=149 78+71=148    False
     3            2           228 23+19=042 23+19=058    False
     3            2           229 51+65=116 51+65=112    False
     3            2           230 94+86=180 94+86=178    False
     3            2           231 09+79=088 09+79=088     True
     3            2           232 69+39=108 69+39=108     True
     3            2           233 84+13=097 84+13=098    False
     3            2           234 36+59=095 36+59=108    False
     3            2           235 87+47=134 87+47=138    False
     3            2           236 50+00=050 50+00=058    False
     3            2           237 76+96=172 76+96=178    False
     3            2           238 12+18=030 12+18=038    False
     3            2           239 99+95=194 99+95=188    False
     3            2           240 22+00=022 22+00=020    False
     3            2           241 96+18=114 96+18=108    False
     3            2           242 51+20=071 51+20=078    False
     3            2           243 66+81=147 66+81=158    False
     3            2           244 78+18=096 78+18=098    False
     3            2           245 09+78=087 09+78=088    False
     3            2           246 24+20=044 24+20=050    False
     3            2           247 76+13=089 76+13=088    False
     3            2           248 05+10=015 05+10=020    False
     3            2           249 97+14=111 97+14=108    False
     3            2           250 92+38=130 92+38=138    False
     3            2           251 77+13=090 77+13=088    False
     3            2           252 70+19=089 70+19=098    False
     3            2           253 90+45=135 90+45=148    False
     3            2           254 50+09=059 50+09=068    False
     3            2           255 78+06=084 78+06=088    False
     3            3             0 03+25=028 03+25=030    False
     3            3             1 48+43=091 48+43=088    False
     3            3             2 39+47=086 39+47=088    False
     3            3             3 48+19=067 48+19=078    False
     3            3             4 07+22=029 07+22=038    False
     3            3             5 73+68=141 73+68=138    False
     3            3             6 14+56=070 14+56=078    False
     3            3             7 96+95=191 96+95=188    False
     3            3             8 96+28=124 96+28=113    False
     3            3             9 82+05=087 82+05=098    False
     3            3            10 27+94=121 27+94=128    False
     3            3            11 87+86=173 87+86=178    False
     3            3            12 00+68=068 00+68=078    False
     3            3            13 11+37=048 11+37=058    False
     3            3            14 95+93=188 95+93=188     True
     3            3            15 75+82=157 75+82=158    False
     3            3            16 41+71=112 41+71=113    False
     3            3            17 60+14=074 60+14=088    False
     3            3            18 77+77=154 77+77=158    False
     3            3            19 31+84=115 31+84=113    False
     3            3            20 31+57=088 31+57=088     True
     3            3            21 27+87=114 27+87=113    False
     3            3            22 31+89=120 31+89=113    False
     3            3            23 22+18=040 22+18=058    False
     3            3            24 38+25=063 38+25=068    False
     3            3            25 64+54=118 64+54=112    False
     3            3            26 85+60=145 85+60=158    False
     3            3            27 14+71=085 14+71=098    False
     3            3            28 06+16=022 06+16=020    False
     3            3            29 78+61=139 78+61=138    False
     3            3            30 65+75=140 65+75=130    False
     3            3            31 13+83=096 13+83=108    False
     3            3            32 75+49=124 75+49=113    False
     3            3            33 05+78=083 05+78=078    False
     3            3            34 66+55=121 66+55=113    False
     3            3            35 03+05=008 03+05=020    False
     3            3            36 69+99=168 69+99=178    False
     3            3            37 52+82=134 52+82=140    False
     3            3            38 45+97=142 45+97=158    False
     3            3            39 66+17=083 66+17=088    False
     3            3            40 36+17=053 36+17=058    False
     3            3            41 92+74=166 92+74=178    False
     3            3            42 48+44=092 48+44=088    False
     3            3            43 34+17=051 34+17=058    False
     3            3            44 56+11=067 56+11=078    False
     3            3            45 77+23=100 77+23=098    False
     3            3            46 10+11=021 10+11=030    False
     3            3            47 32+65=097 32+65=098    False
     3            3            48 53+49=102 53+49=108    False
     3            3            49 68+86=154 68+86=168    False
     3            3            50 52+94=146 52+94=150    False
     3            3            51 97+71=168 97+71=178    False
     3            3            52 05+37=042 05+37=040    False
     3            3            53 58+75=133 58+75=138    False
     3            3            54 06+24=030 06+24=030     True
     3            3            55 15+44=059 15+44=050    False
     3            3            56 90+49=139 90+49=158    False
     3            3            57 50+37=087 50+37=088    False
     3            3            58 88+61=149 88+61=158    False
     3            3            59 21+57=078 21+57=088    False
     3            3            60 24+85=109 24+85=113    False
     3            3            61 01+66=067 01+66=078    False
     3            3            62 50+46=096 50+46=088    False
     3            3            63 76+65=141 76+65=138    False
     3            3            64 23+74=097 23+74=098    False
     3            3            65 76+16=092 76+16=098    False
     3            3            66 06+08=014 06+08=028    False
     3            3            67 69+25=094 69+25=098    False
     3            3            68 15+23=038 15+23=050    False
     3            3            69 41+02=043 41+02=050    False
     3            3            70 16+66=082 16+66=088    False
     3            3            71 59+94=153 59+94=168    False
     3            3            72 32+88=120 32+88=128    False
     3            3            73 46+21=067 46+21=078    False
     3            3            74 57+28=085 57+28=088    False
     3            3            75 00+31=031 00+31=030    False
     3            3            76 77+07=084 77+07=078    False
     3            3            77 28+70=098 28+70=098     True
     3            3            78 05+61=066 05+61=078    False
     3            3            79 22+09=031 22+09=038    False
     3            3            80 08+94=102 08+94=108    False
     3            3            81 40+11=051 40+11=050    False
     3            3            82 10+48=058 10+48=058     True
     3            3            83 27+56=083 27+56=088    False
     3            3            84 42+16=058 42+16=058     True
     3            3            85 69+43=112 69+43=108    False
     3            3            86 57+69=126 57+69=128    False
     3            3            87 18+86=104 18+86=108    False
     3            3            88 86+80=166 86+80=178    False
     3            3            89 30+85=115 30+85=113    False
     3            3            90 77+66=143 77+66=138    False
     3            3            91 39+64=103 39+64=108    False
     3            3            92 76+61=137 76+61=138    False
     3            3            93 42+61=103 42+61=108    False
     3            3            94 07+30=037 07+30=038    False
     3            3            95 35+93=128 35+93=138    False
     3            3            96 40+90=130 40+90=148    False
     3            3            97 08+91=099 08+91=108    False
     3            3            98 62+34=096 62+34=098    False
     3            3            99 86+49=135 86+49=138    False
     3            3           100 73+23=096 73+23=098    False
     3            3           101 87+35=122 87+35=113    False
     3            3           102 35+31=066 35+31=068    False
     3            3           103 07+13=020 07+13=020     True
     3            3           104 39+41=080 39+41=088    False
     3            3           105 44+63=107 44+63=108    False
     3            3           106 94+66=160 94+66=178    False
     3            3           107 49+54=103 49+54=108    False
     3            3           108 79+46=125 79+46=113    False
     3            3           109 53+12=065 53+12=068    False
     3            3           110 60+92=152 60+92=158    False
     3            3           111 25+60=085 25+60=098    False
     3            3           112 64+53=117 64+53=113    False
     3            3           113 41+02=043 41+02=050    False
     3            3           114 00+97=097 00+97=108    False
     3            3           115 12+52=064 12+52=068    False
     3            3           116 39+50=089 39+50=088    False
     3            3           117 87+21=108 87+21=108     True
     3            3           118 04+99=103 04+99=108    False
     3            3           119 19+75=094 19+75=098    False
     3            3           120 90+05=095 90+05=108    False
     3            3           121 54+39=093 54+39=098    False
     3            3           122 29+26=055 29+26=058    False
     3            3           123 82+95=177 82+95=178    False
     3            3           124 55+09=064 55+09=078    False
     3            3           125 02+62=064 02+62=078    False
     3            3           126 68+30=098 68+30=098     True
     3            3           127 99+16=115 99+16=108    False
     3            3           128 63+11=074 63+11=088    False
     3            3           129 42+92=134 42+92=148    False
     3            3           130 99+16=115 99+16=108    False
     3            3           131 50+31=081 50+31=088    False
     3            3           132 23+46=069 23+46=078    False
     3            3           133 45+73=118 45+73=112    False
     3            3           134 89+77=166 89+77=178    False
     3            3           135 45+78=123 45+78=113    False
     3            3           136 96+60=156 96+60=168    False
     3            3           137 74+61=135 74+61=128    False
     3            3           138 87+01=088 87+01=098    False
     3            3           139 63+88=151 63+88=158    False
     3            3           140 59+72=131 59+72=128    False
     3            3           141 17+96=113 17+96=113     True
     3            3           142 89+77=166 89+77=178    False
     3            3           143 24+69=093 24+69=098    False
     3            3           144 75+83=158 75+83=158     True
     3            3           145 50+54=104 50+54=108    False
     3            3           146 93+47=140 93+47=158    False
     3            3           147 20+55=075 20+55=078    False
     3            3           148 91+79=170 91+79=178    False
     3            3           149 15+13=028 15+13=030    False
     3            3           150 86+09=095 86+09=098    False
     3            3           151 29+58=087 29+58=088    False
     3            3           152 01+29=030 01+29=030     True
     3            3           153 65+48=113 65+48=108    False
     3            3           154 96+45=141 96+45=158    False
     3            3           155 58+69=127 58+69=128    False
     3            3           156 84+43=127 84+43=128    False
     3            3           157 90+38=128 90+38=138    False
     3            3           158 39+97=136 39+97=158    False
     3            3           159 74+84=158 74+84=158     True
     3            3           160 86+22=108 86+22=108     True
     3            3           161 01+86=087 01+86=098    False
     3            3           162 81+63=144 81+63=158    False
     3            3           163 80+94=174 80+94=178    False
     3            3           164 44+42=086 44+42=088    False
     3            3           165 72+60=132 72+60=128    False
     3            3           166 28+07=035 28+07=048    False
     3            3           167 69+54=123 69+54=113    False
     3            3           168 68+77=145 68+77=158    False
     3            3           169 90+16=106 90+16=108    False
     3            3           170 64+50=114 64+50=113    False
     3            3           171 46+88=134 46+88=138    False
     3            3           172 55+99=154 55+99=178    False
     3            3           173 31+97=128 31+97=138    False
     3            3           174 79+28=107 79+28=108    False
     3            3           175 81+43=124 81+43=128    False
     3            3           176 41+15=056 41+15=050    False
     3            3           177 38+77=115 38+77=113    False
     3            3           178 25+06=031 25+06=030    False
     3            3           179 01+93=094 01+93=108    False
     3            3           180 97+22=119 97+22=113    False
     3            3           181 71+84=155 71+84=150    False
     3            3           182 26+36=062 26+36=058    False
     3            3           183 60+92=152 60+92=158    False
     3            3           184 02+94=096 02+94=108    False
     3            3           185 31+58=089 31+58=088    False
     3            3           186 70+52=122 70+52=113    False
     3            3           187 19+42=061 19+42=068    False
     3            3           188 95+73=168 95+73=178    False
     3            3           189 21+25=046 21+25=050    False
     3            3           190 13+58=071 13+58=078    False
     3            3           191 62+28=090 62+28=098    False
     3            3           192 38+14=052 38+14=058    False
     3            3           193 66+75=141 66+75=138    False
     3            3           194 24+59=083 24+59=088    False
     3            3           195 97+66=163 97+66=178    False
     3            3           196 76+70=146 76+70=148    False
     3            3           197 08+40=048 08+40=058    False
     3            3           198 84+00=084 84+00=098    False
     3            3           199 54+73=127 54+73=120    False
     3            3           200 16+88=104 16+88=108    False
     3            3           201 99+47=146 99+47=158    False
     3            3           202 31+95=126 31+95=138    False
     3            3           203 01+79=080 01+79=078    False
     3            3           204 03+68=071 03+68=078    False
     3            3           205 10+05=015 10+05=020    False
     3            3           206 98+90=188 98+90=188     True
     3            3           207 58+53=111 58+53=113    False
     3            3           208 34+87=121 34+87=113    False
     3            3           209 07+31=038 07+31=038     True
     3            3           210 59+08=067 59+08=078    False
     3            3           211 51+38=089 51+38=098    False
     3            3           212 62+62=124 62+62=113    False
     3            3           213 80+32=112 80+32=113    False
     3            3           214 69+16=085 69+16=098    False
     3            3           215 01+17=018 01+17=020    False
     3            3           216 74+41=115 74+41=113    False
     3            3           217 20+89=109 20+89=113    False
     3            3           218 53+50=103 53+50=108    False
     3            3           219 82+85=167 82+85=178    False
     3            3           220 34+47=081 34+47=088    False
     3            3           221 34+45=079 34+45=078    False
     3            3           222 77+34=111 77+34=108    False
     3            3           223 56+33=089 56+33=088    False
     3            3           224 97+56=153 97+56=178    False
     3            3           225 29+06=035 29+06=058    False
     3            3           226 78+96=174 78+96=178    False
     3            3           227 28+65=093 28+65=098    False
     3            3           228 61+64=125 61+64=113    False
     3            3           229 32+64=096 32+64=098    False
     3            3           230 98+32=130 98+32=138    False
     3            3           231 25+35=060 25+35=050    False
     3            3           232 05+08=013 05+08=020    False
     3            3           233 05+26=031 05+26=030    False
     3            3           234 84+71=155 84+71=158    False
     3            3           235 33+10=043 33+10=050    False
     3            3           236 98+35=133 98+35=138    False
     3            3           237 68+98=166 68+98=178    False
     3            3           238 03+63=066 03+63=078    False
     3            3           239 12+96=108 12+96=113    False
     3            3           240 02+81=083 02+81=098    False
     3            3           241 83+13=096 83+13=098    False
     3            3           242 55+92=147 55+92=158    False
     3            3           243 96+09=105 96+09=108    False
     3            3           244 61+08=069 61+08=078    False
     3            3           245 39+75=114 39+75=108    False
     3            3           246 40+74=114 40+74=113    False
     3            3           247 39+80=119 39+80=128    False
     3            3           248 57+95=152 57+95=168    False
     3            3           249 92+97=189 92+97=188    False
     3            3           250 33+03=036 33+03=030    False
     3            3           251 74+92=166 74+92=178    False
     3            3           252 99+09=108 99+09=108     True
     3            3           253 98+10=108 98+10=108     True
     3            3           254 46+77=123 46+77=113    False
     3            3           255 85+78=163 85+78=178    False
     3            4             0 41+21=062 41+21=068    False
     3            4             1 49+13=062 49+13=068    False
     3            4             2 59+07=066 59+07=078    False
     3            4             3 31+11=042 31+11=050    False
     3            4             4 74+16=090 74+16=098    False
     3            4             5 43+38=081 43+38=088    False
     3            4             6 08+67=075 08+67=078    False
     3            4             7 31+66=097 31+66=098    False
     3            4             8 10+31=041 10+31=050    False
     3            4             9 34+59=093 34+59=098    False
     3            4            10 78+42=120 78+42=113    False
     3            4            11 13+41=054 13+41=050    False
     3            4            12 97+89=186 97+89=188    False
     3            4            13 15+62=077 15+62=088    False
     3            4            14 39+36=075 39+36=078    False
     3            4            15 21+25=046 21+25=050    False
     3            4            16 74+56=130 74+56=128    False
     3            4            17 85+47=132 85+47=138    False
     3            4            18 47+32=079 47+32=088    False
     3            4            19 37+66=103 37+66=098    False
     3            4            20 16+29=045 16+29=058    False
     3            4            21 86+77=163 86+77=178    False
     3            4            22 80+07=087 80+07=098    False
     3            4            23 87+05=092 87+05=098    False
     3            4            24 58+16=074 58+16=078    False
     3            4            25 52+79=131 52+79=128    False
     3            4            26 91+08=099 91+08=108    False
     3            4            27 47+78=125 47+78=113    False
     3            4            28 86+96=182 86+96=188    False
     3            4            29 90+22=112 90+22=113    False
     3            4            30 31+18=049 31+18=058    False
     3            4            31 86+15=101 86+15=108    False
     3            4            32 15+95=110 15+95=113    False
     3            4            33 42+11=053 42+11=050    False
     3            4            34 65+99=164 65+99=178    False
     3            4            35 89+29=118 89+29=118     True
     3            4            36 35+11=046 35+11=050    False
     3            4            37 71+41=112 71+41=108    False
     3            4            38 16+24=040 16+24=050    False
     3            4            39 77+82=159 77+82=158    False
     3            4            40 55+89=144 55+89=158    False
     3            4            41 17+88=105 17+88=108    False
     3            4            42 54+72=126 54+72=120    False
     3            4            43 34+98=132 34+98=148    False
     3            4            44 09+97=106 09+97=108    False
     3            4            45 91+07=098 91+07=108    False
     3            4            46 55+94=149 55+94=150    False
     3            4            47 22+58=080 22+58=088    False
     3            4            48 91+37=128 91+37=128     True
     3            4            49 16+10=026 16+10=038    False
     3            4            50 96+32=128 96+32=138    False
     3            4            51 35+75=110 35+75=108    False
     3            4            52 88+73=161 88+73=178    False
     3            4            53 35+18=053 35+18=058    False
     3            4            54 33+10=043 33+10=050    False
     3            4            55 08+50=058 08+50=058     True
     3            4            56 22+62=084 22+62=098    False
     3            4            57 26+37=063 26+37=058    False
     3            4            58 80+27=107 80+27=108    False
     3            4            59 68+28=096 68+28=098    False
     3            4            60 48+03=051 48+03=058    False
     3            4            61 40+18=058 40+18=058     True
     3            4            62 16+59=075 16+59=078    False
     3            4            63 02+19=021 02+19=020    False
     3            4            64 01+09=010 01+09=020    False
     3            4            65 62+68=130 62+68=128    False
     3            4            66 09+71=080 09+71=078    False
     3            4            67 00+58=058 00+58=058     True
     3            4            68 16+45=061 16+45=050    False
     3            4            69 24+98=122 24+98=128    False
     3            4            70 47+92=139 47+92=158    False
     3            4            71 94+84=178 94+84=178     True
     3            4            72 21+32=053 21+32=050    False
     3            4            73 29+82=111 29+82=113    False
     3            4            74 32+79=111 32+79=108    False
     3            4            75 13+98=111 13+98=113    False
     3            4            76 41+94=135 41+94=140    False
     3            4            77 51+84=135 51+84=130    False
     3            4            78 42+05=047 42+05=050    False
     3            4            79 39+03=042 39+03=058    False
     3            4            80 02+92=094 02+92=108    False
     3            4            81 99+81=180 99+81=178    False
     3            4            82 32+68=100 32+68=098    False
     3            4            83 52+17=069 52+17=078    False
     3            4            84 56+58=114 56+58=113    False
     3            4            85 21+48=069 21+48=078    False
     3            4            86 61+71=132 61+71=128    False
     3            4            87 17+01=018 17+01=028    False
     3            4            88 68+23=091 68+23=098    False
     3            4            89 00+37=037 00+37=038    False
     3            4            90 94+88=182 94+88=178    False
     3            4            91 06+31=037 06+31=030    False
     3            4            92 27+18=045 27+18=058    False
     3            4            93 41+81=122 41+81=128    False
     3            4            94 15+86=101 15+86=108    False
     3            4            95 36+87=123 36+87=128    False
     3            4            96 17+37=054 17+37=058    False
     3            4            97 13+86=099 13+86=108    False
     3            4            98 29+69=098 29+69=098     True
     3            4            99 31+99=130 31+99=138    False
     3            4           100 47+29=076 47+29=088    False
     3            4           101 08+81=089 08+81=098    False
     3            4           102 72+82=154 72+82=158    False
     3            4           103 46+91=137 46+91=158    False
     3            4           104 70+35=105 70+35=108    False
     3            4           105 90+55=145 90+55=158    False
     3            4           106 99+99=198 99+99=188    False
     3            4           107 60+97=157 60+97=168    False
     3            4           108 03+40=043 03+40=050    False
     3            4           109 35+49=084 35+49=088    False
     3            4           110 32+02=034 32+02=030    False
     3            4           111 70+18=088 70+18=088     True
     3            4           112 99+05=104 99+05=108    False
     3            4           113 78+73=151 78+73=158    False
     3            4           114 03+02=005 03+02=020    False
     3            4           115 50+14=064 50+14=068    False
     3            4           116 62+02=064 62+02=078    False
     3            4           117 16+74=090 16+74=098    False
     3            4           118 68+65=133 68+65=128    False
     3            4           119 74+81=155 74+81=158    False
     3            4           120 37+48=085 37+48=088    False
     3            4           121 63+04=067 63+04=078    False
     3            4           122 06+62=068 06+62=078    False
     3            4           123 95+75=170 95+75=178    False
     3            4           124 92+37=129 92+37=138    False
     3            4           125 81+32=113 81+32=113     True
     3            4           126 53+28=081 53+28=088    False
     3            4           127 52+42=094 52+42=088    False
     3            4           128 66+97=163 66+97=178    False
     3            4           129 00+48=048 00+48=058    False
     3            4           130 65+32=097 65+32=098    False
     3            4           131 60+89=149 60+89=158    False
     3            4           132 71+61=132 71+61=128    False
     3            4           133 98+50=148 98+50=158    False
     3            4           134 90+96=186 90+96=178    False
     3            4           135 02+96=098 02+96=108    False
     3            4           136 62+75=137 62+75=130    False
     3            4           137 41+28=069 41+28=078    False
     3            4           138 95+79=174 95+79=178    False
     3            4           139 48+41=089 48+41=088    False
     3            4           140 87+95=182 87+95=188    False
     3            4           141 75+38=113 75+38=108    False
     3            4           142 31+55=086 31+55=088    False
     3            4           143 54+63=117 54+63=112    False
     3            4           144 75+82=157 75+82=158    False
     3            4           145 46+45=091 46+45=088    False
     3            4           146 13+08=021 13+08=030    False
     3            4           147 77+97=174 77+97=178    False
     3            4           148 37+35=072 37+35=078    False
     3            4           149 21+89=110 21+89=113    False
     3            4           150 58+51=109 58+51=113    False
     3            4           151 91+48=139 91+48=158    False
     3            4           152 33+23=056 33+23=050    False
     3            4           153 80+96=176 80+96=178    False
     3            4           154 78+02=080 78+02=078    False
     3            4           155 38+95=133 38+95=148    False
     3            4           156 99+25=124 99+25=113    False
     3            4           157 30+76=106 30+76=108    False
     3            4           158 42+40=082 42+40=088    False
     3            4           159 85+58=143 85+58=158    False
     3            4           160 44+46=090 44+46=088    False
     3            4           161 06+41=047 06+41=050    False
     3            4           162 65+90=155 65+90=168    False
     3            4           163 43+83=126 43+83=138    False
     3            4           164 36+61=097 36+61=098    False
     3            4           165 61+51=112 61+51=113    False
     3            4           166 38+09=047 38+09=058    False
     3            4           167 21+97=118 21+97=113    False
     3            4           168 83+30=113 83+30=113     True
     3            4           169 11+79=090 11+79=098    False
     3            4           170 14+29=043 14+29=050    False
     3            4           171 21+11=032 21+11=030    False
     3            4           172 43+53=096 43+53=088    False
     3            4           173 02+58=060 02+58=058    False
     3            4           174 78+82=160 78+82=168    False
     3            4           175 91+11=102 91+11=108    False
     3            4           176 58+54=112 58+54=113    False
     3            4           177 00+15=015 00+15=020    False
     3            4           178 83+51=134 83+51=148    False
     3            4           179 44+72=116 44+72=113    False
     3            4           180 71+20=091 71+20=098    False
     3            4           181 24+99=123 24+99=128    False
     3            4           182 46+30=076 46+30=078    False
     3            4           183 08+67=075 08+67=078    False
     3            4           184 47+42=089 47+42=088    False
     3            4           185 95+67=162 95+67=178    False
     3            4           186 40+56=096 40+56=088    False
     3            4           187 17+95=112 17+95=113    False
     3            4           188 94+66=160 94+66=178    False
     3            4           189 14+58=072 14+58=078    False
     3            4           190 56+05=061 56+05=068    False
     3            4           191 70+01=071 70+01=078    False
     3            4           192 97+59=156 97+59=178    False
     3            4           193 94+67=161 94+67=178    False
     3            4           194 13+41=054 13+41=050    False
     3            4           195 85+15=100 85+15=108    False
     3            4           196 48+53=101 48+53=108    False
     3            4           197 62+75=137 62+75=130    False
     3            4           198 87+47=134 87+47=138    False
     3            4           199 31+88=119 31+88=113    False
     3            4           200 97+16=113 97+16=108    False
     3            4           201 48+45=093 48+45=088    False
     3            4           202 99+00=099 99+00=108    False
     3            4           203 15+01=016 15+01=020    False
     3            4           204 28+96=124 28+96=128    False
     3            4           205 20+11=031 20+11=030    False
     3            4           206 07+56=063 07+56=058    False
     3            4           207 06+08=014 06+08=028    False
     3            4           208 45+46=091 45+46=088    False
     3            4           209 48+85=133 48+85=138    False
     3            4           210 62+14=076 62+14=088    False
     3            4           211 82+31=113 82+31=113     True
     3            4           212 85+88=173 85+88=178    False
     3            4           213 77+08=085 77+08=088    False
     3            4           214 16+64=080 16+64=088    False
     3            4           215 00+27=027 00+27=030    False
     3            4           216 36+75=111 36+75=108    False
     3            4           217 38+38=076 38+38=078    False
     3            4           218 88+32=120 88+32=113    False
     3            4           219 09+88=097 09+88=098    False
     3            4           220 96+87=183 96+87=188    False
     3            4           221 71+29=100 71+29=098    False
     3            4           222 99+13=112 99+13=108    False
     3            4           223 03+13=016 03+13=020    False
     3            4           224 67+23=090 67+23=098    False
     3            4           225 15+98=113 15+98=113     True
     3            4           226 10+08=018 10+08=038    False
     3            4           227 46+24=070 46+24=078    False
     3            4           228 55+63=118 55+63=112    False
     3            4           229 28+06=034 28+06=048    False
     3            4           230 43+87=130 43+87=138    False
     3            4           231 34+05=039 34+05=050    False
2024-09-20 14:10:15,283 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:15,285 - root - INFO - ====================================================== Starting Train Epoch: 4/9 ======================================================
2024-09-20 14:10:15,285 - root - INFO - Learning rates for each parameter group: 0.00191741247211842623, 0.00191741247211842623
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 4, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 4, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=230.64146, average_batch_loss=0.90094, average_batch_perplexity=2.46192, lr=0.001917412, 0.001917412]
Epoch: 4, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=230.64146, average_batch_loss=0.90094, average_batch_perplexity=2.46192, lr=0.001917412, 0.001917412]
Epoch: 4, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=234.89429, average_batch_loss=0.91756, average_batch_perplexity=2.50316, lr=0.001906232, 0.001906232]
Epoch: 4, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 12.22it/s, total_batch_loss=234.89429, average_batch_loss=0.91756, average_batch_perplexity=2.50316, lr=0.001906232, 0.001906232]
Epoch: 4, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 12.22it/s, total_batch_loss=234.89429, average_batch_loss=0.91756, average_batch_perplexity=2.50316, lr=0.001906232, 0.001906232]
Epoch: 4, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 12.22it/s, total_batch_loss=225.58919, average_batch_loss=0.88121, average_batch_perplexity=2.41381, lr=0.001895245, 0.001895245]
Epoch: 4, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 12.22it/s, total_batch_loss=225.58919, average_batch_loss=0.88121, average_batch_perplexity=2.41381, lr=0.001895245, 0.001895245]
Epoch: 4, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 12.22it/s, total_batch_loss=237.27705, average_batch_loss=0.92686, average_batch_perplexity=2.52657, lr=0.001884446, 0.001884446]
Epoch: 4, Step: 4:  14%|█▍        | 4/28 [00:00<00:01, 12.03it/s, total_batch_loss=237.27705, average_batch_loss=0.92686, average_batch_perplexity=2.52657, lr=0.001884446, 0.001884446]
Epoch: 4, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.03it/s, total_batch_loss=237.27705, average_batch_loss=0.92686, average_batch_perplexity=2.52657, lr=0.001884446, 0.001884446]
Epoch: 4, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.03it/s, total_batch_loss=234.00360, average_batch_loss=0.91408, average_batch_perplexity=2.49447, lr=0.001873829, 0.001873829]
Epoch: 4, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.03it/s, total_batch_loss=234.00360, average_batch_loss=0.91408, average_batch_perplexity=2.49447, lr=0.001873829, 0.001873829]
Epoch: 4, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.03it/s, total_batch_loss=227.43008, average_batch_loss=0.88840, average_batch_perplexity=2.43123, lr=0.001863390, 0.001863390]
Epoch: 4, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 12.20it/s, total_batch_loss=227.43008, average_batch_loss=0.88840, average_batch_perplexity=2.43123, lr=0.001863390, 0.001863390]
Epoch: 4, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.20it/s, total_batch_loss=227.43008, average_batch_loss=0.88840, average_batch_perplexity=2.43123, lr=0.001863390, 0.001863390]
Epoch: 4, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.20it/s, total_batch_loss=228.49860, average_batch_loss=0.89257, average_batch_perplexity=2.44140, lr=0.001853123, 0.001853123]
Epoch: 4, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.20it/s, total_batch_loss=228.49860, average_batch_loss=0.89257, average_batch_perplexity=2.44140, lr=0.001853123, 0.001853123]
Epoch: 4, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.20it/s, total_batch_loss=229.30400, average_batch_loss=0.89572, average_batch_perplexity=2.44910, lr=0.001843024, 0.001843024]
Epoch: 4, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 12.22it/s, total_batch_loss=229.30400, average_batch_loss=0.89572, average_batch_perplexity=2.44910, lr=0.001843024, 0.001843024]
Epoch: 4, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.22it/s, total_batch_loss=229.30400, average_batch_loss=0.89572, average_batch_perplexity=2.44910, lr=0.001843024, 0.001843024]
Epoch: 4, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.22it/s, total_batch_loss=218.08556, average_batch_loss=0.85190, average_batch_perplexity=2.34409, lr=0.001833089, 0.001833089]
Epoch: 4, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.22it/s, total_batch_loss=218.08556, average_batch_loss=0.85190, average_batch_perplexity=2.34409, lr=0.001833089, 0.001833089]
Epoch: 4, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.22it/s, total_batch_loss=221.96524, average_batch_loss=0.86705, average_batch_perplexity=2.37988, lr=0.001823312, 0.001823312]
Epoch: 4, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.38it/s, total_batch_loss=221.96524, average_batch_loss=0.86705, average_batch_perplexity=2.37988, lr=0.001823312, 0.001823312]
Epoch: 4, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.38it/s, total_batch_loss=221.96524, average_batch_loss=0.86705, average_batch_perplexity=2.37988, lr=0.001823312, 0.001823312]
Epoch: 4, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.38it/s, total_batch_loss=219.22734, average_batch_loss=0.85636, average_batch_perplexity=2.35457, lr=0.001813691, 0.001813691]
Epoch: 4, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.38it/s, total_batch_loss=219.22734, average_batch_loss=0.85636, average_batch_perplexity=2.35457, lr=0.001813691, 0.001813691]
Epoch: 4, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.38it/s, total_batch_loss=220.11047, average_batch_loss=0.85981, average_batch_perplexity=2.36270, lr=0.001804220, 0.001804220]
Epoch: 4, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.30it/s, total_batch_loss=220.11047, average_batch_loss=0.85981, average_batch_perplexity=2.36270, lr=0.001804220, 0.001804220]
Epoch: 4, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.30it/s, total_batch_loss=220.11047, average_batch_loss=0.85981, average_batch_perplexity=2.36270, lr=0.001804220, 0.001804220]
Epoch: 4, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.30it/s, total_batch_loss=214.46967, average_batch_loss=0.83777, average_batch_perplexity=2.31121, lr=0.001794895, 0.001794895]
Epoch: 4, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.30it/s, total_batch_loss=214.46967, average_batch_loss=0.83777, average_batch_perplexity=2.31121, lr=0.001794895, 0.001794895]
Epoch: 4, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.30it/s, total_batch_loss=219.96057, average_batch_loss=0.85922, average_batch_perplexity=2.36132, lr=0.001785714, 0.001785714]
Epoch: 4, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=219.96057, average_batch_loss=0.85922, average_batch_perplexity=2.36132, lr=0.001785714, 0.001785714]
Epoch: 4, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=219.96057, average_batch_loss=0.85922, average_batch_perplexity=2.36132, lr=0.001785714, 0.001785714]
Epoch: 4, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=218.02727, average_batch_loss=0.85167, average_batch_perplexity=2.34355, lr=0.001776673, 0.001776673]
2024-09-20 14:10:16,590 - root - INFO - Epoch: 4, Step: 100, Avg Batch Loss: 0.84106, Avg Batch Perplexity: 2.31884, LR: 0.001767767
Epoch: 4, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=218.02727, average_batch_loss=0.85167, average_batch_perplexity=2.34355, lr=0.001776673, 0.001776673]
Epoch: 4, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=215.31261, average_batch_loss=0.84106, average_batch_perplexity=2.31884, lr=0.001767767, 0.001767767]
Epoch: 4, Step: 16:  57%|█████▋    | 16/28 [00:01<00:00, 12.26it/s, total_batch_loss=215.31261, average_batch_loss=0.84106, average_batch_perplexity=2.31884, lr=0.001767767, 0.001767767]
Epoch: 4, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.26it/s, total_batch_loss=215.31261, average_batch_loss=0.84106, average_batch_perplexity=2.31884, lr=0.001767767, 0.001767767]
Epoch: 4, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.26it/s, total_batch_loss=226.95306, average_batch_loss=0.88654, average_batch_perplexity=2.42671, lr=0.001758994, 0.001758994]
Epoch: 4, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.26it/s, total_batch_loss=226.95306, average_batch_loss=0.88654, average_batch_perplexity=2.42671, lr=0.001758994, 0.001758994]
Epoch: 4, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.26it/s, total_batch_loss=223.14464, average_batch_loss=0.87166, average_batch_perplexity=2.39087, lr=0.001750350, 0.001750350]
Epoch: 4, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.33it/s, total_batch_loss=223.14464, average_batch_loss=0.87166, average_batch_perplexity=2.39087, lr=0.001750350, 0.001750350]
Epoch: 4, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.33it/s, total_batch_loss=223.14464, average_batch_loss=0.87166, average_batch_perplexity=2.39087, lr=0.001750350, 0.001750350]
Epoch: 4, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.33it/s, total_batch_loss=223.86075, average_batch_loss=0.87446, average_batch_perplexity=2.39757, lr=0.001741833, 0.001741833]
Epoch: 4, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.33it/s, total_batch_loss=223.86075, average_batch_loss=0.87446, average_batch_perplexity=2.39757, lr=0.001741833, 0.001741833]
Epoch: 4, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.33it/s, total_batch_loss=220.87898, average_batch_loss=0.86281, average_batch_perplexity=2.36981, lr=0.001733438, 0.001733438]
Epoch: 4, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 12.42it/s, total_batch_loss=220.87898, average_batch_loss=0.86281, average_batch_perplexity=2.36981, lr=0.001733438, 0.001733438]
Epoch: 4, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.42it/s, total_batch_loss=220.87898, average_batch_loss=0.86281, average_batch_perplexity=2.36981, lr=0.001733438, 0.001733438]
Epoch: 4, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.42it/s, total_batch_loss=212.23076, average_batch_loss=0.82903, average_batch_perplexity=2.29109, lr=0.001725164, 0.001725164]
Epoch: 4, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.42it/s, total_batch_loss=212.23076, average_batch_loss=0.82903, average_batch_perplexity=2.29109, lr=0.001725164, 0.001725164]
Epoch: 4, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.42it/s, total_batch_loss=208.13551, average_batch_loss=0.81303, average_batch_perplexity=2.25473, lr=0.001717007, 0.001717007]
Epoch: 4, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.42it/s, total_batch_loss=208.13551, average_batch_loss=0.81303, average_batch_perplexity=2.25473, lr=0.001717007, 0.001717007]
Epoch: 4, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.42it/s, total_batch_loss=208.13551, average_batch_loss=0.81303, average_batch_perplexity=2.25473, lr=0.001717007, 0.001717007]
Epoch: 4, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.42it/s, total_batch_loss=213.27568, average_batch_loss=0.83311, average_batch_perplexity=2.30046, lr=0.001708965, 0.001708965]
Epoch: 4, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.42it/s, total_batch_loss=213.27568, average_batch_loss=0.83311, average_batch_perplexity=2.30046, lr=0.001708965, 0.001708965]
Epoch: 4, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.42it/s, total_batch_loss=213.01154, average_batch_loss=0.83208, average_batch_perplexity=2.29809, lr=0.001701035, 0.001701035]
Epoch: 4, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.22it/s, total_batch_loss=213.01154, average_batch_loss=0.83208, average_batch_perplexity=2.29809, lr=0.001701035, 0.001701035]
Epoch: 4, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.22it/s, total_batch_loss=213.01154, average_batch_loss=0.83208, average_batch_perplexity=2.29809, lr=0.001701035, 0.001701035]
Epoch: 4, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.22it/s, total_batch_loss=207.95612, average_batch_loss=0.81233, average_batch_perplexity=2.25315, lr=0.001693214, 0.001693214]
Epoch: 4, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.22it/s, total_batch_loss=207.95612, average_batch_loss=0.81233, average_batch_perplexity=2.25315, lr=0.001693214, 0.001693214]
Epoch: 4, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.22it/s, total_batch_loss=206.17104, average_batch_loss=0.80536, average_batch_perplexity=2.23749, lr=0.001685500, 0.001685500]
Epoch: 4, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.34it/s, total_batch_loss=206.17104, average_batch_loss=0.80536, average_batch_perplexity=2.23749, lr=0.001685500, 0.001685500]
Epoch: 4, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.34it/s, total_batch_loss=206.17104, average_batch_loss=0.80536, average_batch_perplexity=2.23749, lr=0.001685500, 0.001685500]
Epoch: 4, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.34it/s, total_batch_loss=206.14790, average_batch_loss=0.80527, average_batch_perplexity=2.23729, lr=0.001677890, 0.001677890]
Epoch: 4, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.34it/s, total_batch_loss=206.14790, average_batch_loss=0.80527, average_batch_perplexity=2.23729, lr=0.001677890, 0.001677890]
Epoch: 4, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.34it/s, total_batch_loss=70.41332, average_batch_loss=0.80015, average_batch_perplexity=2.22588, lr=0.001670383, 0.001670383] 
Epoch: 4, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.52it/s, total_batch_loss=70.41332, average_batch_loss=0.80015, average_batch_perplexity=2.22588, lr=0.001670383, 0.001670383]
                                                                                                                                                                                         
2024-09-20 14:10:17,517 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:17,518 - root - INFO - Total Batches:                   28
2024-09-20 14:10:17,518 - root - INFO - Average Epoch Train Loss:        0.86100
2024-09-20 14:10:17,519 - root - INFO - Average Epoch Train Perplexity:  2.36552
2024-09-20 14:10:17,519 - root - INFO - 
2024-09-20 14:10:17,520 - root - INFO - ====================================================== Starting Valid Epoch: 4/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 4, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 4, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=199.28583, average_batch_loss=0.77846, average_batch_perplexity=2.17812]
Epoch: 4, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=199.28583, average_batch_loss=0.77846, average_batch_perplexity=2.17812]
Epoch: 4, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=200.44511, average_batch_loss=0.78299, average_batch_perplexity=2.18800]
Epoch: 4, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=200.44511, average_batch_loss=0.78299, average_batch_perplexity=2.18800]
Epoch: 4, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=200.06065, average_batch_loss=0.78149, average_batch_perplexity=2.18472]
Epoch: 4, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=200.06065, average_batch_loss=0.78149, average_batch_perplexity=2.18472]
Epoch: 4, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=200.06065, average_batch_loss=0.78149, average_batch_perplexity=2.18472]
Epoch: 4, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=198.88388, average_batch_loss=0.77689, average_batch_perplexity=2.17470]
Epoch: 4, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=198.88388, average_batch_loss=0.77689, average_batch_perplexity=2.17470]
Epoch: 4, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=200.88089, average_batch_loss=0.78469, average_batch_perplexity=2.19173]
Epoch: 4, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=200.88089, average_batch_loss=0.78469, average_batch_perplexity=2.19173]
Epoch: 4, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.83it/s, total_batch_loss=199.88879, average_batch_loss=0.78082, average_batch_perplexity=2.18325]
Epoch: 4, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 22.91it/s, total_batch_loss=199.88879, average_batch_loss=0.78082, average_batch_perplexity=2.18325]
Epoch: 4, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 22.91it/s, total_batch_loss=199.88879, average_batch_loss=0.78082, average_batch_perplexity=2.18325]
Epoch: 4, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 22.91it/s, total_batch_loss=197.75778, average_batch_loss=0.77249, average_batch_perplexity=2.16515]
Epoch: 4, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 22.91it/s, total_batch_loss=197.75778, average_batch_loss=0.77249, average_batch_perplexity=2.16515]
Epoch: 4, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 22.91it/s, total_batch_loss=162.22050, average_batch_loss=0.77991, average_batch_perplexity=2.18127]
                                                                                                                                                          
2024-09-20 14:10:17,863 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:17,864 - root - INFO - Total Batches:                   8
2024-09-20 14:10:17,864 - root - INFO - Average Epoch Valid Loss:        0.77971
2024-09-20 14:10:17,865 - root - INFO - Average Epoch Valid Perplexity:  2.18084
2024-09-20 14:10:17,865 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0703]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.0898]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.33it/s, accuracy: 0.0898]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.33it/s, accuracy: 0.0938]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.33it/s, accuracy: 0.0690]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.59it/s, accuracy: 0.0690]
                                                                                           
2024-09-20 14:10:18,251 - root - INFO - Correct/Total Samples:           81/1000
2024-09-20 14:10:18,251 - root - INFO - Eval Accuracy:                   0.081
2024-09-20 14:10:18,269 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     4            1             0 13+48=061 13+48=068    False
     4            1             1 16+55=071 16+55=064    False
     4            1             2 79+34=113 79+34=110    False
     4            1             3 35+44=079 35+44=078    False
     4            1             4 16+50=066 16+50=064    False
     4            1             5 28+47=075 28+47=078    False
     4            1             6 00+74=074 00+74=070    False
     4            1             7 15+20=035 15+20=030    False
     4            1             8 72+60=132 72+60=130    False
     4            1             9 63+68=131 63+68=120    False
     4            1            10 29+45=074 29+45=078    False
     4            1            11 34+60=094 34+60=098    False
     4            1            12 53+70=123 53+70=120    False
     4            1            13 70+50=120 70+50=110    False
     4            1            14 11+84=095 11+84=098    False
     4            1            15 42+71=113 42+71=110    False
     4            1            16 98+22=120 98+22=128    False
     4            1            17 02+02=004 02+02=010    False
     4            1            18 15+85=100 15+85=108    False
     4            1            19 21+78=099 21+78=108    False
     4            1            20 61+79=140 61+79=130    False
     4            1            21 25+99=124 25+99=118    False
     4            1            22 09+85=094 09+85=098    False
     4            1            23 60+91=151 60+91=140    False
     4            1            24 35+30=065 35+30=060    False
     4            1            25 24+51=075 24+51=070    False
     4            1            26 93+91=184 93+91=170    False
     4            1            27 39+96=135 39+96=128    False
     4            1            28 64+35=099 64+35=108    False
     4            1            29 36+22=058 36+22=054    False
     4            1            30 68+45=113 68+45=110    False
     4            1            31 16+84=100 16+84=108    False
     4            1            32 91+52=143 91+52=130    False
     4            1            33 97+36=133 97+36=138    False
     4            1            34 27+37=064 27+37=068    False
     4            1            35 99+82=181 99+82=188    False
     4            1            36 03+42=045 03+42=050    False
     4            1            37 18+38=056 18+38=058    False
     4            1            38 32+20=052 32+20=050    False
     4            1            39 38+13=051 38+13=050    False
     4            1            40 68+42=110 68+42=110     True
     4            1            41 64+00=064 64+00=060    False
     4            1            42 48+94=142 48+94=148    False
     4            1            43 58+36=094 58+36=088    False
     4            1            44 41+22=063 41+22=064    False
     4            1            45 23+58=081 23+58=078    False
     4            1            46 67+46=113 67+46=113     True
     4            1            47 40+78=118 40+78=110    False
     4            1            48 90+38=128 90+38=128     True
     4            1            49 89+52=141 89+52=138    False
     4            1            50 37+77=114 37+77=114     True
     4            1            51 29+76=105 29+76=108    False
     4            1            52 42+90=132 42+90=130    False
     4            1            53 45+82=127 45+82=120    False
     4            1            54 35+95=130 35+95=128    False
     4            1            55 92+98=190 92+98=188    False
     4            1            56 73+91=164 73+91=160    False
     4            1            57 53+97=150 53+97=148    False
     4            1            58 98+69=167 98+69=178    False
     4            1            59 20+46=066 20+46=064    False
     4            1            60 48+69=117 48+69=114    False
     4            1            61 62+31=093 62+31=090    False
     4            1            62 80+59=139 80+59=138    False
     4            1            63 58+12=070 58+12=078    False
     4            1            64 08+96=104 08+96=108    False
     4            1            65 67+06=073 67+06=075    False
     4            1            66 22+04=026 22+04=030    False
     4            1            67 61+87=148 61+87=140    False
     4            1            68 95+27=122 95+27=113    False
     4            1            69 49+83=132 49+83=128    False
     4            1            70 43+00=043 43+00=040    False
     4            1            71 01+85=086 01+85=088    False
     4            1            72 11+68=079 11+68=088    False
     4            1            73 80+03=083 80+03=088    False
     4            1            74 54+83=137 54+83=130    False
     4            1            75 73+47=120 73+47=110    False
     4            1            76 99+93=192 99+93=188    False
     4            1            77 99+13=112 99+13=113    False
     4            1            78 92+66=158 92+66=164    False
     4            1            79 90+31=121 90+31=128    False
     4            1            80 25+69=094 25+69=098    False
     4            1            81 25+44=069 25+44=060    False
     4            1            82 00+93=093 00+93=098    False
     4            1            83 88+87=175 88+87=175     True
     4            1            84 47+56=103 47+56=108    False
     4            1            85 43+59=102 43+59=108    False
     4            1            86 22+00=022 22+00=020    False
     4            1            87 34+04=038 34+04=040    False
     4            1            88 65+13=078 65+13=070    False
     4            1            89 39+82=121 39+82=113    False
     4            1            90 66+83=149 66+83=144    False
     4            1            91 51+69=120 51+69=110    False
     4            1            92 80+21=101 80+21=108    False
     4            1            93 36+79=115 36+79=113    False
     4            1            94 21+68=089 21+68=088    False
     4            1            95 11+66=077 11+66=074    False
     4            1            96 55+19=074 55+19=078    False
     4            1            97 51+61=112 51+61=110    False
     4            1            98 38+88=126 38+88=114    False
     4            1            99 37+27=064 37+27=068    False
     4            1           100 18+63=081 18+63=088    False
     4            1           101 48+11=059 48+11=050    False
     4            1           102 72+68=140 72+68=138    False
     4            1           103 37+39=076 37+39=078    False
     4            1           104 64+95=159 64+95=150    False
     4            1           105 49+75=124 49+75=128    False
     4            1           106 45+66=111 45+66=110    False
     4            1           107 34+87=121 34+87=110    False
     4            1           108 02+84=086 02+84=088    False
     4            1           109 95+00=095 95+00=098    False
     4            1           110 09+56=065 09+56=068    False
     4            1           111 22+66=088 22+66=088     True
     4            1           112 43+18=061 43+18=068    False
     4            1           113 61+35=096 61+35=098    False
     4            1           114 13+73=086 13+73=080    False
     4            1           115 25+95=120 25+95=110    False
     4            1           116 73+96=169 73+96=164    False
     4            1           117 03+96=099 03+96=098    False
     4            1           118 97+82=179 97+82=178    False
     4            1           119 18+42=060 18+42=058    False
     4            1           120 29+98=127 29+98=128    False
     4            1           121 61+00=061 61+00=060    False
     4            1           122 22+98=120 22+98=113    False
     4            1           123 12+50=062 12+50=064    False
     4            1           124 02+58=060 02+58=068    False
     4            1           125 75+86=161 75+86=154    False
     4            1           126 31+57=088 31+57=088     True
     4            1           127 49+82=131 49+82=128    False
     4            1           128 15+33=048 15+33=040    False
     4            1           129 49+57=106 49+57=108    False
     4            1           130 61+70=131 61+70=120    False
     4            1           131 91+51=142 91+51=130    False
     4            1           132 50+05=055 50+05=060    False
     4            1           133 44+16=060 44+16=050    False
     4            1           134 92+01=093 92+01=098    False
     4            1           135 85+82=167 85+82=164    False
     4            1           136 07+41=048 07+41=050    False
     4            1           137 40+06=046 40+06=054    False
     4            1           138 79+62=141 79+62=138    False
     4            1           139 95+62=157 95+62=150    False
     4            1           140 42+93=135 42+93=130    False
     4            1           141 32+73=105 32+73=108    False
     4            1           142 47+09=056 47+09=058    False
     4            1           143 59+50=109 59+50=108    False
     4            1           144 61+77=138 61+77=130    False
     4            1           145 64+06=070 64+06=078    False
     4            1           146 35+10=045 35+10=040    False
     4            1           147 32+88=120 32+88=113    False
     4            1           148 03+95=098 03+95=098     True
     4            1           149 11+38=049 11+38=040    False
     4            1           150 21+67=088 21+67=088     True
     4            1           151 33+25=058 33+25=050    False
     4            1           152 63+45=108 63+45=110    False
     4            1           153 56+12=068 56+12=064    False
     4            1           154 19+79=098 19+79=098     True
     4            1           155 60+43=103 60+43=108    False
     4            1           156 07+61=068 07+61=078    False
     4            1           157 58+03=061 58+03=068    False
     4            1           158 11+10=021 11+10=020    False
     4            1           159 49+89=138 49+89=138     True
     4            1           160 37+58=095 37+58=088    False
     4            1           161 59+78=137 59+78=138    False
     4            1           162 11+21=032 11+21=030    False
     4            1           163 37+43=080 37+43=078    False
     4            1           164 44+21=065 44+21=060    False
     4            1           165 22+97=119 22+97=113    False
     4            1           166 65+35=100 65+35=108    False
     4            1           167 06+51=057 06+51=064    False
     4            1           168 65+25=090 65+25=088    False
     4            1           169 74+94=168 74+94=160    False
     4            1           170 87+55=142 87+55=148    False
     4            1           171 90+67=157 90+67=164    False
     4            1           172 11+02=013 11+02=020    False
     4            1           173 01+66=067 01+66=074    False
     4            1           174 56+00=056 56+00=064    False
     4            1           175 58+52=110 58+52=110     True
     4            1           176 24+99=123 24+99=118    False
     4            1           177 97+13=110 97+13=113    False
     4            1           178 42+94=136 42+94=130    False
     4            1           179 60+15=075 60+15=070    False
     4            1           180 20+46=066 20+46=064    False
     4            1           181 40+70=110 40+70=110     True
     4            1           182 95+45=140 95+45=140     True
     4            1           183 96+95=191 96+95=184    False
     4            1           184 98+20=118 98+20=114    False
     4            1           185 43+19=062 43+19=060    False
     4            1           186 50+69=119 50+69=110    False
     4            1           187 27+53=080 27+53=078    False
     4            1           188 24+25=049 24+25=040    False
     4            1           189 65+92=157 65+92=150    False
     4            1           190 28+14=042 28+14=040    False
     4            1           191 20+57=077 20+57=078    False
     4            1           192 59+97=156 59+97=158    False
     4            1           193 98+32=130 98+32=128    False
     4            1           194 55+84=139 55+84=130    False
     4            1           195 20+39=059 20+39=068    False
     4            1           196 86+47=133 86+47=138    False
     4            1           197 92+36=128 92+36=128     True
     4            1           198 05+38=043 05+38=040    False
     4            1           199 77+36=113 77+36=114    False
     4            1           200 41+64=105 41+64=108    False
     4            1           201 74+51=125 74+51=120    False
     4            1           202 74+55=129 74+55=120    False
     4            1           203 64+64=128 64+64=120    False
     4            1           204 60+19=079 60+19=088    False
     4            1           205 77+96=173 77+96=174    False
     4            1           206 22+30=052 22+30=050    False
     4            1           207 82+49=131 82+49=128    False
     4            1           208 39+67=106 39+67=108    False
     4            1           209 62+40=102 62+40=108    False
     4            1           210 28+71=099 28+71=098    False
     4            1           211 47+26=073 47+26=065    False
     4            1           212 98+54=152 98+54=148    False
     4            1           213 38+70=108 38+70=108     True
     4            1           214 63+40=103 63+40=108    False
     4            1           215 86+62=148 86+62=144    False
     4            1           216 22+65=087 22+65=088    False
     4            1           217 41+17=058 41+17=050    False
     4            1           218 68+88=156 68+88=155    False
     4            1           219 96+70=166 96+70=164    False
     4            1           220 99+29=128 99+29=128     True
     4            1           221 83+39=122 83+39=110    False
     4            1           222 26+55=081 26+55=078    False
     4            1           223 53+70=123 53+70=120    False
     4            1           224 94+12=106 94+12=108    False
     4            1           225 00+37=037 00+37=040    False
     4            1           226 36+94=130 36+94=128    False
     4            1           227 40+58=098 40+58=108    False
     4            1           228 19+80=099 19+80=098    False
     4            1           229 49+44=093 49+44=108    False
     4            1           230 70+27=097 70+27=098    False
     4            1           231 52+80=132 52+80=130    False
     4            1           232 77+90=167 77+90=164    False
     4            1           233 13+92=105 13+92=108    False
     4            1           234 59+09=068 59+09=078    False
     4            1           235 33+55=088 33+55=088     True
     4            1           236 85+16=101 85+16=108    False
     4            1           237 25+65=090 25+65=088    False
     4            1           238 46+20=066 46+20=064    False
     4            1           239 29+52=081 29+52=078    False
     4            1           240 32+36=068 32+36=064    False
     4            1           241 47+08=055 47+08=058    False
     4            1           242 21+84=105 21+84=108    False
     4            1           243 24+45=069 24+45=060    False
     4            1           244 29+15=044 29+15=040    False
     4            1           245 83+03=086 83+03=088    False
     4            1           246 83+36=119 83+36=113    False
     4            1           247 58+95=153 58+95=148    False
     4            1           248 76+79=155 76+79=144    False
     4            1           249 63+30=093 63+30=090    False
     4            1           250 38+24=062 38+24=068    False
     4            1           251 19+46=065 19+46=068    False
     4            1           252 99+66=165 99+66=164    False
     4            1           253 95+73=168 95+73=164    False
     4            1           254 65+27=092 65+27=088    False
     4            1           255 91+83=174 91+83=170    False
     4            2             0 65+49=114 65+49=110    False
     4            2             1 03+08=011 03+08=010    False
     4            2             2 67+81=148 67+81=140    False
     4            2             3 47+23=070 47+23=068    False
     4            2             4 43+91=134 43+91=130    False
     4            2             5 41+67=108 41+67=110    False
     4            2             6 02+33=035 02+33=040    False
     4            2             7 64+84=148 64+84=140    False
     4            2             8 81+64=145 81+64=140    False
     4            2             9 80+11=091 80+11=098    False
     4            2            10 78+01=079 78+01=078    False
     4            2            11 89+18=107 89+18=108    False
     4            2            12 45+52=097 45+52=088    False
     4            2            13 35+30=065 35+30=060    False
     4            2            14 53+32=085 53+32=088    False
     4            2            15 49+90=139 49+90=148    False
     4            2            16 41+37=078 41+37=078     True
     4            2            17 35+14=049 35+14=040    False
     4            2            18 92+50=142 92+50=140    False
     4            2            19 37+60=097 37+60=098    False
     4            2            20 91+61=152 91+61=140    False
     4            2            21 80+77=157 80+77=154    False
     4            2            22 66+24=090 66+24=088    False
     4            2            23 81+07=088 81+07=088     True
     4            2            24 85+59=144 85+59=148    False
     4            2            25 19+69=088 19+69=088     True
     4            2            26 91+44=135 91+44=130    False
     4            2            27 25+29=054 25+29=058    False
     4            2            28 27+08=035 27+08=038    False
     4            2            29 66+14=080 66+14=088    False
     4            2            30 95+11=106 95+11=108    False
     4            2            31 13+97=110 13+97=108    False
     4            2            32 94+40=134 94+40=130    False
     4            2            33 74+31=105 74+31=108    False
     4            2            34 49+00=049 49+00=058    False
     4            2            35 59+18=077 59+18=078    False
     4            2            36 07+65=072 07+65=078    False
     4            2            37 83+55=138 83+55=130    False
     4            2            38 49+80=129 49+80=128    False
     4            2            39 64+17=081 64+17=088    False
     4            2            40 48+83=131 48+83=128    False
     4            2            41 95+44=139 95+44=140    False
     4            2            42 71+26=097 71+26=098    False
     4            2            43 06+74=080 06+74=078    False
     4            2            44 34+24=058 34+24=050    False
     4            2            45 59+71=130 59+71=128    False
     4            2            46 68+32=100 68+32=108    False
     4            2            47 38+81=119 38+81=113    False
     4            2            48 29+56=085 29+56=088    False
     4            2            49 54+55=109 54+55=110    False
     4            2            50 31+27=058 31+27=050    False
     4            2            51 97+89=186 97+89=188    False
     4            2            52 48+09=057 48+09=068    False
     4            2            53 86+76=162 86+76=154    False
     4            2            54 82+59=141 82+59=138    False
     4            2            55 01+67=068 01+67=078    False
     4            2            56 26+06=032 26+06=034    False
     4            2            57 22+46=068 22+46=064    False
     4            2            58 85+16=101 85+16=108    False
     4            2            59 29+08=037 29+08=038    False
     4            2            60 73+94=167 73+94=160    False
     4            2            61 19+62=081 19+62=088    False
     4            2            62 86+62=148 86+62=144    False
     4            2            63 38+99=137 38+99=128    False
     4            2            64 64+25=089 64+25=088    False
     4            2            65 61+72=133 61+72=120    False
     4            2            66 78+88=166 78+88=165    False
     4            2            67 43+66=109 43+66=110    False
     4            2            68 69+35=104 69+35=108    False
     4            2            69 33+77=110 33+77=110     True
     4            2            70 37+37=074 37+37=078    False
     4            2            71 87+54=141 87+54=138    False
     4            2            72 68+90=158 68+90=164    False
     4            2            73 83+44=127 83+44=120    False
     4            2            74 41+09=050 41+09=050     True
     4            2            75 13+48=061 13+48=068    False
     4            2            76 01+41=042 01+41=040    False
     4            2            77 19+74=093 19+74=098    False
     4            2            78 15+05=020 15+05=020     True
     4            2            79 55+46=101 55+46=109    False
     4            2            80 68+33=101 68+33=108    False
     4            2            81 44+40=084 44+40=088    False
     4            2            82 88+03=091 88+03=088    False
     4            2            83 81+79=160 81+79=158    False
     4            2            84 18+98=116 18+98=108    False
     4            2            85 70+64=134 70+64=130    False
     4            2            86 26+44=070 26+44=068    False
     4            2            87 98+87=185 98+87=185     True
     4            2            88 18+74=092 18+74=098    False
     4            2            89 50+68=118 50+68=110    False
     4            2            90 13+51=064 13+51=060    False
     4            2            91 90+89=179 90+89=178    False
     4            2            92 47+78=125 47+78=128    False
     4            2            93 81+57=138 81+57=138     True
     4            2            94 34+47=081 34+47=078    False
     4            2            95 94+23=117 94+23=110    False
     4            2            96 07+70=077 07+70=078    False
     4            2            97 56+33=089 56+33=088    False
     4            2            98 33+04=037 33+04=040    False
     4            2            99 26+09=035 26+09=038    False
     4            2           100 14+92=106 14+92=108    False
     4            2           101 78+54=132 78+54=138    False
     4            2           102 36+76=112 36+76=113    False
     4            2           103 17+47=064 17+47=064     True
     4            2           104 28+18=046 28+18=048    False
     4            2           105 78+54=132 78+54=138    False
     4            2           106 84+72=156 84+72=140    False
     4            2           107 00+44=044 00+44=050    False
     4            2           108 50+41=091 50+41=080    False
     4            2           109 87+88=175 87+88=175     True
     4            2           110 11+66=077 11+66=074    False
     4            2           111 80+60=140 80+60=140     True
     4            2           112 78+76=154 78+76=144    False
     4            2           113 24+74=098 24+74=098     True
     4            2           114 88+48=136 88+48=138    False
     4            2           115 38+31=069 38+31=064    False
     4            2           116 29+27=056 29+27=058    False
     4            2           117 08+45=053 08+45=050    False
     4            2           118 28+13=041 28+13=040    False
     4            2           119 53+99=152 53+99=148    False
     4            2           120 47+92=139 47+92=138    False
     4            2           121 76+21=097 76+21=098    False
     4            2           122 53+96=149 53+96=140    False
     4            2           123 93+91=184 93+91=170    False
     4            2           124 97+33=130 97+33=128    False
     4            2           125 67+78=145 67+78=144    False
     4            2           126 58+05=063 58+05=068    False
     4            2           127 00+16=016 00+16=020    False
     4            2           128 80+19=099 80+19=108    False
     4            2           129 98+22=120 98+22=128    False
     4            2           130 09+62=071 09+62=078    False
     4            2           131 06+23=029 06+23=030    False
     4            2           132 32+99=131 32+99=128    False
     4            2           133 17+02=019 17+02=020    False
     4            2           134 64+35=099 64+35=108    False
     4            2           135 35+83=118 35+83=110    False
     4            2           136 71+36=107 71+36=108    False
     4            2           137 75+06=081 75+06=078    False
     4            2           138 88+95=183 88+95=178    False
     4            2           139 19+98=117 19+98=108    False
     4            2           140 28+89=117 28+89=108    False
     4            2           141 33+11=044 33+11=040    False
     4            2           142 34+49=083 34+49=088    False
     4            2           143 90+35=125 90+35=128    False
     4            2           144 22+90=112 22+90=110    False
     4            2           145 98+89=187 98+89=186    False
     4            2           146 88+47=135 88+47=138    False
     4            2           147 30+86=116 30+86=113    False
     4            2           148 31+48=079 31+48=078    False
     4            2           149 39+21=060 39+21=068    False
     4            2           150 19+17=036 19+17=038    False
     4            2           151 27+60=087 27+60=088    False
     4            2           152 12+16=028 12+16=020    False
     4            2           153 51+75=126 51+75=120    False
     4            2           154 10+74=084 10+74=088    False
     4            2           155 42+63=105 42+63=108    False
     4            2           156 40+14=054 40+14=050    False
     4            2           157 23+93=116 23+93=110    False
     4            2           158 85+26=111 85+26=113    False
     4            2           159 28+46=074 28+46=078    False
     4            2           160 28+33=061 28+33=068    False
     4            2           161 43+30=073 43+30=070    False
     4            2           162 89+72=161 89+72=168    False
     4            2           163 52+21=073 52+21=075    False
     4            2           164 21+54=075 21+54=070    False
     4            2           165 69+13=082 69+13=088    False
     4            2           166 07+60=067 07+60=078    False
     4            2           167 63+83=146 63+83=140    False
     4            2           168 80+69=149 80+69=148    False
     4            2           169 27+28=055 27+28=058    False
     4            2           170 42+31=073 42+31=075    False
     4            2           171 51+99=150 51+99=148    False
     4            2           172 28+75=103 28+75=108    False
     4            2           173 38+57=095 38+57=088    False
     4            2           174 83+16=099 83+16=108    False
     4            2           175 92+94=186 92+94=188    False
     4            2           176 55+75=130 55+75=120    False
     4            2           177 59+51=110 59+51=110     True
     4            2           178 33+09=042 33+09=040    False
     4            2           179 53+13=066 53+13=064    False
     4            2           180 05+70=075 05+70=070    False
     4            2           181 12+20=032 12+20=030    False
     4            2           182 11+49=060 11+49=060     True
     4            2           183 63+45=108 63+45=110    False
     4            2           184 92+23=115 92+23=110    False
     4            2           185 82+45=127 82+45=120    False
     4            2           186 23+41=064 23+41=064     True
     4            2           187 64+26=090 64+26=088    False
     4            2           188 91+24=115 91+24=110    False
     4            2           189 20+32=052 20+32=050    False
     4            2           190 83+21=104 83+21=108    False
     4            2           191 07+20=027 07+20=030    False
     4            2           192 94+14=108 94+14=110    False
     4            2           193 96+89=185 96+89=185     True
     4            2           194 13+08=021 13+08=020    False
     4            2           195 32+05=037 32+05=040    False
     4            2           196 09+51=060 09+51=060     True
     4            2           197 26+29=055 26+29=058    False
     4            2           198 49+65=114 49+65=110    False
     4            2           199 32+66=098 32+66=108    False
     4            2           200 41+08=049 41+08=050    False
     4            2           201 26+79=105 26+79=108    False
     4            2           202 29+91=120 29+91=110    False
     4            2           203 51+00=051 51+00=060    False
     4            2           204 61+60=121 61+60=110    False
     4            2           205 45+78=123 45+78=110    False
     4            2           206 56+16=072 56+16=074    False
     4            2           207 66+68=134 66+68=134     True
     4            2           208 32+16=048 32+16=040    False
     4            2           209 84+49=133 84+49=138    False
     4            2           210 45+09=054 45+09=050    False
     4            2           211 96+78=174 96+78=175    False
     4            2           212 10+02=012 10+02=020    False
     4            2           213 36+60=096 36+60=098    False
     4            2           214 44+36=080 44+36=078    False
     4            2           215 12+86=098 12+86=098     True
     4            2           216 94+54=148 94+54=140    False
     4            2           217 64+73=137 64+73=130    False
     4            2           218 73+10=083 73+10=080    False
     4            2           219 14+62=076 14+62=078    False
     4            2           220 25+22=047 25+22=040    False
     4            2           221 94+22=116 94+22=110    False
     4            2           222 41+76=117 41+76=110    False
     4            2           223 38+46=084 38+46=088    False
     4            2           224 71+72=143 71+72=130    False
     4            2           225 74+79=153 74+79=140    False
     4            2           226 99+67=166 99+67=165    False
     4            2           227 78+71=149 78+71=148    False
     4            2           228 23+19=042 23+19=040    False
     4            2           229 51+65=116 51+65=110    False
     4            2           230 94+86=180 94+86=178    False
     4            2           231 09+79=088 09+79=088     True
     4            2           232 69+39=108 69+39=108     True
     4            2           233 84+13=097 84+13=098    False
     4            2           234 36+59=095 36+59=098    False
     4            2           235 87+47=134 87+47=138    False
     4            2           236 50+00=050 50+00=060    False
     4            2           237 76+96=172 76+96=174    False
     4            2           238 12+18=030 12+18=030     True
     4            2           239 99+95=194 99+95=188    False
     4            2           240 22+00=022 22+00=020    False
     4            2           241 96+18=114 96+18=114     True
     4            2           242 51+20=071 51+20=070    False
     4            2           243 66+81=147 66+81=144    False
     4            2           244 78+18=096 78+18=098    False
     4            2           245 09+78=087 09+78=088    False
     4            2           246 24+20=044 24+20=040    False
     4            2           247 76+13=089 76+13=088    False
     4            2           248 05+10=015 05+10=020    False
     4            2           249 97+14=111 97+14=113    False
     4            2           250 92+38=130 92+38=128    False
     4            2           251 77+13=090 77+13=098    False
     4            2           252 70+19=089 70+19=088    False
     4            2           253 90+45=135 90+45=130    False
     4            2           254 50+09=059 50+09=068    False
     4            2           255 78+06=084 78+06=076    False
     4            3             0 03+25=028 03+25=030    False
     4            3             1 48+43=091 48+43=088    False
     4            3             2 39+47=086 39+47=088    False
     4            3             3 48+19=067 48+19=068    False
     4            3             4 07+22=029 07+22=030    False
     4            3             5 73+68=141 73+68=130    False
     4            3             6 14+56=070 14+56=064    False
     4            3             7 96+95=191 96+95=184    False
     4            3             8 96+28=124 96+28=128    False
     4            3             9 82+05=087 82+05=088    False
     4            3            10 27+94=121 27+94=113    False
     4            3            11 87+86=173 87+86=164    False
     4            3            12 00+68=068 00+68=078    False
     4            3            13 11+37=048 11+37=040    False
     4            3            14 95+93=188 95+93=188     True
     4            3            15 75+82=157 75+82=140    False
     4            3            16 41+71=112 41+71=110    False
     4            3            17 60+14=074 60+14=070    False
     4            3            18 77+77=154 77+77=144    False
     4            3            19 31+84=115 31+84=110    False
     4            3            20 31+57=088 31+57=088     True
     4            3            21 27+87=114 27+87=108    False
     4            3            22 31+89=120 31+89=110    False
     4            3            23 22+18=040 22+18=040     True
     4            3            24 38+25=063 38+25=068    False
     4            3            25 64+54=118 64+54=110    False
     4            3            26 85+60=145 85+60=140    False
     4            3            27 14+71=085 14+71=080    False
     4            3            28 06+16=022 06+16=024    False
     4            3            29 78+61=139 78+61=130    False
     4            3            30 65+75=140 65+75=130    False
     4            3            31 13+83=096 13+83=098    False
     4            3            32 75+49=124 75+49=128    False
     4            3            33 05+78=083 05+78=088    False
     4            3            34 66+55=121 66+55=110    False
     4            3            35 03+05=008 03+05=010    False
     4            3            36 69+99=168 69+99=178    False
     4            3            37 52+82=134 52+82=130    False
     4            3            38 45+97=142 45+97=148    False
     4            3            39 66+17=083 66+17=085    False
     4            3            40 36+17=053 36+17=054    False
     4            3            41 92+74=166 92+74=164    False
     4            3            42 48+44=092 48+44=088    False
     4            3            43 34+17=051 34+17=040    False
     4            3            44 56+11=067 56+11=064    False
     4            3            45 77+23=100 77+23=108    False
     4            3            46 10+11=021 10+11=020    False
     4            3            47 32+65=097 32+65=098    False
     4            3            48 53+49=102 53+49=108    False
     4            3            49 68+86=154 68+86=154     True
     4            3            50 52+94=146 52+94=140    False
     4            3            51 97+71=168 97+71=164    False
     4            3            52 05+37=042 05+37=040    False
     4            3            53 58+75=133 58+75=138    False
     4            3            54 06+24=030 06+24=030     True
     4            3            55 15+44=059 15+44=050    False
     4            3            56 90+49=139 90+49=148    False
     4            3            57 50+37=087 50+37=088    False
     4            3            58 88+61=149 88+61=148    False
     4            3            59 21+57=078 21+57=078     True
     4            3            60 24+85=109 24+85=108    False
     4            3            61 01+66=067 01+66=074    False
     4            3            62 50+46=096 50+46=088    False
     4            3            63 76+65=141 76+65=130    False
     4            3            64 23+74=097 23+74=098    False
     4            3            65 76+16=092 76+16=085    False
     4            3            66 06+08=014 06+08=014     True
     4            3            67 69+25=094 69+25=088    False
     4            3            68 15+23=038 15+23=040    False
     4            3            69 41+02=043 41+02=040    False
     4            3            70 16+66=082 16+66=084    False
     4            3            71 59+94=153 59+94=148    False
     4            3            72 32+88=120 32+88=113    False
     4            3            73 46+21=067 46+21=064    False
     4            3            74 57+28=085 57+28=088    False
     4            3            75 00+31=031 00+31=040    False
     4            3            76 77+07=084 77+07=078    False
     4            3            77 28+70=098 28+70=098     True
     4            3            78 05+61=066 05+61=070    False
     4            3            79 22+09=031 22+09=030    False
     4            3            80 08+94=102 08+94=108    False
     4            3            81 40+11=051 40+11=050    False
     4            3            82 10+48=058 10+48=058     True
     4            3            83 27+56=083 27+56=078    False
     4            3            84 42+16=058 42+16=054    False
     4            3            85 69+43=112 69+43=110    False
     4            3            86 57+69=126 57+69=128    False
     4            3            87 18+86=104 18+86=108    False
     4            3            88 86+80=166 86+80=164    False
     4            3            89 30+85=115 30+85=110    False
     4            3            90 77+66=143 77+66=144    False
     4            3            91 39+64=103 39+64=108    False
     4            3            92 76+61=137 76+61=130    False
     4            3            93 42+61=103 42+61=108    False
     4            3            94 07+30=037 07+30=040    False
     4            3            95 35+93=128 35+93=128     True
     4            3            96 40+90=130 40+90=130     True
     4            3            97 08+91=099 08+91=098    False
     4            3            98 62+34=096 62+34=098    False
     4            3            99 86+49=135 86+49=138    False
     4            3           100 73+23=096 73+23=098    False
     4            3           101 87+35=122 87+35=113    False
     4            3           102 35+31=066 35+31=060    False
     4            3           103 07+13=020 07+13=020     True
     4            3           104 39+41=080 39+41=078    False
     4            3           105 44+63=107 44+63=110    False
     4            3           106 94+66=160 94+66=164    False
     4            3           107 49+54=103 49+54=108    False
     4            3           108 79+46=125 79+46=128    False
     4            3           109 53+12=065 53+12=064    False
     4            3           110 60+92=152 60+92=150    False
     4            3           111 25+60=085 25+60=088    False
     4            3           112 64+53=117 64+53=110    False
     4            3           113 41+02=043 41+02=040    False
     4            3           114 00+97=097 00+97=098    False
     4            3           115 12+52=064 12+52=064     True
     4            3           116 39+50=089 39+50=088    False
     4            3           117 87+21=108 87+21=108     True
     4            3           118 04+99=103 04+99=108    False
     4            3           119 19+75=094 19+75=098    False
     4            3           120 90+05=095 90+05=098    False
     4            3           121 54+39=093 54+39=088    False
     4            3           122 29+26=055 29+26=058    False
     4            3           123 82+95=177 82+95=178    False
     4            3           124 55+09=064 55+09=068    False
     4            3           125 02+62=064 02+62=064     True
     4            3           126 68+30=098 68+30=098     True
     4            3           127 99+16=115 99+16=114    False
     4            3           128 63+11=074 63+11=070    False
     4            3           129 42+92=134 42+92=130    False
     4            3           130 99+16=115 99+16=114    False
     4            3           131 50+31=081 50+31=070    False
     4            3           132 23+46=069 23+46=064    False
     4            3           133 45+73=118 45+73=110    False
     4            3           134 89+77=166 89+77=165    False
     4            3           135 45+78=123 45+78=110    False
     4            3           136 96+60=156 96+60=164    False
     4            3           137 74+61=135 74+61=130    False
     4            3           138 87+01=088 87+01=088     True
     4            3           139 63+88=151 63+88=148    False
     4            3           140 59+72=131 59+72=128    False
     4            3           141 17+96=113 17+96=108    False
     4            3           142 89+77=166 89+77=165    False
     4            3           143 24+69=093 24+69=098    False
     4            3           144 75+83=158 75+83=150    False
     4            3           145 50+54=104 50+54=108    False
     4            3           146 93+47=140 93+47=148    False
     4            3           147 20+55=075 20+55=078    False
     4            3           148 91+79=170 91+79=178    False
     4            3           149 15+13=028 15+13=020    False
     4            3           150 86+09=095 86+09=088    False
     4            3           151 29+58=087 29+58=088    False
     4            3           152 01+29=030 01+29=030     True
     4            3           153 65+48=113 65+48=110    False
     4            3           154 96+45=141 96+45=148    False
     4            3           155 58+69=127 58+69=128    False
     4            3           156 84+43=127 84+43=120    False
     4            3           157 90+38=128 90+38=128     True
     4            3           158 39+97=136 39+97=128    False
     4            3           159 74+84=158 74+84=140    False
     4            3           160 86+22=108 86+22=108     True
     4            3           161 01+86=087 01+86=088    False
     4            3           162 81+63=144 81+63=140    False
     4            3           163 80+94=174 80+94=178    False
     4            3           164 44+42=086 44+42=088    False
     4            3           165 72+60=132 72+60=130    False
     4            3           166 28+07=035 28+07=038    False
     4            3           167 69+54=123 69+54=120    False
     4            3           168 68+77=145 68+77=144    False
     4            3           169 90+16=106 90+16=113    False
     4            3           170 64+50=114 64+50=110    False
     4            3           171 46+88=134 46+88=138    False
     4            3           172 55+99=154 55+99=148    False
     4            3           173 31+97=128 31+97=128     True
     4            3           174 79+28=107 79+28=108    False
     4            3           175 81+43=124 81+43=120    False
     4            3           176 41+15=056 41+15=050    False
     4            3           177 38+77=115 38+77=114    False
     4            3           178 25+06=031 25+06=030    False
     4            3           179 01+93=094 01+93=090    False
     4            3           180 97+22=119 97+22=113    False
     4            3           181 71+84=155 71+84=140    False
     4            3           182 26+36=062 26+36=064    False
     4            3           183 60+92=152 60+92=150    False
     4            3           184 02+94=096 02+94=098    False
     4            3           185 31+58=089 31+58=088    False
     4            3           186 70+52=122 70+52=120    False
     4            3           187 19+42=061 19+42=068    False
     4            3           188 95+73=168 95+73=164    False
     4            3           189 21+25=046 21+25=040    False
     4            3           190 13+58=071 13+58=078    False
     4            3           191 62+28=090 62+28=088    False
     4            3           192 38+14=052 38+14=058    False
     4            3           193 66+75=141 66+75=130    False
     4            3           194 24+59=083 24+59=088    False
     4            3           195 97+66=163 97+66=164    False
     4            3           196 76+70=146 76+70=144    False
     4            3           197 08+40=048 08+40=058    False
     4            3           198 84+00=084 84+00=088    False
     4            3           199 54+73=127 54+73=120    False
     4            3           200 16+88=104 16+88=108    False
     4            3           201 99+47=146 99+47=148    False
     4            3           202 31+95=126 31+95=128    False
     4            3           203 01+79=080 01+79=088    False
     4            3           204 03+68=071 03+68=078    False
     4            3           205 10+05=015 10+05=020    False
     4            3           206 98+90=188 98+90=188     True
     4            3           207 58+53=111 58+53=110    False
     4            3           208 34+87=121 34+87=110    False
     4            3           209 07+31=038 07+31=040    False
     4            3           210 59+08=067 59+08=068    False
     4            3           211 51+38=089 51+38=088    False
     4            3           212 62+62=124 62+62=120    False
     4            3           213 80+32=112 80+32=110    False
     4            3           214 69+16=085 69+16=088    False
     4            3           215 01+17=018 01+17=020    False
     4            3           216 74+41=115 74+41=110    False
     4            3           217 20+89=109 20+89=108    False
     4            3           218 53+50=103 53+50=108    False
     4            3           219 82+85=167 82+85=164    False
     4            3           220 34+47=081 34+47=078    False
     4            3           221 34+45=079 34+45=078    False
     4            3           222 77+34=111 77+34=110    False
     4            3           223 56+33=089 56+33=088    False
     4            3           224 97+56=153 97+56=144    False
     4            3           225 29+06=035 29+06=038    False
     4            3           226 78+96=174 78+96=175    False
     4            3           227 28+65=093 28+65=088    False
     4            3           228 61+64=125 61+64=120    False
     4            3           229 32+64=096 32+64=098    False
     4            3           230 98+32=130 98+32=128    False
     4            3           231 25+35=060 25+35=050    False
     4            3           232 05+08=013 05+08=010    False
     4            3           233 05+26=031 05+26=030    False
     4            3           234 84+71=155 84+71=140    False
     4            3           235 33+10=043 33+10=040    False
     4            3           236 98+35=133 98+35=138    False
     4            3           237 68+98=166 68+98=165    False
     4            3           238 03+63=066 03+63=070    False
     4            3           239 12+96=108 12+96=108     True
     4            3           240 02+81=083 02+81=080    False
     4            3           241 83+13=096 83+13=098    False
     4            3           242 55+92=147 55+92=140    False
     4            3           243 96+09=105 96+09=108    False
     4            3           244 61+08=069 61+08=078    False
     4            3           245 39+75=114 39+75=110    False
     4            3           246 40+74=114 40+74=110    False
     4            3           247 39+80=119 39+80=113    False
     4            3           248 57+95=152 57+95=148    False
     4            3           249 92+97=189 92+97=188    False
     4            3           250 33+03=036 33+03=040    False
     4            3           251 74+92=166 74+92=164    False
     4            3           252 99+09=108 99+09=108     True
     4            3           253 98+10=108 98+10=113    False
     4            3           254 46+77=123 46+77=113    False
     4            3           255 85+78=163 85+78=158    False
     4            4             0 41+21=062 41+21=060    False
     4            4             1 49+13=062 49+13=068    False
     4            4             2 59+07=066 59+07=068    False
     4            4             3 31+11=042 31+11=040    False
     4            4             4 74+16=090 74+16=088    False
     4            4             5 43+38=081 43+38=078    False
     4            4             6 08+67=075 08+67=078    False
     4            4             7 31+66=097 31+66=098    False
     4            4             8 10+31=041 10+31=040    False
     4            4             9 34+59=093 34+59=088    False
     4            4            10 78+42=120 78+42=113    False
     4            4            11 13+41=054 13+41=050    False
     4            4            12 97+89=186 97+89=188    False
     4            4            13 15+62=077 15+62=078    False
     4            4            14 39+36=075 39+36=078    False
     4            4            15 21+25=046 21+25=040    False
     4            4            16 74+56=130 74+56=120    False
     4            4            17 85+47=132 85+47=138    False
     4            4            18 47+32=079 47+32=078    False
     4            4            19 37+66=103 37+66=108    False
     4            4            20 16+29=045 16+29=048    False
     4            4            21 86+77=163 86+77=164    False
     4            4            22 80+07=087 80+07=088    False
     4            4            23 87+05=092 87+05=088    False
     4            4            24 58+16=074 58+16=078    False
     4            4            25 52+79=131 52+79=128    False
     4            4            26 91+08=099 91+08=108    False
     4            4            27 47+78=125 47+78=128    False
     4            4            28 86+96=182 86+96=174    False
     4            4            29 90+22=112 90+22=110    False
     4            4            30 31+18=049 31+18=040    False
     4            4            31 86+15=101 86+15=108    False
     4            4            32 15+95=110 15+95=108    False
     4            4            33 42+11=053 42+11=050    False
     4            4            34 65+99=164 65+99=168    False
     4            4            35 89+29=118 89+29=118     True
     4            4            36 35+11=046 35+11=040    False
     4            4            37 71+41=112 71+41=110    False
     4            4            38 16+24=040 16+24=040     True
     4            4            39 77+82=159 77+82=154    False
     4            4            40 55+89=144 55+89=138    False
     4            4            41 17+88=105 17+88=108    False
     4            4            42 54+72=126 54+72=120    False
     4            4            43 34+98=132 34+98=128    False
     4            4            44 09+97=106 09+97=108    False
     4            4            45 91+07=098 91+07=098     True
     4            4            46 55+94=149 55+94=140    False
     4            4            47 22+58=080 22+58=078    False
     4            4            48 91+37=128 91+37=128     True
     4            4            49 16+10=026 16+10=020    False
     4            4            50 96+32=128 96+32=128     True
     4            4            51 35+75=110 35+75=110     True
     4            4            52 88+73=161 88+73=164    False
     4            4            53 35+18=053 35+18=058    False
     4            4            54 33+10=043 33+10=040    False
     4            4            55 08+50=058 08+50=064    False
     4            4            56 22+62=084 22+62=088    False
     4            4            57 26+37=063 26+37=064    False
     4            4            58 80+27=107 80+27=108    False
     4            4            59 68+28=096 68+28=088    False
     4            4            60 48+03=051 48+03=058    False
     4            4            61 40+18=058 40+18=058     True
     4            4            62 16+59=075 16+59=078    False
     4            4            63 02+19=021 02+19=020    False
     4            4            64 01+09=010 01+09=010     True
     4            4            65 62+68=130 62+68=120    False
     4            4            66 09+71=080 09+71=088    False
     4            4            67 00+58=058 00+58=068    False
     4            4            68 16+45=061 16+45=050    False
     4            4            69 24+98=122 24+98=113    False
     4            4            70 47+92=139 47+92=138    False
     4            4            71 94+84=178 94+84=178     True
     4            4            72 21+32=053 21+32=050    False
     4            4            73 29+82=111 29+82=108    False
     4            4            74 32+79=111 32+79=110    False
     4            4            75 13+98=111 13+98=108    False
     4            4            76 41+94=135 41+94=130    False
     4            4            77 51+84=135 51+84=130    False
     4            4            78 42+05=047 42+05=050    False
     4            4            79 39+03=042 39+03=040    False
     4            4            80 02+92=094 02+92=098    False
     4            4            81 99+81=180 99+81=178    False
     4            4            82 32+68=100 32+68=108    False
     4            4            83 52+17=069 52+17=064    False
     4            4            84 56+58=114 56+58=113    False
     4            4            85 21+48=069 21+48=068    False
     4            4            86 61+71=132 61+71=120    False
     4            4            87 17+01=018 17+01=020    False
     4            4            88 68+23=091 68+23=088    False
     4            4            89 00+37=037 00+37=040    False
     4            4            90 94+88=182 94+88=178    False
     4            4            91 06+31=037 06+31=040    False
     4            4            92 27+18=045 27+18=048    False
     4            4            93 41+81=122 41+81=110    False
     4            4            94 15+86=101 15+86=108    False
     4            4            95 36+87=123 36+87=114    False
     4            4            96 17+37=054 17+37=058    False
     4            4            97 13+86=099 13+86=108    False
     4            4            98 29+69=098 29+69=098     True
     4            4            99 31+99=130 31+99=128    False
     4            4           100 47+29=076 47+29=078    False
     4            4           101 08+81=089 08+81=088    False
     4            4           102 72+82=154 72+82=140    False
     4            4           103 46+91=137 46+91=130    False
     4            4           104 70+35=105 70+35=108    False
     4            4           105 90+55=145 90+55=140    False
     4            4           106 99+99=198 99+99=188    False
     4            4           107 60+97=157 60+97=154    False
     4            4           108 03+40=043 03+40=050    False
     4            4           109 35+49=084 35+49=088    False
     4            4           110 32+02=034 32+02=040    False
     4            4           111 70+18=088 70+18=088     True
     4            4           112 99+05=104 99+05=108    False
     4            4           113 78+73=151 78+73=148    False
     4            4           114 03+02=005 03+02=010    False
     4            4           115 50+14=064 50+14=060    False
     4            4           116 62+02=064 62+02=064     True
     4            4           117 16+74=090 16+74=098    False
     4            4           118 68+65=133 68+65=130    False
     4            4           119 74+81=155 74+81=140    False
     4            4           120 37+48=085 37+48=088    False
     4            4           121 63+04=067 63+04=060    False
     4            4           122 06+62=068 06+62=078    False
     4            4           123 95+75=170 95+75=164    False
     4            4           124 92+37=129 92+37=128    False
     4            4           125 81+32=113 81+32=110    False
     4            4           126 53+28=081 53+28=078    False
     4            4           127 52+42=094 52+42=088    False
     4            4           128 66+97=163 66+97=164    False
     4            4           129 00+48=048 00+48=058    False
     4            4           130 65+32=097 65+32=098    False
     4            4           131 60+89=149 60+89=148    False
     4            4           132 71+61=132 71+61=120    False
     4            4           133 98+50=148 98+50=148     True
     4            4           134 90+96=186 90+96=174    False
     4            4           135 02+96=098 02+96=098     True
     4            4           136 62+75=137 62+75=130    False
     4            4           137 41+28=069 41+28=068    False
     4            4           138 95+79=174 95+79=178    False
     4            4           139 48+41=089 48+41=088    False
     4            4           140 87+95=182 87+95=178    False
     4            4           141 75+38=113 75+38=113     True
     4            4           142 31+55=086 31+55=088    False
     4            4           143 54+63=117 54+63=110    False
     4            4           144 75+82=157 75+82=140    False
     4            4           145 46+45=091 46+45=088    False
     4            4           146 13+08=021 13+08=020    False
     4            4           147 77+97=174 77+97=175    False
     4            4           148 37+35=072 37+35=068    False
     4            4           149 21+89=110 21+89=108    False
     4            4           150 58+51=109 58+51=108    False
     4            4           151 91+48=139 91+48=148    False
     4            4           152 33+23=056 33+23=050    False
     4            4           153 80+96=176 80+96=174    False
     4            4           154 78+02=080 78+02=078    False
     4            4           155 38+95=133 38+95=128    False
     4            4           156 99+25=124 99+25=128    False
     4            4           157 30+76=106 30+76=108    False
     4            4           158 42+40=082 42+40=078    False
     4            4           159 85+58=143 85+58=148    False
     4            4           160 44+46=090 44+46=088    False
     4            4           161 06+41=047 06+41=050    False
     4            4           162 65+90=155 65+90=150    False
     4            4           163 43+83=126 43+83=120    False
     4            4           164 36+61=097 36+61=098    False
     4            4           165 61+51=112 61+51=110    False
     4            4           166 38+09=047 38+09=048    False
     4            4           167 21+97=118 21+97=110    False
     4            4           168 83+30=113 83+30=110    False
     4            4           169 11+79=090 11+79=098    False
     4            4           170 14+29=043 14+29=040    False
     4            4           171 21+11=032 21+11=030    False
     4            4           172 43+53=096 43+53=080    False
     4            4           173 02+58=060 02+58=068    False
     4            4           174 78+82=160 78+82=154    False
     4            4           175 91+11=102 91+11=108    False
     4            4           176 58+54=112 58+54=110    False
     4            4           177 00+15=015 00+15=020    False
     4            4           178 83+51=134 83+51=130    False
     4            4           179 44+72=116 44+72=110    False
     4            4           180 71+20=091 71+20=098    False
     4            4           181 24+99=123 24+99=118    False
     4            4           182 46+30=076 46+30=078    False
     4            4           183 08+67=075 08+67=078    False
     4            4           184 47+42=089 47+42=088    False
     4            4           185 95+67=162 95+67=164    False
     4            4           186 40+56=096 40+56=088    False
     4            4           187 17+95=112 17+95=110    False
     4            4           188 94+66=160 94+66=164    False
     4            4           189 14+58=072 14+58=078    False
     4            4           190 56+05=061 56+05=068    False
     4            4           191 70+01=071 70+01=070    False
     4            4           192 97+59=156 97+59=158    False
     4            4           193 94+67=161 94+67=164    False
     4            4           194 13+41=054 13+41=050    False
     4            4           195 85+15=100 85+15=108    False
     4            4           196 48+53=101 48+53=108    False
     4            4           197 62+75=137 62+75=130    False
     4            4           198 87+47=134 87+47=138    False
     4            4           199 31+88=119 31+88=110    False
     4            4           200 97+16=113 97+16=114    False
     4            4           201 48+45=093 48+45=088    False
     4            4           202 99+00=099 99+00=098    False
     4            4           203 15+01=016 15+01=020    False
     4            4           204 28+96=124 28+96=114    False
     4            4           205 20+11=031 20+11=030    False
     4            4           206 07+56=063 07+56=064    False
     4            4           207 06+08=014 06+08=014     True
     4            4           208 45+46=091 45+46=088    False
     4            4           209 48+85=133 48+85=138    False
     4            4           210 62+14=076 62+14=070    False
     4            4           211 82+31=113 82+31=110    False
     4            4           212 85+88=173 85+88=168    False
     4            4           213 77+08=085 77+08=088    False
     4            4           214 16+64=080 16+64=088    False
     4            4           215 00+27=027 00+27=030    False
     4            4           216 36+75=111 36+75=110    False
     4            4           217 38+38=076 38+38=078    False
     4            4           218 88+32=120 88+32=113    False
     4            4           219 09+88=097 09+88=098    False
     4            4           220 96+87=183 96+87=184    False
     4            4           221 71+29=100 71+29=108    False
     4            4           222 99+13=112 99+13=113    False
     4            4           223 03+13=016 03+13=020    False
     4            4           224 67+23=090 67+23=088    False
     4            4           225 15+98=113 15+98=108    False
     4            4           226 10+08=018 10+08=028    False
     4            4           227 46+24=070 46+24=068    False
     4            4           228 55+63=118 55+63=110    False
     4            4           229 28+06=034 28+06=038    False
     4            4           230 43+87=130 43+87=128    False
     4            4           231 34+05=039 34+05=040    False
2024-09-20 14:10:18,271 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:18,272 - root - INFO - ====================================================== Starting Train Epoch: 5/9 ======================================================
2024-09-20 14:10:18,273 - root - INFO - Learning rates for each parameter group: 0.00166297526309434830, 0.00166297526309434830
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 5, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 5, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=204.42258, average_batch_loss=0.79853, average_batch_perplexity=2.22226, lr=0.001662975, 0.001662975]
Epoch: 5, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=204.42258, average_batch_loss=0.79853, average_batch_perplexity=2.22226, lr=0.001662975, 0.001662975]
Epoch: 5, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=204.94623, average_batch_loss=0.80057, average_batch_perplexity=2.22681, lr=0.001655665, 0.001655665]
Epoch: 5, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 12.24it/s, total_batch_loss=204.94623, average_batch_loss=0.80057, average_batch_perplexity=2.22681, lr=0.001655665, 0.001655665]
Epoch: 5, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 12.24it/s, total_batch_loss=204.94623, average_batch_loss=0.80057, average_batch_perplexity=2.22681, lr=0.001655665, 0.001655665]
Epoch: 5, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 12.24it/s, total_batch_loss=207.90443, average_batch_loss=0.81213, average_batch_perplexity=2.25269, lr=0.001648451, 0.001648451]
Epoch: 5, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 12.24it/s, total_batch_loss=207.90443, average_batch_loss=0.81213, average_batch_perplexity=2.25269, lr=0.001648451, 0.001648451]
Epoch: 5, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 12.24it/s, total_batch_loss=203.25394, average_batch_loss=0.79396, average_batch_perplexity=2.21214, lr=0.001641330, 0.001641330]
Epoch: 5, Step: 4:  14%|█▍        | 4/28 [00:00<00:01, 12.37it/s, total_batch_loss=203.25394, average_batch_loss=0.79396, average_batch_perplexity=2.21214, lr=0.001641330, 0.001641330]
Epoch: 5, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.37it/s, total_batch_loss=203.25394, average_batch_loss=0.79396, average_batch_perplexity=2.21214, lr=0.001641330, 0.001641330]
Epoch: 5, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.37it/s, total_batch_loss=202.27339, average_batch_loss=0.79013, average_batch_perplexity=2.20368, lr=0.001634301, 0.001634301]
Epoch: 5, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.37it/s, total_batch_loss=202.27339, average_batch_loss=0.79013, average_batch_perplexity=2.20368, lr=0.001634301, 0.001634301]
Epoch: 5, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.37it/s, total_batch_loss=212.64465, average_batch_loss=0.83064, average_batch_perplexity=2.29479, lr=0.001627361, 0.001627361]
Epoch: 5, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 12.17it/s, total_batch_loss=212.64465, average_batch_loss=0.83064, average_batch_perplexity=2.29479, lr=0.001627361, 0.001627361]
Epoch: 5, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.17it/s, total_batch_loss=212.64465, average_batch_loss=0.83064, average_batch_perplexity=2.29479, lr=0.001627361, 0.001627361]
Epoch: 5, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.17it/s, total_batch_loss=201.84828, average_batch_loss=0.78847, average_batch_perplexity=2.20003, lr=0.001620509, 0.001620509]
Epoch: 5, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.17it/s, total_batch_loss=201.84828, average_batch_loss=0.78847, average_batch_perplexity=2.20003, lr=0.001620509, 0.001620509]
Epoch: 5, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.17it/s, total_batch_loss=198.03368, average_batch_loss=0.77357, average_batch_perplexity=2.16749, lr=0.001613743, 0.001613743]
Epoch: 5, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=198.03368, average_batch_loss=0.77357, average_batch_perplexity=2.16749, lr=0.001613743, 0.001613743]
Epoch: 5, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=198.03368, average_batch_loss=0.77357, average_batch_perplexity=2.16749, lr=0.001613743, 0.001613743]
Epoch: 5, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=200.52016, average_batch_loss=0.78328, average_batch_perplexity=2.18864, lr=0.001607061, 0.001607061]
Epoch: 5, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=200.52016, average_batch_loss=0.78328, average_batch_perplexity=2.18864, lr=0.001607061, 0.001607061]
Epoch: 5, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 11.98it/s, total_batch_loss=201.06064, average_batch_loss=0.78539, average_batch_perplexity=2.19327, lr=0.001600461, 0.001600461]
Epoch: 5, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=201.06064, average_batch_loss=0.78539, average_batch_perplexity=2.19327, lr=0.001600461, 0.001600461]
Epoch: 5, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=201.06064, average_batch_loss=0.78539, average_batch_perplexity=2.19327, lr=0.001600461, 0.001600461]
Epoch: 5, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=191.97321, average_batch_loss=0.74990, average_batch_perplexity=2.11678, lr=0.001593942, 0.001593942]
Epoch: 5, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=191.97321, average_batch_loss=0.74990, average_batch_perplexity=2.11678, lr=0.001593942, 0.001593942]
Epoch: 5, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=197.50186, average_batch_loss=0.77149, average_batch_perplexity=2.16299, lr=0.001587502, 0.001587502]
Epoch: 5, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.23it/s, total_batch_loss=197.50186, average_batch_loss=0.77149, average_batch_perplexity=2.16299, lr=0.001587502, 0.001587502]
Epoch: 5, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.23it/s, total_batch_loss=197.50186, average_batch_loss=0.77149, average_batch_perplexity=2.16299, lr=0.001587502, 0.001587502]
Epoch: 5, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.23it/s, total_batch_loss=199.21339, average_batch_loss=0.77818, average_batch_perplexity=2.17750, lr=0.001581139, 0.001581139]
Epoch: 5, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.23it/s, total_batch_loss=199.21339, average_batch_loss=0.77818, average_batch_perplexity=2.17750, lr=0.001581139, 0.001581139]
Epoch: 5, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.23it/s, total_batch_loss=195.82437, average_batch_loss=0.76494, average_batch_perplexity=2.14886, lr=0.001574852, 0.001574852]
Epoch: 5, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=195.82437, average_batch_loss=0.76494, average_batch_perplexity=2.14886, lr=0.001574852, 0.001574852]
Epoch: 5, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=195.82437, average_batch_loss=0.76494, average_batch_perplexity=2.14886, lr=0.001574852, 0.001574852]
Epoch: 5, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=184.58960, average_batch_loss=0.72105, average_batch_perplexity=2.05660, lr=0.001568640, 0.001568640]
Epoch: 5, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=184.58960, average_batch_loss=0.72105, average_batch_perplexity=2.05660, lr=0.001568640, 0.001568640]
Epoch: 5, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.37it/s, total_batch_loss=192.18750, average_batch_loss=0.75073, average_batch_perplexity=2.11855, lr=0.001562500, 0.001562500]
Epoch: 5, Step: 16:  57%|█████▋    | 16/28 [00:01<00:00, 12.38it/s, total_batch_loss=192.18750, average_batch_loss=0.75073, average_batch_perplexity=2.11855, lr=0.001562500, 0.001562500]
Epoch: 5, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.38it/s, total_batch_loss=192.18750, average_batch_loss=0.75073, average_batch_perplexity=2.11855, lr=0.001562500, 0.001562500]
Epoch: 5, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.38it/s, total_batch_loss=184.87943, average_batch_loss=0.72219, average_batch_perplexity=2.05893, lr=0.001556432, 0.001556432]
Epoch: 5, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.38it/s, total_batch_loss=184.87943, average_batch_loss=0.72219, average_batch_perplexity=2.05893, lr=0.001556432, 0.001556432]
Epoch: 5, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.38it/s, total_batch_loss=181.38622, average_batch_loss=0.70854, average_batch_perplexity=2.03102, lr=0.001550434, 0.001550434]
Epoch: 5, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=181.38622, average_batch_loss=0.70854, average_batch_perplexity=2.03102, lr=0.001550434, 0.001550434]
Epoch: 5, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=181.38622, average_batch_loss=0.70854, average_batch_perplexity=2.03102, lr=0.001550434, 0.001550434]
Epoch: 5, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=178.76575, average_batch_loss=0.69830, average_batch_perplexity=2.01034, lr=0.001544505, 0.001544505]
Epoch: 5, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=178.76575, average_batch_loss=0.69830, average_batch_perplexity=2.01034, lr=0.001544505, 0.001544505]
Epoch: 5, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=175.23106, average_batch_loss=0.68450, average_batch_perplexity=1.98277, lr=0.001538644, 0.001538644]
Epoch: 5, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 12.30it/s, total_batch_loss=175.23106, average_batch_loss=0.68450, average_batch_perplexity=1.98277, lr=0.001538644, 0.001538644]
Epoch: 5, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.30it/s, total_batch_loss=175.23106, average_batch_loss=0.68450, average_batch_perplexity=1.98277, lr=0.001538644, 0.001538644]
Epoch: 5, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.30it/s, total_batch_loss=175.56744, average_batch_loss=0.68581, average_batch_perplexity=1.98538, lr=0.001532848, 0.001532848]
Epoch: 5, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.30it/s, total_batch_loss=175.56744, average_batch_loss=0.68581, average_batch_perplexity=1.98538, lr=0.001532848, 0.001532848]
Epoch: 5, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.30it/s, total_batch_loss=172.26785, average_batch_loss=0.67292, average_batch_perplexity=1.95995, lr=0.001527118, 0.001527118]
Epoch: 5, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.26it/s, total_batch_loss=172.26785, average_batch_loss=0.67292, average_batch_perplexity=1.95995, lr=0.001527118, 0.001527118]
Epoch: 5, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.26it/s, total_batch_loss=172.26785, average_batch_loss=0.67292, average_batch_perplexity=1.95995, lr=0.001527118, 0.001527118]
Epoch: 5, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.26it/s, total_batch_loss=172.82648, average_batch_loss=0.67510, average_batch_perplexity=1.96424, lr=0.001521452, 0.001521452]
Epoch: 5, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.26it/s, total_batch_loss=172.82648, average_batch_loss=0.67510, average_batch_perplexity=1.96424, lr=0.001521452, 0.001521452]
Epoch: 5, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.26it/s, total_batch_loss=185.04572, average_batch_loss=0.72283, average_batch_perplexity=2.06027, lr=0.001515848, 0.001515848]
Epoch: 5, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.37it/s, total_batch_loss=185.04572, average_batch_loss=0.72283, average_batch_perplexity=2.06027, lr=0.001515848, 0.001515848]
Epoch: 5, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.37it/s, total_batch_loss=185.04572, average_batch_loss=0.72283, average_batch_perplexity=2.06027, lr=0.001515848, 0.001515848]
Epoch: 5, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.37it/s, total_batch_loss=165.06934, average_batch_loss=0.64480, average_batch_perplexity=1.90561, lr=0.001510305, 0.001510305]
Epoch: 5, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.37it/s, total_batch_loss=165.06934, average_batch_loss=0.64480, average_batch_perplexity=1.90561, lr=0.001510305, 0.001510305]
Epoch: 5, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.37it/s, total_batch_loss=161.05930, average_batch_loss=0.62914, average_batch_perplexity=1.87599, lr=0.001504823, 0.001504823]
Epoch: 5, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.36it/s, total_batch_loss=161.05930, average_batch_loss=0.62914, average_batch_perplexity=1.87599, lr=0.001504823, 0.001504823]
Epoch: 5, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.36it/s, total_batch_loss=161.05930, average_batch_loss=0.62914, average_batch_perplexity=1.87599, lr=0.001504823, 0.001504823]
Epoch: 5, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.36it/s, total_batch_loss=158.47107, average_batch_loss=0.61903, average_batch_perplexity=1.85712, lr=0.001499400, 0.001499400]
Epoch: 5, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.36it/s, total_batch_loss=158.47107, average_batch_loss=0.61903, average_batch_perplexity=1.85712, lr=0.001499400, 0.001499400]
Epoch: 5, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.36it/s, total_batch_loss=58.26158, average_batch_loss=0.66206, average_batch_perplexity=1.93879, lr=0.001494036, 0.001494036] 
Epoch: 5, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.58it/s, total_batch_loss=58.26158, average_batch_loss=0.66206, average_batch_perplexity=1.93879, lr=0.001494036, 0.001494036]
                                                                                                                                                                                         
2024-09-20 14:10:20,505 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:20,505 - root - INFO - Total Batches:                   28
2024-09-20 14:10:20,506 - root - INFO - Average Epoch Train Loss:        0.73815
2024-09-20 14:10:20,507 - root - INFO - Average Epoch Train Perplexity:  2.09206
2024-09-20 14:10:20,507 - root - INFO - 
2024-09-20 14:10:20,508 - root - INFO - ====================================================== Starting Valid Epoch: 5/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 5, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 5, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=143.81348, average_batch_loss=0.56177, average_batch_perplexity=1.75378]
Epoch: 5, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=143.81348, average_batch_loss=0.56177, average_batch_perplexity=1.75378]
Epoch: 5, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=143.66689, average_batch_loss=0.56120, average_batch_perplexity=1.75277]
Epoch: 5, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=143.66689, average_batch_loss=0.56120, average_batch_perplexity=1.75277]
Epoch: 5, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=142.74083, average_batch_loss=0.55758, average_batch_perplexity=1.74644]
Epoch: 5, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=142.74083, average_batch_loss=0.55758, average_batch_perplexity=1.74644]
Epoch: 5, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=142.74083, average_batch_loss=0.55758, average_batch_perplexity=1.74644]
Epoch: 5, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=138.69589, average_batch_loss=0.54178, average_batch_perplexity=1.71907]
Epoch: 5, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=138.69589, average_batch_loss=0.54178, average_batch_perplexity=1.71907]
Epoch: 5, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=145.46661, average_batch_loss=0.56823, average_batch_perplexity=1.76514]
Epoch: 5, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=145.46661, average_batch_loss=0.56823, average_batch_perplexity=1.76514]
Epoch: 5, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.02it/s, total_batch_loss=143.80544, average_batch_loss=0.56174, average_batch_perplexity=1.75372]
Epoch: 5, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.24it/s, total_batch_loss=143.80544, average_batch_loss=0.56174, average_batch_perplexity=1.75372]
Epoch: 5, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.24it/s, total_batch_loss=143.80544, average_batch_loss=0.56174, average_batch_perplexity=1.75372]
Epoch: 5, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.24it/s, total_batch_loss=141.84766, average_batch_loss=0.55409, average_batch_perplexity=1.74036]
Epoch: 5, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.24it/s, total_batch_loss=141.84766, average_batch_loss=0.55409, average_batch_perplexity=1.74036]
Epoch: 5, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.24it/s, total_batch_loss=112.36506, average_batch_loss=0.54022, average_batch_perplexity=1.71638]
                                                                                                                                                          
2024-09-20 14:10:20,848 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:20,848 - root - INFO - Total Batches:                   8
2024-09-20 14:10:20,849 - root - INFO - Average Epoch Valid Loss:        0.55620
2024-09-20 14:10:20,849 - root - INFO - Average Epoch Valid Perplexity:  1.74403
2024-09-20 14:10:20,850 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.2852]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.2969]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.21it/s, accuracy: 0.2969]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.21it/s, accuracy: 0.2305]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.21it/s, accuracy: 0.2672]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.58it/s, accuracy: 0.2672]
                                                                                           
2024-09-20 14:10:21,236 - root - INFO - Correct/Total Samples:           270/1000
2024-09-20 14:10:21,237 - root - INFO - Eval Accuracy:                   0.27
2024-09-20 14:10:21,256 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     5            1             0 13+48=061 13+48=061     True
     5            1             1 16+55=071 16+55=071     True
     5            1             2 79+34=113 79+34=112    False
     5            1             3 35+44=079 35+44=088    False
     5            1             4 16+50=066 16+50=064    False
     5            1             5 28+47=075 28+47=075     True
     5            1             6 00+74=074 00+74=077    False
     5            1             7 15+20=035 15+20=037    False
     5            1             8 72+60=132 72+60=132     True
     5            1             9 63+68=131 63+68=131     True
     5            1            10 29+45=074 29+45=072    False
     5            1            11 34+60=094 34+60=093    False
     5            1            12 53+70=123 53+70=123     True
     5            1            13 70+50=120 70+50=120     True
     5            1            14 11+84=095 11+84=097    False
     5            1            15 42+71=113 42+71=117    False
     5            1            16 98+22=120 98+22=111    False
     5            1            17 02+02=004 02+02=007    False
     5            1            18 15+85=100 15+85=102    False
     5            1            19 21+78=099 21+78=109    False
     5            1            20 61+79=140 61+79=140     True
     5            1            21 25+99=124 25+99=123    False
     5            1            22 09+85=094 09+85=092    False
     5            1            23 60+91=151 60+91=151     True
     5            1            24 35+30=065 35+30=063    False
     5            1            25 24+51=075 24+51=077    False
     5            1            26 93+91=184 93+91=183    False
     5            1            27 39+96=135 39+96=134    False
     5            1            28 64+35=099 64+35=098    False
     5            1            29 36+22=058 36+22=059    False
     5            1            30 68+45=113 68+45=112    False
     5            1            31 16+84=100 16+84=100     True
     5            1            32 91+52=143 91+52=147    False
     5            1            33 97+36=133 97+36=134    False
     5            1            34 27+37=064 27+37=065    False
     5            1            35 99+82=181 99+82=181     True
     5            1            36 03+42=045 03+42=057    False
     5            1            37 18+38=056 18+38=057    False
     5            1            38 32+20=052 32+20=057    False
     5            1            39 38+13=051 38+13=051     True
     5            1            40 68+42=110 68+42=111    False
     5            1            41 64+00=064 64+00=077    False
     5            1            42 48+94=142 48+94=142     True
     5            1            43 58+36=094 58+36=094     True
     5            1            44 41+22=063 41+22=067    False
     5            1            45 23+58=081 23+58=081     True
     5            1            46 67+46=113 67+46=114    False
     5            1            47 40+78=118 40+78=117    False
     5            1            48 90+38=128 90+38=127    False
     5            1            49 89+52=141 89+52=141     True
     5            1            50 37+77=114 37+77=115    False
     5            1            51 29+76=105 29+76=105     True
     5            1            52 42+90=132 42+90=132     True
     5            1            53 45+82=127 45+82=137    False
     5            1            54 35+95=130 35+95=132    False
     5            1            55 92+98=190 92+98=191    False
     5            1            56 73+91=164 73+91=167    False
     5            1            57 53+97=150 53+97=159    False
     5            1            58 98+69=167 98+69=165    False
     5            1            59 20+46=066 20+46=065    False
     5            1            60 48+69=117 48+69=115    False
     5            1            61 62+31=093 62+31=097    False
     5            1            62 80+59=139 80+59=149    False
     5            1            63 58+12=070 58+12=071    False
     5            1            64 08+96=104 08+96=105    False
     5            1            65 67+06=073 67+06=074    False
     5            1            66 22+04=026 22+04=027    False
     5            1            67 61+87=148 61+87=157    False
     5            1            68 95+27=122 95+27=122     True
     5            1            69 49+83=132 49+83=132     True
     5            1            70 43+00=043 43+00=053    False
     5            1            71 01+85=086 01+85=097    False
     5            1            72 11+68=079 11+68=078    False
     5            1            73 80+03=083 80+03=087    False
     5            1            74 54+83=137 54+83=137     True
     5            1            75 73+47=120 73+47=129    False
     5            1            76 99+93=192 99+93=192     True
     5            1            77 99+13=112 99+13=112     True
     5            1            78 92+66=158 92+66=158     True
     5            1            79 90+31=121 90+31=120    False
     5            1            80 25+69=094 25+69=092    False
     5            1            81 25+44=069 25+44=078    False
     5            1            82 00+93=093 00+93=097    False
     5            1            83 88+87=175 88+87=175     True
     5            1            84 47+56=103 47+56=105    False
     5            1            85 43+59=102 43+59=102     True
     5            1            86 22+00=022 22+00=017    False
     5            1            87 34+04=038 34+04=037    False
     5            1            88 65+13=078 65+13=077    False
     5            1            89 39+82=121 39+82=121     True
     5            1            90 66+83=149 66+83=159    False
     5            1            91 51+69=120 51+69=120     True
     5            1            92 80+21=101 80+21=109    False
     5            1            93 36+79=115 36+79=114    False
     5            1            94 21+68=089 21+68=098    False
     5            1            95 11+66=077 11+66=076    False
     5            1            96 55+19=074 55+19=072    False
     5            1            97 51+61=112 51+61=110    False
     5            1            98 38+88=126 38+88=126     True
     5            1            99 37+27=064 37+27=065    False
     5            1           100 18+63=081 18+63=081     True
     5            1           101 48+11=059 48+11=069    False
     5            1           102 72+68=140 72+68=131    False
     5            1           103 37+39=076 37+39=076     True
     5            1           104 64+95=159 64+95=152    False
     5            1           105 49+75=124 49+75=122    False
     5            1           106 45+66=111 45+66=110    False
     5            1           107 34+87=121 34+87=121     True
     5            1           108 02+84=086 02+84=087    False
     5            1           109 95+00=095 95+00=093    False
     5            1           110 09+56=065 09+56=064    False
     5            1           111 22+66=088 22+66=098    False
     5            1           112 43+18=061 43+18=061     True
     5            1           113 61+35=096 61+35=097    False
     5            1           114 13+73=086 13+73=087    False
     5            1           115 25+95=120 25+95=122    False
     5            1           116 73+96=169 73+96=169     True
     5            1           117 03+96=099 03+96=108    False
     5            1           118 97+82=179 97+82=189    False
     5            1           119 18+42=060 18+42=061    False
     5            1           120 29+98=127 29+98=127     True
     5            1           121 61+00=061 61+00=060    False
     5            1           122 22+98=120 22+98=110    False
     5            1           123 12+50=062 12+50=063    False
     5            1           124 02+58=060 02+58=060     True
     5            1           125 75+86=161 75+86=150    False
     5            1           126 31+57=088 31+57=097    False
     5            1           127 49+82=131 49+82=131     True
     5            1           128 15+33=048 15+33=058    False
     5            1           129 49+57=106 49+57=115    False
     5            1           130 61+70=131 61+70=130    False
     5            1           131 91+51=142 91+51=141    False
     5            1           132 50+05=055 50+05=063    False
     5            1           133 44+16=060 44+16=060     True
     5            1           134 92+01=093 92+01=097    False
     5            1           135 85+82=167 85+82=167     True
     5            1           136 07+41=048 07+41=058    False
     5            1           137 40+06=046 40+06=055    False
     5            1           138 79+62=141 79+62=141     True
     5            1           139 95+62=157 95+62=157     True
     5            1           140 42+93=135 42+93=137    False
     5            1           141 32+73=105 32+73=107    False
     5            1           142 47+09=056 47+09=055    False
     5            1           143 59+50=109 59+50=110    False
     5            1           144 61+77=138 61+77=137    False
     5            1           145 64+06=070 64+06=070     True
     5            1           146 35+10=045 35+10=047    False
     5            1           147 32+88=120 32+88=120     True
     5            1           148 03+95=098 03+95=097    False
     5            1           149 11+38=049 11+38=059    False
     5            1           150 21+67=088 21+67=097    False
     5            1           151 33+25=058 33+25=058     True
     5            1           152 63+45=108 63+45=107    False
     5            1           153 56+12=068 56+12=079    False
     5            1           154 19+79=098 19+79=097    False
     5            1           155 60+43=103 60+43=107    False
     5            1           156 07+61=068 07+61=078    False
     5            1           157 58+03=061 58+03=061     True
     5            1           158 11+10=021 11+10=010    False
     5            1           159 49+89=138 49+89=138     True
     5            1           160 37+58=095 37+58=095     True
     5            1           161 59+78=137 59+78=137     True
     5            1           162 11+21=032 11+21=030    False
     5            1           163 37+43=080 37+43=081    False
     5            1           164 44+21=065 44+21=067    False
     5            1           165 22+97=119 22+97=118    False
     5            1           166 65+35=100 65+35=102    False
     5            1           167 06+51=057 06+51=065    False
     5            1           168 65+25=090 65+25=092    False
     5            1           169 74+94=168 74+94=167    False
     5            1           170 87+55=142 87+55=142     True
     5            1           171 90+67=157 90+67=155    False
     5            1           172 11+02=013 11+02=007    False
     5            1           173 01+66=067 01+66=075    False
     5            1           174 56+00=056 56+00=065    False
     5            1           175 58+52=110 58+52=111    False
     5            1           176 24+99=123 24+99=123     True
     5            1           177 97+13=110 97+13=110     True
     5            1           178 42+94=136 42+94=137    False
     5            1           179 60+15=075 60+15=077    False
     5            1           180 20+46=066 20+46=065    False
     5            1           181 40+70=110 40+70=110     True
     5            1           182 95+45=140 95+45=132    False
     5            1           183 96+95=191 96+95=191     True
     5            1           184 98+20=118 98+20=115    False
     5            1           185 43+19=062 43+19=061    False
     5            1           186 50+69=119 50+69=129    False
     5            1           187 27+53=080 27+53=081    False
     5            1           188 24+25=049 24+25=058    False
     5            1           189 65+92=157 65+92=157     True
     5            1           190 28+14=042 28+14=032    False
     5            1           191 20+57=077 20+57=086    False
     5            1           192 59+97=156 59+97=155    False
     5            1           193 98+32=130 98+32=121    False
     5            1           194 55+84=139 55+84=147    False
     5            1           195 20+39=059 20+39=050    False
     5            1           196 86+47=133 86+47=135    False
     5            1           197 92+36=128 92+36=128     True
     5            1           198 05+38=043 05+38=042    False
     5            1           199 77+36=113 77+36=114    False
     5            1           200 41+64=105 41+64=107    False
     5            1           201 74+51=125 74+51=127    False
     5            1           202 74+55=129 74+55=127    False
     5            1           203 64+64=128 64+64=127    False
     5            1           204 60+19=079 60+19=078    False
     5            1           205 77+96=173 77+96=164    False
     5            1           206 22+30=052 22+30=053    False
     5            1           207 82+49=131 82+49=131     True
     5            1           208 39+67=106 39+67=106     True
     5            1           209 62+40=102 62+40=102     True
     5            1           210 28+71=099 28+71=099     True
     5            1           211 47+26=073 47+26=075    False
     5            1           212 98+54=152 98+54=152     True
     5            1           213 38+70=108 38+70=108     True
     5            1           214 63+40=103 63+40=103     True
     5            1           215 86+62=148 86+62=159    False
     5            1           216 22+65=087 22+65=087     True
     5            1           217 41+17=058 41+17=067    False
     5            1           218 68+88=156 68+88=155    False
     5            1           219 96+70=166 96+70=164    False
     5            1           220 99+29=128 99+29=127    False
     5            1           221 83+39=122 83+39=122     True
     5            1           222 26+55=081 26+55=081     True
     5            1           223 53+70=123 53+70=123     True
     5            1           224 94+12=106 94+12=107    False
     5            1           225 00+37=037 00+37=036    False
     5            1           226 36+94=130 36+94=120    False
     5            1           227 40+58=098 40+58=108    False
     5            1           228 19+80=099 19+80=098    False
     5            1           229 49+44=093 49+44=102    False
     5            1           230 70+27=097 70+27=095    False
     5            1           231 52+80=132 52+80=132     True
     5            1           232 77+90=167 77+90=165    False
     5            1           233 13+92=105 13+92=107    False
     5            1           234 59+09=068 59+09=077    False
     5            1           235 33+55=088 33+55=097    False
     5            1           236 85+16=101 85+16=101     True
     5            1           237 25+65=090 25+65=092    False
     5            1           238 46+20=066 46+20=065    False
     5            1           239 29+52=081 29+52=081     True
     5            1           240 32+36=068 32+36=068     True
     5            1           241 47+08=055 47+08=055     True
     5            1           242 21+84=105 21+84=107    False
     5            1           243 24+45=069 24+45=068    False
     5            1           244 29+15=044 29+15=042    False
     5            1           245 83+03=086 83+03=097    False
     5            1           246 83+36=119 83+36=115    False
     5            1           247 58+95=153 58+95=152    False
     5            1           248 76+79=155 76+79=155     True
     5            1           249 63+30=093 63+30=093     True
     5            1           250 38+24=062 38+24=061    False
     5            1           251 19+46=065 19+46=064    False
     5            1           252 99+66=165 99+66=164    False
     5            1           253 95+73=168 95+73=167    False
     5            1           254 65+27=092 65+27=091    False
     5            1           255 91+83=174 91+83=177    False
     5            2             0 65+49=114 65+49=112    False
     5            2             1 03+08=011 03+08=000    False
     5            2             2 67+81=148 67+81=158    False
     5            2             3 47+23=070 47+23=071    False
     5            2             4 43+91=134 43+91=137    False
     5            2             5 41+67=108 41+67=107    False
     5            2             6 02+33=035 02+33=037    False
     5            2             7 64+84=148 64+84=158    False
     5            2             8 81+64=145 81+64=147    False
     5            2             9 80+11=091 80+11=091     True
     5            2            10 78+01=079 78+01=089    False
     5            2            11 89+18=107 89+18=107     True
     5            2            12 45+52=097 45+52=107    False
     5            2            13 35+30=065 35+30=063    False
     5            2            14 53+32=085 53+32=097    False
     5            2            15 49+90=139 49+90=149    False
     5            2            16 41+37=078 41+37=087    False
     5            2            17 35+14=049 35+14=058    False
     5            2            18 92+50=142 92+50=143    False
     5            2            19 37+60=097 37+60=095    False
     5            2            20 91+61=152 91+61=151    False
     5            2            21 80+77=157 80+77=155    False
     5            2            22 66+24=090 66+24=090     True
     5            2            23 81+07=088 81+07=097    False
     5            2            24 85+59=144 85+59=142    False
     5            2            25 19+69=088 19+69=087    False
     5            2            26 91+44=135 91+44=137    False
     5            2            27 25+29=054 25+29=052    False
     5            2            28 27+08=035 27+08=035     True
     5            2            29 66+14=080 66+14=071    False
     5            2            30 95+11=106 95+11=107    False
     5            2            31 13+97=110 13+97=110     True
     5            2            32 94+40=134 94+40=133    False
     5            2            33 74+31=105 74+31=107    False
     5            2            34 49+00=049 49+00=050    False
     5            2            35 59+18=077 59+18=076    False
     5            2            36 07+65=072 07+65=071    False
     5            2            37 83+55=138 83+55=137    False
     5            2            38 49+80=129 49+80=139    False
     5            2            39 64+17=081 64+17=081     True
     5            2            40 48+83=131 48+83=131     True
     5            2            41 95+44=139 95+44=137    False
     5            2            42 71+26=097 71+26=096    False
     5            2            43 06+74=080 06+74=071    False
     5            2            44 34+24=058 34+24=058     True
     5            2            45 59+71=130 59+71=130     True
     5            2            46 68+32=100 68+32=101    False
     5            2            47 38+81=119 38+81=119     True
     5            2            48 29+56=085 29+56=084    False
     5            2            49 54+55=109 54+55=117    False
     5            2            50 31+27=058 31+27=057    False
     5            2            51 97+89=186 97+89=195    False
     5            2            52 48+09=057 48+09=057     True
     5            2            53 86+76=162 86+76=154    False
     5            2            54 82+59=141 82+59=141     True
     5            2            55 01+67=068 01+67=077    False
     5            2            56 26+06=032 26+06=035    False
     5            2            57 22+46=068 22+46=068     True
     5            2            58 85+16=101 85+16=101     True
     5            2            59 29+08=037 29+08=037     True
     5            2            60 73+94=167 73+94=167     True
     5            2            61 19+62=081 19+62=081     True
     5            2            62 86+62=148 86+62=159    False
     5            2            63 38+99=137 38+99=137     True
     5            2            64 64+25=089 64+25=097    False
     5            2            65 61+72=133 61+72=137    False
     5            2            66 78+88=166 78+88=165    False
     5            2            67 43+66=109 43+66=108    False
     5            2            68 69+35=104 69+35=103    False
     5            2            69 33+77=110 33+77=110     True
     5            2            70 37+37=074 37+37=075    False
     5            2            71 87+54=141 87+54=141     True
     5            2            72 68+90=158 68+90=157    False
     5            2            73 83+44=127 83+44=127     True
     5            2            74 41+09=050 41+09=050     True
     5            2            75 13+48=061 13+48=061     True
     5            2            76 01+41=042 01+41=050    False
     5            2            77 19+74=093 19+74=092    False
     5            2            78 15+05=020 15+05=012    False
     5            2            79 55+46=101 55+46=100    False
     5            2            80 68+33=101 68+33=101     True
     5            2            81 44+40=084 44+40=083    False
     5            2            82 88+03=091 88+03=091     True
     5            2            83 81+79=160 81+79=150    False
     5            2            84 18+98=116 18+98=107    False
     5            2            85 70+64=134 70+64=137    False
     5            2            86 26+44=070 26+44=071    False
     5            2            87 98+87=185 98+87=185     True
     5            2            88 18+74=092 18+74=092     True
     5            2            89 50+68=118 50+68=117    False
     5            2            90 13+51=064 13+51=067    False
     5            2            91 90+89=179 90+89=179     True
     5            2            92 47+78=125 47+78=125     True
     5            2            93 81+57=138 81+57=147    False
     5            2            94 34+47=081 34+47=081     True
     5            2            95 94+23=117 94+23=117     True
     5            2            96 07+70=077 07+70=075    False
     5            2            97 56+33=089 56+33=099    False
     5            2            98 33+04=037 33+04=037     True
     5            2            99 26+09=035 26+09=035     True
     5            2           100 14+92=106 14+92=107    False
     5            2           101 78+54=132 78+54=132     True
     5            2           102 36+76=112 36+76=114    False
     5            2           103 17+47=064 17+47=065    False
     5            2           104 28+18=046 28+18=047    False
     5            2           105 78+54=132 78+54=132     True
     5            2           106 84+72=156 84+72=157    False
     5            2           107 00+44=044 00+44=057    False
     5            2           108 50+41=091 50+41=092    False
     5            2           109 87+88=175 87+88=175     True
     5            2           110 11+66=077 11+66=076    False
     5            2           111 80+60=140 80+60=140     True
     5            2           112 78+76=154 78+76=154     True
     5            2           113 24+74=098 24+74=098     True
     5            2           114 88+48=136 88+48=135    False
     5            2           115 38+31=069 38+31=079    False
     5            2           116 29+27=056 29+27=055    False
     5            2           117 08+45=053 08+45=052    False
     5            2           118 28+13=041 28+13=031    False
     5            2           119 53+99=152 53+99=152     True
     5            2           120 47+92=139 47+92=139     True
     5            2           121 76+21=097 76+21=095    False
     5            2           122 53+96=149 53+96=159    False
     5            2           123 93+91=184 93+91=183    False
     5            2           124 97+33=130 97+33=121    False
     5            2           125 67+78=145 67+78=145     True
     5            2           126 58+05=063 58+05=072    False
     5            2           127 00+16=016 00+16=015    False
     5            2           128 80+19=099 80+19=098    False
     5            2           129 98+22=120 98+22=111    False
     5            2           130 09+62=071 09+62=071     True
     5            2           131 06+23=029 06+23=029     True
     5            2           132 32+99=131 32+99=121    False
     5            2           133 17+02=019 17+02=010    False
     5            2           134 64+35=099 64+35=098    False
     5            2           135 35+83=118 35+83=117    False
     5            2           136 71+36=107 71+36=107     True
     5            2           137 75+06=081 75+06=081     True
     5            2           138 88+95=183 88+95=182    False
     5            2           139 19+98=117 19+98=115    False
     5            2           140 28+89=117 28+89=115    False
     5            2           141 33+11=044 33+11=047    False
     5            2           142 34+49=083 34+49=082    False
     5            2           143 90+35=125 90+35=127    False
     5            2           144 22+90=112 22+90=112     True
     5            2           145 98+89=187 98+89=195    False
     5            2           146 88+47=135 88+47=135     True
     5            2           147 30+86=116 30+86=115    False
     5            2           148 31+48=079 31+48=088    False
     5            2           149 39+21=060 39+21=050    False
     5            2           150 19+17=036 19+17=035    False
     5            2           151 27+60=087 27+60=085    False
     5            2           152 12+16=028 12+16=028     True
     5            2           153 51+75=126 51+75=127    False
     5            2           154 10+74=084 10+74=087    False
     5            2           155 42+63=105 42+63=107    False
     5            2           156 40+14=054 40+14=053    False
     5            2           157 23+93=116 23+93=117    False
     5            2           158 85+26=111 85+26=111     True
     5            2           159 28+46=074 28+46=074     True
     5            2           160 28+33=061 28+33=051    False
     5            2           161 43+30=073 43+30=075    False
     5            2           162 89+72=161 89+72=151    False
     5            2           163 52+21=073 52+21=077    False
     5            2           164 21+54=075 21+54=077    False
     5            2           165 69+13=082 69+13=081    False
     5            2           166 07+60=067 07+60=075    False
     5            2           167 63+83=146 63+83=157    False
     5            2           168 80+69=149 80+69=150    False
     5            2           169 27+28=055 27+28=056    False
     5            2           170 42+31=073 42+31=077    False
     5            2           171 51+99=150 51+99=150     True
     5            2           172 28+75=103 28+75=102    False
     5            2           173 38+57=095 38+57=095     True
     5            2           174 83+16=099 83+16=108    False
     5            2           175 92+94=186 92+94=187    False
     5            2           176 55+75=130 55+75=132    False
     5            2           177 59+51=110 59+51=110     True
     5            2           178 33+09=042 33+09=031    False
     5            2           179 53+13=066 53+13=077    False
     5            2           180 05+70=075 05+70=077    False
     5            2           181 12+20=032 12+20=037    False
     5            2           182 11+49=060 11+49=060     True
     5            2           183 63+45=108 63+45=107    False
     5            2           184 92+23=115 92+23=117    False
     5            2           185 82+45=127 82+45=127     True
     5            2           186 23+41=064 23+41=067    False
     5            2           187 64+26=090 64+26=099    False
     5            2           188 91+24=115 91+24=117    False
     5            2           189 20+32=052 20+32=052     True
     5            2           190 83+21=104 83+21=107    False
     5            2           191 07+20=027 07+20=025    False
     5            2           192 94+14=108 94+14=107    False
     5            2           193 96+89=185 96+89=194    False
     5            2           194 13+08=021 13+08=010    False
     5            2           195 32+05=037 32+05=037     True
     5            2           196 09+51=060 09+51=060     True
     5            2           197 26+29=055 26+29=055     True
     5            2           198 49+65=114 49+65=112    False
     5            2           199 32+66=098 32+66=098     True
     5            2           200 41+08=049 41+08=059    False
     5            2           201 26+79=105 26+79=105     True
     5            2           202 29+91=120 29+91=110    False
     5            2           203 51+00=051 51+00=050    False
     5            2           204 61+60=121 61+60=120    False
     5            2           205 45+78=123 45+78=122    False
     5            2           206 56+16=072 56+16=075    False
     5            2           207 66+68=134 66+68=134     True
     5            2           208 32+16=048 32+16=058    False
     5            2           209 84+49=133 84+49=133     True
     5            2           210 45+09=054 45+09=052    False
     5            2           211 96+78=174 96+78=174     True
     5            2           212 10+02=012 10+02=007    False
     5            2           213 36+60=096 36+60=094    False
     5            2           214 44+36=080 44+36=089    False
     5            2           215 12+86=098 12+86=098     True
     5            2           216 94+54=148 94+54=147    False
     5            2           217 64+73=137 64+73=137     True
     5            2           218 73+10=083 73+10=083     True
     5            2           219 14+62=076 14+62=077    False
     5            2           220 25+22=047 25+22=057    False
     5            2           221 94+22=116 94+22=117    False
     5            2           222 41+76=117 41+76=117     True
     5            2           223 38+46=084 38+46=084     True
     5            2           224 71+72=143 71+72=137    False
     5            2           225 74+79=153 74+79=143    False
     5            2           226 99+67=166 99+67=165    False
     5            2           227 78+71=149 78+71=149     True
     5            2           228 23+19=042 23+19=031    False
     5            2           229 51+65=116 51+65=117    False
     5            2           230 94+86=180 94+86=180     True
     5            2           231 09+79=088 09+79=087    False
     5            2           232 69+39=108 69+39=108     True
     5            2           233 84+13=097 84+13=097     True
     5            2           234 36+59=095 36+59=094    False
     5            2           235 87+47=134 87+47=135    False
     5            2           236 50+00=050 50+00=050     True
     5            2           237 76+96=172 76+96=164    False
     5            2           238 12+18=030 12+18=020    False
     5            2           239 99+95=194 99+95=193    False
     5            2           240 22+00=022 22+00=017    False
     5            2           241 96+18=114 96+18=105    False
     5            2           242 51+20=071 51+20=070    False
     5            2           243 66+81=147 66+81=155    False
     5            2           244 78+18=096 78+18=095    False
     5            2           245 09+78=087 09+78=086    False
     5            2           246 24+20=044 24+20=047    False
     5            2           247 76+13=089 76+13=099    False
     5            2           248 05+10=015 05+10=007    False
     5            2           249 97+14=111 97+14=112    False
     5            2           250 92+38=130 92+38=121    False
     5            2           251 77+13=090 77+13=091    False
     5            2           252 70+19=089 70+19=098    False
     5            2           253 90+45=135 90+45=137    False
     5            2           254 50+09=059 50+09=068    False
     5            2           255 78+06=084 78+06=084     True
     5            3             0 03+25=028 03+25=027    False
     5            3             1 48+43=091 48+43=091     True
     5            3             2 39+47=086 39+47=095    False
     5            3             3 48+19=067 48+19=067     True
     5            3             4 07+22=029 07+22=029     True
     5            3             5 73+68=141 73+68=131    False
     5            3             6 14+56=070 14+56=070     True
     5            3             7 96+95=191 96+95=191     True
     5            3             8 96+28=124 96+28=125    False
     5            3             9 82+05=087 82+05=097    False
     5            3            10 27+94=121 27+94=112    False
     5            3            11 87+86=173 87+86=164    False
     5            3            12 00+68=068 00+68=077    False
     5            3            13 11+37=048 11+37=058    False
     5            3            14 95+93=188 95+93=192    False
     5            3            15 75+82=157 75+82=157     True
     5            3            16 41+71=112 41+71=110    False
     5            3            17 60+14=074 60+14=077    False
     5            3            18 77+77=154 77+77=155    False
     5            3            19 31+84=115 31+84=117    False
     5            3            20 31+57=088 31+57=097    False
     5            3            21 27+87=114 27+87=115    False
     5            3            22 31+89=120 31+89=129    False
     5            3            23 22+18=040 22+18=030    False
     5            3            24 38+25=063 38+25=062    False
     5            3            25 64+54=118 64+54=127    False
     5            3            26 85+60=145 85+60=142    False
     5            3            27 14+71=085 14+71=087    False
     5            3            28 06+16=022 06+16=014    False
     5            3            29 78+61=139 78+61=139     True
     5            3            30 65+75=140 65+75=132    False
     5            3            31 13+83=096 13+83=097    False
     5            3            32 75+49=124 75+49=122    False
     5            3            33 05+78=083 05+78=082    False
     5            3            34 66+55=121 66+55=121     True
     5            3            35 03+05=008 03+05=007    False
     5            3            36 69+99=168 69+99=165    False
     5            3            37 52+82=134 52+82=137    False
     5            3            38 45+97=142 45+97=142     True
     5            3            39 66+17=083 66+17=085    False
     5            3            40 36+17=053 36+17=055    False
     5            3            41 92+74=166 92+74=167    False
     5            3            42 48+44=092 48+44=091    False
     5            3            43 34+17=051 34+17=051     True
     5            3            44 56+11=067 56+11=075    False
     5            3            45 77+23=100 77+23=100     True
     5            3            46 10+11=021 10+11=010    False
     5            3            47 32+65=097 32+65=097     True
     5            3            48 53+49=102 53+49=102     True
     5            3            49 68+86=154 68+86=154     True
     5            3            50 52+94=146 52+94=147    False
     5            3            51 97+71=168 97+71=167    False
     5            3            52 05+37=042 05+37=041    False
     5            3            53 58+75=133 58+75=132    False
     5            3            54 06+24=030 06+24=020    False
     5            3            55 15+44=059 15+44=068    False
     5            3            56 90+49=139 90+49=139     True
     5            3            57 50+37=087 50+37=095    False
     5            3            58 88+61=149 88+61=159    False
     5            3            59 21+57=078 21+57=087    False
     5            3            60 24+85=109 24+85=112    False
     5            3            61 01+66=067 01+66=075    False
     5            3            62 50+46=096 50+46=095    False
     5            3            63 76+65=141 76+65=131    False
     5            3            64 23+74=097 23+74=097     True
     5            3            65 76+16=092 76+16=094    False
     5            3            66 06+08=014 06+08=015    False
     5            3            67 69+25=094 69+25=092    False
     5            3            68 15+23=038 15+23=037    False
     5            3            69 41+02=043 41+02=053    False
     5            3            70 16+66=082 16+66=075    False
     5            3            71 59+94=153 59+94=152    False
     5            3            72 32+88=120 32+88=120     True
     5            3            73 46+21=067 46+21=075    False
     5            3            74 57+28=085 57+28=085     True
     5            3            75 00+31=031 00+31=030    False
     5            3            76 77+07=084 77+07=085    False
     5            3            77 28+70=098 28+70=095    False
     5            3            78 05+61=066 05+61=077    False
     5            3            79 22+09=031 22+09=021    False
     5            3            80 08+94=102 08+94=102     True
     5            3            81 40+11=051 40+11=050    False
     5            3            82 10+48=058 10+48=067    False
     5            3            83 27+56=083 27+56=084    False
     5            3            84 42+16=058 42+16=068    False
     5            3            85 69+43=112 69+43=112     True
     5            3            86 57+69=126 57+69=125    False
     5            3            87 18+86=104 18+86=105    False
     5            3            88 86+80=166 86+80=164    False
     5            3            89 30+85=115 30+85=117    False
     5            3            90 77+66=143 77+66=144    False
     5            3            91 39+64=103 39+64=102    False
     5            3            92 76+61=137 76+61=135    False
     5            3            93 42+61=103 42+61=108    False
     5            3            94 07+30=037 07+30=035    False
     5            3            95 35+93=128 35+93=127    False
     5            3            96 40+90=130 40+90=130     True
     5            3            97 08+91=099 08+91=099     True
     5            3            98 62+34=096 62+34=097    False
     5            3            99 86+49=135 86+49=135     True
     5            3           100 73+23=096 73+23=097    False
     5            3           101 87+35=122 87+35=122     True
     5            3           102 35+31=066 35+31=067    False
     5            3           103 07+13=020 07+13=010    False
     5            3           104 39+41=080 39+41=080     True
     5            3           105 44+63=107 44+63=107     True
     5            3           106 94+66=160 94+66=159    False
     5            3           107 49+54=103 49+54=102    False
     5            3           108 79+46=125 79+46=124    False
     5            3           109 53+12=065 53+12=067    False
     5            3           110 60+92=152 60+92=157    False
     5            3           111 25+60=085 25+60=083    False
     5            3           112 64+53=117 64+53=117     True
     5            3           113 41+02=043 41+02=053    False
     5            3           114 00+97=097 00+97=095    False
     5            3           115 12+52=064 12+52=067    False
     5            3           116 39+50=089 39+50=090    False
     5            3           117 87+21=108 87+21=107    False
     5            3           118 04+99=103 04+99=102    False
     5            3           119 19+75=094 19+75=092    False
     5            3           120 90+05=095 90+05=097    False
     5            3           121 54+39=093 54+39=092    False
     5            3           122 29+26=055 29+26=054    False
     5            3           123 82+95=177 82+95=177     True
     5            3           124 55+09=064 55+09=072    False
     5            3           125 02+62=064 02+62=077    False
     5            3           126 68+30=098 68+30=095    False
     5            3           127 99+16=115 99+16=114    False
     5            3           128 63+11=074 63+11=077    False
     5            3           129 42+92=134 42+92=137    False
     5            3           130 99+16=115 99+16=114    False
     5            3           131 50+31=081 50+31=081     True
     5            3           132 23+46=069 23+46=079    False
     5            3           133 45+73=118 45+73=127    False
     5            3           134 89+77=166 89+77=165    False
     5            3           135 45+78=123 45+78=122    False
     5            3           136 96+60=156 96+60=155    False
     5            3           137 74+61=135 74+61=137    False
     5            3           138 87+01=088 87+01=097    False
     5            3           139 63+88=151 63+88=151     True
     5            3           140 59+72=131 59+72=131     True
     5            3           141 17+96=113 17+96=114    False
     5            3           142 89+77=166 89+77=165    False
     5            3           143 24+69=093 24+69=092    False
     5            3           144 75+83=158 75+83=151    False
     5            3           145 50+54=104 50+54=107    False
     5            3           146 93+47=140 93+47=130    False
     5            3           147 20+55=075 20+55=075     True
     5            3           148 91+79=170 91+79=169    False
     5            3           149 15+13=028 15+13=027    False
     5            3           150 86+09=095 86+09=094    False
     5            3           151 29+58=087 29+58=086    False
     5            3           152 01+29=030 01+29=020    False
     5            3           153 65+48=113 65+48=112    False
     5            3           154 96+45=141 96+45=131    False
     5            3           155 58+69=127 58+69=127     True
     5            3           156 84+43=127 84+43=127     True
     5            3           157 90+38=128 90+38=127    False
     5            3           158 39+97=136 39+97=135    False
     5            3           159 74+84=158 74+84=158     True
     5            3           160 86+22=108 86+22=108     True
     5            3           161 01+86=087 01+86=096    False
     5            3           162 81+63=144 81+63=147    False
     5            3           163 80+94=174 80+94=167    False
     5            3           164 44+42=086 44+42=087    False
     5            3           165 72+60=132 72+60=132     True
     5            3           166 28+07=035 28+07=035     True
     5            3           167 69+54=123 69+54=122    False
     5            3           168 68+77=145 68+77=145     True
     5            3           169 90+16=106 90+16=105    False
     5            3           170 64+50=114 64+50=113    False
     5            3           171 46+88=134 46+88=135    False
     5            3           172 55+99=154 55+99=153    False
     5            3           173 31+97=128 31+97=127    False
     5            3           174 79+28=107 79+28=107     True
     5            3           175 81+43=124 81+43=127    False
     5            3           176 41+15=056 41+15=057    False
     5            3           177 38+77=115 38+77=115     True
     5            3           178 25+06=031 25+06=030    False
     5            3           179 01+93=094 01+93=097    False
     5            3           180 97+22=119 97+22=119     True
     5            3           181 71+84=155 71+84=157    False
     5            3           182 26+36=062 26+36=065    False
     5            3           183 60+92=152 60+92=157    False
     5            3           184 02+94=096 02+94=097    False
     5            3           185 31+58=089 31+58=099    False
     5            3           186 70+52=122 70+52=123    False
     5            3           187 19+42=061 19+42=061     True
     5            3           188 95+73=168 95+73=167    False
     5            3           189 21+25=046 21+25=057    False
     5            3           190 13+58=071 13+58=071     True
     5            3           191 62+28=090 62+28=090     True
     5            3           192 38+14=052 38+14=051    False
     5            3           193 66+75=141 66+75=131    False
     5            3           194 24+59=083 24+59=082    False
     5            3           195 97+66=163 97+66=154    False
     5            3           196 76+70=146 76+70=144    False
     5            3           197 08+40=048 08+40=058    False
     5            3           198 84+00=084 84+00=097    False
     5            3           199 54+73=127 54+73=127     True
     5            3           200 16+88=104 16+88=105    False
     5            3           201 99+47=146 99+47=145    False
     5            3           202 31+95=126 31+95=127    False
     5            3           203 01+79=080 01+79=070    False
     5            3           204 03+68=071 03+68=071     True
     5            3           205 10+05=015 10+05=007    False
     5            3           206 98+90=188 98+90=185    False
     5            3           207 58+53=111 58+53=112    False
     5            3           208 34+87=121 34+87=121     True
     5            3           209 07+31=038 07+31=037    False
     5            3           210 59+08=067 59+08=076    False
     5            3           211 51+38=089 51+38=099    False
     5            3           212 62+62=124 62+62=127    False
     5            3           213 80+32=112 80+32=112     True
     5            3           214 69+16=085 69+16=084    False
     5            3           215 01+17=018 01+17=017    False
     5            3           216 74+41=115 74+41=117    False
     5            3           217 20+89=109 20+89=118    False
     5            3           218 53+50=103 53+50=103     True
     5            3           219 82+85=167 82+85=167     True
     5            3           220 34+47=081 34+47=081     True
     5            3           221 34+45=079 34+45=088    False
     5            3           222 77+34=111 77+34=111     True
     5            3           223 56+33=089 56+33=099    False
     5            3           224 97+56=153 97+56=144    False
     5            3           225 29+06=035 29+06=034    False
     5            3           226 78+96=174 78+96=174     True
     5            3           227 28+65=093 28+65=092    False
     5            3           228 61+64=125 61+64=127    False
     5            3           229 32+64=096 32+64=097    False
     5            3           230 98+32=130 98+32=121    False
     5            3           231 25+35=060 25+35=062    False
     5            3           232 05+08=013 05+08=003    False
     5            3           233 05+26=031 05+26=030    False
     5            3           234 84+71=155 84+71=157    False
     5            3           235 33+10=043 33+10=047    False
     5            3           236 98+35=133 98+35=132    False
     5            3           237 68+98=166 68+98=165    False
     5            3           238 03+63=066 03+63=077    False
     5            3           239 12+96=108 12+96=108     True
     5            3           240 02+81=083 02+81=087    False
     5            3           241 83+13=096 83+13=097    False
     5            3           242 55+92=147 55+92=147     True
     5            3           243 96+09=105 96+09=105     True
     5            3           244 61+08=069 61+08=079    False
     5            3           245 39+75=114 39+75=112    False
     5            3           246 40+74=114 40+74=113    False
     5            3           247 39+80=119 39+80=118    False
     5            3           248 57+95=152 57+95=152     True
     5            3           249 92+97=189 92+97=199    False
     5            3           250 33+03=036 33+03=037    False
     5            3           251 74+92=166 74+92=167    False
     5            3           252 99+09=108 99+09=108     True
     5            3           253 98+10=108 98+10=107    False
     5            3           254 46+77=123 46+77=125    False
     5            3           255 85+78=163 85+78=152    False
     5            4             0 41+21=062 41+21=061    False
     5            4             1 49+13=062 49+13=061    False
     5            4             2 59+07=066 59+07=075    False
     5            4             3 31+11=042 31+11=040    False
     5            4             4 74+16=090 74+16=090     True
     5            4             5 43+38=081 43+38=081     True
     5            4             6 08+67=075 08+67=075     True
     5            4             7 31+66=097 31+66=097     True
     5            4             8 10+31=041 10+31=040    False
     5            4             9 34+59=093 34+59=092    False
     5            4            10 78+42=120 78+42=121    False
     5            4            11 13+41=054 13+41=057    False
     5            4            12 97+89=186 97+89=195    False
     5            4            13 15+62=077 15+62=077     True
     5            4            14 39+36=075 39+36=075     True
     5            4            15 21+25=046 21+25=057    False
     5            4            16 74+56=130 74+56=129    False
     5            4            17 85+47=132 85+47=132     True
     5            4            18 47+32=079 47+32=089    False
     5            4            19 37+66=103 37+66=105    False
     5            4            20 16+29=045 16+29=045     True
     5            4            21 86+77=163 86+77=155    False
     5            4            22 80+07=087 80+07=095    False
     5            4            23 87+05=092 87+05=092     True
     5            4            24 58+16=074 58+16=074     True
     5            4            25 52+79=131 52+79=131     True
     5            4            26 91+08=099 91+08=098    False
     5            4            27 47+78=125 47+78=125     True
     5            4            28 86+96=182 86+96=184    False
     5            4            29 90+22=112 90+22=113    False
     5            4            30 31+18=049 31+18=059    False
     5            4            31 86+15=101 86+15=101     True
     5            4            32 15+95=110 15+95=102    False
     5            4            33 42+11=053 42+11=057    False
     5            4            34 65+99=164 65+99=163    False
     5            4            35 89+29=118 89+29=117    False
     5            4            36 35+11=046 35+11=057    False
     5            4            37 71+41=112 71+41=110    False
     5            4            38 16+24=040 16+24=040     True
     5            4            39 77+82=159 77+82=159     True
     5            4            40 55+89=144 55+89=143    False
     5            4            41 17+88=105 17+88=106    False
     5            4            42 54+72=126 54+72=127    False
     5            4            43 34+98=132 34+98=132     True
     5            4            44 09+97=106 09+97=105    False
     5            4            45 91+07=098 91+07=097    False
     5            4            46 55+94=149 55+94=157    False
     5            4            47 22+58=080 22+58=080     True
     5            4            48 91+37=128 91+37=127    False
     5            4            49 16+10=026 16+10=025    False
     5            4            50 96+32=128 96+32=128     True
     5            4            51 35+75=110 35+75=112    False
     5            4            52 88+73=161 88+73=151    False
     5            4            53 35+18=053 35+18=052    False
     5            4            54 33+10=043 33+10=047    False
     5            4            55 08+50=058 08+50=065    False
     5            4            56 22+62=084 22+62=087    False
     5            4            57 26+37=063 26+37=065    False
     5            4            58 80+27=107 80+27=107     True
     5            4            59 68+28=096 68+28=095    False
     5            4            60 48+03=051 48+03=051     True
     5            4            61 40+18=058 40+18=057    False
     5            4            62 16+59=075 16+59=075     True
     5            4            63 02+19=021 02+19=010    False
     5            4            64 01+09=010 01+09=009    False
     5            4            65 62+68=130 62+68=130     True
     5            4            66 09+71=080 09+71=070    False
     5            4            67 00+58=058 00+58=067    False
     5            4            68 16+45=061 16+45=061     True
     5            4            69 24+98=122 24+98=122     True
     5            4            70 47+92=139 47+92=139     True
     5            4            71 94+84=178 94+84=177    False
     5            4            72 21+32=053 21+32=057    False
     5            4            73 29+82=111 29+82=112    False
     5            4            74 32+79=111 32+79=111     True
     5            4            75 13+98=111 13+98=112    False
     5            4            76 41+94=135 41+94=137    False
     5            4            77 51+84=135 51+84=137    False
     5            4            78 42+05=047 42+05=057    False
     5            4            79 39+03=042 39+03=032    False
     5            4            80 02+92=094 02+92=097    False
     5            4            81 99+81=180 99+81=170    False
     5            4            82 32+68=100 32+68=100     True
     5            4            83 52+17=069 52+17=079    False
     5            4            84 56+58=114 56+58=114     True
     5            4            85 21+48=069 21+48=079    False
     5            4            86 61+71=132 61+71=131    False
     5            4            87 17+01=018 17+01=017    False
     5            4            88 68+23=091 68+23=091     True
     5            4            89 00+37=037 00+37=036    False
     5            4            90 94+88=182 94+88=182     True
     5            4            91 06+31=037 06+31=035    False
     5            4            92 27+18=045 27+18=045     True
     5            4            93 41+81=122 41+81=121    False
     5            4            94 15+86=101 15+86=101     True
     5            4            95 36+87=123 36+87=125    False
     5            4            96 17+37=054 17+37=055    False
     5            4            97 13+86=099 13+86=108    False
     5            4            98 29+69=098 29+69=097    False
     5            4            99 31+99=130 31+99=129    False
     5            4           100 47+29=076 47+29=075    False
     5            4           101 08+81=089 08+81=099    False
     5            4           102 72+82=154 72+82=157    False
     5            4           103 46+91=137 46+91=137     True
     5            4           104 70+35=105 70+35=107    False
     5            4           105 90+55=145 90+55=147    False
     5            4           106 99+99=198 99+99=197    False
     5            4           107 60+97=157 60+97=155    False
     5            4           108 03+40=043 03+40=057    False
     5            4           109 35+49=084 35+49=092    False
     5            4           110 32+02=034 32+02=037    False
     5            4           111 70+18=088 70+18=095    False
     5            4           112 99+05=104 99+05=103    False
     5            4           113 78+73=151 78+73=141    False
     5            4           114 03+02=005 03+02=007    False
     5            4           115 50+14=064 50+14=063    False
     5            4           116 62+02=064 62+02=077    False
     5            4           117 16+74=090 16+74=090     True
     5            4           118 68+65=133 68+65=132    False
     5            4           119 74+81=155 74+81=157    False
     5            4           120 37+48=085 37+48=085     True
     5            4           121 63+04=067 63+04=077    False
     5            4           122 06+62=068 06+62=079    False
     5            4           123 95+75=170 95+75=162    False
     5            4           124 92+37=129 92+37=129     True
     5            4           125 81+32=113 81+32=117    False
     5            4           126 53+28=081 53+28=081     True
     5            4           127 52+42=094 52+42=097    False
     5            4           128 66+97=163 66+97=164    False
     5            4           129 00+48=048 00+48=057    False
     5            4           130 65+32=097 65+32=098    False
     5            4           131 60+89=149 60+89=150    False
     5            4           132 71+61=132 71+61=131    False
     5            4           133 98+50=148 98+50=147    False
     5            4           134 90+96=186 90+96=185    False
     5            4           135 02+96=098 02+96=098     True
     5            4           136 62+75=137 62+75=137     True
     5            4           137 41+28=069 41+28=079    False
     5            4           138 95+79=174 95+79=173    False
     5            4           139 48+41=089 48+41=099    False
     5            4           140 87+95=182 87+95=172    False
     5            4           141 75+38=113 75+38=112    False
     5            4           142 31+55=086 31+55=097    False
     5            4           143 54+63=117 54+63=117     True
     5            4           144 75+82=157 75+82=157     True
     5            4           145 46+45=091 46+45=091     True
     5            4           146 13+08=021 13+08=010    False
     5            4           147 77+97=174 77+97=175    False
     5            4           148 37+35=072 37+35=071    False
     5            4           149 21+89=110 21+89=110     True
     5            4           150 58+51=109 58+51=119    False
     5            4           151 91+48=139 91+48=149    False
     5            4           152 33+23=056 33+23=057    False
     5            4           153 80+96=176 80+96=165    False
     5            4           154 78+02=080 78+02=081    False
     5            4           155 38+95=133 38+95=132    False
     5            4           156 99+25=124 99+25=123    False
     5            4           157 30+76=106 30+76=105    False
     5            4           158 42+40=082 42+40=083    False
     5            4           159 85+58=143 85+58=142    False
     5            4           160 44+46=090 44+46=099    False
     5            4           161 06+41=047 06+41=057    False
     5            4           162 65+90=155 65+90=157    False
     5            4           163 43+83=126 43+83=137    False
     5            4           164 36+61=097 36+61=095    False
     5            4           165 61+51=112 61+51=110    False
     5            4           166 38+09=047 38+09=047     True
     5            4           167 21+97=118 21+97=117    False
     5            4           168 83+30=113 83+30=113     True
     5            4           169 11+79=090 11+79=090     True
     5            4           170 14+29=043 14+29=042    False
     5            4           171 21+11=032 21+11=030    False
     5            4           172 43+53=096 43+53=107    False
     5            4           173 02+58=060 02+58=060     True
     5            4           174 78+82=160 78+82=151    False
     5            4           175 91+11=102 91+11=100    False
     5            4           176 58+54=112 58+54=112     True
     5            4           177 00+15=015 00+15=007    False
     5            4           178 83+51=134 83+51=137    False
     5            4           179 44+72=116 44+72=117    False
     5            4           180 71+20=091 71+20=090    False
     5            4           181 24+99=123 24+99=123     True
     5            4           182 46+30=076 46+30=075    False
     5            4           183 08+67=075 08+67=075     True
     5            4           184 47+42=089 47+42=099    False
     5            4           185 95+67=162 95+67=152    False
     5            4           186 40+56=096 40+56=095    False
     5            4           187 17+95=112 17+95=112     True
     5            4           188 94+66=160 94+66=159    False
     5            4           189 14+58=072 14+58=071    False
     5            4           190 56+05=061 56+05=061     True
     5            4           191 70+01=071 70+01=071     True
     5            4           192 97+59=156 97+59=155    False
     5            4           193 94+67=161 94+67=151    False
     5            4           194 13+41=054 13+41=057    False
     5            4           195 85+15=100 85+15=102    False
     5            4           196 48+53=101 48+53=101     True
     5            4           197 62+75=137 62+75=137     True
     5            4           198 87+47=134 87+47=135    False
     5            4           199 31+88=119 31+88=118    False
     5            4           200 97+16=113 97+16=105    False
     5            4           201 48+45=093 48+45=092    False
     5            4           202 99+00=099 99+00=098    False
     5            4           203 15+01=016 15+01=017    False
     5            4           204 28+96=124 28+96=124     True
     5            4           205 20+11=031 20+11=030    False
     5            4           206 07+56=063 07+56=064    False
     5            4           207 06+08=014 06+08=015    False
     5            4           208 45+46=091 45+46=090    False
     5            4           209 48+85=133 48+85=132    False
     5            4           210 62+14=076 62+14=077    False
     5            4           211 82+31=113 82+31=117    False
     5            4           212 85+88=173 85+88=173     True
     5            4           213 77+08=085 77+08=085     True
     5            4           214 16+64=080 16+64=071    False
     5            4           215 00+27=027 00+27=026    False
     5            4           216 36+75=111 36+75=111     True
     5            4           217 38+38=076 38+38=077    False
     5            4           218 88+32=120 88+32=111    False
     5            4           219 09+88=097 09+88=095    False
     5            4           220 96+87=183 96+87=184    False
     5            4           221 71+29=100 71+29=100     True
     5            4           222 99+13=112 99+13=112     True
     5            4           223 03+13=016 03+13=017    False
     5            4           224 67+23=090 67+23=091    False
     5            4           225 15+98=113 15+98=112    False
     5            4           226 10+08=018 10+08=017    False
     5            4           227 46+24=070 46+24=071    False
     5            4           228 55+63=118 55+63=127    False
     5            4           229 28+06=034 28+06=034     True
     5            4           230 43+87=130 43+87=139    False
     5            4           231 34+05=039 34+05=038    False
2024-09-20 14:10:21,257 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:21,259 - root - INFO - ====================================================== Starting Train Epoch: 6/9 ======================================================
2024-09-20 14:10:21,259 - root - INFO - Learning rates for each parameter group: 0.00148872833543853430, 0.00148872833543853430
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 6, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 6, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=155.79370, average_batch_loss=0.60857, average_batch_perplexity=1.83780, lr=0.001488728, 0.001488728]
Epoch: 6, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=155.79370, average_batch_loss=0.60857, average_batch_perplexity=1.83780, lr=0.001488728, 0.001488728]
Epoch: 6, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=147.72687, average_batch_loss=0.57706, average_batch_perplexity=1.78079, lr=0.001483477, 0.001483477]
Epoch: 6, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 12.68it/s, total_batch_loss=147.72687, average_batch_loss=0.57706, average_batch_perplexity=1.78079, lr=0.001483477, 0.001483477]
Epoch: 6, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 12.68it/s, total_batch_loss=147.72687, average_batch_loss=0.57706, average_batch_perplexity=1.78079, lr=0.001483477, 0.001483477]
Epoch: 6, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 12.68it/s, total_batch_loss=148.80716, average_batch_loss=0.58128, average_batch_perplexity=1.78832, lr=0.001478281, 0.001478281]
Epoch: 6, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 12.68it/s, total_batch_loss=148.80716, average_batch_loss=0.58128, average_batch_perplexity=1.78832, lr=0.001478281, 0.001478281]
Epoch: 6, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 12.68it/s, total_batch_loss=153.21822, average_batch_loss=0.59851, average_batch_perplexity=1.81940, lr=0.001473139, 0.001473139]
Epoch: 6, Step: 4:  14%|█▍        | 4/28 [00:00<00:01, 12.56it/s, total_batch_loss=153.21822, average_batch_loss=0.59851, average_batch_perplexity=1.81940, lr=0.001473139, 0.001473139]
Epoch: 6, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.56it/s, total_batch_loss=153.21822, average_batch_loss=0.59851, average_batch_perplexity=1.81940, lr=0.001473139, 0.001473139]
Epoch: 6, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.56it/s, total_batch_loss=143.07899, average_batch_loss=0.55890, average_batch_perplexity=1.74875, lr=0.001468051, 0.001468051]
Epoch: 6, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.56it/s, total_batch_loss=143.07899, average_batch_loss=0.55890, average_batch_perplexity=1.74875, lr=0.001468051, 0.001468051]
Epoch: 6, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.56it/s, total_batch_loss=146.54678, average_batch_loss=0.57245, average_batch_perplexity=1.77260, lr=0.001463014, 0.001463014]
Epoch: 6, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 12.36it/s, total_batch_loss=146.54678, average_batch_loss=0.57245, average_batch_perplexity=1.77260, lr=0.001463014, 0.001463014]
Epoch: 6, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.36it/s, total_batch_loss=146.54678, average_batch_loss=0.57245, average_batch_perplexity=1.77260, lr=0.001463014, 0.001463014]
Epoch: 6, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.36it/s, total_batch_loss=133.86508, average_batch_loss=0.52291, average_batch_perplexity=1.68693, lr=0.001458030, 0.001458030]
Epoch: 6, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.36it/s, total_batch_loss=133.86508, average_batch_loss=0.52291, average_batch_perplexity=1.68693, lr=0.001458030, 0.001458030]
Epoch: 6, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.36it/s, total_batch_loss=134.05029, average_batch_loss=0.52363, average_batch_perplexity=1.68815, lr=0.001453095, 0.001453095]
Epoch: 6, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 12.30it/s, total_batch_loss=134.05029, average_batch_loss=0.52363, average_batch_perplexity=1.68815, lr=0.001453095, 0.001453095]
Epoch: 6, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.30it/s, total_batch_loss=134.05029, average_batch_loss=0.52363, average_batch_perplexity=1.68815, lr=0.001453095, 0.001453095]
Epoch: 6, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.30it/s, total_batch_loss=124.76402, average_batch_loss=0.48736, average_batch_perplexity=1.62801, lr=0.001448211, 0.001448211]
Epoch: 6, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.30it/s, total_batch_loss=124.76402, average_batch_loss=0.48736, average_batch_perplexity=1.62801, lr=0.001448211, 0.001448211]
Epoch: 6, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.30it/s, total_batch_loss=134.03320, average_batch_loss=0.52357, average_batch_perplexity=1.68804, lr=0.001443376, 0.001443376]
Epoch: 6, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=134.03320, average_batch_loss=0.52357, average_batch_perplexity=1.68804, lr=0.001443376, 0.001443376]
Epoch: 6, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=134.03320, average_batch_loss=0.52357, average_batch_perplexity=1.68804, lr=0.001443376, 0.001443376]
Epoch: 6, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=119.91843, average_batch_loss=0.46843, average_batch_perplexity=1.59749, lr=0.001438588, 0.001438588]
Epoch: 6, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=119.91843, average_batch_loss=0.46843, average_batch_perplexity=1.59749, lr=0.001438588, 0.001438588]
Epoch: 6, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=114.41341, average_batch_loss=0.44693, average_batch_perplexity=1.56350, lr=0.001433848, 0.001433848]
Epoch: 6, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.33it/s, total_batch_loss=114.41341, average_batch_loss=0.44693, average_batch_perplexity=1.56350, lr=0.001433848, 0.001433848]
Epoch: 6, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.33it/s, total_batch_loss=114.41341, average_batch_loss=0.44693, average_batch_perplexity=1.56350, lr=0.001433848, 0.001433848]
Epoch: 6, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.33it/s, total_batch_loss=121.07416, average_batch_loss=0.47295, average_batch_perplexity=1.60471, lr=0.001429155, 0.001429155]
Epoch: 6, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.33it/s, total_batch_loss=121.07416, average_batch_loss=0.47295, average_batch_perplexity=1.60471, lr=0.001429155, 0.001429155]
Epoch: 6, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.33it/s, total_batch_loss=121.88602, average_batch_loss=0.47612, average_batch_perplexity=1.60981, lr=0.001424507, 0.001424507]
Epoch: 6, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.21it/s, total_batch_loss=121.88602, average_batch_loss=0.47612, average_batch_perplexity=1.60981, lr=0.001424507, 0.001424507]
Epoch: 6, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.21it/s, total_batch_loss=121.88602, average_batch_loss=0.47612, average_batch_perplexity=1.60981, lr=0.001424507, 0.001424507]
Epoch: 6, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.21it/s, total_batch_loss=109.31966, average_batch_loss=0.42703, average_batch_perplexity=1.53270, lr=0.001419905, 0.001419905]
Epoch: 6, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.21it/s, total_batch_loss=109.31966, average_batch_loss=0.42703, average_batch_perplexity=1.53270, lr=0.001419905, 0.001419905]
Epoch: 6, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.21it/s, total_batch_loss=112.27247, average_batch_loss=0.43856, average_batch_perplexity=1.55048, lr=0.001415346, 0.001415346]
Epoch: 6, Step: 16:  57%|█████▋    | 16/28 [00:01<00:00, 12.31it/s, total_batch_loss=112.27247, average_batch_loss=0.43856, average_batch_perplexity=1.55048, lr=0.001415346, 0.001415346]
Epoch: 6, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.31it/s, total_batch_loss=112.27247, average_batch_loss=0.43856, average_batch_perplexity=1.55048, lr=0.001415346, 0.001415346]
Epoch: 6, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.31it/s, total_batch_loss=102.06506, average_batch_loss=0.39869, average_batch_perplexity=1.48987, lr=0.001410832, 0.001410832]
Epoch: 6, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.31it/s, total_batch_loss=102.06506, average_batch_loss=0.39869, average_batch_perplexity=1.48987, lr=0.001410832, 0.001410832]
Epoch: 6, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.31it/s, total_batch_loss=110.41208, average_batch_loss=0.43130, average_batch_perplexity=1.53925, lr=0.001406360, 0.001406360]
Epoch: 6, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=110.41208, average_batch_loss=0.43130, average_batch_perplexity=1.53925, lr=0.001406360, 0.001406360]
Epoch: 6, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=110.41208, average_batch_loss=0.43130, average_batch_perplexity=1.53925, lr=0.001406360, 0.001406360]
Epoch: 6, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=100.90804, average_batch_loss=0.39417, average_batch_perplexity=1.48316, lr=0.001401930, 0.001401930]
Epoch: 6, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=100.90804, average_batch_loss=0.39417, average_batch_perplexity=1.48316, lr=0.001401930, 0.001401930]
Epoch: 6, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.43it/s, total_batch_loss=99.20811, average_batch_loss=0.38753, average_batch_perplexity=1.47334, lr=0.001397542, 0.001397542] 
Epoch: 6, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 12.39it/s, total_batch_loss=99.20811, average_batch_loss=0.38753, average_batch_perplexity=1.47334, lr=0.001397542, 0.001397542]
Epoch: 6, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.39it/s, total_batch_loss=99.20811, average_batch_loss=0.38753, average_batch_perplexity=1.47334, lr=0.001397542, 0.001397542]
Epoch: 6, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.39it/s, total_batch_loss=98.92679, average_batch_loss=0.38643, average_batch_perplexity=1.47172, lr=0.001393196, 0.001393196]
Epoch: 6, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.39it/s, total_batch_loss=98.92679, average_batch_loss=0.38643, average_batch_perplexity=1.47172, lr=0.001393196, 0.001393196]
Epoch: 6, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.39it/s, total_batch_loss=103.29178, average_batch_loss=0.40348, average_batch_perplexity=1.49703, lr=0.001388889, 0.001388889]
Epoch: 6, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.38it/s, total_batch_loss=103.29178, average_batch_loss=0.40348, average_batch_perplexity=1.49703, lr=0.001388889, 0.001388889]
Epoch: 6, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.38it/s, total_batch_loss=103.29178, average_batch_loss=0.40348, average_batch_perplexity=1.49703, lr=0.001388889, 0.001388889]
Epoch: 6, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.38it/s, total_batch_loss=92.24723, average_batch_loss=0.36034, average_batch_perplexity=1.43382, lr=0.001384622, 0.001384622] 
Epoch: 6, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.38it/s, total_batch_loss=92.24723, average_batch_loss=0.36034, average_batch_perplexity=1.43382, lr=0.001384622, 0.001384622]
Epoch: 6, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.38it/s, total_batch_loss=108.63124, average_batch_loss=0.42434, average_batch_perplexity=1.52858, lr=0.001380394, 0.001380394]
Epoch: 6, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.46it/s, total_batch_loss=108.63124, average_batch_loss=0.42434, average_batch_perplexity=1.52858, lr=0.001380394, 0.001380394]
Epoch: 6, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.46it/s, total_batch_loss=108.63124, average_batch_loss=0.42434, average_batch_perplexity=1.52858, lr=0.001380394, 0.001380394]
Epoch: 6, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.46it/s, total_batch_loss=102.35754, average_batch_loss=0.39983, average_batch_perplexity=1.49158, lr=0.001376205, 0.001376205]
Epoch: 6, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.46it/s, total_batch_loss=102.35754, average_batch_loss=0.39983, average_batch_perplexity=1.49158, lr=0.001376205, 0.001376205]
Epoch: 6, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.46it/s, total_batch_loss=97.85474, average_batch_loss=0.38225, average_batch_perplexity=1.46557, lr=0.001372053, 0.001372053] 
Epoch: 6, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.31it/s, total_batch_loss=97.85474, average_batch_loss=0.38225, average_batch_perplexity=1.46557, lr=0.001372053, 0.001372053]
Epoch: 6, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.31it/s, total_batch_loss=97.85474, average_batch_loss=0.38225, average_batch_perplexity=1.46557, lr=0.001372053, 0.001372053]
Epoch: 6, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.31it/s, total_batch_loss=99.34847, average_batch_loss=0.38808, average_batch_perplexity=1.47415, lr=0.001367939, 0.001367939]
Epoch: 6, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.31it/s, total_batch_loss=99.34847, average_batch_loss=0.38808, average_batch_perplexity=1.47415, lr=0.001367939, 0.001367939]
Epoch: 6, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.31it/s, total_batch_loss=30.92808, average_batch_loss=0.35146, average_batch_perplexity=1.42113, lr=0.001363862, 0.001363862]
Epoch: 6, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.36it/s, total_batch_loss=30.92808, average_batch_loss=0.35146, average_batch_perplexity=1.42113, lr=0.001363862, 0.001363862]
                                                                                                                                                                                         
2024-09-20 14:10:23,489 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:23,490 - root - INFO - Total Batches:                   28
2024-09-20 14:10:23,490 - root - INFO - Average Epoch Train Loss:        0.46671
2024-09-20 14:10:23,491 - root - INFO - Average Epoch Train Perplexity:  1.59473
2024-09-20 14:10:23,491 - root - INFO - 
2024-09-20 14:10:23,492 - root - INFO - ====================================================== Starting Valid Epoch: 6/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 6, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 6, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=90.70788, average_batch_loss=0.35433, average_batch_perplexity=1.42522]
Epoch: 6, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=90.70788, average_batch_loss=0.35433, average_batch_perplexity=1.42522]
Epoch: 6, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=93.68127, average_batch_loss=0.36594, average_batch_perplexity=1.44187]
Epoch: 6, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=93.68127, average_batch_loss=0.36594, average_batch_perplexity=1.44187]
Epoch: 6, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=89.34997, average_batch_loss=0.34902, average_batch_perplexity=1.41768]
Epoch: 6, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=89.34997, average_batch_loss=0.34902, average_batch_perplexity=1.41768]
Epoch: 6, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=89.34997, average_batch_loss=0.34902, average_batch_perplexity=1.41768]
Epoch: 6, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=89.19657, average_batch_loss=0.34842, average_batch_perplexity=1.41683]
Epoch: 6, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=89.19657, average_batch_loss=0.34842, average_batch_perplexity=1.41683]
Epoch: 6, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=97.31064, average_batch_loss=0.38012, average_batch_perplexity=1.46246]
Epoch: 6, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=97.31064, average_batch_loss=0.38012, average_batch_perplexity=1.46246]
Epoch: 6, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.82it/s, total_batch_loss=90.34011, average_batch_loss=0.35289, average_batch_perplexity=1.42318]
Epoch: 6, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.21it/s, total_batch_loss=90.34011, average_batch_loss=0.35289, average_batch_perplexity=1.42318]
Epoch: 6, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.21it/s, total_batch_loss=90.34011, average_batch_loss=0.35289, average_batch_perplexity=1.42318]
Epoch: 6, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.21it/s, total_batch_loss=90.64169, average_batch_loss=0.35407, average_batch_perplexity=1.42485]
Epoch: 6, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.21it/s, total_batch_loss=90.64169, average_batch_loss=0.35407, average_batch_perplexity=1.42485]
Epoch: 6, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.21it/s, total_batch_loss=69.68761, average_batch_loss=0.33504, average_batch_perplexity=1.39799]
                                                                                                                                                         
2024-09-20 14:10:23,833 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:23,834 - root - INFO - Total Batches:                   8
2024-09-20 14:10:23,834 - root - INFO - Average Epoch Valid Loss:        0.35546
2024-09-20 14:10:23,835 - root - INFO - Average Epoch Valid Perplexity:  1.42683
2024-09-20 14:10:23,835 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.4414]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.4414]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.41it/s, accuracy: 0.4414]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.41it/s, accuracy: 0.4922]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.41it/s, accuracy: 0.4224]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.69it/s, accuracy: 0.4224]
                                                                                           
2024-09-20 14:10:24,216 - root - INFO - Correct/Total Samples:           450/1000
2024-09-20 14:10:24,217 - root - INFO - Eval Accuracy:                   0.45
2024-09-20 14:10:24,235 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     6            1             0 13+48=061 13+48=071    False
     6            1             1 16+55=071 16+55=071     True
     6            1             2 79+34=113 79+34=113     True
     6            1             3 35+44=079 35+44=089    False
     6            1             4 16+50=066 16+50=076    False
     6            1             5 28+47=075 28+47=075     True
     6            1             6 00+74=074 00+74=073    False
     6            1             7 15+20=035 15+20=035     True
     6            1             8 72+60=132 72+60=132     True
     6            1             9 63+68=131 63+68=121    False
     6            1            10 29+45=074 29+45=074     True
     6            1            11 34+60=094 34+60=094     True
     6            1            12 53+70=123 53+70=123     True
     6            1            13 70+50=120 70+50=110    False
     6            1            14 11+84=095 11+84=095     True
     6            1            15 42+71=113 42+71=113     True
     6            1            16 98+22=120 98+22=110    False
     6            1            17 02+02=004 02+02=013    False
     6            1            18 15+85=100 15+85=100     True
     6            1            19 21+78=099 21+78=099     True
     6            1            20 61+79=140 61+79=130    False
     6            1            21 25+99=124 25+99=114    False
     6            1            22 09+85=094 09+85=094     True
     6            1            23 60+91=151 60+91=141    False
     6            1            24 35+30=065 35+30=075    False
     6            1            25 24+51=075 24+51=075     True
     6            1            26 93+91=184 93+91=174    False
     6            1            27 39+96=135 39+96=135     True
     6            1            28 64+35=099 64+35=100    False
     6            1            29 36+22=058 36+22=068    False
     6            1            30 68+45=113 68+45=113     True
     6            1            31 16+84=100 16+84=100     True
     6            1            32 91+52=143 91+52=132    False
     6            1            33 97+36=133 97+36=133     True
     6            1            34 27+37=064 27+37=075    False
     6            1            35 99+82=181 99+82=181     True
     6            1            36 03+42=045 03+42=055    False
     6            1            37 18+38=056 18+38=066    False
     6            1            38 32+20=052 32+20=052     True
     6            1            39 38+13=051 38+13=051     True
     6            1            40 68+42=110 68+42=110     True
     6            1            41 64+00=064 64+00=074    False
     6            1            42 48+94=142 48+94=132    False
     6            1            43 58+36=094 58+36=095    False
     6            1            44 41+22=063 41+22=063     True
     6            1            45 23+58=081 23+58=081     True
     6            1            46 67+46=113 67+46=113     True
     6            1            47 40+78=118 40+78=118     True
     6            1            48 90+38=128 90+38=128     True
     6            1            49 89+52=141 89+52=141     True
     6            1            50 37+77=114 37+77=115    False
     6            1            51 29+76=105 29+76=105     True
     6            1            52 42+90=132 42+90=131    False
     6            1            53 45+82=127 45+82=128    False
     6            1            54 35+95=130 35+95=120    False
     6            1            55 92+98=190 92+98=180    False
     6            1            56 73+91=164 73+91=164     True
     6            1            57 53+97=150 53+97=140    False
     6            1            58 98+69=167 98+69=167     True
     6            1            59 20+46=066 20+46=076    False
     6            1            60 48+69=117 48+69=116    False
     6            1            61 62+31=093 62+31=092    False
     6            1            62 80+59=139 80+59=139     True
     6            1            63 58+12=070 58+12=070     True
     6            1            64 08+96=104 08+96=105    False
     6            1            65 67+06=073 67+06=073     True
     6            1            66 22+04=026 22+04=036    False
     6            1            67 61+87=148 61+87=148     True
     6            1            68 95+27=122 95+27=112    False
     6            1            69 49+83=132 49+83=131    False
     6            1            70 43+00=043 43+00=053    False
     6            1            71 01+85=086 01+85=096    False
     6            1            72 11+68=079 11+68=089    False
     6            1            73 80+03=083 80+03=082    False
     6            1            74 54+83=137 54+83=138    False
     6            1            75 73+47=120 73+47=110    False
     6            1            76 99+93=192 99+93=181    False
     6            1            77 99+13=112 99+13=102    False
     6            1            78 92+66=158 92+66=158     True
     6            1            79 90+31=121 90+31=111    False
     6            1            80 25+69=094 25+69=094     True
     6            1            81 25+44=069 25+44=079    False
     6            1            82 00+93=093 00+93=092    False
     6            1            83 88+87=175 88+87=165    False
     6            1            84 47+56=103 47+56=103     True
     6            1            85 43+59=102 43+59=102     True
     6            1            86 22+00=022 22+00=022     True
     6            1            87 34+04=038 34+04=039    False
     6            1            88 65+13=078 65+13=088    False
     6            1            89 39+82=121 39+82=111    False
     6            1            90 66+83=149 66+83=149     True
     6            1            91 51+69=120 51+69=110    False
     6            1            92 80+21=101 80+21=101     True
     6            1            93 36+79=115 36+79=115     True
     6            1            94 21+68=089 21+68=098    False
     6            1            95 11+66=077 11+66=077     True
     6            1            96 55+19=074 55+19=074     True
     6            1            97 51+61=112 51+61=112     True
     6            1            98 38+88=126 38+88=126     True
     6            1            99 37+27=064 37+27=075    False
     6            1           100 18+63=081 18+63=081     True
     6            1           101 48+11=059 48+11=068    False
     6            1           102 72+68=140 72+68=130    False
     6            1           103 37+39=076 37+39=086    False
     6            1           104 64+95=159 64+95=150    False
     6            1           105 49+75=124 49+75=124     True
     6            1           106 45+66=111 45+66=111     True
     6            1           107 34+87=121 34+87=111    False
     6            1           108 02+84=086 02+84=096    False
     6            1           109 95+00=095 95+00=096    False
     6            1           110 09+56=065 09+56=075    False
     6            1           111 22+66=088 22+66=088     True
     6            1           112 43+18=061 43+18=071    False
     6            1           113 61+35=096 61+35=096     True
     6            1           114 13+73=086 13+73=087    False
     6            1           115 25+95=120 25+95=111    False
     6            1           116 73+96=169 73+96=169     True
     6            1           117 03+96=099 03+96=099     True
     6            1           118 97+82=179 97+82=179     True
     6            1           119 18+42=060 18+42=060     True
     6            1           120 29+98=127 29+98=127     True
     6            1           121 61+00=061 61+00=061     True
     6            1           122 22+98=120 22+98=110    False
     6            1           123 12+50=062 12+50=071    False
     6            1           124 02+58=060 02+58=070    False
     6            1           125 75+86=161 75+86=151    False
     6            1           126 31+57=088 31+57=098    False
     6            1           127 49+82=131 49+82=131     True
     6            1           128 15+33=048 15+33=058    False
     6            1           129 49+57=106 49+57=106     True
     6            1           130 61+70=131 61+70=121    False
     6            1           131 91+51=142 91+51=131    False
     6            1           132 50+05=055 50+05=065    False
     6            1           133 44+16=060 44+16=060     True
     6            1           134 92+01=093 92+01=092    False
     6            1           135 85+82=167 85+82=168    False
     6            1           136 07+41=048 07+41=058    False
     6            1           137 40+06=046 40+06=056    False
     6            1           138 79+62=141 79+62=131    False
     6            1           139 95+62=157 95+62=158    False
     6            1           140 42+93=135 42+93=136    False
     6            1           141 32+73=105 32+73=106    False
     6            1           142 47+09=056 47+09=066    False
     6            1           143 59+50=109 59+50=108    False
     6            1           144 61+77=138 61+77=138     True
     6            1           145 64+06=070 64+06=070     True
     6            1           146 35+10=045 35+10=055    False
     6            1           147 32+88=120 32+88=110    False
     6            1           148 03+95=098 03+95=099    False
     6            1           149 11+38=049 11+38=059    False
     6            1           150 21+67=088 21+67=088     True
     6            1           151 33+25=058 33+25=068    False
     6            1           152 63+45=108 63+45=109    False
     6            1           153 56+12=068 56+12=078    False
     6            1           154 19+79=098 19+79=098     True
     6            1           155 60+43=103 60+43=103     True
     6            1           156 07+61=068 07+61=078    False
     6            1           157 58+03=061 58+03=071    False
     6            1           158 11+10=021 11+10=021     True
     6            1           159 49+89=138 49+89=138     True
     6            1           160 37+58=095 37+58=095     True
     6            1           161 59+78=137 59+78=137     True
     6            1           162 11+21=032 11+21=031    False
     6            1           163 37+43=080 37+43=080     True
     6            1           164 44+21=065 44+21=075    False
     6            1           165 22+97=119 22+97=119     True
     6            1           166 65+35=100 65+35=100     True
     6            1           167 06+51=057 06+51=067    False
     6            1           168 65+25=090 65+25=091    False
     6            1           169 74+94=168 74+94=169    False
     6            1           170 87+55=142 87+55=142     True
     6            1           171 90+67=157 90+67=157     True
     6            1           172 11+02=013 11+02=013     True
     6            1           173 01+66=067 01+66=077    False
     6            1           174 56+00=056 56+00=066    False
     6            1           175 58+52=110 58+52=110     True
     6            1           176 24+99=123 24+99=113    False
     6            1           177 97+13=110 97+13=100    False
     6            1           178 42+94=136 42+94=137    False
     6            1           179 60+15=075 60+15=075     True
     6            1           180 20+46=066 20+46=076    False
     6            1           181 40+70=110 40+70=110     True
     6            1           182 95+45=140 95+45=131    False
     6            1           183 96+95=191 96+95=181    False
     6            1           184 98+20=118 98+20=117    False
     6            1           185 43+19=062 43+19=072    False
     6            1           186 50+69=119 50+69=119     True
     6            1           187 27+53=080 27+53=080     True
     6            1           188 24+25=049 24+25=050    False
     6            1           189 65+92=157 65+92=158    False
     6            1           190 28+14=042 28+14=042     True
     6            1           191 20+57=077 20+57=087    False
     6            1           192 59+97=156 59+97=156     True
     6            1           193 98+32=130 98+32=120    False
     6            1           194 55+84=139 55+84=130    False
     6            1           195 20+39=059 20+39=069    False
     6            1           196 86+47=133 86+47=133     True
     6            1           197 92+36=128 92+36=128     True
     6            1           198 05+38=043 05+38=043     True
     6            1           199 77+36=113 77+36=113     True
     6            1           200 41+64=105 41+64=106    False
     6            1           201 74+51=125 74+51=126    False
     6            1           202 74+55=129 74+55=120    False
     6            1           203 64+64=128 64+64=129    False
     6            1           204 60+19=079 60+19=089    False
     6            1           205 77+96=173 77+96=163    False
     6            1           206 22+30=052 22+30=052     True
     6            1           207 82+49=131 82+49=131     True
     6            1           208 39+67=106 39+67=106     True
     6            1           209 62+40=102 62+40=101    False
     6            1           210 28+71=099 28+71=098    False
     6            1           211 47+26=073 47+26=073     True
     6            1           212 98+54=152 98+54=142    False
     6            1           213 38+70=108 38+70=107    False
     6            1           214 63+40=103 63+40=103     True
     6            1           215 86+62=148 86+62=148     True
     6            1           216 22+65=087 22+65=088    False
     6            1           217 41+17=058 41+17=068    False
     6            1           218 68+88=156 68+88=156     True
     6            1           219 96+70=166 96+70=166     True
     6            1           220 99+29=128 99+29=128     True
     6            1           221 83+39=122 83+39=122     True
     6            1           222 26+55=081 26+55=081     True
     6            1           223 53+70=123 53+70=123     True
     6            1           224 94+12=106 94+12=107    False
     6            1           225 00+37=037 00+37=047    False
     6            1           226 36+94=130 36+94=120    False
     6            1           227 40+58=098 40+58=098     True
     6            1           228 19+80=099 19+80=098    False
     6            1           229 49+44=093 49+44=093     True
     6            1           230 70+27=097 70+27=097     True
     6            1           231 52+80=132 52+80=132     True
     6            1           232 77+90=167 77+90=167     True
     6            1           233 13+92=105 13+92=106    False
     6            1           234 59+09=068 59+09=078    False
     6            1           235 33+55=088 33+55=089    False
     6            1           236 85+16=101 85+16=101     True
     6            1           237 25+65=090 25+65=091    False
     6            1           238 46+20=066 46+20=076    False
     6            1           239 29+52=081 29+52=081     True
     6            1           240 32+36=068 32+36=078    False
     6            1           241 47+08=055 47+08=065    False
     6            1           242 21+84=105 21+84=106    False
     6            1           243 24+45=069 24+45=070    False
     6            1           244 29+15=044 29+15=044     True
     6            1           245 83+03=086 83+03=087    False
     6            1           246 83+36=119 83+36=119     True
     6            1           247 58+95=153 58+95=143    False
     6            1           248 76+79=155 76+79=145    False
     6            1           249 63+30=093 63+30=092    False
     6            1           250 38+24=062 38+24=062     True
     6            1           251 19+46=065 19+46=075    False
     6            1           252 99+66=165 99+66=165     True
     6            1           253 95+73=168 95+73=168     True
     6            1           254 65+27=092 65+27=092     True
     6            1           255 91+83=174 91+83=174     True
     6            2             0 65+49=114 65+49=114     True
     6            2             1 03+08=011 03+08=011     True
     6            2             2 67+81=148 67+81=148     True
     6            2             3 47+23=070 47+23=070     True
     6            2             4 43+91=134 43+91=134     True
     6            2             5 41+67=108 41+67=108     True
     6            2             6 02+33=035 02+33=045    False
     6            2             7 64+84=148 64+84=149    False
     6            2             8 81+64=145 81+64=146    False
     6            2             9 80+11=091 80+11=091     True
     6            2            10 78+01=079 78+01=088    False
     6            2            11 89+18=107 89+18=107     True
     6            2            12 45+52=097 45+52=098    False
     6            2            13 35+30=065 35+30=075    False
     6            2            14 53+32=085 53+32=086    False
     6            2            15 49+90=139 49+90=138    False
     6            2            16 41+37=078 41+37=088    False
     6            2            17 35+14=049 35+14=059    False
     6            2            18 92+50=142 92+50=131    False
     6            2            19 37+60=097 37+60=097     True
     6            2            20 91+61=152 91+61=141    False
     6            2            21 80+77=157 80+77=157     True
     6            2            22 66+24=090 66+24=090     True
     6            2            23 81+07=088 81+07=098    False
     6            2            24 85+59=144 85+59=144     True
     6            2            25 19+69=088 19+69=088     True
     6            2            26 91+44=135 91+44=136    False
     6            2            27 25+29=054 25+29=064    False
     6            2            28 27+08=035 27+08=045    False
     6            2            29 66+14=080 66+14=080     True
     6            2            30 95+11=106 95+11=107    False
     6            2            31 13+97=110 13+97=100    False
     6            2            32 94+40=134 94+40=133    False
     6            2            33 74+31=105 74+31=106    False
     6            2            34 49+00=049 49+00=058    False
     6            2            35 59+18=077 59+18=087    False
     6            2            36 07+65=072 07+65=072     True
     6            2            37 83+55=138 83+55=139    False
     6            2            38 49+80=129 49+80=138    False
     6            2            39 64+17=081 64+17=081     True
     6            2            40 48+83=131 48+83=131     True
     6            2            41 95+44=139 95+44=139     True
     6            2            42 71+26=097 71+26=097     True
     6            2            43 06+74=080 06+74=080     True
     6            2            44 34+24=058 34+24=068    False
     6            2            45 59+71=130 59+71=120    False
     6            2            46 68+32=100 68+32=100     True
     6            2            47 38+81=119 38+81=118    False
     6            2            48 29+56=085 29+56=085     True
     6            2            49 54+55=109 54+55=100    False
     6            2            50 31+27=058 31+27=068    False
     6            2            51 97+89=186 97+89=186     True
     6            2            52 48+09=057 48+09=067    False
     6            2            53 86+76=162 86+76=153    False
     6            2            54 82+59=141 82+59=131    False
     6            2            55 01+67=068 01+67=078    False
     6            2            56 26+06=032 26+06=033    False
     6            2            57 22+46=068 22+46=078    False
     6            2            58 85+16=101 85+16=101     True
     6            2            59 29+08=037 29+08=047    False
     6            2            60 73+94=167 73+94=168    False
     6            2            61 19+62=081 19+62=081     True
     6            2            62 86+62=148 86+62=148     True
     6            2            63 38+99=137 38+99=137     True
     6            2            64 64+25=089 64+25=090    False
     6            2            65 61+72=133 61+72=122    False
     6            2            66 78+88=166 78+88=166     True
     6            2            67 43+66=109 43+66=109     True
     6            2            68 69+35=104 69+35=103    False
     6            2            69 33+77=110 33+77=100    False
     6            2            70 37+37=074 37+37=075    False
     6            2            71 87+54=141 87+54=141     True
     6            2            72 68+90=158 68+90=157    False
     6            2            73 83+44=127 83+44=128    False
     6            2            74 41+09=050 41+09=050     True
     6            2            75 13+48=061 13+48=071    False
     6            2            76 01+41=042 01+41=051    False
     6            2            77 19+74=093 19+74=093     True
     6            2            78 15+05=020 15+05=020     True
     6            2            79 55+46=101 55+46=101     True
     6            2            80 68+33=101 68+33=101     True
     6            2            81 44+40=084 44+40=084     True
     6            2            82 88+03=091 88+03=091     True
     6            2            83 81+79=160 81+79=150    False
     6            2            84 18+98=116 18+98=106    False
     6            2            85 70+64=134 70+64=134     True
     6            2            86 26+44=070 26+44=070     True
     6            2            87 98+87=185 98+87=186    False
     6            2            88 18+74=092 18+74=092     True
     6            2            89 50+68=118 50+68=118     True
     6            2            90 13+51=064 13+51=073    False
     6            2            91 90+89=179 90+89=179     True
     6            2            92 47+78=125 47+78=125     True
     6            2            93 81+57=138 81+57=138     True
     6            2            94 34+47=081 34+47=081     True
     6            2            95 94+23=117 94+23=118    False
     6            2            96 07+70=077 07+70=087    False
     6            2            97 56+33=089 56+33=099    False
     6            2            98 33+04=037 33+04=038    False
     6            2            99 26+09=035 26+09=035     True
     6            2           100 14+92=106 14+92=107    False
     6            2           101 78+54=132 78+54=132     True
     6            2           102 36+76=112 36+76=113    False
     6            2           103 17+47=064 17+47=075    False
     6            2           104 28+18=046 28+18=056    False
     6            2           105 78+54=132 78+54=132     True
     6            2           106 84+72=156 84+72=157    False
     6            2           107 00+44=044 00+44=053    False
     6            2           108 50+41=091 50+41=091     True
     6            2           109 87+88=175 87+88=165    False
     6            2           110 11+66=077 11+66=077     True
     6            2           111 80+60=140 80+60=140     True
     6            2           112 78+76=154 78+76=145    False
     6            2           113 24+74=098 24+74=099    False
     6            2           114 88+48=136 88+48=136     True
     6            2           115 38+31=069 38+31=078    False
     6            2           116 29+27=056 29+27=066    False
     6            2           117 08+45=053 08+45=063    False
     6            2           118 28+13=041 28+13=041     True
     6            2           119 53+99=152 53+99=142    False
     6            2           120 47+92=139 47+92=139     True
     6            2           121 76+21=097 76+21=097     True
     6            2           122 53+96=149 53+96=149     True
     6            2           123 93+91=184 93+91=174    False
     6            2           124 97+33=130 97+33=120    False
     6            2           125 67+78=145 67+78=145     True
     6            2           126 58+05=063 58+05=073    False
     6            2           127 00+16=016 00+16=026    False
     6            2           128 80+19=099 80+19=099     True
     6            2           129 98+22=120 98+22=110    False
     6            2           130 09+62=071 09+62=071     True
     6            2           131 06+23=029 06+23=039    False
     6            2           132 32+99=131 32+99=121    False
     6            2           133 17+02=019 17+02=029    False
     6            2           134 64+35=099 64+35=100    False
     6            2           135 35+83=118 35+83=118     True
     6            2           136 71+36=107 71+36=107     True
     6            2           137 75+06=081 75+06=081     True
     6            2           138 88+95=183 88+95=173    False
     6            2           139 19+98=117 19+98=107    False
     6            2           140 28+89=117 28+89=116    False
     6            2           141 33+11=044 33+11=053    False
     6            2           142 34+49=083 34+49=083     True
     6            2           143 90+35=125 90+35=125     True
     6            2           144 22+90=112 22+90=111    False
     6            2           145 98+89=187 98+89=187     True
     6            2           146 88+47=135 88+47=135     True
     6            2           147 30+86=116 30+86=116     True
     6            2           148 31+48=079 31+48=089    False
     6            2           149 39+21=060 39+21=060     True
     6            2           150 19+17=036 19+17=046    False
     6            2           151 27+60=087 27+60=087     True
     6            2           152 12+16=028 12+16=038    False
     6            2           153 51+75=126 51+75=127    False
     6            2           154 10+74=084 10+74=083    False
     6            2           155 42+63=105 42+63=106    False
     6            2           156 40+14=054 40+14=064    False
     6            2           157 23+93=116 23+93=117    False
     6            2           158 85+26=111 85+26=111     True
     6            2           159 28+46=074 28+46=075    False
     6            2           160 28+33=061 28+33=061     True
     6            2           161 43+30=073 43+30=073     True
     6            2           162 89+72=161 89+72=161     True
     6            2           163 52+21=073 52+21=072    False
     6            2           164 21+54=075 21+54=075     True
     6            2           165 69+13=082 69+13=081    False
     6            2           166 07+60=067 07+60=077    False
     6            2           167 63+83=146 63+83=147    False
     6            2           168 80+69=149 80+69=149     True
     6            2           169 27+28=055 27+28=065    False
     6            2           170 42+31=073 42+31=072    False
     6            2           171 51+99=150 51+99=140    False
     6            2           172 28+75=103 28+75=103     True
     6            2           173 38+57=095 38+57=095     True
     6            2           174 83+16=099 83+16=099     True
     6            2           175 92+94=186 92+94=187    False
     6            2           176 55+75=130 55+75=121    False
     6            2           177 59+51=110 59+51=100    False
     6            2           178 33+09=042 33+09=042     True
     6            2           179 53+13=066 53+13=077    False
     6            2           180 05+70=075 05+70=085    False
     6            2           181 12+20=032 12+20=032     True
     6            2           182 11+49=060 11+49=060     True
     6            2           183 63+45=108 63+45=109    False
     6            2           184 92+23=115 92+23=115     True
     6            2           185 82+45=127 82+45=128    False
     6            2           186 23+41=064 23+41=073    False
     6            2           187 64+26=090 64+26=090     True
     6            2           188 91+24=115 91+24=116    False
     6            2           189 20+32=052 20+32=052     True
     6            2           190 83+21=104 83+21=104     True
     6            2           191 07+20=027 07+20=037    False
     6            2           192 94+14=108 94+14=109    False
     6            2           193 96+89=185 96+89=185     True
     6            2           194 13+08=021 13+08=021     True
     6            2           195 32+05=037 32+05=038    False
     6            2           196 09+51=060 09+51=060     True
     6            2           197 26+29=055 26+29=065    False
     6            2           198 49+65=114 49+65=114     True
     6            2           199 32+66=098 32+66=098     True
     6            2           200 41+08=049 41+08=059    False
     6            2           201 26+79=105 26+79=105     True
     6            2           202 29+91=120 29+91=110    False
     6            2           203 51+00=051 51+00=061    False
     6            2           204 61+60=121 61+60=111    False
     6            2           205 45+78=123 45+78=113    False
     6            2           206 56+16=072 56+16=073    False
     6            2           207 66+68=134 66+68=134     True
     6            2           208 32+16=048 32+16=058    False
     6            2           209 84+49=133 84+49=133     True
     6            2           210 45+09=054 45+09=064    False
     6            2           211 96+78=174 96+78=175    False
     6            2           212 10+02=012 10+02=012     True
     6            2           213 36+60=096 36+60=096     True
     6            2           214 44+36=080 44+36=080     True
     6            2           215 12+86=098 12+86=098     True
     6            2           216 94+54=148 94+54=149    False
     6            2           217 64+73=137 64+73=138    False
     6            2           218 73+10=083 73+10=092    False
     6            2           219 14+62=076 14+62=077    False
     6            2           220 25+22=047 25+22=058    False
     6            2           221 94+22=116 94+22=117    False
     6            2           222 41+76=117 41+76=117     True
     6            2           223 38+46=084 38+46=085    False
     6            2           224 71+72=143 71+72=132    False
     6            2           225 74+79=153 74+79=143    False
     6            2           226 99+67=166 99+67=166     True
     6            2           227 78+71=149 78+71=149     True
     6            2           228 23+19=042 23+19=042     True
     6            2           229 51+65=116 51+65=117    False
     6            2           230 94+86=180 94+86=170    False
     6            2           231 09+79=088 09+79=088     True
     6            2           232 69+39=108 69+39=108     True
     6            2           233 84+13=097 84+13=098    False
     6            2           234 36+59=095 36+59=095     True
     6            2           235 87+47=134 87+47=135    False
     6            2           236 50+00=050 50+00=060    False
     6            2           237 76+96=172 76+96=163    False
     6            2           238 12+18=030 12+18=030     True
     6            2           239 99+95=194 99+95=184    False
     6            2           240 22+00=022 22+00=022     True
     6            2           241 96+18=114 96+18=105    False
     6            2           242 51+20=071 51+20=071     True
     6            2           243 66+81=147 66+81=147     True
     6            2           244 78+18=096 78+18=096     True
     6            2           245 09+78=087 09+78=087     True
     6            2           246 24+20=044 24+20=044     True
     6            2           247 76+13=089 76+13=099    False
     6            2           248 05+10=015 05+10=026    False
     6            2           249 97+14=111 97+14=101    False
     6            2           250 92+38=130 92+38=120    False
     6            2           251 77+13=090 77+13=090     True
     6            2           252 70+19=089 70+19=099    False
     6            2           253 90+45=135 90+45=135     True
     6            2           254 50+09=059 50+09=069    False
     6            2           255 78+06=084 78+06=085    False
     6            3             0 03+25=028 03+25=038    False
     6            3             1 48+43=091 48+43=091     True
     6            3             2 39+47=086 39+47=086     True
     6            3             3 48+19=067 48+19=077    False
     6            3             4 07+22=029 07+22=039    False
     6            3             5 73+68=141 73+68=131    False
     6            3             6 14+56=070 14+56=070     True
     6            3             7 96+95=191 96+95=181    False
     6            3             8 96+28=124 96+28=114    False
     6            3             9 82+05=087 82+05=088    False
     6            3            10 27+94=121 27+94=111    False
     6            3            11 87+86=173 87+86=163    False
     6            3            12 00+68=068 00+68=078    False
     6            3            13 11+37=048 11+37=058    False
     6            3            14 95+93=188 95+93=188     True
     6            3            15 75+82=157 75+82=158    False
     6            3            16 41+71=112 41+71=112     True
     6            3            17 60+14=074 60+14=073    False
     6            3            18 77+77=154 77+77=145    False
     6            3            19 31+84=115 31+84=116    False
     6            3            20 31+57=088 31+57=098    False
     6            3            21 27+87=114 27+87=115    False
     6            3            22 31+89=120 31+89=110    False
     6            3            23 22+18=040 22+18=040     True
     6            3            24 38+25=063 38+25=063     True
     6            3            25 64+54=118 64+54=119    False
     6            3            26 85+60=145 85+60=145     True
     6            3            27 14+71=085 14+71=085     True
     6            3            28 06+16=022 06+16=023    False
     6            3            29 78+61=139 78+61=138    False
     6            3            30 65+75=140 65+75=131    False
     6            3            31 13+83=096 13+83=097    False
     6            3            32 75+49=124 75+49=124     True
     6            3            33 05+78=083 05+78=083     True
     6            3            34 66+55=121 66+55=111    False
     6            3            35 03+05=008 03+05=018    False
     6            3            36 69+99=168 69+99=168     True
     6            3            37 52+82=134 52+82=134     True
     6            3            38 45+97=142 45+97=132    False
     6            3            39 66+17=083 66+17=083     True
     6            3            40 36+17=053 36+17=063    False
     6            3            41 92+74=166 92+74=166     True
     6            3            42 48+44=092 48+44=092     True
     6            3            43 34+17=051 34+17=051     True
     6            3            44 56+11=067 56+11=077    False
     6            3            45 77+23=100 77+23=090    False
     6            3            46 10+11=021 10+11=021     True
     6            3            47 32+65=097 32+65=098    False
     6            3            48 53+49=102 53+49=102     True
     6            3            49 68+86=154 68+86=155    False
     6            3            50 52+94=146 52+94=147    False
     6            3            51 97+71=168 97+71=168     True
     6            3            52 05+37=042 05+37=042     True
     6            3            53 58+75=133 58+75=133     True
     6            3            54 06+24=030 06+24=030     True
     6            3            55 15+44=059 15+44=069    False
     6            3            56 90+49=139 90+49=139     True
     6            3            57 50+37=087 50+37=097    False
     6            3            58 88+61=149 88+61=149     True
     6            3            59 21+57=078 21+57=088    False
     6            3            60 24+85=109 24+85=100    False
     6            3            61 01+66=067 01+66=077    False
     6            3            62 50+46=096 50+46=096     True
     6            3            63 76+65=141 76+65=131    False
     6            3            64 23+74=097 23+74=098    False
     6            3            65 76+16=092 76+16=093    False
     6            3            66 06+08=014 06+08=024    False
     6            3            67 69+25=094 69+25=094     True
     6            3            68 15+23=038 15+23=048    False
     6            3            69 41+02=043 41+02=052    False
     6            3            70 16+66=082 16+66=083    False
     6            3            71 59+94=153 59+94=143    False
     6            3            72 32+88=120 32+88=110    False
     6            3            73 46+21=067 46+21=077    False
     6            3            74 57+28=085 57+28=085     True
     6            3            75 00+31=031 00+31=031     True
     6            3            76 77+07=084 77+07=085    False
     6            3            77 28+70=098 28+70=097    False
     6            3            78 05+61=066 05+61=076    False
     6            3            79 22+09=031 22+09=031     True
     6            3            80 08+94=102 08+94=102     True
     6            3            81 40+11=051 40+11=061    False
     6            3            82 10+48=058 10+48=068    False
     6            3            83 27+56=083 27+56=083     True
     6            3            84 42+16=058 42+16=068    False
     6            3            85 69+43=112 69+43=112     True
     6            3            86 57+69=126 57+69=126     True
     6            3            87 18+86=104 18+86=105    False
     6            3            88 86+80=166 86+80=166     True
     6            3            89 30+85=115 30+85=116    False
     6            3            90 77+66=143 77+66=133    False
     6            3            91 39+64=103 39+64=103     True
     6            3            92 76+61=137 76+61=137     True
     6            3            93 42+61=103 42+61=103     True
     6            3            94 07+30=037 07+30=047    False
     6            3            95 35+93=128 35+93=128     True
     6            3            96 40+90=130 40+90=130     True
     6            3            97 08+91=099 08+91=098    False
     6            3            98 62+34=096 62+34=096     True
     6            3            99 86+49=135 86+49=135     True
     6            3           100 73+23=096 73+23=097    False
     6            3           101 87+35=122 87+35=122     True
     6            3           102 35+31=066 35+31=077    False
     6            3           103 07+13=020 07+13=020     True
     6            3           104 39+41=080 39+41=080     True
     6            3           105 44+63=107 44+63=108    False
     6            3           106 94+66=160 94+66=150    False
     6            3           107 49+54=103 49+54=103     True
     6            3           108 79+46=125 79+46=125     True
     6            3           109 53+12=065 53+12=075    False
     6            3           110 60+92=152 60+92=152     True
     6            3           111 25+60=085 25+60=084    False
     6            3           112 64+53=117 64+53=118    False
     6            3           113 41+02=043 41+02=052    False
     6            3           114 00+97=097 00+97=097     True
     6            3           115 12+52=064 12+52=073    False
     6            3           116 39+50=089 39+50=088    False
     6            3           117 87+21=108 87+21=108     True
     6            3           118 04+99=103 04+99=103     True
     6            3           119 19+75=094 19+75=094     True
     6            3           120 90+05=095 90+05=094    False
     6            3           121 54+39=093 54+39=093     True
     6            3           122 29+26=055 29+26=065    False
     6            3           123 82+95=177 82+95=178    False
     6            3           124 55+09=064 55+09=074    False
     6            3           125 02+62=064 02+62=073    False
     6            3           126 68+30=098 68+30=097    False
     6            3           127 99+16=115 99+16=105    False
     6            3           128 63+11=074 63+11=073    False
     6            3           129 42+92=134 42+92=134     True
     6            3           130 99+16=115 99+16=105    False
     6            3           131 50+31=081 50+31=081     True
     6            3           132 23+46=069 23+46=079    False
     6            3           133 45+73=118 45+73=118     True
     6            3           134 89+77=166 89+77=166     True
     6            3           135 45+78=123 45+78=113    False
     6            3           136 96+60=156 96+60=156     True
     6            3           137 74+61=135 74+61=135     True
     6            3           138 87+01=088 87+01=097    False
     6            3           139 63+88=151 63+88=151     True
     6            3           140 59+72=131 59+72=131     True
     6            3           141 17+96=113 17+96=103    False
     6            3           142 89+77=166 89+77=166     True
     6            3           143 24+69=093 24+69=093     True
     6            3           144 75+83=158 75+83=158     True
     6            3           145 50+54=104 50+54=104     True
     6            3           146 93+47=140 93+47=130    False
     6            3           147 20+55=075 20+55=075     True
     6            3           148 91+79=170 91+79=170     True
     6            3           149 15+13=028 15+13=038    False
     6            3           150 86+09=095 86+09=095     True
     6            3           151 29+58=087 29+58=087     True
     6            3           152 01+29=030 01+29=030     True
     6            3           153 65+48=113 65+48=113     True
     6            3           154 96+45=141 96+45=131    False
     6            3           155 58+69=127 58+69=127     True
     6            3           156 84+43=127 84+43=128    False
     6            3           157 90+38=128 90+38=128     True
     6            3           158 39+97=136 39+97=136     True
     6            3           159 74+84=158 74+84=159    False
     6            3           160 86+22=108 86+22=108     True
     6            3           161 01+86=087 01+86=097    False
     6            3           162 81+63=144 81+63=144     True
     6            3           163 80+94=174 80+94=174     True
     6            3           164 44+42=086 44+42=087    False
     6            3           165 72+60=132 72+60=132     True
     6            3           166 28+07=035 28+07=045    False
     6            3           167 69+54=123 69+54=123     True
     6            3           168 68+77=145 68+77=145     True
     6            3           169 90+16=106 90+16=106     True
     6            3           170 64+50=114 64+50=114     True
     6            3           171 46+88=134 46+88=134     True
     6            3           172 55+99=154 55+99=144    False
     6            3           173 31+97=128 31+97=128     True
     6            3           174 79+28=107 79+28=107     True
     6            3           175 81+43=124 81+43=124     True
     6            3           176 41+15=056 41+15=066    False
     6            3           177 38+77=115 38+77=115     True
     6            3           178 25+06=031 25+06=031     True
     6            3           179 01+93=094 01+93=094     True
     6            3           180 97+22=119 97+22=119     True
     6            3           181 71+84=155 71+84=155     True
     6            3           182 26+36=062 26+36=062     True
     6            3           183 60+92=152 60+92=152     True
     6            3           184 02+94=096 02+94=096     True
     6            3           185 31+58=089 31+58=099    False
     6            3           186 70+52=122 70+52=112    False
     6            3           187 19+42=061 19+42=061     True
     6            3           188 95+73=168 95+73=168     True
     6            3           189 21+25=046 21+25=057    False
     6            3           190 13+58=071 13+58=071     True
     6            3           191 62+28=090 62+28=090     True
     6            3           192 38+14=052 38+14=052     True
     6            3           193 66+75=141 66+75=131    False
     6            3           194 24+59=083 24+59=083     True
     6            3           195 97+66=163 97+66=153    False
     6            3           196 76+70=146 76+70=146     True
     6            3           197 08+40=048 08+40=057    False
     6            3           198 84+00=084 84+00=094    False
     6            3           199 54+73=127 54+73=128    False
     6            3           200 16+88=104 16+88=105    False
     6            3           201 99+47=146 99+47=146     True
     6            3           202 31+95=126 31+95=127    False
     6            3           203 01+79=080 01+79=080     True
     6            3           204 03+68=071 03+68=071     True
     6            3           205 10+05=015 10+05=015     True
     6            3           206 98+90=188 98+90=187    False
     6            3           207 58+53=111 58+53=111     True
     6            3           208 34+87=121 34+87=111    False
     6            3           209 07+31=038 07+31=047    False
     6            3           210 59+08=067 59+08=077    False
     6            3           211 51+38=089 51+38=099    False
     6            3           212 62+62=124 62+62=114    False
     6            3           213 80+32=112 80+32=112     True
     6            3           214 69+16=085 69+16=085     True
     6            3           215 01+17=018 01+17=028    False
     6            3           216 74+41=115 74+41=116    False
     6            3           217 20+89=109 20+89=109     True
     6            3           218 53+50=103 53+50=103     True
     6            3           219 82+85=167 82+85=168    False
     6            3           220 34+47=081 34+47=081     True
     6            3           221 34+45=079 34+45=080    False
     6            3           222 77+34=111 77+34=111     True
     6            3           223 56+33=089 56+33=099    False
     6            3           224 97+56=153 97+56=143    False
     6            3           225 29+06=035 29+06=035     True
     6            3           226 78+96=174 78+96=165    False
     6            3           227 28+65=093 28+65=093     True
     6            3           228 61+64=125 61+64=116    False
     6            3           229 32+64=096 32+64=097    False
     6            3           230 98+32=130 98+32=120    False
     6            3           231 25+35=060 25+35=060     True
     6            3           232 05+08=013 05+08=013     True
     6            3           233 05+26=031 05+26=031     True
     6            3           234 84+71=155 84+71=155     True
     6            3           235 33+10=043 33+10=053    False
     6            3           236 98+35=133 98+35=133     True
     6            3           237 68+98=166 68+98=166     True
     6            3           238 03+63=066 03+63=077    False
     6            3           239 12+96=108 12+96=108     True
     6            3           240 02+81=083 02+81=082    False
     6            3           241 83+13=096 83+13=097    False
     6            3           242 55+92=147 55+92=148    False
     6            3           243 96+09=105 96+09=105     True
     6            3           244 61+08=069 61+08=079    False
     6            3           245 39+75=114 39+75=114     True
     6            3           246 40+74=114 40+74=114     True
     6            3           247 39+80=119 39+80=118    False
     6            3           248 57+95=152 57+95=142    False
     6            3           249 92+97=189 92+97=189     True
     6            3           250 33+03=036 33+03=037    False
     6            3           251 74+92=166 74+92=167    False
     6            3           252 99+09=108 99+09=108     True
     6            3           253 98+10=108 98+10=107    False
     6            3           254 46+77=123 46+77=113    False
     6            3           255 85+78=163 85+78=153    False
     6            4             0 41+21=062 41+21=062     True
     6            4             1 49+13=062 49+13=071    False
     6            4             2 59+07=066 59+07=076    False
     6            4             3 31+11=042 31+11=051    False
     6            4             4 74+16=090 74+16=090     True
     6            4             5 43+38=081 43+38=081     True
     6            4             6 08+67=075 08+67=075     True
     6            4             7 31+66=097 31+66=097     True
     6            4             8 10+31=041 10+31=051    False
     6            4             9 34+59=093 34+59=093     True
     6            4            10 78+42=120 78+42=110    False
     6            4            11 13+41=054 13+41=063    False
     6            4            12 97+89=186 97+89=186     True
     6            4            13 15+62=077 15+62=088    False
     6            4            14 39+36=075 39+36=075     True
     6            4            15 21+25=046 21+25=057    False
     6            4            16 74+56=130 74+56=120    False
     6            4            17 85+47=132 85+47=132     True
     6            4            18 47+32=079 47+32=089    False
     6            4            19 37+66=103 37+66=103     True
     6            4            20 16+29=045 16+29=055    False
     6            4            21 86+77=163 86+77=153    False
     6            4            22 80+07=087 80+07=097    False
     6            4            23 87+05=092 87+05=092     True
     6            4            24 58+16=074 58+16=075    False
     6            4            25 52+79=131 52+79=121    False
     6            4            26 91+08=099 91+08=099     True
     6            4            27 47+78=125 47+78=125     True
     6            4            28 86+96=182 86+96=173    False
     6            4            29 90+22=112 90+22=111    False
     6            4            30 31+18=049 31+18=059    False
     6            4            31 86+15=101 86+15=101     True
     6            4            32 15+95=110 15+95=100    False
     6            4            33 42+11=053 42+11=062    False
     6            4            34 65+99=164 65+99=154    False
     6            4            35 89+29=118 89+29=117    False
     6            4            36 35+11=046 35+11=057    False
     6            4            37 71+41=112 71+41=111    False
     6            4            38 16+24=040 16+24=040     True
     6            4            39 77+82=159 77+82=159     True
     6            4            40 55+89=144 55+89=144     True
     6            4            41 17+88=105 17+88=106    False
     6            4            42 54+72=126 54+72=127    False
     6            4            43 34+98=132 34+98=132     True
     6            4            44 09+97=106 09+97=106     True
     6            4            45 91+07=098 91+07=098     True
     6            4            46 55+94=149 55+94=140    False
     6            4            47 22+58=080 22+58=080     True
     6            4            48 91+37=128 91+37=128     True
     6            4            49 16+10=026 16+10=036    False
     6            4            50 96+32=128 96+32=128     True
     6            4            51 35+75=110 35+75=111    False
     6            4            52 88+73=161 88+73=151    False
     6            4            53 35+18=053 35+18=063    False
     6            4            54 33+10=043 33+10=053    False
     6            4            55 08+50=058 08+50=067    False
     6            4            56 22+62=084 22+62=083    False
     6            4            57 26+37=063 26+37=063     True
     6            4            58 80+27=107 80+27=107     True
     6            4            59 68+28=096 68+28=096     True
     6            4            60 48+03=051 48+03=061    False
     6            4            61 40+18=058 40+18=068    False
     6            4            62 16+59=075 16+59=075     True
     6            4            63 02+19=021 02+19=021     True
     6            4            64 01+09=010 01+09=010     True
     6            4            65 62+68=130 62+68=120    False
     6            4            66 09+71=080 09+71=080     True
     6            4            67 00+58=058 00+58=068    False
     6            4            68 16+45=061 16+45=061     True
     6            4            69 24+98=122 24+98=112    False
     6            4            70 47+92=139 47+92=139     True
     6            4            71 94+84=178 94+84=178     True
     6            4            72 21+32=053 21+32=062    False
     6            4            73 29+82=111 29+82=100    False
     6            4            74 32+79=111 32+79=101    False
     6            4            75 13+98=111 13+98=101    False
     6            4            76 41+94=135 41+94=136    False
     6            4            77 51+84=135 51+84=136    False
     6            4            78 42+05=047 42+05=058    False
     6            4            79 39+03=042 39+03=041    False
     6            4            80 02+92=094 02+92=093    False
     6            4            81 99+81=180 99+81=170    False
     6            4            82 32+68=100 32+68=100     True
     6            4            83 52+17=069 52+17=079    False
     6            4            84 56+58=114 56+58=114     True
     6            4            85 21+48=069 21+48=079    False
     6            4            86 61+71=132 61+71=121    False
     6            4            87 17+01=018 17+01=027    False
     6            4            88 68+23=091 68+23=091     True
     6            4            89 00+37=037 00+37=047    False
     6            4            90 94+88=182 94+88=172    False
     6            4            91 06+31=037 06+31=047    False
     6            4            92 27+18=045 27+18=055    False
     6            4            93 41+81=122 41+81=121    False
     6            4            94 15+86=101 15+86=101     True
     6            4            95 36+87=123 36+87=113    False
     6            4            96 17+37=054 17+37=065    False
     6            4            97 13+86=099 13+86=099     True
     6            4            98 29+69=098 29+69=098     True
     6            4            99 31+99=130 31+99=120    False
     6            4           100 47+29=076 47+29=086    False
     6            4           101 08+81=089 08+81=098    False
     6            4           102 72+82=154 72+82=154     True
     6            4           103 46+91=137 46+91=137     True
     6            4           104 70+35=105 70+35=106    False
     6            4           105 90+55=145 90+55=134    False
     6            4           106 99+99=198 99+99=188    False
     6            4           107 60+97=157 60+97=157     True
     6            4           108 03+40=043 03+40=053    False
     6            4           109 35+49=084 35+49=084     True
     6            4           110 32+02=034 32+02=033    False
     6            4           111 70+18=088 70+18=098    False
     6            4           112 99+05=104 99+05=104     True
     6            4           113 78+73=151 78+73=141    False
     6            4           114 03+02=005 03+02=015    False
     6            4           115 50+14=064 50+14=074    False
     6            4           116 62+02=064 62+02=073    False
     6            4           117 16+74=090 16+74=090     True
     6            4           118 68+65=133 68+65=133     True
     6            4           119 74+81=155 74+81=155     True
     6            4           120 37+48=085 37+48=085     True
     6            4           121 63+04=067 63+04=078    False
     6            4           122 06+62=068 06+62=078    False
     6            4           123 95+75=170 95+75=161    False
     6            4           124 92+37=129 92+37=129     True
     6            4           125 81+32=113 81+32=113     True
     6            4           126 53+28=081 53+28=081     True
     6            4           127 52+42=094 52+42=094     True
     6            4           128 66+97=163 66+97=153    False
     6            4           129 00+48=048 00+48=058    False
     6            4           130 65+32=097 65+32=098    False
     6            4           131 60+89=149 60+89=149     True
     6            4           132 71+61=132 71+61=121    False
     6            4           133 98+50=148 98+50=147    False
     6            4           134 90+96=186 90+96=176    False
     6            4           135 02+96=098 02+96=098     True
     6            4           136 62+75=137 62+75=138    False
     6            4           137 41+28=069 41+28=079    False
     6            4           138 95+79=174 95+79=174     True
     6            4           139 48+41=089 48+41=098    False
     6            4           140 87+95=182 87+95=172    False
     6            4           141 75+38=113 75+38=113     True
     6            4           142 31+55=086 31+55=086     True
     6            4           143 54+63=117 54+63=118    False
     6            4           144 75+82=157 75+82=158    False
     6            4           145 46+45=091 46+45=091     True
     6            4           146 13+08=021 13+08=021     True
     6            4           147 77+97=174 77+97=165    False
     6            4           148 37+35=072 37+35=072     True
     6            4           149 21+89=110 21+89=100    False
     6            4           150 58+51=109 58+51=108    False
     6            4           151 91+48=139 91+48=139     True
     6            4           152 33+23=056 33+23=067    False
     6            4           153 80+96=176 80+96=176     True
     6            4           154 78+02=080 78+02=080     True
     6            4           155 38+95=133 38+95=133     True
     6            4           156 99+25=124 99+25=114    False
     6            4           157 30+76=106 30+76=106     True
     6            4           158 42+40=082 42+40=081    False
     6            4           159 85+58=143 85+58=143     True
     6            4           160 44+46=090 44+46=090     True
     6            4           161 06+41=047 06+41=057    False
     6            4           162 65+90=155 65+90=155     True
     6            4           163 43+83=126 43+83=128    False
     6            4           164 36+61=097 36+61=097     True
     6            4           165 61+51=112 61+51=112     True
     6            4           166 38+09=047 38+09=057    False
     6            4           167 21+97=118 21+97=118     True
     6            4           168 83+30=113 83+30=111    False
     6            4           169 11+79=090 11+79=090     True
     6            4           170 14+29=043 14+29=043     True
     6            4           171 21+11=032 21+11=031    False
     6            4           172 43+53=096 43+53=107    False
     6            4           173 02+58=060 02+58=070    False
     6            4           174 78+82=160 78+82=150    False
     6            4           175 91+11=102 91+11=101    False
     6            4           176 58+54=112 58+54=112     True
     6            4           177 00+15=015 00+15=015     True
     6            4           178 83+51=134 83+51=134     True
     6            4           179 44+72=116 44+72=117    False
     6            4           180 71+20=091 71+20=091     True
     6            4           181 24+99=123 24+99=113    False
     6            4           182 46+30=076 46+30=086    False
     6            4           183 08+67=075 08+67=075     True
     6            4           184 47+42=089 47+42=099    False
     6            4           185 95+67=162 95+67=152    False
     6            4           186 40+56=096 40+56=096     True
     6            4           187 17+95=112 17+95=112     True
     6            4           188 94+66=160 94+66=150    False
     6            4           189 14+58=072 14+58=072     True
     6            4           190 56+05=061 56+05=061     True
     6            4           191 70+01=071 70+01=071     True
     6            4           192 97+59=156 97+59=146    False
     6            4           193 94+67=161 94+67=151    False
     6            4           194 13+41=054 13+41=063    False
     6            4           195 85+15=100 85+15=100     True
     6            4           196 48+53=101 48+53=101     True
     6            4           197 62+75=137 62+75=138    False
     6            4           198 87+47=134 87+47=135    False
     6            4           199 31+88=119 31+88=118    False
     6            4           200 97+16=113 97+16=103    False
     6            4           201 48+45=093 48+45=093     True
     6            4           202 99+00=099 99+00=108    False
     6            4           203 15+01=016 15+01=026    False
     6            4           204 28+96=124 28+96=125    False
     6            4           205 20+11=031 20+11=031     True
     6            4           206 07+56=063 07+56=073    False
     6            4           207 06+08=014 06+08=024    False
     6            4           208 45+46=091 45+46=091     True
     6            4           209 48+85=133 48+85=133     True
     6            4           210 62+14=076 62+14=076     True
     6            4           211 82+31=113 82+31=113     True
     6            4           212 85+88=173 85+88=163    False
     6            4           213 77+08=085 77+08=085     True
     6            4           214 16+64=080 16+64=080     True
     6            4           215 00+27=027 00+27=037    False
     6            4           216 36+75=111 36+75=111     True
     6            4           217 38+38=076 38+38=086    False
     6            4           218 88+32=120 88+32=110    False
     6            4           219 09+88=097 09+88=097     True
     6            4           220 96+87=183 96+87=173    False
     6            4           221 71+29=100 71+29=090    False
     6            4           222 99+13=112 99+13=102    False
     6            4           223 03+13=016 03+13=027    False
     6            4           224 67+23=090 67+23=090     True
     6            4           225 15+98=113 15+98=103    False
     6            4           226 10+08=018 10+08=028    False
     6            4           227 46+24=070 46+24=070     True
     6            4           228 55+63=118 55+63=118     True
     6            4           229 28+06=034 28+06=035    False
     6            4           230 43+87=130 43+87=120    False
     6            4           231 34+05=039 34+05=040    False
2024-09-20 14:10:24,236 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:24,238 - root - INFO - ====================================================== Starting Train Epoch: 7/9 ======================================================
2024-09-20 14:10:24,239 - root - INFO - Learning rates for each parameter group: 0.00135982073305105332, 0.00135982073305105332
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 7, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 7, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=106.61858, average_batch_loss=0.41648, average_batch_perplexity=1.51661, lr=0.001359821, 0.001359821]
Epoch: 7, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=106.61858, average_batch_loss=0.41648, average_batch_perplexity=1.51661, lr=0.001359821, 0.001359821]
Epoch: 7, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=105.15369, average_batch_loss=0.41076, average_batch_perplexity=1.50796, lr=0.001355815, 0.001355815]
Epoch: 7, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 11.97it/s, total_batch_loss=105.15369, average_batch_loss=0.41076, average_batch_perplexity=1.50796, lr=0.001355815, 0.001355815]
Epoch: 7, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.97it/s, total_batch_loss=105.15369, average_batch_loss=0.41076, average_batch_perplexity=1.50796, lr=0.001355815, 0.001355815]
Epoch: 7, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.97it/s, total_batch_loss=98.28551, average_batch_loss=0.38393, average_batch_perplexity=1.46804, lr=0.001351845, 0.001351845] 
Epoch: 7, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.97it/s, total_batch_loss=98.28551, average_batch_loss=0.38393, average_batch_perplexity=1.46804, lr=0.001351845, 0.001351845]
Epoch: 7, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.97it/s, total_batch_loss=97.85777, average_batch_loss=0.38226, average_batch_perplexity=1.46559, lr=0.001347910, 0.001347910]
Epoch: 7, Step: 4:  14%|█▍        | 4/28 [00:00<00:01, 12.10it/s, total_batch_loss=97.85777, average_batch_loss=0.38226, average_batch_perplexity=1.46559, lr=0.001347910, 0.001347910]
Epoch: 7, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.10it/s, total_batch_loss=97.85777, average_batch_loss=0.38226, average_batch_perplexity=1.46559, lr=0.001347910, 0.001347910]
Epoch: 7, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.10it/s, total_batch_loss=91.14902, average_batch_loss=0.35605, average_batch_perplexity=1.42768, lr=0.001344008, 0.001344008]
Epoch: 7, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.10it/s, total_batch_loss=91.14902, average_batch_loss=0.35605, average_batch_perplexity=1.42768, lr=0.001344008, 0.001344008]
Epoch: 7, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.10it/s, total_batch_loss=95.17233, average_batch_loss=0.37177, average_batch_perplexity=1.45029, lr=0.001340141, 0.001340141]
Epoch: 7, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 12.26it/s, total_batch_loss=95.17233, average_batch_loss=0.37177, average_batch_perplexity=1.45029, lr=0.001340141, 0.001340141]
Epoch: 7, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.26it/s, total_batch_loss=95.17233, average_batch_loss=0.37177, average_batch_perplexity=1.45029, lr=0.001340141, 0.001340141]
Epoch: 7, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.26it/s, total_batch_loss=84.90469, average_batch_loss=0.33166, average_batch_perplexity=1.39328, lr=0.001336306, 0.001336306]
Epoch: 7, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.26it/s, total_batch_loss=84.90469, average_batch_loss=0.33166, average_batch_perplexity=1.39328, lr=0.001336306, 0.001336306]
Epoch: 7, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.26it/s, total_batch_loss=79.87619, average_batch_loss=0.31202, average_batch_perplexity=1.36618, lr=0.001332504, 0.001332504]
Epoch: 7, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 11.75it/s, total_batch_loss=79.87619, average_batch_loss=0.31202, average_batch_perplexity=1.36618, lr=0.001332504, 0.001332504]
Epoch: 7, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 11.75it/s, total_batch_loss=79.87619, average_batch_loss=0.31202, average_batch_perplexity=1.36618, lr=0.001332504, 0.001332504]
Epoch: 7, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 11.75it/s, total_batch_loss=80.34425, average_batch_loss=0.31384, average_batch_perplexity=1.36868, lr=0.001328735, 0.001328735]
Epoch: 7, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 11.75it/s, total_batch_loss=80.34425, average_batch_loss=0.31384, average_batch_perplexity=1.36868, lr=0.001328735, 0.001328735]
Epoch: 7, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 11.75it/s, total_batch_loss=86.42328, average_batch_loss=0.33759, average_batch_perplexity=1.40157, lr=0.001324997, 0.001324997]
Epoch: 7, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 11.59it/s, total_batch_loss=86.42328, average_batch_loss=0.33759, average_batch_perplexity=1.40157, lr=0.001324997, 0.001324997]
Epoch: 7, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 11.59it/s, total_batch_loss=86.42328, average_batch_loss=0.33759, average_batch_perplexity=1.40157, lr=0.001324997, 0.001324997]
Epoch: 7, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 11.59it/s, total_batch_loss=78.26029, average_batch_loss=0.30570, average_batch_perplexity=1.35758, lr=0.001321291, 0.001321291]
Epoch: 7, Step: 12:  36%|███▌      | 10/28 [00:01<00:01, 11.59it/s, total_batch_loss=78.26029, average_batch_loss=0.30570, average_batch_perplexity=1.35758, lr=0.001321291, 0.001321291]
Epoch: 7, Step: 12:  36%|███▌      | 10/28 [00:01<00:01, 11.59it/s, total_batch_loss=86.62181, average_batch_loss=0.33837, average_batch_perplexity=1.40265, lr=0.001317616, 0.001317616]
Epoch: 7, Step: 12:  43%|████▎     | 12/28 [00:01<00:01, 11.77it/s, total_batch_loss=86.62181, average_batch_loss=0.33837, average_batch_perplexity=1.40265, lr=0.001317616, 0.001317616]
Epoch: 7, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 11.77it/s, total_batch_loss=86.62181, average_batch_loss=0.33837, average_batch_perplexity=1.40265, lr=0.001317616, 0.001317616]
Epoch: 7, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 11.77it/s, total_batch_loss=82.72236, average_batch_loss=0.32313, average_batch_perplexity=1.38145, lr=0.001313971, 0.001313971]
Epoch: 7, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 11.77it/s, total_batch_loss=82.72236, average_batch_loss=0.32313, average_batch_perplexity=1.38145, lr=0.001313971, 0.001313971]
Epoch: 7, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 11.77it/s, total_batch_loss=73.91183, average_batch_loss=0.28872, average_batch_perplexity=1.33472, lr=0.001310356, 0.001310356]
Epoch: 7, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 11.51it/s, total_batch_loss=73.91183, average_batch_loss=0.28872, average_batch_perplexity=1.33472, lr=0.001310356, 0.001310356]
Epoch: 7, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 11.51it/s, total_batch_loss=73.91183, average_batch_loss=0.28872, average_batch_perplexity=1.33472, lr=0.001310356, 0.001310356]
Epoch: 7, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 11.51it/s, total_batch_loss=74.20255, average_batch_loss=0.28985, average_batch_perplexity=1.33623, lr=0.001306771, 0.001306771]
Epoch: 7, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 11.51it/s, total_batch_loss=74.20255, average_batch_loss=0.28985, average_batch_perplexity=1.33623, lr=0.001306771, 0.001306771]
Epoch: 7, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 11.51it/s, total_batch_loss=86.90319, average_batch_loss=0.33947, average_batch_perplexity=1.40420, lr=0.001303215, 0.001303215]
Epoch: 7, Step: 16:  57%|█████▋    | 16/28 [00:01<00:01, 11.61it/s, total_batch_loss=86.90319, average_batch_loss=0.33947, average_batch_perplexity=1.40420, lr=0.001303215, 0.001303215]
Epoch: 7, Step: 17:  57%|█████▋    | 16/28 [00:01<00:01, 11.61it/s, total_batch_loss=86.90319, average_batch_loss=0.33947, average_batch_perplexity=1.40420, lr=0.001303215, 0.001303215]
Epoch: 7, Step: 17:  57%|█████▋    | 16/28 [00:01<00:01, 11.61it/s, total_batch_loss=76.07186, average_batch_loss=0.29716, average_batch_perplexity=1.34602, lr=0.001299688, 0.001299688]
Epoch: 7, Step: 18:  57%|█████▋    | 16/28 [00:01<00:01, 11.61it/s, total_batch_loss=76.07186, average_batch_loss=0.29716, average_batch_perplexity=1.34602, lr=0.001299688, 0.001299688]
Epoch: 7, Step: 18:  57%|█████▋    | 16/28 [00:01<00:01, 11.61it/s, total_batch_loss=71.24876, average_batch_loss=0.27832, average_batch_perplexity=1.32090, lr=0.001296190, 0.001296190]
Epoch: 7, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 11.88it/s, total_batch_loss=71.24876, average_batch_loss=0.27832, average_batch_perplexity=1.32090, lr=0.001296190, 0.001296190]
Epoch: 7, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 11.88it/s, total_batch_loss=71.24876, average_batch_loss=0.27832, average_batch_perplexity=1.32090, lr=0.001296190, 0.001296190]
Epoch: 7, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 11.88it/s, total_batch_loss=81.55933, average_batch_loss=0.31859, average_batch_perplexity=1.37519, lr=0.001292719, 0.001292719]
Epoch: 7, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 11.88it/s, total_batch_loss=81.55933, average_batch_loss=0.31859, average_batch_perplexity=1.37519, lr=0.001292719, 0.001292719]
Epoch: 7, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 11.88it/s, total_batch_loss=72.77303, average_batch_loss=0.28427, average_batch_perplexity=1.32879, lr=0.001289277, 0.001289277]
Epoch: 7, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 11.91it/s, total_batch_loss=72.77303, average_batch_loss=0.28427, average_batch_perplexity=1.32879, lr=0.001289277, 0.001289277]
Epoch: 7, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 11.91it/s, total_batch_loss=72.77303, average_batch_loss=0.28427, average_batch_perplexity=1.32879, lr=0.001289277, 0.001289277]
Epoch: 7, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 11.91it/s, total_batch_loss=76.14906, average_batch_loss=0.29746, average_batch_perplexity=1.34643, lr=0.001285861, 0.001285861]
Epoch: 7, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 11.91it/s, total_batch_loss=76.14906, average_batch_loss=0.29746, average_batch_perplexity=1.34643, lr=0.001285861, 0.001285861]
Epoch: 7, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 11.91it/s, total_batch_loss=75.58844, average_batch_loss=0.29527, average_batch_perplexity=1.34349, lr=0.001282473, 0.001282473]
Epoch: 7, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 11.98it/s, total_batch_loss=75.58844, average_batch_loss=0.29527, average_batch_perplexity=1.34349, lr=0.001282473, 0.001282473]
Epoch: 7, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 11.98it/s, total_batch_loss=75.58844, average_batch_loss=0.29527, average_batch_perplexity=1.34349, lr=0.001282473, 0.001282473]
Epoch: 7, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 11.98it/s, total_batch_loss=67.71696, average_batch_loss=0.26452, average_batch_perplexity=1.30280, lr=0.001279111, 0.001279111]
Epoch: 7, Step: 24:  79%|███████▊  | 22/28 [00:02<00:00, 11.98it/s, total_batch_loss=67.71696, average_batch_loss=0.26452, average_batch_perplexity=1.30280, lr=0.001279111, 0.001279111]
Epoch: 7, Step: 24:  79%|███████▊  | 22/28 [00:02<00:00, 11.98it/s, total_batch_loss=62.94439, average_batch_loss=0.24588, average_batch_perplexity=1.27874, lr=0.001275776, 0.001275776]
Epoch: 7, Step: 24:  86%|████████▌ | 24/28 [00:02<00:00, 12.10it/s, total_batch_loss=62.94439, average_batch_loss=0.24588, average_batch_perplexity=1.27874, lr=0.001275776, 0.001275776]
Epoch: 7, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.10it/s, total_batch_loss=62.94439, average_batch_loss=0.24588, average_batch_perplexity=1.27874, lr=0.001275776, 0.001275776]
Epoch: 7, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.10it/s, total_batch_loss=70.13992, average_batch_loss=0.27398, average_batch_perplexity=1.31519, lr=0.001272466, 0.001272466]
Epoch: 7, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.10it/s, total_batch_loss=70.13992, average_batch_loss=0.27398, average_batch_perplexity=1.31519, lr=0.001272466, 0.001272466]
Epoch: 7, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.10it/s, total_batch_loss=72.43277, average_batch_loss=0.28294, average_batch_perplexity=1.32703, lr=0.001269183, 0.001269183]
Epoch: 7, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.08it/s, total_batch_loss=72.43277, average_batch_loss=0.28294, average_batch_perplexity=1.32703, lr=0.001269183, 0.001269183]
Epoch: 7, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.08it/s, total_batch_loss=72.43277, average_batch_loss=0.28294, average_batch_perplexity=1.32703, lr=0.001269183, 0.001269183]
Epoch: 7, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.08it/s, total_batch_loss=65.42526, average_batch_loss=0.25557, average_batch_perplexity=1.29119, lr=0.001265924, 0.001265924]
Epoch: 7, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.08it/s, total_batch_loss=65.42526, average_batch_loss=0.25557, average_batch_perplexity=1.29119, lr=0.001265924, 0.001265924]
Epoch: 7, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.08it/s, total_batch_loss=24.33375, average_batch_loss=0.27652, average_batch_perplexity=1.31853, lr=0.001262691, 0.001262691]
Epoch: 7, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.36it/s, total_batch_loss=24.33375, average_batch_loss=0.27652, average_batch_perplexity=1.31853, lr=0.001262691, 0.001262691]
                                                                                                                                                                                         
2024-09-20 14:10:26,540 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:26,540 - root - INFO - Total Batches:                   28
2024-09-20 14:10:26,541 - root - INFO - Average Epoch Train Loss:        0.31783
2024-09-20 14:10:26,541 - root - INFO - Average Epoch Train Perplexity:  1.37414
2024-09-20 14:10:26,542 - root - INFO - 
2024-09-20 14:10:26,542 - root - INFO - ====================================================== Starting Valid Epoch: 7/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 7, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 7, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=51.61576, average_batch_loss=0.20162, average_batch_perplexity=1.22339]
Epoch: 7, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=51.61576, average_batch_loss=0.20162, average_batch_perplexity=1.22339]
Epoch: 7, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=47.79982, average_batch_loss=0.18672, average_batch_perplexity=1.20529]
Epoch: 7, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=47.79982, average_batch_loss=0.18672, average_batch_perplexity=1.20529]
Epoch: 7, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=49.58239, average_batch_loss=0.19368, average_batch_perplexity=1.21371]
Epoch: 7, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=49.58239, average_batch_loss=0.19368, average_batch_perplexity=1.21371]
Epoch: 7, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=49.58239, average_batch_loss=0.19368, average_batch_perplexity=1.21371]
Epoch: 7, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=49.55921, average_batch_loss=0.19359, average_batch_perplexity=1.21360]
Epoch: 7, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=49.55921, average_batch_loss=0.19359, average_batch_perplexity=1.21360]
Epoch: 7, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=50.46335, average_batch_loss=0.19712, average_batch_perplexity=1.21789]
Epoch: 7, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=50.46335, average_batch_loss=0.19712, average_batch_perplexity=1.21789]
Epoch: 7, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.00it/s, total_batch_loss=50.37434, average_batch_loss=0.19677, average_batch_perplexity=1.21747]
Epoch: 7, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.28it/s, total_batch_loss=50.37434, average_batch_loss=0.19677, average_batch_perplexity=1.21747]
Epoch: 7, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.28it/s, total_batch_loss=50.37434, average_batch_loss=0.19677, average_batch_perplexity=1.21747]
Epoch: 7, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.28it/s, total_batch_loss=49.94740, average_batch_loss=0.19511, average_batch_perplexity=1.21544]
Epoch: 7, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.28it/s, total_batch_loss=49.94740, average_batch_loss=0.19511, average_batch_perplexity=1.21544]
Epoch: 7, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.28it/s, total_batch_loss=38.95474, average_batch_loss=0.18728, average_batch_perplexity=1.20597]
                                                                                                                                                         
2024-09-20 14:10:26,886 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:26,886 - root - INFO - Total Batches:                   8
2024-09-20 14:10:26,887 - root - INFO - Average Epoch Valid Loss:        0.19415
2024-09-20 14:10:26,887 - root - INFO - Average Epoch Valid Perplexity:  1.21428
2024-09-20 14:10:26,888 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.7773]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.8008]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.45it/s, accuracy: 0.8008]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.45it/s, accuracy: 0.8008]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.45it/s, accuracy: 0.7845]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.70it/s, accuracy: 0.7845]
                                                                                           
2024-09-20 14:10:27,268 - root - INFO - Correct/Total Samples:           791/1000
2024-09-20 14:10:27,269 - root - INFO - Eval Accuracy:                   0.791
2024-09-20 14:10:27,288 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     7            1             0 13+48=061 13+48=061     True
     7            1             1 16+55=071 16+55=071     True
     7            1             2 79+34=113 79+34=103    False
     7            1             3 35+44=079 35+44=089    False
     7            1             4 16+50=066 16+50=065    False
     7            1             5 28+47=075 28+47=075     True
     7            1             6 00+74=074 00+74=074     True
     7            1             7 15+20=035 15+20=035     True
     7            1             8 72+60=132 72+60=132     True
     7            1             9 63+68=131 63+68=131     True
     7            1            10 29+45=074 29+45=074     True
     7            1            11 34+60=094 34+60=094     True
     7            1            12 53+70=123 53+70=123     True
     7            1            13 70+50=120 70+50=120     True
     7            1            14 11+84=095 11+84=095     True
     7            1            15 42+71=113 42+71=113     True
     7            1            16 98+22=120 98+22=120     True
     7            1            17 02+02=004 02+02=014    False
     7            1            18 15+85=100 15+85=100     True
     7            1            19 21+78=099 21+78=099     True
     7            1            20 61+79=140 61+79=140     True
     7            1            21 25+99=124 25+99=124     True
     7            1            22 09+85=094 09+85=094     True
     7            1            23 60+91=151 60+91=151     True
     7            1            24 35+30=065 35+30=065     True
     7            1            25 24+51=075 24+51=075     True
     7            1            26 93+91=184 93+91=184     True
     7            1            27 39+96=135 39+96=135     True
     7            1            28 64+35=099 64+35=099     True
     7            1            29 36+22=058 36+22=058     True
     7            1            30 68+45=113 68+45=113     True
     7            1            31 16+84=100 16+84=100     True
     7            1            32 91+52=143 91+52=143     True
     7            1            33 97+36=133 97+36=133     True
     7            1            34 27+37=064 27+37=064     True
     7            1            35 99+82=181 99+82=181     True
     7            1            36 03+42=045 03+42=055    False
     7            1            37 18+38=056 18+38=056     True
     7            1            38 32+20=052 32+20=052     True
     7            1            39 38+13=051 38+13=051     True
     7            1            40 68+42=110 68+42=110     True
     7            1            41 64+00=064 64+00=074    False
     7            1            42 48+94=142 48+94=142     True
     7            1            43 58+36=094 58+36=094     True
     7            1            44 41+22=063 41+22=063     True
     7            1            45 23+58=081 23+58=081     True
     7            1            46 67+46=113 67+46=113     True
     7            1            47 40+78=118 40+78=117    False
     7            1            48 90+38=128 90+38=128     True
     7            1            49 89+52=141 89+52=141     True
     7            1            50 37+77=114 37+77=114     True
     7            1            51 29+76=105 29+76=105     True
     7            1            52 42+90=132 42+90=132     True
     7            1            53 45+82=127 45+82=137    False
     7            1            54 35+95=130 35+95=130     True
     7            1            55 92+98=190 92+98=180    False
     7            1            56 73+91=164 73+91=174    False
     7            1            57 53+97=150 53+97=150     True
     7            1            58 98+69=167 98+69=167     True
     7            1            59 20+46=066 20+46=065    False
     7            1            60 48+69=117 48+69=117     True
     7            1            61 62+31=093 62+31=092    False
     7            1            62 80+59=139 80+59=149    False
     7            1            63 58+12=070 58+12=070     True
     7            1            64 08+96=104 08+96=104     True
     7            1            65 67+06=073 67+06=073     True
     7            1            66 22+04=026 22+04=026     True
     7            1            67 61+87=148 61+87=158    False
     7            1            68 95+27=122 95+27=122     True
     7            1            69 49+83=132 49+83=132     True
     7            1            70 43+00=043 43+00=053    False
     7            1            71 01+85=086 01+85=086     True
     7            1            72 11+68=079 11+68=089    False
     7            1            73 80+03=083 80+03=083     True
     7            1            74 54+83=137 54+83=147    False
     7            1            75 73+47=120 73+47=120     True
     7            1            76 99+93=192 99+93=192     True
     7            1            77 99+13=112 99+13=112     True
     7            1            78 92+66=158 92+66=158     True
     7            1            79 90+31=121 90+31=121     True
     7            1            80 25+69=094 25+69=094     True
     7            1            81 25+44=069 25+44=079    False
     7            1            82 00+93=093 00+93=093     True
     7            1            83 88+87=175 88+87=175     True
     7            1            84 47+56=103 47+56=103     True
     7            1            85 43+59=102 43+59=102     True
     7            1            86 22+00=022 22+00=022     True
     7            1            87 34+04=038 34+04=048    False
     7            1            88 65+13=078 65+13=078     True
     7            1            89 39+82=121 39+82=122    False
     7            1            90 66+83=149 66+83=159    False
     7            1            91 51+69=120 51+69=120     True
     7            1            92 80+21=101 80+21=101     True
     7            1            93 36+79=115 36+79=115     True
     7            1            94 21+68=089 21+68=089     True
     7            1            95 11+66=077 11+66=077     True
     7            1            96 55+19=074 55+19=074     True
     7            1            97 51+61=112 51+61=112     True
     7            1            98 38+88=126 38+88=126     True
     7            1            99 37+27=064 37+27=064     True
     7            1           100 18+63=081 18+63=081     True
     7            1           101 48+11=059 48+11=069    False
     7            1           102 72+68=140 72+68=140     True
     7            1           103 37+39=076 37+39=076     True
     7            1           104 64+95=159 64+95=169    False
     7            1           105 49+75=124 49+75=124     True
     7            1           106 45+66=111 45+66=111     True
     7            1           107 34+87=121 34+87=121     True
     7            1           108 02+84=086 02+84=086     True
     7            1           109 95+00=095 95+00=095     True
     7            1           110 09+56=065 09+56=065     True
     7            1           111 22+66=088 22+66=088     True
     7            1           112 43+18=061 43+18=071    False
     7            1           113 61+35=096 61+35=096     True
     7            1           114 13+73=086 13+73=086     True
     7            1           115 25+95=120 25+95=120     True
     7            1           116 73+96=169 73+96=179    False
     7            1           117 03+96=099 03+96=099     True
     7            1           118 97+82=179 97+82=189    False
     7            1           119 18+42=060 18+42=060     True
     7            1           120 29+98=127 29+98=127     True
     7            1           121 61+00=061 61+00=061     True
     7            1           122 22+98=120 22+98=120     True
     7            1           123 12+50=062 12+50=062     True
     7            1           124 02+58=060 02+58=060     True
     7            1           125 75+86=161 75+86=161     True
     7            1           126 31+57=088 31+57=088     True
     7            1           127 49+82=131 49+82=131     True
     7            1           128 15+33=048 15+33=058    False
     7            1           129 49+57=106 49+57=106     True
     7            1           130 61+70=131 61+70=131     True
     7            1           131 91+51=142 91+51=142     True
     7            1           132 50+05=055 50+05=055     True
     7            1           133 44+16=060 44+16=060     True
     7            1           134 92+01=093 92+01=093     True
     7            1           135 85+82=167 85+82=167     True
     7            1           136 07+41=048 07+41=058    False
     7            1           137 40+06=046 40+06=055    False
     7            1           138 79+62=141 79+62=141     True
     7            1           139 95+62=157 95+62=157     True
     7            1           140 42+93=135 42+93=135     True
     7            1           141 32+73=105 32+73=105     True
     7            1           142 47+09=056 47+09=056     True
     7            1           143 59+50=109 59+50=109     True
     7            1           144 61+77=138 61+77=138     True
     7            1           145 64+06=070 64+06=070     True
     7            1           146 35+10=045 35+10=045     True
     7            1           147 32+88=120 32+88=120     True
     7            1           148 03+95=098 03+95=098     True
     7            1           149 11+38=049 11+38=059    False
     7            1           150 21+67=088 21+67=088     True
     7            1           151 33+25=058 33+25=058     True
     7            1           152 63+45=108 63+45=108     True
     7            1           153 56+12=068 56+12=078    False
     7            1           154 19+79=098 19+79=098     True
     7            1           155 60+43=103 60+43=103     True
     7            1           156 07+61=068 07+61=078    False
     7            1           157 58+03=061 58+03=061     True
     7            1           158 11+10=021 11+10=021     True
     7            1           159 49+89=138 49+89=138     True
     7            1           160 37+58=095 37+58=095     True
     7            1           161 59+78=137 59+78=137     True
     7            1           162 11+21=032 11+21=032     True
     7            1           163 37+43=080 37+43=080     True
     7            1           164 44+21=065 44+21=075    False
     7            1           165 22+97=119 22+97=119     True
     7            1           166 65+35=100 65+35=090    False
     7            1           167 06+51=057 06+51=067    False
     7            1           168 65+25=090 65+25=090     True
     7            1           169 74+94=168 74+94=178    False
     7            1           170 87+55=142 87+55=142     True
     7            1           171 90+67=157 90+67=157     True
     7            1           172 11+02=013 11+02=023    False
     7            1           173 01+66=067 01+66=077    False
     7            1           174 56+00=056 56+00=056     True
     7            1           175 58+52=110 58+52=110     True
     7            1           176 24+99=123 24+99=123     True
     7            1           177 97+13=110 97+13=110     True
     7            1           178 42+94=136 42+94=136     True
     7            1           179 60+15=075 60+15=075     True
     7            1           180 20+46=066 20+46=065    False
     7            1           181 40+70=110 40+70=110     True
     7            1           182 95+45=140 95+45=140     True
     7            1           183 96+95=191 96+95=181    False
     7            1           184 98+20=118 98+20=118     True
     7            1           185 43+19=062 43+19=072    False
     7            1           186 50+69=119 50+69=119     True
     7            1           187 27+53=080 27+53=080     True
     7            1           188 24+25=049 24+25=059    False
     7            1           189 65+92=157 65+92=157     True
     7            1           190 28+14=042 28+14=042     True
     7            1           191 20+57=077 20+57=077     True
     7            1           192 59+97=156 59+97=156     True
     7            1           193 98+32=130 98+32=130     True
     7            1           194 55+84=139 55+84=149    False
     7            1           195 20+39=059 20+39=069    False
     7            1           196 86+47=133 86+47=133     True
     7            1           197 92+36=128 92+36=138    False
     7            1           198 05+38=043 05+38=043     True
     7            1           199 77+36=113 77+36=113     True
     7            1           200 41+64=105 41+64=105     True
     7            1           201 74+51=125 74+51=125     True
     7            1           202 74+55=129 74+55=129     True
     7            1           203 64+64=128 64+64=128     True
     7            1           204 60+19=079 60+19=089    False
     7            1           205 77+96=173 77+96=173     True
     7            1           206 22+30=052 22+30=052     True
     7            1           207 82+49=131 82+49=131     True
     7            1           208 39+67=106 39+67=106     True
     7            1           209 62+40=102 62+40=102     True
     7            1           210 28+71=099 28+71=099     True
     7            1           211 47+26=073 47+26=073     True
     7            1           212 98+54=152 98+54=152     True
     7            1           213 38+70=108 38+70=108     True
     7            1           214 63+40=103 63+40=103     True
     7            1           215 86+62=148 86+62=148     True
     7            1           216 22+65=087 22+65=087     True
     7            1           217 41+17=058 41+17=068    False
     7            1           218 68+88=156 68+88=156     True
     7            1           219 96+70=166 96+70=175    False
     7            1           220 99+29=128 99+29=128     True
     7            1           221 83+39=122 83+39=122     True
     7            1           222 26+55=081 26+55=071    False
     7            1           223 53+70=123 53+70=123     True
     7            1           224 94+12=106 94+12=106     True
     7            1           225 00+37=037 00+37=047    False
     7            1           226 36+94=130 36+94=130     True
     7            1           227 40+58=098 40+58=098     True
     7            1           228 19+80=099 19+80=099     True
     7            1           229 49+44=093 49+44=093     True
     7            1           230 70+27=097 70+27=096    False
     7            1           231 52+80=132 52+80=132     True
     7            1           232 77+90=167 77+90=177    False
     7            1           233 13+92=105 13+92=105     True
     7            1           234 59+09=068 59+09=078    False
     7            1           235 33+55=088 33+55=088     True
     7            1           236 85+16=101 85+16=101     True
     7            1           237 25+65=090 25+65=090     True
     7            1           238 46+20=066 46+20=075    False
     7            1           239 29+52=081 29+52=081     True
     7            1           240 32+36=068 32+36=078    False
     7            1           241 47+08=055 47+08=055     True
     7            1           242 21+84=105 21+84=105     True
     7            1           243 24+45=069 24+45=079    False
     7            1           244 29+15=044 29+15=044     True
     7            1           245 83+03=086 83+03=086     True
     7            1           246 83+36=119 83+36=119     True
     7            1           247 58+95=153 58+95=153     True
     7            1           248 76+79=155 76+79=155     True
     7            1           249 63+30=093 63+30=093     True
     7            1           250 38+24=062 38+24=062     True
     7            1           251 19+46=065 19+46=065     True
     7            1           252 99+66=165 99+66=165     True
     7            1           253 95+73=168 95+73=178    False
     7            1           254 65+27=092 65+27=092     True
     7            1           255 91+83=174 91+83=174     True
     7            2             0 65+49=114 65+49=114     True
     7            2             1 03+08=011 03+08=011     True
     7            2             2 67+81=148 67+81=148     True
     7            2             3 47+23=070 47+23=070     True
     7            2             4 43+91=134 43+91=134     True
     7            2             5 41+67=108 41+67=108     True
     7            2             6 02+33=035 02+33=045    False
     7            2             7 64+84=148 64+84=158    False
     7            2             8 81+64=145 81+64=145     True
     7            2             9 80+11=091 80+11=091     True
     7            2            10 78+01=079 78+01=079     True
     7            2            11 89+18=107 89+18=107     True
     7            2            12 45+52=097 45+52=097     True
     7            2            13 35+30=065 35+30=065     True
     7            2            14 53+32=085 53+32=085     True
     7            2            15 49+90=139 49+90=149    False
     7            2            16 41+37=078 41+37=088    False
     7            2            17 35+14=049 35+14=059    False
     7            2            18 92+50=142 92+50=142     True
     7            2            19 37+60=097 37+60=097     True
     7            2            20 91+61=152 91+61=152     True
     7            2            21 80+77=157 80+77=157     True
     7            2            22 66+24=090 66+24=090     True
     7            2            23 81+07=088 81+07=098    False
     7            2            24 85+59=144 85+59=144     True
     7            2            25 19+69=088 19+69=088     True
     7            2            26 91+44=135 91+44=135     True
     7            2            27 25+29=054 25+29=054     True
     7            2            28 27+08=035 27+08=035     True
     7            2            29 66+14=080 66+14=080     True
     7            2            30 95+11=106 95+11=106     True
     7            2            31 13+97=110 13+97=100    False
     7            2            32 94+40=134 94+40=134     True
     7            2            33 74+31=105 74+31=105     True
     7            2            34 49+00=049 49+00=059    False
     7            2            35 59+18=077 59+18=077     True
     7            2            36 07+65=072 07+65=072     True
     7            2            37 83+55=138 83+55=138     True
     7            2            38 49+80=129 49+80=139    False
     7            2            39 64+17=081 64+17=081     True
     7            2            40 48+83=131 48+83=131     True
     7            2            41 95+44=139 95+44=149    False
     7            2            42 71+26=097 71+26=097     True
     7            2            43 06+74=080 06+74=080     True
     7            2            44 34+24=058 34+24=058     True
     7            2            45 59+71=130 59+71=130     True
     7            2            46 68+32=100 68+32=100     True
     7            2            47 38+81=119 38+81=129    False
     7            2            48 29+56=085 29+56=085     True
     7            2            49 54+55=109 54+55=109     True
     7            2            50 31+27=058 31+27=058     True
     7            2            51 97+89=186 97+89=186     True
     7            2            52 48+09=057 48+09=057     True
     7            2            53 86+76=162 86+76=163    False
     7            2            54 82+59=141 82+59=141     True
     7            2            55 01+67=068 01+67=078    False
     7            2            56 26+06=032 26+06=033    False
     7            2            57 22+46=068 22+46=078    False
     7            2            58 85+16=101 85+16=101     True
     7            2            59 29+08=037 29+08=037     True
     7            2            60 73+94=167 73+94=177    False
     7            2            61 19+62=081 19+62=081     True
     7            2            62 86+62=148 86+62=148     True
     7            2            63 38+99=137 38+99=137     True
     7            2            64 64+25=089 64+25=089     True
     7            2            65 61+72=133 61+72=133     True
     7            2            66 78+88=166 78+88=166     True
     7            2            67 43+66=109 43+66=109     True
     7            2            68 69+35=104 69+35=104     True
     7            2            69 33+77=110 33+77=100    False
     7            2            70 37+37=074 37+37=074     True
     7            2            71 87+54=141 87+54=141     True
     7            2            72 68+90=158 68+90=158     True
     7            2            73 83+44=127 83+44=137    False
     7            2            74 41+09=050 41+09=050     True
     7            2            75 13+48=061 13+48=061     True
     7            2            76 01+41=042 01+41=052    False
     7            2            77 19+74=093 19+74=093     True
     7            2            78 15+05=020 15+05=020     True
     7            2            79 55+46=101 55+46=101     True
     7            2            80 68+33=101 68+33=101     True
     7            2            81 44+40=084 44+40=084     True
     7            2            82 88+03=091 88+03=091     True
     7            2            83 81+79=160 81+79=160     True
     7            2            84 18+98=116 18+98=116     True
     7            2            85 70+64=134 70+64=134     True
     7            2            86 26+44=070 26+44=070     True
     7            2            87 98+87=185 98+87=185     True
     7            2            88 18+74=092 18+74=092     True
     7            2            89 50+68=118 50+68=117    False
     7            2            90 13+51=064 13+51=064     True
     7            2            91 90+89=179 90+89=189    False
     7            2            92 47+78=125 47+78=125     True
     7            2            93 81+57=138 81+57=138     True
     7            2            94 34+47=081 34+47=081     True
     7            2            95 94+23=117 94+23=117     True
     7            2            96 07+70=077 07+70=077     True
     7            2            97 56+33=089 56+33=089     True
     7            2            98 33+04=037 33+04=047    False
     7            2            99 26+09=035 26+09=035     True
     7            2           100 14+92=106 14+92=106     True
     7            2           101 78+54=132 78+54=132     True
     7            2           102 36+76=112 36+76=103    False
     7            2           103 17+47=064 17+47=064     True
     7            2           104 28+18=046 28+18=046     True
     7            2           105 78+54=132 78+54=132     True
     7            2           106 84+72=156 84+72=156     True
     7            2           107 00+44=044 00+44=054    False
     7            2           108 50+41=091 50+41=091     True
     7            2           109 87+88=175 87+88=175     True
     7            2           110 11+66=077 11+66=077     True
     7            2           111 80+60=140 80+60=140     True
     7            2           112 78+76=154 78+76=154     True
     7            2           113 24+74=098 24+74=098     True
     7            2           114 88+48=136 88+48=136     True
     7            2           115 38+31=069 38+31=079    False
     7            2           116 29+27=056 29+27=056     True
     7            2           117 08+45=053 08+45=053     True
     7            2           118 28+13=041 28+13=051    False
     7            2           119 53+99=152 53+99=152     True
     7            2           120 47+92=139 47+92=149    False
     7            2           121 76+21=097 76+21=097     True
     7            2           122 53+96=149 53+96=149     True
     7            2           123 93+91=184 93+91=184     True
     7            2           124 97+33=130 97+33=130     True
     7            2           125 67+78=145 67+78=145     True
     7            2           126 58+05=063 58+05=063     True
     7            2           127 00+16=016 00+16=025    False
     7            2           128 80+19=099 80+19=099     True
     7            2           129 98+22=120 98+22=120     True
     7            2           130 09+62=071 09+62=071     True
     7            2           131 06+23=029 06+23=039    False
     7            2           132 32+99=131 32+99=131     True
     7            2           133 17+02=019 17+02=029    False
     7            2           134 64+35=099 64+35=099     True
     7            2           135 35+83=118 35+83=118     True
     7            2           136 71+36=107 71+36=107     True
     7            2           137 75+06=081 75+06=081     True
     7            2           138 88+95=183 88+95=183     True
     7            2           139 19+98=117 19+98=117     True
     7            2           140 28+89=117 28+89=117     True
     7            2           141 33+11=044 33+11=044     True
     7            2           142 34+49=083 34+49=083     True
     7            2           143 90+35=125 90+35=125     True
     7            2           144 22+90=112 22+90=112     True
     7            2           145 98+89=187 98+89=187     True
     7            2           146 88+47=135 88+47=135     True
     7            2           147 30+86=116 30+86=115    False
     7            2           148 31+48=079 31+48=089    False
     7            2           149 39+21=060 39+21=060     True
     7            2           150 19+17=036 19+17=036     True
     7            2           151 27+60=087 27+60=087     True
     7            2           152 12+16=028 12+16=038    False
     7            2           153 51+75=126 51+75=126     True
     7            2           154 10+74=084 10+74=084     True
     7            2           155 42+63=105 42+63=105     True
     7            2           156 40+14=054 40+14=054     True
     7            2           157 23+93=116 23+93=116     True
     7            2           158 85+26=111 85+26=111     True
     7            2           159 28+46=074 28+46=074     True
     7            2           160 28+33=061 28+33=061     True
     7            2           161 43+30=073 43+30=073     True
     7            2           162 89+72=161 89+72=161     True
     7            2           163 52+21=073 52+21=073     True
     7            2           164 21+54=075 21+54=075     True
     7            2           165 69+13=082 69+13=082     True
     7            2           166 07+60=067 07+60=077    False
     7            2           167 63+83=146 63+83=146     True
     7            2           168 80+69=149 80+69=159    False
     7            2           169 27+28=055 27+28=055     True
     7            2           170 42+31=073 42+31=073     True
     7            2           171 51+99=150 51+99=150     True
     7            2           172 28+75=103 28+75=104    False
     7            2           173 38+57=095 38+57=095     True
     7            2           174 83+16=099 83+16=099     True
     7            2           175 92+94=186 92+94=186     True
     7            2           176 55+75=130 55+75=130     True
     7            2           177 59+51=110 59+51=100    False
     7            2           178 33+09=042 33+09=052    False
     7            2           179 53+13=066 53+13=066     True
     7            2           180 05+70=075 05+70=075     True
     7            2           181 12+20=032 12+20=032     True
     7            2           182 11+49=060 11+49=060     True
     7            2           183 63+45=108 63+45=108     True
     7            2           184 92+23=115 92+23=115     True
     7            2           185 82+45=127 82+45=137    False
     7            2           186 23+41=064 23+41=064     True
     7            2           187 64+26=090 64+26=090     True
     7            2           188 91+24=115 91+24=115     True
     7            2           189 20+32=052 20+32=052     True
     7            2           190 83+21=104 83+21=104     True
     7            2           191 07+20=027 07+20=037    False
     7            2           192 94+14=108 94+14=108     True
     7            2           193 96+89=185 96+89=185     True
     7            2           194 13+08=021 13+08=021     True
     7            2           195 32+05=037 32+05=047    False
     7            2           196 09+51=060 09+51=060     True
     7            2           197 26+29=055 26+29=055     True
     7            2           198 49+65=114 49+65=114     True
     7            2           199 32+66=098 32+66=098     True
     7            2           200 41+08=049 41+08=059    False
     7            2           201 26+79=105 26+79=105     True
     7            2           202 29+91=120 29+91=120     True
     7            2           203 51+00=051 51+00=051     True
     7            2           204 61+60=121 61+60=121     True
     7            2           205 45+78=123 45+78=123     True
     7            2           206 56+16=072 56+16=073    False
     7            2           207 66+68=134 66+68=134     True
     7            2           208 32+16=048 32+16=058    False
     7            2           209 84+49=133 84+49=133     True
     7            2           210 45+09=054 45+09=054     True
     7            2           211 96+78=174 96+78=174     True
     7            2           212 10+02=012 10+02=022    False
     7            2           213 36+60=096 36+60=096     True
     7            2           214 44+36=080 44+36=080     True
     7            2           215 12+86=098 12+86=098     True
     7            2           216 94+54=148 94+54=148     True
     7            2           217 64+73=137 64+73=137     True
     7            2           218 73+10=083 73+10=083     True
     7            2           219 14+62=076 14+62=076     True
     7            2           220 25+22=047 25+22=057    False
     7            2           221 94+22=116 94+22=116     True
     7            2           222 41+76=117 41+76=117     True
     7            2           223 38+46=084 38+46=084     True
     7            2           224 71+72=143 71+72=143     True
     7            2           225 74+79=153 74+79=153     True
     7            2           226 99+67=166 99+67=166     True
     7            2           227 78+71=149 78+71=159    False
     7            2           228 23+19=042 23+19=042     True
     7            2           229 51+65=116 51+65=116     True
     7            2           230 94+86=180 94+86=180     True
     7            2           231 09+79=088 09+79=088     True
     7            2           232 69+39=108 69+39=108     True
     7            2           233 84+13=097 84+13=097     True
     7            2           234 36+59=095 36+59=095     True
     7            2           235 87+47=134 87+47=134     True
     7            2           236 50+00=050 50+00=050     True
     7            2           237 76+96=172 76+96=173    False
     7            2           238 12+18=030 12+18=030     True
     7            2           239 99+95=194 99+95=184    False
     7            2           240 22+00=022 22+00=022     True
     7            2           241 96+18=114 96+18=114     True
     7            2           242 51+20=071 51+20=071     True
     7            2           243 66+81=147 66+81=147     True
     7            2           244 78+18=096 78+18=096     True
     7            2           245 09+78=087 09+78=087     True
     7            2           246 24+20=044 24+20=044     True
     7            2           247 76+13=089 76+13=089     True
     7            2           248 05+10=015 05+10=015     True
     7            2           249 97+14=111 97+14=111     True
     7            2           250 92+38=130 92+38=130     True
     7            2           251 77+13=090 77+13=090     True
     7            2           252 70+19=089 70+19=099    False
     7            2           253 90+45=135 90+45=135     True
     7            2           254 50+09=059 50+09=069    False
     7            2           255 78+06=084 78+06=084     True
     7            3             0 03+25=028 03+25=028     True
     7            3             1 48+43=091 48+43=091     True
     7            3             2 39+47=086 39+47=086     True
     7            3             3 48+19=067 48+19=067     True
     7            3             4 07+22=029 07+22=039    False
     7            3             5 73+68=141 73+68=141     True
     7            3             6 14+56=070 14+56=070     True
     7            3             7 96+95=191 96+95=181    False
     7            3             8 96+28=124 96+28=124     True
     7            3             9 82+05=087 82+05=097    False
     7            3            10 27+94=121 27+94=121     True
     7            3            11 87+86=173 87+86=173     True
     7            3            12 00+68=068 00+68=078    False
     7            3            13 11+37=048 11+37=058    False
     7            3            14 95+93=188 95+93=188     True
     7            3            15 75+82=157 75+82=157     True
     7            3            16 41+71=112 41+71=112     True
     7            3            17 60+14=074 60+14=074     True
     7            3            18 77+77=154 77+77=154     True
     7            3            19 31+84=115 31+84=115     True
     7            3            20 31+57=088 31+57=088     True
     7            3            21 27+87=114 27+87=114     True
     7            3            22 31+89=120 31+89=120     True
     7            3            23 22+18=040 22+18=040     True
     7            3            24 38+25=063 38+25=063     True
     7            3            25 64+54=118 64+54=118     True
     7            3            26 85+60=145 85+60=145     True
     7            3            27 14+71=085 14+71=085     True
     7            3            28 06+16=022 06+16=023    False
     7            3            29 78+61=139 78+61=149    False
     7            3            30 65+75=140 65+75=140     True
     7            3            31 13+83=096 13+83=096     True
     7            3            32 75+49=124 75+49=124     True
     7            3            33 05+78=083 05+78=083     True
     7            3            34 66+55=121 66+55=121     True
     7            3            35 03+05=008 03+05=007    False
     7            3            36 69+99=168 69+99=179    False
     7            3            37 52+82=134 52+82=134     True
     7            3            38 45+97=142 45+97=142     True
     7            3            39 66+17=083 66+17=083     True
     7            3            40 36+17=053 36+17=053     True
     7            3            41 92+74=166 92+74=176    False
     7            3            42 48+44=092 48+44=092     True
     7            3            43 34+17=051 34+17=051     True
     7            3            44 56+11=067 56+11=077    False
     7            3            45 77+23=100 77+23=090    False
     7            3            46 10+11=021 10+11=021     True
     7            3            47 32+65=097 32+65=097     True
     7            3            48 53+49=102 53+49=102     True
     7            3            49 68+86=154 68+86=154     True
     7            3            50 52+94=146 52+94=146     True
     7            3            51 97+71=168 97+71=178    False
     7            3            52 05+37=042 05+37=052    False
     7            3            53 58+75=133 58+75=133     True
     7            3            54 06+24=030 06+24=030     True
     7            3            55 15+44=059 15+44=069    False
     7            3            56 90+49=139 90+49=149    False
     7            3            57 50+37=087 50+37=087     True
     7            3            58 88+61=149 88+61=159    False
     7            3            59 21+57=078 21+57=078     True
     7            3            60 24+85=109 24+85=109     True
     7            3            61 01+66=067 01+66=077    False
     7            3            62 50+46=096 50+46=095    False
     7            3            63 76+65=141 76+65=141     True
     7            3            64 23+74=097 23+74=097     True
     7            3            65 76+16=092 76+16=093    False
     7            3            66 06+08=014 06+08=014     True
     7            3            67 69+25=094 69+25=094     True
     7            3            68 15+23=038 15+23=048    False
     7            3            69 41+02=043 41+02=053    False
     7            3            70 16+66=082 16+66=082     True
     7            3            71 59+94=153 59+94=153     True
     7            3            72 32+88=120 32+88=120     True
     7            3            73 46+21=067 46+21=077    False
     7            3            74 57+28=085 57+28=085     True
     7            3            75 00+31=031 00+31=031     True
     7            3            76 77+07=084 77+07=084     True
     7            3            77 28+70=098 28+70=098     True
     7            3            78 05+61=066 05+61=076    False
     7            3            79 22+09=031 22+09=031     True
     7            3            80 08+94=102 08+94=102     True
     7            3            81 40+11=051 40+11=051     True
     7            3            82 10+48=058 10+48=058     True
     7            3            83 27+56=083 27+56=083     True
     7            3            84 42+16=058 42+16=068    False
     7            3            85 69+43=112 69+43=112     True
     7            3            86 57+69=126 57+69=126     True
     7            3            87 18+86=104 18+86=104     True
     7            3            88 86+80=166 86+80=165    False
     7            3            89 30+85=115 30+85=115     True
     7            3            90 77+66=143 77+66=143     True
     7            3            91 39+64=103 39+64=103     True
     7            3            92 76+61=137 76+61=137     True
     7            3            93 42+61=103 42+61=103     True
     7            3            94 07+30=037 07+30=047    False
     7            3            95 35+93=128 35+93=138    False
     7            3            96 40+90=130 40+90=130     True
     7            3            97 08+91=099 08+91=099     True
     7            3            98 62+34=096 62+34=096     True
     7            3            99 86+49=135 86+49=135     True
     7            3           100 73+23=096 73+23=096     True
     7            3           101 87+35=122 87+35=122     True
     7            3           102 35+31=066 35+31=076    False
     7            3           103 07+13=020 07+13=020     True
     7            3           104 39+41=080 39+41=080     True
     7            3           105 44+63=107 44+63=107     True
     7            3           106 94+66=160 94+66=160     True
     7            3           107 49+54=103 49+54=103     True
     7            3           108 79+46=125 79+46=125     True
     7            3           109 53+12=065 53+12=065     True
     7            3           110 60+92=152 60+92=152     True
     7            3           111 25+60=085 25+60=085     True
     7            3           112 64+53=117 64+53=117     True
     7            3           113 41+02=043 41+02=053    False
     7            3           114 00+97=097 00+97=096    False
     7            3           115 12+52=064 12+52=064     True
     7            3           116 39+50=089 39+50=089     True
     7            3           117 87+21=108 87+21=108     True
     7            3           118 04+99=103 04+99=103     True
     7            3           119 19+75=094 19+75=094     True
     7            3           120 90+05=095 90+05=095     True
     7            3           121 54+39=093 54+39=093     True
     7            3           122 29+26=055 29+26=055     True
     7            3           123 82+95=177 82+95=177     True
     7            3           124 55+09=064 55+09=064     True
     7            3           125 02+62=064 02+62=064     True
     7            3           126 68+30=098 68+30=098     True
     7            3           127 99+16=115 99+16=115     True
     7            3           128 63+11=074 63+11=074     True
     7            3           129 42+92=134 42+92=134     True
     7            3           130 99+16=115 99+16=115     True
     7            3           131 50+31=081 50+31=081     True
     7            3           132 23+46=069 23+46=079    False
     7            3           133 45+73=118 45+73=118     True
     7            3           134 89+77=166 89+77=166     True
     7            3           135 45+78=123 45+78=123     True
     7            3           136 96+60=156 96+60=156     True
     7            3           137 74+61=135 74+61=135     True
     7            3           138 87+01=088 87+01=098    False
     7            3           139 63+88=151 63+88=151     True
     7            3           140 59+72=131 59+72=131     True
     7            3           141 17+96=113 17+96=113     True
     7            3           142 89+77=166 89+77=166     True
     7            3           143 24+69=093 24+69=093     True
     7            3           144 75+83=158 75+83=158     True
     7            3           145 50+54=104 50+54=104     True
     7            3           146 93+47=140 93+47=140     True
     7            3           147 20+55=075 20+55=075     True
     7            3           148 91+79=170 91+79=170     True
     7            3           149 15+13=028 15+13=028     True
     7            3           150 86+09=095 86+09=095     True
     7            3           151 29+58=087 29+58=087     True
     7            3           152 01+29=030 01+29=030     True
     7            3           153 65+48=113 65+48=113     True
     7            3           154 96+45=141 96+45=141     True
     7            3           155 58+69=127 58+69=127     True
     7            3           156 84+43=127 84+43=137    False
     7            3           157 90+38=128 90+38=128     True
     7            3           158 39+97=136 39+97=136     True
     7            3           159 74+84=158 74+84=158     True
     7            3           160 86+22=108 86+22=108     True
     7            3           161 01+86=087 01+86=097    False
     7            3           162 81+63=144 81+63=144     True
     7            3           163 80+94=174 80+94=174     True
     7            3           164 44+42=086 44+42=086     True
     7            3           165 72+60=132 72+60=132     True
     7            3           166 28+07=035 28+07=035     True
     7            3           167 69+54=123 69+54=123     True
     7            3           168 68+77=145 68+77=145     True
     7            3           169 90+16=106 90+16=106     True
     7            3           170 64+50=114 64+50=114     True
     7            3           171 46+88=134 46+88=134     True
     7            3           172 55+99=154 55+99=154     True
     7            3           173 31+97=128 31+97=138    False
     7            3           174 79+28=107 79+28=107     True
     7            3           175 81+43=124 81+43=124     True
     7            3           176 41+15=056 41+15=066    False
     7            3           177 38+77=115 38+77=115     True
     7            3           178 25+06=031 25+06=031     True
     7            3           179 01+93=094 01+93=094     True
     7            3           180 97+22=119 97+22=119     True
     7            3           181 71+84=155 71+84=155     True
     7            3           182 26+36=062 26+36=062     True
     7            3           183 60+92=152 60+92=152     True
     7            3           184 02+94=096 02+94=096     True
     7            3           185 31+58=089 31+58=099    False
     7            3           186 70+52=122 70+52=122     True
     7            3           187 19+42=061 19+42=061     True
     7            3           188 95+73=168 95+73=178    False
     7            3           189 21+25=046 21+25=056    False
     7            3           190 13+58=071 13+58=071     True
     7            3           191 62+28=090 62+28=090     True
     7            3           192 38+14=052 38+14=052     True
     7            3           193 66+75=141 66+75=141     True
     7            3           194 24+59=083 24+59=083     True
     7            3           195 97+66=163 97+66=163     True
     7            3           196 76+70=146 76+70=146     True
     7            3           197 08+40=048 08+40=058    False
     7            3           198 84+00=084 84+00=094    False
     7            3           199 54+73=127 54+73=127     True
     7            3           200 16+88=104 16+88=104     True
     7            3           201 99+47=146 99+47=146     True
     7            3           202 31+95=126 31+95=126     True
     7            3           203 01+79=080 01+79=080     True
     7            3           204 03+68=071 03+68=071     True
     7            3           205 10+05=015 10+05=015     True
     7            3           206 98+90=188 98+90=188     True
     7            3           207 58+53=111 58+53=111     True
     7            3           208 34+87=121 34+87=121     True
     7            3           209 07+31=038 07+31=048    False
     7            3           210 59+08=067 59+08=067     True
     7            3           211 51+38=089 51+38=099    False
     7            3           212 62+62=124 62+62=124     True
     7            3           213 80+32=112 80+32=112     True
     7            3           214 69+16=085 69+16=085     True
     7            3           215 01+17=018 01+17=027    False
     7            3           216 74+41=115 74+41=115     True
     7            3           217 20+89=109 20+89=109     True
     7            3           218 53+50=103 53+50=103     True
     7            3           219 82+85=167 82+85=167     True
     7            3           220 34+47=081 34+47=081     True
     7            3           221 34+45=079 34+45=089    False
     7            3           222 77+34=111 77+34=111     True
     7            3           223 56+33=089 56+33=089     True
     7            3           224 97+56=153 97+56=153     True
     7            3           225 29+06=035 29+06=035     True
     7            3           226 78+96=174 78+96=174     True
     7            3           227 28+65=093 28+65=093     True
     7            3           228 61+64=125 61+64=125     True
     7            3           229 32+64=096 32+64=096     True
     7            3           230 98+32=130 98+32=130     True
     7            3           231 25+35=060 25+35=060     True
     7            3           232 05+08=013 05+08=013     True
     7            3           233 05+26=031 05+26=031     True
     7            3           234 84+71=155 84+71=155     True
     7            3           235 33+10=043 33+10=043     True
     7            3           236 98+35=133 98+35=133     True
     7            3           237 68+98=166 68+98=166     True
     7            3           238 03+63=066 03+63=076    False
     7            3           239 12+96=108 12+96=108     True
     7            3           240 02+81=083 02+81=083     True
     7            3           241 83+13=096 83+13=096     True
     7            3           242 55+92=147 55+92=147     True
     7            3           243 96+09=105 96+09=105     True
     7            3           244 61+08=069 61+08=079    False
     7            3           245 39+75=114 39+75=114     True
     7            3           246 40+74=114 40+74=114     True
     7            3           247 39+80=119 39+80=129    False
     7            3           248 57+95=152 57+95=152     True
     7            3           249 92+97=189 92+97=189     True
     7            3           250 33+03=036 33+03=046    False
     7            3           251 74+92=166 74+92=176    False
     7            3           252 99+09=108 99+09=108     True
     7            3           253 98+10=108 98+10=108     True
     7            3           254 46+77=123 46+77=123     True
     7            3           255 85+78=163 85+78=163     True
     7            4             0 41+21=062 41+21=062     True
     7            4             1 49+13=062 49+13=062     True
     7            4             2 59+07=066 59+07=066     True
     7            4             3 31+11=042 31+11=042     True
     7            4             4 74+16=090 74+16=090     True
     7            4             5 43+38=081 43+38=081     True
     7            4             6 08+67=075 08+67=075     True
     7            4             7 31+66=097 31+66=097     True
     7            4             8 10+31=041 10+31=041     True
     7            4             9 34+59=093 34+59=093     True
     7            4            10 78+42=120 78+42=120     True
     7            4            11 13+41=054 13+41=054     True
     7            4            12 97+89=186 97+89=186     True
     7            4            13 15+62=077 15+62=077     True
     7            4            14 39+36=075 39+36=075     True
     7            4            15 21+25=046 21+25=056    False
     7            4            16 74+56=130 74+56=130     True
     7            4            17 85+47=132 85+47=132     True
     7            4            18 47+32=079 47+32=089    False
     7            4            19 37+66=103 37+66=103     True
     7            4            20 16+29=045 16+29=045     True
     7            4            21 86+77=163 86+77=163     True
     7            4            22 80+07=087 80+07=096    False
     7            4            23 87+05=092 87+05=092     True
     7            4            24 58+16=074 58+16=074     True
     7            4            25 52+79=131 52+79=131     True
     7            4            26 91+08=099 91+08=099     True
     7            4            27 47+78=125 47+78=125     True
     7            4            28 86+96=182 86+96=183    False
     7            4            29 90+22=112 90+22=112     True
     7            4            30 31+18=049 31+18=059    False
     7            4            31 86+15=101 86+15=101     True
     7            4            32 15+95=110 15+95=100    False
     7            4            33 42+11=053 42+11=053     True
     7            4            34 65+99=164 65+99=164     True
     7            4            35 89+29=118 89+29=118     True
     7            4            36 35+11=046 35+11=056    False
     7            4            37 71+41=112 71+41=112     True
     7            4            38 16+24=040 16+24=040     True
     7            4            39 77+82=159 77+82=169    False
     7            4            40 55+89=144 55+89=144     True
     7            4            41 17+88=105 17+88=105     True
     7            4            42 54+72=126 54+72=126     True
     7            4            43 34+98=132 34+98=132     True
     7            4            44 09+97=106 09+97=106     True
     7            4            45 91+07=098 91+07=098     True
     7            4            46 55+94=149 55+94=159    False
     7            4            47 22+58=080 22+58=080     True
     7            4            48 91+37=128 91+37=138    False
     7            4            49 16+10=026 16+10=025    False
     7            4            50 96+32=128 96+32=138    False
     7            4            51 35+75=110 35+75=100    False
     7            4            52 88+73=161 88+73=161     True
     7            4            53 35+18=053 35+18=053     True
     7            4            54 33+10=043 33+10=043     True
     7            4            55 08+50=058 08+50=068    False
     7            4            56 22+62=084 22+62=084     True
     7            4            57 26+37=063 26+37=063     True
     7            4            58 80+27=107 80+27=107     True
     7            4            59 68+28=096 68+28=096     True
     7            4            60 48+03=051 48+03=061    False
     7            4            61 40+18=058 40+18=068    False
     7            4            62 16+59=075 16+59=075     True
     7            4            63 02+19=021 02+19=021     True
     7            4            64 01+09=010 01+09=010     True
     7            4            65 62+68=130 62+68=130     True
     7            4            66 09+71=080 09+71=080     True
     7            4            67 00+58=058 00+58=068    False
     7            4            68 16+45=061 16+45=061     True
     7            4            69 24+98=122 24+98=122     True
     7            4            70 47+92=139 47+92=149    False
     7            4            71 94+84=178 94+84=178     True
     7            4            72 21+32=053 21+32=053     True
     7            4            73 29+82=111 29+82=112    False
     7            4            74 32+79=111 32+79=111     True
     7            4            75 13+98=111 13+98=111     True
     7            4            76 41+94=135 41+94=135     True
     7            4            77 51+84=135 51+84=135     True
     7            4            78 42+05=047 42+05=057    False
     7            4            79 39+03=042 39+03=042     True
     7            4            80 02+92=094 02+92=094     True
     7            4            81 99+81=180 99+81=180     True
     7            4            82 32+68=100 32+68=100     True
     7            4            83 52+17=069 52+17=079    False
     7            4            84 56+58=114 56+58=114     True
     7            4            85 21+48=069 21+48=079    False
     7            4            86 61+71=132 61+71=132     True
     7            4            87 17+01=018 17+01=028    False
     7            4            88 68+23=091 68+23=091     True
     7            4            89 00+37=037 00+37=047    False
     7            4            90 94+88=182 94+88=182     True
     7            4            91 06+31=037 06+31=047    False
     7            4            92 27+18=045 27+18=045     True
     7            4            93 41+81=122 41+81=122     True
     7            4            94 15+86=101 15+86=101     True
     7            4            95 36+87=123 36+87=123     True
     7            4            96 17+37=054 17+37=054     True
     7            4            97 13+86=099 13+86=099     True
     7            4            98 29+69=098 29+69=098     True
     7            4            99 31+99=130 31+99=130     True
     7            4           100 47+29=076 47+29=076     True
     7            4           101 08+81=089 08+81=099    False
     7            4           102 72+82=154 72+82=154     True
     7            4           103 46+91=137 46+91=137     True
     7            4           104 70+35=105 70+35=105     True
     7            4           105 90+55=145 90+55=145     True
     7            4           106 99+99=198 99+99=199    False
     7            4           107 60+97=157 60+97=157     True
     7            4           108 03+40=043 03+40=053    False
     7            4           109 35+49=084 35+49=084     True
     7            4           110 32+02=034 32+02=044    False
     7            4           111 70+18=088 70+18=088     True
     7            4           112 99+05=104 99+05=104     True
     7            4           113 78+73=151 78+73=151     True
     7            4           114 03+02=005 03+02=005     True
     7            4           115 50+14=064 50+14=064     True
     7            4           116 62+02=064 62+02=064     True
     7            4           117 16+74=090 16+74=090     True
     7            4           118 68+65=133 68+65=133     True
     7            4           119 74+81=155 74+81=155     True
     7            4           120 37+48=085 37+48=085     True
     7            4           121 63+04=067 63+04=077    False
     7            4           122 06+62=068 06+62=078    False
     7            4           123 95+75=170 95+75=170     True
     7            4           124 92+37=129 92+37=139    False
     7            4           125 81+32=113 81+32=113     True
     7            4           126 53+28=081 53+28=081     True
     7            4           127 52+42=094 52+42=094     True
     7            4           128 66+97=163 66+97=163     True
     7            4           129 00+48=048 00+48=058    False
     7            4           130 65+32=097 65+32=097     True
     7            4           131 60+89=149 60+89=159    False
     7            4           132 71+61=132 71+61=132     True
     7            4           133 98+50=148 98+50=148     True
     7            4           134 90+96=186 90+96=186     True
     7            4           135 02+96=098 02+96=098     True
     7            4           136 62+75=137 62+75=137     True
     7            4           137 41+28=069 41+28=079    False
     7            4           138 95+79=174 95+79=174     True
     7            4           139 48+41=089 48+41=089     True
     7            4           140 87+95=182 87+95=182     True
     7            4           141 75+38=113 75+38=113     True
     7            4           142 31+55=086 31+55=086     True
     7            4           143 54+63=117 54+63=117     True
     7            4           144 75+82=157 75+82=157     True
     7            4           145 46+45=091 46+45=091     True
     7            4           146 13+08=021 13+08=021     True
     7            4           147 77+97=174 77+97=174     True
     7            4           148 37+35=072 37+35=072     True
     7            4           149 21+89=110 21+89=110     True
     7            4           150 58+51=109 58+51=109     True
     7            4           151 91+48=139 91+48=149    False
     7            4           152 33+23=056 33+23=056     True
     7            4           153 80+96=176 80+96=175    False
     7            4           154 78+02=080 78+02=080     True
     7            4           155 38+95=133 38+95=133     True
     7            4           156 99+25=124 99+25=124     True
     7            4           157 30+76=106 30+76=106     True
     7            4           158 42+40=082 42+40=082     True
     7            4           159 85+58=143 85+58=143     True
     7            4           160 44+46=090 44+46=090     True
     7            4           161 06+41=047 06+41=057    False
     7            4           162 65+90=155 65+90=155     True
     7            4           163 43+83=126 43+83=136    False
     7            4           164 36+61=097 36+61=097     True
     7            4           165 61+51=112 61+51=112     True
     7            4           166 38+09=047 38+09=047     True
     7            4           167 21+97=118 21+97=117    False
     7            4           168 83+30=113 83+30=113     True
     7            4           169 11+79=090 11+79=090     True
     7            4           170 14+29=043 14+29=043     True
     7            4           171 21+11=032 21+11=032     True
     7            4           172 43+53=096 43+53=096     True
     7            4           173 02+58=060 02+58=060     True
     7            4           174 78+82=160 78+82=160     True
     7            4           175 91+11=102 91+11=102     True
     7            4           176 58+54=112 58+54=112     True
     7            4           177 00+15=015 00+15=025    False
     7            4           178 83+51=134 83+51=134     True
     7            4           179 44+72=116 44+72=116     True
     7            4           180 71+20=091 71+20=091     True
     7            4           181 24+99=123 24+99=123     True
     7            4           182 46+30=076 46+30=075    False
     7            4           183 08+67=075 08+67=075     True
     7            4           184 47+42=089 47+42=089     True
     7            4           185 95+67=162 95+67=162     True
     7            4           186 40+56=096 40+56=095    False
     7            4           187 17+95=112 17+95=112     True
     7            4           188 94+66=160 94+66=160     True
     7            4           189 14+58=072 14+58=072     True
     7            4           190 56+05=061 56+05=061     True
     7            4           191 70+01=071 70+01=071     True
     7            4           192 97+59=156 97+59=156     True
     7            4           193 94+67=161 94+67=161     True
     7            4           194 13+41=054 13+41=054     True
     7            4           195 85+15=100 85+15=100     True
     7            4           196 48+53=101 48+53=101     True
     7            4           197 62+75=137 62+75=137     True
     7            4           198 87+47=134 87+47=134     True
     7            4           199 31+88=119 31+88=129    False
     7            4           200 97+16=113 97+16=113     True
     7            4           201 48+45=093 48+45=093     True
     7            4           202 99+00=099 99+00=099     True
     7            4           203 15+01=016 15+01=026    False
     7            4           204 28+96=124 28+96=124     True
     7            4           205 20+11=031 20+11=031     True
     7            4           206 07+56=063 07+56=063     True
     7            4           207 06+08=014 06+08=014     True
     7            4           208 45+46=091 45+46=091     True
     7            4           209 48+85=133 48+85=133     True
     7            4           210 62+14=076 62+14=076     True
     7            4           211 82+31=113 82+31=113     True
     7            4           212 85+88=173 85+88=173     True
     7            4           213 77+08=085 77+08=085     True
     7            4           214 16+64=080 16+64=080     True
     7            4           215 00+27=027 00+27=036    False
     7            4           216 36+75=111 36+75=111     True
     7            4           217 38+38=076 38+38=076     True
     7            4           218 88+32=120 88+32=120     True
     7            4           219 09+88=097 09+88=097     True
     7            4           220 96+87=183 96+87=183     True
     7            4           221 71+29=100 71+29=090    False
     7            4           222 99+13=112 99+13=112     True
     7            4           223 03+13=016 03+13=026    False
     7            4           224 67+23=090 67+23=090     True
     7            4           225 15+98=113 15+98=113     True
     7            4           226 10+08=018 10+08=028    False
     7            4           227 46+24=070 46+24=070     True
     7            4           228 55+63=118 55+63=118     True
     7            4           229 28+06=034 28+06=034     True
     7            4           230 43+87=130 43+87=130     True
     7            4           231 34+05=039 34+05=049    False
2024-09-20 14:10:27,289 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:27,290 - root - INFO - ====================================================== Starting Train Epoch: 8/9 ======================================================
2024-09-20 14:10:27,291 - root - INFO - Learning rates for each parameter group: 0.00125948180464969628, 0.00125948180464969628
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 8, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 8, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=70.02212, average_batch_loss=0.27352, average_batch_perplexity=1.31459, lr=0.001259482, 0.001259482]
Epoch: 8, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=70.02212, average_batch_loss=0.27352, average_batch_perplexity=1.31459, lr=0.001259482, 0.001259482]
Epoch: 8, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=64.04005, average_batch_loss=0.25016, average_batch_perplexity=1.28423, lr=0.001256297, 0.001256297]
Epoch: 8, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=64.04005, average_batch_loss=0.25016, average_batch_perplexity=1.28423, lr=0.001256297, 0.001256297]
Epoch: 8, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=64.04005, average_batch_loss=0.25016, average_batch_perplexity=1.28423, lr=0.001256297, 0.001256297]
Epoch: 8, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=69.81609, average_batch_loss=0.27272, average_batch_perplexity=1.31353, lr=0.001253137, 0.001253137]
2024-09-20 14:10:27,628 - root - INFO - Epoch: 8, Step: 200, Avg Batch Loss: 0.23207, Avg Batch Perplexity: 1.26121, LR: 0.001250000
Epoch: 8, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=69.81609, average_batch_loss=0.27272, average_batch_perplexity=1.31353, lr=0.001253137, 0.001253137]
Epoch: 8, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.94it/s, total_batch_loss=59.41085, average_batch_loss=0.23207, average_batch_perplexity=1.26121, lr=0.001250000, 0.001250000]
Epoch: 8, Step: 4:  14%|█▍        | 4/28 [00:00<00:02, 11.85it/s, total_batch_loss=59.41085, average_batch_loss=0.23207, average_batch_perplexity=1.26121, lr=0.001250000, 0.001250000]
Epoch: 8, Step: 5:  14%|█▍        | 4/28 [00:00<00:02, 11.85it/s, total_batch_loss=59.41085, average_batch_loss=0.23207, average_batch_perplexity=1.26121, lr=0.001250000, 0.001250000]
Epoch: 8, Step: 5:  14%|█▍        | 4/28 [00:00<00:02, 11.85it/s, total_batch_loss=65.20641, average_batch_loss=0.25471, average_batch_perplexity=1.29009, lr=0.001246887, 0.001246887]
Epoch: 8, Step: 6:  14%|█▍        | 4/28 [00:00<00:02, 11.85it/s, total_batch_loss=65.20641, average_batch_loss=0.25471, average_batch_perplexity=1.29009, lr=0.001246887, 0.001246887]
Epoch: 8, Step: 6:  14%|█▍        | 4/28 [00:00<00:02, 11.85it/s, total_batch_loss=58.38728, average_batch_loss=0.22808, average_batch_perplexity=1.25618, lr=0.001243796, 0.001243796]
Epoch: 8, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 11.96it/s, total_batch_loss=58.38728, average_batch_loss=0.22808, average_batch_perplexity=1.25618, lr=0.001243796, 0.001243796]
Epoch: 8, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 11.96it/s, total_batch_loss=58.38728, average_batch_loss=0.22808, average_batch_perplexity=1.25618, lr=0.001243796, 0.001243796]
Epoch: 8, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 11.96it/s, total_batch_loss=65.68538, average_batch_loss=0.25658, average_batch_perplexity=1.29251, lr=0.001240729, 0.001240729]
Epoch: 8, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 11.96it/s, total_batch_loss=65.68538, average_batch_loss=0.25658, average_batch_perplexity=1.29251, lr=0.001240729, 0.001240729]
Epoch: 8, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 11.96it/s, total_batch_loss=64.49298, average_batch_loss=0.25193, average_batch_perplexity=1.28650, lr=0.001237684, 0.001237684]
Epoch: 8, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 12.04it/s, total_batch_loss=64.49298, average_batch_loss=0.25193, average_batch_perplexity=1.28650, lr=0.001237684, 0.001237684]
Epoch: 8, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.04it/s, total_batch_loss=64.49298, average_batch_loss=0.25193, average_batch_perplexity=1.28650, lr=0.001237684, 0.001237684]
Epoch: 8, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.04it/s, total_batch_loss=56.49179, average_batch_loss=0.22067, average_batch_perplexity=1.24691, lr=0.001234662, 0.001234662]
Epoch: 8, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.04it/s, total_batch_loss=56.49179, average_batch_loss=0.22067, average_batch_perplexity=1.24691, lr=0.001234662, 0.001234662]
Epoch: 8, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.04it/s, total_batch_loss=63.72670, average_batch_loss=0.24893, average_batch_perplexity=1.28266, lr=0.001231662, 0.001231662]
Epoch: 8, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=63.72670, average_batch_loss=0.24893, average_batch_perplexity=1.28266, lr=0.001231662, 0.001231662]
Epoch: 8, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=63.72670, average_batch_loss=0.24893, average_batch_perplexity=1.28266, lr=0.001231662, 0.001231662]
Epoch: 8, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=53.56535, average_batch_loss=0.20924, average_batch_perplexity=1.23274, lr=0.001228683, 0.001228683]
Epoch: 8, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=53.56535, average_batch_loss=0.20924, average_batch_perplexity=1.23274, lr=0.001228683, 0.001228683]
Epoch: 8, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.17it/s, total_batch_loss=56.57499, average_batch_loss=0.22100, average_batch_perplexity=1.24732, lr=0.001225726, 0.001225726]
Epoch: 8, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.27it/s, total_batch_loss=56.57499, average_batch_loss=0.22100, average_batch_perplexity=1.24732, lr=0.001225726, 0.001225726]
Epoch: 8, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=56.57499, average_batch_loss=0.22100, average_batch_perplexity=1.24732, lr=0.001225726, 0.001225726]
Epoch: 8, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=54.72008, average_batch_loss=0.21375, average_batch_perplexity=1.23831, lr=0.001222790, 0.001222790]
Epoch: 8, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=54.72008, average_batch_loss=0.21375, average_batch_perplexity=1.23831, lr=0.001222790, 0.001222790]
Epoch: 8, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=49.66970, average_batch_loss=0.19402, average_batch_perplexity=1.21412, lr=0.001219875, 0.001219875]
Epoch: 8, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.04it/s, total_batch_loss=49.66970, average_batch_loss=0.19402, average_batch_perplexity=1.21412, lr=0.001219875, 0.001219875]
Epoch: 8, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.04it/s, total_batch_loss=49.66970, average_batch_loss=0.19402, average_batch_perplexity=1.21412, lr=0.001219875, 0.001219875]
Epoch: 8, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.04it/s, total_batch_loss=50.02804, average_batch_loss=0.19542, average_batch_perplexity=1.21582, lr=0.001216981, 0.001216981]
Epoch: 8, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.04it/s, total_batch_loss=50.02804, average_batch_loss=0.19542, average_batch_perplexity=1.21582, lr=0.001216981, 0.001216981]
Epoch: 8, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.04it/s, total_batch_loss=50.38107, average_batch_loss=0.19680, average_batch_perplexity=1.21750, lr=0.001214107, 0.001214107]
Epoch: 8, Step: 16:  57%|█████▋    | 16/28 [00:01<00:01, 11.97it/s, total_batch_loss=50.38107, average_batch_loss=0.19680, average_batch_perplexity=1.21750, lr=0.001214107, 0.001214107]
Epoch: 8, Step: 17:  57%|█████▋    | 16/28 [00:01<00:01, 11.97it/s, total_batch_loss=50.38107, average_batch_loss=0.19680, average_batch_perplexity=1.21750, lr=0.001214107, 0.001214107]
Epoch: 8, Step: 17:  57%|█████▋    | 16/28 [00:01<00:01, 11.97it/s, total_batch_loss=52.12938, average_batch_loss=0.20363, average_batch_perplexity=1.22584, lr=0.001211254, 0.001211254]
Epoch: 8, Step: 18:  57%|█████▋    | 16/28 [00:01<00:01, 11.97it/s, total_batch_loss=52.12938, average_batch_loss=0.20363, average_batch_perplexity=1.22584, lr=0.001211254, 0.001211254]
Epoch: 8, Step: 18:  57%|█████▋    | 16/28 [00:01<00:01, 11.97it/s, total_batch_loss=51.27622, average_batch_loss=0.20030, average_batch_perplexity=1.22177, lr=0.001208421, 0.001208421]
Epoch: 8, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.02it/s, total_batch_loss=51.27622, average_batch_loss=0.20030, average_batch_perplexity=1.22177, lr=0.001208421, 0.001208421]
Epoch: 8, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.02it/s, total_batch_loss=51.27622, average_batch_loss=0.20030, average_batch_perplexity=1.22177, lr=0.001208421, 0.001208421]
Epoch: 8, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.02it/s, total_batch_loss=53.21026, average_batch_loss=0.20785, average_batch_perplexity=1.23103, lr=0.001205607, 0.001205607]
Epoch: 8, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.02it/s, total_batch_loss=53.21026, average_batch_loss=0.20785, average_batch_perplexity=1.23103, lr=0.001205607, 0.001205607]
Epoch: 8, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.02it/s, total_batch_loss=50.26188, average_batch_loss=0.19634, average_batch_perplexity=1.21694, lr=0.001202813, 0.001202813]
Epoch: 8, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 12.16it/s, total_batch_loss=50.26188, average_batch_loss=0.19634, average_batch_perplexity=1.21694, lr=0.001202813, 0.001202813]
Epoch: 8, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.16it/s, total_batch_loss=50.26188, average_batch_loss=0.19634, average_batch_perplexity=1.21694, lr=0.001202813, 0.001202813]
Epoch: 8, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 12.16it/s, total_batch_loss=50.20437, average_batch_loss=0.19611, average_batch_perplexity=1.21666, lr=0.001200038, 0.001200038]
Epoch: 8, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.16it/s, total_batch_loss=50.20437, average_batch_loss=0.19611, average_batch_perplexity=1.21666, lr=0.001200038, 0.001200038]
Epoch: 8, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 12.16it/s, total_batch_loss=51.38136, average_batch_loss=0.20071, average_batch_perplexity=1.22227, lr=0.001197283, 0.001197283]
Epoch: 8, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.23it/s, total_batch_loss=51.38136, average_batch_loss=0.20071, average_batch_perplexity=1.22227, lr=0.001197283, 0.001197283]
Epoch: 8, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.23it/s, total_batch_loss=51.38136, average_batch_loss=0.20071, average_batch_perplexity=1.22227, lr=0.001197283, 0.001197283]
Epoch: 8, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.23it/s, total_batch_loss=45.50115, average_batch_loss=0.17774, average_batch_perplexity=1.19451, lr=0.001194546, 0.001194546]
Epoch: 8, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.23it/s, total_batch_loss=45.50115, average_batch_loss=0.17774, average_batch_perplexity=1.19451, lr=0.001194546, 0.001194546]
Epoch: 8, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.23it/s, total_batch_loss=51.44242, average_batch_loss=0.20095, average_batch_perplexity=1.22256, lr=0.001191828, 0.001191828]
Epoch: 8, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.20it/s, total_batch_loss=51.44242, average_batch_loss=0.20095, average_batch_perplexity=1.22256, lr=0.001191828, 0.001191828]
Epoch: 8, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=51.44242, average_batch_loss=0.20095, average_batch_perplexity=1.22256, lr=0.001191828, 0.001191828]
Epoch: 8, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=47.85750, average_batch_loss=0.18694, average_batch_perplexity=1.20556, lr=0.001189129, 0.001189129]
Epoch: 8, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=47.85750, average_batch_loss=0.18694, average_batch_perplexity=1.20556, lr=0.001189129, 0.001189129]
Epoch: 8, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.20it/s, total_batch_loss=40.43978, average_batch_loss=0.15797, average_batch_perplexity=1.17113, lr=0.001186447, 0.001186447]
Epoch: 8, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 12.22it/s, total_batch_loss=40.43978, average_batch_loss=0.15797, average_batch_perplexity=1.17113, lr=0.001186447, 0.001186447]
Epoch: 8, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.22it/s, total_batch_loss=40.43978, average_batch_loss=0.15797, average_batch_perplexity=1.17113, lr=0.001186447, 0.001186447]
Epoch: 8, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 12.22it/s, total_batch_loss=50.69247, average_batch_loss=0.19802, average_batch_perplexity=1.21898, lr=0.001183784, 0.001183784]
Epoch: 8, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.22it/s, total_batch_loss=50.69247, average_batch_loss=0.19802, average_batch_perplexity=1.21898, lr=0.001183784, 0.001183784]
Epoch: 8, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 12.22it/s, total_batch_loss=18.43602, average_batch_loss=0.20950, average_batch_perplexity=1.23306, lr=0.001181139, 0.001181139]
Epoch: 8, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.47it/s, total_batch_loss=18.43602, average_batch_loss=0.20950, average_batch_perplexity=1.23306, lr=0.001181139, 0.001181139]
                                                                                                                                                                                         
2024-09-20 14:10:29,552 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:29,553 - root - INFO - Total Batches:                   28
2024-09-20 14:10:29,553 - root - INFO - Average Epoch Train Loss:        0.21644
2024-09-20 14:10:29,554 - root - INFO - Average Epoch Train Perplexity:  1.24164
2024-09-20 14:10:29,554 - root - INFO - 
2024-09-20 14:10:29,555 - root - INFO - ====================================================== Starting Valid Epoch: 8/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 8, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 8, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=29.76483, average_batch_loss=0.11627, average_batch_perplexity=1.12330]
Epoch: 8, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=29.76483, average_batch_loss=0.11627, average_batch_perplexity=1.12330]
Epoch: 8, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=29.46812, average_batch_loss=0.11511, average_batch_perplexity=1.12200]
Epoch: 8, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=29.46812, average_batch_loss=0.11511, average_batch_perplexity=1.12200]
Epoch: 8, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=29.96622, average_batch_loss=0.11706, average_batch_perplexity=1.12418]
Epoch: 8, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=29.96622, average_batch_loss=0.11706, average_batch_perplexity=1.12418]
Epoch: 8, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=29.96622, average_batch_loss=0.11706, average_batch_perplexity=1.12418]
Epoch: 8, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=33.13523, average_batch_loss=0.12943, average_batch_perplexity=1.13818]
Epoch: 8, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=33.13523, average_batch_loss=0.12943, average_batch_perplexity=1.13818]
Epoch: 8, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=29.03102, average_batch_loss=0.11340, average_batch_perplexity=1.12008]
Epoch: 8, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=29.03102, average_batch_loss=0.11340, average_batch_perplexity=1.12008]
Epoch: 8, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 23.24it/s, total_batch_loss=30.31429, average_batch_loss=0.11842, average_batch_perplexity=1.12571]
Epoch: 8, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 23.32it/s, total_batch_loss=30.31429, average_batch_loss=0.11842, average_batch_perplexity=1.12571]
Epoch: 8, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.32it/s, total_batch_loss=30.31429, average_batch_loss=0.11842, average_batch_perplexity=1.12571]
Epoch: 8, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 23.32it/s, total_batch_loss=31.22097, average_batch_loss=0.12196, average_batch_perplexity=1.12971]
Epoch: 8, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.32it/s, total_batch_loss=31.22097, average_batch_loss=0.12196, average_batch_perplexity=1.12971]
Epoch: 8, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 23.32it/s, total_batch_loss=21.50961, average_batch_loss=0.10341, average_batch_perplexity=1.10895]
                                                                                                                                                         
2024-09-20 14:10:29,895 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:29,895 - root - INFO - Total Batches:                   8
2024-09-20 14:10:29,896 - root - INFO - Average Epoch Valid Loss:        0.11721
2024-09-20 14:10:29,896 - root - INFO - Average Epoch Valid Perplexity:  1.12435
2024-09-20 14:10:29,897 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.8789]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.8594]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.38it/s, accuracy: 0.8594]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.38it/s, accuracy: 0.9258]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.38it/s, accuracy: 0.8793]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.67it/s, accuracy: 0.8793]
                                                                                           
2024-09-20 14:10:30,279 - root - INFO - Correct/Total Samples:           886/1000
2024-09-20 14:10:30,279 - root - INFO - Eval Accuracy:                   0.886
2024-09-20 14:10:30,298 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     8            1             0 13+48=061 13+48=061     True
     8            1             1 16+55=071 16+55=071     True
     8            1             2 79+34=113 79+34=103    False
     8            1             3 35+44=079 35+44=079     True
     8            1             4 16+50=066 16+50=066     True
     8            1             5 28+47=075 28+47=075     True
     8            1             6 00+74=074 00+74=064    False
     8            1             7 15+20=035 15+20=035     True
     8            1             8 72+60=132 72+60=122    False
     8            1             9 63+68=131 63+68=131     True
     8            1            10 29+45=074 29+45=074     True
     8            1            11 34+60=094 34+60=094     True
     8            1            12 53+70=123 53+70=123     True
     8            1            13 70+50=120 70+50=110    False
     8            1            14 11+84=095 11+84=095     True
     8            1            15 42+71=113 42+71=113     True
     8            1            16 98+22=120 98+22=120     True
     8            1            17 02+02=004 02+02=014    False
     8            1            18 15+85=100 15+85=100     True
     8            1            19 21+78=099 21+78=099     True
     8            1            20 61+79=140 61+79=140     True
     8            1            21 25+99=124 25+99=124     True
     8            1            22 09+85=094 09+85=094     True
     8            1            23 60+91=151 60+91=151     True
     8            1            24 35+30=065 35+30=065     True
     8            1            25 24+51=075 24+51=075     True
     8            1            26 93+91=184 93+91=184     True
     8            1            27 39+96=135 39+96=135     True
     8            1            28 64+35=099 64+35=099     True
     8            1            29 36+22=058 36+22=058     True
     8            1            30 68+45=113 68+45=113     True
     8            1            31 16+84=100 16+84=100     True
     8            1            32 91+52=143 91+52=143     True
     8            1            33 97+36=133 97+36=133     True
     8            1            34 27+37=064 27+37=063    False
     8            1            35 99+82=181 99+82=181     True
     8            1            36 03+42=045 03+42=045     True
     8            1            37 18+38=056 18+38=056     True
     8            1            38 32+20=052 32+20=052     True
     8            1            39 38+13=051 38+13=051     True
     8            1            40 68+42=110 68+42=100    False
     8            1            41 64+00=064 64+00=054    False
     8            1            42 48+94=142 48+94=142     True
     8            1            43 58+36=094 58+36=094     True
     8            1            44 41+22=063 41+22=053    False
     8            1            45 23+58=081 23+58=081     True
     8            1            46 67+46=113 67+46=113     True
     8            1            47 40+78=118 40+78=118     True
     8            1            48 90+38=128 90+38=128     True
     8            1            49 89+52=141 89+52=141     True
     8            1            50 37+77=114 37+77=113    False
     8            1            51 29+76=105 29+76=105     True
     8            1            52 42+90=132 42+90=132     True
     8            1            53 45+82=127 45+82=127     True
     8            1            54 35+95=130 35+95=130     True
     8            1            55 92+98=190 92+98=190     True
     8            1            56 73+91=164 73+91=164     True
     8            1            57 53+97=150 53+97=150     True
     8            1            58 98+69=167 98+69=167     True
     8            1            59 20+46=066 20+46=056    False
     8            1            60 48+69=117 48+69=117     True
     8            1            61 62+31=093 62+31=093     True
     8            1            62 80+59=139 80+59=139     True
     8            1            63 58+12=070 58+12=070     True
     8            1            64 08+96=104 08+96=104     True
     8            1            65 67+06=073 67+06=063    False
     8            1            66 22+04=026 22+04=026     True
     8            1            67 61+87=148 61+87=148     True
     8            1            68 95+27=122 95+27=122     True
     8            1            69 49+83=132 49+83=132     True
     8            1            70 43+00=043 43+00=043     True
     8            1            71 01+85=086 01+85=086     True
     8            1            72 11+68=079 11+68=079     True
     8            1            73 80+03=083 80+03=083     True
     8            1            74 54+83=137 54+83=137     True
     8            1            75 73+47=120 73+47=110    False
     8            1            76 99+93=192 99+93=192     True
     8            1            77 99+13=112 99+13=112     True
     8            1            78 92+66=158 92+66=158     True
     8            1            79 90+31=121 90+31=121     True
     8            1            80 25+69=094 25+69=094     True
     8            1            81 25+44=069 25+44=069     True
     8            1            82 00+93=093 00+93=093     True
     8            1            83 88+87=175 88+87=175     True
     8            1            84 47+56=103 47+56=103     True
     8            1            85 43+59=102 43+59=102     True
     8            1            86 22+00=022 22+00=022     True
     8            1            87 34+04=038 34+04=038     True
     8            1            88 65+13=078 65+13=078     True
     8            1            89 39+82=121 39+82=121     True
     8            1            90 66+83=149 66+83=149     True
     8            1            91 51+69=120 51+69=110    False
     8            1            92 80+21=101 80+21=101     True
     8            1            93 36+79=115 36+79=115     True
     8            1            94 21+68=089 21+68=089     True
     8            1            95 11+66=077 11+66=077     True
     8            1            96 55+19=074 55+19=074     True
     8            1            97 51+61=112 51+61=102    False
     8            1            98 38+88=126 38+88=126     True
     8            1            99 37+27=064 37+27=063    False
     8            1           100 18+63=081 18+63=081     True
     8            1           101 48+11=059 48+11=059     True
     8            1           102 72+68=140 72+68=140     True
     8            1           103 37+39=076 37+39=076     True
     8            1           104 64+95=159 64+95=159     True
     8            1           105 49+75=124 49+75=124     True
     8            1           106 45+66=111 45+66=111     True
     8            1           107 34+87=121 34+87=121     True
     8            1           108 02+84=086 02+84=086     True
     8            1           109 95+00=095 95+00=095     True
     8            1           110 09+56=065 09+56=065     True
     8            1           111 22+66=088 22+66=088     True
     8            1           112 43+18=061 43+18=061     True
     8            1           113 61+35=096 61+35=096     True
     8            1           114 13+73=086 13+73=086     True
     8            1           115 25+95=120 25+95=110    False
     8            1           116 73+96=169 73+96=169     True
     8            1           117 03+96=099 03+96=099     True
     8            1           118 97+82=179 97+82=179     True
     8            1           119 18+42=060 18+42=060     True
     8            1           120 29+98=127 29+98=127     True
     8            1           121 61+00=061 61+00=051    False
     8            1           122 22+98=120 22+98=120     True
     8            1           123 12+50=062 12+50=062     True
     8            1           124 02+58=060 02+58=060     True
     8            1           125 75+86=161 75+86=161     True
     8            1           126 31+57=088 31+57=088     True
     8            1           127 49+82=131 49+82=131     True
     8            1           128 15+33=048 15+33=048     True
     8            1           129 49+57=106 49+57=106     True
     8            1           130 61+70=131 61+70=121    False
     8            1           131 91+51=142 91+51=142     True
     8            1           132 50+05=055 50+05=055     True
     8            1           133 44+16=060 44+16=060     True
     8            1           134 92+01=093 92+01=093     True
     8            1           135 85+82=167 85+82=167     True
     8            1           136 07+41=048 07+41=048     True
     8            1           137 40+06=046 40+06=046     True
     8            1           138 79+62=141 79+62=141     True
     8            1           139 95+62=157 95+62=157     True
     8            1           140 42+93=135 42+93=135     True
     8            1           141 32+73=105 32+73=105     True
     8            1           142 47+09=056 47+09=056     True
     8            1           143 59+50=109 59+50=109     True
     8            1           144 61+77=138 61+77=138     True
     8            1           145 64+06=070 64+06=060    False
     8            1           146 35+10=045 35+10=045     True
     8            1           147 32+88=120 32+88=120     True
     8            1           148 03+95=098 03+95=098     True
     8            1           149 11+38=049 11+38=049     True
     8            1           150 21+67=088 21+67=088     True
     8            1           151 33+25=058 33+25=058     True
     8            1           152 63+45=108 63+45=108     True
     8            1           153 56+12=068 56+12=068     True
     8            1           154 19+79=098 19+79=098     True
     8            1           155 60+43=103 60+43=103     True
     8            1           156 07+61=068 07+61=068     True
     8            1           157 58+03=061 58+03=061     True
     8            1           158 11+10=021 11+10=021     True
     8            1           159 49+89=138 49+89=138     True
     8            1           160 37+58=095 37+58=095     True
     8            1           161 59+78=137 59+78=137     True
     8            1           162 11+21=032 11+21=032     True
     8            1           163 37+43=080 37+43=070    False
     8            1           164 44+21=065 44+21=065     True
     8            1           165 22+97=119 22+97=119     True
     8            1           166 65+35=100 65+35=090    False
     8            1           167 06+51=057 06+51=057     True
     8            1           168 65+25=090 65+25=080    False
     8            1           169 74+94=168 74+94=168     True
     8            1           170 87+55=142 87+55=142     True
     8            1           171 90+67=157 90+67=157     True
     8            1           172 11+02=013 11+02=013     True
     8            1           173 01+66=067 01+66=057    False
     8            1           174 56+00=056 56+00=056     True
     8            1           175 58+52=110 58+52=110     True
     8            1           176 24+99=123 24+99=123     True
     8            1           177 97+13=110 97+13=100    False
     8            1           178 42+94=136 42+94=136     True
     8            1           179 60+15=075 60+15=075     True
     8            1           180 20+46=066 20+46=056    False
     8            1           181 40+70=110 40+70=110     True
     8            1           182 95+45=140 95+45=140     True
     8            1           183 96+95=191 96+95=191     True
     8            1           184 98+20=118 98+20=118     True
     8            1           185 43+19=062 43+19=062     True
     8            1           186 50+69=119 50+69=119     True
     8            1           187 27+53=080 27+53=070    False
     8            1           188 24+25=049 24+25=049     True
     8            1           189 65+92=157 65+92=157     True
     8            1           190 28+14=042 28+14=042     True
     8            1           191 20+57=077 20+57=077     True
     8            1           192 59+97=156 59+97=156     True
     8            1           193 98+32=130 98+32=130     True
     8            1           194 55+84=139 55+84=139     True
     8            1           195 20+39=059 20+39=059     True
     8            1           196 86+47=133 86+47=133     True
     8            1           197 92+36=128 92+36=128     True
     8            1           198 05+38=043 05+38=043     True
     8            1           199 77+36=113 77+36=103    False
     8            1           200 41+64=105 41+64=105     True
     8            1           201 74+51=125 74+51=115    False
     8            1           202 74+55=129 74+55=129     True
     8            1           203 64+64=128 64+64=128     True
     8            1           204 60+19=079 60+19=079     True
     8            1           205 77+96=173 77+96=173     True
     8            1           206 22+30=052 22+30=052     True
     8            1           207 82+49=131 82+49=131     True
     8            1           208 39+67=106 39+67=106     True
     8            1           209 62+40=102 62+40=102     True
     8            1           210 28+71=099 28+71=099     True
     8            1           211 47+26=073 47+26=073     True
     8            1           212 98+54=152 98+54=152     True
     8            1           213 38+70=108 38+70=108     True
     8            1           214 63+40=103 63+40=103     True
     8            1           215 86+62=148 86+62=148     True
     8            1           216 22+65=087 22+65=087     True
     8            1           217 41+17=058 41+17=058     True
     8            1           218 68+88=156 68+88=156     True
     8            1           219 96+70=166 96+70=166     True
     8            1           220 99+29=128 99+29=128     True
     8            1           221 83+39=122 83+39=122     True
     8            1           222 26+55=081 26+55=081     True
     8            1           223 53+70=123 53+70=123     True
     8            1           224 94+12=106 94+12=106     True
     8            1           225 00+37=037 00+37=037     True
     8            1           226 36+94=130 36+94=130     True
     8            1           227 40+58=098 40+58=098     True
     8            1           228 19+80=099 19+80=099     True
     8            1           229 49+44=093 49+44=093     True
     8            1           230 70+27=097 70+27=097     True
     8            1           231 52+80=132 52+80=132     True
     8            1           232 77+90=167 77+90=167     True
     8            1           233 13+92=105 13+92=105     True
     8            1           234 59+09=068 59+09=068     True
     8            1           235 33+55=088 33+55=088     True
     8            1           236 85+16=101 85+16=101     True
     8            1           237 25+65=090 25+65=080    False
     8            1           238 46+20=066 46+20=056    False
     8            1           239 29+52=081 29+52=081     True
     8            1           240 32+36=068 32+36=068     True
     8            1           241 47+08=055 47+08=055     True
     8            1           242 21+84=105 21+84=105     True
     8            1           243 24+45=069 24+45=069     True
     8            1           244 29+15=044 29+15=044     True
     8            1           245 83+03=086 83+03=086     True
     8            1           246 83+36=119 83+36=119     True
     8            1           247 58+95=153 58+95=153     True
     8            1           248 76+79=155 76+79=155     True
     8            1           249 63+30=093 63+30=093     True
     8            1           250 38+24=062 38+24=062     True
     8            1           251 19+46=065 19+46=065     True
     8            1           252 99+66=165 99+66=165     True
     8            1           253 95+73=168 95+73=168     True
     8            1           254 65+27=092 65+27=092     True
     8            1           255 91+83=174 91+83=174     True
     8            2             0 65+49=114 65+49=104    False
     8            2             1 03+08=011 03+08=021    False
     8            2             2 67+81=148 67+81=148     True
     8            2             3 47+23=070 47+23=060    False
     8            2             4 43+91=134 43+91=134     True
     8            2             5 41+67=108 41+67=108     True
     8            2             6 02+33=035 02+33=035     True
     8            2             7 64+84=148 64+84=148     True
     8            2             8 81+64=145 81+64=145     True
     8            2             9 80+11=091 80+11=091     True
     8            2            10 78+01=079 78+01=079     True
     8            2            11 89+18=107 89+18=107     True
     8            2            12 45+52=097 45+52=097     True
     8            2            13 35+30=065 35+30=065     True
     8            2            14 53+32=085 53+32=085     True
     8            2            15 49+90=139 49+90=149    False
     8            2            16 41+37=078 41+37=078     True
     8            2            17 35+14=049 35+14=049     True
     8            2            18 92+50=142 92+50=142     True
     8            2            19 37+60=097 37+60=097     True
     8            2            20 91+61=152 91+61=152     True
     8            2            21 80+77=157 80+77=157     True
     8            2            22 66+24=090 66+24=080    False
     8            2            23 81+07=088 81+07=088     True
     8            2            24 85+59=144 85+59=144     True
     8            2            25 19+69=088 19+69=088     True
     8            2            26 91+44=135 91+44=135     True
     8            2            27 25+29=054 25+29=054     True
     8            2            28 27+08=035 27+08=035     True
     8            2            29 66+14=080 66+14=080     True
     8            2            30 95+11=106 95+11=106     True
     8            2            31 13+97=110 13+97=100    False
     8            2            32 94+40=134 94+40=134     True
     8            2            33 74+31=105 74+31=105     True
     8            2            34 49+00=049 49+00=049     True
     8            2            35 59+18=077 59+18=077     True
     8            2            36 07+65=072 07+65=062    False
     8            2            37 83+55=138 83+55=138     True
     8            2            38 49+80=129 49+80=139    False
     8            2            39 64+17=081 64+17=081     True
     8            2            40 48+83=131 48+83=131     True
     8            2            41 95+44=139 95+44=149    False
     8            2            42 71+26=097 71+26=097     True
     8            2            43 06+74=080 06+74=070    False
     8            2            44 34+24=058 34+24=058     True
     8            2            45 59+71=130 59+71=120    False
     8            2            46 68+32=100 68+32=090    False
     8            2            47 38+81=119 38+81=119     True
     8            2            48 29+56=085 29+56=085     True
     8            2            49 54+55=109 54+55=109     True
     8            2            50 31+27=058 31+27=058     True
     8            2            51 97+89=186 97+89=186     True
     8            2            52 48+09=057 48+09=057     True
     8            2            53 86+76=162 86+76=162     True
     8            2            54 82+59=141 82+59=141     True
     8            2            55 01+67=068 01+67=068     True
     8            2            56 26+06=032 26+06=032     True
     8            2            57 22+46=068 22+46=068     True
     8            2            58 85+16=101 85+16=101     True
     8            2            59 29+08=037 29+08=037     True
     8            2            60 73+94=167 73+94=167     True
     8            2            61 19+62=081 19+62=081     True
     8            2            62 86+62=148 86+62=148     True
     8            2            63 38+99=137 38+99=137     True
     8            2            64 64+25=089 64+25=089     True
     8            2            65 61+72=133 61+72=123    False
     8            2            66 78+88=166 78+88=166     True
     8            2            67 43+66=109 43+66=109     True
     8            2            68 69+35=104 69+35=104     True
     8            2            69 33+77=110 33+77=100    False
     8            2            70 37+37=074 37+37=073    False
     8            2            71 87+54=141 87+54=141     True
     8            2            72 68+90=158 68+90=158     True
     8            2            73 83+44=127 83+44=127     True
     8            2            74 41+09=050 41+09=050     True
     8            2            75 13+48=061 13+48=061     True
     8            2            76 01+41=042 01+41=042     True
     8            2            77 19+74=093 19+74=093     True
     8            2            78 15+05=020 15+05=020     True
     8            2            79 55+46=101 55+46=101     True
     8            2            80 68+33=101 68+33=101     True
     8            2            81 44+40=084 44+40=084     True
     8            2            82 88+03=091 88+03=091     True
     8            2            83 81+79=160 81+79=160     True
     8            2            84 18+98=116 18+98=116     True
     8            2            85 70+64=134 70+64=134     True
     8            2            86 26+44=070 26+44=060    False
     8            2            87 98+87=185 98+87=185     True
     8            2            88 18+74=092 18+74=092     True
     8            2            89 50+68=118 50+68=118     True
     8            2            90 13+51=064 13+51=064     True
     8            2            91 90+89=179 90+89=189    False
     8            2            92 47+78=125 47+78=125     True
     8            2            93 81+57=138 81+57=138     True
     8            2            94 34+47=081 34+47=081     True
     8            2            95 94+23=117 94+23=117     True
     8            2            96 07+70=077 07+70=077     True
     8            2            97 56+33=089 56+33=089     True
     8            2            98 33+04=037 33+04=037     True
     8            2            99 26+09=035 26+09=035     True
     8            2           100 14+92=106 14+92=106     True
     8            2           101 78+54=132 78+54=132     True
     8            2           102 36+76=112 36+76=112     True
     8            2           103 17+47=064 17+47=063    False
     8            2           104 28+18=046 28+18=046     True
     8            2           105 78+54=132 78+54=132     True
     8            2           106 84+72=156 84+72=156     True
     8            2           107 00+44=044 00+44=044     True
     8            2           108 50+41=091 50+41=081    False
     8            2           109 87+88=175 87+88=175     True
     8            2           110 11+66=077 11+66=077     True
     8            2           111 80+60=140 80+60=140     True
     8            2           112 78+76=154 78+76=154     True
     8            2           113 24+74=098 24+74=098     True
     8            2           114 88+48=136 88+48=136     True
     8            2           115 38+31=069 38+31=069     True
     8            2           116 29+27=056 29+27=056     True
     8            2           117 08+45=053 08+45=053     True
     8            2           118 28+13=041 28+13=041     True
     8            2           119 53+99=152 53+99=152     True
     8            2           120 47+92=139 47+92=139     True
     8            2           121 76+21=097 76+21=097     True
     8            2           122 53+96=149 53+96=149     True
     8            2           123 93+91=184 93+91=184     True
     8            2           124 97+33=130 97+33=120    False
     8            2           125 67+78=145 67+78=145     True
     8            2           126 58+05=063 58+05=063     True
     8            2           127 00+16=016 00+16=016     True
     8            2           128 80+19=099 80+19=099     True
     8            2           129 98+22=120 98+22=120     True
     8            2           130 09+62=071 09+62=071     True
     8            2           131 06+23=029 06+23=029     True
     8            2           132 32+99=131 32+99=131     True
     8            2           133 17+02=019 17+02=019     True
     8            2           134 64+35=099 64+35=099     True
     8            2           135 35+83=118 35+83=118     True
     8            2           136 71+36=107 71+36=107     True
     8            2           137 75+06=081 75+06=071    False
     8            2           138 88+95=183 88+95=183     True
     8            2           139 19+98=117 19+98=107    False
     8            2           140 28+89=117 28+89=117     True
     8            2           141 33+11=044 33+11=044     True
     8            2           142 34+49=083 34+49=083     True
     8            2           143 90+35=125 90+35=125     True
     8            2           144 22+90=112 22+90=112     True
     8            2           145 98+89=187 98+89=187     True
     8            2           146 88+47=135 88+47=135     True
     8            2           147 30+86=116 30+86=116     True
     8            2           148 31+48=079 31+48=079     True
     8            2           149 39+21=060 39+21=060     True
     8            2           150 19+17=036 19+17=036     True
     8            2           151 27+60=087 27+60=087     True
     8            2           152 12+16=028 12+16=028     True
     8            2           153 51+75=126 51+75=116    False
     8            2           154 10+74=084 10+74=084     True
     8            2           155 42+63=105 42+63=105     True
     8            2           156 40+14=054 40+14=054     True
     8            2           157 23+93=116 23+93=116     True
     8            2           158 85+26=111 85+26=111     True
     8            2           159 28+46=074 28+46=074     True
     8            2           160 28+33=061 28+33=061     True
     8            2           161 43+30=073 43+30=073     True
     8            2           162 89+72=161 89+72=161     True
     8            2           163 52+21=073 52+21=063    False
     8            2           164 21+54=075 21+54=075     True
     8            2           165 69+13=082 69+13=082     True
     8            2           166 07+60=067 07+60=067     True
     8            2           167 63+83=146 63+83=146     True
     8            2           168 80+69=149 80+69=159    False
     8            2           169 27+28=055 27+28=055     True
     8            2           170 42+31=073 42+31=063    False
     8            2           171 51+99=150 51+99=150     True
     8            2           172 28+75=103 28+75=103     True
     8            2           173 38+57=095 38+57=095     True
     8            2           174 83+16=099 83+16=099     True
     8            2           175 92+94=186 92+94=186     True
     8            2           176 55+75=130 55+75=120    False
     8            2           177 59+51=110 59+51=100    False
     8            2           178 33+09=042 33+09=042     True
     8            2           179 53+13=066 53+13=066     True
     8            2           180 05+70=075 05+70=075     True
     8            2           181 12+20=032 12+20=033    False
     8            2           182 11+49=060 11+49=060     True
     8            2           183 63+45=108 63+45=108     True
     8            2           184 92+23=115 92+23=115     True
     8            2           185 82+45=127 82+45=127     True
     8            2           186 23+41=064 23+41=054    False
     8            2           187 64+26=090 64+26=080    False
     8            2           188 91+24=115 91+24=115     True
     8            2           189 20+32=052 20+32=052     True
     8            2           190 83+21=104 83+21=104     True
     8            2           191 07+20=027 07+20=027     True
     8            2           192 94+14=108 94+14=108     True
     8            2           193 96+89=185 96+89=185     True
     8            2           194 13+08=021 13+08=021     True
     8            2           195 32+05=037 32+05=037     True
     8            2           196 09+51=060 09+51=060     True
     8            2           197 26+29=055 26+29=055     True
     8            2           198 49+65=114 49+65=114     True
     8            2           199 32+66=098 32+66=098     True
     8            2           200 41+08=049 41+08=049     True
     8            2           201 26+79=105 26+79=105     True
     8            2           202 29+91=120 29+91=120     True
     8            2           203 51+00=051 51+00=051     True
     8            2           204 61+60=121 61+60=111    False
     8            2           205 45+78=123 45+78=123     True
     8            2           206 56+16=072 56+16=072     True
     8            2           207 66+68=134 66+68=134     True
     8            2           208 32+16=048 32+16=048     True
     8            2           209 84+49=133 84+49=133     True
     8            2           210 45+09=054 45+09=054     True
     8            2           211 96+78=174 96+78=174     True
     8            2           212 10+02=012 10+02=012     True
     8            2           213 36+60=096 36+60=096     True
     8            2           214 44+36=080 44+36=080     True
     8            2           215 12+86=098 12+86=098     True
     8            2           216 94+54=148 94+54=148     True
     8            2           217 64+73=137 64+73=137     True
     8            2           218 73+10=083 73+10=083     True
     8            2           219 14+62=076 14+62=076     True
     8            2           220 25+22=047 25+22=047     True
     8            2           221 94+22=116 94+22=116     True
     8            2           222 41+76=117 41+76=117     True
     8            2           223 38+46=084 38+46=084     True
     8            2           224 71+72=143 71+72=143     True
     8            2           225 74+79=153 74+79=153     True
     8            2           226 99+67=166 99+67=166     True
     8            2           227 78+71=149 78+71=149     True
     8            2           228 23+19=042 23+19=042     True
     8            2           229 51+65=116 51+65=116     True
     8            2           230 94+86=180 94+86=180     True
     8            2           231 09+79=088 09+79=088     True
     8            2           232 69+39=108 69+39=108     True
     8            2           233 84+13=097 84+13=097     True
     8            2           234 36+59=095 36+59=095     True
     8            2           235 87+47=134 87+47=133    False
     8            2           236 50+00=050 50+00=050     True
     8            2           237 76+96=172 76+96=172     True
     8            2           238 12+18=030 12+18=030     True
     8            2           239 99+95=194 99+95=184    False
     8            2           240 22+00=022 22+00=022     True
     8            2           241 96+18=114 96+18=114     True
     8            2           242 51+20=071 51+20=071     True
     8            2           243 66+81=147 66+81=147     True
     8            2           244 78+18=096 78+18=096     True
     8            2           245 09+78=087 09+78=087     True
     8            2           246 24+20=044 24+20=044     True
     8            2           247 76+13=089 76+13=089     True
     8            2           248 05+10=015 05+10=015     True
     8            2           249 97+14=111 97+14=111     True
     8            2           250 92+38=130 92+38=130     True
     8            2           251 77+13=090 77+13=080    False
     8            2           252 70+19=089 70+19=089     True
     8            2           253 90+45=135 90+45=135     True
     8            2           254 50+09=059 50+09=059     True
     8            2           255 78+06=084 78+06=074    False
     8            3             0 03+25=028 03+25=028     True
     8            3             1 48+43=091 48+43=091     True
     8            3             2 39+47=086 39+47=086     True
     8            3             3 48+19=067 48+19=067     True
     8            3             4 07+22=029 07+22=029     True
     8            3             5 73+68=141 73+68=141     True
     8            3             6 14+56=070 14+56=070     True
     8            3             7 96+95=191 96+95=191     True
     8            3             8 96+28=124 96+28=124     True
     8            3             9 82+05=087 82+05=087     True
     8            3            10 27+94=121 27+94=121     True
     8            3            11 87+86=173 87+86=173     True
     8            3            12 00+68=068 00+68=068     True
     8            3            13 11+37=048 11+37=048     True
     8            3            14 95+93=188 95+93=188     True
     8            3            15 75+82=157 75+82=157     True
     8            3            16 41+71=112 41+71=112     True
     8            3            17 60+14=074 60+14=074     True
     8            3            18 77+77=154 77+77=143    False
     8            3            19 31+84=115 31+84=115     True
     8            3            20 31+57=088 31+57=088     True
     8            3            21 27+87=114 27+87=113    False
     8            3            22 31+89=120 31+89=120     True
     8            3            23 22+18=040 22+18=040     True
     8            3            24 38+25=063 38+25=063     True
     8            3            25 64+54=118 64+54=118     True
     8            3            26 85+60=145 85+60=145     True
     8            3            27 14+71=085 14+71=085     True
     8            3            28 06+16=022 06+16=022     True
     8            3            29 78+61=139 78+61=139     True
     8            3            30 65+75=140 65+75=130    False
     8            3            31 13+83=096 13+83=096     True
     8            3            32 75+49=124 75+49=114    False
     8            3            33 05+78=083 05+78=083     True
     8            3            34 66+55=121 66+55=121     True
     8            3            35 03+05=008 03+05=008     True
     8            3            36 69+99=168 69+99=168     True
     8            3            37 52+82=134 52+82=134     True
     8            3            38 45+97=142 45+97=142     True
     8            3            39 66+17=083 66+17=083     True
     8            3            40 36+17=053 36+17=053     True
     8            3            41 92+74=166 92+74=166     True
     8            3            42 48+44=092 48+44=092     True
     8            3            43 34+17=051 34+17=051     True
     8            3            44 56+11=067 56+11=067     True
     8            3            45 77+23=100 77+23=090    False
     8            3            46 10+11=021 10+11=021     True
     8            3            47 32+65=097 32+65=097     True
     8            3            48 53+49=102 53+49=102     True
     8            3            49 68+86=154 68+86=154     True
     8            3            50 52+94=146 52+94=146     True
     8            3            51 97+71=168 97+71=168     True
     8            3            52 05+37=042 05+37=042     True
     8            3            53 58+75=133 58+75=133     True
     8            3            54 06+24=030 06+24=030     True
     8            3            55 15+44=059 15+44=059     True
     8            3            56 90+49=139 90+49=139     True
     8            3            57 50+37=087 50+37=087     True
     8            3            58 88+61=149 88+61=159    False
     8            3            59 21+57=078 21+57=078     True
     8            3            60 24+85=109 24+85=109     True
     8            3            61 01+66=067 01+66=057    False
     8            3            62 50+46=096 50+46=096     True
     8            3            63 76+65=141 76+65=141     True
     8            3            64 23+74=097 23+74=097     True
     8            3            65 76+16=092 76+16=092     True
     8            3            66 06+08=014 06+08=014     True
     8            3            67 69+25=094 69+25=094     True
     8            3            68 15+23=038 15+23=038     True
     8            3            69 41+02=043 41+02=043     True
     8            3            70 16+66=082 16+66=082     True
     8            3            71 59+94=153 59+94=153     True
     8            3            72 32+88=120 32+88=120     True
     8            3            73 46+21=067 46+21=067     True
     8            3            74 57+28=085 57+28=085     True
     8            3            75 00+31=031 00+31=031     True
     8            3            76 77+07=084 77+07=073    False
     8            3            77 28+70=098 28+70=098     True
     8            3            78 05+61=066 05+61=056    False
     8            3            79 22+09=031 22+09=031     True
     8            3            80 08+94=102 08+94=102     True
     8            3            81 40+11=051 40+11=051     True
     8            3            82 10+48=058 10+48=058     True
     8            3            83 27+56=083 27+56=083     True
     8            3            84 42+16=058 42+16=058     True
     8            3            85 69+43=112 69+43=112     True
     8            3            86 57+69=126 57+69=126     True
     8            3            87 18+86=104 18+86=104     True
     8            3            88 86+80=166 86+80=166     True
     8            3            89 30+85=115 30+85=115     True
     8            3            90 77+66=143 77+66=143     True
     8            3            91 39+64=103 39+64=103     True
     8            3            92 76+61=137 76+61=137     True
     8            3            93 42+61=103 42+61=103     True
     8            3            94 07+30=037 07+30=037     True
     8            3            95 35+93=128 35+93=128     True
     8            3            96 40+90=130 40+90=130     True
     8            3            97 08+91=099 08+91=099     True
     8            3            98 62+34=096 62+34=096     True
     8            3            99 86+49=135 86+49=135     True
     8            3           100 73+23=096 73+23=096     True
     8            3           101 87+35=122 87+35=122     True
     8            3           102 35+31=066 35+31=066     True
     8            3           103 07+13=020 07+13=020     True
     8            3           104 39+41=080 39+41=080     True
     8            3           105 44+63=107 44+63=107     True
     8            3           106 94+66=160 94+66=160     True
     8            3           107 49+54=103 49+54=103     True
     8            3           108 79+46=125 79+46=125     True
     8            3           109 53+12=065 53+12=065     True
     8            3           110 60+92=152 60+92=152     True
     8            3           111 25+60=085 25+60=085     True
     8            3           112 64+53=117 64+53=117     True
     8            3           113 41+02=043 41+02=043     True
     8            3           114 00+97=097 00+97=097     True
     8            3           115 12+52=064 12+52=064     True
     8            3           116 39+50=089 39+50=089     True
     8            3           117 87+21=108 87+21=108     True
     8            3           118 04+99=103 04+99=103     True
     8            3           119 19+75=094 19+75=094     True
     8            3           120 90+05=095 90+05=095     True
     8            3           121 54+39=093 54+39=093     True
     8            3           122 29+26=055 29+26=055     True
     8            3           123 82+95=177 82+95=177     True
     8            3           124 55+09=064 55+09=064     True
     8            3           125 02+62=064 02+62=054    False
     8            3           126 68+30=098 68+30=098     True
     8            3           127 99+16=115 99+16=105    False
     8            3           128 63+11=074 63+11=074     True
     8            3           129 42+92=134 42+92=134     True
     8            3           130 99+16=115 99+16=105    False
     8            3           131 50+31=081 50+31=081     True
     8            3           132 23+46=069 23+46=069     True
     8            3           133 45+73=118 45+73=118     True
     8            3           134 89+77=166 89+77=166     True
     8            3           135 45+78=123 45+78=123     True
     8            3           136 96+60=156 96+60=156     True
     8            3           137 74+61=135 74+61=135     True
     8            3           138 87+01=088 87+01=088     True
     8            3           139 63+88=151 63+88=151     True
     8            3           140 59+72=131 59+72=131     True
     8            3           141 17+96=113 17+96=113     True
     8            3           142 89+77=166 89+77=166     True
     8            3           143 24+69=093 24+69=093     True
     8            3           144 75+83=158 75+83=158     True
     8            3           145 50+54=104 50+54=104     True
     8            3           146 93+47=140 93+47=140     True
     8            3           147 20+55=075 20+55=075     True
     8            3           148 91+79=170 91+79=170     True
     8            3           149 15+13=028 15+13=028     True
     8            3           150 86+09=095 86+09=095     True
     8            3           151 29+58=087 29+58=087     True
     8            3           152 01+29=030 01+29=030     True
     8            3           153 65+48=113 65+48=113     True
     8            3           154 96+45=141 96+45=141     True
     8            3           155 58+69=127 58+69=127     True
     8            3           156 84+43=127 84+43=127     True
     8            3           157 90+38=128 90+38=128     True
     8            3           158 39+97=136 39+97=136     True
     8            3           159 74+84=158 74+84=158     True
     8            3           160 86+22=108 86+22=108     True
     8            3           161 01+86=087 01+86=087     True
     8            3           162 81+63=144 81+63=144     True
     8            3           163 80+94=174 80+94=174     True
     8            3           164 44+42=086 44+42=086     True
     8            3           165 72+60=132 72+60=122    False
     8            3           166 28+07=035 28+07=035     True
     8            3           167 69+54=123 69+54=123     True
     8            3           168 68+77=145 68+77=145     True
     8            3           169 90+16=106 90+16=106     True
     8            3           170 64+50=114 64+50=114     True
     8            3           171 46+88=134 46+88=134     True
     8            3           172 55+99=154 55+99=154     True
     8            3           173 31+97=128 31+97=128     True
     8            3           174 79+28=107 79+28=107     True
     8            3           175 81+43=124 81+43=124     True
     8            3           176 41+15=056 41+15=056     True
     8            3           177 38+77=115 38+77=115     True
     8            3           178 25+06=031 25+06=031     True
     8            3           179 01+93=094 01+93=094     True
     8            3           180 97+22=119 97+22=109    False
     8            3           181 71+84=155 71+84=155     True
     8            3           182 26+36=062 26+36=062     True
     8            3           183 60+92=152 60+92=152     True
     8            3           184 02+94=096 02+94=096     True
     8            3           185 31+58=089 31+58=089     True
     8            3           186 70+52=122 70+52=112    False
     8            3           187 19+42=061 19+42=061     True
     8            3           188 95+73=168 95+73=168     True
     8            3           189 21+25=046 21+25=046     True
     8            3           190 13+58=071 13+58=071     True
     8            3           191 62+28=090 62+28=090     True
     8            3           192 38+14=052 38+14=052     True
     8            3           193 66+75=141 66+75=141     True
     8            3           194 24+59=083 24+59=083     True
     8            3           195 97+66=163 97+66=163     True
     8            3           196 76+70=146 76+70=146     True
     8            3           197 08+40=048 08+40=048     True
     8            3           198 84+00=084 84+00=084     True
     8            3           199 54+73=127 54+73=127     True
     8            3           200 16+88=104 16+88=104     True
     8            3           201 99+47=146 99+47=146     True
     8            3           202 31+95=126 31+95=126     True
     8            3           203 01+79=080 01+79=080     True
     8            3           204 03+68=071 03+68=061    False
     8            3           205 10+05=015 10+05=015     True
     8            3           206 98+90=188 98+90=188     True
     8            3           207 58+53=111 58+53=111     True
     8            3           208 34+87=121 34+87=121     True
     8            3           209 07+31=038 07+31=038     True
     8            3           210 59+08=067 59+08=067     True
     8            3           211 51+38=089 51+38=089     True
     8            3           212 62+62=124 62+62=114    False
     8            3           213 80+32=112 80+32=112     True
     8            3           214 69+16=085 69+16=085     True
     8            3           215 01+17=018 01+17=018     True
     8            3           216 74+41=115 74+41=115     True
     8            3           217 20+89=109 20+89=109     True
     8            3           218 53+50=103 53+50=103     True
     8            3           219 82+85=167 82+85=167     True
     8            3           220 34+47=081 34+47=081     True
     8            3           221 34+45=079 34+45=079     True
     8            3           222 77+34=111 77+34=111     True
     8            3           223 56+33=089 56+33=089     True
     8            3           224 97+56=153 97+56=153     True
     8            3           225 29+06=035 29+06=035     True
     8            3           226 78+96=174 78+96=174     True
     8            3           227 28+65=093 28+65=093     True
     8            3           228 61+64=125 61+64=115    False
     8            3           229 32+64=096 32+64=096     True
     8            3           230 98+32=130 98+32=130     True
     8            3           231 25+35=060 25+35=060     True
     8            3           232 05+08=013 05+08=013     True
     8            3           233 05+26=031 05+26=031     True
     8            3           234 84+71=155 84+71=155     True
     8            3           235 33+10=043 33+10=043     True
     8            3           236 98+35=133 98+35=133     True
     8            3           237 68+98=166 68+98=166     True
     8            3           238 03+63=066 03+63=056    False
     8            3           239 12+96=108 12+96=108     True
     8            3           240 02+81=083 02+81=083     True
     8            3           241 83+13=096 83+13=096     True
     8            3           242 55+92=147 55+92=147     True
     8            3           243 96+09=105 96+09=105     True
     8            3           244 61+08=069 61+08=069     True
     8            3           245 39+75=114 39+75=114     True
     8            3           246 40+74=114 40+74=114     True
     8            3           247 39+80=119 39+80=119     True
     8            3           248 57+95=152 57+95=152     True
     8            3           249 92+97=189 92+97=189     True
     8            3           250 33+03=036 33+03=036     True
     8            3           251 74+92=166 74+92=166     True
     8            3           252 99+09=108 99+09=108     True
     8            3           253 98+10=108 98+10=108     True
     8            3           254 46+77=123 46+77=123     True
     8            3           255 85+78=163 85+78=163     True
     8            4             0 41+21=062 41+21=052    False
     8            4             1 49+13=062 49+13=062     True
     8            4             2 59+07=066 59+07=066     True
     8            4             3 31+11=042 31+11=042     True
     8            4             4 74+16=090 74+16=090     True
     8            4             5 43+38=081 43+38=081     True
     8            4             6 08+67=075 08+67=075     True
     8            4             7 31+66=097 31+66=097     True
     8            4             8 10+31=041 10+31=041     True
     8            4             9 34+59=093 34+59=093     True
     8            4            10 78+42=120 78+42=110    False
     8            4            11 13+41=054 13+41=054     True
     8            4            12 97+89=186 97+89=186     True
     8            4            13 15+62=077 15+62=077     True
     8            4            14 39+36=075 39+36=075     True
     8            4            15 21+25=046 21+25=046     True
     8            4            16 74+56=130 74+56=120    False
     8            4            17 85+47=132 85+47=132     True
     8            4            18 47+32=079 47+32=079     True
     8            4            19 37+66=103 37+66=103     True
     8            4            20 16+29=045 16+29=045     True
     8            4            21 86+77=163 86+77=163     True
     8            4            22 80+07=087 80+07=087     True
     8            4            23 87+05=092 87+05=092     True
     8            4            24 58+16=074 58+16=074     True
     8            4            25 52+79=131 52+79=131     True
     8            4            26 91+08=099 91+08=099     True
     8            4            27 47+78=125 47+78=125     True
     8            4            28 86+96=182 86+96=182     True
     8            4            29 90+22=112 90+22=112     True
     8            4            30 31+18=049 31+18=049     True
     8            4            31 86+15=101 86+15=101     True
     8            4            32 15+95=110 15+95=100    False
     8            4            33 42+11=053 42+11=053     True
     8            4            34 65+99=164 65+99=164     True
     8            4            35 89+29=118 89+29=118     True
     8            4            36 35+11=046 35+11=046     True
     8            4            37 71+41=112 71+41=112     True
     8            4            38 16+24=040 16+24=040     True
     8            4            39 77+82=159 77+82=159     True
     8            4            40 55+89=144 55+89=144     True
     8            4            41 17+88=105 17+88=105     True
     8            4            42 54+72=126 54+72=126     True
     8            4            43 34+98=132 34+98=132     True
     8            4            44 09+97=106 09+97=106     True
     8            4            45 91+07=098 91+07=098     True
     8            4            46 55+94=149 55+94=149     True
     8            4            47 22+58=080 22+58=080     True
     8            4            48 91+37=128 91+37=128     True
     8            4            49 16+10=026 16+10=026     True
     8            4            50 96+32=128 96+32=128     True
     8            4            51 35+75=110 35+75=100    False
     8            4            52 88+73=161 88+73=161     True
     8            4            53 35+18=053 35+18=053     True
     8            4            54 33+10=043 33+10=043     True
     8            4            55 08+50=058 08+50=058     True
     8            4            56 22+62=084 22+62=084     True
     8            4            57 26+37=063 26+37=063     True
     8            4            58 80+27=107 80+27=108    False
     8            4            59 68+28=096 68+28=096     True
     8            4            60 48+03=051 48+03=051     True
     8            4            61 40+18=058 40+18=058     True
     8            4            62 16+59=075 16+59=075     True
     8            4            63 02+19=021 02+19=031    False
     8            4            64 01+09=010 01+09=010     True
     8            4            65 62+68=130 62+68=130     True
     8            4            66 09+71=080 09+71=070    False
     8            4            67 00+58=058 00+58=058     True
     8            4            68 16+45=061 16+45=061     True
     8            4            69 24+98=122 24+98=122     True
     8            4            70 47+92=139 47+92=139     True
     8            4            71 94+84=178 94+84=178     True
     8            4            72 21+32=053 21+32=053     True
     8            4            73 29+82=111 29+82=111     True
     8            4            74 32+79=111 32+79=111     True
     8            4            75 13+98=111 13+98=111     True
     8            4            76 41+94=135 41+94=135     True
     8            4            77 51+84=135 51+84=135     True
     8            4            78 42+05=047 42+05=047     True
     8            4            79 39+03=042 39+03=042     True
     8            4            80 02+92=094 02+92=094     True
     8            4            81 99+81=180 99+81=180     True
     8            4            82 32+68=100 32+68=100     True
     8            4            83 52+17=069 52+17=069     True
     8            4            84 56+58=114 56+58=114     True
     8            4            85 21+48=069 21+48=069     True
     8            4            86 61+71=132 61+71=122    False
     8            4            87 17+01=018 17+01=018     True
     8            4            88 68+23=091 68+23=091     True
     8            4            89 00+37=037 00+37=037     True
     8            4            90 94+88=182 94+88=182     True
     8            4            91 06+31=037 06+31=037     True
     8            4            92 27+18=045 27+18=045     True
     8            4            93 41+81=122 41+81=122     True
     8            4            94 15+86=101 15+86=101     True
     8            4            95 36+87=123 36+87=123     True
     8            4            96 17+37=054 17+37=053    False
     8            4            97 13+86=099 13+86=099     True
     8            4            98 29+69=098 29+69=098     True
     8            4            99 31+99=130 31+99=130     True
     8            4           100 47+29=076 47+29=076     True
     8            4           101 08+81=089 08+81=089     True
     8            4           102 72+82=154 72+82=154     True
     8            4           103 46+91=137 46+91=137     True
     8            4           104 70+35=105 70+35=105     True
     8            4           105 90+55=145 90+55=145     True
     8            4           106 99+99=198 99+99=198     True
     8            4           107 60+97=157 60+97=157     True
     8            4           108 03+40=043 03+40=043     True
     8            4           109 35+49=084 35+49=084     True
     8            4           110 32+02=034 32+02=034     True
     8            4           111 70+18=088 70+18=088     True
     8            4           112 99+05=104 99+05=104     True
     8            4           113 78+73=151 78+73=151     True
     8            4           114 03+02=005 03+02=005     True
     8            4           115 50+14=064 50+14=064     True
     8            4           116 62+02=064 62+02=054    False
     8            4           117 16+74=090 16+74=090     True
     8            4           118 68+65=133 68+65=133     True
     8            4           119 74+81=155 74+81=155     True
     8            4           120 37+48=085 37+48=085     True
     8            4           121 63+04=067 63+04=057    False
     8            4           122 06+62=068 06+62=068     True
     8            4           123 95+75=170 95+75=160    False
     8            4           124 92+37=129 92+37=129     True
     8            4           125 81+32=113 81+32=113     True
     8            4           126 53+28=081 53+28=081     True
     8            4           127 52+42=094 52+42=094     True
     8            4           128 66+97=163 66+97=163     True
     8            4           129 00+48=048 00+48=048     True
     8            4           130 65+32=097 65+32=097     True
     8            4           131 60+89=149 60+89=159    False
     8            4           132 71+61=132 71+61=122    False
     8            4           133 98+50=148 98+50=148     True
     8            4           134 90+96=186 90+96=186     True
     8            4           135 02+96=098 02+96=098     True
     8            4           136 62+75=137 62+75=137     True
     8            4           137 41+28=069 41+28=069     True
     8            4           138 95+79=174 95+79=174     True
     8            4           139 48+41=089 48+41=089     True
     8            4           140 87+95=182 87+95=182     True
     8            4           141 75+38=113 75+38=103    False
     8            4           142 31+55=086 31+55=086     True
     8            4           143 54+63=117 54+63=117     True
     8            4           144 75+82=157 75+82=157     True
     8            4           145 46+45=091 46+45=091     True
     8            4           146 13+08=021 13+08=021     True
     8            4           147 77+97=174 77+97=173    False
     8            4           148 37+35=072 37+35=072     True
     8            4           149 21+89=110 21+89=110     True
     8            4           150 58+51=109 58+51=109     True
     8            4           151 91+48=139 91+48=139     True
     8            4           152 33+23=056 33+23=056     True
     8            4           153 80+96=176 80+96=176     True
     8            4           154 78+02=080 78+02=070    False
     8            4           155 38+95=133 38+95=133     True
     8            4           156 99+25=124 99+25=124     True
     8            4           157 30+76=106 30+76=106     True
     8            4           158 42+40=082 42+40=082     True
     8            4           159 85+58=143 85+58=143     True
     8            4           160 44+46=090 44+46=090     True
     8            4           161 06+41=047 06+41=047     True
     8            4           162 65+90=155 65+90=155     True
     8            4           163 43+83=126 43+83=126     True
     8            4           164 36+61=097 36+61=097     True
     8            4           165 61+51=112 61+51=102    False
     8            4           166 38+09=047 38+09=047     True
     8            4           167 21+97=118 21+97=118     True
     8            4           168 83+30=113 83+30=113     True
     8            4           169 11+79=090 11+79=090     True
     8            4           170 14+29=043 14+29=043     True
     8            4           171 21+11=032 21+11=032     True
     8            4           172 43+53=096 43+53=096     True
     8            4           173 02+58=060 02+58=060     True
     8            4           174 78+82=160 78+82=160     True
     8            4           175 91+11=102 91+11=102     True
     8            4           176 58+54=112 58+54=112     True
     8            4           177 00+15=015 00+15=015     True
     8            4           178 83+51=134 83+51=134     True
     8            4           179 44+72=116 44+72=116     True
     8            4           180 71+20=091 71+20=091     True
     8            4           181 24+99=123 24+99=123     True
     8            4           182 46+30=076 46+30=076     True
     8            4           183 08+67=075 08+67=075     True
     8            4           184 47+42=089 47+42=089     True
     8            4           185 95+67=162 95+67=162     True
     8            4           186 40+56=096 40+56=096     True
     8            4           187 17+95=112 17+95=112     True
     8            4           188 94+66=160 94+66=160     True
     8            4           189 14+58=072 14+58=072     True
     8            4           190 56+05=061 56+05=061     True
     8            4           191 70+01=071 70+01=061    False
     8            4           192 97+59=156 97+59=156     True
     8            4           193 94+67=161 94+67=161     True
     8            4           194 13+41=054 13+41=054     True
     8            4           195 85+15=100 85+15=090    False
     8            4           196 48+53=101 48+53=101     True
     8            4           197 62+75=137 62+75=137     True
     8            4           198 87+47=134 87+47=133    False
     8            4           199 31+88=119 31+88=119     True
     8            4           200 97+16=113 97+16=113     True
     8            4           201 48+45=093 48+45=093     True
     8            4           202 99+00=099 99+00=099     True
     8            4           203 15+01=016 15+01=016     True
     8            4           204 28+96=124 28+96=124     True
     8            4           205 20+11=031 20+11=031     True
     8            4           206 07+56=063 07+56=063     True
     8            4           207 06+08=014 06+08=014     True
     8            4           208 45+46=091 45+46=091     True
     8            4           209 48+85=133 48+85=133     True
     8            4           210 62+14=076 62+14=076     True
     8            4           211 82+31=113 82+31=113     True
     8            4           212 85+88=173 85+88=173     True
     8            4           213 77+08=085 77+08=075    False
     8            4           214 16+64=080 16+64=080     True
     8            4           215 00+27=027 00+27=027     True
     8            4           216 36+75=111 36+75=111     True
     8            4           217 38+38=076 38+38=076     True
     8            4           218 88+32=120 88+32=110    False
     8            4           219 09+88=097 09+88=097     True
     8            4           220 96+87=183 96+87=183     True
     8            4           221 71+29=100 71+29=090    False
     8            4           222 99+13=112 99+13=112     True
     8            4           223 03+13=016 03+13=016     True
     8            4           224 67+23=090 67+23=080    False
     8            4           225 15+98=113 15+98=113     True
     8            4           226 10+08=018 10+08=018     True
     8            4           227 46+24=070 46+24=060    False
     8            4           228 55+63=118 55+63=118     True
     8            4           229 28+06=034 28+06=034     True
     8            4           230 43+87=130 43+87=130     True
     8            4           231 34+05=039 34+05=049    False
2024-09-20 14:10:30,300 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.
2024-09-20 14:10:30,301 - root - INFO - ====================================================== Starting Train Epoch: 9/9 ======================================================
2024-09-20 14:10:30,301 - root - INFO - Learning rates for each parameter group: 0.00117851130197757937, 0.00117851130197757937
  0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 9, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s]
Epoch: 9, Step: 1:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=50.79201, average_batch_loss=0.19841, average_batch_perplexity=1.21946, lr=0.001178511, 0.001178511]
Epoch: 9, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=50.79201, average_batch_loss=0.19841, average_batch_perplexity=1.21946, lr=0.001178511, 0.001178511]
Epoch: 9, Step: 2:   0%|          | 0/28 [00:00<?, ?it/s, total_batch_loss=48.12430, average_batch_loss=0.18799, average_batch_perplexity=1.20682, lr=0.001175901, 0.001175901]
Epoch: 9, Step: 2:   7%|▋         | 2/28 [00:00<00:02, 11.74it/s, total_batch_loss=48.12430, average_batch_loss=0.18799, average_batch_perplexity=1.20682, lr=0.001175901, 0.001175901]
Epoch: 9, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.74it/s, total_batch_loss=48.12430, average_batch_loss=0.18799, average_batch_perplexity=1.20682, lr=0.001175901, 0.001175901]
Epoch: 9, Step: 3:   7%|▋         | 2/28 [00:00<00:02, 11.74it/s, total_batch_loss=49.34882, average_batch_loss=0.19277, average_batch_perplexity=1.21260, lr=0.001173308, 0.001173308]
Epoch: 9, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.74it/s, total_batch_loss=49.34882, average_batch_loss=0.19277, average_batch_perplexity=1.21260, lr=0.001173308, 0.001173308]
Epoch: 9, Step: 4:   7%|▋         | 2/28 [00:00<00:02, 11.74it/s, total_batch_loss=46.44347, average_batch_loss=0.18142, average_batch_perplexity=1.19892, lr=0.001170732, 0.001170732]
Epoch: 9, Step: 4:  14%|█▍        | 4/28 [00:00<00:01, 12.24it/s, total_batch_loss=46.44347, average_batch_loss=0.18142, average_batch_perplexity=1.19892, lr=0.001170732, 0.001170732]
Epoch: 9, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.24it/s, total_batch_loss=46.44347, average_batch_loss=0.18142, average_batch_perplexity=1.19892, lr=0.001170732, 0.001170732]
Epoch: 9, Step: 5:  14%|█▍        | 4/28 [00:00<00:01, 12.24it/s, total_batch_loss=45.92714, average_batch_loss=0.17940, average_batch_perplexity=1.19650, lr=0.001168173, 0.001168173]
Epoch: 9, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.24it/s, total_batch_loss=45.92714, average_batch_loss=0.17940, average_batch_perplexity=1.19650, lr=0.001168173, 0.001168173]
Epoch: 9, Step: 6:  14%|█▍        | 4/28 [00:00<00:01, 12.24it/s, total_batch_loss=50.60395, average_batch_loss=0.19767, average_batch_perplexity=1.21856, lr=0.001165631, 0.001165631]
Epoch: 9, Step: 6:  21%|██▏       | 6/28 [00:00<00:01, 12.07it/s, total_batch_loss=50.60395, average_batch_loss=0.19767, average_batch_perplexity=1.21856, lr=0.001165631, 0.001165631]
Epoch: 9, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.07it/s, total_batch_loss=50.60395, average_batch_loss=0.19767, average_batch_perplexity=1.21856, lr=0.001165631, 0.001165631]
Epoch: 9, Step: 7:  21%|██▏       | 6/28 [00:00<00:01, 12.07it/s, total_batch_loss=44.93650, average_batch_loss=0.17553, average_batch_perplexity=1.19188, lr=0.001163105, 0.001163105]
Epoch: 9, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.07it/s, total_batch_loss=44.93650, average_batch_loss=0.17553, average_batch_perplexity=1.19188, lr=0.001163105, 0.001163105]
Epoch: 9, Step: 8:  21%|██▏       | 6/28 [00:00<00:01, 12.07it/s, total_batch_loss=49.44500, average_batch_loss=0.19314, average_batch_perplexity=1.21306, lr=0.001160596, 0.001160596]
Epoch: 9, Step: 8:  29%|██▊       | 8/28 [00:00<00:01, 12.19it/s, total_batch_loss=49.44500, average_batch_loss=0.19314, average_batch_perplexity=1.21306, lr=0.001160596, 0.001160596]
Epoch: 9, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.19it/s, total_batch_loss=49.44500, average_batch_loss=0.19314, average_batch_perplexity=1.21306, lr=0.001160596, 0.001160596]
Epoch: 9, Step: 9:  29%|██▊       | 8/28 [00:00<00:01, 12.19it/s, total_batch_loss=50.19176, average_batch_loss=0.19606, average_batch_perplexity=1.21660, lr=0.001158103, 0.001158103]
Epoch: 9, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.19it/s, total_batch_loss=50.19176, average_batch_loss=0.19606, average_batch_perplexity=1.21660, lr=0.001158103, 0.001158103]
Epoch: 9, Step: 10:  29%|██▊       | 8/28 [00:00<00:01, 12.19it/s, total_batch_loss=49.01073, average_batch_loss=0.19145, average_batch_perplexity=1.21100, lr=0.001155625, 0.001155625]
Epoch: 9, Step: 10:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=49.01073, average_batch_loss=0.19145, average_batch_perplexity=1.21100, lr=0.001155625, 0.001155625]
Epoch: 9, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=49.01073, average_batch_loss=0.19145, average_batch_perplexity=1.21100, lr=0.001155625, 0.001155625]
Epoch: 9, Step: 11:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=51.62689, average_batch_loss=0.20167, average_batch_perplexity=1.22344, lr=0.001153164, 0.001153164]
Epoch: 9, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=51.62689, average_batch_loss=0.20167, average_batch_perplexity=1.22344, lr=0.001153164, 0.001153164]
Epoch: 9, Step: 12:  36%|███▌      | 10/28 [00:00<00:01, 12.18it/s, total_batch_loss=46.63403, average_batch_loss=0.18216, average_batch_perplexity=1.19981, lr=0.001150718, 0.001150718]
Epoch: 9, Step: 12:  43%|████▎     | 12/28 [00:00<00:01, 12.27it/s, total_batch_loss=46.63403, average_batch_loss=0.18216, average_batch_perplexity=1.19981, lr=0.001150718, 0.001150718]
Epoch: 9, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=46.63403, average_batch_loss=0.18216, average_batch_perplexity=1.19981, lr=0.001150718, 0.001150718]
Epoch: 9, Step: 13:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=41.82396, average_batch_loss=0.16337, average_batch_perplexity=1.17748, lr=0.001148288, 0.001148288]
Epoch: 9, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=41.82396, average_batch_loss=0.16337, average_batch_perplexity=1.17748, lr=0.001148288, 0.001148288]
Epoch: 9, Step: 14:  43%|████▎     | 12/28 [00:01<00:01, 12.27it/s, total_batch_loss=51.75311, average_batch_loss=0.20216, average_batch_perplexity=1.22404, lr=0.001145873, 0.001145873]
Epoch: 9, Step: 14:  50%|█████     | 14/28 [00:01<00:01, 12.05it/s, total_batch_loss=51.75311, average_batch_loss=0.20216, average_batch_perplexity=1.22404, lr=0.001145873, 0.001145873]
Epoch: 9, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.05it/s, total_batch_loss=51.75311, average_batch_loss=0.20216, average_batch_perplexity=1.22404, lr=0.001145873, 0.001145873]
Epoch: 9, Step: 15:  50%|█████     | 14/28 [00:01<00:01, 12.05it/s, total_batch_loss=44.71657, average_batch_loss=0.17467, average_batch_perplexity=1.19086, lr=0.001143473, 0.001143473]
Epoch: 9, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.05it/s, total_batch_loss=44.71657, average_batch_loss=0.17467, average_batch_perplexity=1.19086, lr=0.001143473, 0.001143473]
Epoch: 9, Step: 16:  50%|█████     | 14/28 [00:01<00:01, 12.05it/s, total_batch_loss=39.45398, average_batch_loss=0.15412, average_batch_perplexity=1.16663, lr=0.001141089, 0.001141089]
Epoch: 9, Step: 16:  57%|█████▋    | 16/28 [00:01<00:00, 12.20it/s, total_batch_loss=39.45398, average_batch_loss=0.15412, average_batch_perplexity=1.16663, lr=0.001141089, 0.001141089]
Epoch: 9, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.20it/s, total_batch_loss=39.45398, average_batch_loss=0.15412, average_batch_perplexity=1.16663, lr=0.001141089, 0.001141089]
Epoch: 9, Step: 17:  57%|█████▋    | 16/28 [00:01<00:00, 12.20it/s, total_batch_loss=47.38599, average_batch_loss=0.18510, average_batch_perplexity=1.20334, lr=0.001138719, 0.001138719]
Epoch: 9, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.20it/s, total_batch_loss=47.38599, average_batch_loss=0.18510, average_batch_perplexity=1.20334, lr=0.001138719, 0.001138719]
Epoch: 9, Step: 18:  57%|█████▋    | 16/28 [00:01<00:00, 12.20it/s, total_batch_loss=45.27081, average_batch_loss=0.17684, average_batch_perplexity=1.19344, lr=0.001136364, 0.001136364]
Epoch: 9, Step: 18:  64%|██████▍   | 18/28 [00:01<00:00, 12.07it/s, total_batch_loss=45.27081, average_batch_loss=0.17684, average_batch_perplexity=1.19344, lr=0.001136364, 0.001136364]
Epoch: 9, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.07it/s, total_batch_loss=45.27081, average_batch_loss=0.17684, average_batch_perplexity=1.19344, lr=0.001136364, 0.001136364]
Epoch: 9, Step: 19:  64%|██████▍   | 18/28 [00:01<00:00, 12.07it/s, total_batch_loss=44.93793, average_batch_loss=0.17554, average_batch_perplexity=1.19189, lr=0.001134023, 0.001134023]
Epoch: 9, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.07it/s, total_batch_loss=44.93793, average_batch_loss=0.17554, average_batch_perplexity=1.19189, lr=0.001134023, 0.001134023]
Epoch: 9, Step: 20:  64%|██████▍   | 18/28 [00:01<00:00, 12.07it/s, total_batch_loss=42.98513, average_batch_loss=0.16791, average_batch_perplexity=1.18283, lr=0.001131697, 0.001131697]
Epoch: 9, Step: 20:  71%|███████▏  | 20/28 [00:01<00:00, 11.94it/s, total_batch_loss=42.98513, average_batch_loss=0.16791, average_batch_perplexity=1.18283, lr=0.001131697, 0.001131697]
Epoch: 9, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 11.94it/s, total_batch_loss=42.98513, average_batch_loss=0.16791, average_batch_perplexity=1.18283, lr=0.001131697, 0.001131697]
Epoch: 9, Step: 21:  71%|███████▏  | 20/28 [00:01<00:00, 11.94it/s, total_batch_loss=39.07924, average_batch_loss=0.15265, average_batch_perplexity=1.16492, lr=0.001129385, 0.001129385]
Epoch: 9, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 11.94it/s, total_batch_loss=39.07924, average_batch_loss=0.15265, average_batch_perplexity=1.16492, lr=0.001129385, 0.001129385]
Epoch: 9, Step: 22:  71%|███████▏  | 20/28 [00:01<00:00, 11.94it/s, total_batch_loss=35.62823, average_batch_loss=0.13917, average_batch_perplexity=1.14932, lr=0.001127087, 0.001127087]
Epoch: 9, Step: 22:  79%|███████▊  | 22/28 [00:01<00:00, 12.00it/s, total_batch_loss=35.62823, average_batch_loss=0.13917, average_batch_perplexity=1.14932, lr=0.001127087, 0.001127087]
Epoch: 9, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.00it/s, total_batch_loss=35.62823, average_batch_loss=0.13917, average_batch_perplexity=1.14932, lr=0.001127087, 0.001127087]
Epoch: 9, Step: 23:  79%|███████▊  | 22/28 [00:01<00:00, 12.00it/s, total_batch_loss=42.13190, average_batch_loss=0.16458, average_batch_perplexity=1.17890, lr=0.001124803, 0.001124803]
Epoch: 9, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.00it/s, total_batch_loss=42.13190, average_batch_loss=0.16458, average_batch_perplexity=1.17890, lr=0.001124803, 0.001124803]
Epoch: 9, Step: 24:  79%|███████▊  | 22/28 [00:01<00:00, 12.00it/s, total_batch_loss=42.47777, average_batch_loss=0.16593, average_batch_perplexity=1.18049, lr=0.001122533, 0.001122533]
Epoch: 9, Step: 24:  86%|████████▌ | 24/28 [00:01<00:00, 12.05it/s, total_batch_loss=42.47777, average_batch_loss=0.16593, average_batch_perplexity=1.18049, lr=0.001122533, 0.001122533]
Epoch: 9, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.05it/s, total_batch_loss=42.47777, average_batch_loss=0.16593, average_batch_perplexity=1.18049, lr=0.001122533, 0.001122533]
Epoch: 9, Step: 25:  86%|████████▌ | 24/28 [00:02<00:00, 12.05it/s, total_batch_loss=38.05074, average_batch_loss=0.14864, average_batch_perplexity=1.16025, lr=0.001120277, 0.001120277]
Epoch: 9, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.05it/s, total_batch_loss=38.05074, average_batch_loss=0.14864, average_batch_perplexity=1.16025, lr=0.001120277, 0.001120277]
Epoch: 9, Step: 26:  86%|████████▌ | 24/28 [00:02<00:00, 12.05it/s, total_batch_loss=37.62466, average_batch_loss=0.14697, average_batch_perplexity=1.15832, lr=0.001118034, 0.001118034]
Epoch: 9, Step: 26:  93%|█████████▎| 26/28 [00:02<00:00, 11.97it/s, total_batch_loss=37.62466, average_batch_loss=0.14697, average_batch_perplexity=1.15832, lr=0.001118034, 0.001118034]
Epoch: 9, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 11.97it/s, total_batch_loss=37.62466, average_batch_loss=0.14697, average_batch_perplexity=1.15832, lr=0.001118034, 0.001118034]
Epoch: 9, Step: 27:  93%|█████████▎| 26/28 [00:02<00:00, 11.97it/s, total_batch_loss=42.60037, average_batch_loss=0.16641, average_batch_perplexity=1.18105, lr=0.001115805, 0.001115805]
Epoch: 9, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 11.97it/s, total_batch_loss=42.60037, average_batch_loss=0.16641, average_batch_perplexity=1.18105, lr=0.001115805, 0.001115805]
Epoch: 9, Step: 28:  93%|█████████▎| 26/28 [00:02<00:00, 11.97it/s, total_batch_loss=17.79465, average_batch_loss=0.20221, average_batch_perplexity=1.22411, lr=0.001113589, 0.001113589]
Epoch: 9, Step: 28: 100%|██████████| 28/28 [00:02<00:00, 13.25it/s, total_batch_loss=17.79465, average_batch_loss=0.20221, average_batch_perplexity=1.22411, lr=0.001113589, 0.001113589]
                                                                                                                                                                                         

2024-09-20 14:10:32,575 - root - INFO - Total Samples:                   7000
2024-09-20 14:10:32,576 - root - INFO - Total Batches:                   28
2024-09-20 14:10:32,576 - root - INFO - Average Epoch Train Loss:        0.17669
2024-09-20 14:10:32,577 - root - INFO - Average Epoch Train Perplexity:  1.19326
2024-09-20 14:10:32,577 - root - INFO - 
2024-09-20 14:10:32,578 - root - INFO - ====================================================== Starting Valid Epoch: 9/9 ======================================================
  0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 9, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s]
Epoch: 9, Step: 1:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=40.87537, average_batch_loss=0.15967, average_batch_perplexity=1.17312]
Epoch: 9, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=40.87537, average_batch_loss=0.15967, average_batch_perplexity=1.17312]
Epoch: 9, Step: 2:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=42.97948, average_batch_loss=0.16789, average_batch_perplexity=1.18280]
Epoch: 9, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=42.97948, average_batch_loss=0.16789, average_batch_perplexity=1.18280]
Epoch: 9, Step: 3:   0%|          | 0/8 [00:00<?, ?it/s, total_batch_loss=48.15429, average_batch_loss=0.18810, average_batch_perplexity=1.20696]
Epoch: 9, Step: 3:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=48.15429, average_batch_loss=0.18810, average_batch_perplexity=1.20696]
Epoch: 9, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=48.15429, average_batch_loss=0.18810, average_batch_perplexity=1.20696]
Epoch: 9, Step: 4:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=39.25602, average_batch_loss=0.15334, average_batch_perplexity=1.16573]
Epoch: 9, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=39.25602, average_batch_loss=0.15334, average_batch_perplexity=1.16573]
Epoch: 9, Step: 5:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=41.24788, average_batch_loss=0.16112, average_batch_perplexity=1.17483]
Epoch: 9, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=41.24788, average_batch_loss=0.16112, average_batch_perplexity=1.17483]
Epoch: 9, Step: 6:  38%|███▊      | 3/8 [00:00<00:00, 22.67it/s, total_batch_loss=45.03639, average_batch_loss=0.17592, average_batch_perplexity=1.19235]
Epoch: 9, Step: 6:  75%|███████▌  | 6/8 [00:00<00:00, 22.89it/s, total_batch_loss=45.03639, average_batch_loss=0.17592, average_batch_perplexity=1.19235]
Epoch: 9, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 22.89it/s, total_batch_loss=45.03639, average_batch_loss=0.17592, average_batch_perplexity=1.19235]
Epoch: 9, Step: 7:  75%|███████▌  | 6/8 [00:00<00:00, 22.89it/s, total_batch_loss=41.77565, average_batch_loss=0.16319, average_batch_perplexity=1.17726]
Epoch: 9, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 22.89it/s, total_batch_loss=41.77565, average_batch_loss=0.16319, average_batch_perplexity=1.17726]
Epoch: 9, Step: 8:  75%|███████▌  | 6/8 [00:00<00:00, 22.89it/s, total_batch_loss=27.56857, average_batch_loss=0.13254, average_batch_perplexity=1.14173]
                                                                                                                                                         
2024-09-20 14:10:32,923 - root - INFO - Total Samples:                   2000
2024-09-20 14:10:32,924 - root - INFO - Total Batches:                   8
2024-09-20 14:10:32,924 - root - INFO - Average Epoch Valid Loss:        0.16345
2024-09-20 14:10:32,925 - root - INFO - Average Epoch Valid Perplexity:  1.17756
2024-09-20 14:10:32,925 - root - INFO - 
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.7500]
Evaluating and Generation.:   0%|          | 0/4 [00:00<?, ?it/s, accuracy: 0.7578]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.34it/s, accuracy: 0.7578]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.34it/s, accuracy: 0.7930]
Evaluating and Generation.:  50%|█████     | 2/4 [00:00<00:00, 10.34it/s, accuracy: 0.7457]
Evaluating and Generation.: 100%|██████████| 4/4 [00:00<00:00, 10.62it/s, accuracy: 0.7457]
                                                                                           
2024-09-20 14:10:33,309 - root - INFO - Correct/Total Samples:           762/1000
2024-09-20 14:10:33,310 - root - INFO - Eval Accuracy:                   0.762
2024-09-20 14:10:33,328 - root - INFO - 
 epoch  batch_index  sample_index  equation generated  correct
     9            1             0 13+48=061 13+48=071    False
     9            1             1 16+55=071 16+55=071     True
     9            1             2 79+34=113 79+34=113     True
     9            1             3 35+44=079 35+44=089    False
     9            1             4 16+50=066 16+50=066     True
     9            1             5 28+47=075 28+47=075     True
     9            1             6 00+74=074 00+74=074     True
     9            1             7 15+20=035 15+20=035     True
     9            1             8 72+60=132 72+60=132     True
     9            1             9 63+68=131 63+68=131     True
     9            1            10 29+45=074 29+45=074     True
     9            1            11 34+60=094 34+60=094     True
     9            1            12 53+70=123 53+70=123     True
     9            1            13 70+50=120 70+50=120     True
     9            1            14 11+84=095 11+84=096    False
     9            1            15 42+71=113 42+71=113     True
     9            1            16 98+22=120 98+22=110    False
     9            1            17 02+02=004 02+02=005    False
     9            1            18 15+85=100 15+85=100     True
     9            1            19 21+78=099 21+78=099     True
     9            1            20 61+79=140 61+79=140     True
     9            1            21 25+99=124 25+99=124     True
     9            1            22 09+85=094 09+85=094     True
     9            1            23 60+91=151 60+91=151     True
     9            1            24 35+30=065 35+30=075    False
     9            1            25 24+51=075 24+51=075     True
     9            1            26 93+91=184 93+91=185    False
     9            1            27 39+96=135 39+96=135     True
     9            1            28 64+35=099 64+35=099     True
     9            1            29 36+22=058 36+22=068    False
     9            1            30 68+45=113 68+45=113     True
     9            1            31 16+84=100 16+84=100     True
     9            1            32 91+52=143 91+52=144    False
     9            1            33 97+36=133 97+36=133     True
     9            1            34 27+37=064 27+37=074    False
     9            1            35 99+82=181 99+82=181     True
     9            1            36 03+42=045 03+42=056    False
     9            1            37 18+38=056 18+38=066    False
     9            1            38 32+20=052 32+20=052     True
     9            1            39 38+13=051 38+13=061    False
     9            1            40 68+42=110 68+42=110     True
     9            1            41 64+00=064 64+00=064     True
     9            1            42 48+94=142 48+94=142     True
     9            1            43 58+36=094 58+36=094     True
     9            1            44 41+22=063 41+22=064    False
     9            1            45 23+58=081 23+58=081     True
     9            1            46 67+46=113 67+46=113     True
     9            1            47 40+78=118 40+78=118     True
     9            1            48 90+38=128 90+38=128     True
     9            1            49 89+52=141 89+52=141     True
     9            1            50 37+77=114 37+77=114     True
     9            1            51 29+76=105 29+76=105     True
     9            1            52 42+90=132 42+90=132     True
     9            1            53 45+82=127 45+82=127     True
     9            1            54 35+95=130 35+95=120    False
     9            1            55 92+98=190 92+98=190     True
     9            1            56 73+91=164 73+91=164     True
     9            1            57 53+97=150 53+97=150     True
     9            1            58 98+69=167 98+69=167     True
     9            1            59 20+46=066 20+46=066     True
     9            1            60 48+69=117 48+69=117     True
     9            1            61 62+31=093 62+31=093     True
     9            1            62 80+59=139 80+59=139     True
     9            1            63 58+12=070 58+12=070     True
     9            1            64 08+96=104 08+96=104     True
     9            1            65 67+06=073 67+06=073     True
     9            1            66 22+04=026 22+04=027    False
     9            1            67 61+87=148 61+87=148     True
     9            1            68 95+27=122 95+27=122     True
     9            1            69 49+83=132 49+83=132     True
     9            1            70 43+00=043 43+00=053    False
     9            1            71 01+85=086 01+85=086     True
     9            1            72 11+68=079 11+68=079     True
     9            1            73 80+03=083 80+03=083     True
     9            1            74 54+83=137 54+83=137     True
     9            1            75 73+47=120 73+47=110    False
     9            1            76 99+93=192 99+93=192     True
     9            1            77 99+13=112 99+13=112     True
     9            1            78 92+66=158 92+66=158     True
     9            1            79 90+31=121 90+31=121     True
     9            1            80 25+69=094 25+69=094     True
     9            1            81 25+44=069 25+44=079    False
     9            1            82 00+93=093 00+93=093     True
     9            1            83 88+87=175 88+87=175     True
     9            1            84 47+56=103 47+56=103     True
     9            1            85 43+59=102 43+59=102     True
     9            1            86 22+00=022 22+00=022     True
     9            1            87 34+04=038 34+04=038     True
     9            1            88 65+13=078 65+13=078     True
     9            1            89 39+82=121 39+82=121     True
     9            1            90 66+83=149 66+83=149     True
     9            1            91 51+69=120 51+69=110    False
     9            1            92 80+21=101 80+21=101     True
     9            1            93 36+79=115 36+79=115     True
     9            1            94 21+68=089 21+68=089     True
     9            1            95 11+66=077 11+66=077     True
     9            1            96 55+19=074 55+19=074     True
     9            1            97 51+61=112 51+61=112     True
     9            1            98 38+88=126 38+88=126     True
     9            1            99 37+27=064 37+27=074    False
     9            1           100 18+63=081 18+63=081     True
     9            1           101 48+11=059 48+11=069    False
     9            1           102 72+68=140 72+68=140     True
     9            1           103 37+39=076 37+39=076     True
     9            1           104 64+95=159 64+95=159     True
     9            1           105 49+75=124 49+75=124     True
     9            1           106 45+66=111 45+66=111     True
     9            1           107 34+87=121 34+87=121     True
     9            1           108 02+84=086 02+84=087    False
     9            1           109 95+00=095 95+00=095     True
     9            1           110 09+56=065 09+56=065     True
     9            1           111 22+66=088 22+66=088     True
     9            1           112 43+18=061 43+18=071    False
     9            1           113 61+35=096 61+35=096     True
     9            1           114 13+73=086 13+73=087    False
     9            1           115 25+95=120 25+95=110    False
     9            1           116 73+96=169 73+96=169     True
     9            1           117 03+96=099 03+96=099     True
     9            1           118 97+82=179 97+82=179     True
     9            1           119 18+42=060 18+42=070    False
     9            1           120 29+98=127 29+98=127     True
     9            1           121 61+00=061 61+00=061     True
     9            1           122 22+98=120 22+98=120     True
     9            1           123 12+50=062 12+50=062     True
     9            1           124 02+58=060 02+58=060     True
     9            1           125 75+86=161 75+86=161     True
     9            1           126 31+57=088 31+57=088     True
     9            1           127 49+82=131 49+82=131     True
     9            1           128 15+33=048 15+33=058    False
     9            1           129 49+57=106 49+57=106     True
     9            1           130 61+70=131 61+70=131     True
     9            1           131 91+51=142 91+51=142     True
     9            1           132 50+05=055 50+05=055     True
     9            1           133 44+16=060 44+16=070    False
     9            1           134 92+01=093 92+01=093     True
     9            1           135 85+82=167 85+82=167     True
     9            1           136 07+41=048 07+41=058    False
     9            1           137 40+06=046 40+06=056    False
     9            1           138 79+62=141 79+62=141     True
     9            1           139 95+62=157 95+62=157     True
     9            1           140 42+93=135 42+93=136    False
     9            1           141 32+73=105 32+73=106    False
     9            1           142 47+09=056 47+09=056     True
     9            1           143 59+50=109 59+50=109     True
     9            1           144 61+77=138 61+77=138     True
     9            1           145 64+06=070 64+06=070     True
     9            1           146 35+10=045 35+10=055    False
     9            1           147 32+88=120 32+88=120     True
     9            1           148 03+95=098 03+95=098     True
     9            1           149 11+38=049 11+38=059    False
     9            1           150 21+67=088 21+67=088     True
     9            1           151 33+25=058 33+25=068    False
     9            1           152 63+45=108 63+45=108     True
     9            1           153 56+12=068 56+12=078    False
     9            1           154 19+79=098 19+79=097    False
     9            1           155 60+43=103 60+43=104    False
     9            1           156 07+61=068 07+61=068     True
     9            1           157 58+03=061 58+03=071    False
     9            1           158 11+10=021 11+10=011    False
     9            1           159 49+89=138 49+89=138     True
     9            1           160 37+58=095 37+58=095     True
     9            1           161 59+78=137 59+78=137     True
     9            1           162 11+21=032 11+21=032     True
     9            1           163 37+43=080 37+43=080     True
     9            1           164 44+21=065 44+21=065     True
     9            1           165 22+97=119 22+97=119     True
     9            1           166 65+35=100 65+35=100     True
     9            1           167 06+51=057 06+51=057     True
     9            1           168 65+25=090 65+25=090     True
     9            1           169 74+94=168 74+94=168     True
     9            1           170 87+55=142 87+55=142     True
     9            1           171 90+67=157 90+67=157     True
     9            1           172 11+02=013 11+02=014    False
     9            1           173 01+66=067 01+66=067     True
     9            1           174 56+00=056 56+00=056     True
     9            1           175 58+52=110 58+52=110     True
     9            1           176 24+99=123 24+99=123     True
     9            1           177 97+13=110 97+13=100    False
     9            1           178 42+94=136 42+94=137    False
     9            1           179 60+15=075 60+15=075     True
     9            1           180 20+46=066 20+46=066     True
     9            1           181 40+70=110 40+70=110     True
     9            1           182 95+45=140 95+45=140     True
     9            1           183 96+95=191 96+95=191     True
     9            1           184 98+20=118 98+20=118     True
     9            1           185 43+19=062 43+19=072    False
     9            1           186 50+69=119 50+69=119     True
     9            1           187 27+53=080 27+53=080     True
     9            1           188 24+25=049 24+25=059    False
     9            1           189 65+92=157 65+92=157     True
     9            1           190 28+14=042 28+14=052    False
     9            1           191 20+57=077 20+57=077     True
     9            1           192 59+97=156 59+97=156     True
     9            1           193 98+32=130 98+32=130     True
     9            1           194 55+84=139 55+84=139     True
     9            1           195 20+39=059 20+39=069    False
     9            1           196 86+47=133 86+47=133     True
     9            1           197 92+36=128 92+36=128     True
     9            1           198 05+38=043 05+38=053    False
     9            1           199 77+36=113 77+36=113     True
     9            1           200 41+64=105 41+64=105     True
     9            1           201 74+51=125 74+51=125     True
     9            1           202 74+55=129 74+55=129     True
     9            1           203 64+64=128 64+64=128     True
     9            1           204 60+19=079 60+19=079     True
     9            1           205 77+96=173 77+96=173     True
     9            1           206 22+30=052 22+30=062    False
     9            1           207 82+49=131 82+49=131     True
     9            1           208 39+67=106 39+67=106     True
     9            1           209 62+40=102 62+40=102     True
     9            1           210 28+71=099 28+71=099     True
     9            1           211 47+26=073 47+26=073     True
     9            1           212 98+54=152 98+54=152     True
     9            1           213 38+70=108 38+70=108     True
     9            1           214 63+40=103 63+40=103     True
     9            1           215 86+62=148 86+62=148     True
     9            1           216 22+65=087 22+65=087     True
     9            1           217 41+17=058 41+17=068    False
     9            1           218 68+88=156 68+88=156     True
     9            1           219 96+70=166 96+70=166     True
     9            1           220 99+29=128 99+29=127    False
     9            1           221 83+39=122 83+39=122     True
     9            1           222 26+55=081 26+55=081     True
     9            1           223 53+70=123 53+70=123     True
     9            1           224 94+12=106 94+12=107    False
     9            1           225 00+37=037 00+37=037     True
     9            1           226 36+94=130 36+94=120    False
     9            1           227 40+58=098 40+58=098     True
     9            1           228 19+80=099 19+80=099     True
     9            1           229 49+44=093 49+44=093     True
     9            1           230 70+27=097 70+27=097     True
     9            1           231 52+80=132 52+80=132     True
     9            1           232 77+90=167 77+90=167     True
     9            1           233 13+92=105 13+92=106    False
     9            1           234 59+09=068 59+09=067    False
     9            1           235 33+55=088 33+55=088     True
     9            1           236 85+16=101 85+16=101     True
     9            1           237 25+65=090 25+65=090     True
     9            1           238 46+20=066 46+20=076    False
     9            1           239 29+52=081 29+52=081     True
     9            1           240 32+36=068 32+36=078    False
     9            1           241 47+08=055 47+08=065    False
     9            1           242 21+84=105 21+84=106    False
     9            1           243 24+45=069 24+45=079    False
     9            1           244 29+15=044 29+15=044     True
     9            1           245 83+03=086 83+03=087    False
     9            1           246 83+36=119 83+36=119     True
     9            1           247 58+95=153 58+95=153     True
     9            1           248 76+79=155 76+79=155     True
     9            1           249 63+30=093 63+30=093     True
     9            1           250 38+24=062 38+24=072    False
     9            1           251 19+46=065 19+46=065     True
     9            1           252 99+66=165 99+66=165     True
     9            1           253 95+73=168 95+73=168     True
     9            1           254 65+27=092 65+27=092     True
     9            1           255 91+83=174 91+83=175    False
     9            2             0 65+49=114 65+49=114     True
     9            2             1 03+08=011 03+08=011     True
     9            2             2 67+81=148 67+81=148     True
     9            2             3 47+23=070 47+23=070     True
     9            2             4 43+91=134 43+91=135    False
     9            2             5 41+67=108 41+67=108     True
     9            2             6 02+33=035 02+33=046    False
     9            2             7 64+84=148 64+84=148     True
     9            2             8 81+64=145 81+64=145     True
     9            2             9 80+11=091 80+11=091     True
     9            2            10 78+01=079 78+01=079     True
     9            2            11 89+18=107 89+18=107     True
     9            2            12 45+52=097 45+52=097     True
     9            2            13 35+30=065 35+30=075    False
     9            2            14 53+32=085 53+32=086    False
     9            2            15 49+90=139 49+90=139     True
     9            2            16 41+37=078 41+37=078     True
     9            2            17 35+14=049 35+14=059    False
     9            2            18 92+50=142 92+50=142     True
     9            2            19 37+60=097 37+60=097     True
     9            2            20 91+61=152 91+61=152     True
     9            2            21 80+77=157 80+77=157     True
     9            2            22 66+24=090 66+24=090     True
     9            2            23 81+07=088 81+07=088     True
     9            2            24 85+59=144 85+59=144     True
     9            2            25 19+69=088 19+69=087    False
     9            2            26 91+44=135 91+44=135     True
     9            2            27 25+29=054 25+29=064    False
     9            2            28 27+08=035 27+08=035     True
     9            2            29 66+14=080 66+14=080     True
     9            2            30 95+11=106 95+11=106     True
     9            2            31 13+97=110 13+97=100    False
     9            2            32 94+40=134 94+40=134     True
     9            2            33 74+31=105 74+31=105     True
     9            2            34 49+00=049 49+00=059    False
     9            2            35 59+18=077 59+18=077     True
     9            2            36 07+65=072 07+65=072     True
     9            2            37 83+55=138 83+55=138     True
     9            2            38 49+80=129 49+80=129     True
     9            2            39 64+17=081 64+17=081     True
     9            2            40 48+83=131 48+83=131     True
     9            2            41 95+44=139 95+44=139     True
     9            2            42 71+26=097 71+26=097     True
     9            2            43 06+74=080 06+74=080     True
     9            2            44 34+24=058 34+24=068    False
     9            2            45 59+71=130 59+71=120    False
     9            2            46 68+32=100 68+32=100     True
     9            2            47 38+81=119 38+81=119     True
     9            2            48 29+56=085 29+56=085     True
     9            2            49 54+55=109 54+55=109     True
     9            2            50 31+27=058 31+27=068    False
     9            2            51 97+89=186 97+89=186     True
     9            2            52 48+09=057 48+09=057     True
     9            2            53 86+76=162 86+76=162     True
     9            2            54 82+59=141 82+59=141     True
     9            2            55 01+67=068 01+67=068     True
     9            2            56 26+06=032 26+06=032     True
     9            2            57 22+46=068 22+46=078    False
     9            2            58 85+16=101 85+16=101     True
     9            2            59 29+08=037 29+08=037     True
     9            2            60 73+94=167 73+94=167     True
     9            2            61 19+62=081 19+62=081     True
     9            2            62 86+62=148 86+62=148     True
     9            2            63 38+99=137 38+99=137     True
     9            2            64 64+25=089 64+25=089     True
     9            2            65 61+72=133 61+72=134    False
     9            2            66 78+88=166 78+88=166     True
     9            2            67 43+66=109 43+66=109     True
     9            2            68 69+35=104 69+35=104     True
     9            2            69 33+77=110 33+77=110     True
     9            2            70 37+37=074 37+37=084    False
     9            2            71 87+54=141 87+54=141     True
     9            2            72 68+90=158 68+90=158     True
     9            2            73 83+44=127 83+44=127     True
     9            2            74 41+09=050 41+09=050     True
     9            2            75 13+48=061 13+48=071    False
     9            2            76 01+41=042 01+41=052    False
     9            2            77 19+74=093 19+74=093     True
     9            2            78 15+05=020 15+05=010    False
     9            2            79 55+46=101 55+46=101     True
     9            2            80 68+33=101 68+33=101     True
     9            2            81 44+40=084 44+40=084     True
     9            2            82 88+03=091 88+03=091     True
     9            2            83 81+79=160 81+79=160     True
     9            2            84 18+98=116 18+98=116     True
     9            2            85 70+64=134 70+64=134     True
     9            2            86 26+44=070 26+44=070     True
     9            2            87 98+87=185 98+87=185     True
     9            2            88 18+74=092 18+74=092     True
     9            2            89 50+68=118 50+68=118     True
     9            2            90 13+51=064 13+51=065    False
     9            2            91 90+89=179 90+89=179     True
     9            2            92 47+78=125 47+78=125     True
     9            2            93 81+57=138 81+57=138     True
     9            2            94 34+47=081 34+47=081     True
     9            2            95 94+23=117 94+23=117     True
     9            2            96 07+70=077 07+70=077     True
     9            2            97 56+33=089 56+33=089     True
     9            2            98 33+04=037 33+04=037     True
     9            2            99 26+09=035 26+09=035     True
     9            2           100 14+92=106 14+92=107    False
     9            2           101 78+54=132 78+54=132     True
     9            2           102 36+76=112 36+76=112     True
     9            2           103 17+47=064 17+47=074    False
     9            2           104 28+18=046 28+18=046     True
     9            2           105 78+54=132 78+54=132     True
     9            2           106 84+72=156 84+72=157    False
     9            2           107 00+44=044 00+44=054    False
     9            2           108 50+41=091 50+41=091     True
     9            2           109 87+88=175 87+88=175     True
     9            2           110 11+66=077 11+66=077     True
     9            2           111 80+60=140 80+60=140     True
     9            2           112 78+76=154 78+76=154     True
     9            2           113 24+74=098 24+74=098     True
     9            2           114 88+48=136 88+48=136     True
     9            2           115 38+31=069 38+31=079    False
     9            2           116 29+27=056 29+27=066    False
     9            2           117 08+45=053 08+45=063    False
     9            2           118 28+13=041 28+13=051    False
     9            2           119 53+99=152 53+99=152     True
     9            2           120 47+92=139 47+92=139     True
     9            2           121 76+21=097 76+21=097     True
     9            2           122 53+96=149 53+96=149     True
     9            2           123 93+91=184 93+91=185    False
     9            2           124 97+33=130 97+33=120    False
     9            2           125 67+78=145 67+78=145     True
     9            2           126 58+05=063 58+05=073    False
     9            2           127 00+16=016 00+16=016     True
     9            2           128 80+19=099 80+19=099     True
     9            2           129 98+22=120 98+22=110    False
     9            2           130 09+62=071 09+62=071     True
     9            2           131 06+23=029 06+23=029     True
     9            2           132 32+99=131 32+99=131     True
     9            2           133 17+02=019 17+02=019     True
     9            2           134 64+35=099 64+35=099     True
     9            2           135 35+83=118 35+83=118     True
     9            2           136 71+36=107 71+36=107     True
     9            2           137 75+06=081 75+06=081     True
     9            2           138 88+95=183 88+95=183     True
     9            2           139 19+98=117 19+98=117     True
     9            2           140 28+89=117 28+89=117     True
     9            2           141 33+11=044 33+11=055    False
     9            2           142 34+49=083 34+49=083     True
     9            2           143 90+35=125 90+35=125     True
     9            2           144 22+90=112 22+90=112     True
     9            2           145 98+89=187 98+89=187     True
     9            2           146 88+47=135 88+47=135     True
     9            2           147 30+86=116 30+86=116     True
     9            2           148 31+48=079 31+48=089    False
     9            2           149 39+21=060 39+21=060     True
     9            2           150 19+17=036 19+17=036     True
     9            2           151 27+60=087 27+60=087     True
     9            2           152 12+16=028 12+16=028     True
     9            2           153 51+75=126 51+75=126     True
     9            2           154 10+74=084 10+74=084     True
     9            2           155 42+63=105 42+63=106    False
     9            2           156 40+14=054 40+14=064    False
     9            2           157 23+93=116 23+93=117    False
     9            2           158 85+26=111 85+26=111     True
     9            2           159 28+46=074 28+46=074     True
     9            2           160 28+33=061 28+33=071    False
     9            2           161 43+30=073 43+30=073     True
     9            2           162 89+72=161 89+72=161     True
     9            2           163 52+21=073 52+21=073     True
     9            2           164 21+54=075 21+54=075     True
     9            2           165 69+13=082 69+13=082     True
     9            2           166 07+60=067 07+60=067     True
     9            2           167 63+83=146 63+83=147    False
     9            2           168 80+69=149 80+69=149     True
     9            2           169 27+28=055 27+28=065    False
     9            2           170 42+31=073 42+31=073     True
     9            2           171 51+99=150 51+99=150     True
     9            2           172 28+75=103 28+75=103     True
     9            2           173 38+57=095 38+57=095     True
     9            2           174 83+16=099 83+16=099     True
     9            2           175 92+94=186 92+94=187    False
     9            2           176 55+75=130 55+75=120    False
     9            2           177 59+51=110 59+51=100    False
     9            2           178 33+09=042 33+09=052    False
     9            2           179 53+13=066 53+13=067    False
     9            2           180 05+70=075 05+70=075     True
     9            2           181 12+20=032 12+20=032     True
     9            2           182 11+49=060 11+49=070    False
     9            2           183 63+45=108 63+45=108     True
     9            2           184 92+23=115 92+23=116    False
     9            2           185 82+45=127 82+45=127     True
     9            2           186 23+41=064 23+41=065    False
     9            2           187 64+26=090 64+26=090     True
     9            2           188 91+24=115 91+24=115     True
     9            2           189 20+32=052 20+32=062    False
     9            2           190 83+21=104 83+21=105    False
     9            2           191 07+20=027 07+20=027     True
     9            2           192 94+14=108 94+14=108     True
     9            2           193 96+89=185 96+89=185     True
     9            2           194 13+08=021 13+08=021     True
     9            2           195 32+05=037 32+05=037     True
     9            2           196 09+51=060 09+51=060     True
     9            2           197 26+29=055 26+29=065    False
     9            2           198 49+65=114 49+65=114     True
     9            2           199 32+66=098 32+66=098     True
     9            2           200 41+08=049 41+08=059    False
     9            2           201 26+79=105 26+79=105     True
     9            2           202 29+91=120 29+91=110    False
     9            2           203 51+00=051 51+00=051     True
     9            2           204 61+60=121 61+60=111    False
     9            2           205 45+78=123 45+78=123     True
     9            2           206 56+16=072 56+16=072     True
     9            2           207 66+68=134 66+68=134     True
     9            2           208 32+16=048 32+16=058    False
     9            2           209 84+49=133 84+49=133     True
     9            2           210 45+09=054 45+09=064    False
     9            2           211 96+78=174 96+78=174     True
     9            2           212 10+02=012 10+02=012     True
     9            2           213 36+60=096 36+60=096     True
     9            2           214 44+36=080 44+36=080     True
     9            2           215 12+86=098 12+86=098     True
     9            2           216 94+54=148 94+54=148     True
     9            2           217 64+73=137 64+73=137     True
     9            2           218 73+10=083 73+10=083     True
     9            2           219 14+62=076 14+62=076     True
     9            2           220 25+22=047 25+22=057    False
     9            2           221 94+22=116 94+22=116     True
     9            2           222 41+76=117 41+76=117     True
     9            2           223 38+46=084 38+46=084     True
     9            2           224 71+72=143 71+72=144    False
     9            2           225 74+79=153 74+79=153     True
     9            2           226 99+67=166 99+67=166     True
     9            2           227 78+71=149 78+71=149     True
     9            2           228 23+19=042 23+19=052    False
     9            2           229 51+65=116 51+65=116     True
     9            2           230 94+86=180 94+86=180     True
     9            2           231 09+79=088 09+79=087    False
     9            2           232 69+39=108 69+39=107    False
     9            2           233 84+13=097 84+13=097     True
     9            2           234 36+59=095 36+59=095     True
     9            2           235 87+47=134 87+47=134     True
     9            2           236 50+00=050 50+00=050     True
     9            2           237 76+96=172 76+96=172     True
     9            2           238 12+18=030 12+18=030     True
     9            2           239 99+95=194 99+95=194     True
     9            2           240 22+00=022 22+00=022     True
     9            2           241 96+18=114 96+18=114     True
     9            2           242 51+20=071 51+20=071     True
     9            2           243 66+81=147 66+81=147     True
     9            2           244 78+18=096 78+18=096     True
     9            2           245 09+78=087 09+78=087     True
     9            2           246 24+20=044 24+20=054    False
     9            2           247 76+13=089 76+13=089     True
     9            2           248 05+10=015 05+10=015     True
     9            2           249 97+14=111 97+14=111     True
     9            2           250 92+38=130 92+38=130     True
     9            2           251 77+13=090 77+13=090     True
     9            2           252 70+19=089 70+19=089     True
     9            2           253 90+45=135 90+45=135     True
     9            2           254 50+09=059 50+09=069    False
     9            2           255 78+06=084 78+06=084     True
     9            3             0 03+25=028 03+25=028     True
     9            3             1 48+43=091 48+43=091     True
     9            3             2 39+47=086 39+47=086     True
     9            3             3 48+19=067 48+19=067     True
     9            3             4 07+22=029 07+22=029     True
     9            3             5 73+68=141 73+68=141     True
     9            3             6 14+56=070 14+56=070     True
     9            3             7 96+95=191 96+95=191     True
     9            3             8 96+28=124 96+28=124     True
     9            3             9 82+05=087 82+05=087     True
     9            3            10 27+94=121 27+94=121     True
     9            3            11 87+86=173 87+86=173     True
     9            3            12 00+68=068 00+68=068     True
     9            3            13 11+37=048 11+37=058    False
     9            3            14 95+93=188 95+93=188     True
     9            3            15 75+82=157 75+82=157     True
     9            3            16 41+71=112 41+71=112     True
     9            3            17 60+14=074 60+14=074     True
     9            3            18 77+77=154 77+77=154     True
     9            3            19 31+84=115 31+84=116    False
     9            3            20 31+57=088 31+57=088     True
     9            3            21 27+87=114 27+87=114     True
     9            3            22 31+89=120 31+89=120     True
     9            3            23 22+18=040 22+18=040     True
     9            3            24 38+25=063 38+25=073    False
     9            3            25 64+54=118 64+54=118     True
     9            3            26 85+60=145 85+60=145     True
     9            3            27 14+71=085 14+71=085     True
     9            3            28 06+16=022 06+16=022     True
     9            3            29 78+61=139 78+61=139     True
     9            3            30 65+75=140 65+75=140     True
     9            3            31 13+83=096 13+83=097    False
     9            3            32 75+49=124 75+49=114    False
     9            3            33 05+78=083 05+78=083     True
     9            3            34 66+55=121 66+55=121     True
     9            3            35 03+05=008 03+05=008     True
     9            3            36 69+99=168 69+99=167    False
     9            3            37 52+82=134 52+82=135    False
     9            3            38 45+97=142 45+97=142     True
     9            3            39 66+17=083 66+17=083     True
     9            3            40 36+17=053 36+17=063    False
     9            3            41 92+74=166 92+74=167    False
     9            3            42 48+44=092 48+44=092     True
     9            3            43 34+17=051 34+17=061    False
     9            3            44 56+11=067 56+11=077    False
     9            3            45 77+23=100 77+23=100     True
     9            3            46 10+11=021 10+11=011    False
     9            3            47 32+65=097 32+65=097     True
     9            3            48 53+49=102 53+49=102     True
     9            3            49 68+86=154 68+86=154     True
     9            3            50 52+94=146 52+94=147    False
     9            3            51 97+71=168 97+71=168     True
     9            3            52 05+37=042 05+37=052    False
     9            3            53 58+75=133 58+75=133     True
     9            3            54 06+24=030 06+24=030     True
     9            3            55 15+44=059 15+44=069    False
     9            3            56 90+49=139 90+49=139     True
     9            3            57 50+37=087 50+37=087     True
     9            3            58 88+61=149 88+61=149     True
     9            3            59 21+57=078 21+57=078     True
     9            3            60 24+85=109 24+85=109     True
     9            3            61 01+66=067 01+66=067     True
     9            3            62 50+46=096 50+46=096     True
     9            3            63 76+65=141 76+65=141     True
     9            3            64 23+74=097 23+74=097     True
     9            3            65 76+16=092 76+16=092     True
     9            3            66 06+08=014 06+08=014     True
     9            3            67 69+25=094 69+25=094     True
     9            3            68 15+23=038 15+23=038     True
     9            3            69 41+02=043 41+02=054    False
     9            3            70 16+66=082 16+66=082     True
     9            3            71 59+94=153 59+94=153     True
     9            3            72 32+88=120 32+88=120     True
     9            3            73 46+21=067 46+21=077    False
     9            3            74 57+28=085 57+28=085     True
     9            3            75 00+31=031 00+31=031     True
     9            3            76 77+07=084 77+07=084     True
     9            3            77 28+70=098 28+70=098     True
     9            3            78 05+61=066 05+61=066     True
     9            3            79 22+09=031 22+09=031     True
     9            3            80 08+94=102 08+94=102     True
     9            3            81 40+11=051 40+11=061    False
     9            3            82 10+48=058 10+48=068    False
     9            3            83 27+56=083 27+56=083     True
     9            3            84 42+16=058 42+16=068    False
     9            3            85 69+43=112 69+43=112     True
     9            3            86 57+69=126 57+69=126     True
     9            3            87 18+86=104 18+86=104     True
     9            3            88 86+80=166 86+80=166     True
     9            3            89 30+85=115 30+85=115     True
     9            3            90 77+66=143 77+66=143     True
     9            3            91 39+64=103 39+64=103     True
     9            3            92 76+61=137 76+61=137     True
     9            3            93 42+61=103 42+61=103     True
     9            3            94 07+30=037 07+30=037     True
     9            3            95 35+93=128 35+93=128     True
     9            3            96 40+90=130 40+90=130     True
     9            3            97 08+91=099 08+91=099     True
     9            3            98 62+34=096 62+34=097    False
     9            3            99 86+49=135 86+49=135     True
     9            3           100 73+23=096 73+23=097    False
     9            3           101 87+35=122 87+35=122     True
     9            3           102 35+31=066 35+31=076    False
     9            3           103 07+13=020 07+13=010    False
     9            3           104 39+41=080 39+41=080     True
     9            3           105 44+63=107 44+63=107     True
     9            3           106 94+66=160 94+66=160     True
     9            3           107 49+54=103 49+54=103     True
     9            3           108 79+46=125 79+46=125     True
     9            3           109 53+12=065 53+12=066    False
     9            3           110 60+92=152 60+92=152     True
     9            3           111 25+60=085 25+60=085     True
     9            3           112 64+53=117 64+53=117     True
     9            3           113 41+02=043 41+02=054    False
     9            3           114 00+97=097 00+97=097     True
     9            3           115 12+52=064 12+52=065    False
     9            3           116 39+50=089 39+50=089     True
     9            3           117 87+21=108 87+21=108     True
     9            3           118 04+99=103 04+99=103     True
     9            3           119 19+75=094 19+75=094     True
     9            3           120 90+05=095 90+05=095     True
     9            3           121 54+39=093 54+39=093     True
     9            3           122 29+26=055 29+26=065    False
     9            3           123 82+95=177 82+95=177     True
     9            3           124 55+09=064 55+09=064     True
     9            3           125 02+62=064 02+62=065    False
     9            3           126 68+30=098 68+30=098     True
     9            3           127 99+16=115 99+16=115     True
     9            3           128 63+11=074 63+11=075    False
     9            3           129 42+92=134 42+92=135    False
     9            3           130 99+16=115 99+16=115     True
     9            3           131 50+31=081 50+31=081     True
     9            3           132 23+46=069 23+46=079    False
     9            3           133 45+73=118 45+73=118     True
     9            3           134 89+77=166 89+77=166     True
     9            3           135 45+78=123 45+78=123     True
     9            3           136 96+60=156 96+60=156     True
     9            3           137 74+61=135 74+61=135     True
     9            3           138 87+01=088 87+01=088     True
     9            3           139 63+88=151 63+88=151     True
     9            3           140 59+72=131 59+72=131     True
     9            3           141 17+96=113 17+96=113     True
     9            3           142 89+77=166 89+77=166     True
     9            3           143 24+69=093 24+69=093     True
     9            3           144 75+83=158 75+83=158     True
     9            3           145 50+54=104 50+54=104     True
     9            3           146 93+47=140 93+47=140     True
     9            3           147 20+55=075 20+55=075     True
     9            3           148 91+79=170 91+79=170     True
     9            3           149 15+13=028 15+13=028     True
     9            3           150 86+09=095 86+09=095     True
     9            3           151 29+58=087 29+58=087     True
     9            3           152 01+29=030 01+29=030     True
     9            3           153 65+48=113 65+48=113     True
     9            3           154 96+45=141 96+45=141     True
     9            3           155 58+69=127 58+69=127     True
     9            3           156 84+43=127 84+43=127     True
     9            3           157 90+38=128 90+38=128     True
     9            3           158 39+97=136 39+97=136     True
     9            3           159 74+84=158 74+84=158     True
     9            3           160 86+22=108 86+22=108     True
     9            3           161 01+86=087 01+86=087     True
     9            3           162 81+63=144 81+63=145    False
     9            3           163 80+94=174 80+94=174     True
     9            3           164 44+42=086 44+42=086     True
     9            3           165 72+60=132 72+60=132     True
     9            3           166 28+07=035 28+07=035     True
     9            3           167 69+54=123 69+54=123     True
     9            3           168 68+77=145 68+77=145     True
     9            3           169 90+16=106 90+16=106     True
     9            3           170 64+50=114 64+50=114     True
     9            3           171 46+88=134 46+88=134     True
     9            3           172 55+99=154 55+99=154     True
     9            3           173 31+97=128 31+97=128     True
     9            3           174 79+28=107 79+28=107     True
     9            3           175 81+43=124 81+43=125    False
     9            3           176 41+15=056 41+15=066    False
     9            3           177 38+77=115 38+77=115     True
     9            3           178 25+06=031 25+06=031     True
     9            3           179 01+93=094 01+93=095    False
     9            3           180 97+22=119 97+22=109    False
     9            3           181 71+84=155 71+84=155     True
     9            3           182 26+36=062 26+36=072    False
     9            3           183 60+92=152 60+92=152     True
     9            3           184 02+94=096 02+94=097    False
     9            3           185 31+58=089 31+58=089     True
     9            3           186 70+52=122 70+52=122     True
     9            3           187 19+42=061 19+42=071    False
     9            3           188 95+73=168 95+73=168     True
     9            3           189 21+25=046 21+25=056    False
     9            3           190 13+58=071 13+58=071     True
     9            3           191 62+28=090 62+28=090     True
     9            3           192 38+14=052 38+14=062    False
     9            3           193 66+75=141 66+75=141     True
     9            3           194 24+59=083 24+59=083     True
     9            3           195 97+66=163 97+66=163     True
     9            3           196 76+70=146 76+70=146     True
     9            3           197 08+40=048 08+40=058    False
     9            3           198 84+00=084 84+00=084     True
     9            3           199 54+73=127 54+73=127     True
     9            3           200 16+88=104 16+88=104     True
     9            3           201 99+47=146 99+47=146     True
     9            3           202 31+95=126 31+95=126     True
     9            3           203 01+79=080 01+79=080     True
     9            3           204 03+68=071 03+68=071     True
     9            3           205 10+05=015 10+05=015     True
     9            3           206 98+90=188 98+90=188     True
     9            3           207 58+53=111 58+53=111     True
     9            3           208 34+87=121 34+87=121     True
     9            3           209 07+31=038 07+31=048    False
     9            3           210 59+08=067 59+08=067     True
     9            3           211 51+38=089 51+38=089     True
     9            3           212 62+62=124 62+62=125    False
     9            3           213 80+32=112 80+32=112     True
     9            3           214 69+16=085 69+16=085     True
     9            3           215 01+17=018 01+17=018     True
     9            3           216 74+41=115 74+41=115     True
     9            3           217 20+89=109 20+89=109     True
     9            3           218 53+50=103 53+50=103     True
     9            3           219 82+85=167 82+85=167     True
     9            3           220 34+47=081 34+47=081     True
     9            3           221 34+45=079 34+45=089    False
     9            3           222 77+34=111 77+34=111     True
     9            3           223 56+33=089 56+33=089     True
     9            3           224 97+56=153 97+56=153     True
     9            3           225 29+06=035 29+06=035     True
     9            3           226 78+96=174 78+96=174     True
     9            3           227 28+65=093 28+65=093     True
     9            3           228 61+64=125 61+64=126    False
     9            3           229 32+64=096 32+64=097    False
     9            3           230 98+32=130 98+32=130     True
     9            3           231 25+35=060 25+35=060     True
     9            3           232 05+08=013 05+08=013     True
     9            3           233 05+26=031 05+26=031     True
     9            3           234 84+71=155 84+71=155     True
     9            3           235 33+10=043 33+10=053    False
     9            3           236 98+35=133 98+35=133     True
     9            3           237 68+98=166 68+98=166     True
     9            3           238 03+63=066 03+63=067    False
     9            3           239 12+96=108 12+96=108     True
     9            3           240 02+81=083 02+81=083     True
     9            3           241 83+13=096 83+13=097    False
     9            3           242 55+92=147 55+92=147     True
     9            3           243 96+09=105 96+09=105     True
     9            3           244 61+08=069 61+08=069     True
     9            3           245 39+75=114 39+75=114     True
     9            3           246 40+74=114 40+74=114     True
     9            3           247 39+80=119 39+80=119     True
     9            3           248 57+95=152 57+95=152     True
     9            3           249 92+97=189 92+97=189     True
     9            3           250 33+03=036 33+03=037    False
     9            3           251 74+92=166 74+92=166     True
     9            3           252 99+09=108 99+09=107    False
     9            3           253 98+10=108 98+10=108     True
     9            3           254 46+77=123 46+77=123     True
     9            3           255 85+78=163 85+78=163     True
     9            4             0 41+21=062 41+21=062     True
     9            4             1 49+13=062 49+13=072    False
     9            4             2 59+07=066 59+07=066     True
     9            4             3 31+11=042 31+11=052    False
     9            4             4 74+16=090 74+16=090     True
     9            4             5 43+38=081 43+38=081     True
     9            4             6 08+67=075 08+67=075     True
     9            4             7 31+66=097 31+66=097     True
     9            4             8 10+31=041 10+31=051    False
     9            4             9 34+59=093 34+59=093     True
     9            4            10 78+42=120 78+42=110    False
     9            4            11 13+41=054 13+41=065    False
     9            4            12 97+89=186 97+89=186     True
     9            4            13 15+62=077 15+62=077     True
     9            4            14 39+36=075 39+36=075     True
     9            4            15 21+25=046 21+25=056    False
     9            4            16 74+56=130 74+56=120    False
     9            4            17 85+47=132 85+47=132     True
     9            4            18 47+32=079 47+32=079     True
     9            4            19 37+66=103 37+66=103     True
     9            4            20 16+29=045 16+29=045     True
     9            4            21 86+77=163 86+77=163     True
     9            4            22 80+07=087 80+07=087     True
     9            4            23 87+05=092 87+05=092     True
     9            4            24 58+16=074 58+16=074     True
     9            4            25 52+79=131 52+79=131     True
     9            4            26 91+08=099 91+08=099     True
     9            4            27 47+78=125 47+78=125     True
     9            4            28 86+96=182 86+96=182     True
     9            4            29 90+22=112 90+22=112     True
     9            4            30 31+18=049 31+18=059    False
     9            4            31 86+15=101 86+15=101     True
     9            4            32 15+95=110 15+95=100    False
     9            4            33 42+11=053 42+11=063    False
     9            4            34 65+99=164 65+99=164     True
     9            4            35 89+29=118 89+29=117    False
     9            4            36 35+11=046 35+11=056    False
     9            4            37 71+41=112 71+41=112     True
     9            4            38 16+24=040 16+24=040     True
     9            4            39 77+82=159 77+82=159     True
     9            4            40 55+89=144 55+89=144     True
     9            4            41 17+88=105 17+88=105     True
     9            4            42 54+72=126 54+72=127    False
     9            4            43 34+98=132 34+98=132     True
     9            4            44 09+97=106 09+97=106     True
     9            4            45 91+07=098 91+07=098     True
     9            4            46 55+94=149 55+94=149     True
     9            4            47 22+58=080 22+58=080     True
     9            4            48 91+37=128 91+37=128     True
     9            4            49 16+10=026 16+10=026     True
     9            4            50 96+32=128 96+32=128     True
     9            4            51 35+75=110 35+75=110     True
     9            4            52 88+73=161 88+73=161     True
     9            4            53 35+18=053 35+18=063    False
     9            4            54 33+10=043 33+10=053    False
     9            4            55 08+50=058 08+50=068    False
     9            4            56 22+62=084 22+62=085    False
     9            4            57 26+37=063 26+37=073    False
     9            4            58 80+27=107 80+27=107     True
     9            4            59 68+28=096 68+28=096     True
     9            4            60 48+03=051 48+03=061    False
     9            4            61 40+18=058 40+18=068    False
     9            4            62 16+59=075 16+59=075     True
     9            4            63 02+19=021 02+19=021     True
     9            4            64 01+09=010 01+09=010     True
     9            4            65 62+68=130 62+68=130     True
     9            4            66 09+71=080 09+71=080     True
     9            4            67 00+58=058 00+58=068    False
     9            4            68 16+45=061 16+45=071    False
     9            4            69 24+98=122 24+98=122     True
     9            4            70 47+92=139 47+92=139     True
     9            4            71 94+84=178 94+84=178     True
     9            4            72 21+32=053 21+32=064    False
     9            4            73 29+82=111 29+82=111     True
     9            4            74 32+79=111 32+79=111     True
     9            4            75 13+98=111 13+98=111     True
     9            4            76 41+94=135 41+94=136    False
     9            4            77 51+84=135 51+84=136    False
     9            4            78 42+05=047 42+05=057    False
     9            4            79 39+03=042 39+03=052    False
     9            4            80 02+92=094 02+92=095    False
     9            4            81 99+81=180 99+81=180     True
     9            4            82 32+68=100 32+68=100     True
     9            4            83 52+17=069 52+17=079    False
     9            4            84 56+58=114 56+58=114     True
     9            4            85 21+48=069 21+48=079    False
     9            4            86 61+71=132 61+71=132     True
     9            4            87 17+01=018 17+01=018     True
     9            4            88 68+23=091 68+23=091     True
     9            4            89 00+37=037 00+37=037     True
     9            4            90 94+88=182 94+88=182     True
     9            4            91 06+31=037 06+31=037     True
     9            4            92 27+18=045 27+18=055    False
     9            4            93 41+81=122 41+81=122     True
     9            4            94 15+86=101 15+86=101     True
     9            4            95 36+87=123 36+87=123     True
     9            4            96 17+37=054 17+37=064    False
     9            4            97 13+86=099 13+86=099     True
     9            4            98 29+69=098 29+69=097    False
     9            4            99 31+99=130 31+99=130     True
     9            4           100 47+29=076 47+29=076     True
     9            4           101 08+81=089 08+81=089     True
     9            4           102 72+82=154 72+82=155    False
     9            4           103 46+91=137 46+91=137     True
     9            4           104 70+35=105 70+35=105     True
     9            4           105 90+55=145 90+55=145     True
     9            4           106 99+99=198 99+99=197    False
     9            4           107 60+97=157 60+97=157     True
     9            4           108 03+40=043 03+40=053    False
     9            4           109 35+49=084 35+49=084     True
     9            4           110 32+02=034 32+02=035    False
     9            4           111 70+18=088 70+18=088     True
     9            4           112 99+05=104 99+05=104     True
     9            4           113 78+73=151 78+73=151     True
     9            4           114 03+02=005 03+02=006    False
     9            4           115 50+14=064 50+14=064     True
     9            4           116 62+02=064 62+02=065    False
     9            4           117 16+74=090 16+74=090     True
     9            4           118 68+65=133 68+65=133     True
     9            4           119 74+81=155 74+81=155     True
     9            4           120 37+48=085 37+48=085     True
     9            4           121 63+04=067 63+04=067     True
     9            4           122 06+62=068 06+62=068     True
     9            4           123 95+75=170 95+75=170     True
     9            4           124 92+37=129 92+37=129     True
     9            4           125 81+32=113 81+32=113     True
     9            4           126 53+28=081 53+28=081     True
     9            4           127 52+42=094 52+42=095    False
     9            4           128 66+97=163 66+97=163     True
     9            4           129 00+48=048 00+48=058    False
     9            4           130 65+32=097 65+32=097     True
     9            4           131 60+89=149 60+89=149     True
     9            4           132 71+61=132 71+61=122    False
     9            4           133 98+50=148 98+50=148     True
     9            4           134 90+96=186 90+96=186     True
     9            4           135 02+96=098 02+96=098     True
     9            4           136 62+75=137 62+75=137     True
     9            4           137 41+28=069 41+28=079    False
     9            4           138 95+79=174 95+79=174     True
     9            4           139 48+41=089 48+41=089     True
     9            4           140 87+95=182 87+95=182     True
     9            4           141 75+38=113 75+38=113     True
     9            4           142 31+55=086 31+55=086     True
     9            4           143 54+63=117 54+63=117     True
     9            4           144 75+82=157 75+82=157     True
     9            4           145 46+45=091 46+45=091     True
     9            4           146 13+08=021 13+08=021     True
     9            4           147 77+97=174 77+97=174     True
     9            4           148 37+35=072 37+35=082    False
     9            4           149 21+89=110 21+89=110     True
     9            4           150 58+51=109 58+51=109     True
     9            4           151 91+48=139 91+48=139     True
     9            4           152 33+23=056 33+23=067    False
     9            4           153 80+96=176 80+96=176     True
     9            4           154 78+02=080 78+02=080     True
     9            4           155 38+95=133 38+95=133     True
     9            4           156 99+25=124 99+25=124     True
     9            4           157 30+76=106 30+76=106     True
     9            4           158 42+40=082 42+40=082     True
     9            4           159 85+58=143 85+58=143     True
     9            4           160 44+46=090 44+46=090     True
     9            4           161 06+41=047 06+41=057    False
     9            4           162 65+90=155 65+90=155     True
     9            4           163 43+83=126 43+83=127    False
     9            4           164 36+61=097 36+61=097     True
     9            4           165 61+51=112 61+51=112     True
     9            4           166 38+09=047 38+09=047     True
     9            4           167 21+97=118 21+97=118     True
     9            4           168 83+30=113 83+30=113     True
     9            4           169 11+79=090 11+79=090     True
     9            4           170 14+29=043 14+29=053    False
     9            4           171 21+11=032 21+11=032     True
     9            4           172 43+53=096 43+53=097    False
     9            4           173 02+58=060 02+58=060     True
     9            4           174 78+82=160 78+82=160     True
     9            4           175 91+11=102 91+11=102     True
     9            4           176 58+54=112 58+54=112     True
     9            4           177 00+15=015 00+15=015     True
     9            4           178 83+51=134 83+51=135    False
     9            4           179 44+72=116 44+72=116     True
     9            4           180 71+20=091 71+20=091     True
     9            4           181 24+99=123 24+99=123     True
     9            4           182 46+30=076 46+30=076     True
     9            4           183 08+67=075 08+67=075     True
     9            4           184 47+42=089 47+42=089     True
     9            4           185 95+67=162 95+67=162     True
     9            4           186 40+56=096 40+56=096     True
     9            4           187 17+95=112 17+95=112     True
     9            4           188 94+66=160 94+66=160     True
     9            4           189 14+58=072 14+58=072     True
     9            4           190 56+05=061 56+05=071    False
     9            4           191 70+01=071 70+01=071     True
     9            4           192 97+59=156 97+59=156     True
     9            4           193 94+67=161 94+67=161     True
     9            4           194 13+41=054 13+41=065    False
     9            4           195 85+15=100 85+15=090    False
     9            4           196 48+53=101 48+53=101     True
     9            4           197 62+75=137 62+75=137     True
     9            4           198 87+47=134 87+47=134     True
     9            4           199 31+88=119 31+88=119     True
     9            4           200 97+16=113 97+16=113     True
     9            4           201 48+45=093 48+45=093     True
     9            4           202 99+00=099 99+00=099     True
     9            4           203 15+01=016 15+01=016     True
     9            4           204 28+96=124 28+96=124     True
     9            4           205 20+11=031 20+11=031     True
     9            4           206 07+56=063 07+56=073    False
     9            4           207 06+08=014 06+08=014     True
     9            4           208 45+46=091 45+46=091     True
     9            4           209 48+85=133 48+85=133     True
     9            4           210 62+14=076 62+14=077    False
     9            4           211 82+31=113 82+31=113     True
     9            4           212 85+88=173 85+88=173     True
     9            4           213 77+08=085 77+08=085     True
     9            4           214 16+64=080 16+64=080     True
     9            4           215 00+27=027 00+27=027     True
     9            4           216 36+75=111 36+75=111     True
     9            4           217 38+38=076 38+38=076     True
     9            4           218 88+32=120 88+32=110    False
     9            4           219 09+88=097 09+88=097     True
     9            4           220 96+87=183 96+87=183     True
     9            4           221 71+29=100 71+29=100     True
     9            4           222 99+13=112 99+13=112     True
     9            4           223 03+13=016 03+13=017    False
     9            4           224 67+23=090 67+23=090     True
     9            4           225 15+98=113 15+98=113     True
     9            4           226 10+08=018 10+08=018     True
     9            4           227 46+24=070 46+24=070     True
     9            4           228 55+63=118 55+63=118     True
     9            4           229 28+06=034 28+06=034     True
     9            4           230 43+87=130 43+87=120    False
     9            4           231 34+05=039 34+05=049    False
2024-09-20 14:10:33,330 - root - WARNING - The method `test_one_epoch` is not implemented. Please override this method in a subclass or use a custom callback.

Trained Attention Heatmaps#

# Seed torch's global RNG before drawing a batch (presumably so the batch
# picked from the loader is the same on every run -- confirm the loader's
# shuffling uses the global generator).
torch.manual_seed(composer.global_.seed)

batch = next(iter(train_loader))

inputs, targets, target_padding_masks, future_masks = batch

trained_model = _trained_state.model
trained_model.eval()

with torch.no_grad():
    # Forward through `trained_model` -- the object that was just switched to
    # eval mode and whose decoder blocks are inspected further down -- instead
    # of relying on a separate `model` name pointing at the same instance.
    logits = trained_model(inputs, target_padding_masks=target_padding_masks, future_masks=future_masks)

We zoom into just one example in the batch. Note that the predicted values are gibberish for the first few tokens (before the equal sign): recall that we told the model to effectively “not care” about any tokens before the answer by masking them out with padding tokens in the target. As long as the answer portion is correct, this is expected.

# First sample of the batch: raw token ids followed by the decoded string
# (special tokens are kept so that <BOS>/<PAD>/<EOS> stay visible).
example_input = inputs[0]
pprint(example_input)
decoded_input = decode_equation(vocabulary, example_input, show_special_tokens=True)
pprint(decoded_input)

# The target row that belongs to the same sample.
example_target = targets[0]
pprint(example_target)
decoded_target = decode_equation(vocabulary, example_target, show_special_tokens=True)
pprint(decoded_target)

# Greedy prediction: index of the largest logit along the last dimension.
example_logits = logits[0]
example_prediction = example_logits.argmax(dim=-1)
pprint(example_prediction)
decoded_prediction = decode_equation(vocabulary, example_prediction, show_special_tokens=True)
pprint(decoded_prediction)
tensor([14,  3,  1, 10,  0,  4, 13,  0,  3,  5])
'<BOS>31+04=035'
tensor([16, 16, 16, 16, 16, 16,  0,  3,  5, 15])
'<PAD><PAD><PAD><PAD><PAD><PAD>035<EOS>'
tensor([ 1,  4,  0,  1,  5,  0,  0,  3,  5, 15])
'140150035<EOS>'

Indeed we get a correct answer 35.

We visualize the attention weights of the last decoder block. Any decoder block’s attention weights could be visualized, but we take the last one for simplicity, hinging on the assumption that the last decoder block is the one that contains the most information about the input sequence.

# Masked self-attention module of the final decoder block.
last_decoder_block = trained_model.decoder_blocks[-1]
masked_self_attention_mha = last_decoder_block.masked_self_attention_mha

# Both tensors are read straight off the module's attributes
# (presumably stored there by the most recent forward pass -- confirm).
context_vector = masked_self_attention_mha.context_vector
attention_weights = masked_self_attention_mha.attention_weights
pprint(attention_weights.shape)
torch.Size([256, 4, 10, 10])

We want to select the example earlier, which is conveniently the first example.

# Keep only the first sample, then put back a leading axis of size 1 so the
# result still has four dimensions.
example_attention_weights = attention_weights[0].unsqueeze(0)
pprint(example_attention_weights.shape)
torch.Size([1, 4, 10, 10])

For the xticks and yticks: the attention weight matrix is \(T \times T\), where the first row corresponds to the first token, the second row to the second token, and so on. Note that the x-axis is keys and the y-axis is queries. This convention is easy to see from the attention scores formula Q @ K.T, where Q is the query and K is the key.

The resulting attention scores matrix has dimensions (num_queries, num_keys), where each row corresponds to a query and each column corresponds to a key. In our case it is just \(T \times T\), since the queries and keys have the same length. When visualizing the attention scores as a heatmap, the x-axis corresponds to the keys dimension (num_keys), and the y-axis corresponds to the queries dimension (num_queries). This alignment matches the mathematical formulation of the attention computation.

# Tick labels: the BOS marker as one label, then one label per remaining
# character of the decoded input (the slice skips the BOS text itself).
bos_token = '<BOS>'
xticks = [bos_token, *decoded_input[len(bos_token):]]
yticks = xticks

# Columns are keys and rows are queries, hence the axis labels.
heatmap_options = dict(
    xlabel="Keys",
    ylabel="Queries",
    xticks=xticks,
    yticks=yticks,
    show_title=True,
    show_values=True,
    value_dp=2,
    figure_kwargs={"figsize": (40, 15)},
)
fig = show_attention_heatmaps(attention_weights=example_attention_weights, **heatmap_options)
../../_images/6bb3043073b9728016ba32d0973f04d5c3166a0dc1602ffd1f84b1b349f9f9e8.png

If we want two samples.

# Plot the first two samples of the batch, without per-cell value annotations.
first_two_samples = attention_weights[:2]
_ = show_attention_heatmaps(first_two_samples, show_title=True, show_values=False)
../../_images/b3be697a5e5d67982b1e47ad7eafd8342514b00ea5527e55447a21341264b9f1.png

The x-axis and y-axis are keys and queries respectively, which is consistent with Q @ K.T.

last_decoder_block = trained_model.decoder_blocks[-1]
masked_self_attention_mha = last_decoder_block.masked_self_attention_mha
context_vector, attention_weights = masked_self_attention_mha.context_vector, masked_self_attention_mha.attention_weights

# Second axis of the attention weights is iterated as "heads" below.
num_heads = attention_weights.size(1)

# NOTE(review): the equation is hard-coded; it must match the last sample of
# the batch whose attention weights are currently stored on the module --
# confirm after any re-run with a different seed or batch.
equation = '59+14=073'

# Labels for each character in the sequence, including BOS
labels = ['<BOS>'] + list(equation)

# Loop over each head and plot its heatmap
for head in range(num_heads):
    plt.figure(figsize=(10, 10))

    # Attention weights of the last sample (index -1) for this head. Move the
    # tensor to host memory first: `.numpy()` only works on CPU tensors, so the
    # plot would otherwise fail when the model ran on an accelerator.
    attention_matrix = attention_weights[-1, head, :, :].detach().cpu().numpy()

    sns.heatmap(attention_matrix, annot=True, cmap='viridis', xticklabels=labels, yticklabels=labels)
    plt.title(f"Attention Weights Heatmap for '<BOS>{equation}' - Head {head+1}")
    plt.xlabel("Keys")
    plt.ylabel("Queries")
    plt.show()
../../_images/d0fd5ad05ef60e803e027a6976dff27dbde01bf64704ed894f07e0f379c28c16.png ../../_images/f4654c6d78ecc86f9c328b4f376c49f1f6259f8ac269d6f7cdd9503f013fecd3.png ../../_images/cf5a46fe834031818b75b04665697eac6a0b6211fb6eec1d16a1949e62b3a404.png ../../_images/d2591a861a09662893c5dbd88599cf2d85c8217df28529f49a9f56ec1a449a36.png

Generation#

def what_is(question: str, model: torch.nn.Module, tokenizer: AdderTokenizer, vocabulary: AdderVocabulary, device: torch.device) -> str:
    """
    Answer an addition prompt given as a literal string by letting the model generate the sum.

    The question is encoded, cut off right after its equal-sign token, and the
    resulting prompt is handed to ``model.generate`` (greedy, ``max_tokens=4``).
    Whatever the decoded generation contains after its first ``"="`` is
    treated as the answer.

    Args:
        question (str): The question string containing the two numbers to add.
        model (torch.nn.Module): The trained model for addition; must expose ``generate``.
        tokenizer (AdderTokenizer): The tokenizer for encoding and decoding.
        vocabulary (AdderVocabulary): The vocabulary used by the tokenizer.
        device (torch.device): The device the prompt tensor is created on.

    Returns:
        str: The question with the computed answer appended.
    """
    # Shape the encoded question as a batch of one.
    encoded_question = tokenizer.encode(question)
    prompt_ids = torch.tensor(encoded_question, dtype=torch.long, device=device).unsqueeze(0)

    equal_token_id = vocabulary.token_to_index[vocabulary.EQUAL]

    # Column position of the equal sign; `.item()` needs exactly one match.
    _, equal_columns = torch.where(prompt_ids == equal_token_id)
    prompt_length = equal_columns.item() + 1
    prompt = prompt_ids[:, :prompt_length]

    with torch.no_grad():
        completed_ids = model.generate(starting_tokens=prompt, max_tokens=4, greedy=True)

    decoded = tokenizer.decode(completed_ids.squeeze().tolist(), remove_special_tokens=True)
    answer_start = decoded.index("=") + 1
    answer = decoded[answer_start:]

    return f"{question}{answer}"
# Ask the trained model for 98 + 35; the prompt ends at the equal sign.
what_is(
    "98+35=",
    model=trained_model,
    tokenizer=tokenizer,
    vocabulary=vocabulary,
    device=composer.trainer.device,
)
'98+35=133'

References and Further Readings#