Module `openpack_torch.lightning`

Expand source code

from logging import getLogger
from typing import Dict, List, Tuple

import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from omegaconf import DictConfig
from torchmetrics.functional import accuracy as accuracy_score

# Module-level logger, named after this module.
logger = getLogger(__name__)


class EarlyStopError(Exception):
    """Custom exception type that adds nothing to ``Exception``.

    NOTE(review): presumably raised to abort a run early -- it is not used
    anywhere in this module, so confirm against the callers.
    """


class BaseLightningModule(pl.LightningModule):
    """Base Lightning module wiring a network, a loss and config-driven optimizers.

    Subclasses are expected to override ``init_model``, ``train_val_common_step``
    and ``test_step``. ``test_step`` should append its per-batch output dict to
    ``self.test_step_outputs`` so that ``on_test_epoch_end`` can merge them.
    """

    def __init__(self, cfg: DictConfig = None) -> None:
        """Build the network and the loss from ``cfg``.

        Args:
            cfg: configuration handed to ``init_model`` / ``init_criterion`` and
                read later by ``configure_optimizers``.
        """
        # Initialise the nn.Module machinery before assigning any attribute.
        super().__init__()
        self.cfg = cfg

        self.net: nn.Module = self.init_model(cfg)
        self.criterion: nn.Module = self.init_criterion(cfg)

        # Per-batch outputs appended by test_step(); merged in on_test_epoch_end().
        self.test_step_outputs: List = []
        # Concatenated test outputs; stays None until on_test_epoch_end() runs.
        self.test_results = None

    def init_model(self, cfg: DictConfig) -> torch.nn.Module:
        """Build the network stored in ``self.net``; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement init_model()."
        )

    def init_criterion(self, cfg: DictConfig):
        """Return the loss module stored in ``self.criterion``.

        The default is a plain ``CrossEntropyLoss``; ``cfg`` is not consulted here.
        """
        return torch.nn.CrossEntropyLoss()

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """Create the optimizer and optional LR scheduler from ``self.cfg.optimizer``.

        Returns:
            The bare optimizer when ``scheduler.type`` is ``"None"``; otherwise a
            dict with the keys ``"optimizer"`` and ``"lr_scheduler"``.

        Raises:
            ValueError: if the optimizer type or the scheduler type is unsupported.
        """
        opt_cfg = self.cfg.optimizer

        # == Optimizer ==
        if opt_cfg.type == "SGD":
            logger.info(f"SGD optimizer is selected! (lr={opt_cfg.lr})")
            optimizer = torch.optim.SGD(
                self.parameters(),
                lr=opt_cfg.lr,
                momentum=opt_cfg.momentum,
                weight_decay=opt_cfg.weight_decay,
            )
        elif opt_cfg.type == "Adam":
            logger.info(f"Adam optimizer is selected! (lr={opt_cfg.lr})")
            optimizer = torch.optim.Adam(
                self.parameters(),
                lr=opt_cfg.lr,
                weight_decay=opt_cfg.weight_decay,
            )
        else:
            raise ValueError(f"{opt_cfg.type} is not supported.")

        # == LR Scheduler ==
        sched_cfg = opt_cfg.scheduler
        if sched_cfg.type == "None":
            logger.info("No scheduler is applied.")
            return optimizer
        elif sched_cfg.type == "StepLR":
            logger.info("StepLR scheduler is selected.")
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer,
                step_size=sched_cfg.step_size,
                gamma=sched_cfg.gamma,
            )
        elif sched_cfg.type == "ExponentialLR":
            logger.info("ExponentialLR scheduler is selected.")
            scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer,
                gamma=sched_cfg.gamma,
            )
        elif sched_cfg.type == "CosineAnnealing":
            logger.info("CosineAnnealing scheduler is selected.")
            # NOTE(review): `verbose` is deprecated in recent PyTorch releases --
            # confirm it is still accepted by the pinned torch version.
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=sched_cfg.CosineAnnealing.T_max,
                eta_min=sched_cfg.CosineAnnealing.eta_min,
                verbose=True,
            )
        else:
            raise ValueError(f"{sched_cfg.type} scheduler is not supported.")

        return {"optimizer": optimizer, "lr_scheduler": scheduler}

    def calc_accuracy(self, y: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        """Return the accuracy of ``y`` against ``t`` over every time step.

        Args:
            y (torch.Tensor): logit tensor. shape=(BATCH, CLASS, TIME), dtype=torch.float
            t (torch.Tensor): target tensor. shape=(BATCH, TIME), dtype=torch.long

        Returns:
            torch.Tensor: accuracy computed with ``average="weighted"``; the last
            class index (``CLASS - 1``) is passed as ``ignore_index``.
        """
        probs = F.softmax(y, dim=1)  # normalise over the class axis
        (_, num_classes, _) = probs.size()
        # One row of class scores per (batch, time) position.
        probs_flat = probs.permute(0, 2, 1).reshape(-1, num_classes)
        targets_flat = t.reshape(-1)

        # FIXME: I want to use macro average score.
        return accuracy_score(
            probs_flat,
            targets_flat,
            task="multiclass",
            average="weighted",
            num_classes=num_classes,
            ignore_index=num_classes - 1,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through ``self.net`` and return its output."""
        return self.net(x)

    def train_val_common_step(self, batch: Dict, batch_idx: int) -> Dict:
        """Compute the metrics dict shared by training and validation; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement train_val_common_step()."
        )

    def training_step(self, batch: Dict, batch_idx: int) -> Dict:
        """Run the common step and log every entry under the ``train/`` prefix.

        Values are logged per epoch (``on_step=False, on_epoch=True``).

        Returns:
            The unprefixed dict produced by ``train_val_common_step``.
        """
        output = self.train_val_common_step(batch, batch_idx)

        train_output = {f"train/{key}": val for key, val in output.items()}
        self.log_dict(
            train_output,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )
        return output

    def validation_step(
        self, batch: Dict, batch_idx: int, dataloader_idx: int = 0
    ) -> Dict:
        """Run the common step and log every entry under the ``val/`` prefix.

        ``dataloader_idx`` is accepted but not used here.

        Returns:
            The unprefixed dict produced by ``train_val_common_step``.
        """
        output = self.train_val_common_step(batch, batch_idx)

        val_output = {f"val/{key}": val for key, val in output.items()}
        self.log_dict(
            val_output,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )
        return output

    def test_step(self, batch: Dict, batch_idx: int) -> Dict:
        """Process one test batch; must be overridden.

        Implementations should append their output dict to
        ``self.test_step_outputs`` so ``on_test_epoch_end`` can merge them.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement test_step()."
        )

    def on_test_epoch_end(self):
        """Concatenate the collected test outputs into ``self.test_results``.

        Every value of every dict in ``self.test_step_outputs`` is moved to the
        CPU, converted to NumPy and concatenated per key along axis 0. The keys
        are taken from the first collected dict.

        Raises:
            ValueError: if ``self.test_step_outputs`` is empty.
        """
        outputs = self.test_step_outputs
        if not outputs:
            raise ValueError(
                "Size of test_step_outputs is 0. Did you forgot to call "
                "`self.test_step_outputs.append(outputs)` in test_step()?"
            )

        keys = tuple(outputs[0].keys())
        results = {key: [] for key in keys}
        for d in outputs:
            for key, value in d.items():
                # detach() so tensors that still track gradients can be converted.
                results[key].append(value.detach().cpu().numpy())

        self.test_results = {
            key: np.concatenate(chunks, axis=0) for key, chunks in results.items()
        }

    def clear_test_outputs(self):
        """Drop the collected per-batch outputs and the merged test results."""
        self.test_step_outputs = []
        self.test_results = None

Classes

class BaseLightningModule (cfg: omegaconf.dictconfig.DictConfig = None)

Hooks to be used in LightningModule.

Expand source code

class BaseLightningModule(pl.LightningModule):
    """Base Lightning module wiring a network, a loss and config-driven optimizers.

    Subclasses are expected to override ``init_model``, ``train_val_common_step``
    and ``test_step``. ``test_step`` should append its per-batch output dict to
    ``self.test_step_outputs`` so that ``on_test_epoch_end`` can merge them.
    """

    def __init__(self, cfg: DictConfig = None) -> None:
        """Build the network and the loss from ``cfg``.

        Args:
            cfg: configuration handed to ``init_model`` / ``init_criterion`` and
                read later by ``configure_optimizers``.
        """
        # Initialise the nn.Module machinery before assigning any attribute.
        super().__init__()
        self.cfg = cfg

        self.net: nn.Module = self.init_model(cfg)
        self.criterion: nn.Module = self.init_criterion(cfg)

        # Per-batch outputs appended by test_step(); merged in on_test_epoch_end().
        self.test_step_outputs: List = []
        # Concatenated test outputs; stays None until on_test_epoch_end() runs.
        self.test_results = None

    def init_model(self, cfg: DictConfig) -> torch.nn.Module:
        """Build the network stored in ``self.net``; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement init_model()."
        )

    def init_criterion(self, cfg: DictConfig):
        """Return the loss module stored in ``self.criterion``.

        The default is a plain ``CrossEntropyLoss``; ``cfg`` is not consulted here.
        """
        return torch.nn.CrossEntropyLoss()

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """Create the optimizer and optional LR scheduler from ``self.cfg.optimizer``.

        Returns:
            The bare optimizer when ``scheduler.type`` is ``"None"``; otherwise a
            dict with the keys ``"optimizer"`` and ``"lr_scheduler"``.

        Raises:
            ValueError: if the optimizer type or the scheduler type is unsupported.
        """
        opt_cfg = self.cfg.optimizer

        # == Optimizer ==
        if opt_cfg.type == "SGD":
            logger.info(f"SGD optimizer is selected! (lr={opt_cfg.lr})")
            optimizer = torch.optim.SGD(
                self.parameters(),
                lr=opt_cfg.lr,
                momentum=opt_cfg.momentum,
                weight_decay=opt_cfg.weight_decay,
            )
        elif opt_cfg.type == "Adam":
            logger.info(f"Adam optimizer is selected! (lr={opt_cfg.lr})")
            optimizer = torch.optim.Adam(
                self.parameters(),
                lr=opt_cfg.lr,
                weight_decay=opt_cfg.weight_decay,
            )
        else:
            raise ValueError(f"{opt_cfg.type} is not supported.")

        # == LR Scheduler ==
        sched_cfg = opt_cfg.scheduler
        if sched_cfg.type == "None":
            logger.info("No scheduler is applied.")
            return optimizer
        elif sched_cfg.type == "StepLR":
            logger.info("StepLR scheduler is selected.")
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer,
                step_size=sched_cfg.step_size,
                gamma=sched_cfg.gamma,
            )
        elif sched_cfg.type == "ExponentialLR":
            logger.info("ExponentialLR scheduler is selected.")
            scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer,
                gamma=sched_cfg.gamma,
            )
        elif sched_cfg.type == "CosineAnnealing":
            logger.info("CosineAnnealing scheduler is selected.")
            # NOTE(review): `verbose` is deprecated in recent PyTorch releases --
            # confirm it is still accepted by the pinned torch version.
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=sched_cfg.CosineAnnealing.T_max,
                eta_min=sched_cfg.CosineAnnealing.eta_min,
                verbose=True,
            )
        else:
            raise ValueError(f"{sched_cfg.type} scheduler is not supported.")

        return {"optimizer": optimizer, "lr_scheduler": scheduler}

    def calc_accuracy(self, y: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        """Return the accuracy of ``y`` against ``t`` over every time step.

        Args:
            y (torch.Tensor): logit tensor. shape=(BATCH, CLASS, TIME), dtype=torch.float
            t (torch.Tensor): target tensor. shape=(BATCH, TIME), dtype=torch.long

        Returns:
            torch.Tensor: accuracy computed with ``average="weighted"``; the last
            class index (``CLASS - 1``) is passed as ``ignore_index``.
        """
        probs = F.softmax(y, dim=1)  # normalise over the class axis
        (_, num_classes, _) = probs.size()
        # One row of class scores per (batch, time) position.
        probs_flat = probs.permute(0, 2, 1).reshape(-1, num_classes)
        targets_flat = t.reshape(-1)

        # FIXME: I want to use macro average score.
        return accuracy_score(
            probs_flat,
            targets_flat,
            task="multiclass",
            average="weighted",
            num_classes=num_classes,
            ignore_index=num_classes - 1,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through ``self.net`` and return its output."""
        return self.net(x)

    def train_val_common_step(self, batch: Dict, batch_idx: int) -> Dict:
        """Compute the metrics dict shared by training and validation; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement train_val_common_step()."
        )

    def training_step(self, batch: Dict, batch_idx: int) -> Dict:
        """Run the common step and log every entry under the ``train/`` prefix.

        Values are logged per epoch (``on_step=False, on_epoch=True``).

        Returns:
            The unprefixed dict produced by ``train_val_common_step``.
        """
        output = self.train_val_common_step(batch, batch_idx)

        train_output = {f"train/{key}": val for key, val in output.items()}
        self.log_dict(
            train_output,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )
        return output

    def validation_step(
        self, batch: Dict, batch_idx: int, dataloader_idx: int = 0
    ) -> Dict:
        """Run the common step and log every entry under the ``val/`` prefix.

        ``dataloader_idx`` is accepted but not used here.

        Returns:
            The unprefixed dict produced by ``train_val_common_step``.
        """
        output = self.train_val_common_step(batch, batch_idx)

        val_output = {f"val/{key}": val for key, val in output.items()}
        self.log_dict(
            val_output,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )
        return output

    def test_step(self, batch: Dict, batch_idx: int) -> Dict:
        """Process one test batch; must be overridden.

        Implementations should append their output dict to
        ``self.test_step_outputs`` so ``on_test_epoch_end`` can merge them.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement test_step()."
        )

    def on_test_epoch_end(self):
        """Concatenate the collected test outputs into ``self.test_results``.

        Every value of every dict in ``self.test_step_outputs`` is moved to the
        CPU, converted to NumPy and concatenated per key along axis 0. The keys
        are taken from the first collected dict.

        Raises:
            ValueError: if ``self.test_step_outputs`` is empty.
        """
        outputs = self.test_step_outputs
        if not outputs:
            raise ValueError(
                "Size of test_step_outputs is 0. Did you forgot to call "
                "`self.test_step_outputs.append(outputs)` in test_step()?"
            )

        keys = tuple(outputs[0].keys())
        results = {key: [] for key in keys}
        for d in outputs:
            for key, value in d.items():
                # detach() so tensors that still track gradients can be converted.
                results[key].append(value.detach().cpu().numpy())

        self.test_results = {
            key: np.concatenate(chunks, axis=0) for key, chunks in results.items()
        }

    def clear_test_outputs(self):
        """Drop the collected per-batch outputs and the merged test results."""
        self.test_step_outputs = []
        self.test_results = None

Ancestors

pytorch_lightning.core.module.LightningModule
lightning_fabric.utilities.device_dtype_mixin._DeviceDtypeModuleMixin
pytorch_lightning.core.mixins.hparams_mixin.HyperparametersMixin
pytorch_lightning.core.hooks.ModelHooks
pytorch_lightning.core.hooks.DataHooks
pytorch_lightning.core.hooks.CheckpointHooks
torch.nn.modules.module.Module

Methods

def calc_accuracy(self, y: torch.Tensor, t: torch.Tensor) ‑> torch.Tensor

Returns accuracy score.

Args

y : torch.Tensor: logit tensor. shape=(BATCH, CLASS, TIME), dtype=torch.float
t : torch.Tensor: target tensor. shape=(BATCH, TIME), dtype=torch.long

Returns

torch.Tensor: accuracy computed with average="weighted"; the last class index (CLASS - 1) is passed as ignore_index.

Expand source code

def calc_accuracy(self, y: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
    """Return the accuracy of ``y`` against ``t`` over every time step.

    Args:
        y (torch.Tensor): logit tensor. shape=(BATCH, CLASS, TIME), dtype=torch.float
        t (torch.Tensor): target tensor. shape=(BATCH, TIME), dtype=torch.long

    Returns:
        torch.Tensor: accuracy computed with ``average="weighted"``; the last
        class index (``CLASS - 1``) is passed as ``ignore_index``.
    """
    probs = F.softmax(y, dim=1)  # normalise over the class axis
    (_, num_classes, _) = probs.size()
    # One row of class scores per (batch, time) position.
    probs_flat = probs.permute(0, 2, 1).reshape(-1, num_classes)
    targets_flat = t.reshape(-1)

    # FIXME: I want to use macro average score.
    return accuracy_score(
        probs_flat,
        targets_flat,
        task="multiclass",
        average="weighted",
        num_classes=num_classes,
        ignore_index=num_classes - 1,
    )

def clear_test_outputs(self)

Expand source code

def clear_test_outputs(self):
    """Drop the collected per-batch outputs and the merged test results."""
    self.test_step_outputs, self.test_results = [], None

def configure_optimizers(self) ‑> torch.optim.optimizer.Optimizer

Choose what optimizers and learning-rate schedulers to use in your optimization. Normally you'd need one. But in the case of GANs or similar you might have multiple. Optimization with multiple optimizers only works in the manual optimization mode.

Return

Any of these 5 options.

Single optimizer.
List or Tuple of optimizers.
Two lists - The first list has multiple optimizers, and the second has multiple LR schedulers (or multiple lr_scheduler_config).
Dictionary, with an "optimizer" key, and (optionally) a "lr_scheduler" key whose value is a single LR scheduler or lr_scheduler_config.
None - Fit will run without any optimizer.

The lr_scheduler_config is a dictionary which contains the scheduler and its associated configuration. The default configuration is shown below.

.. code-block:: python

lr_scheduler_config = {
    # REQUIRED: The scheduler instance
    "scheduler": lr_scheduler,
    # The unit of the scheduler's step size, could also be 'step'.
    # 'epoch' updates the scheduler on epoch end whereas 'step'
    # updates it after an optimizer update.
    "interval": "epoch",
    # How many epochs/steps should pass between calls to
    # `scheduler.step()`. 1 corresponds to updating the learning
    # rate after every epoch/step.
    "frequency": 1,
    # Metric to monitor for schedulers like `ReduceLROnPlateau`
    "monitor": "val_loss",
    # If set to `True`, will enforce that the value specified 'monitor'
    # is available when the scheduler is updated, thus stopping
    # training if not found. If set to `False`, it will only produce a warning
    "strict": True,
    # If using the `LearningRateMonitor` callback to monitor the
    # learning rate progress, this keyword can be used to specify
    # a custom logged name
    "name": None,
}

When there are schedulers in which the .step() method is conditioned on a value, such as the :class:torch.optim.lr_scheduler.ReduceLROnPlateau scheduler, Lightning requires that the lr_scheduler_config contains the keyword "monitor" set to the metric name that the scheduler should be conditioned on.

Testcode

The ReduceLROnPlateau scheduler requires a monitor

def configure_optimizers(self):
    optimizer = Adam(…)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": ReduceLROnPlateau(optimizer, …),
            "monitor": "metric_to_track",
            "frequency": "indicates how often the metric is updated",
            # If "monitor" references validation metrics, then "frequency" should be set to a
            # multiple of "trainer.check_val_every_n_epoch".
        },
    }

In the case of two optimizers, only one using the ReduceLROnPlateau scheduler

def configure_optimizers(self):
    optimizer1 = Adam(…)
    optimizer2 = SGD(…)
    scheduler1 = ReduceLROnPlateau(optimizer1, …)
    scheduler2 = LambdaLR(optimizer2, …)
    return (
        {
            "optimizer": optimizer1,
            "lr_scheduler": {
                "scheduler": scheduler1,
                "monitor": "metric_to_track",
            },
        },
        {"optimizer": optimizer2, "lr_scheduler": scheduler2},
    )

Metrics can be made available to monitor by simply logging it using self.log('metric_to_track', metric_val) in your :class:~pytorch_lightning.core.LightningModule.

Note

Some things to know:

Lightning calls .backward() and .step() automatically in case of automatic optimization.
If a learning rate scheduler is specified in configure_optimizers() with key "interval" (default "epoch") in the scheduler configuration, Lightning will call the scheduler's .step() method automatically in case of automatic optimization.
If you use 16-bit precision (precision=16), Lightning will automatically handle the optimizer.
If you use :class:torch.optim.LBFGS, Lightning handles the closure function automatically for you.
If you use multiple optimizers, you will have to switch to 'manual optimization' mode and step them yourself.
If you need to control how often the optimizer steps, override the :meth:optimizer_step hook.

Expand source code

def configure_optimizers(self) -> torch.optim.Optimizer:
    """Create the optimizer and optional LR scheduler from ``self.cfg.optimizer``.

    Returns:
        The bare optimizer when ``scheduler.type`` is ``"None"``; otherwise a
        dict with the keys ``"optimizer"`` and ``"lr_scheduler"``.

    Raises:
        ValueError: if the optimizer type or the scheduler type is unsupported.
    """
    opt_cfg = self.cfg.optimizer

    # == Optimizer ==
    if opt_cfg.type == "SGD":
        logger.info(f"SGD optimizer is selected! (lr={opt_cfg.lr})")
        optimizer = torch.optim.SGD(
            self.parameters(),
            lr=opt_cfg.lr,
            momentum=opt_cfg.momentum,
            weight_decay=opt_cfg.weight_decay,
        )
    elif opt_cfg.type == "Adam":
        logger.info(f"Adam optimizer is selected! (lr={opt_cfg.lr})")
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=opt_cfg.lr,
            weight_decay=opt_cfg.weight_decay,
        )
    else:
        raise ValueError(f"{opt_cfg.type} is not supported.")

    # == LR Scheduler ==
    sched_cfg = opt_cfg.scheduler
    if sched_cfg.type == "None":
        logger.info("No scheduler is applied.")
        return optimizer
    elif sched_cfg.type == "StepLR":
        logger.info("StepLR scheduler is selected.")
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=sched_cfg.step_size,
            gamma=sched_cfg.gamma,
        )
    elif sched_cfg.type == "ExponentialLR":
        # The original logged "StepLR" here, which mislabelled the run logs.
        logger.info("ExponentialLR scheduler is selected.")
        scheduler = torch.optim.lr_scheduler.ExponentialLR(
            optimizer,
            gamma=sched_cfg.gamma,
        )
    elif sched_cfg.type == "CosineAnnealing":
        logger.info("CosineAnnealing scheduler is selected.")
        # NOTE(review): `verbose` is deprecated in recent PyTorch releases --
        # confirm it is still accepted by the pinned torch version.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=sched_cfg.CosineAnnealing.T_max,
            eta_min=sched_cfg.CosineAnnealing.eta_min,
            verbose=True,
        )
    else:
        raise ValueError(f"{sched_cfg.type} scheduler is not supported.")

    return {"optimizer": optimizer, "lr_scheduler": scheduler}

def forward(self, x: torch.Tensor) ‑> torch.Tensor

Same as :meth:torch.nn.Module.forward.

Args

*args: Whatever you decide to pass into the forward method.
**kwargs: Keyword arguments are also possible.

Return

Your model's output

Expand source code

def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run ``x`` through ``self.net`` and return its output."""
    network = self.net
    return network(x)

def init_criterion(self, cfg: omegaconf.dictconfig.DictConfig)

Expand source code

def init_criterion(self, cfg: DictConfig):
    """Return the loss module; a plain ``CrossEntropyLoss`` (``cfg`` is not consulted)."""
    return torch.nn.CrossEntropyLoss()

def init_model(self, cfg: omegaconf.dictconfig.DictConfig) ‑> torch.nn.modules.module.Module

Expand source code

def init_model(self, cfg: DictConfig) -> torch.nn.Module:
    """Build the network from ``cfg``; must be overridden by subclasses.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    # Name the missing hook so the failure is actionable for subclass authors.
    raise NotImplementedError(
        f"{type(self).__name__} must implement init_model()."
    )

def on_test_epoch_end(self)

Called in the test loop at the very end of the epoch.

Expand source code

def on_test_epoch_end(self):
    """Concatenate the collected test outputs into ``self.test_results``.

    Every value of every dict in ``self.test_step_outputs`` is moved to the
    CPU, converted to NumPy and concatenated per key along axis 0. The keys
    are taken from the first collected dict.

    Raises:
        ValueError: if ``self.test_step_outputs`` is empty.
    """
    outputs = self.test_step_outputs
    if not outputs:
        raise ValueError(
            "Size of test_step_outputs is 0. Did you forgot to call "
            "`self.test_step_outputs.append(outputs)` in test_step()?"
        )

    keys = tuple(outputs[0].keys())
    results = {key: [] for key in keys}
    for d in outputs:
        for key, value in d.items():
            # detach() so tensors that still track gradients can be converted.
            results[key].append(value.detach().cpu().numpy())

    self.test_results = {
        key: np.concatenate(chunks, axis=0) for key, chunks in results.items()
    }

def test_step(self, batch: Dict, batch_idx: int) ‑> Dict

Operates on a single batch of data from the test set. In this step you'd normally generate examples or calculate anything of interest such as accuracy.

Args

batch: The output of your data iterable, normally a :class:~torch.utils.data.DataLoader.
batch_idx: The index of this batch.
dataloader_idx: The index of the dataloader that produced this batch. (only if multiple dataloaders used)

Return

:class:~torch.Tensor - The loss tensor
dict - A dictionary. Can include any keys, but must include the key 'loss'.
None - Skip to the next batch.

.. code-block:: python

# if you have one test dataloader:
def test_step(self, batch, batch_idx):
    ...


# if you have multiple test dataloaders:
def test_step(self, batch, batch_idx, dataloader_idx=0):
    ...

Examples::

# CASE 1: A single test dataset
def test_step(self, batch, batch_idx):
    x, y = batch

    # implement your own
    out = self(x)
    loss = self.loss(out, y)

    # log 6 example images
    # or generated text... or whatever
    sample_imgs = x[:6]
    grid = torchvision.utils.make_grid(sample_imgs)
    self.logger.experiment.add_image('example_images', grid, 0)

    # calculate acc
    labels_hat = torch.argmax(out, dim=1)
    test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)

    # log the outputs!
    self.log_dict({'test_loss': loss, 'test_acc': test_acc})

If you pass in multiple test dataloaders, :meth:test_step will have an additional argument. We recommend setting the default value of 0 so that you can quickly switch between single and multiple dataloaders.

.. code-block:: python

# CASE 2: multiple test dataloaders
def test_step(self, batch, batch_idx, dataloader_idx=0):
    # dataloader_idx tells you which dataset this is.
    ...

Note

If you don't need to test you don't need to implement this method.

Note

When the :meth:test_step is called, the model has been put in eval mode and PyTorch gradients have been disabled. At the end of the test epoch, the model goes back to training mode and gradients are enabled.

Expand source code

def test_step(self, batch: Dict, batch_idx: int) -> Dict:
    raise NotImplementedError()

def train_val_common_step(self, batch: Dict, batch_idx: int) ‑> Dict

Expand source code

def train_val_common_step(self, batch: Dict, batch_idx: int) -> Dict:
    """Compute the dict shared by training and validation; must be overridden.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    # Name the missing hook so the failure is actionable for subclass authors.
    raise NotImplementedError(
        f"{type(self).__name__} must implement train_val_common_step()."
    )

def training_step(self, batch: Dict, batch_idx: int) ‑> Dict

Here you compute and return the training loss and some additional metrics for e.g. the progress bar or logger.

Args

batch: The output of your data iterable, normally a :class:~torch.utils.data.DataLoader.
batch_idx: The index of this batch.
dataloader_idx: The index of the dataloader that produced this batch. (only if multiple dataloaders used)

Return

:class:~torch.Tensor - The loss tensor
dict - A dictionary. Can include any keys, but must include the key 'loss'.
None - Skip to the next batch. This is only supported for automatic optimization. This is not supported for multi-GPU, TPU, IPU, or DeepSpeed.

In this step you'd normally do the forward pass and calculate the loss for a batch. You can also do fancier things like multiple forward passes or something model specific.

Example::

def training_step(self, batch, batch_idx):
    x, y, z = batch
    out = self.encoder(x)
    loss = self.loss(out, x)
    return loss

To use multiple optimizers, you can switch to 'manual optimization' and control their stepping:

.. code-block:: python

def __init__(self):
    super().__init__()
    self.automatic_optimization = False


# Multiple optimizers (e.g.: GANs)
def training_step(self, batch, batch_idx):
    opt1, opt2 = self.optimizers()

    # do training_step with encoder
    ...
    opt1.step()
    # do training_step with decoder
    ...
    opt2.step()

Note

When accumulate_grad_batches > 1, the loss returned here will be automatically normalized by accumulate_grad_batches internally.

Expand source code

def training_step(self, batch: Dict, batch_idx: int) -> Dict:
    """Run the common step and log every entry under the ``train/`` prefix.

    Values are logged per epoch (``on_step=False, on_epoch=True``).

    Returns:
        The unprefixed dict produced by ``train_val_common_step``.
    """
    metrics = self.train_val_common_step(batch, batch_idx)

    prefixed = {}
    for name, value in metrics.items():
        prefixed[f"train/{name}"] = value

    self.log_dict(prefixed, on_step=False, on_epoch=True, prog_bar=True, logger=True)
    return metrics

def validation_step(self, batch: Dict, batch_idx: int, dataloader_idx: int = 0) ‑> Dict

Operates on a single batch of data from the validation set. In this step you might generate examples or calculate anything of interest like accuracy.

Args

batch: The output of your data iterable, normally a :class:~torch.utils.data.DataLoader.
batch_idx: The index of this batch.
dataloader_idx: The index of the dataloader that produced this batch. (only if multiple dataloaders used)

Return

:class:~torch.Tensor - The loss tensor
dict - A dictionary. Can include any keys, but must include the key 'loss'.
None - Skip to the next batch.

.. code-block:: python

# if you have one val dataloader:
def validation_step(self, batch, batch_idx):
    ...


# if you have multiple val dataloaders:
def validation_step(self, batch, batch_idx, dataloader_idx=0):
    ...

Examples::

# CASE 1: A single validation dataset
def validation_step(self, batch, batch_idx):
    x, y = batch

    # implement your own
    out = self(x)
    loss = self.loss(out, y)

    # log 6 example images
    # or generated text... or whatever
    sample_imgs = x[:6]
    grid = torchvision.utils.make_grid(sample_imgs)
    self.logger.experiment.add_image('example_images', grid, 0)

    # calculate acc
    labels_hat = torch.argmax(out, dim=1)
    val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)

    # log the outputs!
    self.log_dict({'val_loss': loss, 'val_acc': val_acc})

If you pass in multiple val dataloaders, :meth:validation_step will have an additional argument. We recommend setting the default value of 0 so that you can quickly switch between single and multiple dataloaders.

.. code-block:: python

# CASE 2: multiple validation dataloaders
def validation_step(self, batch, batch_idx, dataloader_idx=0):
    # dataloader_idx tells you which dataset this is.
    ...

Note

If you don't need to validate you don't need to implement this method.

Note

When the :meth:validation_step is called, the model has been put in eval mode and PyTorch gradients have been disabled. At the end of validation, the model goes back to training mode and gradients are enabled.

Expand source code

def validation_step(
    self, batch: Dict, batch_idx: int, dataloader_idx: int = 0
) -> Dict:
    """Run the common step and log every entry under the ``val/`` prefix.

    Values are logged per epoch (``on_step=False, on_epoch=True``).
    ``dataloader_idx`` is accepted but not used here.

    Returns:
        The unprefixed dict produced by ``train_val_common_step``.
    """
    metrics = self.train_val_common_step(batch, batch_idx)

    prefixed = {}
    for name, value in metrics.items():
        prefixed[f"val/{name}"] = value

    self.log_dict(prefixed, on_step=False, on_epoch=True, prog_bar=True, logger=True)
    return metrics

class EarlyStopError (*args, **kwargs)

Common base class for all non-exit exceptions.

Expand source code

class EarlyStopError(Exception):
    """Custom exception type that adds nothing to ``Exception``.

    NOTE(review): presumably raised to abort a run early -- it is not used
    anywhere in this module, so confirm against the callers.
    """

Ancestors

builtins.Exception
builtins.BaseException