Module openpack_torch.data.datasets

Dataset classes for the OpenPack dataset.

Source code:
"""Dataset Class for OpenPack dataset.
"""
from logging import getLogger
from typing import Dict, List, Tuple

import numpy as np
import openpack_toolkit as optk
import torch
from omegaconf import DictConfig, open_dict

from ._baseclass import Sequence, SequenceSet, Window
from ._wrapper import (
    load_annot_wrapper,
    load_imu_wrapper,
    load_iot_data_wrapper,
    load_kinect_2d_kpt_wrapper,
)
from .preprocessing import compute_semantic_hard_boundary

log = getLogger(__name__)


def random_window_shift(win: Window, win_size: int, seq_len: int) -> Window:
    """Change cropping position up to 50%.

    Args:
        seq_len (int): length of whole sequence.
    """
    start = win.start
    stop = start + win_size
    if stop >= seq_len:
        stop = seq_len

    shift = int(np.random.uniform(-1, 1, size=(1,)) * ((stop - start) / 2))
    new_start, new_stop = start + shift, stop + shift

    if new_start < 0:
        new_start = 0
    if new_stop >= seq_len:
        new_stop = seq_len

    if new_start > new_stop:
        raise ValueError(
            f"start={start}, stop={stop} "
            f"-> new_start={new_start}, new_stop={new_stop} (shift={shift})"
        )

    new_win = Window(win.sequence_idx, win.segment_idx, new_start, new_stop)
    return new_win


# -----------------------------------------------------------------------------


class OpenPackImu(torch.utils.data.Dataset):
    """Dataset class for IMU data.
    Attributes:
        data (List[Dict]): each sequence is stored in dict. The dict has 5 keys (i.e.,
            user, session, data, label(=class index), unixtime). data is a np.ndarray with
            shape = ``(N, channel(=acc_x, acc_y, ...), window, 1)``.
        index (Tuple[Dict]): sample index. A dict in this tuple as 3 property.
            ``seq`` = sequence index, ``sqg`` = segment index which is a sequential number
            within the single sequence. ``pos`` = sample index of the start of this segment.
        classes (optk.ActSet): list of activity classes.
        window (int): sliding window size.
        debug (bool): If True, enable debug mode. Default to False.
        submission (bool): Set True when you make submission file. Annotation data will not be
            loaded and dummy data will be generated. Default to False.
    Todo:
        * Make a minimum copy of cfg (DictConfig) before using in ``load_dataset()``.
        * Add method for parameter validation (i.e., assert).
    """

    data: List[SequenceSet] = None
    index: Tuple[Window, ...] = None
    tensor_set_config: Dict = {
        "data": {
            "imu": {
                "new_key": "x",
                "dtype": torch.float,
                "callbacks": [
                    lambda x: x.unsqueeze(2),
                ],
            },
        },
        "labels": {
            "annot": {
                "new_key": "t",
                "dtype": torch.long,
                "squeeze": True,
                "callbacks": [
                    lambda x: x.squeeze(0),
                ],
            },
        },
        "unixtime": {
            "new_key": "ts",
            "dtype": torch.long,
        },
    }

    def __init__(
        self,
        cfg: DictConfig,
        user_session_list: Tuple[Tuple[int, int], ...] = None,
        classes: optk.ActSet = None,
        window: int = 30 * 60,
        random_crop=False,
        submission: bool = False,
        debug: bool = False,
    ) -> None:
        """Initialize OpenPackImu dataset class.
        Args:
            cfg (DictConfig): instance of ``optk.configs.OpenPackConfig``. path, dataset, and
                annotation attributes must be initialized.
            user_session (Tuple[Tuple[int, int], ...]): the list of pairs of user ID and session ID
                to be included.
            classes (optk.ActSet, optional): activity set definition.
                Defaults to OPENPACK_OPERATION_CLASSES.
            window (int, optional): window size [steps]. Defaults to 30*60 [s].
            submission (bool, optional): Set True when you want to load test data for submission.
                If True, the annotation data will no be replaced by dummy data. Defaults to False.
            debug (bool, optional): enable debug mode. Defaults to False.
        """
        super().__init__()
        self.cfg = cfg
        self.classes = classes
        self.window = window
        self.submission = submission
        self.debug = debug
        self.random_crop = random_crop

        if self.classes is None:
            class_set_key = cfg.dataset.annotation.name.replace("-", "_").upper()
            # NOTE: resolve the class set by name via getattr() instead of eval().
            classes_tuple = getattr(
                optk.configs.datasets.annotations, class_set_key
            ).classes
            self.classes = optk.ActSet(classes_tuple)

        if user_session_list is not None:
            self.load_dataset(cfg, user_session_list, window, submission=submission)
            self.preprocessing()

    def load_single_session(self, cfg, submission) -> SequenceSet:
        data_seq = dict()
        data_seq["imu"] = load_imu_wrapper(cfg)
        base_unixtime_seq = data_seq["imu"].unixtime

        labels_seq = dict()
        labels_seq["annot"] = load_annot_wrapper(
            cfg, base_unixtime_seq, submission, self.classes
        )

        ss = SequenceSet(
            user=cfg.user.name,
            session=cfg.session,
            data=data_seq,
            labels=labels_seq,
            primary_seqence="imu",
        )
        return ss

    def load_dataset(
        self,
        cfg: DictConfig,
        user_session_list: Tuple[Tuple[int, int], ...],
        window: int = None,
        submission: bool = False,
    ) -> None:
        """Called in ``__init__()`` and load required data.
        Args:
            user_session (Tuple[Tuple[str, str], ...]): _description_
            window (int, optional): _description_. Defaults to None.
            submission (bool, optional): _description_. Defaults to False.
        """
        data, index = [], []
        for seq_idx, (user, session) in enumerate(user_session_list):
            with open_dict(cfg):
                cfg.user = {"name": user}
                cfg.session = session
            ss = self.load_single_session(cfg, submission)

            data.append(ss)
            index += [
                Window(seq_idx, seg_idx, start, start + window)
                for seg_idx, start in enumerate(range(0, ss.seq_len(), window))
            ]

        self.data = data
        self.index = tuple(index)

    def preprocessing(self) -> None:
        """
        * Normalize [-3G, +3G] into [0, 1].
        """
        # NOTE: Normalize ACC data. ([-3G, +3G] -> [0, 1])
        # NOTE: Described in Appendix Sec.3.2.
        is_agq = (
            self.cfg.dataset.stream.spec.imu.gyro
            and self.cfg.dataset.stream.spec.imu.quat
        )
        devices = self.cfg.dataset.stream.spec.imu.devices

        for ss in self.data:
            x = ss.data.get("imu").data
            if is_agq:
                for i, device in enumerate(devices):
                    # ACC
                    dims = [10 * i + 0, 10 * i + 1, 10 * i + 2]
                    x[dims] = np.clip(x[dims], -3, +3)
                    x[dims] = (x[dims] + 3.0) / 6.0
                    # Gyro
                    for j, ch in enumerate(["x", "y", "z"]):
                        dim = 10 * i + (3 + j)
                        mean = (
                            self.cfg.dataset.stream.spec.imu.stats[device].gyro[ch].mean
                        )
                        std = (
                            self.cfg.dataset.stream.spec.imu.stats[device].gyro[ch].std
                        )
                        x[dim] = (x[dim] - mean) / std
            else:
                x = np.clip(x, -3, +3)
                x = (x + 3.0) / 6.0
            ss.data.get("imu").data = x

    @property
    def num_classes(self) -> int:
        """Returns the number of classes
        Returns:
            int
        """
        return len(self.classes)

    def __str__(self) -> str:
        s = (
            "OpenPackImu("
            f"index={len(self.index)}, "
            f"num_sequence={len(self.data)}, "
            f"submission={self.submission}, "
            f"random_crop={self.random_crop}"
            ")"
        )
        return s

    def __len__(self) -> int:
        return len(self.index)

    def __iter__(self):
        # NOTE: no __next__() is defined; use indexing (or a DataLoader) rather
        # than iterating over this object directly.
        return self

    def __getitem__(self, index: int) -> Dict:
        win = self.index[index]
        ss = self.data[win.sequence_idx]

        # Random crop: shift the cropping window at sampling time.
        if self.random_crop:
            win = random_window_shift(win, self.window, len(ss))

        new_ss = ss.get_segment(win, self.window)
        tensors = new_ss.get_tensors(self.tensor_set_config)
        return tensors


# -----------------------------------------------------------------------------
# =====================
#   With Boundary Info
# =====================
class ImuBoundaryDataset(OpenPackImu):
    tensor_set_config: Dict = {
        "data": {
            "imu": {
                "new_key": "x",
                "dtype": torch.float,
                "callbacks": [
                    lambda x: x.unsqueeze(2),
                ],
            },
        },
        "labels": {
            "annot": {
                "new_key": "t",
                "dtype": torch.long,
                "callbacks": [
                    lambda x: x.squeeze(0),
                ],
            },
            "boundary": {
                "new_key": "tb",
                "dtype": torch.float,
            },
        },
        "unixtime": {
            "new_key": "ts",
            "dtype": torch.long,
        },
    }

    def preprocessing(self) -> None:
        """
        * Compute boundary label.
        """
        super().preprocessing()

        # -- Add action boundary labels --
        for ss in self.data:
            t_id = ss.labels["annot"].data[0]
            unixtime = ss.labels["annot"].unixtime
            metadata = {"type": "boundary"}

            t_bd = compute_semantic_hard_boundary(t_id, len(self.classes))
            ss.labels["boundary"] = Sequence(unixtime, t_bd, metadata)


# =================
#   With IoT Data
# =================
class ImuIoTDataset(OpenPackImu):
    tensor_set_config: Dict = {
        "data": {
            "imu": {
                "new_key": "x",
                "dtype": torch.float,
                "callbacks": [
                    lambda x: x.unsqueeze(2),
                ],
            },
            "iot": {
                "new_key": "x_iot",
                "dtype": torch.float,
                "callbacks": [
                    lambda x: x.unsqueeze(2),
                ],
            },
        },
        "labels": {
            "annot": {
                "new_key": "t",
                "dtype": torch.long,
                # "squeeze": True,
                "callbacks": [
                    lambda x: x.squeeze(0),
                ],
            },
        },
        "unixtime": {
            "new_key": "ts",
            "dtype": torch.long,
        },
    }

    def load_single_session(self, cfg, submission) -> SequenceSet:
        data_seq = dict()
        data_seq["imu"] = load_imu_wrapper(cfg)
        base_unixtime_seq = data_seq["imu"].unixtime

        data_seq["iot"] = load_iot_data_wrapper(cfg, base_unixtime_seq)

        labels_seq = dict()
        labels_seq["annot"] = load_annot_wrapper(
            cfg, base_unixtime_seq, submission, self.classes
        )

        ss = SequenceSet(
            user=cfg.user.name,
            session=cfg.session,
            data=data_seq,
            labels=labels_seq,
            primary_seqence="imu",
            # metadata=None,
        )
        return ss


class ImuBoundaryIoTDataset(ImuIoTDataset):
    tensor_set_config: Dict = {
        "data": {
            "imu": {
                "new_key": "x",
                "dtype": torch.float,
                "callbacks": [
                    lambda x: x.unsqueeze(2),
                ],
            },
            "iot": {
                "new_key": "x_iot",
                "dtype": torch.float,
                "callbacks": [
                    lambda x: x.unsqueeze(2),
                ],
            },
        },
        "labels": {
            "annot": {
                "new_key": "t",
                "dtype": torch.long,
                "callbacks": [
                    lambda x: x.squeeze(0),
                ],
            },
            "boundary": {
                "new_key": "tb",
                "dtype": torch.float,
            },
        },
        "unixtime": {
            "new_key": "ts",
            "dtype": torch.long,
        },
    }

    def preprocessing(self) -> None:
        """
        * Compute boundary label.
        """
        super().preprocessing()

        # -- Add action boundary labels --
        for ss in self.data:
            t_id = ss.labels["annot"].data[0]
            unixtime = ss.labels["annot"].unixtime
            metadata = {"type": "boundary"}

            t_bd = compute_semantic_hard_boundary(t_id, len(self.classes))
            ss.labels["boundary"] = Sequence(unixtime, t_bd, metadata)


# =============
#   Keypoints
# =============
class Kinect2dKptDataset(OpenPackImu):
    tensor_set_config: Dict = {
        "data": {
            "kinect2dKpt": {
                "new_key": "x",
                "dtype": torch.float,
            },
        },
        "labels": {
            "annot": {
                "new_key": "t",
                "dtype": torch.long,
                "callbacks": [
                    lambda x: x.squeeze(0),
                ],
            },
        },
        "unixtime": {
            "new_key": "ts",
            "dtype": torch.long,
        },
    }

    def load_single_session(self, cfg, submission) -> SequenceSet:
        data_seq = dict()
        data_seq["kinect2dKpt"] = load_kinect_2d_kpt_wrapper(cfg)
        base_unixtime_seq = data_seq["kinect2dKpt"].unixtime

        labels_seq = dict()
        labels_seq["annot"] = load_annot_wrapper(
            cfg, base_unixtime_seq, submission, self.classes
        )

        ss = SequenceSet(
            user=cfg.user.name,
            session=cfg.session,
            data=data_seq,
            labels=labels_seq,
            primary_seqence="kinect2dKpt",
        )
        return ss

    def preprocessing(self) -> None:
        """Standardize X-/Y-axis."""
        mean_x = self.cfg.dataset.stream.spec.kinect2dKpt.spec.stats.mean.x
        mean_y = self.cfg.dataset.stream.spec.kinect2dKpt.spec.stats.mean.y
        std_x = self.cfg.dataset.stream.spec.kinect2dKpt.spec.stats.std.x
        std_y = self.cfg.dataset.stream.spec.kinect2dKpt.spec.stats.std.y

        for ss in self.data:
            x = ss.data.get("kinect2dKpt").data
            x[0] = (x[0] - mean_x) / std_x
            x[1] = (x[1] - mean_y) / std_y
            ss.data.get("kinect2dKpt").data = x

Functions

def random_window_shift(win: openpack_torch.data._baseclass.Window, win_size: int, seq_len: int) -> openpack_torch.data._baseclass.Window

Randomly shift the cropping position by up to 50% of the window length.

Args

win : Window
window to be shifted.
win_size : int
window size [steps].
seq_len : int
length of the whole sequence.
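
A minimal sketch of the behavior (assuming Window is the namedtuple-like class from ._baseclass with fields sequence_idx, segment_idx, start, stop):

import numpy as np
from openpack_torch.data._baseclass import Window
from openpack_torch.data.datasets import random_window_shift

np.random.seed(0)
win = Window(0, 2, 3600, 5400)  # hypothetical 3rd segment, 1800 steps long
new_win = random_window_shift(win, win_size=1800, seq_len=54000)
# The shift is drawn from U(-1, 1) * (win_size / 2), so the new start
# stays within +/- 900 steps (50% of the window) of the original.
assert abs(new_win.start - win.start) <= 900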

Classes

class ImuBoundaryDataset (cfg: omegaconf.dictconfig.DictConfig, user_session_list: Tuple[Tuple[int, int], ...] = None, classes: openpack_toolkit.activity.ActSet = None, window: int = 1800, random_crop=False, submission: bool = False, debug: bool = False)

Variant of OpenPackImu that adds semantic action-boundary labels. preprocessing() computes a boundary sequence from the annotation labels and exposes it under the tensor key tb. See OpenPackImu for the common attributes and constructor arguments.

Ancestors

  • OpenPackImu
  • torch.utils.data.dataset.Dataset
  • typing.Generic

Class variables

var tensor_set_config : Dict

Methods

def preprocessing(self) -> None
  • Compute the action-boundary labels and store them under the boundary key (tensor key tb).
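
A minimal usage sketch (cfg must be an initialized config; the user/session IDs below are placeholders):

ds = ImuBoundaryDataset(cfg, user_session_list=(("U0102", "S0100"),))
sample = ds[0]
sorted(sample.keys())  # ['t', 'tb', 'ts', 'x'] per tensor_set_config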


class ImuBoundaryIoTDataset (cfg: omegaconf.dictconfig.DictConfig, user_session_list: Tuple[Tuple[int, int], ...] = None, classes: openpack_toolkit.activity.ActSet = None, window: int = 1800, random_crop=False, submission: bool = False, debug: bool = False)

Variant of ImuIoTDataset that additionally computes semantic action-boundary labels, combining the IMU + IoT inputs (x, x_iot) with the boundary labels (tb) of ImuBoundaryDataset. See OpenPackImu for the common attributes and constructor arguments.

Ancestors

  • ImuIoTDataset
  • OpenPackImu
  • torch.utils.data.dataset.Dataset
  • typing.Generic

Class variables

var tensor_set_config : Dict

Methods

def preprocessing(self) -> None
  • Compute the action-boundary labels and store them under the boundary key (tensor key tb).


class ImuIoTDataset (cfg: omegaconf.dictconfig.DictConfig, user_session_list: Tuple[Tuple[int, int], ...] = None, classes: openpack_toolkit.activity.ActSet = None, window: int = 1800, random_crop=False, submission: bool = False, debug: bool = False)

Variant of OpenPackImu that also loads IoT device data for each session. The IoT stream is aligned to the IMU unixtime sequence and exposed under the tensor key x_iot. See OpenPackImu for the common attributes and constructor arguments.

Ancestors

  • OpenPackImu
  • torch.utils.data.dataset.Dataset
  • typing.Generic

Subclasses

  • ImuBoundaryIoTDataset

Class variables

var tensor_set_config : Dict

Methods

def load_single_session(self, cfg, submission) -> openpack_torch.data._baseclass.SequenceSet

Load the IMU stream, the IoT stream aligned to the IMU timestamps, and the annotation labels for the (user, session) specified in cfg, and return them as a SequenceSet.
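
The IoT stream is loaded with load_iot_data_wrapper(cfg, base_unixtime_seq), i.e., against the IMU timestamps. Conceptually, the alignment amounts to nearest-timestamp resampling; the sketch below only illustrates the idea and is not the wrapper's actual implementation:

import numpy as np

def align_to_base(iot_unixtime, iot_data, base_unixtime):
    """Pick, for each base timestamp, the closest IoT sample (sketch only)."""
    idx = np.searchsorted(iot_unixtime, base_unixtime)
    idx = np.clip(idx, 1, len(iot_unixtime) - 1)
    left, right = iot_unixtime[idx - 1], iot_unixtime[idx]
    idx -= (base_unixtime - left) < (right - base_unixtime)  # step back if the left sample is closer
    return iot_data[..., idx]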


class Kinect2dKptDataset (cfg: omegaconf.dictconfig.DictConfig, user_session_list: Tuple[Tuple[int, int], ...] = None, classes: openpack_toolkit.activity.ActSet = None, window: int = 1800, random_crop=False, submission: bool = False, debug: bool = False)

Dataset class for 2D keypoint data extracted from the Kinect camera. Loads the kinect2dKpt stream as the primary sequence instead of IMU, and standardizes the X/Y coordinates in preprocessing(). See OpenPackImu for the common attributes and constructor arguments.

Ancestors

  • OpenPackImu
  • torch.utils.data.dataset.Dataset
  • typing.Generic

Class variables

var tensor_set_config : Dict

Methods

def load_single_session(self, cfg, submission) -> openpack_torch.data._baseclass.SequenceSet

Load the 2D keypoint stream and the annotation labels for the (user, session) specified in cfg, and return them as a SequenceSet with kinect2dKpt as the primary sequence.

def preprocessing(self) -> None

Standardize X-/Y-axis.

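The standardization is plain per-axis z-scoring with statistics taken from the stream spec. A minimal numeric sketch (dummy statistics and a hypothetical (xy, joints, time) layout):

import numpy as np

kpt = np.random.rand(2, 17, 1800) * 1000.0  # dummy keypoint coordinates in pixels
mean_x, std_x = 640.0, 100.0  # hypothetical stats from the stream spec
mean_y, std_y = 360.0, 80.0
kpt[0] = (kpt[0] - mean_x) / std_x  # standardize X coordinates
kpt[1] = (kpt[1] - mean_y) / std_y  # standardize Y coordinates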


class OpenPackImu (cfg: omegaconf.dictconfig.DictConfig, user_session_list: Tuple[Tuple[int, int], ...] = None, classes: openpack_toolkit.activity.ActSet = None, window: int = 1800, random_crop=False, submission: bool = False, debug: bool = False)

Dataset class for IMU data.

Attributes

data : List[SequenceSet]
the loaded sequences. Each SequenceSet holds the user, session, data streams (np.ndarray), labels (class indices), and unixtime sequences of one recording.
index : Tuple[Window, …]
sample index. Each Window has 4 properties: sequence_idx = sequence index, segment_idx = segment index (a sequential number within the single sequence), and start/stop = sample positions of this segment.
classes : optk.ActSet
list of activity classes.
window : int
sliding window size.
debug : bool
If True, enable debug mode. Defaults to False.
submission : bool
Set True when you make a submission file. Annotation data will not be loaded and dummy data will be generated instead. Defaults to False.

Todo

  • Make a minimum copy of cfg (DictConfig) before using it in load_dataset().
  • Add a method for parameter validation (i.e., assert).

Initialize the OpenPackImu dataset class.

Args

cfg : DictConfig
instance of optk.configs.OpenPackConfig. The path, dataset, and annotation attributes must be initialized.
user_session_list : Tuple[Tuple[int, int], …]
the list of pairs of user ID and session ID to be included.
classes : optk.ActSet, optional
activity set definition. Defaults to OPENPACK_OPERATION_CLASSES.
window : int, optional
window size [steps]. Defaults to 30*60.
random_crop : bool, optional
if True, randomly shift the window position at sampling time. Defaults to False.
submission : bool, optional
Set True when you want to load test data for submission. If True, the annotation data will be replaced by dummy data. Defaults to False.
debug : bool, optional
enable debug mode. Defaults to False.
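
A minimal end-to-end sketch (cfg must be an initialized optk.configs.OpenPackConfig; the user/session IDs below are placeholders):

import torch
from openpack_torch.data.datasets import OpenPackImu

ds = OpenPackImu(cfg, user_session_list=(("U0102", "S0100"),), window=30 * 60)
loader = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=True)
batch = next(iter(loader))
# batch["x"]: float IMU windows, batch["t"]: long class indices,
# batch["ts"]: long unixtime stamps (keys defined by tensor_set_config).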

Ancestors

  • torch.utils.data.dataset.Dataset
  • typing.Generic

Subclasses

  • ImuBoundaryDataset
  • ImuIoTDataset
  • Kinect2dKptDataset

Class variables

var data : List[SequenceSet]
var index : Tuple[Window, …]
var tensor_set_config : Dict
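
tensor_set_config maps each sequence key to the output key ("new_key"), target dtype, and post-processing callbacks applied when get_tensors() assembles a sample. For example, the "imu" entry adds a trailing axis via unsqueeze(2); a hedged sketch of that callback's effect on a hypothetical IMU slice:

import torch

raw = torch.zeros(6, 1800)  # hypothetical IMU slice: (channels, window)
x = raw.to(torch.float).unsqueeze(2)
print(x.shape)  # torch.Size([6, 1800, 1])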

Instance variables

var num_classes : int

Returns the number of classes

Returns

int


Methods

def load_dataset(self, cfg: omegaconf.dictconfig.DictConfig, user_session_list: Tuple[Tuple[int, int], ...], window: int = None, submission: bool = False) -> None

Called from __init__() to load the required data.

Args

user_session_list : Tuple[Tuple[int, int], …]
the list of pairs of user ID and session ID to be included.
window : int, optional
window size [steps]. Defaults to None.
submission : bool, optional
if True, load test data for submission and use dummy annotation data. Defaults to False.
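
Windows are laid out back-to-back with range(0, seq_len, window), so the final window can overrun the end of the sequence (the overrun is presumably handled when get_segment() crops the data). A quick numeric sketch of the index layout:

seq_len, window = 54321, 1800
starts = list(range(0, seq_len, window))
len(starts)          # 31 windows
starts[-1] + window  # 55800: the final window overruns seq_len by 1479 steps
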
def load_single_session(self, cfg, submission) -> openpack_torch.data._baseclass.SequenceSet

Load the IMU stream and the annotation labels for the (user, session) specified in cfg, and return them as a SequenceSet.

def preprocessing(self) -> None
  • Normalize accelerometer data from [-3G, +3G] into [0, 1]; standardize gyro channels when the gyro/quaternion streams are enabled.
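
The accelerometer scaling maps [-3G, +3G] linearly onto [0, 1] (values outside the range are clipped first). A minimal numeric sketch:

import numpy as np

acc = np.array([-5.0, -3.0, 0.0, 3.0, 5.0])  # dummy samples in [G]
acc = np.clip(acc, -3, +3)
print((acc + 3.0) / 6.0)  # [0.  0.  0.5 1.  1. ]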