Source code for epic_kitchens.dataset.epic_dataset

import copy
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union, Iterable

import PIL.Image
from gulpio import GulpDirectory

from epic_kitchens.labels import VERB_CLASS_COL, NOUN_CLASS_COL, UID_COL
from epic_kitchens.dataset.video_dataset import VideoDataset, VideoSegment


# Predicate over a video segment: True keeps the segment in the dataset, False drops it.
SegmentFilter = Callable[[VideoSegment], bool]
# Maps the gulp metadata dictionary of a segment to the class (label) of that segment.
ClassGetter = Callable[[Dict[str, Any]], Any]
# Transform taking a list of PIL images and returning a list of PIL images.
VideoTransform = Callable[[List[PIL.Image.Image]], List[PIL.Image.Image]]


def _verb_class_getter(metadata):
    """Return the value stored under ``VERB_CLASS_COL`` in ``metadata`` as an ``int``."""
    verb_class = metadata[VERB_CLASS_COL]
    return int(verb_class)


def _noun_class_getter(metadata):
    """Return the value stored under ``NOUN_CLASS_COL`` in ``metadata`` as an ``int``."""
    noun_class = metadata[NOUN_CLASS_COL]
    return int(noun_class)


def _verb_noun_class_getter(metadata):
    """Return both classes of a segment as ``{"verb": int, "noun": int}``."""
    return {
        "verb": _verb_class_getter(metadata),
        "noun": _noun_class_getter(metadata),
    }


def _null_class_getter(meta):
    """Return ``None`` whatever the metadata is; used when ``class_type`` is ``None``."""
    return None


# Class getters keyed by ``class_type``. Every entry is a named module-level function
# rather than a lambda, so it can be pickled by reference and has a meaningful name
# when it appears in ``repr`` output.
_class_getters = {
    "verb": _verb_class_getter,
    "noun": _noun_class_getter,
    "verb+noun": _verb_noun_class_getter,
    None: _null_class_getter,
}

# Number of verb classes and number of noun classes.
_verb_class_count = 125
_noun_class_count = 353
# Class count keyed by ``class_type``; ``EpicVideoDataset.__init__`` looks the value up
# here and passes it on to ``VideoDataset.__init__``.
_class_count = {
    "verb": _verb_class_count,
    "noun": _noun_class_count,
    # Joint labels get one count per task, ordered (verb, noun).
    "verb+noun": (_verb_class_count, _noun_class_count),
    # No class type means no classes.
    None: 0,
}


class GulpVideoSegment(VideoSegment):
    """SegmentRecord for a video segment stored in a gulp file.

    Assumes that the video segment has the following metadata in the gulp file:
      - the segment ID, stored under ``UID_COL``
      - num_frames

    Every metadata entry can also be read with item access (``segment["key"]``) or
    attribute access (``segment.key``).
    """

    def __init__(
        self,
        gulp_metadata_dict: Dict[str, Any],
        class_getter: Callable[[Dict[str, Any]], Any],
    ) -> None:
        """
        Args:
            gulp_metadata_dict: Metadata dictionary of the segment; must contain
                ``UID_COL``.
            class_getter: Callable that computes the segment's label from
                ``gulp_metadata_dict``.

        Raises:
            KeyError: If ``gulp_metadata_dict`` has no ``UID_COL`` entry.
        """
        self.metadata = gulp_metadata_dict
        self.class_getter = class_getter
        self.gulp_index = gulp_metadata_dict[UID_COL]

    @property
    def id(self) -> str:
        """ID of video segment"""
        return self.gulp_index

    @property
    def label(self) -> Any:
        """Label of the segment, as computed by the class getter from the metadata."""
        cls = self.class_getter(self.metadata)
        # WARNING: this type check should be removed once we regulp our data
        # so that classes are ints in the metadata json
        if isinstance(cls, float):
            return int(cls)
        return cls

    @property
    def num_frames(self) -> int:
        """Number of video frames"""
        return self.metadata["num_frames"]

    def __getitem__(self, item):
        """Return the metadata entry stored under ``item``."""
        return self.metadata[item]

    def __getattr__(self, item):
        """Fall back to the metadata dictionary for attributes not found normally.

        Raises:
            AttributeError: If ``item`` is not a metadata key, or if the instance has
                no ``metadata`` yet.
        """
        # __getattr__ only runs after normal lookup has failed. ``metadata`` is read
        # straight from the instance dict: on a half-initialised instance (e.g. while
        # being copied or unpickled) ``self.metadata`` would re-enter __getattr__ and
        # recurse without bound.
        try:
            metadata = self.__dict__["metadata"]
        except KeyError:
            raise AttributeError(item) from None
        # Attribute protocols (hasattr, getattr with a default, copy, pickle) expect
        # AttributeError for a missing name, not KeyError.
        try:
            return metadata[item]
        except KeyError:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, item)
            ) from None

    def __str__(self):
        return "GulpVideoSegment[label={label}, num_frames={num_frames}]".format(
            label=self.label, num_frames=self.num_frames
        )

    def __repr__(self):
        return "GulpVideoSegment({metadata}, {class_getter})".format(
            metadata=repr(self.metadata), class_getter=repr(self.class_getter)
        )


class EpicVideoDataset(VideoDataset):
    """VideoDataset for gulped RGB frames"""

    def __init__(
        self,
        gulp_path: Union[Path, str],
        class_type: Optional[str],
        *,
        with_metadata: bool = False,
        class_getter: Optional[ClassGetter] = None,
        segment_filter: Optional[SegmentFilter] = None,
        sample_transform: Optional[VideoTransform] = None
    ) -> None:
        """
        Args:
            gulp_path: Path to gulp directory containing the gulped EPIC RGB or flow frames

            class_type: One of verb, noun, verb+noun, None, determines what label the segment
                returns. ``None`` should be used for loading test datasets.

            with_metadata: When True the segments will yield a tuple (metadata, class) where the
                class is defined by the class getter and the metadata is the raw dictionary stored
                in the gulp file.

            class_getter: Optionally provide a callable that takes in the gulp dict representing the
                segment from which you should return the class you wish the segment to have.

            segment_filter: Optionally provide a callable that takes a segment and returns True if
                you want to keep the segment in the dataset, or False if you wish to exclude it.

            sample_transform: Optionally provide a sample transform function which takes a list of
                PIL images and transforms each of them. This is applied on the frames just before
                returning from :meth:`load_frames`.

        Raises:
            KeyError: If ``class_type`` is not one of the supported values; the class count
                is looked up even when a custom ``class_getter`` is given.
            AssertionError: If ``gulp_path`` does not exist.
        """
        super().__init__(
            _class_count[class_type],
            segment_filter=segment_filter,
            sample_transform=sample_transform,
        )
        if isinstance(gulp_path, str):
            gulp_path = Path(gulp_path)
        # NOTE(review): this check is stripped when Python runs with -O. It is kept as an
        # assert so that callers still see AssertionError for a missing path.
        assert gulp_path.exists(), "Could not find the path {}".format(gulp_path)
        self.gulp_dir = GulpDirectory(str(gulp_path))
        if class_getter is None:
            class_getter = _class_getters[class_type]
        if with_metadata:
            # Bind the plain getter to its own name so the wrapper below does not end up
            # calling itself once ``class_getter`` is rebound.
            original_getter = copy.copy(class_getter)

            def class_getter(metadata):
                return (metadata, original_getter(metadata))

        self._video_segments = self._read_segments(
            self.gulp_dir.merged_meta_dict, class_getter
        )

    @property
    def video_segments(self) -> List[VideoSegment]:
        """
        List of video segments that are present in the dataset. They describe the start and stop
        times of the clip and its class.
        """
        return list(self._video_segments.values())

    def load_frames(
        self, segment: VideoSegment, indices: Optional[Iterable[int]] = None
    ) -> List[PIL.Image.Image]:
        """
        Load frame(s) from gulp directory.

        Args:
            segment: Video segment to load
            indices: Frames indices to read; all frames of the segment when ``None``

        Returns:
            Frames indexed by ``indices`` from the ``segment``.

        """
        if indices is None:
            indices = range(0, segment.num_frames)
        selected_frames = []  # type: List[PIL.Image.Image]
        for i in indices:
            # Each index is read on its own, and the sample transform is applied to the
            # frames of that single read before they are collected.
            frames = self._sample_video_at_index(segment, i)
            frames = self.sample_transform(frames)
            selected_frames.extend(frames)
        return selected_frames

    def __len__(self):
        """Number of video segments in the dataset."""
        # Counting the dict directly avoids copying every segment into a list.
        return len(self._video_segments)

    def __getitem__(self, id):
        """Return the video segment whose ID is ``id``."""
        return self._video_segments[id]

    def __contains__(self, id):
        """Whether a video segment with ID ``id`` is in the dataset."""
        return id in self._video_segments

    def __iter__(self):
        """Iterate over the video segments in ascending order of their IDs."""
        return (self._video_segments[id_] for id_ in sorted(self._video_segments))

    def _read_segments(
        self, gulp_dir_meta_dict, class_getter: Callable[[Dict[str, Any]], Any]
    ) -> Dict[str, VideoSegment]:
        """Build the segments accepted by the segment filter, keyed by segment ID."""
        segments = dict()  # type: Dict[str, VideoSegment]
        for video_id in gulp_dir_meta_dict:
            # Only the first ``meta_data`` entry of each video is used.
            segment = GulpVideoSegment(
                gulp_dir_meta_dict[video_id]["meta_data"][0], class_getter
            )
            if self.segment_filter(segment):
                segments[segment.id] = segment
        return segments

    def _sample_video_at_index(
        self, record: VideoSegment, index: int
    ) -> List[PIL.Image.Image]:
        """Read the frame at ``index`` of ``record`` as a one-element list of RGB images."""
        # Without passing a slice to the gulp directory index we load ALL the frames
        # so we create a slice with a single element -- that way we only read a single frame
        # from the gulp chunk, and not the whole chunk.
        single_frame_slice = slice(index, index + 1)
        numpy_frame = self.gulp_dir[record.id, single_frame_slice][0][0]
        return [PIL.Image.fromarray(numpy_frame).convert("RGB")]


class EpicVideoFlowDataset(EpicVideoDataset):
    r"""VideoDataset for loading gulped flow. The loader assumes that flow :math:`u`, :math:`v`
    frames are stored alternately in a flat manner: :math:`[u_0, v_0, u_1, v_1, \ldots, u_n, v_n]`

    """

    def _sample_video_at_index(
        self, record: VideoSegment, index: int
    ) -> List[PIL.Image.Image]:
        """Read the flow pair at ``index`` of ``record`` as greyscale (mode ``L``) images."""
        # Flow pairs are stored in a contiguous manner in the gulp chunk:
        # [u_0, v_0, u_1, v_1, ..., u_n, v_n]
        # so we have to convert our desired frame index i to the gulp
        # indices j by j = (i * 2, (i + 1) * 2)
        flow_pair_slice = slice(index * 2, (index + 1) * 2)
        numpy_frames = self.gulp_dir[record.id, flow_pair_slice][0]
        return [
            PIL.Image.fromarray(numpy_frame).convert("L")
            for numpy_frame in numpy_frames
        ]