Source code for epic_kitchens.dataset.epic_dataset

import copy
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union, Iterable

import PIL.Image
from gulpio import GulpDirectory

from epic_kitchens.labels import VERB_CLASS_COL, NOUN_CLASS_COL, UID_COL
from epic_kitchens.dataset.video_dataset import VideoDataset, VideoSegment


# Predicate over a video segment: return True to keep the segment in the dataset,
# False to exclude it.
SegmentFilter = Callable[[VideoSegment], bool]
# Maps the gulp metadata dictionary of a segment to the label of that segment.
ClassGetter = Callable[[Dict[str, Any]], Any]
# Transformation applied to a list of PIL frames, producing a list of PIL frames.
VideoTransform = Callable[[List[PIL.Image.Image]], List[PIL.Image.Image]]


def _verb_class_getter(metadata: Dict[str, Any]) -> int:
    """Return the verb class stored under ``VERB_CLASS_COL`` in ``metadata`` as an int."""
    verb_class = metadata[VERB_CLASS_COL]
    return int(verb_class)


def _noun_class_getter(metadata: Dict[str, Any]) -> int:
    """Return the noun class stored under ``NOUN_CLASS_COL`` in ``metadata`` as an int."""
    noun_class = metadata[NOUN_CLASS_COL]
    return int(noun_class)


def _verb_noun_class_getter(metadata):
    """Return both labels of a segment as ``{"verb": <int>, "noun": <int>}``."""
    return {
        "verb": _verb_class_getter(metadata),
        "noun": _noun_class_getter(metadata),
    }


def _no_class_getter(meta):
    """Return ``None`` whatever the metadata is (no label requested)."""
    return None


# Label extractors keyed by ``class_type``.
#
# Every entry is a named module-level function rather than a lambda: the chosen
# getter is stored on each segment, and lambdas cannot be pickled, which would
# prevent segments from being sent to other processes.
_class_getters = {
    "verb": _verb_class_getter,
    "noun": _noun_class_getter,
    "verb+noun": _verb_noun_class_getter,
    None: _no_class_getter,
}  # type: Dict[Optional[str], ClassGetter]

# Number of verb classes and number of noun classes.
_verb_class_count = 125
_noun_class_count = 353
# Class count for each supported ``class_type``; EpicVideoDataset passes the
# selected entry to ``VideoDataset.__init__``. "verb+noun" yields a
# (verb count, noun count) tuple and ``None`` (no labels) yields 0.
_class_count = {
    "verb": _verb_class_count,
    "noun": _noun_class_count,
    "verb+noun": (_verb_class_count, _noun_class_count),
    None: 0,
}


class GulpVideoSegment(VideoSegment):
    """SegmentRecord for a video segment stored in a gulp file.

    Assumes that the video segment has the following metadata in the gulp file:

    - id
    - num_frames

    Metadata entries can be read with item access (``segment["key"]``) and, for
    keys that do not clash with a real attribute, attribute access
    (``segment.key``).
    """

    def __init__(
        self,
        gulp_metadata_dict: Dict[str, Any],
        class_getter: Callable[[Dict[str, Any]], Any],
    ) -> None:
        """
        Args:
            gulp_metadata_dict: Metadata dictionary of the segment; must contain
                the ``UID_COL`` key, whose value becomes the segment id.
            class_getter: Callable that is given ``gulp_metadata_dict`` and
                returns the label of the segment.
        """
        self.metadata = gulp_metadata_dict
        self.class_getter = class_getter
        self.gulp_index = gulp_metadata_dict[UID_COL]

    @property
    def id(self) -> str:
        """ID of video segment"""
        return self.gulp_index

    @property
    def label(self) -> Any:
        """Label of the segment as computed by the class getter."""
        cls = self.class_getter(self.metadata)
        # WARNING: this type check should be removed once we regulp our data
        # so that classes are ints in the metadata json
        if isinstance(cls, float):
            return int(cls)
        return cls

    @property
    def num_frames(self) -> int:
        """Number of video frames"""
        return self.metadata["num_frames"]

    def __getitem__(self, item):
        """Return the metadata entry stored under ``item``."""
        return self.metadata[item]

    def __getattr__(self, item):
        """Expose metadata entries as attributes.

        Only called when normal attribute lookup fails.

        Raises:
            AttributeError: if ``item`` is not a metadata key, or if the instance
                has no metadata yet.
        """
        # ``copy`` and ``pickle`` create a blank instance (``__init__`` is not run)
        # and probe it for ``__setstate__``. Reading ``self.metadata`` at that
        # point would re-enter this method without end, so look in ``__dict__``.
        try:
            metadata = self.__dict__["metadata"]
        except KeyError:
            raise AttributeError(item) from None
        # The attribute protocol (``hasattr``, ``getattr`` with a default,
        # ``copy.deepcopy``) relies on AttributeError, not KeyError.
        try:
            return metadata[item]
        except KeyError:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, item)
            ) from None

    def __str__(self):
        return "GulpVideoSegment[label={label}, num_frames={num_frames}]".format(
            label=self.label, num_frames=self.num_frames
        )

    def __repr__(self):
        return "GulpVideoSegment({metadata}, {class_getter})".format(
            metadata=repr(self.metadata), class_getter=repr(self.class_getter)
        )


class EpicVideoDataset(VideoDataset):
    """VideoDataset for gulped RGB frames"""

    def __init__(
        self,
        gulp_path: Union[Path, str],
        class_type: Optional[str],
        *,
        with_metadata: bool = False,
        class_getter: Optional[ClassGetter] = None,
        segment_filter: Optional[SegmentFilter] = None,
        sample_transform: Optional[VideoTransform] = None
    ) -> None:
        """
        Args:
            gulp_path: Path to gulp directory containing the gulped EPIC RGB or flow frames
            class_type: One of verb, noun, verb+noun, None, determines what label the segment
                returns. ``None`` should be used for loading test datasets.
            with_metadata: When True the segments will yield a tuple (metadata, class) where
                the class is defined by the class getter and the metadata is the raw dictionary
                stored in the gulp file.
            class_getter: Optionally provide a callable that takes in the gulp dict representing
                the segment from which you should return the class you wish the segment to have.
            segment_filter: Optionally provide a callable that takes a segment and returns True
                if you want to keep the segment in the dataset, or False if you wish to exclude
                it.
            sample_transform: Optionally provide a sample transform function which takes a list
                of PIL images and transforms each of them. This is applied on the frames just
                before returning from :meth:`load_frames`.

        Raises:
            KeyError: if ``class_type`` is not one of the supported values (this is
                checked even when a custom ``class_getter`` is given).
            AssertionError: if ``gulp_path`` does not exist.
        """
        super().__init__(
            _class_count[class_type],
            segment_filter=segment_filter,
            sample_transform=sample_transform,
        )
        gulp_path = Path(gulp_path)
        assert gulp_path.exists(), "Could not find the path {}".format(gulp_path)
        self.gulp_dir = GulpDirectory(str(gulp_path))

        if class_getter is None:
            class_getter = _class_getters[class_type]
        if with_metadata:
            # Keep the wrapped getter under its own name: ``class_getter`` is
            # rebound to the wrapper below, so referring to it from inside the
            # wrapper would make the wrapper call itself.
            original_getter = copy.copy(class_getter)
            class_getter = lambda metadata: (metadata, original_getter(metadata))
        self._video_segments = self._read_segments(
            self.gulp_dir.merged_meta_dict, class_getter
        )

    @property
    def video_segments(self) -> List[VideoSegment]:
        """
        List of video segments that are present in the dataset. They describe the start
        and stop times of the clip and its class.
        """
        return list(self._video_segments.values())

    def load_frames(
        self, segment: VideoSegment, indices: Optional[Iterable[int]] = None
    ) -> List[PIL.Image.Image]:
        """
        Load frame(s) from gulp directory.

        Args:
            segment: Video segment to load
            indices: Frames indices to read; all frames of the segment when ``None``

        Returns:
            Frames indexed by ``indices`` from the ``segment``.
        """
        if indices is None:
            indices = range(0, segment.num_frames)
        selected_frames = []  # type: List[PIL.Image.Image]
        for i in indices:
            # Without passing a slice to the gulp directory index we load ALL the frames
            # so we create a slice with a single element -- that way we only read a single
            # frame from the gulp chunk, and not the whole chunk.
            frames = self._sample_video_at_index(segment, i)
            # The sample transform is applied separately to what was read for each index.
            frames = self.sample_transform(frames)
            selected_frames.extend(frames)
        return selected_frames

    def __len__(self):
        """Number of segments kept in the dataset."""
        # Ask the dict directly instead of building a list only to measure it.
        return len(self._video_segments)

    def __getitem__(self, id):
        """Return the segment whose id is ``id``."""
        return self._video_segments[id]

    def __contains__(self, id):
        """Tell whether a segment with id ``id`` is in the dataset."""
        return id in self._video_segments

    def __iter__(self):
        """Iterate over the segments in ascending order of their ids."""
        return (self._video_segments[id_] for id_ in sorted(self._video_segments))

    def _read_segments(
        self, gulp_dir_meta_dict, class_getter: Callable[[Dict[str, Any]], Any]
    ) -> Dict[str, VideoSegment]:
        """Build the segments accepted by the segment filter, keyed by segment id."""
        segments = dict()  # type: Dict[str, VideoSegment]
        for video_id in gulp_dir_meta_dict:
            # Only the first ``meta_data`` entry of each gulp item is used.
            segment = GulpVideoSegment(
                gulp_dir_meta_dict[video_id]["meta_data"][0], class_getter
            )
            if self.segment_filter(segment):
                segments[segment.id] = segment
        return segments

    def _sample_video_at_index(
        self, record: VideoSegment, index: int
    ) -> List[PIL.Image.Image]:
        """Read the frame at ``index`` of ``record`` as a one-element list of RGB images."""
        single_frame_slice = slice(index, index + 1)
        numpy_frame = self.gulp_dir[record.id, single_frame_slice][0][0]
        return [PIL.Image.fromarray(numpy_frame).convert("RGB")]


class EpicVideoFlowDataset(EpicVideoDataset):
    r"""VideoDataset for loading gulped flow.

    The loader assumes that flow :math:`u`, :math:`v` frames are stored alternately
    in a flat manner: :math:`[u_0, v_0, u_1, v_1, \ldots, u_n, v_n]`
    """

    def _sample_video_at_index(
        self, record: VideoSegment, index: int
    ) -> List[PIL.Image.Image]:
        """Read the flow frames at ``index`` of ``record`` as greyscale ("L") images."""
        # Flow pairs are stored in a contiguous manner in the gulp chunk:
        # [u_0, v_0, u_1, v_1, ..., u_n, v_n]
        # so the desired frame index i maps to the gulp slice
        # [i * 2, (i + 1) * 2), which holds u_i followed by v_i.
        flow_pair_slice = slice(index * 2, (index + 1) * 2)
        numpy_frames = self.gulp_dir[record.id, flow_pair_slice][0]
        return [
            PIL.Image.fromarray(numpy_frame).convert("L")
            for numpy_frame in numpy_frames
        ]